Use MetaTensor instead of Initializer

pull/7373/head
lilei 4 years ago
parent 56b7562bf5
commit 7b135990e7

@@ -15,16 +15,13 @@
"""Initializer for cell parameters."""
import numbers
import math
import copy
from functools import reduce
import numpy as np
from scipy.stats import truncnorm
from mindspore import log as logger
from . import dtype as mstype
from .tensor import Tensor
from .seed import get_seed
from .tensor import Tensor, MetaTensor
from .._c_expression import random_normal
_INITIALIZER_ALIAS = dict()
@@ -52,54 +49,6 @@ class Initializer:
def __call__(self, arr):
return self._initialize(arr)
@property
def shape(self):
return self._shape
@shape.setter
def shape(self, shape):
self._shape = shape
@property
def dtype(self):
return self._dtype
@dtype.setter
def dtype(self, dtype):
self._dtype = dtype
def to_tensor(self, slice_index=None, shape=None):
"""
Get the tensor format data of this Initializer.
Args:
slice_index (int): Slice index of a parameter's slices.
It is used when initialize a slice of a parameter, it guarantees that devices
using the same slice can generate the same tensor.
shape (list[int]): Shape of the slice, it is used when initialize a slice of the parameter.
"""
arr = None
if shape is None:
shape = self.shape
try:
arr = np.ndarray(shape, dtype=mstype.dtype_to_nptype(self.dtype))
except ValueError:
msg = "Error shape={}".format(shape)
logger.error(msg)
raise ValueError(msg)
global_seed = get_seed()
need_set_seed = ((slice_index is not None) and (global_seed is None))
seed_saved = np.random.get_state()[1][0]
if need_set_seed:
np.random.seed(slice_index)
self.__call__(arr)
if need_set_seed:
np.random.seed(seed_saved)
return Tensor(arr, dtype=self.dtype)
def _register(*aliases):
"""Return the alias register."""
def alias_reg(cls):
@@ -478,27 +427,16 @@ def initializer(init, shape=None, dtype=mstype.float32):
if not isinstance(value, int) or value <= 0:
raise ValueError(f"shape is invalid, shape value must be positive integer, shape:{shape}")
if isinstance(init, Initializer):
init_copy = copy.deepcopy(init)
init_copy.shape = shape if shape is not None else init.shape
init_copy.dtype = init.dtype if init.dtype is not None else dtype
return init_copy
if isinstance(init, str):
init_obj = _INITIALIZER_ALIAS[init.lower()]()
if init_obj is None:
init = _INITIALIZER_ALIAS[init.lower()]()
if init is None:
raise ValueError("The class corresponding to '{}' was not found.".format(init))
init = init_obj
init.shape = shape
init.dtype = dtype
return init
if isinstance(init, numbers.Number):
init_obj = Constant(init)
init_obj.shape = shape
init_obj.dtype = dtype
elif isinstance(init, numbers.Number):
init = Constant(init)
shape = shape if shape is not None else init.shape
dtype = init.dtype if init.dtype is not None else dtype
init_obj = MetaTensor(init, dtype, shape)
return init_obj
raise TypeError("Unsupported init type '{}'.".format(type(init)))
__all__ = [
'Initializer',

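A minimal sketch of what the reworked initializer() now hands back, inferred from this diff and the tests further down (the alias, shape and dtype values are illustrative):

import mindspore as ms
import mindspore.common.initializer as init
from mindspore.common.tensor import MetaTensor

# initializer() now wraps the init method, dtype and shape into a MetaTensor
# instead of returning the Initializer object itself; no data is allocated yet.
meta = init.initializer('normal', [5, 4], ms.float32)
assert isinstance(meta, MetaTensor)

# to_tensor() materializes the data by running the init method on a numpy buffer.
data = meta.to_tensor()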
@@ -16,8 +16,9 @@
"""Parameter for cell."""
from copy import copy
from .._c_expression import ParamInfo
from .._c_expression import MetaTensor as MetaTensor_
from . import dtype as mstype
from .initializer import initializer, Initializer
from .initializer import initializer
from .tensor import Tensor, MetaTensor
from .._checkparam import _check_str_by_regular
from ..parallel._tensor import _get_slice_index
@@ -34,14 +35,14 @@ def _is_in_parallel_mode():
return auto_parallel_context().get_parallel_mode() in ["semi_auto_parallel", "auto_parallel"]
class Parameter(MetaTensor):
class Parameter(MetaTensor_):
"""
Parameter types of cell models.
After being initialized, a `Parameter` is a subtype of `Tensor`.
In auto_parallel mode of "semi_auto_parallel" and "auto_parallel", if init `Parameter` by
an `Initializer`, the type of Parameter will be `MetaTensor` not `Tensor`. `MetaTensor`
a `MetaTensor`, the type of Parameter will be `MetaTensor`, not `Tensor`. `MetaTensor_`
only saves the shape and type info of a tensor with no memory usage. The shape can be changed while
compiling for auto-parallel. Calling `init_data` will return a Tensor Parameter with initialized data.
@@ -52,7 +53,7 @@ class Parameter(MetaTensor):
then the Parameters as this part of the inputs are not allowed to be cast.
Args:
default_input (Union[Tensor, Initializer, Number]): Parameter data, to be set initialized.
default_input (Union[Tensor, MetaTensor, Number]): Parameter data to be initialized.
name (str): Name of the child parameter.
requires_grad (bool): True if the parameter requires gradient. Default: True.
layerwise_parallel (bool): A kind of model parallel mode. When layerwise_parallel is true in parallel mode,
@@ -94,9 +95,9 @@ class Parameter(MetaTensor):
input_class.__init__(obj, *class_init_args)
# it's better to make the Initializer a kind of metatensor.
obj.init_mode = None
obj.is_default_input_initializer = False
if isinstance(default_input, Initializer):
obj.is_default_input_initializer = True
obj.is_default_input_meta = False
if isinstance(default_input, MetaTensor):
obj.is_default_input_meta = True
if not isinstance(obj, Tensor):
obj.init_mode = default_input
return obj
@@ -142,10 +143,10 @@ class Parameter(MetaTensor):
"""Set `set_data` of current `Parameter`."""
if isinstance(data, bool):
raise ValueError('Parameter data can not be `bool`')
if isinstance(data, Initializer):
if isinstance(data, MetaTensor):
if _is_in_parallel_mode():
# do not init data while in auto parallel.
return (MetaTensor, data.dtype, data.shape)
return (MetaTensor_, data.dtype, data.shape)
data = data.to_tensor()
if isinstance(data, Tensor):
# make a copy of Tensor to init the parameter
@@ -257,7 +258,7 @@ class Parameter(MetaTensor):
Args:
prefix (str): Namespace of the parameter. The cloned Parameter's name is the
combination of the prefix and the current name: `f"{prefix}.{self.name}"`.
init (Union[Tensor, str, Initializer, numbers.Number]): Initialize the shape of the parameter.
init (Union[Tensor, str, MetaTensor, numbers.Number]): Initialize the shape of the parameter.
Default: 'same'.
Returns:
@@ -314,7 +315,7 @@ class Parameter(MetaTensor):
Set `set_data` of current `Parameter`.
Args:
data (Union[Tensor, Initializer, int, float]): new data.
data (Union[Tensor, MetaTensor, int, float]): new data.
slice_shape (bool): If the Parameter is sliced, the shape is not checked for a match. Default: False.
Returns:
@@ -325,9 +326,9 @@ class Parameter(MetaTensor):
f"Current dtype is {self.dtype}, and incoming is {incoming}. "
f"Use .set_dtype(xxx) to change the dtype.")
if not isinstance(data, (MetaTensor, Initializer, int, float)):
raise TypeError(f"Parameter data must be [`Initializer`, `int`, `float`] or a kind of `MetaTensor` "
f"(like `Tensor` or `MetaTensor`). But with type {type(data)}.")
if not isinstance(data, (MetaTensor_, int, float)):
raise TypeError(f"Parameter data must be [`MetaTensor`, `int`, `float`] or a kind of `MetaTensor_` "
f"(like `Tensor` or `MetaTensor_`). But with type {type(data)}.")
if isinstance(data, (int, float)):
if self.dtype in mstype.int_type and isinstance(data, float):
raise_type_error(mstype.float_)
@@ -337,8 +338,8 @@ class Parameter(MetaTensor):
is_current_tensor = isinstance(self, Tensor)
if is_incoming_tensor and not is_current_tensor:
raise TypeError("Parameter is a `MetaTensor` and not initializered, `data` for `set_data`"
"should be a Initializer. If you want to update it by Tensor, call method"
raise TypeError("Parameter is a `MetaTensor_` and not initialized, `data` for `set_data`"
"should be a MetaTensor. If you want to update it by Tensor, call method"
"`init_parameters_data` of `Cell` to init and replace all the Parameter of"
"network, then call this method.")
if tuple(self.shape) != tuple(data.shape):
@@ -351,7 +352,7 @@ class Parameter(MetaTensor):
raise_type_error(data.dtype)
else:
data = Tensor(data, self.dtype)
if isinstance(data, Initializer):
if isinstance(data, MetaTensor):
# The parameter has been initialized; directly update it with the data
if is_current_tensor:
self._update_tensor_data(data.to_tensor())
@@ -387,10 +388,10 @@ class Parameter(MetaTensor):
Parameter, the `Parameter` after initializing data. If the current `Parameter` was already initialized,
the same initialized `Parameter` is returned.
"""
if self.is_default_input_initializer:
if self.is_default_input_meta:
is_current_in_parallel = _is_in_parallel_mode()
if self.is_in_parallel != is_current_in_parallel:
raise RuntimeError("Must set or change parallel mode before any Initializer created.")
raise RuntimeError("Must set or change parallel mode before any MetaTensor created.")
if self.init_mode is None:
return self
if self.inited_param is not None:
@@ -401,12 +402,12 @@ class Parameter(MetaTensor):
if len(layout) < 3:
raise ValueError("The length of layout must be larger than 3! layout is {}.".format(layout))
slice_index = int(_get_slice_index(layout[0], layout[1]))
if (self.init_in_server and self.is_param_ps and isinstance(self.init_mode, Initializer)):
if (self.init_in_server and self.is_param_ps and isinstance(self.init_mode, MetaTensor)):
data = self.init_mode.to_tensor(0, [1])
else:
data = self.init_mode.to_tensor(slice_index, layout[2])
else:
if (self.init_in_server and self.is_param_ps and isinstance(self.init_mode, Initializer)):
if (self.init_in_server and self.is_param_ps and isinstance(self.init_mode, MetaTensor)):
data = self.init_mode.to_tensor(0, [1])
else:
data = self.init_mode.to_tensor()
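A short, hedged sketch of the Parameter flow changed above (the parameter name and shape are made up for illustration): a Parameter built from a MetaTensor carries only shape and dtype until init_data() is called, which then initializes the full tensor, or the per-device slice via to_tensor(slice_index, layout[2]) in auto-parallel mode.

import mindspore as ms
from mindspore.common.initializer import initializer
from mindspore.common.parameter import Parameter

# default_input may be a Tensor, a MetaTensor or a Number; a MetaTensor uses no
# memory for data before initialization.
weight = Parameter(initializer('XavierUniform', [64, 3, 7, 7], ms.float32), name='weight')

# init_data() materializes the data; if the Parameter was already initialized,
# the same initialized Parameter is returned.
weight = weight.init_data()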

@@ -15,8 +15,9 @@
"""Tensor implementation."""
import numpy as np
from mindspore import log as logger
from .._c_expression import Tensor as Tensor_
from .._c_expression import MetaTensor
from .._c_expression import MetaTensor as MetaTensor_
from .._checkparam import check_type, check_typename
from . import dtype as mstype
from ._register_for_tensor import tensor_operator_registry
@@ -395,6 +396,50 @@ class SparseTensor:
return self.__dense_shape
class MetaTensor(MetaTensor_):
"""
The MetaTensor class.
Records a tensor's basic attributes (dtype and shape) together with the method used to initialize model weight values.
Returns:
Array, an array after being initialized.
"""
def __init__(self, init, dtype, shape):
#check param
self.init = init
MetaTensor_.__init__(self, dtype, shape)
def to_tensor(self, slice_index=None, shape=None):
"""
Get the tensor format data of this MetaTensor.
Args:
slice_index (int): Slice index of a parameter's slices.
It is used when initializing a slice of a parameter; it guarantees that devices
using the same slice can generate the same tensor.
shape (list[int]): Shape of the slice; it is used when initializing a slice of the parameter.
"""
if shape is None:
shape = self.shape
try:
arr = np.ndarray(shape, dtype=mstype.dtype_to_nptype(self.dtype))
except ValueError:
msg = "Error shape={}".format(shape)
logger.error(msg)
raise ValueError(msg)
from .seed import get_seed
global_seed = get_seed()
need_set_seed = ((slice_index is not None) and (global_seed is None))
seed_saved = np.random.get_state()[1][0]
if need_set_seed:
np.random.seed(slice_index)
self.init(arr)
if need_set_seed:
np.random.seed(seed_saved)
return Tensor(arr, dtype=self.dtype)
def _vm_compare(*args):
"""Implement `vm_compare` for tensor."""
obj_str = args[-1]

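A sketch of the seeding behaviour implemented in MetaTensor.to_tensor() above: when slice_index is given and no global seed is set, numpy is temporarily seeded with the slice index, so devices holding the same slice generate identical values for initializers that draw from numpy's global random state (XavierUniform here; the shapes are illustrative).

import mindspore as ms
from mindspore.common.initializer import XavierUniform
from mindspore.common.tensor import MetaTensor

meta = MetaTensor(XavierUniform(), ms.float32, (8, 8))

# Same slice_index -> numpy is seeded identically -> identical slice data,
# provided no global seed was set beforehand; the previous RNG state is restored afterwards.
a = meta.to_tensor(slice_index=3, shape=[2, 8])
b = meta.to_tensor(slice_index=3, shape=[2, 8])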
@@ -32,7 +32,6 @@ from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops import composite as CP
from mindspore.nn.optim.momentum import Momentum
from mindspore.common.initializer import initializer
from mindspore.nn.wrap.cell_wrapper import WithLossCell
random.seed(1)
@@ -43,14 +42,6 @@ ds.config.set_seed(1)
grad_by_list = CP.GradOperation(get_by_list=True)
def weight_variable(shape):
return initializer('XavierUniform', shape=shape, dtype=mstype.float32)
def weight_variable_uniform(shape):
return initializer('Uniform', shape=shape, dtype=mstype.float32)
def weight_variable_0(shape):
zeros = np.zeros(shape).astype(np.float32)
return Tensor(zeros)
@@ -63,26 +54,23 @@ def weight_variable_1(shape):
def conv3x3(in_channels, out_channels, stride=1, padding=0):
"""3x3 convolution """
weight_shape = (out_channels, in_channels, 3, 3)
weight = weight_variable(weight_shape)
return nn.Conv2d(in_channels, out_channels,
kernel_size=3, stride=stride, padding=padding, weight_init=weight, has_bias=False, pad_mode="same")
kernel_size=3, stride=stride, padding=padding, weight_init='XavierUniform',
has_bias=False, pad_mode="same")
def conv1x1(in_channels, out_channels, stride=1, padding=0):
"""1x1 convolution"""
weight_shape = (out_channels, in_channels, 1, 1)
weight = weight_variable(weight_shape)
return nn.Conv2d(in_channels, out_channels,
kernel_size=1, stride=stride, padding=padding, weight_init=weight, has_bias=False, pad_mode="same")
kernel_size=1, stride=stride, padding=padding, weight_init='XavierUniform',
has_bias=False, pad_mode="same")
def conv7x7(in_channels, out_channels, stride=1, padding=0):
"""1x1 convolution"""
weight_shape = (out_channels, in_channels, 7, 7)
weight = weight_variable(weight_shape)
return nn.Conv2d(in_channels, out_channels,
kernel_size=7, stride=stride, padding=padding, weight_init=weight, has_bias=False, pad_mode="same")
kernel_size=7, stride=stride, padding=padding, weight_init='XavierUniform',
has_bias=False, pad_mode="same")
def bn_with_initialize(out_channels):
@@ -90,8 +78,7 @@ def bn_with_initialize(out_channels):
mean = weight_variable_0(shape)
var = weight_variable_1(shape)
beta = weight_variable_0(shape)
gamma = weight_variable_uniform(shape)
bn = nn.BatchNorm2d(out_channels, momentum=0.99, eps=0.00001, gamma_init=gamma,
bn = nn.BatchNorm2d(out_channels, momentum=0.99, eps=0.00001, gamma_init='Uniform',
beta_init=beta, moving_mean_init=mean, moving_var_init=var)
return bn
@@ -101,18 +88,13 @@ def bn_with_initialize_last(out_channels):
mean = weight_variable_0(shape)
var = weight_variable_1(shape)
beta = weight_variable_0(shape)
gamma = weight_variable_uniform(shape)
bn = nn.BatchNorm2d(out_channels, momentum=0.99, eps=0.00001, gamma_init=gamma,
bn = nn.BatchNorm2d(out_channels, momentum=0.99, eps=0.00001, gamma_init='Uniform',
beta_init=beta, moving_mean_init=mean, moving_var_init=var)
return bn
def fc_with_initialize(input_channels, out_channels):
weight_shape = (out_channels, input_channels)
weight = weight_variable(weight_shape)
bias_shape = (out_channels)
bias = weight_variable_uniform(bias_shape)
return nn.Dense(input_channels, out_channels, weight, bias)
return nn.Dense(input_channels, out_channels, weight_init='XavierUniform', bias_init='Uniform')
class ResidualBlock(nn.Cell):

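The test changes above drop the weight_variable helpers and pass initializer aliases straight to the layer constructors; a condensed sketch of that pattern (channel counts are illustrative):

import mindspore.nn as nn

# The layer resolves the weight shape itself, so no pre-built weight tensor is needed.
conv = nn.Conv2d(3, 64, kernel_size=7, stride=2, weight_init='XavierUniform',
                 has_bias=False, pad_mode="same")
bn = nn.BatchNorm2d(64, momentum=0.99, eps=0.00001, gamma_init='Uniform')
fc = nn.Dense(64, 10, weight_init='XavierUniform', bias_init='Uniform')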
@@ -20,7 +20,7 @@ import mindspore as ms
import mindspore.nn as nn
from mindspore import Tensor
from mindspore import context
from mindspore.common import MetaTensor
from mindspore._c_expression import MetaTensor as MetaTensor_
from mindspore.common import dtype
from mindspore.common.api import ms_function
from mindspore.ops import functional as F
@@ -70,8 +70,8 @@ def scalar_mul_while(x):
return rv
@ms_function(input_signature=(MetaTensor(dtype.float32, (1, 1, 3, 3)),
MetaTensor(dtype.float32, (1, 1, 3, 3))))
@ms_function(input_signature=(MetaTensor_(dtype.float32, (1, 1, 3, 3)),
MetaTensor_(dtype.float32, (1, 1, 3, 3))))
def tensor_add_test(x, y):
""" tensor_add_test """
z = F.tensor_add(x, y)

@@ -24,7 +24,7 @@ import mindspore.common.initializer as init
import mindspore.nn as nn
from mindspore import context
from mindspore.common.parameter import Parameter
from mindspore.common.tensor import Tensor
from mindspore.common.tensor import Tensor, MetaTensor
from mindspore.nn import Conv2d
from mindspore.ops import operations as P
from ..ut_filter import non_graph_engine
@@ -58,7 +58,7 @@ def _check_uniform(tensor, boundary_a, boundary_b):
def test_init_Initializer():
tensor = init.initializer(InitTwo(), [2, 2], ms.int32)
assert tensor.shape == (2, 2)
assert tensor.shape == [2, 2]
_check_value(tensor.to_tensor(), 2, 2)
@@ -119,22 +119,22 @@ def test_init_uniform_alias():
def test_init_normal():
tensor = init.initializer(init.Normal(), [5, 4], ms.float32)
assert isinstance(tensor, init.Normal), 'Normal init failed!'
assert isinstance(tensor, MetaTensor), 'Normal init failed!'
def test_init_truncated_normal():
tensor = init.initializer(init.TruncatedNormal(), [5, 4], ms.float32)
assert isinstance(tensor, init.TruncatedNormal), 'TruncatedNormal init failed!'
assert isinstance(tensor, MetaTensor), 'TruncatedNormal init failed!'
def test_init_normal_alias():
tensor = init.initializer('normal', [5, 4], ms.float32)
assert isinstance(tensor, init.Normal), 'Normal init failed!'
assert isinstance(tensor, MetaTensor), 'Normal init failed!'
def test_init_truncatednormal_alias():
tensor = init.initializer('truncatednormal', [5, 4], ms.float32)
assert isinstance(tensor, init.TruncatedNormal), 'TruncatedNormal init failed!'
assert isinstance(tensor, MetaTensor), 'TruncatedNormal init failed!'
def test_init_abnormal():
@@ -144,15 +144,7 @@ def test_init_abnormal():
def test_initializer_reinit():
weights = init.initializer("XavierUniform", shape=(10, 1, 10, 10), dtype=ms.float16)
assert weights.dtype == ms.float16
assert weights.shape == (10, 1, 10, 10)
weights = init.initializer(weights)
assert weights.dtype == ms.float16
assert weights.shape == (10, 1, 10, 10)
weights.shape = None
weights = init.initializer(weights, (10, 1))
assert weights.dtype == ms.float16
assert weights.shape == (10, 1)
assert isinstance(weights, MetaTensor), 'XavierUniform init failed!'
def test_init_xavier_uniform():
