!9257 make parameter name optional

From: @caozhou_huawei
Reviewed-by: @kingxian,@zh_qh
Signed-off-by: @kingxian
pull/9257/MERGE
Committed-by: mindspore-ci-bot (via Gitee)
commit 825ba197f2

@@ -30,6 +30,7 @@ __all__ = ['Parameter', 'ParameterTuple']
PARAMETER_NAME_DEFAULT = "Parameter"
PARAMETER_NAME_PREFIX_MAX_LEN = 1024

def _is_in_parallel_mode():
    """Get parallel mode."""
    return auto_parallel_context().get_parallel_mode() in ["semi_auto_parallel", "auto_parallel"]

@@ -51,10 +52,12 @@ class Parameter(MetaTensor_):
    A Parameter has to belong to a Cell.
    If there is an operator in the network that requires part of the inputs to be Parameter,
    then the Parameters as this part of the inputs are not allowed to be cast.
+    It is recommended to use the default value of `name` when initialize a parameter as one attribute of a cell,
+    otherwise, the parameter name may be different than expected.

    Args:
        default_input (Union[Tensor, MetaTensor, Number]): Parameter data, to be set initialized.
-        name (str): Name of the child parameter.
+        name (str): Name of the child parameter. Default: None.
        requires_grad (bool): True if the parameter requires gradient. Default: True.
        layerwise_parallel (bool): A kind of model parallel mode. When layerwise_parallel is true in parallel mode,
            broadcast and gradients communication would not be applied to parameters. Default: False.

@@ -72,7 +75,7 @@ class Parameter(MetaTensor_):
        >>>     def __init__(self):
        >>>         super(Net, self).__init__()
        >>>         self.matmul = P.MatMul()
-        >>>         self.weight = Parameter(Tensor(np.ones((1,2))), name="w", requires_grad=True)
+        >>>         self.weight = Parameter(Tensor(np.ones((1,2))), requires_grad=True)
        >>>
        >>>     def construct(self, x):
        >>>         out = self.matmul(self.weight, x)

@@ -88,7 +91,7 @@ class Parameter(MetaTensor_):
    """
    __base_type__ = {}

-    def __new__(cls, default_input, name, *args, **kwargs):
+    def __new__(cls, default_input, *args, **kwargs):
        input_class, *class_init_args = Parameter._get_parameter_new_args(default_input)
        new_type = Parameter._get_base_class(input_class)
        obj = input_class.__new__(new_type)

@@ -112,7 +115,7 @@ class Parameter(MetaTensor_):
        return (
            Parameter, (data, self.name, self.requires_grad, self.layerwise_parallel))

-    def __init__(self, default_input, name, requires_grad=True, layerwise_parallel=False):
+    def __init__(self, default_input, name=None, requires_grad=True, layerwise_parallel=False):
        self._param_info = ParamInfo()
        self.name = name
        self.requires_grad = requires_grad
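With `name` now optional, a Parameter can be created without an explicit name and still end up with a sensible one once it is attached to a Cell. A minimal usage sketch (not part of the commit; it assumes the post-change behaviour described in this diff, where an unnamed Parameter starts with the module-level default name and is renamed when registered on a Cell, see the cell.py hunk further down):

import numpy as np
import mindspore.nn as nn
import mindspore.ops.operations as P
from mindspore import Tensor, Parameter

class Net(nn.Cell):
    def __init__(self):
        super(Net, self).__init__()
        self.matmul = P.MatMul()
        # No `name` argument: the parameter is created with the default name
        # and picks up the attribute name "weight" when assigned to the Cell.
        self.weight = Parameter(Tensor(np.ones((1, 2)).astype(np.float32)), requires_grad=True)

    def construct(self, x):
        return self.matmul(self.weight, x)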
@@ -276,24 +279,20 @@ class Parameter(MetaTensor_):
        """
        self._is_init = is_init_

-    def clone(self, prefix, init='same'):
+    def clone(self, init='same'):
        """
        Clone the parameter.

        Args:
-            prefix (str): Namespace of parameter. The cloned Parameter name is
-                combined of prefix and current name: `f"{perfix}.{self.name}"`.
            init (Union[Tensor, str, MetaTensor, numbers.Number]): Initialize the shape of the parameter.
                Default: 'same'.

        Returns:
            Parameter, a new parameter.
        """
-        Validator.check_str_by_regular(prefix)
        x = copy(self)
        # pylint: disable=protected-access
        x._param_info = self._param_info.clone()
-        x._param_info.name = prefix + '.' + self._param_info.name
        x.is_init = False
        x.is_param_ps = self.is_param_ps
        x.init_in_server = self.init_in_server

@@ -464,10 +463,25 @@ class ParameterTuple(tuple):
    def __new__(cls, iterable):
        """Create instance object of ParameterTuple."""
        data = tuple(iterable)
+        ids = set()
+        orders = {}
        for x in data:
            if not isinstance(x, Parameter):
                raise TypeError(f"ParameterTuple input should be `Parameter` collection."
                                f"But got a {type(iterable)}, {iterable}")
+            if id(x) not in ids:
+                ids.add(id(x))
+                if x.name not in orders.keys():
+                    orders[x.name] = [0, x]
+                else:
+                    if isinstance(orders[x.name], list):
+                        name = x.name
+                        orders[name][1].name = name + "_" + str(0)
+                        x.name = x.name + "_" + str(1)
+                        orders[name] = 1
+                    else:
+                        orders[x.name] += 1
+                        x.name = x.name + "_" + str(orders[x.name])
        return tuple.__new__(ParameterTuple, tuple(data))

    def clone(self, prefix, init='same'):
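The bookkeeping added above gives distinct parameters that happen to share a name unique, suffixed names. A small sketch of the expected behaviour (not part of the commit; the printed result is inferred from the `orders` logic above):

import numpy as np
from mindspore import Tensor, Parameter, ParameterTuple

a = Parameter(Tensor(np.ones((2, 2), np.float32)), name="w")
b = Parameter(Tensor(np.zeros((2, 2), np.float32)), name="w")
# Two different Parameter objects with the same name: ParameterTuple
# renames the first occurrence to "w_0" and the second to "w_1".
params = ParameterTuple([a, b])
print([p.name for p in params])   # expected: ['w_0', 'w_1']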
@@ -484,7 +498,8 @@ class ParameterTuple(tuple):
        Validator.check_str_by_regular(prefix)
        new = []
        for x in self:
-            x1 = x.clone(prefix, init)
+            x1 = x.clone(init)
+            x1.name = prefix + "." + x1.name
            new.append(x1)
        return ParameterTuple(new)
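Since `Parameter.clone` no longer takes a `prefix`, the namespacing now happens in `ParameterTuple.clone` itself. A sketch of the resulting naming, using a hypothetical parameter called "weight" (inferred from the lines above, not from released API docs):

import numpy as np
from mindspore import Tensor, Parameter, ParameterTuple

w = Parameter(Tensor(np.ones((2, 2), np.float32)), name="weight")
moments = ParameterTuple([w]).clone(prefix="moments", init="zeros")
# The prefix is prepended by ParameterTuple.clone, not by Parameter.clone:
print(moments[0].name)   # expected: 'moments.weight'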

@@ -20,6 +20,7 @@ import os
from collections import OrderedDict
import numpy
from mindspore import log as logger
+from mindspore.common.parameter import PARAMETER_NAME_DEFAULT
from .. import context
from ..common import dtype as mstype
from ..common.api import _executor, _pynative_exec

@@ -619,6 +620,8 @@ class Cell(Cell_):
            raise KeyError("Duplicated parameter name '{}'.".format(param_name))
        if not isinstance(param, Parameter) and param is not None:
            raise TypeError("The type of parameter should be 'Parameter' if not None.")
+        if isinstance(param, Parameter) and param.name == PARAMETER_NAME_DEFAULT:
+            param.name = param_name
        self._params[param_name] = param

    def cast_param(self, param):
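This is the piece that makes the optional name practical: a parameter that still carries the default name when it is registered on a Cell takes the name of the attribute it is assigned to. A sketch of the observable effect (not part of the commit; it assumes ordinary attribute assignment on a Cell routes through this registration):

import numpy as np
import mindspore.nn as nn
from mindspore import Tensor, Parameter

class Net(nn.Cell):
    def __init__(self):
        super(Net, self).__init__()
        # Created without a name, so it carries PARAMETER_NAME_DEFAULT ...
        self.fc_weight = Parameter(Tensor(np.ones((3, 3), np.float32)))

net = Net()
# ... and is renamed to the attribute name when inserted into the Cell.
print(net.fc_weight.name)   # expected: 'fc_weight'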

@@ -55,11 +55,11 @@ class DepthWiseConv(nn.Cell):
        self.bias_add = P.BiasAdd()
        weight_shape = [channel_multiplier, in_planes, kernel_size, kernel_size]
-        self.weight = Parameter(initializer(KaimingNormal(mode='fan_out'), weight_shape), name='weight')
+        self.weight = Parameter(initializer(KaimingNormal(mode='fan_out'), weight_shape))
        if has_bias:
            bias_shape = [channel_multiplier * in_planes]
-            self.bias = Parameter(initializer('zeros', bias_shape), name='bias')
+            self.bias = Parameter(initializer('zeros', bias_shape))
        else:
            self.bias = None

@@ -469,12 +469,12 @@ class DepthWiseConv(nn.Cell):
            self.depthwise_conv = P.Conv2D(out_channel=in_planes * 1, kernel_size=kernel_size,
                                           stride=stride, pad_mode="same", group=in_planes)
            self.weight = Parameter(initializer(
-                weight_init, [in_planes * 1, 1, kernel_size, kernel_size]), name='depthwise_weight')
+                weight_init, [in_planes * 1, 1, kernel_size, kernel_size]))
        else:
            self.depthwise_conv = P.DepthwiseConv2dNative(
                channel_multiplier=1, kernel_size=kernel_size, stride=stride, pad_mode='same',)
            self.weight = Parameter(initializer(
-                weight_init, [1, in_planes, kernel_size, kernel_size]), name='depthwise_weight')
+                weight_init, [1, in_planes, kernel_size, kernel_size]))

    def construct(self, x):
        x = self.depthwise_conv(x, self.weight)

@@ -28,9 +28,8 @@ class DenseNoTranpose(nn.Cell):
    def __init__(self, input_channels, output_channels, weight_init):
        super(DenseNoTranpose, self).__init__()
-        self.weight = Parameter(initializer(weight_init, [input_channels, output_channels], mstype.float16),
-                                name="weight")
-        self.bias = Parameter(initializer("zeros", [output_channels], mstype.float16).to_tensor(), name="bias")
+        self.weight = Parameter(initializer(weight_init, [input_channels, output_channels], mstype.float16))
+        self.bias = Parameter(initializer("zeros", [output_channels], mstype.float16).to_tensor())
        self.matmul = P.MatMul(transpose_b=False)
        self.bias_add = P.BiasAdd()

@@ -26,9 +26,8 @@ class DenseNoTranpose(nn.Cell):
    """Dense method"""
    def __init__(self, input_channels, output_channels, weight_init):
        super(DenseNoTranpose, self).__init__()
-        self.weight = Parameter(initializer(weight_init, [input_channels, output_channels], mstype.float16),
-                                name="weight")
-        self.bias = Parameter(initializer("zeros", [output_channels], mstype.float16).to_tensor(), name="bias")
+        self.weight = Parameter(initializer(weight_init, [input_channels, output_channels], mstype.float16))
+        self.bias = Parameter(initializer("zeros", [output_channels], mstype.float16).to_tensor())
        self.matmul = P.MatMul(transpose_b=False)
        self.bias_add = P.BiasAdd()

@@ -55,7 +55,7 @@ class THOR_GPU(Optimizer):
        Validator.check_value_type("momentum", momentum, [float], self.cls_name)
        if isinstance(momentum, float) and momentum < 0.0:
            raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum))
-        self.momentum = Parameter(Tensor(momentum, mstype.float32), name="momentum")
+        self.momentum = Parameter(Tensor(momentum, mstype.float32))
        self.params = self.parameters
        self.use_nesterov = Validator.check_bool(use_nesterov)
        self.moments = self.params.clone(prefix="moments", init='zeros')

@@ -160,7 +160,7 @@ class THOR(Optimizer):
        super(THOR, self).__init__(learning_rate, params, weight_decay, loss_scale)
        if isinstance(momentum, float) and momentum < 0.0:
            raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum))
-        self.momentum = Parameter(Tensor(momentum, mstype.float32), name="momentum")
+        self.momentum = Parameter(Tensor(momentum, mstype.float32))
        self.params = self.parameters
        self.moments = self.params.clone(prefix="moments", init='zeros')
        self.hyper_map = C.HyperMap()

@@ -109,11 +109,10 @@ class _Conv(Cell):
                             'attr \'group\' of \'Conv2D\' Op.')
        self.weight = Parameter(initializer(
-            weight_init, [out_channels, in_channels // group, *kernel_size]), name='weight')
+            weight_init, [out_channels, in_channels // group, *kernel_size]))
        if Validator.check_bool(has_bias):
-            self.bias = Parameter(_initializer(
-                bias_init, [out_channels]), name='bias')
+            self.bias = Parameter(initializer(bias_init, [out_channels]))
        else:
            if bias_init != 'zeros':
                logger.warning("Value of 'has_bias' is False, value of 'bias_init' will be ignored.")

@@ -174,12 +173,10 @@ class Conv2d_Thor_GPU(_Conv):
        split_dim = 128
        matrix_A_shape, matrix_G_shape = caculate_matmul_shape(self.matrix_A_dim, self.matrix_G_dim, split_dim)
-        self.matrix_A_inv = Parameter(np.zeros(matrix_A_shape).astype(np.float32),
-                                      name='matrix_A_inv', requires_grad=False)
-        self.matrix_G_inv = Parameter(np.zeros(matrix_G_shape).astype(np.float32),
-                                      name='matrix_A_inv', requires_grad=False)
+        self.matrix_A_inv = Parameter(np.zeros(matrix_A_shape).astype(np.float32), requires_grad=False)
+        self.matrix_G_inv = Parameter(np.zeros(matrix_G_shape).astype(np.float32), requires_grad=False)
        self.broadcast_to = P.BroadcastTo(matrix_A_shape)
-        self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
+        self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
        self.img2col = P.Im2Col(kernel_size=kernel_size, stride=stride, pad_mode="same")
        self.matmul = P.MatMul(transpose_b=True)
        self.shape = P.Shape()

@@ -195,7 +192,7 @@ class Conv2d_Thor_GPU(_Conv):
        self.axis = 0
        self.sqrt = P.Sqrt()
        self.reduce_mean = P.ReduceMean(keep_dims=False)
-        self.damping = Parameter(Tensor(damping), name="damping_value", requires_grad=False)
+        self.damping = Parameter(Tensor(damping), requires_grad=False)
        self.dampingA = Tensor(np.identity(self.matrix_A_dim), mstype.float32)
        self.dampingG = Tensor(np.identity(self.matrix_G_dim), mstype.float32)
        self.cholesky = P.CholeskyTrsm(split_dim=split_dim)
@@ -301,14 +298,14 @@ class Dense_Thor_GPU(Cell):
                    weight_init.shape[1] != in_channels:
                raise ValueError("weight_init shape error")
-        self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")
+        self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]))
        if self.has_bias:
            if isinstance(bias_init, Tensor):
                if bias_init.dim() != 1 or bias_init.shape[0] != out_channels:
                    raise ValueError("bias_init shape error")
-            self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")
+            self.bias = Parameter(initializer(bias_init, [out_channels]))
        self.matmul = P.MatMul(transpose_b=True)
        self.bias_add = P.BiasAdd()

@@ -317,12 +314,10 @@ class Dense_Thor_GPU(Cell):
        self.activation_flag = self.activation is not None
        split_dim = 128
        matrix_A_shape, matrix_G_shape = caculate_matmul_shape(self.in_channels, self.out_channels, split_dim)
-        self.matrix_A_inv = Parameter(Tensor(np.zeros(matrix_A_shape).astype(np.float32)),
-                                      name='matrix_A_inv', requires_grad=False)
-        self.matrix_G_inv = Parameter(Tensor(np.zeros(matrix_G_shape).astype(np.float32)),
-                                      name="matrix_G_inv", requires_grad=False)
+        self.matrix_A_inv = Parameter(Tensor(np.zeros(matrix_A_shape).astype(np.float32)), requires_grad=False)
+        self.matrix_G_inv = Parameter(Tensor(np.zeros(matrix_G_shape).astype(np.float32)), requires_grad=False)
        self.broadcast_to = P.BroadcastTo(matrix_A_shape)
-        self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
+        self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
        self.shape = P.Shape()
        self.reshape = P.Reshape()
        self.transpose = P.Transpose()

@@ -331,7 +326,7 @@ class Dense_Thor_GPU(Cell):
        self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
        self.batch_size = Tensor(batch_size, mstype.float16)
        self.getG = P.InsertGradientOf(self.save_gradient)
-        self.damping = Parameter(Tensor(damping), name="damping_value", requires_grad=False)
+        self.damping = Parameter(Tensor(damping), requires_grad=False)
        self.dampingA = Tensor(np.identity(in_channels), mstype.float32)
        self.dampingG = Tensor(np.identity(out_channels), mstype.float32)
        self.cast = P.Cast()
@@ -467,20 +462,20 @@ class Conv2d_Thor(_Conv):
                                     self.matrix_G_device_shape[3])
        self.matrix_A_inv = Parameter(
            Tensor(np.reshape(np.identity(self.matrix_A_device_dim).astype(np.float16), self.matrix_A_device_shape)),
-            name='matrix_A_inv', requires_grad=False)
-        self.A_inv_max = Parameter(initializer(0, [1], mstype.float32), name="A_inv_max", requires_grad=False)
+            requires_grad=False)
+        self.A_inv_max = Parameter(initializer(0, [1], mstype.float32), requires_grad=False)
        self.matrix_G_inv = Parameter(
            Tensor(np.reshape(np.identity(self.matrix_G_device_dim).astype(np.float16), self.matrix_G_device_shape)),
-            name="matrix_G_inv", requires_grad=False)
-        self.G_inv_max = Parameter(initializer(0, [1], mstype.float32), name="G_inv_max", requires_grad=False)
+            requires_grad=False)
+        self.G_inv_max = Parameter(initializer(0, [1], mstype.float32), requires_grad=False)
        self.fake_G = Tensor(
            np.reshape(np.identity(self.matrix_G_device_dim).astype(np.float16), self.matrix_G_device_shape))
        self.shape = P.Shape()
        self.reshape = P.Reshape()
        self.transpose = P.Transpose()
-        self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
+        self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
        self.mul = P.Mul()
        self.cast = P.Cast()
        self.damping = Tensor(damping)
@@ -648,14 +643,14 @@ class Dense_Thor(Cell):
                    weight_init.shape[1] != in_channels:
                raise ValueError("weight_init shape error")
-        self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")
+        self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]))
        if self.has_bias:
            if isinstance(bias_init, Tensor):
                if bias_init.dim() != 1 or bias_init.shape[0] != out_channels:
                    raise ValueError("bias_init shape error")
-            self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")
+            self.bias = Parameter(initializer(bias_init, [out_channels]))
        self.matmul = P.MatMul(transpose_b=True)
        self.bias_add = P.BiasAdd()

@@ -663,10 +658,8 @@ class Dense_Thor(Cell):
        self.activation = get_activation(activation)
        self.activation_flag = self.activation is not None
-        self.matrix_A_inv = Parameter(Tensor(np.zeros([128, 128, 16, 16]).astype(np.float16)), name='matrix_A_inv',
-                                      requires_grad=False)
-        self.matrix_G_inv = Parameter(Tensor(np.zeros([63, 63, 16, 16]).astype(np.float16)), name="matrix_G_inv",
-                                      requires_grad=False)
+        self.matrix_A_inv = Parameter(Tensor(np.zeros([128, 128, 16, 16]).astype(np.float16)), requires_grad=False)
+        self.matrix_G_inv = Parameter(Tensor(np.zeros([63, 63, 16, 16]).astype(np.float16)), requires_grad=False)
        self.fake_G = Tensor(np.zeros([63, 63, 16, 16]).astype(np.float16))
        self.matmul = P.MatMul(transpose_b=True)

@@ -676,7 +669,7 @@ class Dense_Thor(Cell):
        self.shape = P.Shape()
        self.reshape = P.Reshape()
        self.transpose = P.Transpose()
-        self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
+        self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
        self.mul = P.Mul()
        self.cast = P.Cast()
        self.damping = Tensor(damping)

@@ -689,8 +682,8 @@ class Dense_Thor(Cell):
        self.assignadd = P.AssignAdd()
        self.freq = Tensor(frequency, mstype.int32)
        self.axis = 0
-        self.A_inv_max = Parameter(initializer(0, [1], mstype.float32), name="A_inv_max", requires_grad=False)
-        self.G_inv_max = Parameter(initializer(0, [1], mstype.float32), name="G_inv_max", requires_grad=False)
+        self.A_inv_max = Parameter(initializer(0, [1], mstype.float32), requires_grad=False)
+        self.G_inv_max = Parameter(initializer(0, [1], mstype.float32), requires_grad=False)
        self.fused_abs_max1 = P.CusFusedAbsMax1([1001, 1001])
        self.fused_abs_max2 = P.CusFusedAbsMax1()
        self.log = P.Log()

@@ -33,13 +33,12 @@ class CTCLoss(_Loss):
    def __init__(self, max_sequence_length, max_label_length, batch_size):
        super(CTCLoss, self).__init__()
-        self.sequence_length = Parameter(Tensor(np.array([max_sequence_length] * batch_size), mstype.int32),
-                                         name="sequence_length")
+        self.sequence_length = Parameter(Tensor(np.array([max_sequence_length] * batch_size), mstype.int32))
        labels_indices = []
        for i in range(batch_size):
            for j in range(max_label_length):
                labels_indices.append([i, j])
-        self.labels_indices = Parameter(Tensor(np.array(labels_indices), mstype.int64), name="labels_indices")
+        self.labels_indices = Parameter(Tensor(np.array(labels_indices), mstype.int64))
        self.reshape = P.Reshape()
        self.ctc_loss = P.CTCLoss(ctc_merge_repeated=True)

@@ -45,12 +45,10 @@ class StackedRNN(nn.Cell):
        self.rnn1 = P.DynamicRNN(forget_bias=0.0)
        self.rnn2 = P.DynamicRNN(forget_bias=0.0)
-        self.w1 = Parameter(np.random.uniform(-k, k, (input_size + hidden_size, 4 * hidden_size)).astype(np.float16),
-                            name="w1")
-        self.w2 = Parameter(np.random.uniform(-k, k, (hidden_size + hidden_size, 4 * hidden_size)).astype(np.float16),
-                            name="w2")
-        self.b1 = Parameter(np.random.uniform(-k, k, (4 * hidden_size)).astype(np.float16), name="b1")
-        self.b2 = Parameter(np.random.uniform(-k, k, (4 * hidden_size)).astype(np.float16), name="b2")
+        self.w1 = Parameter(np.random.uniform(-k, k, (input_size + hidden_size, 4 * hidden_size)).astype(np.float16))
+        self.w2 = Parameter(np.random.uniform(-k, k, (hidden_size + hidden_size, 4 * hidden_size)).astype(np.float16))
+        self.b1 = Parameter(np.random.uniform(-k, k, (4 * hidden_size)).astype(np.float16))
+        self.b2 = Parameter(np.random.uniform(-k, k, (4 * hidden_size)).astype(np.float16))
        self.h1 = Tensor(np.zeros(shape=(1, batch_size, hidden_size)).astype(np.float16))
        self.h2 = Tensor(np.zeros(shape=(1, batch_size, hidden_size)).astype(np.float16))

@@ -98,7 +96,7 @@ class StackedRNNForGPU(nn.Cell):
        self.cast = P.Cast()
        k = (1 / hidden_size) ** 0.5
        weight_shape = 4 * hidden_size * (input_size + 3 * hidden_size + 4)
-        self.weight = Parameter(np.random.uniform(-k, k, (weight_shape, 1, 1)).astype(np.float32), name='weight')
+        self.weight = Parameter(np.random.uniform(-k, k, (weight_shape, 1, 1)).astype(np.float32))
        self.h = Tensor(np.zeros(shape=(num_layer, batch_size, hidden_size)).astype(np.float32))
        self.c = Tensor(np.zeros(shape=(num_layer, batch_size, hidden_size)).astype(np.float32))

@@ -39,7 +39,6 @@ class MeanConv(nn.Cell):
    """
    def __init__(self,
-                 name,
                 feature_in_dim,
                 feature_out_dim,
                 activation,

@@ -47,8 +46,7 @@ class MeanConv(nn.Cell):
        super(MeanConv, self).__init__()
        self.out_weight = Parameter(
-            initializer("XavierUniform", [feature_in_dim * 2, feature_out_dim], dtype=mstype.float32),
-            name=name + 'out_weight')
+            initializer("XavierUniform", [feature_in_dim * 2, feature_out_dim], dtype=mstype.float32))
        if activation == "tanh":
            self.act = P.Tanh()

@@ -90,15 +88,13 @@ class AttenConv(nn.Cell):
    """
    def __init__(self,
-                 name,
                 feature_in_dim,
                 feature_out_dim,
                 dropout=0.2):
        super(AttenConv, self).__init__()
        self.out_weight = Parameter(
-            initializer("XavierUniform", [feature_in_dim * 2, feature_out_dim], dtype=mstype.float32),
-            name=name + 'out_weight')
+            initializer("XavierUniform", [feature_in_dim * 2, feature_out_dim], dtype=mstype.float32))
        self.cast = P.Cast()
        self.squeeze = P.Squeeze(1)
        self.concat = P.Concat(axis=1)
@@ -147,10 +143,8 @@ class BGCF(nn.Cell):
                 input_dim):
        super(BGCF, self).__init__()
-        self.user_embeddings = Parameter(initializer("XavierUniform", [num_user, input_dim], dtype=mstype.float32),
-                                         name='user_embed')
-        self.item_embeddings = Parameter(initializer("XavierUniform", [num_item, input_dim], dtype=mstype.float32),
-                                         name='item_embed')
+        self.user_embed = Parameter(initializer("XavierUniform", [num_user, input_dim], dtype=mstype.float32))
+        self.item_embed = Parameter(initializer("XavierUniform", [num_item, input_dim], dtype=mstype.float32))
        self.cast = P.Cast()
        self.tanh = P.Tanh()
        self.shape = P.Shape()

@@ -163,30 +157,27 @@ class BGCF(nn.Cell):
        (self.input_dim, self.num_user, self.num_item) = dataset_argv
        self.layer_dim = architect_argv
-        self.gnew_agg_mean = MeanConv('gnew_agg_mean', self.input_dim, self.layer_dim,
+        self.gnew_agg_mean = MeanConv(self.input_dim, self.layer_dim,
                                      activation=activation, dropout=neigh_drop_rate[1])
        self.gnew_agg_mean.to_float(mstype.float16)
-        self.gnew_agg_user = AttenConv('gnew_agg_att_user', self.input_dim,
-                                       self.layer_dim, dropout=neigh_drop_rate[2])
+        self.gnew_agg_user = AttenConv(self.input_dim, self.layer_dim, dropout=neigh_drop_rate[2])
        self.gnew_agg_user.to_float(mstype.float16)
-        self.gnew_agg_item = AttenConv('gnew_agg_att_item', self.input_dim,
-                                       self.layer_dim, dropout=neigh_drop_rate[2])
+        self.gnew_agg_item = AttenConv(self.input_dim, self.layer_dim, dropout=neigh_drop_rate[2])
        self.gnew_agg_item.to_float(mstype.float16)
        self.user_feature_dim = self.input_dim
        self.item_feature_dim = self.input_dim
        self.final_weight = Parameter(
-            initializer("XavierUniform", [self.input_dim * 3, self.input_dim * 3], dtype=mstype.float32),
-            name='final_weight')
+            initializer("XavierUniform", [self.input_dim * 3, self.input_dim * 3], dtype=mstype.float32))
-        self.raw_agg_funcs_user = MeanConv('raw_agg_user', self.input_dim, self.layer_dim,
+        self.raw_agg_funcs_user = MeanConv(self.input_dim, self.layer_dim,
                                           activation=activation, dropout=neigh_drop_rate[0])
        self.raw_agg_funcs_user.to_float(mstype.float16)
-        self.raw_agg_funcs_item = MeanConv('raw_agg_item', self.input_dim, self.layer_dim,
+        self.raw_agg_funcs_item = MeanConv(self.input_dim, self.layer_dim,
                                           activation=activation, dropout=neigh_drop_rate[0])
        self.raw_agg_funcs_item.to_float(mstype.float16)
@@ -207,14 +198,14 @@ class BGCF(nn.Cell):
                  neg_gnew_neighs,
                  neg_item_num):
        """Aggregate user and item embeddings"""
-        all_user_embed = self.gather(self.user_embeddings, self.concat_0((u_id, pos_users)), 0)
-        u_self_matrix_at_layers = self.gather(self.user_embeddings, u_group_nodes, 0)
-        u_neigh_matrix_at_layers = self.gather(self.item_embeddings, u_neighs, 0)
+        all_user_embed = self.gather(self.user_embed, self.concat_0((u_id, pos_users)), 0)
+        u_self_matrix_at_layers = self.gather(self.user_embed, u_group_nodes, 0)
+        u_neigh_matrix_at_layers = self.gather(self.item_embed, u_neighs, 0)
        u_output_mean = self.raw_agg_funcs_user(u_self_matrix_at_layers, u_neigh_matrix_at_layers)
-        u_gnew_neighs_matrix = self.gather(self.item_embeddings, u_gnew_neighs, 0)
+        u_gnew_neighs_matrix = self.gather(self.item_embed, u_gnew_neighs, 0)
        u_output_from_gnew_mean = self.gnew_agg_mean(u_self_matrix_at_layers, u_gnew_neighs_matrix)
        u_output_from_gnew_att = self.gnew_agg_user(u_self_matrix_at_layers,

@@ -223,14 +214,14 @@ class BGCF(nn.Cell):
        u_output = self.concat_1((u_output_mean, u_output_from_gnew_mean, u_output_from_gnew_att))
        all_user_rep = self.tanh(u_output)
-        all_pos_item_embed = self.gather(self.item_embeddings, self.concat_0((pos_item_id, pos_items)), 0)
-        i_self_matrix_at_layers = self.gather(self.item_embeddings, i_group_nodes, 0)
-        i_neigh_matrix_at_layers = self.gather(self.user_embeddings, i_neighs, 0)
+        all_pos_item_embed = self.gather(self.item_embed, self.concat_0((pos_item_id, pos_items)), 0)
+        i_self_matrix_at_layers = self.gather(self.item_embed, i_group_nodes, 0)
+        i_neigh_matrix_at_layers = self.gather(self.user_embed, i_neighs, 0)
        i_output_mean = self.raw_agg_funcs_item(i_self_matrix_at_layers, i_neigh_matrix_at_layers)
-        i_gnew_neighs_matrix = self.gather(self.user_embeddings, i_gnew_neighs, 0)
+        i_gnew_neighs_matrix = self.gather(self.user_embed, i_gnew_neighs, 0)
        i_output_from_gnew_mean = self.gnew_agg_mean(i_self_matrix_at_layers, i_gnew_neighs_matrix)
        i_output_from_gnew_att = self.gnew_agg_item(i_self_matrix_at_layers,

@@ -239,14 +230,14 @@ class BGCF(nn.Cell):
        i_output = self.concat_1((i_output_mean, i_output_from_gnew_mean, i_output_from_gnew_att))
        all_pos_item_rep = self.tanh(i_output)
-        neg_item_embed = self.gather(self.item_embeddings, neg_item_id, 0)
-        neg_self_matrix_at_layers = self.gather(self.item_embeddings, neg_group_nodes, 0)
-        neg_neigh_matrix_at_layers = self.gather(self.user_embeddings, neg_neighs, 0)
+        neg_item_embed = self.gather(self.item_embed, neg_item_id, 0)
+        neg_self_matrix_at_layers = self.gather(self.item_embed, neg_group_nodes, 0)
+        neg_neigh_matrix_at_layers = self.gather(self.user_embed, neg_neighs, 0)
        neg_output_mean = self.raw_agg_funcs_item(neg_self_matrix_at_layers, neg_neigh_matrix_at_layers)
-        neg_gnew_neighs_matrix = self.gather(self.user_embeddings, neg_gnew_neighs, 0)
+        neg_gnew_neighs_matrix = self.gather(self.user_embed, neg_gnew_neighs, 0)
        neg_output_from_gnew_mean = self.gnew_agg_mean(neg_self_matrix_at_layers, neg_gnew_neighs_matrix)
        neg_output_from_gnew_att = self.gnew_agg_item(neg_self_matrix_at_layers,

@@ -80,14 +80,14 @@ class GNNFeatureTransform(nn.Cell):
                    weight_init.shape[1] != in_channels:
                raise ValueError("weight_init shape error")
-        self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")
+        self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]))
        if self.has_bias:
            if isinstance(bias_init, Tensor):
                if bias_init.dim() != 1 or bias_init.shape[0] != out_channels:
                    raise ValueError("bias_init shape error")
-            self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")
+            self.bias = Parameter(initializer(bias_init, [out_channels]))
        self.matmul = P.MatMul(transpose_b=True)
        self.bias_add = P.BiasAdd()

@@ -280,7 +280,7 @@ class AttentionHead(nn.Cell):
        self.coef_drop = nn.Dropout(keep_prob=1 - coef_drop_ratio)
        self.matmul = P.MatMul()
        self.bias_add = P.BiasAdd()
-        self.bias = Parameter(initializer('zeros', self.out_channel), name='bias')
+        self.bias = Parameter(initializer('zeros', self.out_channel))
        self.residual = residual
        if self.residual:
            if in_channel != out_channel:

@@ -80,8 +80,8 @@ class BertPretrainEva(nn.Cell):
        self.equal = P.Equal()
        self.mean = P.ReduceMean()
        self.sum = P.ReduceSum()
-        self.total = Parameter(Tensor([0], mstype.float32), name='total')
-        self.acc = Parameter(Tensor([0], mstype.float32), name='acc')
+        self.total = Parameter(Tensor([0], mstype.float32))
+        self.acc = Parameter(Tensor([0], mstype.float32))
        self.reshape = P.Reshape()
        self.shape = P.Shape()
        self.cast = P.Cast()

@@ -52,7 +52,7 @@ class CRF(nn.Cell):
        transitions = np.random.normal(size=(self.target_size, self.target_size)).astype(np.float32)
        transitions[tag_to_index[self.START_TAG], :] = -10000
        transitions[:, tag_to_index[self.STOP_TAG]] = -10000
-        self.transitions = Parameter(Tensor(transitions), name="transition_matrix")
+        self.transitions = Parameter(Tensor(transitions))
        self.cat = P.Concat(axis=-1)
        self.argmax = P.ArgMaxWithValue(axis=-1)
        self.log = P.Log()

@@ -90,8 +90,7 @@ class BertFinetuneCell(nn.Cell):
        self.loss_scale = None
        self.loss_scaling_manager = scale_update_cell
        if scale_update_cell:
-            self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32),
-                                        name="loss_scale")
+            self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32))

    def construct(self,
                  input_ids,

@@ -185,8 +184,8 @@ class BertSquadCell(nn.Cell):
        self.loss_scale = None
        self.loss_scaling_manager = scale_update_cell
        if scale_update_cell:
-            self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32),
-                                        name="loss_scale")
+            self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32))

    def construct(self,
                  input_ids,
                  input_mask,
@@ -306,9 +305,9 @@ class BertSquad(nn.Cell):
        self.num_labels = num_labels
        self.seq_length = config.seq_length
        self.is_training = is_training
-        self.total_num = Parameter(Tensor([0], mstype.float32), name='total_num')
-        self.start_num = Parameter(Tensor([0], mstype.float32), name='start_num')
-        self.end_num = Parameter(Tensor([0], mstype.float32), name='end_num')
+        self.total_num = Parameter(Tensor([0], mstype.float32))
+        self.start_num = Parameter(Tensor([0], mstype.float32))
+        self.end_num = Parameter(Tensor([0], mstype.float32))
        self.sum = P.ReduceSum()
        self.equal = P.Equal()
        self.argmax = P.ArgMaxWithValue(axis=1)

@@ -84,8 +84,7 @@ class GetMaskedLMOutput(nn.Cell):
        self.output_bias = Parameter(
            initializer(
                'zero',
-                config.vocab_size),
-            name='output_bias')
+                config.vocab_size))
        self.matmul = P.MatMul(transpose_b=True)
        self.log_softmax = nn.LogSoftmax(axis=-1)
        self.shape_flat_offsets = (-1, 1)

@@ -359,8 +358,7 @@ class BertTrainOneStepWithLossScaleCell(nn.Cell):
        self.loss_scale = None
        self.loss_scaling_manager = scale_update_cell
        if scale_update_cell:
-            self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32),
-                                        name="loss_scale")
+            self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32))

    @C.add_flags(has_effect=True)
    def construct(self,
@@ -465,10 +463,10 @@ class BertTrainAccumulateStepsWithLossScaleCell(nn.Cell):
        self.enable_global_norm = enable_global_norm
        self.one = Tensor(np.array([1]).astype(np.int32))
        self.zero = Tensor(np.array([0]).astype(np.int32))
-        self.local_step = Parameter(initializer(0, [1], mstype.int32), name="local_step")
+        self.local_step = Parameter(initializer(0, [1], mstype.int32))
        self.accu_grads = self.weights.clone(prefix="accu_grads", init='zeros')
-        self.accu_overflow = Parameter(initializer(0, [1], mstype.int32), name="accu_overflow")
-        self.loss = Parameter(initializer(0, [1], mstype.float32), name="accu_loss")
+        self.accu_overflow = Parameter(initializer(0, [1], mstype.int32))
+        self.accu_loss = Parameter(initializer(0, [1], mstype.float32))
        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
        self.reducer_flag = False

@@ -499,8 +497,7 @@ class BertTrainAccumulateStepsWithLossScaleCell(nn.Cell):
        self.loss_scale = None
        self.loss_scaling_manager = scale_update_cell
        if scale_update_cell:
-            self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32),
-                                        name="loss_scale")
+            self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32))

    @C.add_flags(has_effect=True)
    def construct(self,

@@ -529,8 +526,8 @@ class BertTrainAccumulateStepsWithLossScaleCell(nn.Cell):
        # update accumulation parameters
        is_accu_step = self.not_equal(self.local_step, self.accumulation_steps)
        self.local_step = self.select(is_accu_step, self.local_step + self.one, self.one)
-        self.loss = self.select(is_accu_step, self.loss + loss, loss)
-        mean_loss = self.loss / self.local_step
+        self.accu_loss = self.select(is_accu_step, self.accu_loss + loss, loss)
+        mean_loss = self.accu_loss / self.local_step
        is_accu_step = self.not_equal(self.local_step, self.accumulation_steps)

        # alloc status and clear should be right before gradoperation

@@ -110,8 +110,7 @@ class EmbeddingLookup(nn.Cell):
        self.use_one_hot_embeddings = use_one_hot_embeddings
        self.embedding_table = Parameter(initializer
                                         (TruncatedNormal(initializer_range),
-                                          [vocab_size, embedding_size]),
-                                         name='embedding_table')
+                                          [vocab_size, embedding_size]))
        self.expand = P.ExpandDims()
        self.shape_flat = (-1,)
        self.gather = P.GatherV2()

@@ -170,8 +169,7 @@ class EmbeddingPostprocessor(nn.Cell):
        self.embedding_table = Parameter(initializer
                                         (TruncatedNormal(initializer_range),
                                          [token_type_vocab_size,
-                                           embedding_size]),
-                                         name='embedding_table')
+                                           embedding_size]))
        self.shape_flat = (-1,)
        self.one_hot = P.OneHot()

@@ -188,8 +186,7 @@ class EmbeddingPostprocessor(nn.Cell):
        self.full_position_embeddings = Parameter(initializer
                                                  (TruncatedNormal(initializer_range),
                                                   [max_position_embeddings,
-                                                    embedding_size]),
-                                                  name='full_position_embeddings')
+                                                    embedding_size]))

    def construct(self, token_type_ids, word_embeddings):
        """Postprocessors apply positional and token type embeddings to word embeddings."""

@@ -314,8 +311,7 @@ class RelaPosEmbeddingsGenerator(nn.Cell):
        self.embeddings_table = Parameter(
            initializer(TruncatedNormal(initializer_range),
-                        [self.vocab_size, self.depth]),
-            name='embeddings_for_position')
+                        [self.vocab_size, self.depth]))
        self.relative_positions_matrix = RelaPosMatrixGenerator(length=length,
                                                                max_relative_position=max_relative_position)

@@ -86,8 +86,8 @@ class BertPretrainEva(nn.Cell):
        self.equal = P.Equal()
        self.mean = P.ReduceMean()
        self.sum = P.ReduceSum()
-        self.total = Parameter(Tensor([0], mstype.float32), name='total')
-        self.acc = Parameter(Tensor([0], mstype.float32), name='acc')
+        self.total = Parameter(Tensor([0], mstype.float32))
+        self.acc = Parameter(Tensor([0], mstype.float32))
        self.reshape = P.Reshape()
        self.shape = P.Shape()
        self.cast = P.Cast()

@@ -98,8 +98,7 @@ class GetMaskedLMOutput(nn.Cell):
        self.output_bias = Parameter(
            initializer(
                'zero',
-                config.vocab_size),
-            name='output_bias')
+                config.vocab_size))
        self.matmul = P.MatMul(transpose_b=True)
        self.log_softmax = nn.LogSoftmax(axis=-1)
        self.shape_flat_offsets = (-1, 1)

@@ -379,8 +378,7 @@ class BertTrainOneStepWithLossScaleCell(nn.Cell):
        self.loss_scale = None
        self.loss_scaling_manager = scale_update_cell
        if scale_update_cell:
-            self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32),
-                                        name="loss_scale")
+            self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32))

    @C.add_flags(has_effect=True)
    def construct(self,

@@ -136,8 +136,7 @@ class EmbeddingLookup(nn.Cell):
        self.use_one_hot_embeddings = use_one_hot_embeddings
        self.embedding_table = Parameter(initializer
                                         (TruncatedNormal(initializer_range),
-                                          [vocab_size, embedding_size]),
-                                         name='embedding_table')
+                                          [vocab_size, embedding_size]))
        self.expand = P.ExpandDims()
        self.shape_flat = (-1,)
        self.gather = P.GatherV2()

@@ -200,7 +199,6 @@ class EmbeddingPostprocessor(nn.Cell):
                                              embedding_shape=embedding_shape,
                                              use_one_hot_embeddings=use_one_hot_embeddings,
                                              initializer_range=initializer_range,
-                                              name='embedding_table',
                                              batch_size=batch_size,
                                              damping=damping,
                                              loss_scale=loss_scale,

@@ -224,7 +222,6 @@ class EmbeddingPostprocessor(nn.Cell):
                                              embedding_shape=position_embedding_shape,
                                              use_one_hot_embeddings=use_one_hot_embeddings,
                                              initializer_range=initializer_range,
-                                              name='full_position_embeddings',
                                              batch_size=batch_size,
                                              damping=damping,
                                              loss_scale=loss_scale,

@@ -363,8 +360,7 @@ class RelaPosEmbeddingsGenerator(nn.Cell):
        self.embeddings_table = Parameter(
            initializer(TruncatedNormal(initializer_range),
-                        [self.vocab_size, self.depth]),
-            name='embeddings_for_position')
+                        [self.vocab_size, self.depth]))
        self.relative_positions_matrix = RelaPosMatrixGenerator(length=length,
                                                                max_relative_position=max_relative_position)

@@ -944,7 +940,6 @@ class BertModel(nn.Cell):
                                              embedding_shape=output_embedding_shape,
                                              use_one_hot_embeddings=use_one_hot_embeddings,
                                              initializer_range=config.initializer_range,
-                                              name='embedding_table',
                                              batch_size=batch_size,
                                              damping=damping,
                                              loss_scale=loss_scale,

@@ -94,9 +94,9 @@ class FusedLayerNorm(Cell):
        self.begin_norm_axis = begin_norm_axis
        self.begin_params_axis = begin_params_axis
        self.gamma = Parameter(initializer(
-            gamma_init, normalized_shape), name="gamma")
+            gamma_init, normalized_shape))
        self.beta = Parameter(initializer(
-            beta_init, normalized_shape), name="beta")
+            beta_init, normalized_shape))
        self.layer_norm = P.LayerNorm(begin_norm_axis=self.begin_norm_axis, begin_params_axis=self.begin_params_axis)
        self.batch_norm = P.BatchNorm(is_training=True, epsilon=1e-5)

@@ -52,7 +52,7 @@ class THOR(Optimizer):
        super(THOR, self).__init__(learning_rate, params, weight_decay, loss_scale)
        if isinstance(momentum, float) and momentum < 0.0:
            raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum))
-        self.momentum = Parameter(Tensor(momentum, mstype.float32), name="momentum")
+        self.momentum = Parameter(Tensor(momentum, mstype.float32))
        self.params = self.parameters
        self.moments = self.params.clone(prefix="moments", init='zeros')
        self.hyper_map = C.HyperMap()

@@ -80,7 +80,7 @@ class THOR(Optimizer):
        self.batch_size = batch_size
        self.damping = damping
        self.one = Tensor(1, mstype.int32)
-        self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
+        self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)

    def construct(self, gradients):
        """construct of THOR"""

@@ -54,7 +54,7 @@ class THOR(Optimizer):
        super(THOR, self).__init__(learning_rate, params, weight_decay, loss_scale)
        if isinstance(momentum, float) and momentum < 0.0:
            raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum))
-        self.momentum = Parameter(Tensor(momentum, mstype.float32), name="momentum")
+        self.momentum = Parameter(Tensor(momentum, mstype.float32))
        self.params = self.parameters
        self.moments = self.params.clone(prefix="moments", init='zeros')
        self.hyper_map = C.HyperMap()

@@ -82,7 +82,7 @@ class THOR(Optimizer):
        self.batch_size = batch_size
        self.damping = damping
        self.one = Tensor(1, mstype.int32)
-        self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
+        self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
        mean = _get_gradients_mean()
        degree = _get_device_num()
        self.grad_reducer_g = DistributedGradReducerThor(self.parameters, 3, mean, degree)

@@ -41,7 +41,6 @@ class Embedding_Thor(Cell):
                 embedding_shape,
                 use_one_hot_embeddings=False,
                 initializer_range=0.02,
-                 name='embedding_table',
                 batch_size=12,
                 damping=0.03,
                 loss_scale=1,

@@ -52,8 +51,7 @@ class Embedding_Thor(Cell):
        self.use_one_hot_embeddings = use_one_hot_embeddings
        self.embedding_table = Parameter(initializer
                                         (TruncatedNormal(initializer_range),
-                                          [vocab_size, embedding_size]),
-                                         name=name)
+                                          [vocab_size, embedding_size]))
        self.thor = True
        self.expand = P.ExpandDims()
        self.shape_flat = (-1,)

@@ -67,14 +65,13 @@ class Embedding_Thor(Cell):
        self.shape = P.Shape()
        self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
-        self.matrix_A_inv = Parameter(Tensor(np.zeros([vocab_size]).astype(np.float16)),
-                                      name='matrix_A_inv', requires_grad=False)
+        self.matrix_A_inv = Parameter(Tensor(np.zeros([vocab_size]).astype(np.float16)), requires_grad=False)
        self.matrix_G_inv = Parameter(Tensor(np.zeros([embedding_size, embedding_size]).astype(np.float16)),
-                                      name="matrix_G_inv", requires_grad=False)
+                                      requires_grad=False)
        self.fake_G = Tensor(np.zeros([embedding_size, embedding_size]).astype(np.float16))
        self.dampingA = Tensor(np.ones([vocab_size]).astype(np.float32))
        self.dampingG = Tensor(np.identity(embedding_size), mstype.float32)
-        self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
+        self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
        self.freq = Tensor(frequency, mstype.int32)
        self.axis = 0
        self.damping = damping
@@ -169,14 +166,14 @@ class Dense_Thor(Cell):
                    weight_init.shape()[1] != in_channels:
                raise ValueError("weight_init shape error")
-        self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")
+        self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]))
        if self.has_bias:
            if isinstance(bias_init, Tensor):
                if bias_init.dim() != 1 or bias_init.shape()[0] != out_channels:
                    raise ValueError("bias_init shape error")
-            self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")
+            self.bias = Parameter(initializer(bias_init, [out_channels]))
        self.matmul = P.MatMul(transpose_b=True)
        self.bias_add = P.BiasAdd()

@@ -184,9 +181,9 @@ class Dense_Thor(Cell):
        self.activation = get_activation(activation)
        self.activation_flag = self.activation is not None
        self.matrix_A_inv = Parameter(Tensor(np.zeros([in_channels, in_channels]).astype(np.float16)),
-                                      name='matrix_A_inv', requires_grad=False)
+                                      requires_grad=False)
        self.matrix_G_inv = Parameter(Tensor(np.zeros([out_channels, out_channels]).astype(np.float16)),
-                                      name="matrix_G_inv", requires_grad=False)
+                                      requires_grad=False)
        self.fake_G = Tensor(np.zeros([out_channels, out_channels]).astype(np.float16))
        self.matmul = P.MatMul(transpose_b=True)

@@ -196,7 +193,7 @@ class Dense_Thor(Cell):
        self.shape = P.Shape()
        self.reshape = P.Reshape()
        self.transpose = P.Transpose()
-        self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
+        self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
        self.mul = P.Mul()
        self.cast = P.Cast()
        self.damping = damping

@@ -57,11 +57,10 @@ class BahdanauAttention(nn.Cell):
        self.normalize = normalize
        self.num_units = num_units
        self.linear_att = Parameter(Tensor(np.random.uniform(-initializer_range, initializer_range, size=[num_units]),
-                                           dtype=mstype.float32), name='linear_att')
+                                           dtype=mstype.float32))
        if self.normalize:
-            self.normalize_scalar = Parameter(Tensor(np.array([1.0 / num_units]), dtype=mstype.float32),
-                                              name='normalize_scalar')
-            self.normalize_bias = Parameter(Tensor(np.zeros(num_units), dtype=mstype.float32), name='normalize_bias')
+            self.normalize_scalar = Parameter(Tensor(np.array([1.0 / num_units]), dtype=mstype.float32))
+            self.normalize_bias = Parameter(Tensor(np.zeros(num_units), dtype=mstype.float32))
        self.transpose = P.Transpose()
        self.transpose_orders = (1, 0, 2)
        self.shape_op = P.Shape()

Some files were not shown because too many files have changed in this diff.
