remove name arg from gradoperation

pull/5172/head
panyifeng 5 years ago
parent b5ed54664d
commit 1a54785fe2

@@ -117,7 +117,7 @@ class WithGradCell(Cell):
         self.network = network
         self.loss_fn = loss_fn
         self.weights = ParameterTuple(network.trainable_params())
-        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=(sens is not None))
+        self.grad = C.GradOperation(get_by_list=True, sens_param=(sens is not None))
         self.sens = sens
         if loss_fn is None:
             self.network_with_loss = network
@@ -182,7 +182,7 @@ class TrainOneStepCell(Cell):
         self.network.add_flags(defer_inline=True)
         self.weights = optimizer.parameters
         self.optimizer = optimizer
-        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
+        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
         self.sens = sens
         self.reducer_flag = False
         self.grad_reducer = None

@@ -269,7 +269,7 @@ class DistributedGradReducer(Cell):
         >>>         self.network.add_flags(defer_inline=True)
         >>>         self.weights = optimizer.parameters
         >>>         self.optimizer = optimizer
-        >>>         self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
+        >>>         self.grad = C.GradOperation(get_by_list=True, sens_param=True)
         >>>         self.sens = sens
         >>>         self.reducer_flag = False
         >>>         self.grad_reducer = None

@@ -210,7 +210,7 @@ class TrainOneStepWithLossScaleCell(Cell):
         self.network.add_flags(defer_inline=True)
         self.weights = optimizer.parameters
         self.optimizer = optimizer
-        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
+        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
         self.hyper_map = C.HyperMap()
         if context.get_context("device_target") == "GPU":
             self.gpu_target = True

@@ -106,12 +106,11 @@ class GradOperation(GradOperation_):
            a 'ones_like(outputs)' sensitivity will be attached automatically. Default: False.
     """

-    def __init__(self, name,
-                 get_all=False, get_by_list=False, sens_param=False):
+    def __init__(self, get_all=False, get_by_list=False, sens_param=False):
         self.get_all = get_all
         self.get_by_list = get_by_list
         self.sens_param = sens_param
-        GradOperation_.__init__(self, name, get_all, get_by_list, sens_param)
+        GradOperation_.__init__(self, 'grad', get_all, get_by_list, sens_param)
         self.grad_fn = None
         self.fn = None
         self.need_forward = False
@@ -139,7 +138,7 @@ class GradOperation(GradOperation_):
             fn.already_run = False

     def __call__(self, fn, weights=None):
-        grad_ = GradOperation('grad', self.get_all, self.get_by_list, self.sens_param)
+        grad_ = GradOperation(self.get_all, self.get_by_list, self.sens_param)
         if self.grad_fn is None or self.fn != fn:
             if context.get_context("mode") == context.GRAPH_MODE:
                 if self.get_by_list:

@@ -216,7 +216,7 @@ class InsertGradientOf(PrimitiveWithInfer):
        >>>     return ret
        >>>
        >>> clip = P.InsertGradientOf(clip_gradient)
-       >>> grad_all = C.GradOperation('get_all', get_all=True)
+       >>> grad_all = C.GradOperation(get_all=True)
        >>> def InsertGradientOfClipDemo():
        >>>     def clip_test(x, y):
        >>>         x = clip(x)
@@ -268,7 +268,7 @@ class HookBackward(PrimitiveWithInfer):
        >>> def hook_fn(grad_out):
        >>>     print(grad_out)
        >>>
-       >>> grad_all = GradOperation('get_all', get_all=True)
+       >>> grad_all = GradOperation(get_all=True)
        >>> hook = P.HookBackward(hook_fn)
        >>>
        >>> def hook_test(x, y):

@@ -163,8 +163,7 @@ class TrainOneStepCell(nn.Cell):
         self.backbone = network_backbone
         self.weights = ParameterTuple(network.trainable_params())
         self.optimizer = optimizer
-        self.grad = C.GradOperation('grad',
-                                    get_by_list=True,
+        self.grad = C.GradOperation(get_by_list=True,
                                     sens_param=True)
         self.sens = Tensor((np.ones((1,)) * sens).astype(np.float16))
         self.reduce_flag = reduce_flag

@@ -171,8 +171,7 @@ class TrainOneStepCell(nn.Cell):
         self.backbone = network_backbone
         self.weights = ParameterTuple(network.trainable_params())
         self.optimizer = optimizer
-        self.grad = C.GradOperation('grad',
-                                    get_by_list=True,
+        self.grad = C.GradOperation(get_by_list=True,
                                     sens_param=True)
         self.sens = Tensor((np.ones((1,)) * sens).astype(np.float16))
         self.reduce_flag = reduce_flag

@@ -119,7 +119,7 @@ class DistributedGradReducerThor(Cell):
         >>>         self.network.add_flags(defer_inline=True)
         >>>         self.weights = ParameterTuple(network.trainable_params())
         >>>         self.optimizer = optimizer
-        >>>         self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
+        >>>         self.grad = C.GradOperation(get_by_list=True, sens_param=True)
         >>>         self.sens = sens
         >>>         self.reducer_flag = False
         >>>         self.grad_reducer = None

@@ -383,7 +383,7 @@ class TrainingWrapper(nn.Cell):
         self.network = network
         self.weights = ms.ParameterTuple(network.trainable_params())
         self.optimizer = optimizer
-        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
+        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
         self.sens = sens
         self.reducer_flag = False
         self.grad_reducer = None

@@ -77,7 +77,7 @@ class TrainOneStepCellWithGradClip(Cell):
         self.network.add_flags(defer_inline=True)
         self.weights = optimizer.parameters
         self.optimizer = optimizer
-        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
+        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
         self.sens = sens
         self.reducer_flag = False
         self.grad_reducer = None

@@ -412,7 +412,7 @@ class TrainingWrapper(nn.Cell):
         self.network = network
         self.weights = optimizer.parameters
         self.optimizer = optimizer
-        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
+        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
         self.sens = sens
         self.reducer_flag = False
         self.grad_reducer = None

@@ -412,7 +412,7 @@ class TrainingWrapper(nn.Cell):
         self.network = network
         self.weights = optimizer.parameters
         self.optimizer = optimizer
-        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
+        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
         self.sens = sens
         self.reducer_flag = False
         self.grad_reducer = None

@@ -647,7 +647,7 @@ class TrainingWrapper(nn.Cell):
         self.network = network
         self.weights = ms.ParameterTuple(network.trainable_params())
         self.optimizer = optimizer
-        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
+        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
         self.sens = sens
         self.reducer_flag = False
         self.grad_reducer = None

@@ -141,7 +141,7 @@ class TrainOneStepCell(nn.Cell):
         self.network.add_flags(defer_inline=True)
         self.weights = ParameterTuple(network.trainable_params())
         self.optimizer = optimizer
-        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
+        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
         self.sens = sens

     def construct(self):

@@ -150,7 +150,7 @@ class TrainOneStepCell(nn.Cell):
         self.network.add_flags(defer_inline=True)
         self.weights = ParameterTuple(network.trainable_params())
         self.optimizer = optimizer
-        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
+        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
         self.sens = sens

     def construct(self):

@@ -57,8 +57,7 @@ class BertFinetuneCell(nn.Cell):
         self.network = network
         self.weights = optimizer.parameters
         self.optimizer = optimizer
-        self.grad = C.GradOperation('grad',
-                                    get_by_list=True,
+        self.grad = C.GradOperation(get_by_list=True,
                                     sens_param=True)
         self.reducer_flag = False
         self.allreduce = P.AllReduce()
@@ -160,7 +159,7 @@ class BertSquadCell(nn.Cell):
         self.network = network
         self.weights = optimizer.parameters
         self.optimizer = optimizer
-        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
+        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
         self.reducer_flag = False
         self.allreduce = P.AllReduce()
         self.parallel_mode = context.get_auto_parallel_context("parallel_mode")

@@ -274,7 +274,7 @@ class BertTrainOneStepCell(nn.Cell):
         self.network = network
         self.weights = optimizer.parameters
         self.optimizer = optimizer
-        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
+        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
         self.sens = sens
         self.reducer_flag = False
         self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
@@ -353,8 +353,7 @@ class BertTrainOneStepWithLossScaleCell(nn.Cell):
         self.network = network
         self.weights = optimizer.parameters
         self.optimizer = optimizer
-        self.grad = C.GradOperation('grad',
-                                    get_by_list=True,
+        self.grad = C.GradOperation(get_by_list=True,
                                     sens_param=True)
         self.reducer_flag = False
         self.allreduce = P.AllReduce()

@@ -293,7 +293,7 @@ class BertTrainOneStepCell(nn.Cell):
         self.network = network
         self.weights = optimizer.parameters
         self.optimizer = optimizer
-        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
+        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
         self.sens = sens
         self.reducer_flag = False
         self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
@@ -373,8 +373,7 @@ class BertTrainOneStepWithLossScaleCell(nn.Cell):
         self.network = network
         self.weights = optimizer.parameters
         self.optimizer = optimizer
-        self.grad = C.GradOperation('grad',
-                                    get_by_list=True,
+        self.grad = C.GradOperation(get_by_list=True,
                                     sens_param=True)
         self.reducer_flag = False
         self.allreduce = P.AllReduce()

@@ -119,7 +119,7 @@ class DistributedGradReducerThor(Cell):
         >>>         self.network.add_flags(defer_inline=True)
         >>>         self.weights = ParameterTuple(network.trainable_params())
         >>>         self.optimizer = optimizer
-        >>>         self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
+        >>>         self.grad = C.GradOperation(get_by_list=True, sens_param=True)
         >>>         self.sens = sens
         >>>         self.reducer_flag = False
         >>>         self.grad_reducer = None

@@ -239,7 +239,7 @@ class TransformerTrainOneStepWithLossScaleCell(nn.Cell):
         self.network.add_flags(defer_inline=True)
         self.weights = optimizer.parameters
         self.optimizer = optimizer
-        self.grad = C.GradOperation('grad', get_by_list=True,
+        self.grad = C.GradOperation(get_by_list=True,
                                     sens_param=True)
         self.reducer_flag = False
         self.all_reduce = P.AllReduce()

@@ -218,8 +218,7 @@ class BertTrainWithLossScaleCell(nn.Cell):
         self.network = network
         self.weights = optimizer.parameters
         self.optimizer = optimizer
-        self.grad = C.GradOperation('grad',
-                                    get_by_list=True,
+        self.grad = C.GradOperation(get_by_list=True,
                                     sens_param=True)
         self.reducer_flag = False
         self.allreduce = P.AllReduce()
@@ -310,8 +309,7 @@ class BertTrainCell(nn.Cell):
         self.weights = optimizer.parameters
         self.optimizer = optimizer
         self.sens = sens
-        self.grad = C.GradOperation('grad',
-                                    get_by_list=True,
+        self.grad = C.GradOperation(get_by_list=True,
                                     sens_param=True)
         self.reducer_flag = False
         self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
@@ -474,8 +472,7 @@ class BertEvaluationWithLossScaleCell(nn.Cell):
         self.network = network
         self.weights = optimizer.parameters
         self.optimizer = optimizer
-        self.grad = C.GradOperation('grad',
-                                    get_by_list=True,
+        self.grad = C.GradOperation(get_by_list=True,
                                     sens_param=True)
         self.reducer_flag = False
         self.allreduce = P.AllReduce()
@@ -562,8 +559,7 @@ class BertEvaluationCell(nn.Cell):
         self.weights = optimizer.parameters
         self.optimizer = optimizer
         self.sens = sens
-        self.grad = C.GradOperation('grad',
-                                    get_by_list=True,
+        self.grad = C.GradOperation(get_by_list=True,
                                     sens_param=True)
         self.reducer_flag = False
         self.parallel_mode = context.get_auto_parallel_context("parallel_mode")

@@ -158,7 +158,7 @@ class TransformerTrainOneStepCell(nn.Cell):
         self.network = network
         self.weights = ParameterTuple(network.trainable_params())
         self.optimizer = optimizer
-        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
+        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
         self.sens = sens
         self.reducer_flag = False
         self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
@@ -244,8 +244,7 @@ class TransformerTrainOneStepWithLossScaleCell(nn.Cell):
         self.network.add_flags(defer_inline=True)
         self.weights = ParameterTuple(network.trainable_params())
         self.optimizer = optimizer
-        self.grad = C.GradOperation('grad',
-                                    get_by_list=True,
+        self.grad = C.GradOperation(get_by_list=True,
                                     sens_param=True)
         self.reducer_flag = False
         self.allreduce = P.AllReduce()

@@ -286,7 +286,7 @@ class TrainStepWrap(nn.Cell):
         self.weights = ParameterTuple(network.trainable_params())
         self.optimizer = Adam(self.weights, learning_rate=lr, eps=eps, loss_scale=loss_scale)
         self.hyper_map = C.HyperMap()
-        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
+        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
         self.sens = loss_scale

     def construct(self, batch_ids, batch_wts, label):

@@ -337,9 +337,9 @@ class TrainStepWrap(nn.Cell):
         self.optimizer_w = FTRL(learning_rate=5e-2, params=self.weights_w,
                                 l1=1e-8, l2=1e-8, initial_accum=1.0, loss_scale=sens)
         self.hyper_map = C.HyperMap()
-        self.grad_w = C.GradOperation('grad_w', get_by_list=True,
+        self.grad_w = C.GradOperation(get_by_list=True,
                                       sens_param=True)
-        self.grad_d = C.GradOperation('grad_d', get_by_list=True,
+        self.grad_d = C.GradOperation(get_by_list=True,
                                       sens_param=True)
         self.sens = sens
         self.loss_net_w = IthOutputCell(network, output_index=0)

@@ -537,11 +537,9 @@ class TrainStepWrap(nn.Cell):
         self.hyper_map = C.HyperMap()
-        self.grad_w = C.GradOperation('grad_w',
-                                      get_by_list=True,
+        self.grad_w = C.GradOperation(get_by_list=True,
                                       sens_param=True)
-        self.grad_d = C.GradOperation('grad_d',
-                                      get_by_list=True,
+        self.grad_d = C.GradOperation(get_by_list=True,
                                       sens_param=True)
         self.sens = sens

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save