@@ -744,12 +744,6 @@ class DistributeTranspiler(object):
             elif op not in lr_ops:
                 self._append_pserver_non_opt_ops(block, op)
 
-        def __op_have_grad_input__(op):
-            for varname in op.input_arg_names:
-                if varname.find("@GRAD") >= 0:
-                    return varname
-            return ""
-
         def __clone_lr_op_sub_block__(op, program, lr_block):
             if not op.has_attr('sub_block'):
                 return
@@ -800,7 +794,7 @@ class DistributeTranspiler(object):
             merged_var = None
             for _, op in enumerate(self.optimize_ops):
                 # find the origin grad var before clipping/L2Decay,
-                # merged_var should be the input var name of L2Decaybuil
+                # merged_var should be the input var name of L2Decay
                 grad_varname_for_block = op.attr(OP_ROLE_VAR_ATTR_NAME)[1]
                 if op.attr(OP_ROLE_VAR_ATTR_NAME)[
                         0] == optimize_target_param_name:
@@ -1278,9 +1272,8 @@ class DistributeTranspiler(object):
         # create table param and grad var in pserver program
         # create table optimize block in pserver program
         table_opt_op = [
-            op for op in self.optimize_ops
-            if 'Param' in op.input_names and op.input("Param")[0] ==
-            self.table_name
+            op for op in self.optimize_ops if 'Param' in op.input_names and
+            op.input("Param")[0] == self.table_name
         ][0]
 
         origin_param_var = self.origin_program.global_block().vars[
@@ -1676,7 +1669,16 @@ class DistributeTranspiler(object):
                 if self.config.enable_dc_asgd:
                     new_inputs[key] = dc
                 else:
-                    new_inputs[key] = merged_var
+                    # Note!! This is for l2decay on sparse gradient, because it will create a new tensor for
+                    # decayed gradient but not inplace modify the origin one
+                    origin_grad_name = opt_op.input(key)[0]
+                    if core.kNewGradSuffix(
+                    ) in origin_grad_name and pserver_block.has_var(
+                            origin_grad_name):
+                        new_grad = pserver_block.var(origin_grad_name)
+                        new_inputs[key] = new_grad
+                    else:
+                        new_inputs[key] = merged_var
             elif key == "Param":
                 param_block = _get_param_block(opt_op)
                 if not param_block:
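
Note (not part of the patch): the last hunk's comment explains that l2decay on a sparse gradient writes a new decayed-gradient tensor instead of modifying the original in place, so the optimizer should consume that tensor when the pserver block already holds it. A minimal standalone sketch of that selection logic follows; NEW_GRAD_SUFFIX and pick_grad_input are illustrative stand-ins (assumptions), not the value of core.kNewGradSuffix() or any real transpiler helper.

# Illustrative sketch only. NEW_GRAD_SUFFIX is an assumed placeholder for the
# string returned by core.kNewGradSuffix(); pserver_vars stands in for the
# pserver block's variable map.
NEW_GRAD_SUFFIX = "@GRAD@RENAME"  # assumed placeholder value

def pick_grad_input(origin_grad_name, pserver_vars, merged_var):
    """Prefer the decayed-gradient variable produced by l2decay on a sparse
    gradient when the pserver block already holds it; otherwise fall back to
    the merged gradient variable, mirroring the new else-branch above."""
    if NEW_GRAD_SUFFIX in origin_grad_name and origin_grad_name in pserver_vars:
        return pserver_vars[origin_grad_name]
    return merged_var

# Usage: the decayed variable exists on the pserver, so it is chosen;
# otherwise the merged gradient is used as before.
pserver_vars = {"w@GRAD@RENAME": "decayed_grad_tensor"}
assert pick_grad_input("w@GRAD@RENAME", pserver_vars, "merged") == "decayed_grad_tensor"
assert pick_grad_input("w@GRAD", pserver_vars, "merged") == "merged"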