@@ -12,17 +12,19 @@ def grad_var_name(var_name):
 def create_op(scope, op_type, inputs, outputs, attrs):
     kwargs = dict()
 
+    def __create_var__(name, var_name):
+        scope.new_var(var_name)
+        kwargs[name].append(var_name)
+
     for in_name, in_dup in Operator.get_op_inputs(op_type):
         if in_name in inputs:
             kwargs[in_name] = []
             if in_dup:
                 sub_in = inputs[in_name]
                 for sub_in_name, _ in sub_in:
-                    var = scope.new_var(sub_in_name)
-                    kwargs[in_name].append(sub_in_name)
+                    __create_var__(in_name, sub_in_name)
             else:
-                var = scope.new_var(in_name)
-                kwargs[in_name].append(in_name)
+                __create_var__(in_name, in_name)
 
     for out_name, out_dup in Operator.get_op_outputs(op_type):
         if out_name in outputs:
@@ -30,11 +32,9 @@ def create_op(scope, op_type, inputs, outputs, attrs):
             if out_dup:
                 sub_out = outputs[out_name]
                 for sub_out_name, _ in sub_out:
-                    var = scope.new_var(sub_out_name)
-                    kwargs[out_name].append(sub_out_name)
+                    __create_var__(out_name, sub_out_name)
             else:
-                var = scope.new_var(out_name)
-                kwargs[out_name].append(out_name)
+                __create_var__(out_name, out_name)
 
     for attr_name in Operator.get_op_attr_names(op_type):
         if attr_name in attrs:
@@ -44,49 +44,46 @@ def create_op(scope, op_type, inputs, outputs, attrs):
 
 
 def set_input(scope, op, inputs, place):
+    def __set_input__(var_name, var):
+        tensor = scope.find_var(var_name).get_tensor()
+        if isinstance(var, tuple):
+            tensor.set_lod(var[1])
+            var = var[0]
+        tensor.set_dims(var.shape)
+        tensor.set(var, place)
+
     for in_name, in_dup in Operator.get_op_inputs(op.type()):
         if in_name in inputs:
             if in_dup:
                 sub_in = inputs[in_name]
                 for sub_in_name, sub_in_val in sub_in:
-                    var = scope.find_var(sub_in_name)
-                    tensor = var.get_tensor()
-                    sub_in_array = sub_in_val[0] \
-                        if isinstance(sub_in_val, tuple) else sub_in_val
-                    tensor.set_dims(sub_in_array.shape)
-                    tensor.set(sub_in_array, place)
-                    if isinstance(sub_in_val, tuple):
-                        tensor.set_lod(sub_in_val[1])
+                    __set_input__(sub_in_name, sub_in_val)
             else:
-                var = scope.find_var(in_name)
-                tensor = var.get_tensor()
-                in_val = inputs[in_name]
-                in_array = in_val[0] if isinstance(in_val, tuple) else in_val
-                tensor.set_dims(in_array.shape)
-                tensor.set(in_array, place)
-                if isinstance(in_val, tuple):
-                    tensor.set_lod(in_val[1])
+                __set_input__(in_name, inputs[in_name])
 
 
 def set_output_grad(scope, op, outputs, place):
+    def __set_tensor__(name):
+        out_tensor = scope.find_var(name).get_tensor()
+        grad_tensor = scope.new_var(grad_var_name(name)).get_tensor()
+        out_dtype = out_tensor.dtype()
+        if out_dtype == core.DataType.FP64:
+            data = np.ones(out_tensor.shape(), dtype=np.float64)
+        elif out_dtype == core.DataType.FP32:
+            data = np.ones(out_tensor.shape(), dtype=np.float32)
+        else:
+            raise ValueError("Not supported data type " + str(out_dtype))
+
+        grad_tensor.set(data, place)
+
     for out_name, out_dup in Operator.get_op_outputs(op.type()):
         if out_name in outputs:
             if out_dup:
                 sub_out = outputs[out_name]
                 for sub_out_name, _ in sub_out:
-                    out_tensor = scope.find_var(sub_out_name).get_tensor()
-                    grad_tensor = scope.new_var(grad_var_name(
-                        sub_out_name)).get_tensor()
-                    grad_tensor.set_dims(out_tensor.shape())
-                    data = np.ones(out_tensor.shape(), dtype=np.float32)
-                    grad_tensor.set(data, place)
+                    __set_tensor__(sub_out_name)
             else:
-                out_tensor = scope.find_var(out_name).get_tensor()
-                grad_tensor = scope.new_var(grad_var_name(out_name)).get_tensor(
-                )
-                grad_tensor.set_dims(out_tensor.shape())
-                data = np.ones(out_tensor.shape(), dtype=np.float32)
-                grad_tensor.set(data, place)
+                __set_tensor__(out_name)
 
 
 def get_numeric_gradient(scope,
@@ -96,7 +93,6 @@ def get_numeric_gradient(scope,
                          output_names,
                          delta=0.005,
                          in_place=False):
-
     set_input(scope, op, inputs, core.CPUPlace())
 
     tensor_to_check = scope.find_var(input_to_check).get_tensor()
@@ -115,7 +111,29 @@ def get_numeric_gradient(scope,
 
     tensor_to_check = scope.find_var(input_to_check).get_tensor()
     tensor_size = product(tensor_to_check.get_dims())
-    gradient_flat = np.zeros(shape=(tensor_size, ), dtype='float32')
+    tensor_to_check_dtype = tensor_to_check.dtype()
+    if tensor_to_check_dtype == core.DataType.FP32:
+        tensor_to_check_dtype = np.float32
+    elif tensor_to_check_dtype == core.DataType.FP64:
+        tensor_to_check_dtype = np.float64
+    else:
+        raise ValueError("Not supported data type " + str(
+            tensor_to_check_dtype))
+
+    gradient_flat = np.zeros(shape=(tensor_size, ), dtype=tensor_to_check_dtype)
+
+    def __get_elem__(tensor, i):
+        if tensor_to_check_dtype == np.float32:
+            return tensor.get_float_element(i)
+        else:
+            return tensor.get_double_element(i)
+
+    def __set_elem__(tensor, i, e):
+        if tensor_to_check_dtype == np.float32:
+            tensor.set_float_element(i, e)
+        else:
+            tensor.set_double_element(i, e)
+
     # we only compute gradient of one element each time.
     # we use a for loop to compute the gradient of every element.
     for i in xrange(tensor_size):
@@ -123,20 +141,20 @@ def get_numeric_gradient(scope,
             set_input(scope, op, inputs, core.CPUPlace())
 
         # get one input element through its index i.
-        origin = tensor_to_check.get_float_element(i)
+        origin = __get_elem__(tensor_to_check, i)
         # add delta to it, run op and then get the sum of the result tensor.
         x_pos = origin + delta
-        tensor_to_check.set_float_element(i, x_pos)
+        __set_elem__(tensor_to_check, i, x_pos)
        y_pos = get_output()
 
         if in_place:
             set_input(scope, op, inputs, core.CPUPlace())
 
         x_neg = origin - delta
-        tensor_to_check.set_float_element(i, x_neg)
+        __set_elem__(tensor_to_check, i, x_neg)
         y_neg = get_output()
 
-        tensor_to_check.set_float_element(i, origin)
+        __set_elem__(tensor_to_check, i, origin)
         gradient_flat[i] = (y_pos - y_neg) / delta / 2
 
     return gradient_flat.reshape(tensor_to_check.get_dims())
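
The loop in the last hunk is a central-difference estimate: each element of the input tensor is nudged by +delta and by -delta, the summed op outputs y_pos and y_neg are compared, and the gradient entry is (y_pos - y_neg) / delta / 2. A minimal NumPy-only sketch of the same rule, assuming a scalar-valued f and in-place perturbation (numeric_gradient and f are illustrative names, not part of the patch):

# Illustrative only -- not part of the diff above.
import numpy as np


def numeric_gradient(f, x, delta=0.005):
    # f maps the ndarray x to a scalar; x is perturbed in place,
    # one element at a time, mirroring the loop in the patch.
    grad_flat = np.zeros(x.size, dtype=x.dtype)
    x_flat = x.reshape(-1)  # a view, so writes reach x itself
    for i in range(x_flat.size):
        origin = x_flat[i]
        x_flat[i] = origin + delta
        y_pos = f(x)
        x_flat[i] = origin - delta
        y_neg = f(x)
        x_flat[i] = origin  # restore the original value
        grad_flat[i] = (y_pos - y_neg) / delta / 2
    return grad_flat.reshape(x.shape)


# For f(x) = sum(x ** 2) the analytic gradient is 2 * x.
x = np.random.rand(3, 4)
assert np.allclose(numeric_gradient(lambda v: (v ** 2).sum(), x), 2 * x, atol=1e-3)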