@@ -1,16 +1,31 @@
import unittest

import numpy
import paddle.v2.framework.core as core
from paddle.v2.framework.op import Operator

__all__ = ['get_numeric_gradient']


def create_op(op_type):
    kwargs = dict()
    for in_name in Operator.get_op_input_names(op_type):
        kwargs[in_name] = in_name
    for out_name in Operator.get_op_output_names(op_type):
        kwargs[out_name] = out_name
    return Operator(op_type, **kwargs)
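
# For illustration (hypothetical op name): if an "add_two" op were registered
# with inputs X and Y and a single output Out, create_op("add_two") would build
# Operator("add_two", X="X", Y="Y", Out="Out"), i.e. each input/output slot is
# bound to a scope variable of the same name.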


def grad_var_name(var_name):
    return var_name + "@GRAD"
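
# For reference: grad_var_name("X") returns "X@GRAD", the naming convention
# used below when looking up gradient tensors in the scope.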


def get_numeric_gradient(op,
                         input_values,
                         output_name,
                         input_to_check,
                         delta=0.005,
                         local_scope=None):
    """
    Get Numeric Gradient for an operator's input.
@@ -76,6 +91,113 @@ def get_numeric_gradient(op,
    return gradient_flat.reshape(tensor_to_check.get_dims())
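

# Illustrative sketch (hypothetical helper, not used by the checker): the
# numeric gradient above is a finite-difference estimate. For a plain
# scalar-valued numpy function f, a central-difference version perturbs one
# element of x at a time:
#     grad[i] ~= (f(x + delta*e_i) - f(x - delta*e_i)) / (2*delta)
# get_numeric_gradient applies the same idea to an operator by running it in a
# scope, though its exact difference scheme may differ.
def _numeric_gradient_sketch(f, x, delta=0.005):
    flat_x = numpy.array(x, dtype="float64").flatten()
    grad = numpy.zeros_like(flat_x)
    for i in range(flat_x.size):
        orig = flat_x[i]
        flat_x[i] = orig + delta
        y_pos = f(flat_x.reshape(numpy.shape(x)))
        flat_x[i] = orig - delta
        y_neg = f(flat_x.reshape(numpy.shape(x)))
        flat_x[i] = orig  # restore the original value before the next element
        grad[i] = (y_pos - y_neg) / delta / 2
    return grad.reshape(numpy.shape(x))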


class GradientChecker(unittest.TestCase):
    def __is_close(self, numeric_grads, scope, max_relative_error):
        for name in numeric_grads:
            op_grad = numpy.array(
                scope.find_var(grad_var_name(name)).get_tensor())
            is_close = numpy.allclose(
                numeric_grads[name], op_grad, rtol=max_relative_error, atol=100)
            if not is_close:
                return False
        return True
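
    # Note: numpy.allclose(a, b, rtol, atol) passes when
    # |a - b| <= atol + rtol * |b| holds element-wise, so max_relative_error
    # above is measured relative to the operator-computed gradient.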

    def check_grad(self,
                   forward_op,
                   input_vars,
                   inputs_to_check,
                   output_name,
                   no_grad_set=None,
                   only_cpu=False,
                   max_relative_error=0.005):
        """
        :param forward_op: used to create the backward_op
        :param input_vars: numpy values of the input variables. The following
            computation will use these variables.
        :param inputs_to_check: the input variable names whose gradients
            should be checked.
        :param output_name: the output variable name used to compute gradients.
        :param max_relative_error: the relative tolerance parameter.
        :param no_grad_set: used when creating the backward ops.
        :param only_cpu: only compute and check gradients on the CPU kernel.
        :return: None
        """
        if no_grad_set is None:
            no_grad_set = set()

        tmp_outs = forward_op.temp_outputs()
        no_tmp_out = filter(lambda name: name not in tmp_outs,
                            forward_op.outputs())
        if len(no_tmp_out) != 1:
            raise ValueError("number of non-temp outputs should be 1")

        in_names = forward_op.inputs()
        for no_grad in no_grad_set:
            if no_grad not in in_names:
                raise ValueError("no_grad should be in in_names")

        backward_op = core.Operator.backward(forward_op, no_grad_set)

        places = [core.CPUPlace()]
        if not only_cpu and core.is_compile_gpu() and backward_op.support_gpu():
            places.append(core.GPUPlace(0))

        numeric_grad = dict()
        # get numeric gradient
        for check_name in inputs_to_check:
            numeric_grad[check_name] = \
                get_numeric_gradient(forward_op, input_vars, output_name, check_name)

        # get operator gradient according to different device
        for place in places:
            scope = core.Scope()
            ctx = core.DeviceContext.create(place)

            # create input var and set value
            for name, value in input_vars.iteritems():
                if name not in in_names:
                    raise ValueError(name + " not in op.inputs_")
                var = scope.new_var(name).get_tensor()
                var.set_dims(value.shape)
                var.set(value, place)

            # create output var
            for out_name in forward_op.outputs():
                scope.new_var(out_name).get_tensor()

            # infer the shape of output var and compute/set value of output var
            forward_op.infer_shape(scope)
            forward_op.run(scope, ctx)

            # create the output grad vars, set their shapes to the output
            # vars' shapes, and initialize their values to ones
            for name in forward_op.outputs():
                out_tensor = scope.find_var(name).get_tensor()
                grad_tensor = scope.new_var(grad_var_name(name)).get_tensor()
                grad_tensor.set_dims(out_tensor.shape())
                data = 1.0 * numpy.ones(out_tensor.shape())
                grad_tensor.set(data, place)
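
            # with the upstream gradient set to ones, the backward op computes
            # d(sum of output elements)/d(input), which is the quantity the
            # finite-difference estimate is compared against (assuming
            # get_numeric_gradient reduces the op output to a scalar by summing)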

            # create input grad var
            for name in backward_op.outputs():
                scope.new_var(name).get_tensor()

            # infer the shape of the input gradient vars and compute/set their
            # values with the backward op
            backward_op.infer_shape(scope)
            backward_op.run(scope, ctx)

            if isinstance(place, core.CPUPlace):
                msg = "CPU kernel gradient is not close to numeric gradient"
            else:
                if isinstance(place, core.GPUPlace):
                    msg = "GPU kernel gradient is not close to numeric gradient"
                else:
                    raise ValueError("unknown place " + str(type(place)))
            self.assertTrue(
                self.__is_close(numeric_grad, scope, max_relative_error), msg)
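

# A hypothetical usage sketch (op and variable names below are illustrative,
# not defined in this file): a concrete operator test subclasses
# GradientChecker and calls check_grad with a forward op and numpy inputs.
#
#     class MulGradientTest(GradientChecker):
#         def test_mul(self):
#             op = create_op("mul")
#             inputs = {
#                 "X": numpy.random.random((32, 84)).astype("float32"),
#                 "Y": numpy.random.random((84, 100)).astype("float32"),
#             }
#             self.check_grad(op, inputs, set(["X", "Y"]), "Out")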


if __name__ == '__main__':

    class GetNumericGradientTest(unittest.TestCase):
@@ -87,4 +209,28 @@ if __name__ == '__main__':
            arr = get_numeric_gradient(add_op, {'X': x, "Y": y}, 'Z', 'X')
            self.assertAlmostEqual(arr.mean(), 1.0, delta=1e-2)
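            # for an element-wise add Z = X + Y, dZ/dX is the identity, so
            # every entry of the numeric gradient (and hence its mean) should
            # be close to 1.0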

        def test_softmax_op(self):
            def stable_softmax(x):
                """Compute the softmax of vector x in a numerically stable way."""
                shiftx = x - numpy.max(x)
                exps = numpy.exp(shiftx)
                return exps / numpy.sum(exps)

            def label_softmax_grad(Y, dY):
                dX = Y * 0.0
                for i in range(Y.shape[0]):
                    d = numpy.dot(Y[i, :], dY[i, :])
                    dX[i, :] = Y[i, :] * (dY[i, :] - d)
                return dX
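
            # label_softmax_grad applies the softmax backward rule: for
            # y = softmax(x), dx_i = y_i * (dy_i - sum_j y_j * dy_j),
            # evaluated row by row.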

            softmax_op = Operator("softmax", X="X", Y="Y")

            X = numpy.random.random((2, 2)).astype("float32")
            Y = numpy.apply_along_axis(stable_softmax, 1, X)
            dY = numpy.ones(Y.shape)
            dX = label_softmax_grad(Y, dY)

            arr = get_numeric_gradient(softmax_op, {"X": X}, 'Y', 'X')
            numpy.testing.assert_almost_equal(arr, dX, decimal=2)

    unittest.main()