# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This is the library for gradient checker unit tests."""

from __future__ import print_function

import unittest
import six
import collections
import numpy as np
from itertools import product

import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.executor import Executor
from paddle.fluid.backward import _append_grad_suffix_, _as_list


def _product(t):
    # t is either an int (already a size) or an iterable shape.
    if isinstance(t, int):
        return t
    else:
        return np.prod(t)


def dtype_to_np_dtype(dtype):
    if dtype == core.VarDesc.VarType.FP32:
        return np.float32
    elif dtype == core.VarDesc.VarType.FP64:
        return np.float64
    elif dtype == core.VarDesc.VarType.FP16:
        return np.float16
    else:
        raise ValueError("Not supported data type " + str(dtype))


def _get_item(t, i, np_dtype):
    if np_dtype == np.float16:
        np_t = np.array(t).astype(np.float16)
        np_t = np_t.flatten()
        return np_t[i]
    elif np_dtype == np.float32:
        return t._get_float_element(i)
    elif np_dtype == np.float64:
        return t._get_double_element(i)
    else:
        raise ValueError("Not supported data type " + str(np_dtype))


def _set_item(t, i, e, np_dtype, place):
    # `place` is needed by the float16 branch: the original version referenced
    # an undefined `place` here, so it is now threaded in from the callers.
    if np_dtype == np.float16:
        np_t = np.array(t).astype(np.float16)
        shape = np_t.shape
        np_t = np_t.flatten()
        np_t[i] = e
        # float16 tensors are stored as uint16, so reinterpret before set.
        np_t = np_t.reshape(shape).view(np.uint16)
        t.set(np_t, place)
    elif np_dtype == np.float32:
        t._set_float_element(i, e)
    elif np_dtype == np.float64:
        t._set_double_element(i, e)
    else:
        raise ValueError("Not supported data type " + str(np_dtype))


def set_var_in_scope(scope, place, name, value, recursive_seq_len=None):
    t = scope.var(name).get_tensor()
    t.set(value, place)
    if recursive_seq_len:
        t.set_recursive_sequence_lengths(recursive_seq_len)
    return t


def var_to_np_array_in_scope(scope, place, name):
    return np.array(scope.var(name).get_tensor())


def make_jacobian(x, y_size, np_dtype):
    if isinstance(x, fluid.framework.Variable):
        return np.zeros((_product(x.shape), y_size), dtype=np_dtype)
    elif isinstance(x, collections.Sequence):
        jacobians = list(
            filter(lambda t: t is not None, (make_jacobian(
                item, y_size, np_dtype) for item in x)))
        return jacobians
    else:
        return None
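
# For example (illustrative shapes, not from the original file): calling
# make_jacobian on a Variable of shape (2, 3) with y_size 4 returns
# np.zeros((6, 4)) -- one row per element of x, one column per element of y.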


def _compute_numerical_jacobian(program, x, y, place, scope, delta):
    """Computes the numeric Jacobian for dy/dx.

    Computes the numeric Jacobian by slightly perturbing the input and
    measuring the differences on the output.

    Args:
        program (Program): the network program.
        x (Variable): the input variable.
        y (list[Variable]): the output variables.
        place (fluid.CPUPlace or fluid.CUDAPlace): the device.
        scope (Scope): the scope used to run program.
        delta (float): the amount of perturbation given to the input.

    Returns:
        A list of 2-D numpy arrays; the list length is len(y).
        Each 2-D numpy array represents the Jacobian for dy_i/dx.
        It has "x_size" rows and "y_size" columns
        where "x_size" is the number of elements in x and
        "y_size" is the number of elements in each y_i.
    """
    if not isinstance(x, fluid.framework.Variable):
        raise TypeError('x is not Variable')

    # To compute the jacobian, treat x and y as one-dimensional vectors.
    y = _as_list(y)
    exe = fluid.Executor(place)

    def run():
        y_res = exe.run(program, scope=scope, fetch_list=y)
        return [yi.flatten() for yi in y_res]

    x_name = x.name
    x_shape = x.shape
    x_size = _product(x_shape)
    x_t = scope.find_var(x_name).get_tensor()

    np_type = dtype_to_np_dtype(x.dtype)
    jacobian = [make_jacobian(x, _product(yi.shape), np_type) for yi in y]

    for i in six.moves.xrange(x_size):
        orig = _get_item(x_t, i, np_type)
        x_pos = orig + delta
        _set_item(x_t, i, x_pos, np_type, place)
        y_pos = run()

        x_neg = orig - delta
        _set_item(x_t, i, x_neg, np_type, place)
        y_neg = run()

        _set_item(x_t, i, orig, np_type, place)

        # Central difference: dy/dx_i ~= (y(x + delta) - y(x - delta)) / (2 * delta).
        for j in six.moves.xrange(len(y)):
            jacobian[j][i, :] = (y_pos[j] - y_neg[j]) / delta / 2.

    return jacobian
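
# Sanity intuition (illustrative, not from the original file): for an
# elementwise map y = 3 * x, _compute_numerical_jacobian returns approximately
# 3 * np.eye(x_size) for a suitably small delta.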


def _compute_analytical_jacobian(program, x, y, place, scope):
    """Computes the analytical Jacobian for dy/dx.

    Args:
        program (Program): a Program with forward pass.
        x (Variable|list[Variable]): a variable or list of variables.
        y (Variable): the target variable.
        place (fluid.CPUPlace or fluid.CUDAPlace): the device.
        scope (Scope): the scope used to run program.

    Returns:
        A list of 2-D numpy arrays. The list length is len(x).
        Each 2-D numpy array represents the Jacobian for dy/dx_i.
        It has "xi_size" rows and "dy_size" columns
        where "xi_size" is the number of elements in x_i and
        "dy_size" is the number of elements in y.
    """
    if not isinstance(y, fluid.framework.Variable):
        raise TypeError('y is not Variable')

    dy_name = _append_grad_suffix_(y.name)

    np_type = dtype_to_np_dtype(y.dtype)
    # create dy Variable in Program
    dy = program.global_block().create_var(
        name=dy_name, shape=y.shape, dtype=np_type, persistable=True)
    # append backward
    dx = fluid.gradients(y, x, dy)

    # init dy tensor in scope
    value = np.zeros(y.shape, dtype=np_type)
    dy_t = set_var_in_scope(scope, place, dy_name, value)

    exe = fluid.Executor(place)

    y_size = _product(y.shape)

    x = _as_list(x)
    jacobian = make_jacobian(x, y_size, np_type)

    # dx may contain None entries when a dx_i/dy is not computed by the
    # kernel; only fetch the not-None dx in exe.run.
    filtered = [(i, dxi) for i, dxi in enumerate(dx) if dxi is not None]
    filtered_idx, filtered_dx = zip(*filtered)

    for i in six.moves.xrange(y_size):
        # Set dy to the i-th unit vector; one backward run then yields
        # column i of each Jacobian, i.e. d y_i / d x_j for all j.
        _set_item(dy_t, i, 1, np_type, place)

        dx_res = exe.run(program, scope=scope, fetch_list=filtered_dx)

        for j in six.moves.xrange(len(filtered_dx)):
            dx_idx = filtered_idx[j]
            if dx_res[j] is not None:
                jacobian[dx_idx][:, i] = dx_res[j].flatten()
            else:
                jacobian[dx_idx][:, i] = np.zeros(
                    dx[dx_idx].shape, dtype=np_type).flatten()

        _set_item(dy_t, i, 0, np_type, place)

    return jacobian
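
# Sanity intuition (illustrative, not from the original file): for an
# elementwise map y = 2 * x, the returned list holds a single
# (x_size, x_size) array equal to 2 * np.eye(x_size).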


def grad_check(x,
               y,
               x_init=None,
               place=None,
               program=None,
               eps=1e-6,
               atol=1e-5,
               rtol=1e-3,
               raise_exception=True):
    """
    Check numerical and analytical gradients for dy/dx.
    Each Jacobian gradient is a 2-D array with shape [xi_size, yi_size].

    Args:
        x (Variable|list[Variable]): input variables to the program.
        y (Variable|list[Variable]): output variables to the program.
        x_init (numpy.array|list[numpy.array]|None): the init value for input x.
        place (fluid.CPUPlace or fluid.CUDAPlace): the device.
        program (Program|None): a Program with forward pass.
            If None, use fluid.default_main_program().
        eps (float): perturbation for finite differences.
        atol (float): absolute tolerance.
        rtol (float): relative tolerance.
        raise_exception (bool): whether to raise an exception if
            the check fails. Default is True.

    Returns:
        True if all differences satisfy numpy.allclose condition.
    """

    def fail_test(msg):
        if raise_exception:
            raise RuntimeError(msg)
        return False

    # check input arguments
    x = _as_list(x)
    y = _as_list(y)

    for v in x:
        v.stop_gradient = False
        v.persistable = True
    if place is None:
        place = fluid.CPUPlace()
    if program is None:
        program = fluid.default_main_program()

    # init variables in the startup program
    scope = fluid.executor.global_scope()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    x_init = _as_list(x_init)
    # init inputs if x_init is not None
    if x_init:
        if len(x_init) != len(x):
            raise ValueError('len(x_init) (=%d) is not the same'
                             ' as len(x) (=%d)' % (len(x_init), len(x)))
        # init variables in the main program
        for var, arr in zip(x, x_init):
            assert var.shape == arr.shape
        feeds = {k.name: v for k, v in zip(x, x_init)}
        exe.run(program, feed=feeds, scope=scope)

    # numerical[x_idx][y_idx]: numerical Jacobian of y[y_idx] w.r.t. x[x_idx]
    numerical = [
        _compute_numerical_jacobian(program, xi, y, place, scope, eps)
        for xi in x
    ]

    # analytical[y_idx][x_idx]: analytical Jacobian of y[y_idx] w.r.t. x[x_idx]
    analytical = []
    for yi in y:
        prog = program.clone()

        # look up the cloned counterparts of x and yi in the cloned program
        clone_x = []
        clone_y = None
        for b in prog.blocks:
            if b.has_var(yi.name):
                clone_y = b.var(yi.name)
                break
        for xi in x:
            for b in prog.blocks:
                if b.has_var(xi.name):
                    clone_x.append(b.var(xi.name))
                    break

        analytical.append(
            _compute_analytical_jacobian(prog, clone_x, clone_y, place, scope))

    for x_idx, y_idx in product(range(len(x)), range(len(y))):
        a = analytical[y_idx][x_idx]
        n = numerical[x_idx][y_idx]
        if not np.allclose(a, n, rtol, atol):
            msg = 'Jacobian mismatch for output %s ' \
                  'with respect to input %s on %s,\n' \
                  'numerical:%s\nanalytical:%s\n' \
                  % (y[y_idx].name, x[x_idx].name, str(place), n, a)
            return fail_test(msg)
    return True
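
# Typical usage (a sketch, not from the original file; the tanh layer and the
# shapes are illustrative assumptions about a fluid 1.x program):
#
#     prog = fluid.Program()
#     with fluid.program_guard(prog):
#         x = fluid.layers.data(
#             name='x', shape=[2, 3], append_batch_size=False, dtype='float64')
#         x.persistable = True
#         y = fluid.layers.tanh(x)
#         x_arr = np.random.uniform(-1, 1, [2, 3]).astype(np.float64)
#         grad_check([x], y, x_init=[x_arr], place=fluid.CPUPlace())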


def double_grad_check(x,
                      y,
                      x_init=None,
                      y_grads=None,
                      place=None,
                      program=None,
                      eps=1e-6,
                      atol=1e-5,
                      rtol=1e-3,
                      raise_exception=True):
    """
    Check gradients of gradients. This function will append backward to the
    program before the second-order gradient check.

    Args:
        x (Variable|list[Variable]): input variables to the program.
        y (Variable|list[Variable]): output variables to the program.
        x_init (numpy.array|list[numpy.array]|None): the init value for input x.
        y_grads (numpy.array|list[numpy.array]|None): the gradients with respect to y.
        place (fluid.CPUPlace or fluid.CUDAPlace): the device.
        program (Program|None): a Program with forward pass.
            If None, use fluid.default_main_program().
        eps (float): perturbation for finite differences.
        atol (float): absolute tolerance.
        rtol (float): relative tolerance.
        raise_exception (bool): whether to raise an exception if
            the check fails. Default is True.

    Returns:
        True if all differences satisfy numpy.allclose condition.
    """
    # check input arguments
    x = _as_list(x)
    for v in x:
        v.stop_gradient = False
        v.persistable = True
    y = _as_list(y)

    # place is used below before grad_check can default it, so default it here
    if place is None:
        place = fluid.CPUPlace()
    if program is None:
        program = fluid.default_main_program()

    # scope is needed in both branches below, so fetch it unconditionally
    scope = fluid.executor.global_scope()
    if y_grads is None:
        y_grads = []
        y_grads_init = []
        for yi in y:
            dyi_name = _append_grad_suffix_(yi.name)
            np_type = dtype_to_np_dtype(yi.dtype)
            dy = program.global_block().create_var(
                name=dyi_name, shape=yi.shape, dtype=np_type, persistable=True)
            dy.stop_gradient = False
            v = np.random.random(size=yi.shape).astype(np_type)
            set_var_in_scope(scope, place, dyi_name, v)
            y_grads.append(dy)
            y_grads_init.append(v)
    else:
        y_grads = _as_list(y_grads)
        y_grads_init = [
            var_to_np_array_in_scope(scope, place, v.name) for v in y_grads
        ]

    # append first-order grads
    target_grads = fluid.gradients(y, x, y_grads)

    # y_grads are the input of first-order backward,
    # so they are also the input of second-order backward.
    x += y_grads
    x_init = _as_list(x_init)
    x_init += y_grads_init

    return grad_check(x, target_grads, x_init, place, program, eps, atol,
                      rtol, raise_exception)
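

if __name__ == "__main__":
    # A minimal self-check sketch, not part of the original test suite: the
    # sigmoid layer, shapes, and eps value below are illustrative assumptions
    # modeled on how this module is used from fluid 1.x unit tests.
    shape = [2, 3]
    prog = fluid.Program()
    with fluid.program_guard(prog):
        x = fluid.layers.data(
            name='x', shape=shape, append_batch_size=False, dtype='float64')
        x.persistable = True
        y = fluid.layers.sigmoid(x)
        x_arr = np.random.uniform(-1, 1, shape).astype(np.float64)
        double_grad_check(
            [x], y, x_init=[x_arr], place=fluid.CPUPlace(), eps=0.005)
    print("double_grad_check passed")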