@@ -1,16 +1,31 @@
import unittest

import numpy
import paddle.v2.framework.core as core
from paddle.v2.framework.op import Operator

__all__ = ['get_numeric_gradient']


def create_op(op_type):
    kwargs = dict()
    for in_name in Operator.get_op_input_names(op_type):
        kwargs[in_name] = in_name
    for out_name in Operator.get_op_output_names(op_type):
        kwargs[out_name] = out_name
    return Operator(op_type, **kwargs)
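
# Example (hypothetical op name, not taken from this file): for an elementwise
# add op registered as "add_two", create_op("add_two") would be equivalent to
# Operator("add_two", X="X", Y="Y", Out="Out"), i.e. every input/output slot is
# bound to a variable with the same name as the slot.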


def grad_var_name(var_name):
    return var_name + "@GRAD"
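
# grad_var_name("X") -> "X@GRAD", the name of the variable that stores X's gradient.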


def get_numeric_gradient(op,
                         input_values,
                         output_name,
                         input_to_check,
                         delta=0.005,
                         local_scope=None):
    """
    Get Numeric Gradient for an operator's input.
@@ -76,6 +91,113 @@ def get_numeric_gradient(op,
    return gradient_flat.reshape(tensor_to_check.get_dims())
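
# Example (mirroring GetNumericGradientTest below): for an elementwise add op
# with output Z = X + Y,
#     arr = get_numeric_gradient(add_op, {'X': x, 'Y': y}, 'Z', 'X')
# yields an array whose entries are approximately 1.0, since dZ/dX == 1 elementwise.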


class GradientChecker(unittest.TestCase):
    def __is_close(self, numeric_grads, scope, max_relative_error):
        # numeric_grads maps each checked input name to its numerically
        # estimated gradient; compare each against the gradient tensor the
        # backward op wrote into <name>@GRAD in `scope`.
        for name in numeric_grads:
            op_grad = numpy.array(
                scope.find_var(grad_var_name(name)).get_tensor())
            is_close = numpy.allclose(
                numeric_grads[name], op_grad, rtol=max_relative_error, atol=100)
            if not is_close:
                return False
        return True

    def check_grad(self,
                   forward_op,
                   input_vars,
                   inputs_to_check,
                   output_name,
                   no_grad_set=None,
                   only_cpu=False,
                   max_relative_error=0.005):
"""
: param forward_op : used to create backward_op
: param input_vars : numpy value of input variable . The following
computation will use these variables .
: param inputs_to_check : inputs var names that should check gradient .
: param output_name : output name that used to
: param max_relative_error : The relative tolerance parameter .
: param no_grad_set : used when create backward ops
: param only_cpu : only compute and check gradient on cpu kernel .
: return :
"""
        if no_grad_set is None:
            no_grad_set = set()

        tmp_outs = forward_op.temp_outputs()
        no_tmp_out = filter(lambda name: name not in tmp_outs,
                            forward_op.outputs())
        if len(no_tmp_out) != 1:
            raise ValueError("the operator should have exactly one non-temp output")
        in_names = forward_op.inputs()
        for no_grad in no_grad_set:
            if no_grad not in in_names:
                raise ValueError("no_grad should be in in_names")

        backward_op = core.Operator.backward(forward_op, no_grad_set)

        places = [core.CPUPlace()]
        if not only_cpu and core.is_compile_gpu() and backward_op.support_gpu():
            places.append(core.GPUPlace(0))

        # get numeric gradient
        numeric_grad = dict()
        for check_name in inputs_to_check:
            numeric_grad[check_name] = \
                get_numeric_gradient(forward_op, input_vars, output_name, check_name)

        # get operator gradient according to different device
        for place in places:
            scope = core.Scope()
            ctx = core.DeviceContext.create(place)

            # create input var and set value
            for name, value in input_vars.iteritems():
                if name not in in_names:
                    raise ValueError(name + " not in op.inputs_")
                var = scope.new_var(name).get_tensor()
                var.set_dims(value.shape)
                var.set(value, place)

            # create output var
            for out_name in forward_op.outputs():
                scope.new_var(out_name).get_tensor()

            # infer the shape of output var and compute/set value of output var
            forward_op.infer_shape(scope)
            forward_op.run(scope, ctx)

            # create a gradient var for each output, with the same shape as
            # that output, and seed it with ones: with d(out)/d(out) == 1, the
            # backward op computes the gradient of the sum of the outputs with
            # respect to each input
            for name in forward_op.outputs():
                out_tensor = scope.find_var(name).get_tensor()
                grad_tensor = scope.new_var(grad_var_name(name)).get_tensor()
                grad_tensor.set_dims(out_tensor.shape())
                data = 1.0 * numpy.ones(out_tensor.shape())
                grad_tensor.set(data, place)

            # create input grad var
            for name in backward_op.outputs():
                scope.new_var(name).get_tensor()

            # infer the shape of the input gradient vars and compute/set their
            # values with the backward op
            backward_op.infer_shape(scope)
            backward_op.run(scope, ctx)

            if isinstance(place, core.CPUPlace):
                msg = "CPU kernel gradient is not close to numeric gradient"
            elif isinstance(place, core.GPUPlace):
                msg = "GPU kernel gradient is not close to numeric gradient"
            else:
                raise ValueError("unknown place " + str(type(place)))

            self.assertTrue(
                self.__is_close(numeric_grad, scope, max_relative_error), msg)
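
# A minimal sketch of how a gradient test might use GradientChecker (the op
# name "add_two" and the variable names are assumptions, not taken from this file):
#
#     class AddGradOpTest(GradientChecker):
#         def test_add_two(self):
#             op = create_op("add_two")
#             inputs = {
#                 "X": numpy.random.random((10, 10)).astype("float32"),
#                 "Y": numpy.random.random((10, 10)).astype("float32"),
#             }
#             self.check_grad(op, inputs, ["X", "Y"], "Out")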


if __name__ == '__main__':

    class GetNumericGradientTest(unittest.TestCase):
@@ -87,4 +209,28 @@ if __name__ == '__main__':
            arr = get_numeric_gradient(add_op, {'X': x, "Y": y}, 'Z', 'X')

            self.assertAlmostEqual(arr.mean(), 1.0, delta=1e-2)

        def test_softmax_op(self):
            def stable_softmax(x):
                """Compute the softmax of vector x in a numerically stable way."""
                shiftx = x - numpy.max(x)
                exps = numpy.exp(shiftx)
                return exps / numpy.sum(exps)
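
            # Analytic gradient of the row-wise softmax used below: with
            # Y = softmax(X) per row, the backward pass is
            # dX[i] = Y[i] * (dY[i] - dot(Y[i], dY[i])).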
            def label_softmax_grad(Y, dY):
                dX = Y * 0.0
                for i in range(Y.shape[0]):
                    d = numpy.dot(Y[i, :], dY[i, :])
                    dX[i, :] = Y[i, :] * (dY[i, :] - d)
                return dX

            softmax_op = Operator("softmax", X="X", Y="Y")

            X = numpy.random.random((2, 2)).astype("float32")
            Y = numpy.apply_along_axis(stable_softmax, 1, X)
            dY = numpy.ones(Y.shape)
            dX = label_softmax_grad(Y, dY)

            arr = get_numeric_gradient(softmax_op, {"X": X}, 'Y', 'X')
            # `decimal` is a count of decimal places, not a tolerance; 2 keeps
            # the intended ~1e-2 precision.
            numpy.testing.assert_almost_equal(arr, dX, decimal=2)

    unittest.main()