developing GradientClipByGlobalNorm

add_depthwiseConv_op_gpu
fengjiayi 7 years ago
parent d23ea4ef8e
commit adc26dffa9

@@ -1,5 +1,6 @@
 import functools
 import layers
+from framework import Variable
 from . import core

 __all__ = [
@@ -44,7 +45,7 @@ def error_clip_callback(block, context):
 class BaseGradientClipAttr(object):
-    def process_context(self, context, p_g):
+    def process_context(self, context, param, grad):
         raise NotImplementedError()

     def create_operators(self, param, grad):
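Note: this hunk changes the base-class hook from a packed p_g argument to separate param and grad arguments. A minimal sketch of a subclass written against the new two-pass protocol (the class name is hypothetical and not part of this commit):

```python
# Hypothetical subclass, shown only to illustrate the updated hook signatures.
class NoOpGradientClipAttr(BaseGradientClipAttr):
    def process_context(self, context, param, grad):
        # First pass: stash any cross-parameter state in `context`;
        # a no-op clipper needs none.
        pass

    def create_operators(self, param, grad):
        # Second pass: return the (param, grad) pair, optionally with grad
        # replaced by a clipped variable built from framework ops.
        return param, grad
```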
@@ -52,7 +53,7 @@ class BaseGradientClipAttr(object):
 class NullGradientClipAttr(BaseGradientClipAttr):
-    def process_context(self, context, p_g):
+    def process_context(self, context, param, grad):
         pass

     def create_operators(self, param, grad):
@@ -69,7 +70,7 @@ class GradientClipByValue(BaseGradientClipAttr):
         self.max = max
         self.min = min

-    def process_context(self, context, p_g):
+    def process_context(self, context, param, grad):
         pass

     def create_operators(self, param, grad):
@@ -81,7 +82,7 @@ class GradientClipByNorm(BaseGradientClipAttr):
     def __init__(self, clip_norm):
         self.clip_norm = clip_norm

-    def process_context(self, context, p_g):
+    def process_context(self, context, param, grad):
         pass

     def create_operators(self, param, grad):
@@ -89,6 +90,46 @@ class GradientClipByNorm(BaseGradientClipAttr):
         return param, new_grad


+class GradientClipByGlobalNorm(BaseGradientClipAttr):
+    global_norm_var = None
+    clip_norm_var = None
+    ratio_var = None
+
+    @classmethod
+    def init(cls, clip_norm):
+        cls.global_norm_var = layers.fill_constant(
+            shape=[1], dtype="float32", value=0.0)
+        cls.clip_norm_var = layers.fill_constant(
+            shape=[1], dtype="float32", value=clip_norm)
+
+    def __init__(self):
+        if not (isinstance(self.__class__.global_norm_var, Variable) and
+                isinstance(self.__class__.clip_norm_var, Variable)):
+            raise ValueError(
+                "Class 'GradientClipByGlobalNorm' has not been properly initialized. Please call GradientClipByGlobalNorm.init() first."
+            )
+
+    def process_context(self, context, param, grad):
+        local_norm_var = layers.reduce_sum(
+            x=layers.pow(x=grad, factor=2), reduce_all=True)
+        layers.sums(
+            input=[local_norm_var, self.__class__.global_norm_var],
+            out=[self.__class__.global_norm_var])
+
+    def create_operators(self, param, grad):
+        if self.__class__.ratio_var is None:
+            self.__class__.global_norm_var = layers.sqrt(
+                x=self.__class__.global_norm_var)
+            self.__class__.ratio_var = layers.elementwise_div(
+                x=self.__class__.clip_norm_var,
+                y=layers.elementwise_max(
+                    x=self.__class__.clip_norm_var,
+                    y=self.__class__.global_norm_var))
+        # The elementwise_max op is still missing.
+        # There is no way yet to feed ratio_var into scale_op.
+        # new_grad = layers.
+
+
 def append_gradient_clip_ops(param_grad):
     context = dict()
     create_op_callbacks = []
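For reference, the arithmetic that GradientClipByGlobalNorm assembles out of framework ops (pow, reduce_sum, sums, sqrt, elementwise_div, elementwise_max) is the standard clip-by-global-norm rule. A plain-NumPy sketch with illustrative names, not part of the commit:

```python
import numpy as np

def clip_by_global_norm(grads, clip_norm):
    # process_context: every gradient contributes its squared L2 norm to a
    # single scalar shared across all parameters.
    global_norm_sq = sum(np.sum(np.square(g)) for g in grads)
    # create_operators (first call only): take the square root once, then
    # build the shared ratio clip_norm / max(clip_norm, global_norm).
    global_norm = np.sqrt(global_norm_sq)
    ratio = clip_norm / max(clip_norm, float(global_norm))
    # Every gradient is scaled by the same ratio, so the joint update keeps
    # its direction; only its total length is capped at clip_norm.
    return [g * ratio for g in grads]
```

The translated comments at the end of the hunk mark what this work-in-progress commit still lacks: an elementwise_max operator in the framework and a way to feed ratio_var into a scaling op so each gradient actually gets rescaled.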
@@ -98,10 +139,9 @@ def append_gradient_clip_ops(param_grad):
             clip_attr = NullGradientClipAttr()
         if not isinstance(clip_attr, BaseGradientClipAttr):
             raise TypeError(
-                "clip attribute should be an instance of BaseGradientClippingAttr"
-            )
+                "clip attribute should be an instance of BaseGradientClipAttr")

-        clip_attr.process_context(context=context, p_g=param_grad)
+        clip_attr.process_context(context=context, param=p, grad=g)
         create_op_callbacks.append(
             functools.partial(
                 clip_attr.create_operators, param=p, grad=g))
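append_gradient_clip_ops now works in two passes: process_context sees every (param, grad) pair before any clipping op is emitted, and the create_operators calls are deferred through functools.partial (presumably invoked after the loop). That ordering is what a global-norm clipper needs, since its shared ratio depends on all gradients. A hedged sketch of the pattern, with illustrative names:

```python
import functools

def append_clip_ops_sketch(params_grads, clip_attr_of):
    context = dict()
    callbacks = []
    for p, g in params_grads:
        attr = clip_attr_of(p)  # falls back to a NullGradientClipAttr-style no-op
        attr.process_context(context=context, param=p, grad=g)          # pass 1
        callbacks.append(
            functools.partial(attr.create_operators, param=p, grad=g))
    # Pass 2: only now, with every gradient already seen, are the clipping
    # ops actually created.
    return [cb() for cb in callbacks]
```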

@@ -1,23 +1,15 @@
 from ..registry import register_layer

 __activations__ = [
-    'abs', 'tanh', 'sigmoid', 'relu', 'sqrt', 'ceil', 'floor', 'log', 'round'
+    'abs', 'tanh', 'sigmoid', 'relu', 'sqrt', 'ceil', 'floor', 'log', 'round',
+    'pow'
 ]

 __all__ = [
-    'mean',
-    'mul',
-    'reshape',
-    'scale',
-    'transpose',
-    'sigmoid_cross_entropy_with_logits',
-    'elementwise_add',
-    'elementwise_div',
-    'elementwise_sub',
-    'elementwise_mul',
-    'clip',
-    'clip_by_norm',
-    'sequence_softmax',
+    'mean', 'mul', 'reshape', 'scale', 'transpose',
+    'sigmoid_cross_entropy_with_logits', 'elementwise_add', 'elementwise_div',
+    'elementwise_sub', 'elementwise_mul', 'clip', 'clip_by_norm',
+    'sequence_softmax', 'reduce_sum'
 ] + __activations__

 for _OP in set(__all__):
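The two names added to this module's exports, 'pow' and 'reduce_sum', are exactly the layers the new clipping class calls when it computes a parameter's squared gradient norm. A self-contained NumPy equivalent of that per-parameter term (illustrative, not framework code):

```python
import numpy as np

grad = np.array([[1.0, -2.0], [3.0, 0.5]])   # example gradient tensor
# Equivalent of layers.reduce_sum(layers.pow(grad, 2)) as used in clip.py above.
local_norm_sq = np.sum(np.power(grad, 2))
```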
