# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

import contextlib
import unittest
import numpy as np
import six

import paddle
import paddle.fluid as fluid
from paddle.fluid import core

from paddle.fluid.dygraph.base import to_variable

from paddle.fluid.dygraph_grad_clip import GradClipByValue, GradClipByNorm, GradClipByGlobalNorm
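
# Semantics under test, mirrored by the NumPy reference helpers below:
#   - GradClipByGlobalNorm: if the L2 norm over all gradients together,
#     sqrt(sum_i ||g_i||^2), exceeds max_global_norm, every gradient is
#     scaled by max_global_norm / global_norm.
#   - GradClipByNorm: each gradient is rescaled independently by
#     max_norm / ||g|| whenever its own L2 norm exceeds max_norm.
#   - GradClipByValue: each gradient element is clamped into
#     [min_value, max_value], matching np.clip.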


class TestGradClipByGlobalNorm(unittest.TestCase):
    def init_value(self):
        self.max_global_norm = 5.0
        self.init_scale = 1.0

        self.shape = (20, 20)

    def generate_p_g(self):
        self.para_and_grad = []
        for i in range(10):
            self.para_and_grad.append(
                (np.random.uniform(-self.init_scale, self.init_scale,
                                   self.shape).astype('float32'),
                 np.random.uniform(-self.init_scale, self.init_scale,
                                   self.shape).astype('float32')))

    def get_numpy_global_norm_result(self):
        # NumPy reference: the global norm is the L2 norm over all
        # gradients taken together; when it exceeds max_global_norm,
        # every gradient is scaled by max_global_norm / global_norm.
        global_norm = 0.0
        for p, g in self.para_and_grad:
            global_norm += np.sum(np.square(g))

        global_norm_np = np.sqrt(global_norm)

        new_np_p_g = []
        scale = 1.0
        if global_norm_np > self.max_global_norm:
            scale = self.max_global_norm / global_norm_np

        for p, g in self.para_and_grad:
            new_np_p_g.append((p, g * scale))

        return new_np_p_g

    def get_dygraph_global_norm_result(self):
        with fluid.dygraph.guard():
            global_norm_clip = GradClipByGlobalNorm(self.max_global_norm)
            p_g_var = []
            for p, g in self.para_and_grad:
                new_p = to_variable(p)
                new_g = to_variable(g)
                p_g_var.append((new_p, new_g))

            new_p_g_var = global_norm_clip(p_g_var)

            p_g_dy_out = []
            for p, g in new_p_g_var:
                p_g_dy_out.append((p.numpy(), g.numpy()))

            return p_g_dy_out

    def test_clip_by_global_norm(self):
        self.init_value()
        self.generate_p_g()
        np_p_g = self.get_numpy_global_norm_result()
        dy_out_p_g = self.get_dygraph_global_norm_result()

        for (p_np, g_np), (p_dy, g_dy) in zip(np_p_g, dy_out_p_g):
            self.assertTrue(np.allclose(g_np, g_dy, rtol=1e-6, atol=1e-8))

    def test_clip_by_global_norm_2(self):
        self.init_value()

        # Smaller gradients and a larger threshold exercise the
        # pass-through (scale == 1.0) path.
        self.init_scale = 0.2
        self.max_global_norm = 10.0
        self.generate_p_g()
        np_p_g = self.get_numpy_global_norm_result()
        dy_out_p_g = self.get_dygraph_global_norm_result()

        for (p_np, g_np), (p_dy, g_dy) in zip(np_p_g, dy_out_p_g):
            self.assertTrue(np.allclose(g_np, g_dy, rtol=1e-6, atol=1e-8))
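
# Back-of-envelope check of the two cases above (an expectation over the
# random draws, not something the test asserts): ten (20, 20) gradients
# from U(-1, 1) give E[sum(g^2)] = 4000/3, i.e. a global norm around
# 36.5 >> 5, so the first test takes the clipped path; with
# init_scale=0.2 and max_global_norm=10 the expected global norm is
# about 7.3 < 10, so the second test takes the pass-through path.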


class TestGradClipByNorm(unittest.TestCase):
    def init_value(self):
        self.max_norm = 5.0
        self.init_scale = 1.0

        self.shape = (10, 10)

    def generate_p_g(self):
        self.para_and_grad = []
        for i in range(10):
            self.para_and_grad.append(
                (np.random.uniform(-self.init_scale, self.init_scale,
                                   self.shape).astype('float32'),
                 np.random.uniform(-self.init_scale, self.init_scale,
                                   self.shape).astype('float32')))

    def get_numpy_norm_result(self):
        # NumPy reference: clip each gradient independently by its own
        # L2 norm.
        new_p_g = []
        for p, g in self.para_and_grad:
            norm = np.sqrt(np.sum(np.square(g)))

            if norm > self.max_norm:
                new_p_g.append((p, g * self.max_norm / norm))
            else:
                new_p_g.append((p, g))

        return new_p_g

    def get_dygraph_norm_result(self):
        with fluid.dygraph.guard():
            norm_clip = GradClipByNorm(self.max_norm)
            p_g_var = []
            for p, g in self.para_and_grad:
                new_p = to_variable(p)
                new_g = to_variable(g)
                p_g_var.append((new_p, new_g))

            new_p_g_var = norm_clip(p_g_var)

            p_g_dy_out = []
            for p, g in new_p_g_var:
                p_g_dy_out.append((p.numpy(), g.numpy()))

            return p_g_dy_out

    def test_clip_by_norm(self):
        self.init_value()
        self.generate_p_g()
        np_p_g = self.get_numpy_norm_result()
        dy_out_p_g = self.get_dygraph_norm_result()

        for (p_np, g_np), (p_dy, g_dy) in zip(np_p_g, dy_out_p_g):
            self.assertTrue(np.allclose(g_np, g_dy, rtol=1e-6, atol=1e-8))

    def test_clip_by_norm_2(self):
        self.init_value()

        # Smaller gradients and a larger threshold exercise the no-clip
        # branch.
        self.init_scale = 0.2
        self.max_norm = 10.0
        self.generate_p_g()
        np_p_g = self.get_numpy_norm_result()
        dy_out_p_g = self.get_dygraph_norm_result()

        for (p_np, g_np), (p_dy, g_dy) in zip(np_p_g, dy_out_p_g):
            self.assertTrue(np.allclose(g_np, g_dy, rtol=1e-6, atol=1e-8))
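
# Back-of-envelope check for the per-parameter tests above (again an
# expectation over the random draws): a (10, 10) gradient from U(-1, 1)
# has E[sum(g^2)] = 100/3, i.e. an expected L2 norm of about 5.8 > 5, so
# the first test clips nearly every gradient; with init_scale=0.2 and
# max_norm=10 the expected norm is about 1.2, so the second test leaves
# the gradients unchanged.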


class TestGradClipByValue(unittest.TestCase):
    def init_value(self):
        self.max_value = 0.8
        self.min_value = -0.1
        self.init_scale = 1.0

        self.shape = (10, 10)

    def generate_p_g(self):
        self.para_and_grad = []
        for i in range(10):
            self.para_and_grad.append(
                (np.random.uniform(-self.init_scale, self.init_scale,
                                   self.shape).astype('float32'),
                 np.random.uniform(-self.init_scale, self.init_scale,
                                   self.shape).astype('float32')))

    def get_numpy_clip_result(self):
        # NumPy reference: elementwise clamp into [min_value, max_value].
        new_p_g = []
        for p, g in self.para_and_grad:
            new_p_g.append((p, np.clip(g, self.min_value, self.max_value)))

        return new_p_g

    def get_dygraph_clip_result(self):
        with fluid.dygraph.guard():
            value_clip = GradClipByValue(self.min_value, self.max_value)
            p_g_var = []
            for p, g in self.para_and_grad:
                new_p = to_variable(p)
                new_g = to_variable(g)
                p_g_var.append((new_p, new_g))

            new_p_g_var = value_clip(p_g_var)

            p_g_dy_out = []
            for p, g in new_p_g_var:
                p_g_dy_out.append((p.numpy(), g.numpy()))

            return p_g_dy_out

    def test_clip_by_value(self):
        self.init_value()
        self.generate_p_g()
        np_p_g = self.get_numpy_clip_result()
        dy_out_p_g = self.get_dygraph_clip_result()

        for (p_np, g_np), (p_dy, g_dy) in zip(np_p_g, dy_out_p_g):
            self.assertTrue(np.allclose(g_np, g_dy, rtol=1e-6, atol=1e-8))

    def test_clip_by_value_2(self):
        self.init_value()

        self.init_scale = 0.2
        self.generate_p_g()
        np_p_g = self.get_numpy_clip_result()
        dy_out_p_g = self.get_dygraph_clip_result()

        for (p_np, g_np), (p_dy, g_dy) in zip(np_p_g, dy_out_p_g):
            self.assertTrue(np.allclose(g_np, g_dy, rtol=1e-6, atol=1e-8))
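
    # The case below passes min_value=None. np.clip treats a None lower
    # bound as unbounded, so the NumPy reference clips only from above;
    # the test checks that GradClipByValue(None, max_value) matches that
    # one-sided behaviour.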
    def test_clip_by_value_3(self):
        self.init_value()

        self.init_scale = 0.5
        self.max_value = 0.6
        self.min_value = None
        self.generate_p_g()
        np_p_g = self.get_numpy_clip_result()
        dy_out_p_g = self.get_dygraph_clip_result()

        for (p_np, g_np), (p_dy, g_dy) in zip(np_p_g, dy_out_p_g):
            self.assertTrue(np.allclose(g_np, g_dy, rtol=1e-6, atol=1e-8))


if __name__ == '__main__':
    unittest.main()