@@ -14,6 +14,7 @@
 from __future__ import print_function
 
+import six
 import unittest
 from functools import partial
 import numpy as np
 
@@ -24,6 +25,24 @@ import contextlib
 paddle.enable_static()
 
 
+def fake_imdb_reader(word_dict_size,
+                     sample_num,
+                     lower_seq_len=100,
+                     upper_seq_len=200,
+                     class_dim=2):
+    def __reader__():
+        for _ in six.moves.range(sample_num):
+            length = np.random.random_integers(
+                low=lower_seq_len, high=upper_seq_len, size=[1])[0]
+            ids = np.random.random_integers(
+                low=0, high=word_dict_size - 1, size=[length]).astype('int64')
+            label = np.random.random_integers(
+                low=0, high=class_dim - 1, size=[1]).astype('int64')[0]
+            yield ids, label
+
+    return __reader__
+
+
 def get_places():
     places = [fluid.CPUPlace()]
     if fluid.core.is_compiled_with_cuda():
@@ -68,10 +87,11 @@ def bow_net(data,
 
 class TestWeightDecay(unittest.TestCase):
     def setUp(self):
-        self.word_dict = paddle.dataset.imdb.word_dict()
-        reader = paddle.batch(
-            paddle.dataset.imdb.train(self.word_dict), batch_size=2)()
-        self.train_data = [next(reader) for _ in range(5)]
+        self.word_dict_len = 5147
+        batch_size = 2
+        reader = fake_imdb_reader(self.word_dict_len, batch_size * 100)
+        reader = paddle.batch(reader, batch_size=batch_size)()
+        self.train_data = [next(reader) for _ in range(3)]
         self.learning_rate = .5
 
     def run_program(self, place, feed_list):
@@ -103,7 +123,7 @@ class TestWeightDecay(unittest.TestCase):
         data = fluid.layers.data(
             name="words", shape=[1], dtype="int64", lod_level=1)
         label = fluid.layers.data(name="label", shape=[1], dtype="int64")
-        avg_cost = model(data, label, len(self.word_dict))
+        avg_cost = model(data, label, self.word_dict_len)
         AdamW = fluid.contrib.extend_with_decoupled_weight_decay(
             fluid.optimizer.Adam)
 
@@ -127,7 +147,7 @@ class TestWeightDecay(unittest.TestCase):
             name="words", shape=[1], dtype="int64", lod_level=1)
         label = fluid.layers.data(name="label", shape=[1], dtype="int64")
 
-        avg_cost = model(data, label, len(self.word_dict))
+        avg_cost = model(data, label, self.word_dict_len)
 
         param_list = [(var, var * self.learning_rate)
                       for var in main_prog.block(0).all_parameters()]
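
For reference, a minimal sketch (not part of the diff) of how the new fake_imdb_reader is consumed in setUp(): the plain reader is wrapped with paddle.batch and a few mini-batches are drawn as the fixed training data. The word_dict_len, batch_size, and batch count below simply mirror the values used in the diff.

# Usage sketch, assuming fake_imdb_reader as defined in the diff above.
word_dict_len = 5147   # fixed vocabulary size, replacing len(word_dict) from the downloaded dataset
batch_size = 2
reader = fake_imdb_reader(word_dict_len, batch_size * 100)   # yields (ids, label) samples
reader = paddle.batch(reader, batch_size=batch_size)()       # batched generator
train_data = [next(reader) for _ in range(3)]                 # three mini-batches, no download needed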