|
|
|
@ -56,7 +56,7 @@ def optimizer_setting(params):
|
|
|
|
|
#bd = [step * e for e in ls["epochs"]]
|
|
|
|
|
#base_lr = params["lr"]
|
|
|
|
|
#lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]
|
|
|
|
|
optimizer = fluid.optimizer.SGD(learning_rate=0.1)
|
|
|
|
|
optimizer = fluid.optimizer.SGD(learning_rate=0.01)
|
|
|
|
|
|
|
|
|
|
return optimizer
|
|
|
|
|
|
|
|
|
@ -109,7 +109,7 @@ class SqueezeExcitation(fluid.dygraph.Layer):
|
|
|
|
|
size=num_channels,
|
|
|
|
|
param_attr=fluid.ParamAttr(
|
|
|
|
|
initializer=fluid.initializer.Constant(value=0.05)),
|
|
|
|
|
act='relu')
|
|
|
|
|
act='sigmoid')
|
|
|
|
|
|
|
|
|
|
def forward(self, input):
|
|
|
|
|
y = self._pool(input)
|
|
|
|
@ -316,6 +316,7 @@ class TestImperativeResneXt(unittest.TestCase):
|
|
|
|
|
|
|
|
|
|
batch_size = train_parameters["batch_size"]
|
|
|
|
|
batch_num = 2
|
|
|
|
|
epoch_num = 1
|
|
|
|
|
with fluid.dygraph.guard():
|
|
|
|
|
fluid.default_startup_program().random_seed = seed
|
|
|
|
|
fluid.default_main_program().random_seed = seed
|
|
|
|
@ -327,52 +328,54 @@ class TestImperativeResneXt(unittest.TestCase):
|
|
|
|
|
random.seed = seed
|
|
|
|
|
train_reader = paddle.batch(
|
|
|
|
|
paddle.dataset.flowers.train(use_xmap=False),
|
|
|
|
|
batch_size=batch_size)
|
|
|
|
|
batch_size=batch_size,
|
|
|
|
|
drop_last=True)
|
|
|
|
|
|
|
|
|
|
dy_param_init_value = {}
|
|
|
|
|
for param in se_resnext.parameters():
|
|
|
|
|
dy_param_init_value[param.name] = param._numpy()
|
|
|
|
|
|
|
|
|
|
for batch_id, data in enumerate(train_reader()):
|
|
|
|
|
if batch_id >= batch_num:
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
dy_x_data = np.array(
|
|
|
|
|
[x[0].reshape(3, 224, 224) for x in data]).astype('float32')
|
|
|
|
|
y_data = np.array([x[1] for x in data]).astype('int64').reshape(
|
|
|
|
|
batch_size, 1)
|
|
|
|
|
|
|
|
|
|
img = to_variable(dy_x_data)
|
|
|
|
|
label = to_variable(y_data)
|
|
|
|
|
label._stop_gradient = True
|
|
|
|
|
|
|
|
|
|
out = se_resnext(img)
|
|
|
|
|
loss = fluid.layers.cross_entropy(input=out, label=label)
|
|
|
|
|
avg_loss = fluid.layers.mean(x=loss)
|
|
|
|
|
|
|
|
|
|
dy_out = avg_loss._numpy()
|
|
|
|
|
|
|
|
|
|
if batch_id == 0:
|
|
|
|
|
for epoch_id in range(epoch_num):
|
|
|
|
|
for batch_id, data in enumerate(train_reader()):
|
|
|
|
|
|
|
|
|
|
if batch_id >= batch_num and batch_num != -1:
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
dy_x_data = np.array(
|
|
|
|
|
[x[0].reshape(3, 224, 224)
|
|
|
|
|
for x in data]).astype('float32')
|
|
|
|
|
y_data = np.array(
|
|
|
|
|
[x[1] for x in data]).astype('int64').reshape(
|
|
|
|
|
batch_size, 1)
|
|
|
|
|
|
|
|
|
|
img = to_variable(dy_x_data)
|
|
|
|
|
label = to_variable(y_data)
|
|
|
|
|
label._stop_gradient = True
|
|
|
|
|
|
|
|
|
|
out = se_resnext(img)
|
|
|
|
|
loss = fluid.layers.cross_entropy(input=out, label=label)
|
|
|
|
|
avg_loss = fluid.layers.mean(x=loss)
|
|
|
|
|
|
|
|
|
|
dy_out = avg_loss._numpy()
|
|
|
|
|
|
|
|
|
|
if batch_id == 0:
|
|
|
|
|
for param in se_resnext.parameters():
|
|
|
|
|
if param.name not in dy_param_init_value:
|
|
|
|
|
dy_param_init_value[param.name] = param._numpy()
|
|
|
|
|
avg_loss._backward()
|
|
|
|
|
|
|
|
|
|
#dy_grad_value = {}
|
|
|
|
|
#for param in se_resnext.parameters():
|
|
|
|
|
# if param.trainable:
|
|
|
|
|
# np_array = np.array(param._ivar._grad_ivar().value()
|
|
|
|
|
# .get_tensor())
|
|
|
|
|
# dy_grad_value[param.name + core.grad_var_suffix()] = np_array
|
|
|
|
|
|
|
|
|
|
optimizer.minimize(avg_loss)
|
|
|
|
|
se_resnext.clear_gradients()
|
|
|
|
|
|
|
|
|
|
dy_param_value = {}
|
|
|
|
|
for param in se_resnext.parameters():
|
|
|
|
|
if param.name not in dy_param_init_value:
|
|
|
|
|
dy_param_init_value[param.name] = param._numpy()
|
|
|
|
|
|
|
|
|
|
avg_loss._backward()
|
|
|
|
|
|
|
|
|
|
dy_grad_value = {}
|
|
|
|
|
for param in se_resnext.parameters():
|
|
|
|
|
if param.trainable:
|
|
|
|
|
np_array = np.array(param._ivar._grad_ivar().value()
|
|
|
|
|
.get_tensor())
|
|
|
|
|
dy_grad_value[param.name + core.grad_var_suffix(
|
|
|
|
|
)] = np_array
|
|
|
|
|
|
|
|
|
|
optimizer.minimize(avg_loss)
|
|
|
|
|
se_resnext.clear_gradients()
|
|
|
|
|
|
|
|
|
|
dy_param_value = {}
|
|
|
|
|
for param in se_resnext.parameters():
|
|
|
|
|
dy_param_value[param.name] = param._numpy()
|
|
|
|
|
dy_param_value[param.name] = param._numpy()
|
|
|
|
|
|
|
|
|
|
with new_program_scope():
|
|
|
|
|
fluid.default_startup_program().random_seed = seed
|
|
|
|
@ -389,7 +392,8 @@ class TestImperativeResneXt(unittest.TestCase):
|
|
|
|
|
random.seed = seed
|
|
|
|
|
train_reader = paddle.batch(
|
|
|
|
|
paddle.dataset.flowers.train(use_xmap=False),
|
|
|
|
|
batch_size=batch_size)
|
|
|
|
|
batch_size=batch_size,
|
|
|
|
|
drop_last=True)
|
|
|
|
|
|
|
|
|
|
img = fluid.layers.data(
|
|
|
|
|
name='pixel', shape=[3, 224, 224], dtype='float32')
|
|
|
|
@ -415,37 +419,42 @@ class TestImperativeResneXt(unittest.TestCase):
|
|
|
|
|
|
|
|
|
|
for i in range(len(static_param_name_list)):
|
|
|
|
|
static_param_init_value[static_param_name_list[i]] = out[i]
|
|
|
|
|
|
|
|
|
|
for batch_id, data in enumerate(train_reader()):
|
|
|
|
|
if batch_id >= batch_num:
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
static_x_data = np.array(
|
|
|
|
|
[x[0].reshape(3, 224, 224) for x in data]).astype('float32')
|
|
|
|
|
y_data = np.array([x[1] for x in data]).astype('int64').reshape(
|
|
|
|
|
[batch_size, 1])
|
|
|
|
|
|
|
|
|
|
fetch_list = [avg_loss.name]
|
|
|
|
|
fetch_list.extend(static_param_name_list)
|
|
|
|
|
fetch_list.extend(static_grad_name_list)
|
|
|
|
|
out = exe.run(fluid.default_main_program(),
|
|
|
|
|
feed={"pixel": static_x_data,
|
|
|
|
|
"label": y_data},
|
|
|
|
|
fetch_list=fetch_list)
|
|
|
|
|
|
|
|
|
|
static_param_value = {}
|
|
|
|
|
static_grad_value = {}
|
|
|
|
|
static_out = out[0]
|
|
|
|
|
param_start_pos = 1
|
|
|
|
|
grad_start_pos = len(static_param_name_list) + param_start_pos
|
|
|
|
|
for i in range(param_start_pos,
|
|
|
|
|
len(static_param_name_list) + param_start_pos):
|
|
|
|
|
static_param_value[static_param_name_list[
|
|
|
|
|
i - param_start_pos]] = out[i]
|
|
|
|
|
for i in range(grad_start_pos,
|
|
|
|
|
len(static_grad_name_list) + grad_start_pos):
|
|
|
|
|
static_grad_value[static_grad_name_list[
|
|
|
|
|
i - grad_start_pos]] = out[i]
|
|
|
|
|
for epoch_id in range(epoch_num):
|
|
|
|
|
for batch_id, data in enumerate(train_reader()):
|
|
|
|
|
if batch_id >= batch_num and batch_num != -1:
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
static_x_data = np.array(
|
|
|
|
|
[x[0].reshape(3, 224, 224)
|
|
|
|
|
for x in data]).astype('float32')
|
|
|
|
|
y_data = np.array(
|
|
|
|
|
[x[1] for x in data]).astype('int64').reshape(
|
|
|
|
|
[batch_size, 1])
|
|
|
|
|
|
|
|
|
|
fetch_list = [avg_loss.name]
|
|
|
|
|
fetch_list.extend(static_param_name_list)
|
|
|
|
|
fetch_list.extend(static_grad_name_list)
|
|
|
|
|
out = exe.run(
|
|
|
|
|
fluid.default_main_program(),
|
|
|
|
|
feed={"pixel": static_x_data,
|
|
|
|
|
"label": y_data},
|
|
|
|
|
fetch_list=fetch_list)
|
|
|
|
|
|
|
|
|
|
static_param_value = {}
|
|
|
|
|
static_grad_value = {}
|
|
|
|
|
static_out = out[0]
|
|
|
|
|
param_start_pos = 1
|
|
|
|
|
grad_start_pos = len(
|
|
|
|
|
static_param_name_list) + param_start_pos
|
|
|
|
|
for i in range(
|
|
|
|
|
param_start_pos,
|
|
|
|
|
len(static_param_name_list) + param_start_pos):
|
|
|
|
|
static_param_value[static_param_name_list[
|
|
|
|
|
i - param_start_pos]] = out[i]
|
|
|
|
|
for i in range(grad_start_pos,
|
|
|
|
|
len(static_grad_name_list) + grad_start_pos):
|
|
|
|
|
static_grad_value[static_grad_name_list[
|
|
|
|
|
i - grad_start_pos]] = out[i]
|
|
|
|
|
self.assertTrue(np.allclose(static_out, dy_out))
|
|
|
|
|
|
|
|
|
|
self.assertEqual(len(dy_param_init_value), len(static_param_init_value))
|
|
|
|
@ -454,12 +463,12 @@ class TestImperativeResneXt(unittest.TestCase):
|
|
|
|
|
self.assertTrue(np.allclose(value, dy_param_init_value[key]))
|
|
|
|
|
self.assertTrue(np.isfinite(value.all()))
|
|
|
|
|
self.assertFalse(np.isnan(value.any()))
|
|
|
|
|
|
|
|
|
|
self.assertEqual(len(dy_grad_value), len(static_grad_value))
|
|
|
|
|
for key, value in six.iteritems(static_grad_value):
|
|
|
|
|
self.assertTrue(np.allclose(value, dy_grad_value[key]))
|
|
|
|
|
self.assertTrue(np.isfinite(value.all()))
|
|
|
|
|
self.assertFalse(np.isnan(value.any()))
|
|
|
|
|
# FIXME(Yancey1989): np.array(_ivar.value().get_tensor()) leads to memory lake
|
|
|
|
|
#self.assertEqual(len(dy_grad_value), len(static_grad_value))
|
|
|
|
|
#for key, value in six.iteritems(static_grad_value):
|
|
|
|
|
# self.assertTrue(np.allclose(value, dy_grad_value[key]))
|
|
|
|
|
# self.assertTrue(np.isfinite(value.all()))
|
|
|
|
|
# self.assertFalse(np.isnan(value.any()))
|
|
|
|
|
|
|
|
|
|
self.assertEqual(len(dy_param_value), len(static_param_value))
|
|
|
|
|
for key, value in six.iteritems(static_param_value):
|
|
|
|
|