|
|
@ -25,17 +25,18 @@ from paddle.fluid.imperative.nn import Conv2D, Pool2D, BatchNorm, FC
|
|
|
|
from paddle.fluid.imperative.base import to_variable
|
|
|
|
from paddle.fluid.imperative.base import to_variable
|
|
|
|
from test_imperative_base import new_program_scope
|
|
|
|
from test_imperative_base import new_program_scope
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
batch_size = 8
|
|
|
|
train_parameters = {
|
|
|
|
train_parameters = {
|
|
|
|
"input_size": [3, 224, 224],
|
|
|
|
"input_size": [3, 224, 224],
|
|
|
|
"input_mean": [0.485, 0.456, 0.406],
|
|
|
|
"input_mean": [0.485, 0.456, 0.406],
|
|
|
|
"input_std": [0.229, 0.224, 0.225],
|
|
|
|
"input_std": [0.229, 0.224, 0.225],
|
|
|
|
"learning_strategy": {
|
|
|
|
"learning_strategy": {
|
|
|
|
"name": "piecewise_decay",
|
|
|
|
"name": "piecewise_decay",
|
|
|
|
"batch_size": 1,
|
|
|
|
"batch_size": batch_size,
|
|
|
|
"epochs": [30, 60, 90],
|
|
|
|
"epochs": [30, 60, 90],
|
|
|
|
"steps": [0.1, 0.01, 0.001, 0.0001]
|
|
|
|
"steps": [0.1, 0.01, 0.001, 0.0001]
|
|
|
|
},
|
|
|
|
},
|
|
|
|
"batch_size": 1,
|
|
|
|
"batch_size": batch_size,
|
|
|
|
"lr": 0.1,
|
|
|
|
"lr": 0.1,
|
|
|
|
"total_images": 1281164,
|
|
|
|
"total_images": 1281164,
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -56,6 +57,7 @@ def optimizer_setting(params):
|
|
|
|
lr = []
|
|
|
|
lr = []
|
|
|
|
lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]
|
|
|
|
lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]
|
|
|
|
optimizer = fluid.optimizer.SGD(learning_rate=params["lr"])
|
|
|
|
optimizer = fluid.optimizer.SGD(learning_rate=params["lr"])
|
|
|
|
|
|
|
|
# TODO(minqiyang): Add learning rate scheduler support to imperative mode
|
|
|
|
# optimizer = fluid.optimizer.Momentum(
|
|
|
|
# optimizer = fluid.optimizer.Momentum(
|
|
|
|
# learning_rate=params["lr"],
|
|
|
|
# learning_rate=params["lr"],
|
|
|
|
# learning_rate=fluid.layers.piecewise_decay(
|
|
|
|
# learning_rate=fluid.layers.piecewise_decay(
|
|
|
@ -208,8 +210,12 @@ class TestImperativeResnet(unittest.TestCase):
|
|
|
|
|
|
|
|
|
|
|
|
resnet = ResNet()
|
|
|
|
resnet = ResNet()
|
|
|
|
optimizer = optimizer_setting(train_parameters)
|
|
|
|
optimizer = optimizer_setting(train_parameters)
|
|
|
|
|
|
|
|
np.random.seed(seed)
|
|
|
|
|
|
|
|
import random
|
|
|
|
|
|
|
|
random.seed = seed
|
|
|
|
train_reader = paddle.batch(
|
|
|
|
train_reader = paddle.batch(
|
|
|
|
paddle.dataset.flowers.train(), batch_size=batch_size)
|
|
|
|
paddle.dataset.flowers.train(use_xmap=False),
|
|
|
|
|
|
|
|
batch_size=batch_size)
|
|
|
|
|
|
|
|
|
|
|
|
dy_param_init_value = {}
|
|
|
|
dy_param_init_value = {}
|
|
|
|
for param in fluid.default_main_program().global_block(
|
|
|
|
for param in fluid.default_main_program().global_block(
|
|
|
@ -220,18 +226,22 @@ class TestImperativeResnet(unittest.TestCase):
|
|
|
|
if batch_id >= 1:
|
|
|
|
if batch_id >= 1:
|
|
|
|
break
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
|
|
x_data = np.array(
|
|
|
|
dy_x_data = np.array(
|
|
|
|
[x[0].reshape(3, 224, 224) for x in data]).astype('float32')
|
|
|
|
[x[0].reshape(3, 224, 224) for x in data]).astype('float32')
|
|
|
|
|
|
|
|
print('dy input shape', dy_x_data.shape)
|
|
|
|
y_data = np.array([x[1] for x in data]).astype('int64').reshape(
|
|
|
|
y_data = np.array([x[1] for x in data]).astype('int64').reshape(
|
|
|
|
batch_size, 1)
|
|
|
|
batch_size, 1)
|
|
|
|
|
|
|
|
|
|
|
|
img = to_variable(x_data)
|
|
|
|
img = to_variable(dy_x_data)
|
|
|
|
label = to_variable(y_data)
|
|
|
|
label = to_variable(y_data)
|
|
|
|
label._stop_gradient = True
|
|
|
|
label._stop_gradient = True
|
|
|
|
|
|
|
|
|
|
|
|
out = resnet(img)
|
|
|
|
out = resnet(img)
|
|
|
|
loss = fluid.layers.cross_entropy(input=out, label=label)
|
|
|
|
loss = fluid.layers.cross_entropy(input=out, label=label)
|
|
|
|
avg_loss = fluid.layers.mean(x=loss)
|
|
|
|
avg_loss = fluid.layers.mean(x=loss)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print('shapex ', avg_loss.shape)
|
|
|
|
|
|
|
|
|
|
|
|
dy_out = avg_loss._numpy()
|
|
|
|
dy_out = avg_loss._numpy()
|
|
|
|
|
|
|
|
|
|
|
|
if batch_id == 0:
|
|
|
|
if batch_id == 0:
|
|
|
@ -241,6 +251,15 @@ class TestImperativeResnet(unittest.TestCase):
|
|
|
|
dy_param_init_value[param.name] = param._numpy()
|
|
|
|
dy_param_init_value[param.name] = param._numpy()
|
|
|
|
|
|
|
|
|
|
|
|
avg_loss._backward()
|
|
|
|
avg_loss._backward()
|
|
|
|
|
|
|
|
dy_grad_value = {}
|
|
|
|
|
|
|
|
for param in fluid.default_main_program().global_block(
|
|
|
|
|
|
|
|
).all_parameters():
|
|
|
|
|
|
|
|
if not param.stop_gradient:
|
|
|
|
|
|
|
|
np_array = np.array(param._ivar._grad_ivar().value()
|
|
|
|
|
|
|
|
.get_tensor())
|
|
|
|
|
|
|
|
dy_grad_value[param.name + core.grad_var_suffix(
|
|
|
|
|
|
|
|
)] = np_array
|
|
|
|
|
|
|
|
|
|
|
|
optimizer.minimize(avg_loss)
|
|
|
|
optimizer.minimize(avg_loss)
|
|
|
|
|
|
|
|
|
|
|
|
dy_param_value = {}
|
|
|
|
dy_param_value = {}
|
|
|
@ -256,8 +275,13 @@ class TestImperativeResnet(unittest.TestCase):
|
|
|
|
|
|
|
|
|
|
|
|
resnet = ResNet()
|
|
|
|
resnet = ResNet()
|
|
|
|
optimizer = optimizer_setting(train_parameters)
|
|
|
|
optimizer = optimizer_setting(train_parameters)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
np.random.seed(seed)
|
|
|
|
|
|
|
|
import random
|
|
|
|
|
|
|
|
random.seed = seed
|
|
|
|
train_reader = paddle.batch(
|
|
|
|
train_reader = paddle.batch(
|
|
|
|
paddle.dataset.flowers.train(), batch_size=batch_size)
|
|
|
|
paddle.dataset.flowers.train(use_xmap=False),
|
|
|
|
|
|
|
|
batch_size=batch_size)
|
|
|
|
|
|
|
|
|
|
|
|
img = fluid.layers.data(
|
|
|
|
img = fluid.layers.data(
|
|
|
|
name='pixel', shape=[3, 224, 224], dtype='float32')
|
|
|
|
name='pixel', shape=[3, 224, 224], dtype='float32')
|
|
|
@ -267,12 +291,21 @@ class TestImperativeResnet(unittest.TestCase):
|
|
|
|
avg_loss = fluid.layers.mean(x=loss)
|
|
|
|
avg_loss = fluid.layers.mean(x=loss)
|
|
|
|
optimizer.minimize(avg_loss)
|
|
|
|
optimizer.minimize(avg_loss)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print('avg_loss shape', avg_loss.shape)
|
|
|
|
|
|
|
|
print(fluid.default_main_program())
|
|
|
|
|
|
|
|
|
|
|
|
# initialize params and fetch them
|
|
|
|
# initialize params and fetch them
|
|
|
|
static_param_init_value = {}
|
|
|
|
static_param_init_value = {}
|
|
|
|
static_param_name_list = []
|
|
|
|
static_param_name_list = []
|
|
|
|
|
|
|
|
static_grad_name_list = []
|
|
|
|
for param in fluid.default_startup_program().global_block(
|
|
|
|
for param in fluid.default_startup_program().global_block(
|
|
|
|
).all_parameters():
|
|
|
|
).all_parameters():
|
|
|
|
static_param_name_list.append(param.name)
|
|
|
|
static_param_name_list.append(param.name)
|
|
|
|
|
|
|
|
for param in fluid.default_main_program().global_block(
|
|
|
|
|
|
|
|
).all_parameters():
|
|
|
|
|
|
|
|
if not param.stop_gradient:
|
|
|
|
|
|
|
|
static_grad_name_list.append(param.name +
|
|
|
|
|
|
|
|
core.grad_var_suffix())
|
|
|
|
|
|
|
|
|
|
|
|
out = exe.run(fluid.default_startup_program(),
|
|
|
|
out = exe.run(fluid.default_startup_program(),
|
|
|
|
fetch_list=static_param_name_list)
|
|
|
|
fetch_list=static_param_name_list)
|
|
|
@ -284,34 +317,49 @@ class TestImperativeResnet(unittest.TestCase):
|
|
|
|
if batch_id >= 1:
|
|
|
|
if batch_id >= 1:
|
|
|
|
break
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
|
|
x_data = np.array(
|
|
|
|
static_x_data = np.array(
|
|
|
|
[x[0].reshape(3, 224, 224) for x in data]).astype('float32')
|
|
|
|
[x[0].reshape(3, 224, 224) for x in data]).astype('float32')
|
|
|
|
y_data = np.array([x[1] for x in data]).astype('int64').reshape(
|
|
|
|
y_data = np.array([x[1] for x in data]).astype('int64').reshape(
|
|
|
|
[batch_size, 1])
|
|
|
|
[batch_size, 1])
|
|
|
|
|
|
|
|
|
|
|
|
fetch_list = [loss.name]
|
|
|
|
fetch_list = [avg_loss.name]
|
|
|
|
fetch_list.extend(static_param_name_list)
|
|
|
|
fetch_list.extend(static_param_name_list)
|
|
|
|
|
|
|
|
fetch_list.extend(static_grad_name_list)
|
|
|
|
out = exe.run(fluid.default_main_program(),
|
|
|
|
out = exe.run(fluid.default_main_program(),
|
|
|
|
feed={"pixel": x_data,
|
|
|
|
feed={"pixel": static_x_data,
|
|
|
|
"label": y_data},
|
|
|
|
"label": y_data},
|
|
|
|
fetch_list=fetch_list)
|
|
|
|
fetch_list=fetch_list)
|
|
|
|
|
|
|
|
|
|
|
|
static_param_value = {}
|
|
|
|
static_param_value = {}
|
|
|
|
|
|
|
|
static_grad_value = {}
|
|
|
|
static_out = out[0]
|
|
|
|
static_out = out[0]
|
|
|
|
for i in range(1, len(out)):
|
|
|
|
param_start_pos = 1
|
|
|
|
static_param_value[static_param_name_list[i - 1]] = out[i]
|
|
|
|
grad_start_pos = len(static_param_name_list) + param_start_pos
|
|
|
|
|
|
|
|
for i in range(param_start_pos,
|
|
|
|
|
|
|
|
len(static_param_name_list) + param_start_pos):
|
|
|
|
|
|
|
|
static_param_value[static_param_name_list[
|
|
|
|
|
|
|
|
i - param_start_pos]] = out[i]
|
|
|
|
|
|
|
|
for i in range(grad_start_pos,
|
|
|
|
|
|
|
|
len(static_grad_name_list) + grad_start_pos):
|
|
|
|
|
|
|
|
static_grad_value[static_grad_name_list[
|
|
|
|
|
|
|
|
i - grad_start_pos]] = out[i]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
self.assertTrue(np.allclose(static_out, dy_out))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
self.assertEqual(len(dy_param_init_value), len(static_param_init_value))
|
|
|
|
|
|
|
|
for key, value in six.iteritems(static_param_init_value):
|
|
|
|
|
|
|
|
self.assertTrue(np.allclose(value, dy_param_init_value[key]))
|
|
|
|
|
|
|
|
|
|
|
|
self.assertTrue(np.allclose(static_out.all(), dy_out.all()))
|
|
|
|
self.assertEqual(len(dy_grad_value), len(static_grad_value))
|
|
|
|
|
|
|
|
# TODO(minqiyang): find a way to align the gradient
|
|
|
|
|
|
|
|
# for key, value in six.iteritems(static_grad_value):
|
|
|
|
|
|
|
|
# self.assertTrue(
|
|
|
|
|
|
|
|
# np.allclose(value, dy_grad_value[key]))
|
|
|
|
|
|
|
|
|
|
|
|
for key, value in six.iteritems(static_param_init_value):
|
|
|
|
self.assertEqual(len(dy_param_value), len(static_param_value))
|
|
|
|
self.assertTrue(
|
|
|
|
# for key, value in six.iteritems(static_param_value):
|
|
|
|
np.allclose(value.all(), dy_param_init_value[key].all()))
|
|
|
|
|
|
|
|
|
|
|
|
# self.assertTrue(np.allclose(value, dy_param_value[key]))
|
|
|
|
for key, value in six.iteritems(static_param_value):
|
|
|
|
|
|
|
|
if not np.allclose(value.all(), dy_param_value[key].all()):
|
|
|
|
|
|
|
|
print(key)
|
|
|
|
|
|
|
|
print(value, dy_param_value[key])
|
|
|
|
|
|
|
|
self.assertTrue(np.allclose(value.all(), dy_param_value[key].all()))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
|
if __name__ == '__main__':
|
|
|
|