test=develop, add gradient sort backward strategy (#17125)
* test=develop, add gradient sort backward strategy
* test=develop, fix test by adding FLAGS_cudnn_deterministic on new tests
parent
1d0ba5e815
commit
4624d7c642
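The second commit note refers to cuDNN's non-deterministic kernels: without the flag, convolution backward may select non-deterministic algorithms, which would defeat a test that expects reproducible gradients. A minimal sketch of how such a GFlag is typically enabled for a test run (the environment-variable route shown here is an assumption; the flag's actual placement is in the suppressed test diff below):

import os

# Paddle reads exported FLAGS_* values from the environment before the
# framework initializes; this forces deterministic cuDNN kernels.
os.environ['FLAGS_cudnn_deterministic'] = 'true'

import paddle.fluid as fluid  # import after setting the flag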
@@ -0,0 +1,38 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//
// Created by Jiabin on 2019-04-25.
//
#pragma once

namespace paddle {
namespace imperative {
namespace detail {

class BackwardStrategy {
 public:
  /* DyGraph now supports two kinds of backward strategy: sorted sum
   * gradient, which accumulates gradients in a fixed sorted order, and
   * the default, which sums gradients as soon as they are created. */
  // TODO(jiabin): add more strategies when we support them
  bool sorted_sum_gradient_{false};
};

}  // namespace detail
}  // namespace imperative
}  // namespace paddle
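The motivation for the sorted strategy is that floating-point addition is not associative: summing the same gradient contributions in a different order can change the low bits of the result, so only a fixed accumulation order makes repeated runs comparable. A standalone numpy sketch of the effect (illustrative only, not Paddle API):

import numpy as np

np.random.seed(0)
grads = np.random.uniform(-1, 1, 10000).astype(np.float32)

# Same numbers, two accumulation orders.
forward_sum = np.float32(0)
for g in grads:
    forward_sum += g
reverse_sum = np.float32(0)
for g in grads[::-1]:
    reverse_sum += g

# The two results typically differ in the last bits, which is exactly
# the run-to-run noise a fixed (sorted) accumulation order removes.
print(forward_sum == reverse_sum, abs(float(forward_sum) - float(reverse_sum)))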
@@ -0,0 +1,19 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.fluid import core

__all__ = ["BackwardStrategy"]

BackwardStrategy = core.BackwardStrategy
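With this pybind alias in place, the tests below all enable the strategy the same way; a minimal usage sketch (any dygraph scalar loss works):

import paddle.fluid as fluid

with fluid.dygraph.guard():
    backward_strategy = fluid.dygraph.BackwardStrategy()
    backward_strategy.sort_sum_gradient = True  # opt in to sorted accumulation
    # ... build a model, compute a scalar `loss` ...
    loss.backward(backward_strategy)  # pass the strategy to backward()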
@@ -0,0 +1,149 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import contextlib
import unittest
import numpy as np
import six

import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope
from test_imperative_mnist import MNIST


class TestImperativeMnistSortGradient(unittest.TestCase):
    def test_mnist_sort_gradient_float32(self):
        seed = 90
        epoch_num = 1

        with fluid.dygraph.guard():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            backward_strategy = fluid.dygraph.BackwardStrategy()
            backward_strategy.sort_sum_gradient = True

            mnist2 = MNIST("mnist")
            sgd2 = SGDOptimizer(learning_rate=1e-3)
            train_reader2 = paddle.batch(
                paddle.dataset.mnist.train(), batch_size=128, drop_last=True)

            mnist2.train()
            dy_param_init_value2 = {}
            for epoch in range(epoch_num):
                for batch_id, data in enumerate(train_reader2()):
                    dy_x_data2 = np.array(
                        [x[0].reshape(1, 28, 28)
                         for x in data]).astype('float32')
                    y_data2 = np.array(
                        [x[1] for x in data]).astype('int64').reshape(128, 1)

                    img2 = to_variable(dy_x_data2)
                    label2 = to_variable(y_data2)
                    label2.stop_gradient = True

                    cost2 = mnist2(img2)
                    loss2 = fluid.layers.cross_entropy(cost2, label2)
                    avg_loss2 = fluid.layers.mean(loss2)

                    dy_out2 = avg_loss2.numpy()

                    if epoch == 0 and batch_id == 0:
                        for param in mnist2.parameters():
                            dy_param_init_value2[param.name] = param.numpy()

                    avg_loss2.backward(backward_strategy)
                    sgd2.minimize(avg_loss2)
                    mnist2.clear_gradients()

                    dy_param_value2 = {}
                    for param in mnist2.parameters():
                        dy_param_value2[param.name] = param.numpy()
                    if batch_id == 20:
                        break

        with new_program_scope():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

            exe = fluid.Executor(fluid.CPUPlace(
            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))

            mnist = MNIST("mnist")
            sgd = SGDOptimizer(learning_rate=1e-3)
            train_reader = paddle.batch(
                paddle.dataset.mnist.train(), batch_size=128, drop_last=True)

            img = fluid.layers.data(
                name='pixel', shape=[1, 28, 28], dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            cost = mnist(img)
            loss = fluid.layers.cross_entropy(cost, label)
            avg_loss = fluid.layers.mean(loss)
            sgd.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            for param in mnist.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(fluid.default_startup_program(),
                          fetch_list=static_param_name_list)

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for epoch in range(epoch_num):
                for batch_id, data in enumerate(train_reader()):
                    static_x_data = np.array(
                        [x[0].reshape(1, 28, 28)
                         for x in data]).astype('float32')
                    y_data = np.array(
                        [x[1] for x in data]).astype('int64').reshape([128, 1])

                    fetch_list = [avg_loss.name]
                    fetch_list.extend(static_param_name_list)
                    out = exe.run(
                        fluid.default_main_program(),
                        feed={"pixel": static_x_data,
                              "label": y_data},
                        fetch_list=fetch_list)

                    static_param_value = {}
                    static_out = out[0]
                    for i in range(1, len(out)):
                        static_param_value[static_param_name_list[i - 1]] = out[i]
                    if batch_id == 20:
                        break

        self.assertTrue(np.allclose(dy_x_data2, static_x_data))

        for key, value in six.iteritems(static_param_init_value):
            self.assertTrue(np.allclose(value, dy_param_init_value2[key]))

        self.assertTrue(np.allclose(static_out, dy_out2))

        for key, value in six.iteritems(static_param_value):
            self.assertTrue(np.allclose(value, dy_param_value2[key], atol=1e-5))


if __name__ == '__main__':
    unittest.main()
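The test above checks dygraph-vs-static parity; the other property the strategy is meant to provide is that two identically seeded dygraph runs agree exactly. A sketch of such a check (train_mnist_once is a hypothetical helper, not part of this diff, wrapping the dygraph loop above and returning the final avg_loss numpy value):

import numpy as np

def check_run_to_run_determinism(train_mnist_once):
    # With sort_sum_gradient=True, gradients are accumulated in a fixed
    # sorted order, so two identically seeded runs should match exactly.
    loss_a = train_mnist_once(sort_sum_gradient=True, seed=90)
    loss_b = train_mnist_once(sort_sum_gradient=True, seed=90)
    assert np.array_equal(loss_a, loss_b)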
@@ -0,0 +1,165 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import unittest
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.dygraph.nn import Embedding
import paddle.fluid.framework as framework
from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope
from test_imperative_ptb_rnn import PtbModel
import numpy as np
import six


class TestDygraphPtbRnnSortGradient(unittest.TestCase):
    def test_ptb_rnn_sort_gradient_cpu_float32(self):
        seed = 90
        hidden_size = 10
        vocab_size = 1000
        num_layers = 1
        num_steps = 3
        init_scale = 0.1
        batch_size = 4
        batch_num = 200

        with fluid.dygraph.guard():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            backward_strategy = fluid.dygraph.BackwardStrategy()
            backward_strategy.sort_sum_gradient = True
            # TODO: marsyang1993 Change seed to
            ptb_model = PtbModel(
                "ptb_model",
                hidden_size=hidden_size,
                vocab_size=vocab_size,
                num_layers=num_layers,
                num_steps=num_steps,
                init_scale=init_scale)

            sgd = SGDOptimizer(learning_rate=1e-3)
            dy_param_updated = dict()
            dy_param_init = dict()
            dy_loss = None
            last_hidden = None
            last_cell = None

            for i in range(batch_num):
                x_data = np.arange(12).reshape(4, 3).astype('int64')
                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                x_data = x_data.reshape((-1, num_steps, 1))
                y_data = y_data.reshape((-1, 1))
                init_hidden_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                init_cell_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                x = to_variable(x_data)
                y = to_variable(y_data)
                init_hidden = to_variable(init_hidden_data)
                init_cell = to_variable(init_cell_data)
                dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden,
                                                            init_cell)
                if i == 0:
                    for param in ptb_model.parameters():
                        dy_param_init[param.name] = param.numpy()
                dy_loss.backward(backward_strategy)
                sgd.minimize(dy_loss)
                ptb_model.clear_gradients()
                if i == batch_num - 1:
                    for param in ptb_model.parameters():
                        dy_param_updated[param.name] = param.numpy()

        with new_program_scope():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            ptb_model = PtbModel(
                "ptb_model",
                hidden_size=hidden_size,
                vocab_size=vocab_size,
                num_layers=num_layers,
                num_steps=num_steps,
                init_scale=init_scale)

            exe = fluid.Executor(fluid.CPUPlace(
            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
            sgd = SGDOptimizer(learning_rate=1e-3)
            x = fluid.layers.data(
                name="x", shape=[-1, num_steps, 1], dtype='int64')
            y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32')
            init_hidden = fluid.layers.data(
                name="init_hidden", shape=[1], dtype='float32')
            init_cell = fluid.layers.data(
                name="init_cell", shape=[1], dtype='float32')

            static_loss, static_last_hidden, static_last_cell = ptb_model(
                x, y, init_hidden, init_cell)
            sgd.minimize(static_loss)
            static_param_updated = dict()
            static_param_init = dict()
            static_param_name_list = list()
            for param in ptb_model.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(framework.default_startup_program(),
                          fetch_list=static_param_name_list)
            for i in range(len(static_param_name_list)):
                static_param_init[static_param_name_list[i]] = out[i]
            static_loss_value = None
            static_last_cell_value = None
            static_last_hidden_value = None
            for i in range(batch_num):
                x_data = np.arange(12).reshape(4, 3).astype('int64')
                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                x_data = x_data.reshape((-1, num_steps, 1))
                y_data = y_data.reshape((-1, 1))
                init_hidden_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                init_cell_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                fetch_list = [static_loss, static_last_hidden, static_last_cell]
                fetch_list.extend(static_param_name_list)
                out = exe.run(fluid.default_main_program(),
                              feed={
                                  "x": x_data,
                                  "y": y_data,
                                  "init_hidden": init_hidden_data,
                                  "init_cell": init_cell_data
                              },
                              fetch_list=fetch_list)
                static_loss_value = out[0]
                static_last_hidden_value = out[1]
                static_last_cell_value = out[2]

                if i == batch_num - 1:
                    for k in range(3, len(out)):
                        static_param_updated[static_param_name_list[k - 3]] = out[k]

        self.assertTrue(np.array_equal(static_loss_value, dy_loss.numpy()))
        self.assertTrue(
            np.array_equal(static_last_cell_value, last_cell.numpy()))
        self.assertTrue(
            np.array_equal(static_last_hidden_value, last_hidden.numpy()))
        for key, value in six.iteritems(static_param_init):
            self.assertTrue(np.array_equal(value, dy_param_init[key]))
        for key, value in six.iteritems(static_param_updated):
            self.assertTrue(np.array_equal(value, dy_param_updated[key]))


if __name__ == '__main__':
    unittest.main()
@@ -0,0 +1,230 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import numpy as np
import six

import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope
from test_imperative_resnet import ResNet

batch_size = 8
train_parameters = {
    "input_size": [3, 224, 224],
    "input_mean": [0.485, 0.456, 0.406],
    "input_std": [0.229, 0.224, 0.225],
    "learning_strategy": {
        "name": "piecewise_decay",
        "batch_size": batch_size,
        "epochs": [30, 60, 90],
        "steps": [0.1, 0.01, 0.001, 0.0001]
    },
    "batch_size": batch_size,
    "lr": 0.1,
    "total_images": 1281164,
}


def optimizer_setting(params):
    ls = params["learning_strategy"]
    if ls["name"] == "piecewise_decay":
        if "total_images" not in params:
            total_images = 1281167
        else:
            total_images = params["total_images"]
        batch_size = ls["batch_size"]
        step = int(total_images / batch_size + 1)

        bd = [step * e for e in ls["epochs"]]
        base_lr = params["lr"]
        lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]
    optimizer = fluid.optimizer.SGD(learning_rate=0.01)
    # TODO(minqiyang): Add learning rate scheduler support to dygraph mode
    # optimizer = fluid.optimizer.Momentum(
    #     learning_rate=params["lr"],
    #     learning_rate=fluid.layers.piecewise_decay(
    #         boundaries=bd, values=lr),
    #     momentum=0.9,
    #     regularization=fluid.regularizer.L2Decay(1e-4))

    return optimizer
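For the train_parameters above, the schedule that optimizer_setting computes (and, per the TODO, does not yet hand to the optimizer in dygraph mode) works out as follows; a standalone check of the arithmetic:

total_images = 1281164
batch_size = 8
step = int(total_images / batch_size + 1)   # 160146 mini-batches per epoch
bd = [step * e for e in [30, 60, 90]]       # boundaries: [4804380, 9608760, 14413140]
lr = [0.1 * (0.1 ** i) for i in range(len(bd) + 1)]  # values: 0.1, 0.01, 0.001, 0.0001
print(step, bd, lr)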

class TestDygraphResnetSortGradient(unittest.TestCase):
    def test_resnet_sort_gradient_float32(self):
        seed = 90

        batch_size = train_parameters["batch_size"]
        batch_num = 20
        with fluid.dygraph.guard():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            backward_strategy = fluid.dygraph.BackwardStrategy()
            backward_strategy.sort_sum_gradient = True
            resnet = ResNet("resnet")
            optimizer = optimizer_setting(train_parameters)
            np.random.seed(seed)
            import random
            random.seed(seed)
            train_reader = paddle.batch(
                paddle.dataset.flowers.train(use_xmap=False),
                batch_size=batch_size)

            dy_param_init_value = {}
            for param in resnet.parameters():
                dy_param_init_value[param.name] = param.numpy()

            for batch_id, data in enumerate(train_reader()):
                if batch_id >= batch_num:
                    break

                dy_x_data = np.array(
                    [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data]).astype('int64').reshape(
                    batch_size, 1)

                img = to_variable(dy_x_data)
                label = to_variable(y_data)
                label.stop_gradient = True

                out = resnet(img)
                loss = fluid.layers.cross_entropy(input=out, label=label)
                avg_loss = fluid.layers.mean(x=loss)

                dy_out = avg_loss.numpy()

                if batch_id == 0:
                    for param in resnet.parameters():
                        if param.name not in dy_param_init_value:
                            dy_param_init_value[param.name] = param.numpy()

                avg_loss.backward(backward_strategy)

                dy_grad_value = {}
                for param in resnet.parameters():
                    if param.trainable:
                        np_array = np.array(param._ivar._grad_ivar().value()
                                            .get_tensor())
                        dy_grad_value[param.name +
                                      core.grad_var_suffix()] = np_array

                optimizer.minimize(avg_loss)
                resnet.clear_gradients()

                dy_param_value = {}
                for param in resnet.parameters():
                    dy_param_value[param.name] = param.numpy()

        with new_program_scope():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

            exe = fluid.Executor(fluid.CPUPlace(
            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))

            resnet = ResNet("resnet")
            optimizer = optimizer_setting(train_parameters)

            np.random.seed(seed)
            import random
            random.seed(seed)
            train_reader = paddle.batch(
                paddle.dataset.flowers.train(use_xmap=False),
                batch_size=batch_size)

            img = fluid.layers.data(
                name='pixel', shape=[3, 224, 224], dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            out = resnet(img)
            loss = fluid.layers.cross_entropy(input=out, label=label)
            avg_loss = fluid.layers.mean(x=loss)
            optimizer.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            static_grad_name_list = []
            for param in resnet.parameters():
                static_param_name_list.append(param.name)
            for param in resnet.parameters():
                if param.trainable:
                    static_grad_name_list.append(param.name +
                                                 core.grad_var_suffix())

            out = exe.run(fluid.default_startup_program(),
                          fetch_list=static_param_name_list)

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for batch_id, data in enumerate(train_reader()):
                if batch_id >= batch_num:
                    break

                static_x_data = np.array(
                    [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data]).astype('int64').reshape(
                    [batch_size, 1])

                fetch_list = [avg_loss.name]
                fetch_list.extend(static_param_name_list)
                fetch_list.extend(static_grad_name_list)
                out = exe.run(fluid.default_main_program(),
                              feed={"pixel": static_x_data,
                                    "label": y_data},
                              fetch_list=fetch_list)

                static_param_value = {}
                static_grad_value = {}
                static_out = out[0]
                param_start_pos = 1
                grad_start_pos = len(static_param_name_list) + param_start_pos
                for i in range(param_start_pos,
                               len(static_param_name_list) + param_start_pos):
                    static_param_value[static_param_name_list[
                        i - param_start_pos]] = out[i]
                for i in range(grad_start_pos,
                               len(static_grad_name_list) + grad_start_pos):
                    static_grad_value[static_grad_name_list[
                        i - grad_start_pos]] = out[i]

        self.assertTrue(np.allclose(static_out, dy_out))

        self.assertEqual(len(dy_param_init_value), len(static_param_init_value))

        for key, value in six.iteritems(static_param_init_value):
            self.assertTrue(np.allclose(value, dy_param_init_value[key]))
            self.assertTrue(np.isfinite(value).all())
            self.assertFalse(np.isnan(value).any())

        self.assertEqual(len(dy_grad_value), len(static_grad_value))
        for key, value in six.iteritems(static_grad_value):
            self.assertTrue(np.allclose(value, dy_grad_value[key]))
            self.assertTrue(np.isfinite(value).all())
            self.assertFalse(np.isnan(value).any())

        self.assertEqual(len(dy_param_value), len(static_param_value))
        for key, value in six.iteritems(static_param_value):
            self.assertTrue(np.allclose(value, dy_param_value[key]))
            self.assertTrue(np.isfinite(value).all())
            self.assertFalse(np.isnan(value).any())


if __name__ == '__main__':
    unittest.main()
File diff suppressed because it is too large