From 6477b443f3d6ec1d8024de2228f5806fc4cc318f Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Tue, 26 Feb 2019 10:27:34 +0800 Subject: [PATCH 1/5] fix default value. test=develop --- .../unittests/ir_memory_optimize_net_base.py | 145 ++++++++++++++++++ .../test_eager_deletion_dynamic_rnn_base.py | 2 + .../test_ir_memory_optimize_ifelse_net.py | 55 +++++++ .../unittests/test_ir_memory_optimize_nlp.py | 55 +++++++ .../test_ir_memory_optimize_transformer.py | 3 - 5 files changed, 257 insertions(+), 3 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py create mode 100644 python/paddle/fluid/tests/unittests/test_ir_memory_optimize_ifelse_net.py create mode 100644 python/paddle/fluid/tests/unittests/test_ir_memory_optimize_nlp.py diff --git a/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py b/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py new file mode 100644 index 0000000000..be0e0b7a3a --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py @@ -0,0 +1,145 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import six +import unittest +import time +import math +import multiprocessing + +import paddle +import paddle.fluid.core as core +import paddle.fluid as fluid +from paddle.fluid import compiler + +# open eager delete mode +os.environ['FLAGS_eager_delete_tensor_gb'] = '0.0' +os.environ['FLAGS_fast_eager_deletion_mode'] = 'true' +os.environ['CPU_NUM'] = '2' + + +class BuildIrMemOptBase(unittest.TestCase): + def check_network_convergence(self, + network, + use_cuda=True, + memory_opt=True, + use_ir_memory_optimize=True, + enable_inplace=True, + iter=5): + if use_cuda and not core.is_compiled_with_cuda(): + print('Skip use_cuda=True because Paddle is not compiled with cuda') + return + + if os.name == 'nt': + print( + 'Skip use_parallel_executor=True because Paddle comes without parallel support on windows' + ) + return + batch_size = 32 + batch_size *= fluid.core.get_cuda_device_count() if use_cuda else int( + os.environ.get('CPU_NUM', multiprocessing.cpu_count())) + + # build network + word_dict = paddle.dataset.imdb.word_dict() + train_reader = paddle.batch( + paddle.dataset.imdb.train(word_dict), batch_size=batch_size) + + data = fluid.layers.data( + name="words", shape=[1], dtype="int64", lod_level=1) + + label = fluid.layers.data(name="label", shape=[1], dtype="int64") + + cost = network(data, label, len(word_dict)) + optimizer = fluid.optimizer.Adam(learning_rate=0.2) + optimizer.minimize(cost) + if memory_opt: + fluid.memory_optimize(main) + + # execution + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + feeder = fluid.DataFeeder(feed_list=[data, label], place=place) + reader = feeder.decorate_reader(train_reader, multi_devices=True) + exe = fluid.Executor(place) + fluid.default_startup_program().random_seed = 1 + fluid.default_main_program().random_seed = 1 + 
exe.run(fluid.default_startup_program()) + + train_cp = compiler.CompiledProgram(fluid.default_main_program()) + train_cp = train_cp.with_data_parallel(loss_name=cost.name) + fetch_list = [cost.name] + + begin = time.time() + first_loss, last_loss = None, None + step_id = 0 + custom_iter = getattr(self, "iter") + if not custom_iter == None: + iter = custom_iter + for data in reader(): + ret = exe.run(train_cp, feed=data, fetch_list=fetch_list) + print(ret) + step_id += 1 + if step_id == 0: + first_loss = res[0] + if step_id == iter: + last_loss = res[0] + break + end = time.time() + + print("%.4f Instance per second" % ( + (batch_size * iter) / (end - begin))) + + avg_last_loss_val = np.array(last_loss).mean() + avg_first_loss_val = np.array(first_loss).mean() + if math.isnan(float(avg_last_loss_val)) or math.isnan( + float(avg_first_loss_val)): + sys.exit("got NaN loss, training failed.") + + print(first_loss, last_loss) + return first_loss, last_loss + + +class TestIrMemOptBase(BuildIrMemOptBase): + def setUp(self): + self.network = None + + def test_network(self): + if self.network is None: + return + + baseline_first_loss, baseline_last_loss = None, None + for use_cuda in [True, False]: + for use_python_mem_opt in [True, False]: + print( + 'network: {}, use_cuda: {}, use_python_mem_opt: {}, use_ir_mem_opt : {}'. + format(self.network.__name__, use_cuda, use_python_mem_opt, + not use_python_mem_opt)) + with fluid.program_guard(fluid.Program(), fluid.Program()): + with fluid.scope_guard(core.Scope()): + if use_cuda is False and use_python_mem_opt is False: + baseline_first_loss, baseline_last_loss = self.check_network_convergence( + self.network, + use_cuda=use_cuda, + memory_opt=use_python_mem_opt) + else: + cur_first_loss, cur_last_loss = self.check_network_convergence( + self.network, + use_cuda=use_cuda, + memory_opt=use_python_mem_opt) + for loss in zip(baseline_first_loss, + cur_first_loss): + self.assertAlmostEqual(loss[0], loss[1], 1e-5) + for loss in zip(baseline_last_loss, cur_last_loss): + self.assertAlmostEqual(loss[0], loss[1], 1e-5) diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py index bc3c422f2f..910f53a91a 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py @@ -56,6 +56,8 @@ def train(network, use_cuda, use_parallel_executor, batch_size=32, pass_num=2): train_reader, multi_devices=use_parallel_executor) exe = fluid.Executor(place) + fluid.default_startup_program().random_seed = 1 + fluid.default_main_program().random_seed = 1 exe.run(fluid.default_startup_program()) train_cp = compiler.CompiledProgram(fluid.default_main_program()) diff --git a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_ifelse_net.py b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_ifelse_net.py new file mode 100644 index 0000000000..7ae7920fb6 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_ifelse_net.py @@ -0,0 +1,55 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import paddle.fluid as fluid
+import unittest
+from ir_memory_optimize_net_base import TestIrMemOptBase
+from paddle.fluid.layers.control_flow import ConditionalBlock
+
+
+def lstm_net(data,
+             label,
+             dict_dim,
+             emb_dim=128,
+             hid_dim=128,
+             hid_dim2=96,
+             class_dim=2,
+             emb_lr=30.0):
+    emb = fluid.layers.embedding(
+        input=data,
+        size=[dict_dim, emb_dim],
+        param_attr=fluid.ParamAttr(learning_rate=emb_lr))
+    fc0 = fluid.layers.fc(input=emb, size=hid_dim * 4)
+
+    lstm_h, c = fluid.layers.dynamic_lstm(
+        input=fc0, size=hid_dim * 4, is_reverse=False)
+    lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max')
+    lstm_max_tanh = fluid.layers.tanh(lstm_max)
+    fc1 = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh')
+    prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax')
+    cost = fluid.layers.cross_entropy(input=prediction, label=label)
+    avg_cost = fluid.layers.mean(x=cost)
+    return avg_cost
+
+
+class TestIrMemOptRNN(TestIrMemOptBase):
+    def setUp(self):
+        self.network = lstm_net
+        self.iter = 2
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_nlp.py b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_nlp.py
new file mode 100644
index 0000000000..30b6d6106c
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_nlp.py
@@ -0,0 +1,55 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# An NLP model built as a stack of ops operating on LoD tensors; a classic test case for the memory-optimize pass.
+
+from __future__ import print_function
+
+import paddle.fluid as fluid
+import unittest
+from ir_memory_optimize_net_base import TestIrMemOptBase
+
+
+def lstm_net(data,
+             label,
+             dict_dim,
+             emb_dim=128,
+             hid_dim=128,
+             hid_dim2=96,
+             class_dim=2,
+             emb_lr=30.0):
+    emb = fluid.layers.embedding(
+        input=data,
+        size=[dict_dim, emb_dim],
+        param_attr=fluid.ParamAttr(learning_rate=emb_lr))
+    fc0 = fluid.layers.fc(input=emb, size=hid_dim * 4)
+
+    lstm_h, c = fluid.layers.dynamic_lstm(
+        input=fc0, size=hid_dim * 4, is_reverse=False)
+    lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max')
+    lstm_max_tanh = fluid.layers.tanh(lstm_max)
+    fc1 = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh')
+    prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax')
+    cost = fluid.layers.cross_entropy(input=prediction, label=label)
+    avg_cost = fluid.layers.mean(x=cost)
+    return avg_cost
+
+
+class TestIrMemOptRNN(TestIrMemOptBase):
+    def setUp(self):
+        self.network = lstm_net
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py
index fe5c7b7a39..50d998990f 100644
--- a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py
+++ b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py
@@ -28,9 +28,6 @@ os.environ[
 from test_parallel_executor_transformer import transformer, ModelHyperParams, transformer_model, transformer, prepare_batch_input
 from parallel_executor_test_base import TestParallelExecutorBase
 
-# disable temporarily because of timeout.
-sys.exit(0)
-
 # NOTE(dzhwinter): test different strategy collisions.
 # open the eager delete tensor strategy by default.
 

From a922a0a1efbe9a1a876439c5732d0d3658da5f46 Mon Sep 17 00:00:00 2001
From: dzhwinter
Date: Tue, 26 Feb 2019 10:53:21 +0800
Subject: [PATCH 2/5] fix default value.
test=develop --- .../unittests/ir_memory_optimize_net_base.py | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py b/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py index be0e0b7a3a..8b3f9c485e 100644 --- a/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py +++ b/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py @@ -19,6 +19,7 @@ import unittest import time import math import multiprocessing +import numpy as np import paddle import paddle.fluid.core as core @@ -63,18 +64,18 @@ class BuildIrMemOptBase(unittest.TestCase): label = fluid.layers.data(name="label", shape=[1], dtype="int64") cost = network(data, label, len(word_dict)) - optimizer = fluid.optimizer.Adam(learning_rate=0.2) + optimizer = fluid.optimizer.Adam(learning_rate=0.001) optimizer.minimize(cost) if memory_opt: - fluid.memory_optimize(main) + fluid.memory_optimize(fluid.default_main_program()) # execution place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() feeder = fluid.DataFeeder(feed_list=[data, label], place=place) reader = feeder.decorate_reader(train_reader, multi_devices=True) exe = fluid.Executor(place) - fluid.default_startup_program().random_seed = 1 - fluid.default_main_program().random_seed = 1 + fluid.default_startup_program().random_seed = 100 + fluid.default_main_program().random_seed = 100 exe.run(fluid.default_startup_program()) train_cp = compiler.CompiledProgram(fluid.default_main_program()) @@ -84,30 +85,30 @@ class BuildIrMemOptBase(unittest.TestCase): begin = time.time() first_loss, last_loss = None, None step_id = 0 - custom_iter = getattr(self, "iter") + custom_iter = getattr(self, "iter", None) if not custom_iter == None: iter = custom_iter for data in reader(): ret = exe.run(train_cp, feed=data, fetch_list=fetch_list) print(ret) step_id += 1 - if step_id == 0: - first_loss = res[0] + if step_id == 1: + first_loss = ret[0] if step_id == iter: - last_loss = res[0] + last_loss = ret[0] break end = time.time() print("%.4f Instance per second" % ( (batch_size * iter) / (end - begin))) + print(first_loss, last_loss) avg_last_loss_val = np.array(last_loss).mean() avg_first_loss_val = np.array(first_loss).mean() if math.isnan(float(avg_last_loss_val)) or math.isnan( float(avg_first_loss_val)): sys.exit("got NaN loss, training failed.") - print(first_loss, last_loss) return first_loss, last_loss @@ -128,7 +129,7 @@ class TestIrMemOptBase(BuildIrMemOptBase): not use_python_mem_opt)) with fluid.program_guard(fluid.Program(), fluid.Program()): with fluid.scope_guard(core.Scope()): - if use_cuda is False and use_python_mem_opt is False: + if use_cuda is True and use_python_mem_opt is True: baseline_first_loss, baseline_last_loss = self.check_network_convergence( self.network, use_cuda=use_cuda, @@ -138,8 +139,7 @@ class TestIrMemOptBase(BuildIrMemOptBase): self.network, use_cuda=use_cuda, memory_opt=use_python_mem_opt) - for loss in zip(baseline_first_loss, - cur_first_loss): - self.assertAlmostEqual(loss[0], loss[1], 1e-5) - for loss in zip(baseline_last_loss, cur_last_loss): - self.assertAlmostEqual(loss[0], loss[1], 1e-5) + self.assertAlmostEquals(baseline_last_loss, + cur_last_loss, 1e-2) + self.assertAlmostEquals(baseline_first_loss, + cur_first_loss, 1e-2) From a4cf29547155423188ac79c85002c0985b72ce3d Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Tue, 26 Feb 2019 11:16:16 +0800 Subject: [PATCH 3/5] fix default value. 
test=develop --- .../tests/unittests/ir_memory_optimize_net_base.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py b/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py index 8b3f9c485e..84aa6b0352 100644 --- a/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py +++ b/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py @@ -49,6 +49,8 @@ class BuildIrMemOptBase(unittest.TestCase): 'Skip use_parallel_executor=True because Paddle comes without parallel support on windows' ) return + fluid.default_startup_program().random_seed = 100 + fluid.default_main_program().random_seed = 100 batch_size = 32 batch_size *= fluid.core.get_cuda_device_count() if use_cuda else int( os.environ.get('CPU_NUM', multiprocessing.cpu_count())) @@ -74,8 +76,6 @@ class BuildIrMemOptBase(unittest.TestCase): feeder = fluid.DataFeeder(feed_list=[data, label], place=place) reader = feeder.decorate_reader(train_reader, multi_devices=True) exe = fluid.Executor(place) - fluid.default_startup_program().random_seed = 100 - fluid.default_main_program().random_seed = 100 exe.run(fluid.default_startup_program()) train_cp = compiler.CompiledProgram(fluid.default_main_program()) @@ -139,7 +139,7 @@ class TestIrMemOptBase(BuildIrMemOptBase): self.network, use_cuda=use_cuda, memory_opt=use_python_mem_opt) - self.assertAlmostEquals(baseline_last_loss, - cur_last_loss, 1e-2) - self.assertAlmostEquals(baseline_first_loss, - cur_first_loss, 1e-2) + self.assertAlmostEquals(np.mean(baseline_last_loss), + np.mean(cur_last_loss), delta=1e-2) + self.assertAlmostEquals(np.mean(baseline_first_loss), + np.mean(cur_first_loss), delta=1e-2) From dfb2121967c24d13f1282a545625c4a4afa7a99a Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Tue, 26 Feb 2019 11:18:45 +0800 Subject: [PATCH 4/5] fix default value. test=develop --- .../paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py b/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py index 84aa6b0352..bf6adce8ac 100644 --- a/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py +++ b/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py @@ -121,7 +121,7 @@ class TestIrMemOptBase(BuildIrMemOptBase): return baseline_first_loss, baseline_last_loss = None, None - for use_cuda in [True, False]: + for use_cuda in [True]: for use_python_mem_opt in [True, False]: print( 'network: {}, use_cuda: {}, use_python_mem_opt: {}, use_ir_mem_opt : {}'. From 48d9fd08e5193a505a8dea48926f2ab2abfd129f Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Tue, 26 Feb 2019 13:49:55 +0800 Subject: [PATCH 5/5] fix default value. 
test=develop --- .../unittests/ir_memory_optimize_net_base.py | 15 +++-- .../test_ir_memory_optimize_ifelse_net.py | 55 ------------------- 2 files changed, 10 insertions(+), 60 deletions(-) delete mode 100644 python/paddle/fluid/tests/unittests/test_ir_memory_optimize_ifelse_net.py diff --git a/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py b/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py index bf6adce8ac..079f0d2205 100644 --- a/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py +++ b/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py @@ -117,7 +117,7 @@ class TestIrMemOptBase(BuildIrMemOptBase): self.network = None def test_network(self): - if self.network is None: + if self.network is None or not core.is_compiled_with_cuda(): return baseline_first_loss, baseline_last_loss = None, None @@ -139,7 +139,12 @@ class TestIrMemOptBase(BuildIrMemOptBase): self.network, use_cuda=use_cuda, memory_opt=use_python_mem_opt) - self.assertAlmostEquals(np.mean(baseline_last_loss), - np.mean(cur_last_loss), delta=1e-2) - self.assertAlmostEquals(np.mean(baseline_first_loss), - np.mean(cur_first_loss), delta=1e-2) + + self.assertAlmostEquals( + np.mean(baseline_last_loss), + np.mean(cur_last_loss), + delta=1e-2) + self.assertAlmostEquals( + np.mean(baseline_first_loss), + np.mean(cur_first_loss), + delta=1e-2) diff --git a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_ifelse_net.py b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_ifelse_net.py deleted file mode 100644 index 7ae7920fb6..0000000000 --- a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_ifelse_net.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import paddle.fluid as fluid -import unittest -from ir_memory_optimize_net_base import TestIrMemOptBase -from paddle.fluid.layers.control_flow import ConditionalBlock - - -def lstm_net(data, - label, - dict_dim, - emb_dim=128, - hid_dim=128, - hid_dim2=96, - class_dim=2, - emb_lr=30.0): - emb = fluid.layers.embedding( - input=data, - size=[dict_dim, emb_dim], - param_attr=fluid.ParamAttr(learning_rate=emb_lr)) - fc0 = fluid.layers.fc(input=emb, size=hid_dim * 4) - - lstm_h, c = fluid.layers.dynamic_lstm( - input=fc0, size=hid_dim * 4, is_reverse=False) - lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max') - lstm_max_tanh = fluid.layers.tanh(lstm_max) - fc1 = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh') - prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax') - cost = fluid.layers.cross_entropy(input=prediction, label=label) - avg_cost = fluid.layers.mean(x=cost) - return avg_cost - - -class TestIrMemOptRNN(TestIrMemOptBase): - def setUp(self): - self.network = lstm_net - self.iter = 2 - - -if __name__ == "__main__": - unittest.main()
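
A new network test plugs into the harness above by defining a network function
with the signature network(data, label, dict_dim) that returns a mean loss, and
subclassing TestIrMemOptBase, as the two lstm_net tests do. A minimal sketch,
assuming the final state of ir_memory_optimize_net_base.py after all five
patches; bow_net and the class name are illustrative, not part of this series:

import unittest

import paddle.fluid as fluid
from ir_memory_optimize_net_base import TestIrMemOptBase


def bow_net(data, label, dict_dim, emb_dim=128, hid_dim=128, class_dim=2):
    # Bag-of-words classifier: embedding -> sequence sum-pool -> fc -> softmax.
    emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
    bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
    fc1 = fluid.layers.fc(input=bow, size=hid_dim, act='tanh')
    prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax')
    cost = fluid.layers.cross_entropy(input=prediction, label=label)
    return fluid.layers.mean(x=cost)


class TestIrMemOptBow(TestIrMemOptBase):
    def setUp(self):
        self.network = bow_net
        # Optional: cap the number of training steps, as TestIrMemOptRNN does.
        self.iter = 2


if __name__ == "__main__":
    unittest.main()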