From 047fa2f9aa45d7a92b84e1827f373d8b57a789e5 Mon Sep 17 00:00:00 2001 From: minqiyang Date: Wed, 24 Oct 2018 14:50:36 +0800 Subject: [PATCH 1/7] Add unit-test for sequence_pooling functor --- paddle/fluid/operators/math/CMakeLists.txt | 1 + .../operators/math/sequence_pooling_test.cc | 105 ++++++++++++++++++ 2 files changed, 106 insertions(+) create mode 100644 paddle/fluid/operators/math/sequence_pooling_test.cc diff --git a/paddle/fluid/operators/math/CMakeLists.txt b/paddle/fluid/operators/math/CMakeLists.txt index b0276f4080..9088519723 100644 --- a/paddle/fluid/operators/math/CMakeLists.txt +++ b/paddle/fluid/operators/math/CMakeLists.txt @@ -70,6 +70,7 @@ cc_test(selected_rows_functor_test SRCS selected_rows_functor_test.cc DEPS selec cc_test(im2col_test SRCS im2col_test.cc DEPS im2col) cc_test(vol2col_test SRCS vol2col_test.cc DEPS vol2col) cc_test(sequence_padding_test SRCS sequence_padding_test.cc DEPS sequence_padding) +cc_test(sequence_pooling_test SRCS sequence_pooling_test.cc DEPS sequence_pooling) if(WITH_GPU) nv_test(math_function_gpu_test SRCS math_function_test.cu DEPS math_function) nv_test(selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu DEPS selected_rows_functor math_function) diff --git a/paddle/fluid/operators/math/sequence_pooling_test.cc b/paddle/fluid/operators/math/sequence_pooling_test.cc new file mode 100644 index 0000000000..ea92b7548b --- /dev/null +++ b/paddle/fluid/operators/math/sequence_pooling_test.cc @@ -0,0 +1,105 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/math/sequence_pooling.h" +#include +#include + +template +void TestSequencePoolingSum(const paddle::framework::LoD& lod) { + paddle::framework::LoDTensor cpu_out_grad; + paddle::framework::LoDTensor out_grad; + paddle::framework::LoDTensor in_grad; + const size_t second_dim = 128u; + + // construct out_grad's tensor in cpu + const size_t out_first_dim = lod.size() - 1; + auto out_dims = paddle::framework::make_ddim( + {static_cast(out_first_dim), static_cast(second_dim)}); + + cpu_out_grad.mutable_data(out_dims, paddle::platform::CPUPlace()); + for (int64_t i = 0; i < out_grad.numel(); ++i) { + cpu_out_grad.data()[i] = static_cast(i); + } + + // copy to dst out_grad + auto* place = new Place(); + DeviceContext* context = new DeviceContext(*place); + if (paddle::platform::is_cpu_place(*place)) { + out_grad = cpu_out_grad; + } else { + TensorCopySync(cpu_out_grad, *place, &out_grad); + } + + // construct in_grad + in_grad.set_lod(lod); + auto in_dims = paddle::framework::make_ddim( + {static_cast(lod[0].back()), static_cast(second_dim)}); + in_grad.mutable_data(in_dims, context.GetPlace()); + + // check tensor contruction result + PADDLE_ENFORCE_EQ(in_grad.dims().size(), out_grad.dims().size()); + for (int64_t i = 1; i < out_grad.dims().size(); ++i) { + PADDLE_ENFORCE_EQ(in_grad.dims()[i], out_grad.dims()[i]); + } + + // call functor + paddle::operators::math::SequencePoolGradFunctor()( + *context, "SUM", out_grad, &in_grad) + + EXPECT_EQ(in_grad.numel(), lod[0].back() * second_dim); + EXPECT_EQ(in_grad.lod(), lod); + for (int64_t i = 0; i < in_grad.lod().size() - 1; ++i) { + int64_t begin = in_grad.lod()[i]; + int64_t end = in_grad.lod()[i + 1]; + Tensor tmp = in_grad.Slice(begin, end); + for (int64_t j = 0; j != tmp.numel(); j) { + for (int64_t m = 0; m != second_dim; ++m) { + EXPECT_EQ(tmp.data()[m + j * second_dim], + out_grad.data()[m + i * second_dim]); + } + } + } + + delete place; + delete context; +} + +TEST(SequencePoolingGrad, CPU_SUM) { + paddle::framework::LoD lod1; + auto dim1 = std::vector{0, 10}; + lod1.push_back(dim1); + TestSequencePoolingSum(dim, lod1, "SUM", + 16); + + paddle::framework::LoD lod2; + lod2.push_back(std::vector{0, 2, 7, 10}); + TestSequencePoolingSum(lod2, "SUM", 128); +} + +#ifdef PADDLE_WITH_CUDA +TEST(SequencePoolingGrad, CUDA_SUM) { + paddle::framework::LoD lod1; + lod1.push_back(std::vector{0, 10}); + TestSequencePoolingSum(lod1, "SUM", 16); + + paddle::framework::LoD lod2; + lod2.push_back(std::vector{0, 2, 7, 10}); + TestSequencePoolingSum(lod2, "SUM", 128); +} +#endif From bd5a82e1939213af561375a64dc3babff7512f1c Mon Sep 17 00:00:00 2001 From: minqiyang Date: Wed, 24 Oct 2018 15:10:18 +0800 Subject: [PATCH 2/7] Polish unit test code --- .../operators/math/sequence_pooling_test.cc | 28 +++++++++---------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/paddle/fluid/operators/math/sequence_pooling_test.cc b/paddle/fluid/operators/math/sequence_pooling_test.cc index ea92b7548b..c994d470d4 100644 --- a/paddle/fluid/operators/math/sequence_pooling_test.cc +++ b/paddle/fluid/operators/math/sequence_pooling_test.cc @@ -46,7 +46,7 @@ void TestSequencePoolingSum(const paddle::framework::LoD& lod) { in_grad.set_lod(lod); auto in_dims = paddle::framework::make_ddim( {static_cast(lod[0].back()), static_cast(second_dim)}); - in_grad.mutable_data(in_dims, context.GetPlace()); + in_grad.mutable_data(in_dims, context->GetPlace()); // check tensor contruction result PADDLE_ENFORCE_EQ(in_grad.dims().size(), out_grad.dims().size()); @@ -56,15 +56,15 @@ void TestSequencePoolingSum(const paddle::framework::LoD& lod) { // call functor paddle::operators::math::SequencePoolGradFunctor()( - *context, "SUM", out_grad, &in_grad) + *context, "SUM", out_grad, &in_grad); - EXPECT_EQ(in_grad.numel(), lod[0].back() * second_dim); + EXPECT_EQ(in_grad.numel(), lod[0].back() * second_dim); EXPECT_EQ(in_grad.lod(), lod); - for (int64_t i = 0; i < in_grad.lod().size() - 1; ++i) { - int64_t begin = in_grad.lod()[i]; - int64_t end = in_grad.lod()[i + 1]; - Tensor tmp = in_grad.Slice(begin, end); - for (int64_t j = 0; j != tmp.numel(); j) { + for (int64_t i = 0; i < in_grad.lod()[0].size() - 1; ++i) { + int64_t begin = in_grad.lod()[0][i]; + int64_t end = in_grad.lod()[0][i + 1]; + paddle::framework::Tensor tmp = in_grad.Slice(begin, end); + for (int64_t j = 0; j != tmp.numel() / second_dim; ++j) { for (int64_t m = 0; m != second_dim; ++m) { EXPECT_EQ(tmp.data()[m + j * second_dim], out_grad.data()[m + i * second_dim]); @@ -78,16 +78,14 @@ void TestSequencePoolingSum(const paddle::framework::LoD& lod) { TEST(SequencePoolingGrad, CPU_SUM) { paddle::framework::LoD lod1; - auto dim1 = std::vector{0, 10}; - lod1.push_back(dim1); + lod1.push_back(std::vector{0, 10}); TestSequencePoolingSum(dim, lod1, "SUM", - 16); + paddle::platform::CPUPlace, float>(lod1); paddle::framework::LoD lod2; lod2.push_back(std::vector{0, 2, 7, 10}); TestSequencePoolingSum(lod2, "SUM", 128); + paddle::platform::CPUPlace, float>(lod2); } #ifdef PADDLE_WITH_CUDA @@ -95,11 +93,11 @@ TEST(SequencePoolingGrad, CUDA_SUM) { paddle::framework::LoD lod1; lod1.push_back(std::vector{0, 10}); TestSequencePoolingSum(lod1, "SUM", 16); + paddle::platform::CUDAPlace, float>(lod1); paddle::framework::LoD lod2; lod2.push_back(std::vector{0, 2, 7, 10}); TestSequencePoolingSum(lod2, "SUM", 128); + paddle::platform::CUDAPlace, float>(lod2); } #endif From 14ebc424d65a32828afe2687d227e08c80a6dc44 Mon Sep 17 00:00:00 2001 From: minqiyang Date: Wed, 24 Oct 2018 16:09:33 +0800 Subject: [PATCH 3/7] Add gpu support for unittest --- .../operators/math/sequence_pooling_test.cc | 43 ++++++++++++++----- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/paddle/fluid/operators/math/sequence_pooling_test.cc b/paddle/fluid/operators/math/sequence_pooling_test.cc index c994d470d4..af697f1ad8 100644 --- a/paddle/fluid/operators/math/sequence_pooling_test.cc +++ b/paddle/fluid/operators/math/sequence_pooling_test.cc @@ -19,17 +19,18 @@ limitations under the License. */ template void TestSequencePoolingSum(const paddle::framework::LoD& lod) { paddle::framework::LoDTensor cpu_out_grad; + paddle::framework::LoDTensor cpu_in_grad; paddle::framework::LoDTensor out_grad; paddle::framework::LoDTensor in_grad; const size_t second_dim = 128u; // construct out_grad's tensor in cpu - const size_t out_first_dim = lod.size() - 1; + const size_t out_first_dim = lod[0].size() - 1; auto out_dims = paddle::framework::make_ddim( {static_cast(out_first_dim), static_cast(second_dim)}); cpu_out_grad.mutable_data(out_dims, paddle::platform::CPUPlace()); - for (int64_t i = 0; i < out_grad.numel(); ++i) { + for (int64_t i = 0; i < cpu_out_grad.numel(); ++i) { cpu_out_grad.data()[i] = static_cast(i); } @@ -58,16 +59,38 @@ void TestSequencePoolingSum(const paddle::framework::LoD& lod) { paddle::operators::math::SequencePoolGradFunctor()( *context, "SUM", out_grad, &in_grad); + if (paddle::platform::is_cpu_place(*place)) { + cpu_in_grad = in_grad; + } else { + TensorCopySync(in_grad, paddle::platform::CPUPlace(), &cpu_in_grad); + cpu_in_grad.set_lod(in_grad.lod()); + } + EXPECT_EQ(in_grad.numel(), lod[0].back() * second_dim); EXPECT_EQ(in_grad.lod(), lod); - for (int64_t i = 0; i < in_grad.lod()[0].size() - 1; ++i) { - int64_t begin = in_grad.lod()[0][i]; - int64_t end = in_grad.lod()[0][i + 1]; - paddle::framework::Tensor tmp = in_grad.Slice(begin, end); - for (int64_t j = 0; j != tmp.numel() / second_dim; ++j) { - for (int64_t m = 0; m != second_dim; ++m) { - EXPECT_EQ(tmp.data()[m + j * second_dim], - out_grad.data()[m + i * second_dim]); + + if (paddle::platform::is_cpu_place(*place)) { + for (int64_t i = 0; i < cpu_in_grad.lod()[0].size() - 1; ++i) { + int64_t begin = in_grad.lod()[0][i]; + int64_t end = in_grad.lod()[0][i + 1]; + paddle::framework::Tensor tmp = in_grad.Slice(begin, end); + for (int64_t j = 0; j != tmp.numel() / second_dim; ++j) { + for (int64_t m = 0; m != second_dim; ++m) { + EXPECT_EQ(tmp.data()[m + j * second_dim], + out_grad.data()[m + i * second_dim]); + } + } + } + } else { + for (int64_t i = 0; i < cpu_in_grad.lod()[0].size() - 1; ++i) { + int64_t begin = cpu_in_grad.lod()[0][i]; + int64_t end = cpu_in_grad.lod()[0][i + 1]; + paddle::framework::Tensor tmp = cpu_in_grad.Slice(begin, end); + for (int64_t j = 0; j != tmp.numel() / second_dim; ++j) { + for (int64_t m = 0; m != second_dim; ++m) { + EXPECT_EQ(tmp.data()[m + j * second_dim], + cpu_out_grad.data()[m + i * second_dim]); + } } } } From 9c687090365e0721c04c7623b08651c6e211b7aa Mon Sep 17 00:00:00 2001 From: minqiyang Date: Wed, 24 Oct 2018 16:12:57 +0800 Subject: [PATCH 4/7] Accelerate sequence_pool functor --- .../fluid/operators/math/sequence_pooling.cc | 29 ++++++++++++++----- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/paddle/fluid/operators/math/sequence_pooling.cc b/paddle/fluid/operators/math/sequence_pooling.cc index 235b5405fb..fd93e431ab 100644 --- a/paddle/fluid/operators/math/sequence_pooling.cc +++ b/paddle/fluid/operators/math/sequence_pooling.cc @@ -231,9 +231,30 @@ class SequencePoolGradFunctor { math::SetConstant functor; functor(context, in_grad, 0); } + + if (pooltype == "SUM") { + auto lod = in_grad->lod()[0]; + int64_t out_w = out_grad.numel() / out_grad.dims()[0]; + int64_t in_w = in_grad->numel() / in_grad->dims()[0]; + PADDLE_ENFORCE(in_w == out_w); + const T* out_g_data = out_grad.data(); + T* in_g_data = in_grad->mutable_data(context.GetPlace()); + auto blas = math::GetBlas(context); + for (int i = 0; i < static_cast(lod.size()) - 1; ++i) { + int64_t h = static_cast(lod[i + 1] - lod[i]); + int64_t in_offset = lod[i]; + const T* out_pos = out_g_data + i * out_w; + T* in_pos = in_g_data + in_offset; + for (int r = 0; r != h; ++r) { + blas.VCOPY(in_w, out_pos, in_pos + r * in_w); + } + } + + return; + } + auto lod = in_grad->lod()[0]; auto& place = *context.eigen_device(); - auto blas = math::GetBlas(context); for (int i = 0; i < static_cast(lod.size()) - 1; ++i) { auto in_g_t = in_grad->Slice(static_cast(lod[i]), static_cast(lod[i + 1])); @@ -247,12 +268,6 @@ class SequencePoolGradFunctor { if (pooltype == "AVERAGE") { in_g_e.device(place) = (out_g_e / static_cast(h)).broadcast(bcast); - } else if (pooltype == "SUM") { - const T* out_g_data = out_g_t.data(); - T* in_g_data = in_g_t.mutable_data(context.GetPlace()); - for (int r = 0; r != h; ++r) { - blas.VCOPY(w, out_g_data, in_g_data + r * w); - } } else if (pooltype == "SQRT") { in_g_e.device(place) = (out_g_e / std::sqrt(static_cast(h))).broadcast(bcast); From 9725db0d40c11a9e3a3853527119cce57c902908 Mon Sep 17 00:00:00 2001 From: minqiyang Date: Wed, 24 Oct 2018 16:51:41 +0800 Subject: [PATCH 5/7] Fix copy wrong pos bug test=develop --- paddle/fluid/operators/math/sequence_pooling.cc | 2 +- paddle/fluid/operators/math/sequence_pooling_test.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/operators/math/sequence_pooling.cc b/paddle/fluid/operators/math/sequence_pooling.cc index fd93e431ab..c2849d9776 100644 --- a/paddle/fluid/operators/math/sequence_pooling.cc +++ b/paddle/fluid/operators/math/sequence_pooling.cc @@ -242,7 +242,7 @@ class SequencePoolGradFunctor { auto blas = math::GetBlas(context); for (int i = 0; i < static_cast(lod.size()) - 1; ++i) { int64_t h = static_cast(lod[i + 1] - lod[i]); - int64_t in_offset = lod[i]; + int64_t in_offset = lod[i] * in_w; const T* out_pos = out_g_data + i * out_w; T* in_pos = in_g_data + in_offset; for (int r = 0; r != h; ++r) { diff --git a/paddle/fluid/operators/math/sequence_pooling_test.cc b/paddle/fluid/operators/math/sequence_pooling_test.cc index af697f1ad8..2bc008dd34 100644 --- a/paddle/fluid/operators/math/sequence_pooling_test.cc +++ b/paddle/fluid/operators/math/sequence_pooling_test.cc @@ -70,7 +70,7 @@ void TestSequencePoolingSum(const paddle::framework::LoD& lod) { EXPECT_EQ(in_grad.lod(), lod); if (paddle::platform::is_cpu_place(*place)) { - for (int64_t i = 0; i < cpu_in_grad.lod()[0].size() - 1; ++i) { + for (int64_t i = 0; i < in_grad.lod()[0].size() - 1; ++i) { int64_t begin = in_grad.lod()[0][i]; int64_t end = in_grad.lod()[0][i + 1]; paddle::framework::Tensor tmp = in_grad.Slice(begin, end); From 2468057da64799f00a482f0dda5b47ad7ecb607b Mon Sep 17 00:00:00 2001 From: minqiyang Date: Wed, 24 Oct 2018 16:59:31 +0800 Subject: [PATCH 6/7] Move code to SumSeqPoolGradFunctor test=develop --- .../fluid/operators/math/sequence_pooling.cc | 44 ++++++++++++------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/paddle/fluid/operators/math/sequence_pooling.cc b/paddle/fluid/operators/math/sequence_pooling.cc index c2849d9776..7be8539a7b 100644 --- a/paddle/fluid/operators/math/sequence_pooling.cc +++ b/paddle/fluid/operators/math/sequence_pooling.cc @@ -157,6 +157,31 @@ class FirstSeqPoolFunctor { } }; +template +class SumSeqPoolGradFunctor { + public: + void operator()(const platform::CPUDeviceContext& context, + const framework::Tensor& out_grad, + framework::LoDTensor* in_grad) { + auto lod = in_grad->lod()[0]; + int64_t out_w = out_grad.numel() / out_grad.dims()[0]; + int64_t in_w = in_grad->numel() / in_grad->dims()[0]; + PADDLE_ENFORCE(in_w == out_w); + const T* out_g_data = out_grad.data(); + T* in_g_data = in_grad->mutable_data(context.GetPlace()); + auto blas = math::GetBlas(context); + for (int i = 0; i < static_cast(lod.size()) - 1; ++i) { + int64_t h = static_cast(lod[i + 1] - lod[i]); + int64_t in_offset = lod[i] * in_w; + const T* out_pos = out_g_data + i * out_w; + T* in_pos = in_g_data + in_offset; + for (int r = 0; r != h; ++r) { + blas.VCOPY(in_w, out_pos, in_pos + r * in_w); + } + } + } +}; + template class SequencePoolFunctor { public: @@ -233,23 +258,8 @@ class SequencePoolGradFunctor { } if (pooltype == "SUM") { - auto lod = in_grad->lod()[0]; - int64_t out_w = out_grad.numel() / out_grad.dims()[0]; - int64_t in_w = in_grad->numel() / in_grad->dims()[0]; - PADDLE_ENFORCE(in_w == out_w); - const T* out_g_data = out_grad.data(); - T* in_g_data = in_grad->mutable_data(context.GetPlace()); - auto blas = math::GetBlas(context); - for (int i = 0; i < static_cast(lod.size()) - 1; ++i) { - int64_t h = static_cast(lod[i + 1] - lod[i]); - int64_t in_offset = lod[i] * in_w; - const T* out_pos = out_g_data + i * out_w; - T* in_pos = in_g_data + in_offset; - for (int r = 0; r != h; ++r) { - blas.VCOPY(in_w, out_pos, in_pos + r * in_w); - } - } - + math::SumSeqPoolGradFunctor sum_pool_grad; + sum_pool_grad(context, out_grad, in_grad); return; } From 97a87bb38b1d3d2c8b3fe426bb30c4a6b59dd3ef Mon Sep 17 00:00:00 2001 From: gongweibao Date: Thu, 25 Oct 2018 14:02:25 +0800 Subject: [PATCH 7/7] Fix transformer unittest. (#13974) Fix transformer unittest --- .../fluid/tests/unittests/CMakeLists.txt | 6 ++--- .../fluid/tests/unittests/dist_transformer.py | 23 +++++++------------ .../tests/unittests/test_dist_transformer.py | 6 +++-- 3 files changed, 15 insertions(+), 20 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 7de0ebce06..68e498c6e8 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -78,9 +78,9 @@ if(WITH_DISTRIBUTE) set_tests_properties(test_dist_word2vec PROPERTIES TIMEOUT 200) py_test_modules(test_dist_se_resnext MODULES test_dist_se_resnext) set_tests_properties(test_dist_se_resnext PROPERTIES TIMEOUT 1000) - # TODO: fix this test - #py_test_modules(test_dist_transformer MODULES test_dist_transformer) - #set_tests_properties(test_dist_transformer PROPERTIES TIMEOUT 1000) + + py_test_modules(test_dist_transformer MODULES test_dist_transformer) + set_tests_properties(test_dist_transformer PROPERTIES TIMEOUT 1000) endif(NOT APPLE) py_test_modules(test_dist_transpiler MODULES test_dist_transpiler) endif() diff --git a/python/paddle/fluid/tests/unittests/dist_transformer.py b/python/paddle/fluid/tests/unittests/dist_transformer.py index a2cc574258..ab44954811 100644 --- a/python/paddle/fluid/tests/unittests/dist_transformer.py +++ b/python/paddle/fluid/tests/unittests/dist_transformer.py @@ -35,7 +35,7 @@ import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers from paddle.fluid import core -from test_dist_base import TestDistRunnerBase, runtime_main +from test_dist_base import TestDistRunnerBase, runtime_main, RUN_STEP import paddle.compat as cpt from paddle.compat import long_type @@ -562,18 +562,12 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler, for pass_id in six.moves.xrange(TrainTaskConfig.pass_num): pass_start_time = time.time() for batch_id, data in enumerate(train_data()): - if batch_id >= 5: + if batch_id >= RUN_STEP: break feed_list = [] total_num_token = 0 - #if TrainTaskConfig.local: - # lr_rate = lr_scheduler.update_learning_rate() - #for place_id, data_buffer in enumerate( - # split_data( - # data, num_part=dev_count)): - if TrainTaskConfig.local: lr_rate = lr_scheduler.update_learning_rate() @@ -619,12 +613,11 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler, init = True # Validate and save the model for inference. - if batch_id == 0 or batch_id == 4: - if TrainTaskConfig.val_file_pattern is not None: - val_avg_cost, val_ppl = test() - print("[%f]" % val_avg_cost) - else: - assert (False) + if TrainTaskConfig.val_file_pattern is not None: + val_avg_cost, val_ppl = test() + print("[%f]" % val_avg_cost) + else: + assert (False) #import transformer_reader as reader @@ -1701,7 +1694,7 @@ class DistTransformer2x2(TestDistRunnerBase): def run_trainer(self, args): TrainTaskConfig.use_gpu = args.use_cuda - sum_cost, avg_cost, predict, token_num, local_lr_scheduler = get_model( + sum_cost, avg_cost, predict, token_num, local_lr_scheduler, test_program = get_model( args.is_dist, not args.sync_mode) if args.is_dist: diff --git a/python/paddle/fluid/tests/unittests/test_dist_transformer.py b/python/paddle/fluid/tests/unittests/test_dist_transformer.py index 47e8dfaf03..25dcccc28d 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_transformer.py +++ b/python/paddle/fluid/tests/unittests/test_dist_transformer.py @@ -61,7 +61,8 @@ class TestDistTransformer2x2Sync(TestDistBase): def test_dist_train(self): download_files() - self.check_with_place("dist_transformer.py", delta=1e-5) + self.check_with_place( + "dist_transformer.py", delta=1e-5, check_error_log=False) class TestDistTransformer2x2Async(TestDistBase): @@ -70,7 +71,8 @@ class TestDistTransformer2x2Async(TestDistBase): def test_dist_train(self): download_files() - self.check_with_place("dist_transformer.py", delta=1.0) + self.check_with_place( + "dist_transformer.py", delta=1.0, check_error_log=False) if __name__ == "__main__":