From 2dccdc3ccf01e6c660ac2276188297388bcb6780 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Fri, 27 Oct 2017 10:22:27 +0800 Subject: [PATCH 1/4] update benchmark data on VGG19 --- benchmark/IntelOptimizedPaddle.md | 48 +++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 benchmark/IntelOptimizedPaddle.md diff --git a/benchmark/IntelOptimizedPaddle.md b/benchmark/IntelOptimizedPaddle.md new file mode 100644 index 0000000000..f2744c075d --- /dev/null +++ b/benchmark/IntelOptimizedPaddle.md @@ -0,0 +1,48 @@ +# Benchmark + +Machine: + +- Server + - Intel(R) Xeon(R) Gold 6148M CPU @ 2.40GHz, 2 Sockets, 20 Cores per socket +- Laptop + - DELL XPS15-9560-R1745: i7-7700HQ 8G 256GSSD + - i5 MacBook Pro (Retina, 13-inch, Early 2015) +- Desktop + - i7-6700k + +System: CentOS 7.3.1611 + +PaddlePaddle: commit cfa86a3f70cb5f2517a802f32f2c88d48ab4e0e0 + +- MKL-DNN tag v0.10 +- MKLML 2018.0.20170720 +- OpenBLAS v0.2.20 + +On each machine, we will test and compare the performance of training on single node using MKL-DNN / MKLML / OpenBLAS respectively. + +## Benchmark Model + +### Server +Test on batch size 64, 128, 256 on Intel(R) Xeon(R) Gold 6148M CPU @ 2.40GHz + +Input image size - 3 * 224 * 224, Time: images/second + +- VGG-19 + +| BatchSize | 64 | 128 | 256 | +|--------------|-------| -----| --------| +| OpenBLAS | 7.86 | 9.02 | 10.62 | +| MKLML | 11.80 | 13.43 | 16.21 | +| MKL-DNN | 29.07 | 30.40 | 31.06 | + + +chart on batch size 128 +TBD + + - ResNet + - GoogLeNet + +### Laptop +TBD +### Desktop +TBD From 56f6e231c6fb4cf2af5f11e7d7b0fe53deef4044 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Mon, 30 Oct 2017 15:41:00 +0800 Subject: [PATCH 2/4] refine mkldnntester, support comparing values near zero --- paddle/gserver/tests/MKLDNNTester.cpp | 28 ++++++++++++++++----------- paddle/gserver/tests/MKLDNNTester.h | 10 +++++----- paddle/gserver/tests/test_MKLDNN.cpp | 3 +-- 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/paddle/gserver/tests/MKLDNNTester.cpp b/paddle/gserver/tests/MKLDNNTester.cpp index 73b7e8857f..c345a16221 100644 --- a/paddle/gserver/tests/MKLDNNTester.cpp +++ b/paddle/gserver/tests/MKLDNNTester.cpp @@ -273,31 +273,37 @@ void MKLDNNTester::printVector(const VectorPtr& v) { VLOG(MKLDNN_ALL) << std::endl << ostr.str(); } -double MKLDNNTester::getDelta(const real* d1, - const real* d2, +double MKLDNNTester::getDelta(const real* refer, + const real* value, size_t len, const float failRate, const float thres) { double delta = 0, sum = 0; int failCnt = 0; const double eps = 1e-5; - double maxOut = 0; + double maxRatio = 0; for (size_t i = 0; i < len; ++i) { - double ref = fabs(d2[i]); - double diff = fabs(d1[i] - d2[i]); + double ref = fabs(refer[i]); + double val = fabs(value[i]); + double diff = fabs(refer[i] - value[i]); delta += diff; sum += ref; - if (ref > eps && fabs(d1[i]) > eps && diff / ref > thres) { - maxOut = std::max(maxOut, diff / ref); + if (ref < eps && val < eps) { // both values are very small + continue; + } + double ratio = diff / ref; + if (ratio > thres) { + maxRatio = std::max(maxRatio, ratio); failCnt++; } } - EXPECT_TRUE(std::isnormal(sum)); EXPECT_FALSE(std::isinf(sum)); + EXPECT_FALSE(std::isnan(sum)); EXPECT_FALSE(std::isnan(delta)); VLOG(MKLDNN_ALL) << "reference avg data: " << sum / len << ", delta: " << delta / sum << ", failCnt:" << failCnt; - return (failCnt / (float)len) > failRate ? maxOut : delta / sum; + double res = sum > eps ? delta / sum : eps; + return (failCnt / (float)len) > failRate ? maxRatio : res; } double MKLDNNTester::compareMatrix(const MatrixPtr& m1, const MatrixPtr& m2) { @@ -543,12 +549,12 @@ void MKLDNNTester::getOutResult(const std::string& configPath, void MKLDNNTester::compareResult(DataOut& ref, DataOut& dnn, float eps) { CHECK_EQ(ref.outValues.size(), dnn.outValues.size()); CHECK_EQ(ref.paraValues.size(), dnn.paraValues.size()); - VLOG(MKLDNN_TESTS) << "compare value size: " << ref.outValues.size(); for (size_t i = 0; i < ref.outValues.size(); i++) { + VLOG(MKLDNN_TESTS) << "compare value index: " << i; EXPECT_LE(fabs(compareMatrix(ref.outValues[i], dnn.outValues[i])), eps); } - VLOG(MKLDNN_TESTS) << "compare param size: " << ref.outValues.size(); for (size_t i = 0; i < ref.paraValues.size(); i++) { + VLOG(MKLDNN_TESTS) << "compare param index: " << i; EXPECT_LE(fabs(compareVector(ref.paraValues[i], dnn.paraValues[i])), eps); } } diff --git a/paddle/gserver/tests/MKLDNNTester.h b/paddle/gserver/tests/MKLDNNTester.h index 19d8848f74..a99715cff0 100644 --- a/paddle/gserver/tests/MKLDNNTester.h +++ b/paddle/gserver/tests/MKLDNNTester.h @@ -128,13 +128,13 @@ private: /** * Get delta percent - * if many(>failRate) wrong(abs(dnn-ref)/abs(ref)>thres) points return the - * max(diff/ref) - * else return sum(abs(a-b)) / sum(abs(b)) + * if many(>failRate) wrong(abs(val-ref)/abs(ref) > thres) points + * return the max(diff/ref) + * else return sum(abs(diff)) / sum(abs(ref)) * The return value should be smaller than eps when passing. */ - static double getDelta(const real* d1, - const real* d2, + static double getDelta(const real* refer, + const real* value, size_t len, const float failRate = 1e-3, const float thres = 0.1); diff --git a/paddle/gserver/tests/test_MKLDNN.cpp b/paddle/gserver/tests/test_MKLDNN.cpp index 85d4f437c2..b99192ca0f 100644 --- a/paddle/gserver/tests/test_MKLDNN.cpp +++ b/paddle/gserver/tests/test_MKLDNN.cpp @@ -234,8 +234,7 @@ static void getMKLDNNBatchNormConfig(TestConfig& cfg, cfg.inputDefs.push_back({INPUT_DATA, "layer_2_moving_var", 1, size_t(pm.ic)}); cfg.inputDefs.back().isStatic = true; LayerInputConfig* input = cfg.layerConfig.add_inputs(); - // TODO(TJ): uncomment me when refine and support comparing all zeroes vector - // cfg.layerConfig.set_active_type("relu"); + cfg.layerConfig.set_active_type("relu"); cfg.layerConfig.add_inputs(); cfg.layerConfig.add_inputs(); ImageConfig* img_conf = input->mutable_image_conf(); From 3eb42bfd6f3affbe856d731046a5e4e63c6c42da Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Mon, 30 Oct 2017 21:32:05 +0800 Subject: [PATCH 3/4] move test_CompareMKLDNNandCPU to test_MKLDNN and remove unused code --- paddle/gserver/tests/MKLDNNTester.cpp | 22 +-- paddle/gserver/tests/MKLDNNTester.h | 10 +- paddle/gserver/tests/mkldnn_branch_net.conf | 142 ++++++++++++++++++ paddle/gserver/tests/mkldnn_branches_fc.conf | 58 ------- .../gserver/tests/mkldnn_branches_pool.conf | 60 -------- ...nches_conv.conf => mkldnn_simple_net.conf} | 48 +++--- paddle/gserver/tests/test_MKLDNN.cpp | 8 +- paddle/math/MKLDNNMatrix.h | 5 + paddle/trainer/tests/CMakeLists.txt | 16 -- .../sample_trainer_config_branch_net.conf | 133 ---------------- .../sample_trainer_config_simple_net.conf | 68 --------- paddle/trainer/tests/test_CompareTwoNets.cpp | 11 -- 12 files changed, 197 insertions(+), 384 deletions(-) create mode 100644 paddle/gserver/tests/mkldnn_branch_net.conf delete mode 100644 paddle/gserver/tests/mkldnn_branches_fc.conf delete mode 100644 paddle/gserver/tests/mkldnn_branches_pool.conf rename paddle/gserver/tests/{mkldnn_branches_conv.conf => mkldnn_simple_net.conf} (64%) delete mode 100644 paddle/trainer/tests/sample_trainer_config_branch_net.conf delete mode 100644 paddle/trainer/tests/sample_trainer_config_simple_net.conf diff --git a/paddle/gserver/tests/MKLDNNTester.cpp b/paddle/gserver/tests/MKLDNNTester.cpp index c345a16221..7670cb88fb 100644 --- a/paddle/gserver/tests/MKLDNNTester.cpp +++ b/paddle/gserver/tests/MKLDNNTester.cpp @@ -521,12 +521,16 @@ void MKLDNNTester::getOutResult(const std::string& configPath, gradientMachine->forward(in.inArgs[i], &outArgs, PASS_TRAIN); // save forward result for (size_t k = 0; k < outArgs.size(); k++) { - MatrixPtr value = Matrix::create(outArgs[k].value->getHeight(), - outArgs[k].value->getWidth(), - false, - false); - value->copyFrom(*outArgs[k].value); - out.outValues.push_back(value); + const MatrixPtr& src = outArgs[k].value; + MatrixPtr dst = + Matrix::create(src->getHeight(), src->getWidth(), false, false); + if (typeid(*src) == typeid(MKLDNNMatrix)) { + MKLDNNMatrixPtr dnnSrc = std::dynamic_pointer_cast(src); + dnnSrc->copyTo(*dst); + } else { + dst->copyFrom(*src); + } + out.outValues.push_back(dst); } // random backward input @@ -559,9 +563,9 @@ void MKLDNNTester::compareResult(DataOut& ref, DataOut& dnn, float eps) { } } -void MKLDNNTester::runBranchesTest(const std::string& configPath, - size_t iter, - float eps) { +void MKLDNNTester::runNetTest(const std::string& configPath, + size_t iter, + float eps) { DataIn in; initArgument(in, configPath, iter); DataOut outCpu, outDnn; diff --git a/paddle/gserver/tests/MKLDNNTester.h b/paddle/gserver/tests/MKLDNNTester.h index a99715cff0..ca55a45bc7 100644 --- a/paddle/gserver/tests/MKLDNNTester.h +++ b/paddle/gserver/tests/MKLDNNTester.h @@ -85,17 +85,17 @@ public: bool printDetails = false, size_t iter = 3, float epsilon = 1e-4); - static void runBranchesTest(const std::string& configPath, - size_t iter = 3, - float eps = 1e-4); + static void runNetTest(const std::string& configPath, + size_t iter = 2, + float eps = 1e-4); static void initArgument(DataIn& data, const std::string& configPath, - size_t iter = 3); + size_t iter = 2); static void getOutResult(const std::string& configPath, DataIn& in, DataOut& out, bool use_mkldnn, - size_t iter = 3); + size_t iter = 2); private: void reset(const TestConfig& dnn, const TestConfig& ref, size_t batchSize); diff --git a/paddle/gserver/tests/mkldnn_branch_net.conf b/paddle/gserver/tests/mkldnn_branch_net.conf new file mode 100644 index 0000000000..8d5146abb0 --- /dev/null +++ b/paddle/gserver/tests/mkldnn_branch_net.conf @@ -0,0 +1,142 @@ +# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from paddle.trainer_config_helpers import * + +settings(batch_size=16) +channels = get_config_arg("channels", int, 2) + +def two_conv(input, group_name): + out1 = img_conv_layer(input=input, + name=group_name+'_conv1_', + filter_size=1, + num_filters=channels, + padding=0, + shared_biases=True, + act=ReluActivation()) + + out2 = img_conv_layer(input=input, + name=group_name+'_conv2_', + filter_size=3, + num_filters=channels, + padding=1, + shared_biases=True, + act=ReluActivation()) + return out1, out2 + +def two_conv_bn(input, group_name): + out1, out2 = two_conv(input, group_name) + out1 = batch_norm_layer(input=out1, + name=group_name+'_bn1_', + use_global_stats=False, + act=ReluActivation()) + + out2 = batch_norm_layer(input=out2, + name=group_name+'_bn2_', + use_global_stats=False, + act=ReluActivation()) + return out1, out2 + +def two_conv_pool(input, group_name): + out1, out2 = two_conv(input, group_name) + out1 = img_pool_layer(input=out1, + name=group_name+'_pool1_', + pool_size=3, + stride=2, + padding=0, + pool_type=MaxPooling()) + + out2 = img_pool_layer(input=out2, + name=group_name+'_pool2_', + pool_size=5, + stride=2, + padding=1, + pool_type=MaxPooling()) + return out1, out2 + +def two_fc(input, group_name): + out1 = fc_layer(input=input, + name=group_name+'_fc1_', + size=channels, + bias_attr=False, + act=LinearActivation()) + + out2 = fc_layer(input=input, + name=group_name+'_fc2_', + size=channels, + bias_attr=False, + act=LinearActivation()) + return out1, out2 + +data = data_layer(name ="input", size=channels*16*16) + +tmp = img_conv_layer(input=data, + num_channels=channels, + filter_size=3, + num_filters=channels, + padding=1, + shared_biases=True, + act=ReluActivation()) + +a1, a2 = two_conv(tmp, 'conv_branch') +tmp = addto_layer(input=[a1, a2], + act=ReluActivation(), + bias_attr=False) + +tmp = img_pool_layer(input=tmp, + pool_size=3, + stride=2, + padding=1, + pool_type=AvgPooling()) + +b1, b2 = two_conv_pool(tmp, 'pool_branch') +tmp = concat_layer(input=[b1, b2]) + +tmp = img_pool_layer(input=tmp, + num_channels=channels*2, + pool_size=3, + stride=2, + padding=1, + pool_type=MaxPooling()) + +tmp = img_conv_layer(input=tmp, + filter_size=3, + num_filters=channels, + padding=1, + stride=2, + shared_biases=True, + act=LinearActivation(), + bias_attr=False) + +tmp = batch_norm_layer(input=tmp, + use_global_stats=False, + act=ReluActivation()) + +c1, c2 = two_conv_bn(tmp, 'bn_branch') +tmp = addto_layer(input=[c1, c2], + act=ReluActivation(), + bias_attr=False) + +tmp = fc_layer(input=tmp, size=channels, + bias_attr=True, + act=ReluActivation()) + +d1, d2 = two_fc(tmp, 'fc_branch') +tmp = addto_layer(input=[d1, d2]) + +out = fc_layer(input=tmp, size=10, + bias_attr=True, + act=SoftmaxActivation()) + +outputs(out) diff --git a/paddle/gserver/tests/mkldnn_branches_fc.conf b/paddle/gserver/tests/mkldnn_branches_fc.conf deleted file mode 100644 index fb85425c2b..0000000000 --- a/paddle/gserver/tests/mkldnn_branches_fc.conf +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=16) -channels = get_config_arg("channels", int, 2) - -def two_fc(input, group_name): - out1 = fc_layer(input=input, - name=group_name+'_fc1', - size=channels, - bias_attr=False, - act=LinearActivation()) - - out2 = fc_layer(input=input, - name=group_name+'_fc2', - size=channels, - bias_attr=False, - act=LinearActivation()) - return out1, out2 - -data = data_layer(name ="input", size=channels*16*16) - -conv = img_conv_layer(input=data, - num_channels=channels, - filter_size=3, - num_filters=channels, - padding=1, - shared_biases=True, - act=LinearActivation()) - -pool = img_pool_layer(input=conv, - pool_size=3, - stride=2, - padding=1, - pool_type=AvgPooling()) - -a1, a2 = two_fc(input=pool, group_name='a') - -concat = concat_layer(input=[a1, a2]) - -b1, b2 = two_fc(input=pool, group_name='b') - -addto = addto_layer(input=[b1, b2]) - -outputs([concat, addto]) diff --git a/paddle/gserver/tests/mkldnn_branches_pool.conf b/paddle/gserver/tests/mkldnn_branches_pool.conf deleted file mode 100644 index ca17c74752..0000000000 --- a/paddle/gserver/tests/mkldnn_branches_pool.conf +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=16) -channels = get_config_arg("channels", int, 2) - -def two_pool(input, group_name): - out1 = img_pool_layer(input=input, - name=group_name+'_pool1', - pool_size=3, - stride=2, - padding=0, - pool_type=MaxPooling()) - - out2 = img_pool_layer(input=input, - name=group_name+'_pool2', - pool_size=5, - stride=2, - padding=1, - pool_type=MaxPooling()) - return out1, out2 - -data = data_layer(name ="input", size=channels*16*16) - -conv = img_conv_layer(input=data, - num_channels=channels, - filter_size=3, - num_filters=channels, - padding=1, - shared_biases=True, - act=LinearActivation()) - -pool = img_pool_layer(input=conv, - pool_size=3, - stride=1, - padding=1, - pool_type=AvgPooling()) - -a1, a2 = two_pool(input=pool, group_name='a') - -concat = concat_layer(input=[a1, a2]) - -b1, b2 = two_pool(input=pool, group_name='b') - -addto = addto_layer(input=[b1, b2]) - -outputs([concat, addto]) diff --git a/paddle/gserver/tests/mkldnn_branches_conv.conf b/paddle/gserver/tests/mkldnn_simple_net.conf similarity index 64% rename from paddle/gserver/tests/mkldnn_branches_conv.conf rename to paddle/gserver/tests/mkldnn_simple_net.conf index 2628509db4..8bbe91e56d 100644 --- a/paddle/gserver/tests/mkldnn_branches_conv.conf +++ b/paddle/gserver/tests/mkldnn_simple_net.conf @@ -17,40 +17,48 @@ from paddle.trainer_config_helpers import * settings(batch_size=16) channels = get_config_arg("channels", int, 2) -def two_conv(input, group_name): - out1 = img_conv_layer(input=input, - name=group_name+'_conv1', - filter_size=1, - num_filters=channels, - padding=0, - shared_biases=True, - act=ReluActivation()) +data = data_layer(name ="input", size=channels*16*16) - out2 = img_conv_layer(input=input, - name=group_name+'_conv2', +tmp = img_conv_layer(input=data, + num_channels=channels, filter_size=3, num_filters=channels, padding=1, shared_biases=True, act=ReluActivation()) - return out1, out2 -data = data_layer(name ="input", size=channels*16*16) +tmp = img_pool_layer(input=tmp, + pool_size=3, + stride=1, + padding=0, + pool_type=AvgPooling()) -conv = img_conv_layer(input=data, - num_channels=channels, +tmp = img_conv_layer(input=tmp, filter_size=3, num_filters=channels, padding=1, shared_biases=True, - act=ReluActivation()) + act=LinearActivation(), + bias_attr=False) -a1, a2 = two_conv(input=conv, group_name='a') +tmp = batch_norm_layer(input=tmp, + use_global_stats=False, + act=ReluActivation()) -concat = concat_layer(input=[a1, a2]) +tmp = img_pool_layer(input=tmp, + pool_size=3, + stride=2, + padding=1, + pool_type=MaxPooling()) -b1, b2 = two_conv(input=conv, group_name='b') +tmp = fc_layer(input=tmp, + size=channels, + bias_attr=False, + act=ReluActivation()) -addto = addto_layer(input=[b1, b2]) +out = fc_layer(input=tmp, + size=10, + bias_attr=True, + act=SoftmaxActivation()) -outputs([concat, addto]) +outputs(out) diff --git a/paddle/gserver/tests/test_MKLDNN.cpp b/paddle/gserver/tests/test_MKLDNN.cpp index b99192ca0f..d60b0f04a1 100644 --- a/paddle/gserver/tests/test_MKLDNN.cpp +++ b/paddle/gserver/tests/test_MKLDNN.cpp @@ -308,15 +308,15 @@ TEST(MKLDNNActivation, Activations) { } DECLARE_string(config_args); -TEST(MKLDNNLayer, branches) { - std::vector cases = {"conv", "pool", "fc"}; +TEST(MKLDNNNet, net) { + std::vector cases = {"simple", "branch"}; for (auto name : cases) { - std::string config = "./gserver/tests/mkldnn_branches_" + name + ".conf"; + std::string config = "./gserver/tests/mkldnn_" + name + "_net.conf"; for (auto channels : {2, 32}) { std::ostringstream oss; oss << "channels=" << channels; FLAGS_config_args = oss.str(); - MKLDNNTester::runBranchesTest(config); + MKLDNNTester::runNetTest(config); } } } diff --git a/paddle/math/MKLDNNMatrix.h b/paddle/math/MKLDNNMatrix.h index 5f5b819017..54cfefe23b 100644 --- a/paddle/math/MKLDNNMatrix.h +++ b/paddle/math/MKLDNNMatrix.h @@ -102,6 +102,11 @@ public: m_->copyFrom(src); } + void copyTo(Matrix& dst) { + // TODO(TJ): reorder data if this format is not nchw or x + dst.copyFrom(*m_); + } + public: /** * Reorder this MKLDNNMatrix from other format. diff --git a/paddle/trainer/tests/CMakeLists.txt b/paddle/trainer/tests/CMakeLists.txt index 5ebbb99c94..f01ad4142d 100644 --- a/paddle/trainer/tests/CMakeLists.txt +++ b/paddle/trainer/tests/CMakeLists.txt @@ -37,22 +37,6 @@ add_test(NAME test_CompareTwoNets --config_file_a=trainer/tests/sample_trainer_config_qb_rnn.conf --config_file_b=trainer/tests/sample_trainer_config_rnn.conf WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) -################ test_CompareMKLDNNandCPU ###################### -if(WITH_MKLDNN) - macro(gen_command VAR_NAME CONFIG_FILE) - set(${VAR_NAME} "${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh" "-d" "${PADDLE_SOURCE_DIR}/python/" - "${CMAKE_CURRENT_BINARY_DIR}/test_CompareMKLDNNandCPU --use_gpu=False" - "--config_file_a=trainer/tests/${CONFIG_FILE} --use_mkldnn_a=True" - "--config_file_b=trainer/tests/${CONFIG_FILE} --use_mkldnn_b=False" - "WORKING_DIRECTORY" "${PADDLE_SOURCE_DIR}/paddle/") - endmacro() - add_unittest_without_exec(test_CompareMKLDNNandCPU test_CompareTwoNets.cpp) - gen_command(compare_simple_net "sample_trainer_config_simple_net.conf") - gen_command(compare_branch_net "sample_trainer_config_branch_net.conf") - add_test(NAME test_CompareMKLDNNandCPU_simple_net COMMAND ${compare_simple_net}) - add_test(NAME test_CompareMKLDNNandCPU_branch_net COMMAND ${compare_branch_net}) -endif() - ############### test_CompareTwoOpts ################### add_unittest_without_exec(test_CompareTwoOpts test_CompareTwoOpts.cpp) diff --git a/paddle/trainer/tests/sample_trainer_config_branch_net.conf b/paddle/trainer/tests/sample_trainer_config_branch_net.conf deleted file mode 100644 index 3d8fb77a11..0000000000 --- a/paddle/trainer/tests/sample_trainer_config_branch_net.conf +++ /dev/null @@ -1,133 +0,0 @@ -# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -################################### Data Configuration ################################### -TrainData(ProtoData(files = "trainer/tests/mnist.list")) -################################### Algorithm Configuration ################################### -settings(batch_size = 128, - learning_method = MomentumOptimizer(momentum=0.5, sparse=False)) -################################### Network Configuration ################################### -data = data_layer(name ="input", size=784) - -tmp = img_conv_layer(input=data, - num_channels=1, - filter_size=3, - num_filters=32, - padding=1, - shared_biases=True, - act=ReluActivation()) - -a1 = img_conv_layer(input=tmp, - filter_size=1, - num_filters=32, - padding=0, - shared_biases=True, - act=ReluActivation()) - -a2 = img_conv_layer(input=tmp, - filter_size=3, - num_filters=32, - padding=1, - shared_biases=True, - act=ReluActivation()) - -tmp = addto_layer(input=[a1, a2], - act=ReluActivation(), - bias_attr=False) - -tmp = img_pool_layer(input=tmp, - pool_size=3, - stride=2, - padding=1, - pool_type=AvgPooling()) - -b1 = img_conv_layer(input=tmp, - filter_size=3, - num_filters=32, - padding=1, - shared_biases=True, - act=ReluActivation()) - -b1 = img_pool_layer(input=b1, - pool_size=3, - stride=2, - padding=0, - pool_type=MaxPooling()) - -b2 = img_conv_layer(input=tmp, - filter_size=3, - num_filters=64, - padding=1, - shared_biases=True, - act=ReluActivation()) - -b2 = img_pool_layer(input=b2, - pool_size=5, - stride=2, - padding=1, - pool_type=MaxPooling()) - -tmp = concat_layer(input=[b1, b2]) - -tmp = img_pool_layer(input=tmp, - num_channels=96, - pool_size=3, - stride=2, - padding=1, - pool_type=MaxPooling()) - -tmp = img_conv_layer(input=tmp, - filter_size=3, - num_filters=32, - padding=1, - shared_biases=True, - act=LinearActivation(), - bias_attr=False) - -tmp = batch_norm_layer(input=tmp, - use_global_stats=False, - act=ReluActivation()) - -c1 = img_conv_layer(input=tmp, - filter_size=1, - num_filters=32, - padding=0, - shared_biases=True, - act=ReluActivation()) - -c2 = img_conv_layer(input=tmp, - filter_size=3, - num_filters=32, - padding=1, - shared_biases=True, - act=ReluActivation()) - -tmp = addto_layer(input=[c1, c2], - act=ReluActivation(), - bias_attr=False) - -tmp = fc_layer(input=tmp, size=64, - bias_attr=False, - act=TanhActivation()) - -output = fc_layer(input=tmp, size=10, - bias_attr=True, - act=SoftmaxActivation()) - -lbl = data_layer(name ="label", size=10) - -cost = classification_cost(input=output, label=lbl) -outputs(cost) diff --git a/paddle/trainer/tests/sample_trainer_config_simple_net.conf b/paddle/trainer/tests/sample_trainer_config_simple_net.conf deleted file mode 100644 index c615b5622b..0000000000 --- a/paddle/trainer/tests/sample_trainer_config_simple_net.conf +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -################################### Data Configuration ################################### -TrainData(ProtoData(files = "trainer/tests/mnist.list")) -################################### Algorithm Configuration ################################### -settings(batch_size = 128, - learning_method = MomentumOptimizer(momentum=0.5, sparse=False)) -################################### Network Configuration ################################### -data = data_layer(name ="input", size=784) - -tmp = img_conv_layer(input=data, - num_channels=1, - filter_size=3, - num_filters=32, - padding=1, - shared_biases=True, - act=ReluActivation()) - -tmp = img_pool_layer(input=tmp, - pool_size=3, - stride=2, - padding=1, - pool_type=AvgPooling()) - -tmp = img_conv_layer(input=tmp, - filter_size=3, - num_filters=32, - padding=1, - shared_biases=True, - act=LinearActivation(), - bias_attr=False) - -tmp = batch_norm_layer(input=tmp, - use_global_stats=False, - act=ReluActivation()) - -tmp = img_pool_layer(input=tmp, - pool_size=3, - stride=2, - padding=1, - pool_type=MaxPooling()) - -tmp = fc_layer(input=tmp, size=64, - bias_attr=True, - act=ReluActivation()) - -output = fc_layer(input=tmp, size=10, - bias_attr=True, - act=SoftmaxActivation()) - -lbl = data_layer(name ="label", size=10) - -cost = classification_cost(input=output, label=lbl) -outputs(cost) diff --git a/paddle/trainer/tests/test_CompareTwoNets.cpp b/paddle/trainer/tests/test_CompareTwoNets.cpp index 307645d2c3..94f65e545d 100644 --- a/paddle/trainer/tests/test_CompareTwoNets.cpp +++ b/paddle/trainer/tests/test_CompareTwoNets.cpp @@ -26,15 +26,12 @@ DECLARE_int32(gpu_id); DECLARE_bool(local); DECLARE_bool(use_gpu); -DECLARE_bool(use_mkldnn); DECLARE_string(config); DECLARE_string(nics); DEFINE_string(config_file_a, "", "config of one network to compare"); DEFINE_string(config_file_b, "", "config of another network to compare"); -DEFINE_bool(use_mkldnn_a, false, "whether to use mkldnn to run config_file_a"); -DEFINE_bool(use_mkldnn_b, false, "whether to use mkldnn to run config_file_b"); DEFINE_bool(need_high_accuracy, false, "whether need to run in double accuracy"); @@ -131,12 +128,6 @@ void compareGradient(ComData& comDataA, ComData& comDataB) { matA.getWidth()); } - if (FLAGS_use_mkldnn_a || FLAGS_use_mkldnn_b) { - // some format of mkldnn parameter is different with cpu - // test_MKLDNN will check the parameters - return; - } - vector& parametersA = comDataA.parameters; vector& parametersB = comDataB.parameters; @@ -176,12 +167,10 @@ void compareGradient(ComData& comDataA, ComData& comDataB) { TEST(Trainer, create) { ComData dataA; - FLAGS_use_mkldnn = FLAGS_use_mkldnn_a; calcGradient(dataA, FLAGS_config_file_a); LOG(INFO) << "\n\nforwardBackward of Network A is finished\n\n"; ComData dataB; - FLAGS_use_mkldnn = FLAGS_use_mkldnn_b; calcGradient(dataB, FLAGS_config_file_b); LOG(INFO) << "\n\nforwardBackward of the Network B is finished\n\n"; From 5bd188651740ac577f9cdc97b54137474031f122 Mon Sep 17 00:00:00 2001 From: Tao Luo Date: Wed, 1 Nov 2017 21:56:26 +0800 Subject: [PATCH 4/4] update the VGG benchmark on CentOs6.3 and Intel 6148 --- benchmark/IntelOptimizedPaddle.md | 84 +++++++++++++++---------------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/benchmark/IntelOptimizedPaddle.md b/benchmark/IntelOptimizedPaddle.md index f2744c075d..1bf9ea9df0 100644 --- a/benchmark/IntelOptimizedPaddle.md +++ b/benchmark/IntelOptimizedPaddle.md @@ -1,48 +1,48 @@ -# Benchmark - -Machine: - +# Benchmark + +Machine: + - Server - - Intel(R) Xeon(R) Gold 6148M CPU @ 2.40GHz, 2 Sockets, 20 Cores per socket + - Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz, 2 Sockets, 20 Cores per socket - Laptop - DELL XPS15-9560-R1745: i7-7700HQ 8G 256GSSD - - i5 MacBook Pro (Retina, 13-inch, Early 2015) -- Desktop - - i7-6700k - -System: CentOS 7.3.1611 - -PaddlePaddle: commit cfa86a3f70cb5f2517a802f32f2c88d48ab4e0e0 - + - i5 MacBook Pro (Retina, 13-inch, Early 2015) +- Desktop + - i7-6700k + +System: CentOS release 6.3 (Final), Docker 1.12.1. + +PaddlePaddle: paddlepaddle/paddle:latest (TODO: will rerun after 0.11.0) + - MKL-DNN tag v0.10 - MKLML 2018.0.20170720 -- OpenBLAS v0.2.20 - -On each machine, we will test and compare the performance of training on single node using MKL-DNN / MKLML / OpenBLAS respectively. - -## Benchmark Model - -### Server -Test on batch size 64, 128, 256 on Intel(R) Xeon(R) Gold 6148M CPU @ 2.40GHz - -Input image size - 3 * 224 * 224, Time: images/second - -- VGG-19 - -| BatchSize | 64 | 128 | 256 | -|--------------|-------| -----| --------| -| OpenBLAS | 7.86 | 9.02 | 10.62 | -| MKLML | 11.80 | 13.43 | 16.21 | -| MKL-DNN | 29.07 | 30.40 | 31.06 | - - -chart on batch size 128 -TBD - +- OpenBLAS v0.2.20 + +On each machine, we will test and compare the performance of training on single node using MKL-DNN / MKLML / OpenBLAS respectively. + +## Benchmark Model + +### Server +Test on batch size 64, 128, 256 on Intel(R) Xeon(R) Gold 6148M CPU @ 2.40GHz + +Input image size - 3 * 224 * 224, Time: images/second + +- VGG-19 + +| BatchSize | 64 | 128 | 256 | +|--------------|-------| -----| --------| +| OpenBLAS | 7.82 | 8.62 | 10.34 | +| MKLML | 11.02 | 12.86 | 15.33 | +| MKL-DNN | 27.69 | 28.8 | 29.27 | + + +chart on batch size 128 +TBD + - ResNet - - GoogLeNet - -### Laptop -TBD -### Desktop -TBD + - GoogLeNet + +### Laptop +TBD +### Desktop +TBD