Merge branch 'develop' of github.com:PaddlePaddle/Paddle into parallel_graph_mode

6 years ago · a760a550b0
parent fd144954ed 363bf8a4d8
commit a760a550b0
65 changed files with 3152 additions and 483 deletions
--- a/README.md
+++ b/README.md
@ -19,6 +19,15 @@ Our vision is to enable deep learning for everyone via PaddlePaddle.
 Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest feature of PaddlePaddle.
 欢迎来到 PaddlePaddle GitHub
 PaddlePaddle (PArallel Distributed Deep LEarning) 是一个简单易用、高效灵活、可扩展的深度学习平台，最初由百度科学家和工程师共同开发，目的是将深度学习技术应用到百度的众多产品中。
 我们的愿景是让每个人都能通过PaddlePaddle接触深度学习
 跟进PaddlePaddle最新特性请参考我们的[版本说明](https://github.com/PaddlePaddle/Paddle/releases)
 ### Latest PaddlePaddle Release: [Fluid 1.2.0](https://github.com/PaddlePaddle/Paddle/tree/release/1.2)
 ### Install Latest Stable Release:
 ```
@ -34,6 +43,23 @@ pip install paddlepaddle-gpu==1.2.0.post85
 # For installation on other platform, refer to http://paddlepaddle.org/
 ```
 ### PaddlePaddle最新版本: [Fluid 1.2.0](https://github.com/PaddlePaddle/Paddle/tree/release/1.2)
 ### 安装最新稳定版本:
 ```
 # Linux CPU
 pip install paddlepaddle
 # Linux GPU cuda9cudnn7
 pip install paddlepaddle-gpu
 # Linux GPU cuda8cudnn7
 pip install paddlepaddle-gpu==1.2.0.post87
 # Linux GPU cuda8cudnn5
 pip install paddlepaddle-gpu==1.2.0.post85
 # 其他平台上的安装指引请参考 http://paddlepaddle.org/
 ```
 ## Features
 - **Flexibility**
@ -74,10 +100,38 @@ pip install paddlepaddle-gpu==1.2.0.post85
    Baidu and it has achieved a significant impact. We hope you can also explore
    the capability of PaddlePaddle to make an impact on your product.
 ## 特点
 - **灵活性**
    PaddlePaddle支持丰富的神经网络架构和优化算法。易于配置复杂模型，例如带有注意力机制或复杂记忆连接的神经网络机器翻译模型。
 -  **高效性**
    为了高效使用异步计算资源，PaddlePaddle对框架的不同层进行优化，包括计算、存储、架构和通信。下面是一些样例：
    - 通过SSE/AVX 内置函数、BLAS库(例如MKL、OpenBLAS、cuBLAS)或定制的CPU/GPU内核优化数学操作。
    - 通过MKL-DNN库优化CNN网络
    - 高度优化循环网络，无需执行 `padding` 操作即可处理 **变长** 序列
    - 针对高维稀疏数据模型，优化了局部和分布式训练。
 - **稳定性**
    有了 PaddlePaddle，使得利用各种CPU/GPU和机器来加速训练变得简单。PaddlePaddle 通过优化通信可以实现巨大吞吐量和快速执行。
 - **连接产品**
    另外，PaddlePaddle 的设计也易于部署。在百度，PaddlePaddle 已经部署到含有巨大用户量的产品和服务上，包括广告点击率（CTR）预测、大规模图像分类、光学字符识别（OCR）、搜索排序，计算机病毒检测、推荐系统等等。PaddlePaddle广泛应用于百度产品中，产生了非常重要的影响。我们希望您也能探索 PaddlePaddle 的能力，为您的产品创造新的影响力和效果。
 ## Installation
 It is recommended to read [this doc](http://paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/install/index_cn.html) on our website.
 ## 安装
 推荐阅读官网上的[安装说明](http://paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/install/index_cn.html) 
 ## Documentation
 We provide [English](http://paddlepaddle.org/documentation/docs/en/1.2/getstarted/index_en.html) and
@ -99,10 +153,37 @@ We provide [English](http://paddlepaddle.org/documentation/docs/en/1.2/getstarte
   We appreciate your contributions!
 ## 文档
 我们提供[英文](http://paddlepaddle.org/documentation/docs/en/1.2/getstarted/index_en.html)和
 [中文](http://paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/index.html) 文档
 - [深度学习101](https://github.com/PaddlePaddle/book)
  或许您想从这个在线交互式书籍开始，可以在Jupyter Notebook中运行
 - [分布式训练](http://paddlepaddle.org/documentation/docs/zh/1.2/user_guides/howto/training/cluster_howto.html)
  可以在MPI集群上运行分布式训练任务
 - [Python API](http://paddlepaddle.org/documentation/docs/zh/1.2/api_cn/index_cn.html)
   新的API支持代码更少更简洁的程序
 - [贡献方式](http://paddlepaddle.org/documentation/docs/zh/1.2/advanced_usage/development/contribute_to_paddle/index_cn.html)
   欢迎您的贡献!
 ## Ask Questions
 You are welcome to submit questions and bug reports as [Github Issues](https://github.com/PaddlePaddle/Paddle/issues).
 ## 答疑
 欢迎您将问题和bug报告以[Github Issues](https://github.com/PaddlePaddle/Paddle/issues)的形式提交
 ## Copyright and License
 PaddlePaddle is provided under the [Apache-2.0 license](LICENSE).
 ## 版权和许可证
 PaddlePaddle由[Apache-2.0 license](LICENSE)提供
--- a/benchmark/fluid/fluid_benchmark.py
+++ b/benchmark/fluid/fluid_benchmark.py
@ -81,9 +81,11 @@ def dist_transpile(trainer_id, args, train_prog, startup_prog):
    # the role, should be either PSERVER or TRAINER
    training_role = os.getenv("PADDLE_TRAINING_ROLE")
-    config = distribute_transpiler.DistributeTranspilerConfig()
+    config = fluid.DistributeTranspilerConfig()
    config.slice_var_up = not args.no_split_var
    config.min_block_size = 1048576
    t = distribute_transpiler.DistributeTranspiler(config=config)
    t.transpile(
        trainer_id,
        # NOTE: *MUST* use train_prog, for we are using with guard to
--- a/cmake/external/brpc.cmake
+++ b/cmake/external/brpc.cmake
@ -14,14 +14,16 @@
 INCLUDE(ExternalProject)
-find_library(SSL_LIBRARY NAMES ssl)
+find_package(OpenSSL REQUIRED) 
 message(STATUS "ssl:" ${OPENSSL_SSL_LIBRARY})
 message(STATUS "crypto:" ${OPENSSL_CRYPTO_LIBRARY})
 ADD_LIBRARY(ssl SHARED IMPORTED GLOBAL)
-SET_PROPERTY(TARGET ssl PROPERTY IMPORTED_LOCATION ${SSL_LIBRARY})
+SET_PROPERTY(TARGET ssl PROPERTY IMPORTED_LOCATION ${OPENSSL_SSL_LIBRARY})
 find_library(CRYPTO_LIBRARY NAMES crypto)
 ADD_LIBRARY(crypto SHARED IMPORTED GLOBAL)
-SET_PROPERTY(TARGET crypto PROPERTY IMPORTED_LOCATION ${CRYPTO_LIBRARY})
+SET_PROPERTY(TARGET crypto PROPERTY IMPORTED_LOCATION ${OPENSSL_CRYPTO_LIBRARY})
 SET(BRPC_SOURCES_DIR ${THIRD_PARTY_PATH}/brpc)
 SET(BRPC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/brpc)
@ -31,14 +33,15 @@ SET(BRPC_LIBRARIES "${BRPC_INSTALL_DIR}/lib/libbrpc.a" CACHE FILEPATH "brpc libr
 INCLUDE_DIRECTORIES(${BRPC_INCLUDE_DIR})
 # Reference https://stackoverflow.com/questions/45414507/pass-a-list-of-prefix-paths-to-externalproject-add-in-cmake-args
-set(prefix_path "${THIRD_PARTY_PATH}/install/gflags|${THIRD_PARTY_PATH}/install/leveldb|${THIRD_PARTY_PATH}/install/snappy|${THIRD_PARTY_PATH}/install/gtest|${THIRD_PARTY_PATH}/install/protobuf|${THIRD_PARTY_PATH}/install/zlib")
+set(prefix_path "${THIRD_PARTY_PATH}/install/gflags|${THIRD_PARTY_PATH}/install/leveldb|${THIRD_PARTY_PATH}/install/snappy|${THIRD_PARTY_PATH}/install/gtest|${THIRD_PARTY_PATH}/install/protobuf|${THIRD_PARTY_PATH}/install/zlib|${THIRD_PARTY_PATH}/install/glog")
 # If minimal .a is need, you can set  WITH_DEBUG_SYMBOLS=OFF
 ExternalProject_Add(
    extern_brpc
    ${EXTERNAL_PROJECT_LOG_ARGS}
    # TODO(gongwb): change to de newst repo when they changed.
    GIT_REPOSITORY  "https://github.com/gongweibao/brpc"
-    GIT_TAG         "7dc04defad1fd4173aae170c3fcbde131b65155a"
+    GIT_TAG         "e9b67ec1b7458f2af5fae76451afe1e27e01b4b4"
    PREFIX          ${BRPC_SOURCES_DIR}
    UPDATE_COMMAND  ""
    CMAKE_ARGS      -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
@ -50,7 +53,7 @@ ExternalProject_Add(
                    -DCMAKE_POSITION_INDEPENDENT_CODE=ON
                    -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
                    -DCMAKE_PREFIX_PATH=${prefix_path}
-                    -DBRPC_WITH_GLOG=ON
+                    -DWITH_GLOG=ON
                    -DIOBUF_WITH_HUGE_BLOCK=ON
                    -DBRPC_WITH_RDMA=${WITH_BRPC_RDMA}
                    ${EXTERNAL_OPTIONAL_ARGS}
@ -65,5 +68,6 @@ ADD_LIBRARY(brpc STATIC IMPORTED GLOBAL)
 SET_PROPERTY(TARGET brpc PROPERTY IMPORTED_LOCATION ${BRPC_LIBRARIES})
 ADD_DEPENDENCIES(brpc extern_brpc)
 add_definitions(-DBRPC_WITH_GLOG)
 LIST(APPEND external_project_dependencies brpc)
--- a/cmake/external/gtest.cmake
+++ b/cmake/external/gtest.cmake
@ -12,8 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-IF(WITH_TESTING)
+#FIXME:(gongwb) Move brpc's gtest dependency.
-    ENABLE_TESTING()
+IF(WITH_TESTING OR (WITH_DISTRIBUTE AND NOT WITH_GRPC))
    IF(WITH_TESTING)
        ENABLE_TESTING()
    ENDIF(WITH_TESTING)
    INCLUDE(ExternalProject)
    SET(GTEST_SOURCES_DIR ${THIRD_PARTY_PATH}/gtest)
@ -76,4 +80,4 @@ IF(WITH_TESTING)
    ADD_DEPENDENCIES(gtest_main extern_gtest)
    LIST(APPEND external_project_dependencies gtest gtest_main)
-ENDIF(WITH_TESTING)
+ENDIF(WITH_TESTING OR (WITH_DISTRIBUTE AND NOT WITH_GRPC))
--- a/cmake/external/leveldb.cmake
+++ b/cmake/external/leveldb.cmake
@ -24,8 +24,8 @@ ExternalProject_Add(
    extern_leveldb
    ${EXTERNAL_PROJECT_LOG_ARGS}
    PREFIX ${LEVELDB_SOURCES_DIR}
-    URL "https://github.com/google/leveldb/archive/v1.18.tar.gz"
+    GIT_REPOSITORY "https://github.com/google/leveldb"
-    URL_MD5 "73770de34a2a5ab34498d2e05b2b7fa0"
+    GIT_TAG v1.18
    CONFIGURE_COMMAND ""
    BUILD_COMMAND CXXFLAGS=-fPIC make -j ${NUM_OF_PROCESSOR} libleveldb.a
    INSTALL_COMMAND mkdir -p ${LEVELDB_INSTALL_DIR}/lib/ 
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@ -77,6 +77,8 @@ paddle.fluid.layers.sequence_softmax ArgSpec(args=['input', 'use_cudnn', 'name']
 paddle.fluid.layers.softmax ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(True, None))
 paddle.fluid.layers.pool2d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True))
 paddle.fluid.layers.pool3d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True))
 paddle.fluid.layers.adaptive_pool2d ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None))
 paddle.fluid.layers.adaptive_pool3d ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None))
 paddle.fluid.layers.batch_norm ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu', 'use_global_stats'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW', False, None, None, None, False, False, False))
 paddle.fluid.layers.beam_search_decode ArgSpec(args=['ids', 'scores', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.conv2d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None))
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@ -169,9 +169,12 @@ cc_library(variable_helper SRCS variable_helper.cc DEPS lod_tensor)
 cc_library(naive_executor SRCS naive_executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass variable_helper)
 if(WITH_DISTRIBUTE)
-  cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method sendrecvop_grpc cares grpc++_unsecure grpc_unsecure gpr graph_to_program_pass variable_helper)
+    cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog
-  set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
+        lod_rank_table feed_fetch_method sendrecvop_rpc  ${GLOB_DISTRIBUTE_DEPS} graph_to_program_pass variable_helper)
-  set_source_files_properties(executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
+
   set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
   set_source_files_properties(executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
 else()
  if(WITH_NGRAPH)
    if(NOT WIN32)
--- a/paddle/fluid/framework/details/CMakeLists.txt
+++ b/paddle/fluid/framework/details/CMakeLists.txt
@ -12,12 +12,19 @@ cc_library(multi_devices_graph_check_pass SRCS multi_devices_graph_check_pass.cc
 cc_library(variable_visitor SRCS variable_visitor.cc DEPS lod_tensor selected_rows)
 if(WITH_DISTRIBUTE)
    if(NOT WITH_GRPC)
        set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
        set_source_files_properties(reduce_op_handle.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
    endif()
 endif()
 if(WITH_GPU)
    nv_library(all_reduce_op_handle SRCS all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
            dynload_cuda variable_visitor)
    if(WITH_DISTRIBUTE)
        nv_library(reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base variable_visitor scope
-            ddim dynload_cuda selected_rows_functor sendrecvop_grpc)
+            ddim dynload_cuda selected_rows_functor sendrecvop_rpc)
    else()
        nv_library(reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base variable_visitor scope
            ddim dynload_cuda selected_rows_functor)
@ -30,7 +37,7 @@ else()
             variable_visitor)
    if(WITH_DISTRIBUTE)
        cc_library(reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base variable_visitor scope
-            ddim selected_rows_functor sendrecvop_grpc)
+            ddim selected_rows_functor sendrecvop_rpc)
    else()
        cc_library(reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base variable_visitor scope
            ddim selected_rows_functor)
--- a/paddle/fluid/framework/executor.cc
+++ b/paddle/fluid/framework/executor.cc
@ -157,9 +157,9 @@ void Executor::Close() {
 #ifdef PADDLE_WITH_DISTRIBUTE
  // TODO(typhoonzero): complete message will need to use real trainer_id,
  // except 0.
-  ::paddle::operators::distributed::RPCClient::GetInstance<
+  auto client =
-      ::paddle::operators::distributed::GRPCClient>(0)
+      paddle::operators::distributed::RPCClient::GetInstance<RPCCLIENT_T>(0);
-      ->SendComplete();
+  client->SendComplete();
 #endif
 }
--- a/paddle/fluid/framework/executor_thread_worker.cc
+++ b/paddle/fluid/framework/executor_thread_worker.cc
@ -26,6 +26,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/reader.h"
 #include "paddle/fluid/framework/variable_helper.h"
 #include "paddle/fluid/inference/io.h"
 #include "paddle/fluid/platform/cpu_helper.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/pybind/pybind.h"
 namespace paddle {
@ -174,6 +175,8 @@ void print_fetch_var(Scope* scope, std::string var_name) {
 }
 void ExecutorThreadWorker::TrainFiles() {
  platform::SetNumThreads(1);
  // todo: configurable
  SetDevice();
--- a/paddle/fluid/framework/ir/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/CMakeLists.txt
@ -42,6 +42,8 @@ pass_library(multi_batch_merge_pass base)
 pass_library(conv_bn_fuse_pass inference)
 pass_library(seqconv_eltadd_relu_fuse_pass inference)
 pass_library(is_test_pass base)
 pass_library(conv_elementwise_add_act_fuse_pass inference)
 pass_library(conv_elementwise_add2_act_fuse_pass inference)
 if(WITH_MKLDNN)
    pass_library(mkldnn_placement_pass base)
    pass_library(depthwise_conv_mkldnn_pass base)
--- a/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse.cc
+++ b/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse.cc
@ -0,0 +1,106 @@
 // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include <string>
 #include "paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.h"
 namespace paddle {
 namespace framework {
 namespace ir {
 #define GET_IR_NODE(node__) GET_IR_NODE_FROM_SUBGRAPH(node__, node__, pattern);
 #define GET_NODES                      \
  GET_IR_NODE(conv_op);                \
  GET_IR_NODE(conv_out);               \
  GET_IR_NODE(conv_filter);            \
  GET_IR_NODE(elementwise_add_op);     \
  GET_IR_NODE(elementwise_add_in_y);   \
  GET_IR_NODE(elementwise_add_out);    \
  GET_IR_NODE(elementwise_add_op_1);   \
  GET_IR_NODE(elementwise_add_in_y_1); \
  GET_IR_NODE(elementwise_add_out_1);  \
  GET_IR_NODE(act_op);                 \
  GET_IR_NODE(act_out);
 // Inherient the basic infomation from `base_desc`, and modify some fields.
 framework::proto::OpDesc PrepareOpDesc(
    const framework::proto::OpDesc& base_desc, const std::string& bias,
    const std::string& bias1, const std::string& activation,
    const std::string& output) {
  auto proto = base_desc;
  framework::OpDesc desc(proto, nullptr);
  desc.SetInput("Bias", {bias});
  desc.SetInput("ResidualData", {bias1});
  desc.SetAttr("activation", activation);
  desc.SetOutput("Output", {output});
  desc.SetAttr("is_test", true);
  desc.SetAttr("use_cudnn", false);
  return *desc.Proto();
 }
 std::unique_ptr<ir::Graph> ConvElementwiseAddActFusePass::ApplyImpl(
    std::unique_ptr<ir::Graph> graph) const {
  const std::string pattern_name = "conv_elementwise_add_act_fuse";
  FusePassBase::Init(pattern_name, graph.get());
  GraphPatternDetector gpd;
  auto* x = gpd.mutable_pattern()->NewNode("x")->AsInput()->assert_is_op_input(
      "conv2d", "Input");
  patterns::ConvElementwiseaddAct pattern(gpd.mutable_pattern(), pattern_name);
  pattern(x);
  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* g) {
    GET_NODES;
    auto base_op_desc = *conv_op->Op()->Proto();
    std::string bias_name = elementwise_add_in_y->Name();
    std::string bias1_name = elementwise_add_in_y_1->Name();
    std::string act_op_type = act_op->Op()->Type();
    std::string act_op_out = act_out->Name();
    auto new_op_proto = PrepareOpDesc(base_op_desc, bias_name, bias1_name,
                                      act_op_type, act_op_out);
    framework::OpDesc new_op_desc(new_op_proto, nullptr);
    // Create a new node for the fused op.
    auto new_conv_op = graph->CreateOpNode(&new_op_desc);
    // Link inputs and outputs.
    PADDLE_ENFORCE(subgraph.count(x));
    auto* conv_in_node = subgraph.at(x);
    IR_NODE_LINK_TO(conv_in_node, new_conv_op);            // Input
    IR_NODE_LINK_TO(conv_filter, new_conv_op);             // Filter
    IR_NODE_LINK_TO(elementwise_add_in_y, new_conv_op);    // Bias
    IR_NODE_LINK_TO(elementwise_add_in_y_1, new_conv_op);  // ResidualData
    IR_NODE_LINK_TO(new_conv_op, act_out);                 // Output
    // Delete the unneeded nodes.
    GraphSafeRemoveNodes(graph.get(),
                         {conv_op, elementwise_add_op, elementwise_add_op_1,
                          elementwise_add_out});
  };
  gpd(graph.get(), handler);
  return graph;
 }
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
 REGISTER_PASS(conv_elementwise_add2_act_fuse_pass,
              paddle::framework::ir::ConvElementwiseAdd2ActFusePass);
--- a/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc
@ -0,0 +1,105 @@
 // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.h"
 #include <string>
 namespace paddle {
 namespace framework {
 namespace ir {
 #define GET_IR_NODE(node__) GET_IR_NODE_FROM_SUBGRAPH(node__, node__, pattern);
 #define GET_NODES                      \
  GET_IR_NODE(conv_op);                \
  GET_IR_NODE(conv_out);               \
  GET_IR_NODE(conv_filter);            \
  GET_IR_NODE(elementwise_add_op);     \
  GET_IR_NODE(elementwise_add_in_y);   \
  GET_IR_NODE(elementwise_add_out);    \
  GET_IR_NODE(elementwise_add_op_1);   \
  GET_IR_NODE(elementwise_add_in_y_1); \
  GET_IR_NODE(elementwise_add_out_1);  \
  GET_IR_NODE(act_op);                 \
  GET_IR_NODE(act_out);
 // Inherient the basic infomation from `base_desc`, and modify some fields.
 framework::proto::OpDesc PrepareOpDesc(
    const framework::proto::OpDesc& base_desc, const std::string& bias,
    const std::string& bias1, const std::string& activation,
    const std::string& output) {
  auto proto = base_desc;
  framework::OpDesc desc(proto, nullptr);
  desc.SetInput("Bias", {bias});
  desc.SetInput("ResidualData", {bias1});
  desc.SetAttr("activation", activation);
  desc.SetOutput("Output", {output});
  desc.SetAttr("is_test", true);
  return *desc.Proto();
 }
 std::unique_ptr<ir::Graph> ConvElementwiseAdd2ActFusePass::ApplyImpl(
    std::unique_ptr<ir::Graph> graph) const {
  const std::string pattern_name = "conv_elementwise_add_act_fuse";
  FusePassBase::Init(pattern_name, graph.get());
  GraphPatternDetector gpd;
  auto* x = gpd.mutable_pattern()->NewNode("x")->AsInput()->assert_is_op_input(
      "conv2d", "Input");
  patterns::ConvElementwiseadd2Act pattern(gpd.mutable_pattern(), pattern_name);
  pattern(x);
  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* g) {
    GET_NODES;
    auto base_op_desc = *conv_op->Op()->Proto();
    std::string bias_name = elementwise_add_in_y->Name();
    std::string bias1_name = elementwise_add_in_y_1->Name();
    std::string act_op_type = act_op->Op()->Type();
    std::string act_op_out = act_out->Name();
    auto new_op_proto = PrepareOpDesc(base_op_desc, bias_name, bias1_name,
                                      act_op_type, act_op_out);
    framework::OpDesc new_op_desc(new_op_proto, nullptr);
    // Create a new node for the fused op.
    graph->CreateOpNode(&new_op_desc);
    // Link inputs and outputs.
    PADDLE_ENFORCE(subgraph.count(x));
    auto* conv_in_node = subgraph.at(x);
    IR_NODE_LINK_TO(conv_in_node, conv_op);            // Input
    IR_NODE_LINK_TO(conv_filter, conv_op);             // Filter
    IR_NODE_LINK_TO(conv_op, conv_out);                // Output
    IR_NODE_LINK_TO(elementwise_add_in_y, conv_op);    // Bias
    IR_NODE_LINK_TO(elementwise_add_in_y_1, conv_op);  // Bias
    // Delete the unneeded nodes.
    GraphSafeRemoveNodes(graph.get(),
                         {conv_op, elementwise_add_op, elementwise_add_op_1,
                          elementwise_add_out});
  };
  gpd(graph.get(), handler);
  return graph;
 }
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
 REGISTER_PASS(conv_elementwise_add2_act_fuse_pass,
              paddle::framework::ir::ConvElementwiseAdd2ActFusePass);
--- a/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.h
+++ b/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.h
@ -0,0 +1,33 @@
 // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #pragma once
 #include "paddle/fluid/framework/ir/fuse_pass_base.h"
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 namespace paddle {
 namespace framework {
 namespace ir {
 class ConvElementwiseAdd2ActFusePass : public FusePassBase {
 public:
  virtual ~ConvElementwiseAdd2ActFusePass() {}
 protected:
  std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const;
 };
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
--- a/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc
@ -0,0 +1,104 @@
 // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.h"
 #include <string>
 #include "paddle/fluid/framework/ir/graph_viz_pass.h"
 namespace paddle {
 namespace framework {
 namespace ir {
 #define GET_IR_NODE(node__) GET_IR_NODE_FROM_SUBGRAPH(node__, node__, pattern);
 #define GET_NODES                    \
  GET_IR_NODE(conv_op);              \
  GET_IR_NODE(conv_out);             \
  GET_IR_NODE(conv_filter);          \
  GET_IR_NODE(elementwise_add_op);   \
  GET_IR_NODE(elementwise_add_in_y); \
  GET_IR_NODE(elementwise_add_out);  \
  GET_IR_NODE(act_op);               \
  GET_IR_NODE(act_out);
 // Inherient the basic infomation from `base_desc`, and modify some fields.
 framework::proto::OpDesc PrepareOpDesc(
    const framework::proto::OpDesc& base_desc, const std::string& bias,
    const std::string& activation, const std::string& output) {
  auto proto = base_desc;
  framework::OpDesc desc(proto, nullptr);
  desc.SetType("conv2d_fusion");
  desc.SetInput("Bias", {bias});
  desc.SetInput("ResidualData", {});
  desc.SetAttr("activation", activation);
  desc.SetOutput("Output", {output});
  desc.SetAttr("is_test", true);
  desc.SetAttr("use_cudnn", false);
  desc.Flush();
  return *desc.Proto();
 }
 std::unique_ptr<ir::Graph> ConvElementwiseAddActFusePass::ApplyImpl(
    std::unique_ptr<ir::Graph> graph) const {
  const std::string pattern_name = "conv_elementwise_add_act_fuse";
  FusePassBase::Init(pattern_name, graph.get());
  GraphPatternDetector gpd;
  auto* x = gpd.mutable_pattern()
                ->NewNode("x")
                ->assert_is_op_input("conv2d", "Input")
                ->AsInput();
  patterns::ConvElementwiseaddAct pattern(gpd.mutable_pattern(), pattern_name);
  pattern(x);
  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* g) {
    GET_NODES;
    auto base_op_desc = *conv_op->Op()->Proto();
    std::string bias_name = elementwise_add_in_y->Name();
    std::string act_op_type = act_op->Op()->Type();
    std::string act_op_out = act_out->Name();
    auto new_op_proto =
        PrepareOpDesc(base_op_desc, bias_name, act_op_type, act_op_out);
    framework::OpDesc new_op_desc(new_op_proto, nullptr);
    // Create a new node for the fused op.
    auto* new_conv_op = graph->CreateOpNode(&new_op_desc);
    // Link inputs and outputs.
    PADDLE_ENFORCE(subgraph.count(x));
    auto* conv_in_node = subgraph.at(x);
    IR_NODE_LINK_TO(conv_in_node, new_conv_op);          // Input
    IR_NODE_LINK_TO(conv_filter, new_conv_op);           // Filter
    IR_NODE_LINK_TO(elementwise_add_in_y, new_conv_op);  // Bias
    IR_NODE_LINK_TO(new_conv_op, act_out);               // Output
    // Delete the unneeded nodes.
    GraphSafeRemoveNodes(graph.get(), {conv_op, conv_out, elementwise_add_op,
                                       elementwise_add_out, act_op});
  };
  gpd(graph.get(), handler);
  return graph;
 }
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
 REGISTER_PASS(conv_elementwise_add_act_fuse_pass,
              paddle::framework::ir::ConvElementwiseAddActFusePass);
--- a/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.h
+++ b/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.h
@ -0,0 +1,33 @@
 // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #pragma once
 #include "paddle/fluid/framework/ir/fuse_pass_base.h"
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 namespace paddle {
 namespace framework {
 namespace ir {
 class ConvElementwiseAddActFusePass : public FusePassBase {
 public:
  virtual ~ConvElementwiseAddActFusePass() {}
 protected:
  std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const;
 };
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
--- a/paddle/fluid/framework/ir/graph_pattern_detector.cc
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc
@ -17,6 +17,7 @@
 #include <string>
 #include <vector>
 #include "graph_pattern_detector.h"
 #include "paddle/fluid/framework/ir/graph_helper.h"
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/framework/ir/graph_traits.h"
@ -25,6 +26,7 @@
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/string/pretty_log.h"
 #include "paddle/fluid/string/printf.h"
 namespace paddle {
 namespace framework {
 namespace ir {
@ -104,7 +106,7 @@ bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph &graph) {
  for (auto &node : GraphTraits::DFS(graph)) {
    for (const auto &pdnode : pattern_.nodes()) {
      if (pdnode->Tell(&node)) {
-        VLOG(4) << "pdnode " << pdnode->name() << " marked";
+        VLOG(4) << "Node " << node.Name() << " marked as " << pdnode->name();
        pdnodes2nodes_[pdnode.get()].insert(&node);
      }
    }
@ -1099,6 +1101,115 @@ PDNode *patterns::ElementwiseAdd::operator()(PDNode *x_var, PDNode *y_var) {
  return out_var;
 }
 std::unordered_set<std::string> conv_act_set({"identity", "sigmoid", "relu",
                                              "relu6", "relux", "tanh",
                                              "band_pass"});
 PDNode *patterns::ConvElementwiseaddAct::operator()(PDNode *conv_in) {
  conv_in->AsInput();
  auto conv_op = pattern->NewNode(conv_op_repr())->assert_is_op("conv2d");
  auto conv_out = pattern->NewNode(conv_out_repr())
                      ->assert_is_op_output("conv2d")
                      ->assert_is_op_input("elementwise_add", "X")
                      ->AsIntermediate();
  auto conv_filter = pattern->NewNode(conv_filter_repr())
                         ->assert_is_op_input("conv2d", "Filter")
                         ->AsInput();
  auto elementwise_add_op = pattern->NewNode(elementwise_add_op_repr())
                                ->assert_is_op("elementwise_add");
  auto elementwise_add_in_y = pattern->NewNode(elementwise_add_in_y_repr())
                                  ->assert_is_op_input("elementwise_add", "Y")
                                  ->AsInput();
  auto elementwise_add_out = pattern->NewNode(elementwise_add_out_repr())
                                 ->assert_is_op_output("elementwise_add")
                                 ->AsIntermediate();
  auto act_op = pattern->NewNode(act_op_repr())
                    ->assert_is_op()
                    ->assert_more([&](Node *node) {
                      auto op_type = node->Name();
                      return conv_act_set.count(op_type);
                    });
  auto act_out = pattern->NewNode(act_out_repr())
                     ->assert_is_var()
                     // is activation op's output.
                     ->assert_more([&](Node *node) {
                       for (auto *in_op : node->inputs) {
                         if (conv_act_set.count(in_op->Name())) {
                           return true;
                         }
                       }
                       return false;
                     })
                     ->AsOutput();
  conv_op->LinksFrom({conv_in, conv_filter});
  conv_out->LinksFrom({conv_op});
  elementwise_add_op->LinksFrom({conv_out, elementwise_add_in_y})
      .LinksTo({elementwise_add_out});
  act_op->LinksFrom({elementwise_add_out}).LinksTo({act_out});
  return act_out;
 }
 PDNode *patterns::ConvElementwiseadd2Act::operator()(PDNode *conv_in) {
  auto conv_op = pattern->NewNode(conv_op_repr())->assert_is_op("conv2d");
  auto conv_filter = pattern->NewNode(conv_filter_repr())
                         ->assert_is_op_input("conv2d", "Filter")
                         ->AsInput();
  auto conv_out = pattern->NewNode(conv_out_repr())
                      ->assert_is_op_output("conv2d")
                      ->assert_is_op_input("elementwise_add", "X")
                      ->AsIntermediate();
  auto elementwise_add_op = pattern->NewNode(elementwise_add_op_repr())
                                ->assert_is_op("elementwise_add");
  auto elementwise_add_in_y = pattern->NewNode(elementwise_add_in_y_repr())
                                  ->assert_is_op_input("elementwise_add", "Y")
                                  ->AsInput();
  auto elementwise_add_out = pattern->NewNode(elementwise_add_out_repr())
                                 ->assert_is_op_output("elementwise_add")
                                 ->assert_is_op_input("elementwise_add", "X")
                                 ->AsIntermediate();
  auto elementwise_add_op_1 = pattern->NewNode(elementwise_add_op_1_repr())
                                  ->assert_is_op("elementwise_add");
  auto elementwise_add_in_y_1 = pattern->NewNode(elementwise_add_in_y_1_repr())
                                    ->assert_is_op_input("elementwise_add", "Y")
                                    ->AsInput();
  auto elementwise_add_out_1 = pattern->NewNode(elementwise_add_out_1_repr())
                                   ->assert_is_op_output("elementwise_add")
                                   ->AsIntermediate();
  auto act_op = pattern->NewNode(act_op_repr())
                    ->assert_is_op()
                    ->assert_more([&](Node *node) {
                      auto op_type = node->Name();
                      return conv_act_set.count(op_type);
                    });
  auto act_out = pattern->NewNode(act_out_repr())
                     ->assert_is_var()
                     // is activation op's output.
                     ->assert_more([&](Node *node) {
                       for (auto *in_op : node->inputs) {
                         if (conv_act_set.count(in_op->Name())) {
                           return true;
                         }
                       }
                       return false;
                     })
                     ->AsOutput();
  conv_op->LinksFrom({conv_in, conv_filter}).LinksTo({conv_out});
  elementwise_add_op->LinksFrom({conv_out, elementwise_add_in_y})
      .LinksTo({elementwise_add_out});
  elementwise_add_op_1->LinksFrom(
      {elementwise_add_out, elementwise_add_in_y_1});
  act_op->LinksFrom({elementwise_add_out_1}).LinksTo({act_out});
  return act_out;
 }
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
--- a/paddle/fluid/framework/ir/graph_pattern_detector.h
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.h
@ -671,6 +671,51 @@ struct ElementwiseAdd : public PatternBase {
  PATTERN_DECL_NODE(elementwise_add_y);
  PATTERN_DECL_NODE(elementwise_add_out);
 };
 // Conv + ElementwiseAdd + an activation
 // This pattern can futher fuse the conv related ops after the conv+bn fusion.
 struct ConvElementwiseaddAct : public PatternBase {
  ConvElementwiseaddAct(PDPattern* pattern, const std::string& name_scope)
      : PatternBase(pattern, name_scope, "conv_elementwiseadd_act") {}
  PDNode* operator()(PDNode* conv_in);
  PATTERN_DECL_NODE(conv_op);
  PATTERN_DECL_NODE(conv_out);
  PATTERN_DECL_NODE(conv_filter);
  PATTERN_DECL_NODE(elementwise_add_op);
  PATTERN_DECL_NODE(elementwise_add_in_y);  // input
  PATTERN_DECL_NODE(elementwise_add_out);
  PATTERN_DECL_NODE(act_op);
  PATTERN_DECL_NODE(act_out);
 };
 // Conv + ElementwiseAdd + ElementwiseAdd + Activation
 struct ConvElementwiseadd2Act : public PatternBase {
  ConvElementwiseadd2Act(PDPattern* pattern, const std::string& name_scope)
      : PatternBase(pattern, name_scope,
                    "conv_elementwiseadd2_elementwiseadd_act") {}
  PDNode* operator()(PDNode* conv_in);
  PATTERN_DECL_NODE(conv_op);
  PATTERN_DECL_NODE(conv_filter);
  PATTERN_DECL_NODE(conv_out);
  PATTERN_DECL_NODE(elementwise_add_op);
  PATTERN_DECL_NODE(elementwise_add_in_y);  // input
  PATTERN_DECL_NODE(elementwise_add_out);
  PATTERN_DECL_NODE(elementwise_add_op_1);
  PATTERN_DECL_NODE(elementwise_add_in_y_1);  // input
  PATTERN_DECL_NODE(elementwise_add_out_1);
  PATTERN_DECL_NODE(act_op);
  PATTERN_DECL_NODE(act_out);
 };
 }  // namespace patterns
 // Link two ir::Nodes from each other.
--- a/paddle/fluid/inference/api/analysis_predictor_tester.cc
+++ b/paddle/fluid/inference/api/analysis_predictor_tester.cc
@ -55,7 +55,12 @@ TEST(AnalysisPredictor, analysis_off) {
 }
 TEST(AnalysisPredictor, analysis_on) {
-  AnalysisConfig config(false);
+#ifdef PADDLE_WITH_CUDA
  AnalysisConfig config(true);
  config.fraction_of_gpu_memory = 0.15;
 #else
  AnalysisConfig config;
 #endif
  config.model_dir = FLAGS_dirname;
  config.enable_ir_optim = true;
--- a/paddle/fluid/inference/api/paddle_pass_builder.h
+++ b/paddle/fluid/inference/api/paddle_pass_builder.h
@ -118,7 +118,10 @@ class GpuPassStrategy : public PassStrategy {
 public:
  GpuPassStrategy() : PassStrategy({}) {
    passes_.assign({
-        "infer_clean_graph_pass", "conv_bn_fuse_pass",
+        "infer_clean_graph_pass",               //
        "conv_bn_fuse_pass",                    //
        "conv_elementwise_add_act_fuse_pass",   //
        "conv_elementwise_add2_act_fuse_pass",  //
    });
  }
--- a/paddle/fluid/inference/io.cc
+++ b/paddle/fluid/inference/io.cc
@ -79,7 +79,7 @@ void LoadPersistables(framework::Executor* executor, framework::Scope* scope,
  for (auto* var : global_block.AllVars()) {
    if (IsPersistable(var)) {
-      VLOG(3) << "persistable variable's name: " << var->Name();
+      VLOG(4) << "persistable variable's name: " << var->Name();
      framework::VarDesc* new_var = load_block->Var(var->Name());
      new_var->SetShape(var->GetShape());
--- a/paddle/fluid/inference/tests/api/trt_models_tester.cc
+++ b/paddle/fluid/inference/tests/api/trt_models_tester.cc
@ -78,6 +78,7 @@ void profile(std::string model_dir, bool use_analysis, bool use_tensorrt) {
  std::vector<PaddleTensor> outputs;
  if (use_analysis || use_tensorrt) {
    contrib::AnalysisConfig config(true);
    config.pass_builder()->TurnOnDebug();
    SetConfig<contrib::AnalysisConfig>(&config, model_dir, true, use_tensorrt,
                                       FLAGS_batch_size);
    TestPrediction(reinterpret_cast<PaddlePredictor::Config*>(&config),
@ -141,9 +142,31 @@ TEST(TensorRT_resnext50, profile) {
  profile(model_dir, /* use_analysis */ true, FLAGS_use_tensorrt);
 }
 TEST(resnext50, compare_analysis_native) {
  std::string model_dir = FLAGS_infer_model + "/resnext50";
  compare(model_dir, false /*use tensorrt*/);
 }
 TEST(TensorRT_mobilenet, analysis) {
  std::string model_dir = FLAGS_infer_model + "/" + "mobilenet";
-  compare(model_dir, /* use_tensorrt */ false);
+  compare(model_dir, false /* use_tensorrt */);
 }
 TEST(AnalysisPredictor, use_gpu) {
  std::string model_dir = FLAGS_infer_model + "/" + "mobilenet";
  AnalysisConfig config(true);
  config.model_dir = model_dir;
  config.fraction_of_gpu_memory = 0.15;
  config.pass_builder()->TurnOnDebug();
  std::vector<std::vector<PaddleTensor>> inputs_all;
  auto predictor = CreatePaddlePredictor(config);
  SetFakeImageInput(&inputs_all, model_dir, false, "__model__", "");
  std::vector<PaddleTensor> outputs;
  for (auto& input : inputs_all) {
    ASSERT_TRUE(predictor->Run(input, &outputs));
  }
 }
 }  // namespace inference
--- a/paddle/fluid/operators/controlflow/CMakeLists.txt
+++ b/paddle/fluid/operators/controlflow/CMakeLists.txt
@ -1,4 +1,4 @@
 include(operators)
-register_operators()
+register_operators(DEPS naive_executor)
 file(APPEND ${pybind_file} "USE_OP(less_than);\nUSE_OP(logical_and);\nUSE_NO_KERNEL_OP(read_from_array);\n")
--- a/paddle/fluid/operators/conv_op.cc
+++ b/paddle/fluid/operators/conv_op.cc
@ -44,7 +44,9 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
  std::vector<int> dilations = ctx->Attrs().Get<std::vector<int>>("dilations");
  PADDLE_ENFORCE(in_dims.size() == 4 || in_dims.size() == 5,
-                 "Conv intput should be 4-D or 5-D tensor.");
+                 "Conv intput should be 4-D or 5-D tensor, get %u",
                 in_dims.size());
  PADDLE_ENFORCE_EQ(
      in_dims.size(), filter_dims.size(),
      "Conv input dimension and filter dimension should be the same.");
--- a/paddle/fluid/operators/cudnn_lstm_op.cu.cc
+++ b/paddle/fluid/operators/cudnn_lstm_op.cu.cc
@ -300,9 +300,11 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
    }
    CudnnRNNCache *cudnn_rnn_cache = nullptr;
    if (cache_var->IsInitialized()) {
      // const_cast is usually bad.
      cudnn_rnn_cache = const_cast<framework::Variable *>(cache_var)
                            ->GetMutable<CudnnRNNCache>();
    } else {
      // const_cast is usually bad.
      cudnn_rnn_cache = const_cast<framework::Variable *>(cache_var)
                            ->GetMutable<CudnnRNNCache>();
      std::random_device rnd;
--- a/Show More
+++ b/Show More