Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into rewrite_allocation

7 years ago · 461f71a90b
parent 9dcddf92f2 0e24138494
commit 461f71a90b
199 changed files with 13539 additions and 1736 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -127,6 +127,9 @@ set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
 set(FLUID_INSTALL_DIR "${CMAKE_BINARY_DIR}/fluid_install_dir" CACHE STRING
  "A path setting fluid shared and static libraries")

+set(FLUID_INFERENCE_INSTALL_DIR "${CMAKE_BINARY_DIR}/fluid_inference_install_dir" CACHE STRING
+  "A path setting fluid inference shared and static libraries")
+
 if (WITH_C_API AND WITH_PYTHON)
  message(WARNING "It is suggest not embedded a python interpreter in Paddle "
    "when using C-API. It will give an unpredictable behavior when using a "
--- a/README.md
+++ b/README.md
@ -2,8 +2,8 @@


 [![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle)
-[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://www.paddlepaddle.org/docs/develop/documentation/en/getstarted/index_en.html)
-[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://www.paddlepaddle.org/docs/develop/documentation/zh/getstarted/index_cn.html)
+[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://paddlepaddle.org/documentation/docs/en/1.0/getstarted/index_en.html)
+[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://paddlepaddle.org/documentation/docs/zh/1.0/beginners_guide/index.html)
 [![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
 [![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)

@ -19,7 +19,7 @@ Our vision is to enable deep learning for everyone via PaddlePaddle.
 Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest feature of PaddlePaddle.


-### Latest PaddlePaddle Release: [Fluid 0.15.0](https://github.com/PaddlePaddle/Paddle/tree/v0.15.0)
+### Latest PaddlePaddle Release: [Fluid 1.0.1](https://github.com/PaddlePaddle/Paddle/tree/release/1.0.0)
 ### Install Latest Stable Release:
 ```
 # Linux CPU
@ -27,9 +27,9 @@ pip install paddlepaddle
 # Linux GPU cuda9cudnn7
 pip install paddlepaddle-gpu
 # Linux GPU cuda8cudnn7
-pip install paddlepaddle-gpu==0.15.0.post87
+pip install paddlepaddle-gpu==1.0.1.post87
 # Linux GPU cuda8cudnn5
-pip install paddlepaddle-gpu==0.15.0.post85
+pip install paddlepaddle-gpu==1.0.1.post85

 # For installation on other platform, refer to http://paddlepaddle.org/
 ```
@ -76,26 +76,26 @@ pip install paddlepaddle-gpu==0.15.0.post85

 ## Installation

-It is recommended to read [this doc](http://paddlepaddle.org/documentation/docs/zh/0.15.0/new_docs/beginners_guide/install/install_doc.html) on our website.
+It is recommended to read [this doc](http://paddlepaddle.org/documentation/docs/zh/1.0/beginners_guide/index.html) on our website.

 ## Documentation

-We provide [English](http://paddlepaddle.org/documentation/docs/en/0.15.0/getstarted/index_en.html) and
-[Chinese](http://paddlepaddle.org/documentation/docs/zh/0.15.0/new_docs/beginners_guide/index.html) documentation.
+We provide [English](http://paddlepaddle.org/documentation/docs/en/1.0.0/getstarted/index_en.html) and
+[Chinese](http://paddlepaddle.org/documentation/docs/zh/1.0/beginners_guide/index.html) documentation.

 - [Deep Learning 101](https://github.com/PaddlePaddle/book)

  You might want to start from this online interactive book that can run in a Jupyter Notebook.

- [Distributed Training](http://paddlepaddle.org/documentation/docs/zh/0.15.0/new_docs/user_guides/howto/training/cluster_howto.html)
+- [Distributed Training](http://paddlepaddle.org/documentation/docs/zh/1.0/user_guides/howto/training/cluster_howto.html)

  You can run distributed training jobs on MPI clusters.

- [Python API](http://paddlepaddle.org/documentation/api/zh/0.15.0/fluid.html)
+- [Python API](http://paddlepaddle.org/documentation/api/zh/1.0/fluid.html)

   Our new API enables much shorter programs.

- [How to Contribute](http://paddlepaddle.org/documentation/docs/zh/0.15.0/new_docs/advanced_usage/development/contribute_to_paddle.html)
+- [How to Contribute](http://paddlepaddle.org/documentation/docs/zh/1.0/advanced_usage/development/contribute_to_paddle.html)

   We appreciate your contributions!

--- a/benchmark/fluid/run.sh
+++ b/benchmark/fluid/run.sh
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@ -311,6 +311,8 @@ function(cc_test TARGET_NAME)
    set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cpu_deterministic=true)
    set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true)
    set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true)
+    # No unit test should exceed 10 minutes.
+    set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 600)
  endif()
 endfunction(cc_test)

@ -629,6 +631,8 @@ function(py_test TARGET_NAME)
             PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_ENVS}
             ${PYTHON_EXECUTABLE} -u ${py_test_SRCS} ${py_test_ARGS}
             WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+    # No unit test should exceed 10 minutes.
+    set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 600)
  endif()
 endfunction()

--- a/cmake/inference_lib.cmake
+++ b/cmake/inference_lib.cmake
@ -18,7 +18,7 @@ function(copy TARGET)
    set(oneValueArgs "")
    set(multiValueArgs SRCS DSTS DEPS)
    cmake_parse_arguments(copy_lib "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
-    set(inference_lib_dist_dep ${TARGET} ${inference_lib_dist_dep} PARENT_SCOPE)
+    set(fluid_lib_dist_dep ${TARGET} ${fluid_lib_dist_dep} PARENT_SCOPE)

    list(LENGTH copy_lib_SRCS copy_lib_SRCS_len)
    list(LENGTH copy_lib_DSTS copy_lib_DSTS_len)
@ -150,16 +150,16 @@ if (WITH_ANAKIN AND WITH_MKL)
        SRCS
        ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/libinference_anakin_api* # compiled anakin api
        ${ANAKIN_INSTALL_DIR} # anakin release
-        DSTS ${dst_dir}/inference/anakin ${FLUID_INSTALL_DIR}/third_party/install/anakin)
+        DSTS ${FLUID_INSTALL_DIR}/third_party/install/anakin ${FLUID_INSTALL_DIR}/third_party/install/anakin)
     list(APPEND inference_deps anakin_inference_lib)
 endif()

 set(module "inference")
 copy(inference_lib DEPS ${inference_deps}
  SRCS ${src_dir}/${module}/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/inference/libpaddle_fluid.*
-       ${src_dir}/${module}/api/paddle_inference_api.h ${src_dir}/${module}/api/demo_ci
+       ${src_dir}/${module}/api/paddle_inference_api.h
       ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h
-  DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module}
+  DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module}
 )

 set(module "platform")
@ -185,20 +185,41 @@ copy(cmake_cache
  SRCS ${CMAKE_CURRENT_BINARY_DIR}/CMakeCache.txt
  DSTS ${FLUID_INSTALL_DIR})

-add_custom_target(inference_lib_dist DEPENDS ${inference_lib_dist_dep}) 
+# This command generates a complete fluid library for both train and inference
+add_custom_target(fluid_lib_dist DEPENDS ${fluid_lib_dist_dep}) 
+
+# The following commands generate an inference-only fluid library.
+# third_party, version.txt and CMakeCache.txt are placed at the same relative positions as in ${FLUID_INSTALL_DIR}
+copy(third_party DEPS fluid_lib_dist
+  SRCS ${FLUID_INSTALL_DIR}/third_party ${FLUID_INSTALL_DIR}/CMakeCache.txt
+  DSTS ${FLUID_INFERENCE_INSTALL_DIR} ${FLUID_INFERENCE_INSTALL_DIR}
+)
+
+# The inference-only library needs only libpaddle_fluid.so/.a and paddle_inference_api.h
+copy(inference_api_lib DEPS fluid_lib_dist
+  SRCS ${FLUID_INSTALL_DIR}/paddle/fluid/inference/libpaddle_fluid.*
+       ${FLUID_INSTALL_DIR}/paddle/fluid/inference/paddle_inference_api.h
+  DSTS ${FLUID_INFERENCE_INSTALL_DIR}/paddle/lib ${FLUID_INFERENCE_INSTALL_DIR}/paddle/include
+)
+
+add_custom_target(inference_lib_dist DEPENDS third_party inference_api_lib)

 # paddle fluid version
-execute_process(
-  COMMAND ${GIT_EXECUTABLE} log --pretty=format:%H -1
-  WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}
-  OUTPUT_VARIABLE PADDLE_GIT_COMMIT)
-set(version_file ${FLUID_INSTALL_DIR}/version.txt)
-file(WRITE ${version_file}
-  "GIT COMMIT ID: ${PADDLE_GIT_COMMIT}\n"
-  "WITH_MKL: ${WITH_MKL}\n"
-  "WITH_GPU: ${WITH_GPU}\n")
-if(WITH_GPU)
-  file(APPEND ${version_file}
-    "CUDA version: ${CUDA_VERSION}\n"
-    "CUDNN version: v${CUDNN_MAJOR_VERSION}\n")
-endif()
+function(version version_file)  # Writes a build-description text file to the path given in version_file.
+  execute_process(  # Runs git at configure time to capture the hash of the latest commit.
+    COMMAND ${GIT_EXECUTABLE} log --pretty=format:%H -1
+    WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}
+    OUTPUT_VARIABLE PADDLE_GIT_COMMIT)  # Local to this function; no PARENT_SCOPE is used.
+  file(WRITE ${version_file}  # WRITE overwrites any existing file at that path.
+    "GIT COMMIT ID: ${PADDLE_GIT_COMMIT}\n"
+    "WITH_MKL: ${WITH_MKL}\n"
+    "WITH_MKLDNN: ${WITH_MKLDNN}\n"
+    "WITH_GPU: ${WITH_GPU}\n")
+  if(WITH_GPU)  # CUDA/cuDNN lines are appended only for GPU builds.
+    file(APPEND ${version_file}
+      "CUDA version: ${CUDA_VERSION}\n"
+      "CUDNN version: v${CUDNN_MAJOR_VERSION}\n")
+  endif()
+endfunction()
+version(${FLUID_INSTALL_DIR}/version.txt)
+version(${FLUID_INFERENCE_INSTALL_DIR}/version.txt)
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@ -61,12 +61,12 @@ paddle.fluid.layers.cos_sim ArgSpec(args=['X', 'Y'], varargs=None, keywords=None
 paddle.fluid.layers.cross_entropy ArgSpec(args=['input', 'label', 'soft_label', 'ignore_index'], varargs=None, keywords=None, defaults=(False, -100))
 paddle.fluid.layers.square_error_cost ArgSpec(args=['input', 'label'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.layers.chunk_eval ArgSpec(args=['input', 'label', 'chunk_scheme', 'num_chunk_types', 'excluded_chunk_types'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.sequence_conv ArgSpec(args=['input', 'num_filters', 'filter_size', 'filter_stride', 'padding', 'bias_attr', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(3, 1, None, None, None, None))
+paddle.fluid.layers.sequence_conv ArgSpec(args=['input', 'num_filters', 'filter_size', 'filter_stride', 'padding', 'bias_attr', 'param_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(3, 1, None, None, None, None, None))
 paddle.fluid.layers.conv2d ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None))
 paddle.fluid.layers.conv3d ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None))
 paddle.fluid.layers.sequence_pool ArgSpec(args=['input', 'pool_type'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.sequence_softmax ArgSpec(args=['input', 'param_attr', 'bias_attr', 'use_cudnn'], varargs=None, keywords=None, defaults=(None, None, False))
-paddle.fluid.layers.softmax ArgSpec(args=['input', 'param_attr', 'bias_attr', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(None, None, True, None))
+paddle.fluid.layers.sequence_softmax ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(False, None))
+paddle.fluid.layers.softmax ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(True, None))
 paddle.fluid.layers.pool2d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None))
 paddle.fluid.layers.pool3d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None))
 paddle.fluid.layers.batch_norm ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW', False, None, None, None, False, False))
@ -75,7 +75,8 @@ paddle.fluid.layers.conv2d_transpose ArgSpec(args=['input', 'num_filters', 'outp
 paddle.fluid.layers.conv3d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None))
 paddle.fluid.layers.sequence_expand ArgSpec(args=['x', 'y', 'ref_level', 'name'], varargs=None, keywords=None, defaults=(-1, None))
 paddle.fluid.layers.sequence_expand_as ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.sequence_pad ArgSpec(args=['x', 'pad_value', 'maxlen'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.sequence_pad ArgSpec(args=['x', 'pad_value', 'maxlen', 'name'], varargs=None, keywords=None, defaults=(None, None))
+paddle.fluid.layers.sequence_unpad ArgSpec(args=['x', 'length', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.lstm_unit ArgSpec(args=['x_t', 'hidden_t_prev', 'cell_t_prev', 'forget_bias', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(0.0, None, None, None))
 paddle.fluid.layers.reduce_sum ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None))
 paddle.fluid.layers.reduce_mean ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None))
@ -84,6 +85,7 @@ paddle.fluid.layers.reduce_min ArgSpec(args=['input', 'dim', 'keep_dim', 'name']
 paddle.fluid.layers.reduce_prod ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None))
 paddle.fluid.layers.sequence_first_step ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.layers.sequence_last_step ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.layers.sequence_slice ArgSpec(args=['input', 'offset', 'length', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.dropout ArgSpec(args=['x', 'dropout_prob', 'is_test', 'seed', 'name'], varargs=None, keywords=None, defaults=(False, None, None))
 paddle.fluid.layers.split ArgSpec(args=['input', 'num_or_sections', 'dim', 'name'], varargs=None, keywords=None, defaults=(-1, None))
 paddle.fluid.layers.ctc_greedy_decoder ArgSpec(args=['input', 'blank', 'name'], varargs=None, keywords=None, defaults=(None,))
@ -95,8 +97,8 @@ paddle.fluid.layers.warpctc ArgSpec(args=['input', 'label', 'blank', 'norm_by_ti
 paddle.fluid.layers.sequence_reshape ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.layers.transpose ArgSpec(args=['x', 'perm', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.im2sequence ArgSpec(args=['input', 'filter_size', 'stride', 'padding', 'input_image_size', 'out_stride', 'name'], varargs=None, keywords=None, defaults=(1, 1, 0, None, 1, None))
-paddle.fluid.layers.nce ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples'], varargs=None, keywords=None, defaults=(None, None, None, None))
-paddle.fluid.layers.hsigmoid ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr'], varargs=None, keywords=None, defaults=(None, None))
+paddle.fluid.layers.nce ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name'], varargs=None, keywords=None, defaults=(None, None, None, None, None))
+paddle.fluid.layers.hsigmoid ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(None, None, None))
 paddle.fluid.layers.beam_search ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'name'], varargs=None, keywords=None, defaults=(0, None))
 paddle.fluid.layers.row_conv ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None))
 paddle.fluid.layers.multiplex ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None)
@ -114,6 +116,7 @@ paddle.fluid.layers.pad ArgSpec(args=['x', 'paddings', 'pad_value', 'name'], var
 paddle.fluid.layers.pad_constant_like ArgSpec(args=['x', 'y', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0.0, None))
 paddle.fluid.layers.label_smooth ArgSpec(args=['label', 'prior_dist', 'epsilon', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, 0.1, 'float32', None))
 paddle.fluid.layers.roi_pool ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1, 1, 1.0))
+paddle.fluid.layers.roi_align ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale', 'sampling_ratio', 'name'], varargs=None, keywords=None, defaults=(1, 1, 1.0, -1, None))
 paddle.fluid.layers.dice_loss ArgSpec(args=['input', 'label', 'epsilon'], varargs=None, keywords=None, defaults=(1e-05,))
 paddle.fluid.layers.image_resize ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'resample'], varargs=None, keywords=None, defaults=(None, None, None, 'BILINEAR'))
 paddle.fluid.layers.image_resize_short ArgSpec(args=['input', 'out_short_len', 'resample'], varargs=None, keywords=None, defaults=('BILINEAR',))
@ -127,6 +130,7 @@ paddle.fluid.layers.relu ArgSpec(args=['x', 'name'], varargs=None, keywords=None
 paddle.fluid.layers.log ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.crop ArgSpec(args=['x', 'shape', 'offsets', 'name'], varargs=None, keywords=None, defaults=(None, None, None))
 paddle.fluid.layers.rank_loss ArgSpec(args=['label', 'left', 'right', 'name'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.margin_rank_loss ArgSpec(args=['label', 'left', 'right', 'margin', 'name'], varargs=None, keywords=None, defaults=(0.1, None))
 paddle.fluid.layers.elu ArgSpec(args=['x', 'alpha', 'name'], varargs=None, keywords=None, defaults=(1.0, None))
 paddle.fluid.layers.relu6 ArgSpec(args=['x', 'threshold', 'name'], varargs=None, keywords=None, defaults=(6.0, None))
 paddle.fluid.layers.pow ArgSpec(args=['x', 'factor', 'name'], varargs=None, keywords=None, defaults=(1.0, None))
@ -170,6 +174,7 @@ paddle.fluid.layers.mean ArgSpec(args=['x', 'name'], varargs=None, keywords=None
 paddle.fluid.layers.mul ArgSpec(args=['x', 'y', 'x_num_col_dims', 'y_num_col_dims', 'name'], varargs=None, keywords=None, defaults=(1, 1, None))
 paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=['x', 'label', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.maxout ArgSpec(args=['x', 'groups', 'name'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.affine_channel ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None))
 paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
 paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
 paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None)
@ -178,7 +183,6 @@ paddle.fluid.layers.batch ArgSpec(args=['reader', 'batch_size'], varargs=None, k
 paddle.fluid.layers.double_buffer ArgSpec(args=['reader', 'place', 'name'], varargs=None, keywords=None, defaults=(None, None))
 paddle.fluid.layers.random_data_generator ArgSpec(args=['low', 'high', 'shapes', 'lod_levels', 'for_parallel'], varargs=None, keywords=None, defaults=(True,))
 paddle.fluid.layers.py_reader ArgSpec(args=['capacity', 'shapes', 'dtypes', 'lod_levels', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, None, True))
-paddle.fluid.layers.create_py_reader_by_data ArgSpec(args=['capacity', 'feed_list', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, True))
 paddle.fluid.layers.Preprocessor.__init__ ArgSpec(args=['self', 'reader', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.Preprocessor.block ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
 paddle.fluid.layers.Preprocessor.inputs ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
--- a/paddle/fluid/framework/details/op_handle_base.h
+++ b/paddle/fluid/framework/details/op_handle_base.h
@ -64,7 +64,8 @@ class OpHandleBase {
  virtual bool IsMultiDeviceTransfer() { return false; }

  const platform::DeviceContext *DeviceContext(platform::Place place) {
-    return dev_ctxes_[place];
+    auto it = dev_ctxes_.find(place);
+    return it != dev_ctxes_.end() ? it->second : nullptr;
  }

  void SetDeviceContext(platform::Place place, platform::DeviceContext *ctx_) {
--- a/paddle/fluid/framework/details/var_handle.h
+++ b/paddle/fluid/framework/details/var_handle.h
@ -49,6 +49,8 @@ struct VarHandleBase {

  void AddOutput(OpHandleBase* out, ir::Node* node) {
    if (pending_ops_.find(out) == pending_ops_.end()) {
+      PADDLE_ENFORCE(out != nullptr, "The output of %s should not be nullptr",
+                     this->Node()->Name());
      pending_ops_.insert(out);
      node_->outputs.push_back(node);
    }
--- a/paddle/fluid/framework/executor.cc
+++ b/paddle/fluid/framework/executor.cc
@ -46,6 +46,41 @@ ExecutorPrepareContext::~ExecutorPrepareContext() {
  VLOG(5) << "destroy ExecutorPrepareContext";
 }

+template <typename RefCntMap>  // RefCntMap: map-like type from variable name to a remaining-reference count.
+static void DeleteUnusedTensors(const Scope& scope, const OperatorBase* op,  // Hands tensors whose count just hit its last use to gc.
+                                GarbageCollector<Tensor>* gc,
+                                RefCntMap* ref_cnts) {
+  std::unordered_set<Tensor*> erase_tensors;  // A set, so each tensor pointer is collected at most once.
+
+  auto handler = [&](const VariableNameMap& name_map) {  // Processes one name map (called for inputs, then outputs).
+    for (auto& name_pair : name_map) {
+      for (auto& name : name_pair.second) {
+        auto it = ref_cnts->find(name);
+        if (it == ref_cnts->end()) continue;  // Names without an entry in ref_cnts are not tracked; skip them.
+        if ((it->second)-- == 1) {  // Post-decrement: true only when the count was 1 before this use. The entry is not erased.
+          auto* var = scope.FindVar(name);
+          if (var != nullptr) {  // A name with no variable in scope is silently ignored.
+            VLOG(10) << "Erase tensor \'" << name << "\'";
+            if (var->IsType<LoDTensor>()) {
+              erase_tensors.insert(var->GetMutable<LoDTensor>());
+            } else if (var->IsType<SelectedRows>()) {  // For SelectedRows, only its value tensor is collected.
+              erase_tensors.insert(
+                  var->GetMutable<SelectedRows>()->mutable_value());
+            }  // Variables of any other type are logged above but not collected.
+          }
+        }
+      }
+    }
+  };
+
+  handler(op->Inputs());
+  handler(op->Outputs());
+
+  if (!erase_tensors.empty()) {  // gc->Add is only called when there is something to release.
+    gc->Add(erase_tensors);
+  }
+}
+
 Executor::Executor(const platform::Place& place) : place_(place) {}

 void Executor::Close() {
@ -66,7 +101,7 @@ void InitializeVariable(Variable* var, proto::VarType::Type var_type) {
  } else if (var_type == proto::VarType::FETCH_LIST) {
    var->GetMutable<FeedFetchList>();
  } else if (var_type == proto::VarType::STEP_SCOPES) {
-    var->GetMutable<std::vector<framework::Scope>>();
+    var->GetMutable<std::vector<framework::Scope*>>();
  } else if (var_type == proto::VarType::LOD_RANK_TABLE) {
    var->GetMutable<LoDRankTable>();
  } else if (var_type == proto::VarType::LOD_TENSOR_ARRAY) {
@ -331,9 +366,13 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
  }

  int64_t max_memory_size = GetEagerDeletionThreshold();
-
  std::unique_ptr<GarbageCollector<Tensor>> gc;
-  if (max_memory_size >= 0) {
+  // WhileOp would set keep_kids to false
+  // WhileGradOp would need the scopes created in WhileOp
+  // Perhaps, we should not perform eager deletion in WhileOp
+  // The scopes and variables created by WhileOp would be deleted
+  // in WhileGradOp.
+  if (max_memory_size >= 0 && !keep_kids) {
    ctx->ResetReferenceCount();
 #ifdef PADDLE_WITH_CUDA
    if (platform::is_gpu_place(place_)) {
@ -352,45 +391,8 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
    op->Run(*local_scope, place_);

    if (gc != nullptr) {
-      std::vector<std::string> erase_vars;
-      for (auto& input : op->Inputs()) {
-        for (auto& input_name : input.second) {
-          auto it = ctx->cur_ref_cnts_.find(input_name);
-          if (it == ctx->cur_ref_cnts_.end()) continue;
-          if (it->second == 1) {  // should delete it
-            erase_vars.emplace_back(input_name);
-            ctx->cur_ref_cnts_.erase(input_name);
-          } else {
-            --(it->second);
-          }
-        }
-      }
-
-      for (auto& output : op->Outputs()) {
-        for (auto& output_name : output.second) {
-          auto it = ctx->cur_ref_cnts_.find(output_name);
-          if (it == ctx->cur_ref_cnts_.end()) continue;
-          if (it->second == 1) {
-            erase_vars.emplace_back(output_name);
-            ctx->cur_ref_cnts_.erase(output_name);
-          } else {
-            --(it->second);
-          }
-        }
-      }
-
-      if (!erase_vars.empty()) {
-        std::vector<framework::LoDTensor*> erase_tensors;
-        for (auto& name : erase_vars) {
-          auto* var = local_scope->FindVar(name);
-          if (var == nullptr) continue;
-          if (var->IsType<framework::LoDTensor>()) {
-            auto* tensor = var->GetMutable<framework::LoDTensor>();
-            erase_tensors.push_back(tensor);
-          }
-        }
-        if (!erase_tensors.empty()) gc->Add(erase_tensors);
-      }
+      DeleteUnusedTensors(*local_scope, op.get(), gc.get(),
+                          &(ctx->cur_ref_cnts_));
    }
  }

--- a/paddle/fluid/framework/executor.h
+++ b/paddle/fluid/framework/executor.h
@ -32,38 +32,32 @@ template <typename T>
 std::unordered_map<std::string, T> GetNonPersistableReferenceCount(
    const ProgramDesc& prog, size_t block_id) {
  auto& block = prog.Block(block_id);
-  std::unordered_set<std::string> ignored_vars;
  std::unordered_map<std::string, T> ref_cnts;

-  for (auto var_desc : block.AllVars()) {
-    auto type = var_desc->Proto()->type().type();
-    if (type != proto::VarType::LOD_TENSOR || var_desc->Persistable()) {
-      ignored_vars.insert(var_desc->Name());  // ignore persistable vars
-    }
-  }
-
-  for (auto op_desc : block.AllOps()) {
-    for (auto& input : op_desc->Inputs()) {
-      for (auto& input_name : input.second) {
-        if (!ignored_vars.count(input_name)) {
-          if (ref_cnts.count(input_name))
-            ++ref_cnts[input_name];
-          else
-            ref_cnts[input_name] = 1;
+  auto update_ref_cnts = [&](OpDesc* op_desc, const VariableNameMap& name_map) {
+    for (auto& name_pair : name_map) {
+      for (auto& name : name_pair.second) {
+        auto* var_desc = block.FindVar(name);
+        if (var_desc == nullptr || var_desc->Persistable()) continue;
+        auto type = var_desc->Proto()->type().type();
+        if (type != proto::VarType::LOD_TENSOR &&
+            type != proto::VarType::SELECTED_ROWS) {
+          continue;
        }
-      }
-    }

-    for (auto& output : op_desc->Outputs()) {
-      for (auto output_name : output.second) {
-        if (!ignored_vars.count(output_name)) {
-          if (ref_cnts.count(output_name))
-            ++ref_cnts[output_name];
-          else
-            ref_cnts[output_name] = 1;
+        auto it = ref_cnts.find(name);
+        if (it != ref_cnts.end()) {
+          ++it->second;
+        } else {
+          ref_cnts[name] = 1;
        }
      }
    }
+  };
+
+  for (auto op_desc : block.AllOps()) {
+    update_ref_cnts(op_desc, op_desc->Inputs());
+    update_ref_cnts(op_desc, op_desc->Outputs());
  }
  return ref_cnts;
 }
--- a/paddle/fluid/framework/feed_fetch_method.cc
+++ b/paddle/fluid/framework/feed_fetch_method.cc
@ -27,8 +27,7 @@ void SetFeedVariable(Scope* scope, const LoDTensor& input,
  // be created.
  VLOG(3) << "SetFeedVariable name=" << var_name << " index=" << index;
  Variable* g_feed_value = scope->Var(var_name);
-  auto& feed_inputs =
-      *(g_feed_value->GetMutable<std::vector<paddle::framework::LoDTensor>>());
+  auto& feed_inputs = *(g_feed_value->GetMutable<FeedFetchList>());
  if (index >= feed_inputs.size()) {
    feed_inputs.resize(index + 1);
  }
--- a/paddle/fluid/framework/ir/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/CMakeLists.txt
@ -10,7 +10,7 @@ function(pass_library TARGET DEST)
    set(oneValueArgs "")
    set(multiValueArgs SRCS DEPS)
    cmake_parse_arguments(op_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
-    cc_library(${TARGET} SRCS ${TARGET}.cc DEPS graph_pattern_detector pass ${op_library_DEPS})
+    cc_library(${TARGET} SRCS ${TARGET}.cc DEPS graph_pattern_detector pass fuse_pass_base ${op_library_DEPS})
    # add more DEST here, such as train, dist and collect USE_PASS into a file automatically.
    if (${DEST} STREQUAL "base" OR ${DEST} STREQUAL "inference")
        message(STATUS "add pass ${TARGET} ${DEST}")
@ -25,21 +25,29 @@ cc_library(graph_helper SRCS graph_helper.cc DEPS graph)
 cc_library(pass SRCS pass.cc DEPS graph node graph_helper)
 cc_library(graph_traits SRCS graph_traits.cc DEPS graph)
 cc_library(graph_pattern_detector SRCS graph_pattern_detector.cc DEPS graph graph_helper graph_traits)
+cc_library(fuse_pass_base SRCS fuse_pass_base.cc DEPS pass)

 pass_library(graph_to_program_pass base)
 pass_library(graph_viz_pass base)
 pass_library(fc_fuse_pass inference)
-if (WITH_MKLDNN)
-    pass_library(conv_relu_mkldnn_fuse_pass inference)
-endif ()
 pass_library(attention_lstm_fuse_pass inference)
 pass_library(infer_clean_graph_pass inference)
 pass_library(fc_lstm_fuse_pass inference)
 pass_library(embedding_fc_lstm_fuse_pass inference)
 pass_library(fc_gru_fuse_pass inference)
 pass_library(seq_concat_fc_fuse_pass inference)
+pass_library(conv_bn_fuse_pass inference)
+pass_library(seqconv_eltadd_relu_fuse_pass inference)
+if(WITH_MKLDNN)
+    pass_library(mkldnn_placement_pass base)
+    pass_library(conv_bias_mkldnn_fuse_pass inference)
+    pass_library(conv_relu_mkldnn_fuse_pass inference)
+endif()

 cc_library(fuse_elewise_add_act_pass SRCS fuse_elewise_add_act_pass.cc DEPS pass graph_pattern_detector )
+if(WITH_MKLDNN)
+  pass_library(conv_elementwise_add_mkldnn_fuse_pass inference)
+endif()

 set(GLOB_PASS_LIB ${PASS_LIBRARY} CACHE INTERNAL "Global PASS library")

@ -53,4 +61,5 @@ cc_test(test_graph_pattern_detector SRCS graph_pattern_detector_tester.cc DEPS g
 cc_test(test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass framework_proto)
 if (WITH_MKLDNN)
    cc_test(test_conv_relu_mkldnn_fuse_pass SRCS conv_relu_mkldnn_fuse_pass_tester.cc DEPS conv_relu_mkldnn_fuse_pass)
+    cc_test(test_conv_elementwise_add_mkldnn_fuse_pass SRCS conv_elementwise_add_mkldnn_fuse_pass_tester.cc DEPS conv_elementwise_add_mkldnn_fuse_pass)
 endif ()
--- a/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
@ -262,7 +262,7 @@ std::unique_ptr<ir::Graph> AttentionLSTMFusePass::ApplyImpl(
  std::unordered_set<std::string> specified_vars({"data_lod_attention",
                                                  "cell_init", "hidden_init",
                                                  "data", "week", "minute"});
-  int count = 0;
+  size_t count = 0;
  for (auto* node : graph->Nodes()) {
    if (node->IsVar() && specified_vars.count(node->Name())) {
      ++count;
--- a/paddle/fluid/framework/ir/conv_bias_mkldnn_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/conv_bias_mkldnn_fuse_pass.cc
@ -0,0 +1,137 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/ir/conv_bias_mkldnn_fuse_pass.h"
+#include <functional>
+#include <string>
+#include <vector>
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/platform/enforce.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+// Applies the binary functor `f` element by element to two float tensors of
+// equal dims and returns a new tensor, allocated on the CPU, with the results.
+//
+// vec_a, vec_b: operands; their dims() must match (enforced below) and their
+//               buffers are read as float.
+// f:            callable invoked as f(a[i], b[i]) for every element index.
+template <typename BinaryOperation>
+LoDTensor tensor_apply_eltwise(const LoDTensor& vec_a, const LoDTensor& vec_b,
+                               BinaryOperation f) {
+  PADDLE_ENFORCE_EQ(vec_a.dims(), vec_b.dims());
+  LoDTensor vec_y;
+  vec_y.Resize(vec_a.dims());
+  const float* a = vec_a.data<float>();
+  const float* b = vec_b.data<float>();
+  float* y = vec_y.mutable_data<float>(platform::CPUPlace());
+  // The element count is read once and indexed with a 64-bit counter so the
+  // loop variable is never narrower than the value it is compared against.
+  const int64_t numel = vec_a.numel();
+  for (int64_t i = 0; i < numel; ++i) {
+    y[i] = f(a[i], b[i]);
+  }
+  return vec_y;
+}
+
+// Folds an elementwise_add that follows a conv2d into the conv2d itself.
+//
+// For every match of the ConvBias pattern whose two ops are both selected for
+// MKL-DNN by FindFuseOption():
+//   * if the conv2d already has a Bias input, the elementwise_add operand is
+//     summed into that bias tensor in the parameter scope and the conv2d is
+//     rewired to write the elementwise_add output;
+//   * otherwise a new conv2d op is created that takes the elementwise_add
+//     operand as its Bias input, copying all attributes of the original op.
+//
+// graph: graph to rewrite; must be non-null and carry a parameter scope.
+// Returns the same graph; the number of fused pairs is reported through
+// AddStatis().
+std::unique_ptr<ir::Graph> ConvBiasFusePass::ApplyImpl(
+    std::unique_ptr<ir::Graph> graph) const {
+  PADDLE_ENFORCE(graph.get());
+  FusePassBase::Init(name_scope_, graph.get());
+
+  auto* scope = param_scope();
+  PADDLE_ENFORCE(scope);
+
+  GraphPatternDetector gpd;
+  auto* conv_input =
+      gpd.mutable_pattern()
+          ->NewNode(patterns::PDNodeName(name_scope_, "conv_input"))
+          ->AsInput()
+          ->assert_is_op_input("conv2d", "Input");
+  patterns::ConvBias conv_bias_pattern(gpd.mutable_pattern(), name_scope_);
+  conv_bias_pattern(conv_input);
+  int found_conv_bias_count = 0;
+  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
+                     Graph* g) {
+    VLOG(4) << "handle ConvBias fuse";
+    GET_IR_NODE_FROM_SUBGRAPH(conv_weight, conv_weight,
+                              conv_bias_pattern);                      // Filter
+    GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, conv_bias_pattern);  // tmp
+    GET_IR_NODE_FROM_SUBGRAPH(conv, conv, conv_bias_pattern);  // CONV op
+    // bias
+    GET_IR_NODE_FROM_SUBGRAPH(eltwise_bias, eltwise_bias, conv_bias_pattern);
+    // output
+    GET_IR_NODE_FROM_SUBGRAPH(eltwise_out, eltwise_out, conv_bias_pattern);
+    // elementwise_add op
+    GET_IR_NODE_FROM_SUBGRAPH(eltwise, eltwise, conv_bias_pattern);
+
+    PADDLE_ENFORCE(subgraph.count(conv_input));
+
+    // check if fuse can be done and if MKL-DNN should be used
+    FuseOptions fuse_option = FindFuseOption(*conv, *eltwise);
+    if (fuse_option == DO_NOT_FUSE || fuse_option == FUSE_NATIVE) {
+      VLOG(3) << "do not perform conv+bias fuse";
+      return;
+    }
+
+    // The lookup result is a pointer; verify it before dereferencing so a
+    // variable missing from the scope fails with a message, not a crash.
+    auto* eltwise_bias_var = scope->FindVar(eltwise_bias->Name());
+    PADDLE_ENFORCE(eltwise_bias_var != nullptr,
+                   "elementwise_add bias variable not found in param scope");
+    auto* eltwise_bias_tensor = eltwise_bias_var->GetMutable<LoDTensor>();
+
+    auto input_names = conv->Op()->InputNames();
+    bool has_bias = std::find(input_names.begin(), input_names.end(), "Bias") !=
+                    input_names.end();
+    if (has_bias && conv->Op()->Input("Bias").size() > 0) {
+      auto conv_bias_names = conv->Op()->Input("Bias");
+      // add eltwise bias to existing conv bias
+      PADDLE_ENFORCE_EQ(conv_bias_names.size(), 1);
+      auto* conv_bias_var = scope->FindVar(conv_bias_names[0]);
+      PADDLE_ENFORCE(conv_bias_var != nullptr,
+                     "conv2d bias variable not found in param scope");
+      auto* conv_bias_tensor = conv_bias_var->GetMutable<LoDTensor>();
+      PADDLE_ENFORCE_EQ(conv_bias_tensor->dims(), eltwise_bias_tensor->dims());
+      *conv_bias_tensor = tensor_apply_eltwise(
+          *conv_bias_tensor, *eltwise_bias_tensor, std::plus<float>());
+
+      conv->Op()->SetOutput("Output",
+                            std::vector<std::string>({eltwise_out->Name()}));
+
+      // Use the graph handed to the handler rather than the captured owner.
+      GraphSafeRemoveNodes(g, {eltwise, conv_out});
+
+      IR_NODE_LINK_TO(conv, eltwise_out);
+    } else {
+      // take eltwise bias as conv bias
+      OpDesc desc;
+
+      desc.SetInput(
+          "Input", std::vector<std::string>({subgraph.at(conv_input)->Name()}));
+      desc.SetInput("Filter", std::vector<std::string>({conv_weight->Name()}));
+      desc.SetInput("Bias", std::vector<std::string>({eltwise_bias->Name()}));
+      desc.SetOutput("Output", std::vector<std::string>({eltwise_out->Name()}));
+      desc.SetType("conv2d");
+
+      for (auto& attr : conv->Op()->GetAttrMap()) {
+        desc.SetAttr(attr.first, attr.second);
+      }
+      auto conv_bias_node = g->CreateOpNode(&desc);
+
+      IR_NODE_LINK_TO(subgraph.at(conv_input), conv_bias_node);
+      IR_NODE_LINK_TO(conv_weight, conv_bias_node);
+      IR_NODE_LINK_TO(eltwise_bias, conv_bias_node);
+      IR_NODE_LINK_TO(conv_bias_node, eltwise_out);
+
+      GraphSafeRemoveNodes(g, {conv, eltwise, conv_out});
+    }
+
+    found_conv_bias_count++;
+  };
+  gpd(graph.get(), handler);
+  AddStatis(found_conv_bias_count);
+  return graph;
+}
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+REGISTER_PASS(conv_bias_mkldnn_fuse_pass,
+              paddle::framework::ir::ConvBiasFusePass);
--- a/paddle/fluid/framework/ir/conv_bias_mkldnn_fuse_pass.h
+++ b/paddle/fluid/framework/ir/conv_bias_mkldnn_fuse_pass.h
@ -0,0 +1,36 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include <string>
+#include "paddle/fluid/framework/ir/fuse_pass_base.h"
+#include "paddle/fluid/framework/ir/graph.h"
+#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
+#include "paddle/fluid/framework/ir/pass.h"
+namespace paddle {
+namespace framework {
+namespace ir {
+/*
+* Fuses a conv2d op and the elementwise_add op consuming its output into a
+* single conv2d op that receives the addend through its "Bias" input.
+* The implementation only rewrites pairs for which FindFuseOption() reports
+* FUSE_MKLDNN.
+*/
+class ConvBiasFusePass : public FusePassBase {
+ public:
+  virtual ~ConvBiasFusePass() {}
+
+ protected:
+  std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const;  // rewrites and returns the given graph
+  const std::string name_scope_{"conv_bias_mkldnn_fuse"};  // prefix for pattern node names; also passed to Init() as the pass label
+};
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
--- a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc
--- a/paddle/fluid/framework/ir/conv_bn_fuse_pass.h
+++ b/paddle/fluid/framework/ir/conv_bn_fuse_pass.h
@ -0,0 +1,49 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <string>
+#include "paddle/fluid/framework/ir/fuse_pass_base.h"
+#include "paddle/fluid/framework/ir/graph.h"
+#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+/*
+ * Fuse the Conv and BatchNorm to a ConvBNMKLDNNOp.
+ *
+ * NOTE(review): the CMake list registers conv_bn_fuse_pass outside of the
+ * WITH_MKLDNN gate, so the "MKLDNN" in the sentence above may be stale;
+ * confirm against conv_bn_fuse_pass.cc.
+ */
+class ConvBNFusePass : public FusePassBase {
+ public:
+  virtual ~ConvBNFusePass() {}
+
+ protected:
+  std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const;  // pass entry point; defined in the .cc file
+  const std::string name_scope_{"conv_bn_fuse"};  // label used by this pass
+};
+
+class ConvEltwiseAddBNFusePass : public FusePassBase {  // NOTE(review): presumably conv + elementwise_add + batch_norm; implementation not visible here
+ public:
+  virtual ~ConvEltwiseAddBNFusePass() {}
+
+ protected:
+  std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const;  // pass entry point; defined in the .cc file
+  const std::string name_scope_{"conv_eltwiseadd_bn_fuse"};  // label used by this pass
+};
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
--- a/paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass.cc
@ -0,0 +1,154 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass.h"
+#include <functional>
+#include <utility>
+
+#include "paddle/fluid/framework/ir/graph_traits.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+namespace {
+
+// Keeps the graph consistent after `from` has been superseded by `to`:
+// every node that lists `from` among its input nodes gets an edge from `to`,
+// and each occurrence of from->Name() in its op description's inputs is
+// renamed to to->Name().
+//
+// Only the matching argument names are rewritten; other names stored under
+// the same input slot are kept, so ops with several arguments per slot do not
+// lose inputs.
+//
+// graph: graph whose nodes are visited.
+// from:  variable node being replaced (its own edges are left untouched here).
+// to:    variable node that takes over as the input.
+void CorrectGraphEdges(Graph* graph, Node* from, Node* to) {
+  for (auto& node : GraphTraits::DFS(*graph)) {
+    auto from_in_inputs =
+        std::find(std::begin(node.inputs), std::end(node.inputs), from);
+    if (from_in_inputs == std::end(node.inputs)) continue;
+
+    IR_NODE_LINK_TO(to, (&node));
+
+    // Work on a copy: SetInput() below modifies the op's own input map.
+    const auto inputs = node.Op()->Inputs();
+
+    for (const auto& input : inputs) {
+      auto param_names = input.second;
+      bool renamed = false;
+
+      for (auto& param_name : param_names) {
+        if (param_name == from->Name()) {
+          param_name = to->Name();
+          renamed = true;
+        }
+      }
+
+      if (renamed) {
+        node.Op()->SetInput(input.first, param_names);
+      }
+    }
+  }
+}
+}  // namespace
+using graph_ptr = std::unique_ptr<ir::Graph>;
+
+// Fuses conv2d followed by elementwise_add into one conv2d op that receives
+// the other addend through its "ResidualData" input and has the
+// "fuse_residual_connection" attribute set.
+//
+// Only op pairs for which FindFuseOption() reports FUSE_MKLDNN are rewritten.
+// The fused op writes to the original conv output variable; consumers of the
+// elementwise_add output are redirected to it by CorrectGraphEdges().
+//
+// graph: graph to rewrite; must be non-null.
+// Returns the same graph after rewriting.
+graph_ptr ConvElementwiseAddMKLDNNFusePass::ApplyImpl(graph_ptr graph) const {
+  PADDLE_ENFORCE(graph.get());
+  FusePassBase::Init(name_scope_, graph.get());
+
+  GraphPatternDetector gpd;
+  auto pattern = gpd.mutable_pattern();
+
+  patterns::Conv conv_pattern{pattern, name_scope_};
+  auto conv_output = conv_pattern();
+
+  patterns::ElementwiseAdd elementwise_add_pattern{pattern, name_scope_};
+  elementwise_add_pattern(conv_output);
+
+  conv_output->AsIntermediate();
+
+  // Returns {true, bias node} when the op description lists a non-empty
+  // "Bias" input, {false, nullptr} otherwise.
+  auto conv_op_has_bias = [](const Node& conv_op) -> std::pair<bool, Node*> {
+    auto bias_input_names = conv_op.Op()->Inputs();
+    auto bias_it = bias_input_names.find("Bias");
+
+    if (bias_it != std::end(bias_input_names)) {
+      bool has_bias = !bias_it->second.empty();
+
+      if (has_bias) {
+        auto conv_bias_names = bias_it->second;
+        auto conv_bias_names_it =
+            std::find_if(std::begin(conv_op.inputs), std::end(conv_op.inputs),
+                         [&conv_bias_names](Node* n) -> bool {
+                           return n->Name() == conv_bias_names[0];
+                         });
+        // The op description and the graph edges must agree; dereferencing
+        // the end iterator would be undefined behaviour.
+        PADDLE_ENFORCE(conv_bias_names_it != std::end(conv_op.inputs),
+                       "conv2d Bias input has no matching input node");
+        return std::make_pair(has_bias, *conv_bias_names_it);
+      }
+    }
+
+    return std::make_pair(false, nullptr);
+  };
+
+  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
+                     Graph* g) {
+    GET_IR_NODE_FROM_SUBGRAPH(conv_op, conv_op, conv_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(conv_input, conv_input, conv_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(conv_filter, conv_filter, conv_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(conv_output, conv_output, conv_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_op, elementwise_add_op,
+                              elementwise_add_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_x, elementwise_add_x,
+                              elementwise_add_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_out, elementwise_add_out,
+                              elementwise_add_pattern);
+
+    if (FindFuseOption(*conv_op, *elementwise_add_op) != FUSE_MKLDNN) return;
+
+    OpDesc op_desc;
+    op_desc.SetType("conv2d");
+
+    op_desc.SetInput("Input", {conv_input->Name()});
+    op_desc.SetInput("Filter", {conv_filter->Name()});
+    op_desc.SetInput("ResidualData", {elementwise_add_x->Name()});
+    op_desc.SetOutput("Output", {conv_output->Name()});
+
+    bool has_bias;
+    Node* conv_bias;
+
+    std::tie(has_bias, conv_bias) = conv_op_has_bias(*conv_op);
+
+    if (has_bias) {
+      op_desc.SetInput("Bias", {conv_bias->Name()});
+    }
+
+    for (const auto& attr : conv_op->Op()->GetAttrMap()) {
+      op_desc.SetAttr(attr.first, attr.second);
+    }
+
+    op_desc.SetAttr("fuse_residual_connection", true);
+
+    auto fused_conv_op = g->CreateOpNode(&op_desc);
+
+    IR_NODE_LINK_TO(conv_input, fused_conv_op);
+    IR_NODE_LINK_TO(conv_filter, fused_conv_op);
+    IR_NODE_LINK_TO(elementwise_add_x, fused_conv_op);
+    IR_NODE_LINK_TO(fused_conv_op, conv_output);
+
+    if (has_bias) {
+      IR_NODE_LINK_TO(conv_bias, fused_conv_op);
+    }
+
+    // Redirect consumers of the add output first, then drop the old nodes.
+    CorrectGraphEdges(g, elementwise_add_out, conv_output);
+    GraphSafeRemoveNodes(g, {elementwise_add_out, conv_op, elementwise_add_op});
+  };
+
+  gpd(graph.get(), handler);
+
+  return graph;
+}
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+REGISTER_PASS(conv_elementwise_add_mkldnn_fuse_pass,
+              paddle::framework::ir::ConvElementwiseAddMKLDNNFusePass);
--- a/paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass.h
+++ b/paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass.h
@ -0,0 +1,38 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <string>
+#include "paddle/fluid/framework/ir/fuse_pass_base.h"
+#include "paddle/fluid/framework/ir/graph.h"
+#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+class ConvElementwiseAddMKLDNNFusePass : public FusePassBase {  // folds conv2d + elementwise_add into one conv2d with a "ResidualData" input
+ public:
+  virtual ~ConvElementwiseAddMKLDNNFusePass() {}
+
+ protected:
+  std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const;  // rewrites and returns the given graph
+
+  const std::string name_scope_{"residual_connections_fuse_pass"};  // handed to the patterns and to Init() as the pass label
+};
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
--- a/paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass_tester.cc
+++ b/paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass_tester.cc
@ -0,0 +1,247 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gtest/gtest.h>
+#include <string>
+
+#include "paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass.h"
+#include "paddle/fluid/framework/ir/graph_traits.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+namespace {
+constexpr int nodes_removed = 3;
+constexpr int nodes_added = 1;
+
+// Appends an op of the given type to block 0 of `prog`, marks it with
+// use_mkldnn = true and wires its inputs/output.
+//
+// inputs: (slot name, variable name) pairs, one variable per slot.
+// output: (slot name, variable name) of the single output.
+void SetOp(ProgramDesc* prog, const std::string& type,
+           const std::vector<std::pair<std::string, std::string>>& inputs,
+           const std::pair<std::string, std::string>& output) {
+  auto* block = prog->MutableBlock(0);
+  auto* op_desc = block->AppendOp();
+
+  op_desc->SetType(type);
+  op_desc->SetAttr("use_mkldnn", true);
+
+  for (const auto& slot_and_var : inputs) {
+    const std::string& slot = slot_and_var.first;
+    const std::string& var_name = slot_and_var.second;
+    op_desc->SetInput(slot, {var_name});
+  }
+
+  const std::string& out_slot = output.first;
+  const std::string& out_var = output.second;
+  op_desc->SetOutput(out_slot, {out_var});
+}
+
+// Factory for a reachability predicate over a graph: the returned callable
+// answers whether a node named `to` can be reached from a node named `from`
+// by following output edges (breadth-first).
+//
+// The predicate keeps a reference to the unique_ptr passed to operator(), so
+// that pointer must stay alive for as long as the predicate is used.
+struct IsReachable {
+  using func = std::function<bool(const std::string&, const std::string&)>;
+
+  auto operator()(const std::unique_ptr<ir::Graph>& graph) -> func {
+    // Linear lookup of the first node with the given name; nullptr if absent.
+    auto find_node = [](const std::unique_ptr<ir::Graph>& graph,
+                        const std::string& name) -> Node* {
+      for (auto& node : GraphTraits::DFS(*graph)) {
+        if (name == node.Name()) {
+          return &node;
+        }
+      }
+
+      return nullptr;
+    };
+
+    // `find_node` is a local of this call, so it is copied into the closure;
+    // capturing it by reference would leave the returned predicate with a
+    // dangling reference once operator() returns.
+    return [&graph, find_node](std::string from, const std::string to) -> bool {
+      if (from == to) return true;
+
+      std::map<std::string, bool> visited;
+
+      for (auto& node : GraphTraits::DFS(*graph)) {
+        visited[node.Name()] = false;
+      }
+
+      visited[from] = true;
+
+      std::list<std::string> queue;
+      queue.push_back(from);
+
+      while (!queue.empty()) {
+        auto cur = find_node(graph, queue.front());
+        queue.pop_front();
+
+        if (cur == nullptr) return false;
+
+        for (auto n : cur->outputs) {
+          if (n->Name() == to) return true;
+
+          if (!visited[n->Name()]) {
+            visited[n->Name()] = true;
+            queue.push_back(n->Name());
+          }
+        }
+      }
+      return false;
+    };
+  }
+};
+
+// Expects the graph to contain exactly one conv2d op and no elementwise_add
+// op, i.e. the state after a successful fusion.
+void AssertOpsCount(const std::unique_ptr<ir::Graph>& graph) {
+  int conv_count = 0;
+  int elementwise_add_count = 0;
+
+  for (auto* node : graph->Nodes()) {
+    if (!node->IsOp()) continue;
+
+    const std::string op_type = node->Op()->Type();
+    if (op_type == "conv2d") {
+      ++conv_count;
+    } else if (op_type == "elementwise_add") {
+      ++elementwise_add_count;
+    }
+  }
+
+  EXPECT_EQ(conv_count, 1);
+  EXPECT_EQ(elementwise_add_count, 0);
+}
+
+// Creates a program whose block 0 declares every listed name as a LOD_TENSOR
+// variable; names in `persistent_vars` are additionally marked persistable.
+ProgramDesc BuildProgramDesc(const std::vector<std::string>& transient_vars,
+                             const std::vector<std::string>& persistent_vars) {
+  ProgramDesc prog;
+  auto* block = prog.MutableBlock(0);
+
+  for (const auto& name : transient_vars) {
+    block->Var(name)->SetType(proto::VarType::LOD_TENSOR);
+  }
+
+  for (const auto& name : persistent_vars) {
+    auto* var_desc = block->Var(name);
+    var_desc->SetType(proto::VarType::LOD_TENSOR);
+    var_desc->SetPersistable(true);
+  }
+
+  return prog;
+}
+}  // namespace
+
+// conv2d (with bias) -> elementwise_add -> relu: after the pass the add is
+// gone, one conv2d remains and relu is still reachable from the input.
+TEST(ConvElementwiseAddMKLDNNFusePass, ConvolutionWithElementwiseAddRelu) {
+  auto prog =
+      BuildProgramDesc({"a", "b", "c", "d", "e", "f"}, {"bias", "weights"});
+
+  SetOp(&prog, "conv2d",
+        {{"Input", "a"}, {"Bias", "bias"}, {"Filter", "weights"}},
+        {"Output", "b"});
+  SetOp(&prog, "elementwise_add", {{"X", "b"}, {"Y", "c"}}, {"Out", "d"});
+  SetOp(&prog, "relu", {{"X", "d"}}, {"Out", "e"});
+
+  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
+
+  IsReachable is_reachable;
+  EXPECT_TRUE(is_reachable(graph)("a", "relu"));
+
+  auto pass =
+      PassRegistry::Instance().Get("conv_elementwise_add_mkldnn_fuse_pass");
+
+  const int nodes_before = graph->Nodes().size();
+  graph = pass->Apply(std::move(graph));
+  const int nodes_after = graph->Nodes().size();
+
+  EXPECT_TRUE(is_reachable(graph)("a", "relu"));
+
+  EXPECT_EQ(nodes_before - nodes_removed + nodes_added, nodes_after);
+
+  AssertOpsCount(graph);
+}
+
+// Same chain as the biased case, but the conv2d has no Bias input.
+TEST(ConvElementwiseAddMKLDNNFusePass,
+     ConvolutionWithElementwiseAddReluNoBias) {
+  auto prog = BuildProgramDesc({"a", "b", "c", "d", "e"}, {"weights"});
+
+  SetOp(&prog, "conv2d", {{"Input", "a"}, {"Filter", "weights"}},
+        {"Output", "b"});
+  SetOp(&prog, "elementwise_add", {{"X", "b"}, {"Y", "c"}}, {"Out", "d"});
+  SetOp(&prog, "relu", {{"X", "d"}}, {"Out", "e"});
+
+  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
+
+  IsReachable is_reachable;
+  EXPECT_TRUE(is_reachable(graph)("a", "relu"));
+
+  auto pass =
+      PassRegistry::Instance().Get("conv_elementwise_add_mkldnn_fuse_pass");
+
+  const int nodes_before = graph->Nodes().size();
+  graph = pass->Apply(std::move(graph));
+  const int nodes_after = graph->Nodes().size();
+
+  EXPECT_TRUE(is_reachable(graph)("a", "relu"));
+
+  EXPECT_EQ(nodes_before - nodes_removed + nodes_added, nodes_after);
+
+  AssertOpsCount(graph);
+}
+
+// conv2d -> elementwise_add with no consumer: the pass removes the add output
+// variable "d", so it is expected to be unreachable afterwards.
+TEST(ConvElementwiseAddMKLDNNFusePass, ConvolutionElementwiseAdd) {
+  auto prog = BuildProgramDesc({"a", "b", "c", "d"}, {"bias", "weights"});
+
+  SetOp(&prog, "conv2d",
+        {{"Input", "a"}, {"Bias", "bias"}, {"Filter", "weights"}},
+        {"Output", "b"});
+  SetOp(&prog, "elementwise_add", {{"X", "b"}, {"Y", "c"}}, {"Out", "d"});
+
+  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
+
+  IsReachable is_reachable;
+  EXPECT_TRUE(is_reachable(graph)("a", "d"));
+
+  auto pass =
+      PassRegistry::Instance().Get("conv_elementwise_add_mkldnn_fuse_pass");
+
+  const int nodes_before = graph->Nodes().size();
+  graph = pass->Apply(std::move(graph));
+  const int nodes_after = graph->Nodes().size();
+
+  EXPECT_FALSE(is_reachable(graph)("a", "d"));
+
+  EXPECT_EQ(nodes_before - nodes_removed + nodes_added, nodes_after);
+
+  AssertOpsCount(graph);
+}
+
+// sigmoid -> conv2d -> elementwise_add -> relu: the conv2d is not the first
+// op, and the last variable "f" must stay reachable from "a" after fusion.
+TEST(ConvElementwiseAddMKLDNNFusePass, SigmoidConvolutionAddElementwiseRelu) {
+  auto prog =
+      BuildProgramDesc({"a", "b", "c", "d", "e", "f"}, {"bias", "weights"});
+
+  SetOp(&prog, "sigmoid", {{"X", "a"}}, {"Out", "b"});
+  SetOp(&prog, "conv2d",
+        {{"Input", "b"}, {"Bias", "bias"}, {"Filter", "weights"}},
+        {"Output", "c"});
+  SetOp(&prog, "elementwise_add", {{"X", "c"}, {"Y", "d"}}, {"Out", "e"});
+  SetOp(&prog, "relu", {{"X", "e"}}, {"Out", "f"});
+
+  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
+
+  IsReachable is_reachable;
+  EXPECT_TRUE(is_reachable(graph)("a", "f"));
+
+  auto pass =
+      PassRegistry::Instance().Get("conv_elementwise_add_mkldnn_fuse_pass");
+
+  const int nodes_before = graph->Nodes().size();
+  graph = pass->Apply(std::move(graph));
+  const int nodes_after = graph->Nodes().size();
+
+  EXPECT_TRUE(is_reachable(graph)("a", "f"));
+
+  EXPECT_EQ(nodes_before - nodes_removed + nodes_added, nodes_after);
+
+  AssertOpsCount(graph);
+}
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+USE_PASS(conv_elementwise_add_mkldnn_fuse_pass);
--- a/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc
@ -46,6 +46,12 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl(
    GET_IR_NODE_FROM_SUBGRAPH(relu_out, relu_out, conv_relu_pattern);  // Out
    GET_IR_NODE_FROM_SUBGRAPH(relu, relu, conv_relu_pattern);  // ReLU op

+    FuseOptions fuse_option = FindFuseOption(*conv, *relu);
+    if (fuse_option == DO_NOT_FUSE) {
+      VLOG(3) << "do not perform conv+relu fuse";
+      return;
+    }
+
    // Transform Conv node into ConvReLU node.
    OpDesc* desc = conv->Op();
    desc->SetOutput("Output", std::vector<std::string>({relu_out->Name()}));
--- a/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass_tester.cc
+++ b/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass_tester.cc
@ -20,17 +20,19 @@ namespace paddle {
 namespace framework {
 namespace ir {

-void SetOp(ProgramDesc* prog, const std::string& type,
+void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
           const std::vector<std::string>& inputs,
-           const std::vector<std::string>& outputs) {
+           const std::vector<std::string>& outputs, bool use_mkldnn = false) {
  auto* op = prog->MutableBlock(0)->AppendOp();
  op->SetType(type);
  if (type == "conv2d") {
-    op->SetAttr("use_mkldnn", true);
+    op->SetAttr("use_mkldnn", use_mkldnn);
+    op->SetAttr("name", name);
    op->SetInput("Input", {inputs[0]});
    op->SetInput("Filter", {inputs[1]});
    op->SetInput("Bias", {inputs[2]});
  } else if (type == "relu") {
+    op->SetAttr("use_mkldnn", use_mkldnn);
    op->SetInput("X", inputs);
  }
  op->SetOutput("Out", outputs);
@ -43,7 +45,8 @@ void SetOp(ProgramDesc* prog, const std::string& type,
 ProgramDesc BuildProgramDesc() {
  ProgramDesc prog;
  for (auto& v :
-       std::vector<std::string>({"a", "b", "c", "weights", "bias", "f", "g"})) {
+       std::vector<std::string>({"a", "b", "c", "weights", "bias", "f", "g",
+                                 "h", "weights2", "bias2", "k", "l"})) {
    auto* var = prog.MutableBlock(0)->Var(v);
    var->SetType(proto::VarType::SELECTED_ROWS);
    if (v == "weights" || v == "bias") {
@ -51,14 +54,24 @@ ProgramDesc BuildProgramDesc() {
    }
  }

-  SetOp(&prog, "OP0", std::vector<std::string>({"a"}),
+  SetOp(&prog, "OP0", "op0", std::vector<std::string>({"a"}),
        std::vector<std::string>({"b"}));
-  SetOp(&prog, "OP1", std::vector<std::string>({"b"}),
+  SetOp(&prog, "OP1", "op1", std::vector<std::string>({"b"}),
        std::vector<std::string>({"c"}));
-  SetOp(&prog, "conv2d", std::vector<std::string>({"c", "weights", "bias"}),
-        std::vector<std::string>({"f"}));
-  SetOp(&prog, "relu", std::vector<std::string>({"f"}),
-        std::vector<std::string>({"g"}));
+  // conv+relu, both with MKL-DNN
+  SetOp(&prog, "conv2d", "conv1",
+        std::vector<std::string>({"c", "weights", "bias"}),
+        std::vector<std::string>({"f"}), true);
+  SetOp(&prog, "relu", "relu1", std::vector<std::string>({"f"}),
+        std::vector<std::string>({"g"}), true);
+  SetOp(&prog, "OP3", "op3", std::vector<std::string>({"g"}),
+        std::vector<std::string>({"h"}));
+  // conv+relu, only one with MKL-DNN
+  SetOp(&prog, "conv2d", "conv2",
+        std::vector<std::string>({"h", "weights2", "bias2"}),
+        std::vector<std::string>({"k"}), true);
+  SetOp(&prog, "relu", "relu2", std::vector<std::string>({"k"}),
+        std::vector<std::string>({"l"}));

  return prog;
 }
@ -88,10 +101,16 @@ TEST(ConvReLUFusePass, basic) {
      auto* op = node->Op();
      ASSERT_TRUE(op->HasAttr("use_mkldnn"));
      EXPECT_TRUE(boost::get<bool>(op->GetAttr("use_mkldnn")));
-      ASSERT_TRUE(op->HasAttr("fuse_relu"));
-      bool fuse_relu = boost::get<bool>(op->GetAttr("fuse_relu"));
-      if (fuse_relu) {
-        ++conv_relu_count;
+      // check if only "conv1" convolution is fused
+      auto op_name = boost::get<std::string>(op->GetAttr("name"));
+      if (op_name == "conv1") {
+        ASSERT_TRUE(op->HasAttr("fuse_relu"));
+        bool fuse_relu = boost::get<bool>(op->GetAttr("fuse_relu"));
+        if (fuse_relu) {
+          ++conv_relu_count;
+        }
+      } else if (op_name == "conv2") {
+        ASSERT_FALSE(op->HasAttr("fuse_relu"));
      }
    }
  }
--- a/paddle/fluid/framework/ir/fuse_pass_base.cc
+++ b/paddle/fluid/framework/ir/fuse_pass_base.cc
@ -0,0 +1,62 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/ir/fuse_pass_base.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+// Stores the graph being processed and the label of the running pass in the
+// pass's mutable members; param_scope() and AddStatis() read them later.
+void FusePassBase::Init(const std::string& repr, Graph* graph) const {
+  graph_ = graph;
+  repr_ = repr;
+}
+
+// Returns the Scope stored on the current graph under kParamScopeAttr.
+// Fails if Init() was given a null graph or the graph lacks that attribute.
+Scope* FusePassBase::param_scope() const {
+  // Same guard as AddStatis(): never dereference a null graph pointer.
+  PADDLE_ENFORCE(graph_);
+  PADDLE_ENFORCE(graph_->Has(kParamScopeAttr));
+  return graph_->Get<framework::Scope*>(kParamScopeAttr);
+}
+
+// Records how many fusions the current pass performed. The counts live in a
+// map attribute (kFuseStatisAttr) on the graph, keyed by the label given to
+// Init(); the map is created on first use.
+void FusePassBase::AddStatis(int count_of_fused) const {
+  using FuseStatis = std::unordered_map<std::string, int>;
+
+  PADDLE_ENFORCE(graph_);
+  PADDLE_ENFORCE(!repr_.empty());
+
+  const bool has_statis = graph_->Has(kFuseStatisAttr);
+  if (!has_statis) {
+    graph_->Set(kFuseStatisAttr, new FuseStatis);
+  }
+
+  FuseStatis& statis = graph_->Get<FuseStatis>(kFuseStatisAttr);
+  statis[repr_] = count_of_fused;
+}
+
+// Decides whether two op nodes may be fused and with which backend.
+//
+// With PADDLE_WITH_MKLDNN defined, an op counts as an MKL-DNN op when it has
+// a "use_mkldnn" attribute set to true:
+//   both ops MKL-DNN     -> FUSE_MKLDNN
+//   neither op MKL-DNN   -> FUSE_NATIVE
+//   exactly one MKL-DNN  -> DO_NOT_FUSE
+// Without PADDLE_WITH_MKLDNN the answer is always FUSE_NATIVE.
+FuseOptions FusePassBase::FindFuseOption(const Node& node1,
+                                         const Node& node2) const {
+#ifdef PADDLE_WITH_MKLDNN
+  auto uses_mkldnn = [](const Node& node) -> bool {
+    return node.Op()->HasAttr("use_mkldnn") &&
+           boost::get<bool>(node.Op()->GetAttr("use_mkldnn"));
+  };
+
+  const bool node1_mkldnn = uses_mkldnn(node1);
+  const bool node2_mkldnn = uses_mkldnn(node2);
+
+  // Mixed backends cannot be merged into one kernel.
+  if (node1_mkldnn != node2_mkldnn) return DO_NOT_FUSE;
+  return node1_mkldnn ? FUSE_MKLDNN : FUSE_NATIVE;
+#else
+  return FUSE_NATIVE;
+#endif
+}
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
--- a/paddle/fluid/framework/ir/fuse_pass_base.h
+++ b/paddle/fluid/framework/ir/fuse_pass_base.h
@ -25,32 +25,24 @@ namespace ir {
 static const char kParamScopeAttr[] = "__param_scope__";
 static const char kFuseStatisAttr[] = "__fuse_statis__";

+enum FuseOptions {  // possible results of FusePassBase::FindFuseOption()
+  DO_NOT_FUSE,  // fusing will not be done (only one of the two ops uses MKL-DNN)
+  FUSE_NATIVE,  // fusing will be done without MKL-DNN
+  FUSE_MKLDNN   // fusing will be done with MKL-DNN
+};
+
 class FusePassBase : public Pass {
 public:
-  void Init(const std::string& repr, Graph* graph) const {
-    repr_ = repr;
-    graph_ = graph;
-  }
-
-  Scope* param_scope() const {
-    PADDLE_ENFORCE(graph_->Has(kParamScopeAttr));
-    return graph_->Get<framework::Scope*>(kParamScopeAttr);
-  }
-
-  void AddStatis(int count_of_fused) const {
-    PADDLE_ENFORCE(graph_);
-    PADDLE_ENFORCE(!repr_.empty());
-    if (!graph_->Has(kFuseStatisAttr)) {
-      graph_->Set(kFuseStatisAttr, new std::unordered_map<std::string, int>);
-    }
-    auto& info =
-        graph_->Get<std::unordered_map<std::string, int>>(kFuseStatisAttr);
-    info[repr_] = count_of_fused;
-  }
+  void Init(const std::string& repr, Graph* graph) const;  // stores the pass label and the graph for the helpers below
+  Scope* param_scope() const;  // Scope stored on the graph under kParamScopeAttr
+  void AddStatis(int count_of_fused) const;  // records the fuse count under the label given to Init()

  virtual ~FusePassBase() {}

 protected:
+  virtual FuseOptions FindFuseOption(const Node& node1,
+                                     const Node& node2) const;  // whether, and with which backend, two ops may be fused
+
  mutable Graph* graph_;  // set by Init(); mutable because the pass methods are const
  mutable std::string repr_;  // label set by Init(); key used by AddStatis()
 };
--- a/paddle/fluid/framework/ir/graph_pattern_detector.cc
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc
--- a/Show More
+++ b/Show More