merge develop, fix conflict

test=develop
6 years ago · fd23262e0c
parent e893cbd286 ead558b7f6
commit fd23262e0c
165 changed files with 6149 additions and 1129 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -24,6 +24,8 @@ message(STATUS "CXX compiler: ${CMAKE_CXX_COMPILER}, version: "
        "${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
 message(STATUS "C compiler: ${CMAKE_C_COMPILER}, version: "
        "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
 message(STATUS "AR tools: ${CMAKE_AR}")
 if(WIN32)
    set(CMAKE_SUPPRESS_REGENERATION ON)
    set(CMAKE_STATIC_LIBRARY_PREFIX lib)
--- a/cmake/operators.cmake
+++ b/cmake/operators.cmake
@ -110,7 +110,7 @@ function(op_library TARGET)
    # Define operators that don't need pybind here.
    foreach(manual_pybind_op "compare_op" "logical_op" "nccl_op"
 "tensor_array_read_write_op" "tensorrt_engine_op" "conv_fusion_op"
-"fusion_transpose_flatten_concat_op" "fusion_conv_inception_op")
+"fusion_transpose_flatten_concat_op" "fusion_conv_inception_op" "sync_batch_norm_op")
        if ("${TARGET}" STREQUAL "${manual_pybind_op}")
            set(pybind_flag 1)
        endif()
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@ -12,7 +12,7 @@ paddle.fluid.program_guard (ArgSpec(args=['main_program', 'startup_program'], va
 paddle.fluid.name_scope (ArgSpec(args=['prefix'], varargs=None, keywords=None, defaults=(None,)), ('document', '0ef753f5cec69fef9ae6ad8b867b33a2'))
 paddle.fluid.Executor.__init__ (ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.Executor.close (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'f5369953dd0c443961cf79f7a00e1a03'))
-paddle.fluid.Executor.run (ArgSpec(args=['self', 'program', 'feed', 'fetch_list', 'feed_var_name', 'fetch_var_name', 'scope', 'return_numpy', 'use_program_cache'], varargs=None, keywords=None, defaults=(None, None, None, 'feed', 'fetch', None, True, False)), ('document', 'aba8093edebf2d5c869b735b92811e45'))
+paddle.fluid.Executor.run (ArgSpec(args=['self', 'program', 'feed', 'fetch_list', 'feed_var_name', 'fetch_var_name', 'scope', 'return_numpy', 'use_program_cache'], varargs=None, keywords=None, defaults=(None, None, None, 'feed', 'fetch', None, True, False)), ('document', 'f482e93b38b4018796969a2e1dde479d'))
 paddle.fluid.global_scope (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'e148d3ab1ed8edf3e928212a375959c0'))
 paddle.fluid.scope_guard (ArgSpec(args=['scope'], varargs=None, keywords=None, defaults=None), ('document', 'b94d1f6bcc29c4fb58fc0058561250c2'))
 paddle.fluid.DistributeTranspiler.__init__ (ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
@ -68,7 +68,7 @@ paddle.fluid.initializer.MSRAInitializer.__init__ (ArgSpec(args=['self', 'unifor
 paddle.fluid.initializer.force_init_on_cpu (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '6d0f3e22c90d9d500d36ff57daf056ee'))
 paddle.fluid.initializer.init_on_cpu (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'a6d7011ca3d8c0d454dac3a56eae0c29'))
 paddle.fluid.initializer.NumpyArrayInitializer.__init__ (ArgSpec(args=['self', 'value'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.layers.fc (ArgSpec(args=['input', 'size', 'num_flatten_dims', 'param_attr', 'bias_attr', 'act', 'is_test', 'name'], varargs=None, keywords=None, defaults=(1, None, None, None, False, None)), ('document', '1929058262994f212620599c63aea6bd'))
+paddle.fluid.layers.fc (ArgSpec(args=['input', 'size', 'num_flatten_dims', 'param_attr', 'bias_attr', 'act', 'is_test', 'name'], varargs=None, keywords=None, defaults=(1, None, None, None, False, None)), ('document', '424e898365195e3ccbc2e7dc8b63605e'))
 paddle.fluid.layers.embedding (ArgSpec(args=['input', 'size', 'is_sparse', 'is_distributed', 'padding_idx', 'param_attr', 'dtype'], varargs=None, keywords=None, defaults=(False, False, None, None, 'float32')), ('document', '89c2c55a0b0656b106064048e068e77a'))
 paddle.fluid.layers.dynamic_lstm (ArgSpec(args=['input', 'size', 'h_0', 'c_0', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, None, None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'float32', None)), ('document', 'dfbb624f85015df29e994ca6999e8ff6'))
 paddle.fluid.layers.dynamic_lstmp (ArgSpec(args=['input', 'size', 'proj_size', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'proj_activation', 'dtype', 'name', 'h_0', 'c_0', 'cell_clip', 'proj_clip'], varargs=None, keywords=None, defaults=(None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'tanh', 'float32', None, None, None, None, None)), ('document', 'b4b608b986eb9617aa0525e1be21d32d'))
@ -91,7 +91,7 @@ paddle.fluid.layers.pool2d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'po
 paddle.fluid.layers.pool3d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True)), ('document', '043de7333b79ee0ac55053c14ed81625'))
 paddle.fluid.layers.adaptive_pool2d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None)), ('document', '859b887174d06f361658f69cb7c06d95'))
 paddle.fluid.layers.adaptive_pool3d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None)), ('document', '120f4323a3d7ed9c0916f15a59f0e497'))
-paddle.fluid.layers.batch_norm (ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu', 'use_global_stats'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW', False, None, None, None, False, False, False)), ('document', 'c527b71b8a4c60dca8df8a745c2b598d'))
+paddle.fluid.layers.batch_norm (ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu', 'use_global_stats'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW', False, None, None, None, False, False, False)), ('document', '320c6973b02ea179fa89fecc80796464'))
 paddle.fluid.layers.data_norm (ArgSpec(args=['input', 'act', 'epsilon', 'param_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var'], varargs=None, keywords=None, defaults=(None, 1e-05, None, 'NCHW', False, None, None, None, False)), ('document', 'e45e09e65a2658e07cad987222f0d9ab'))
 paddle.fluid.layers.beam_search_decode (ArgSpec(args=['ids', 'scores', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b0b8d53821716cd50c42e09b593f3feb'))
 paddle.fluid.layers.conv2d_transpose (ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None)), ('document', '03993955ab1e6d3044c44e6f17fc85e9'))
@ -330,7 +330,8 @@ paddle.fluid.layers.generate_mask_labels (ArgSpec(args=['im_info', 'gt_classes',
 paddle.fluid.layers.iou_similarity (ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '587845f60c5d97ffdf2dfd21da52eca1'))
 paddle.fluid.layers.box_coder (ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name', 'axis'], varargs=None, keywords=None, defaults=('encode_center_size', True, None, 0)), ('document', '032d0f4b7d8f6235ee5d91e473344f0e'))
 paddle.fluid.layers.polygon_box_transform (ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '0e5ac2507723a0b5adec473f9556799b'))
-paddle.fluid.layers.yolov3_loss (ArgSpec(args=['x', 'gtbox', 'gtlabel', 'anchors', 'anchor_mask', 'class_num', 'ignore_thresh', 'downsample_ratio', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '991e934c3e09abf0edec7c9c978b4691'))
+paddle.fluid.layers.yolov3_loss (ArgSpec(args=['x', 'gtbox', 'gtlabel', 'anchors', 'anchor_mask', 'class_num', 'ignore_thresh', 'downsample_ratio', 'gtscore', 'use_label_smooth', 'name'], varargs=None, keywords=None, defaults=(None, True, None)), ('document', '57fa96922e42db8f064c3fb77f2255e8'))
 paddle.fluid.layers.yolo_box (ArgSpec(args=['x', 'img_size', 'anchors', 'class_num', 'conf_thresh', 'downsample_ratio', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '5566169a5ab993d177792c023c7fb340'))
 paddle.fluid.layers.box_clip (ArgSpec(args=['input', 'im_info', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '397e9e02b451d99c56e20f268fa03f2e'))
 paddle.fluid.layers.multiclass_nms (ArgSpec(args=['bboxes', 'scores', 'score_threshold', 'nms_top_k', 'keep_top_k', 'nms_threshold', 'normalized', 'nms_eta', 'background_label', 'name'], varargs=None, keywords=None, defaults=(0.3, True, 1.0, 0, None)), ('document', 'ca7d1107b6c5d2d6d8221039a220fde0'))
 paddle.fluid.layers.distribute_fpn_proposals (ArgSpec(args=['fpn_rois', 'min_level', 'max_level', 'refer_level', 'refer_scale', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '7bb011ec26bace2bc23235aa4a17647d'))
@ -367,7 +368,7 @@ paddle.fluid.contrib.BeamSearchDecoder.read_array (ArgSpec(args=['self', 'init',
 paddle.fluid.contrib.BeamSearchDecoder.update_array (ArgSpec(args=['self', 'array', 'value'], varargs=None, keywords=None, defaults=None), ('document', '5754e9b3212b7c09497151516a0de5a7'))
 paddle.fluid.contrib.memory_usage (ArgSpec(args=['program', 'batch_size'], varargs=None, keywords=None, defaults=None), ('document', '8fcb2f93bb743693baa8d4860a5ccc47'))
 paddle.fluid.contrib.op_freq_statistic (ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None), ('document', '4d43687113c4bf5b29d15aee2f4e4afa'))
-paddle.fluid.contrib.QuantizeTranspiler.__init__ (ArgSpec(args=['self', 'weight_bits', 'activation_bits', 'activation_quantize_type', 'weight_quantize_type', 'window_size'], varargs=None, keywords=None, defaults=(8, 8, 'abs_max', 'abs_max', 10000)), ('document', '14b39f1fcd5667ff556b1aad94357d1d'))
+paddle.fluid.contrib.QuantizeTranspiler.__init__ (ArgSpec(args=['self', 'weight_bits', 'activation_bits', 'activation_quantize_type', 'weight_quantize_type', 'window_size', 'moving_rate'], varargs=None, keywords=None, defaults=(8, 8, 'abs_max', 'abs_max', 10000, 0.9)), ('document', '14b39f1fcd5667ff556b1aad94357d1d'))
 paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 (ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.contrib.QuantizeTranspiler.freeze_program (ArgSpec(args=['self', 'program', 'place', 'fuse_bn', 'scope'], varargs=None, keywords=None, defaults=(False, None)), ('document', '909675a1ab055c69b436a7893fcae4fd'))
 paddle.fluid.contrib.QuantizeTranspiler.training_transpile (ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6dd9909f10b283ba2892a99058a72884'))
@ -392,9 +393,9 @@ paddle.fluid.contrib.MagnitudePruner.__init__ (ArgSpec(args=['self', 'threshold'
 paddle.fluid.contrib.MagnitudePruner.prune (ArgSpec(args=['self', 'param', 'threshold'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.contrib.RatioPruner.__init__ (ArgSpec(args=['self', 'ratios'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e7a81a325b296a9ca502ee5adb4fc85d'))
 paddle.fluid.contrib.RatioPruner.prune (ArgSpec(args=['self', 'param', 'ratio'], varargs=None, keywords=None, defaults=(None,)), ('document', '358cbf2978c91028fb96a195a9884645'))
-paddle.fluid.contrib.load_persistables_for_increment (ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var', 'lookup_table_var_path'], varargs=None, keywords=None, defaults=None), ('document', '11fbf7e8dd2289805de291b453a33ee7'))
+paddle.fluid.contrib.load_persistables_for_increment (ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var', 'lookup_table_var_path'], varargs=None, keywords=None, defaults=None), ('document', '2ab36d4f7a564f5f65e455807ad06c67'))
-paddle.fluid.contrib.load_persistables_for_inference (ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var_name'], varargs=None, keywords=None, defaults=None), ('document', '5b5577bb3d24070da819674255d16196'))
+paddle.fluid.contrib.load_persistables_for_inference (ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var_name'], varargs=None, keywords=None, defaults=None), ('document', '59066bac9db0ac6ce414d05780b7333f'))
-paddle.fluid.contrib.convert_dist_to_sparse_program (ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None), ('document', '4efbd93876832d4d35497cdbc7a1e6d8'))
+paddle.fluid.contrib.convert_dist_to_sparse_program (ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None), ('document', '74c39c595dc70d6be2f16d8e462d282b'))
 paddle.fluid.contrib.HDFSClient.__init__ (ArgSpec(args=['self', 'hadoop_home', 'configs'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.contrib.HDFSClient.delete (ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None), ('document', 'c3721aa2d4d9ef5a857dd47b2681c03e'))
 paddle.fluid.contrib.HDFSClient.download (ArgSpec(args=['self', 'hdfs_path', 'local_path', 'overwrite', 'unzip'], varargs=None, keywords=None, defaults=(False, False)), ('document', 'ca55bde92184d3fd0f9f5c963b25e634'))
@ -493,7 +494,7 @@ paddle.fluid.CUDAPinnedPlace.__init__ __init__(self: paddle.fluid.core.CUDAPinne
 paddle.fluid.ParamAttr.__init__ (ArgSpec(args=['self', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, 1.0, None, True, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.WeightNormParamAttr.__init__ (ArgSpec(args=['self', 'dim', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, None, 1.0, None, True, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.DataFeeder.__init__ (ArgSpec(args=['self', 'feed_list', 'place', 'program'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.DataFeeder.decorate_reader (ArgSpec(args=['self', 'reader', 'multi_devices', 'num_places', 'drop_last'], varargs=None, keywords=None, defaults=(None, True)), ('document', '0eed2f198dc73c08a41b61edbc755753'))
+paddle.fluid.DataFeeder.decorate_reader (ArgSpec(args=['self', 'reader', 'multi_devices', 'num_places', 'drop_last'], varargs=None, keywords=None, defaults=(None, True)), ('document', 'f8f3df23c5633c614db781a91b81fb62'))
 paddle.fluid.DataFeeder.feed (ArgSpec(args=['self', 'iterable'], varargs=None, keywords=None, defaults=None), ('document', '459e316301279dfd82001b46f0b8ffca'))
 paddle.fluid.DataFeeder.feed_parallel (ArgSpec(args=['self', 'iterable', 'num_places'], varargs=None, keywords=None, defaults=(None,)), ('document', '543863d1f9d4853758adb613b8659e85'))
 paddle.fluid.clip.ErrorClipByValue.__init__ (ArgSpec(args=['self', 'max', 'min'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
@ -517,11 +518,11 @@ paddle.reader.compose (ArgSpec(args=[], varargs='readers', keywords='kwargs', de
 paddle.reader.chain (ArgSpec(args=[], varargs='readers', keywords=None, defaults=None), ('document', 'd22c34e379a53901ae67a6bca7f4def4'))
 paddle.reader.shuffle (ArgSpec(args=['reader', 'buf_size'], varargs=None, keywords=None, defaults=None), ('document', 'e42ea6fee23ce26b23cb142cd1d6522d'))
 paddle.reader.firstn (ArgSpec(args=['reader', 'n'], varargs=None, keywords=None, defaults=None), ('document', 'c5bb8f7dd4f917f1569a368aab5b8aad'))
-paddle.reader.xmap_readers (ArgSpec(args=['mapper', 'reader', 'process_num', 'buffer_size', 'order'], varargs=None, keywords=None, defaults=(False,)), ('document', '283bc0b8a0e26ae186b8b9bee4aec560'))
+paddle.reader.xmap_readers (ArgSpec(args=['mapper', 'reader', 'process_num', 'buffer_size', 'order'], varargs=None, keywords=None, defaults=(False,)), ('document', '9c804a42f8a4dbaa76b3c98e0ab7f796'))
 paddle.reader.PipeReader.__init__ (ArgSpec(args=['self', 'command', 'bufsize', 'file_type'], varargs=None, keywords=None, defaults=(8192, 'plain')), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.reader.PipeReader.get_line (ArgSpec(args=['self', 'cut_lines', 'line_break'], varargs=None, keywords=None, defaults=(True, '\n')), ('document', '5f80a7ed70052f01665e4c74acccfa69'))
+paddle.reader.PipeReader.get_line (ArgSpec(args=['self', 'cut_lines', 'line_break'], varargs=None, keywords=None, defaults=(True, '\n')), ('document', '9621ae612e595b6c34eb3bb5f3eb1a45'))
 paddle.reader.multiprocess_reader (ArgSpec(args=['readers', 'use_pipe', 'queue_size'], varargs=None, keywords=None, defaults=(True, 1000)), ('document', '7d8b3a96e592107c893d5d51ce968ba0'))
 paddle.reader.Fake.__init__ (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.reader.creator.np_array (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '28d457fbc9a71efa4ac91a3be179cada'))
-paddle.reader.creator.text_file (ArgSpec(args=['path'], varargs=None, keywords=None, defaults=None), ('document', '44fe286ab6175a5464d3a961a68c266a'))
+paddle.reader.creator.text_file (ArgSpec(args=['path'], varargs=None, keywords=None, defaults=None), ('document', 'f45fcb7add066c8e042c6774fc7c3db2'))
-paddle.reader.creator.recordio (ArgSpec(args=['paths', 'buf_size'], varargs=None, keywords=None, defaults=(100,)), ('document', '11b3704ea42cfd537953387a7e58dae8'))
+paddle.reader.creator.recordio (ArgSpec(args=['paths', 'buf_size'], varargs=None, keywords=None, defaults=(100,)), ('document', 'b4a94ee0e2cefb495619275c2f8c61d2'))
--- a/paddle/fluid/framework/details/build_strategy.cc
+++ b/paddle/fluid/framework/details/build_strategy.cc
@ -16,6 +16,7 @@ limitations under the License. */
 #include <glog/logging.h>
 #include <memory>
 #include <utility>
 #include "paddle/fluid/framework/details/memory_optimize_helper.h"
 #include "paddle/fluid/framework/details/multi_devices_graph_pass.h"
@ -49,6 +50,11 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
      AppendPass("sequential_execution_pass");
    }
    // Add op fusion.
    if (strategy.sync_batch_norm_) {
      AppendPass("sync_batch_norm_pass");
    }
    // Add op fusion.
    if (strategy.fuse_relu_depthwise_conv_) {
      AppendPass("fuse_relu_depthwise_conv_pass");
@ -227,6 +233,7 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
 }  // namespace framework
 }  // namespace paddle
 USE_PASS(sync_batch_norm_pass);
 USE_PASS(fuse_relu_depthwise_conv_pass);
 USE_PASS(fuse_elewise_add_act_pass);
 USE_PASS(graph_viz_pass);
--- a/paddle/fluid/framework/details/build_strategy.h
+++ b/paddle/fluid/framework/details/build_strategy.h
@ -77,6 +77,8 @@ struct BuildStrategy {
  bool fuse_relu_depthwise_conv_{false};
  bool sync_batch_norm_{false};
  bool memory_optimize_{true};
  // TODO(dzhwinter):
  // make enable_inplace, memory_optimize_
--- a/paddle/fluid/framework/details/graph_test_base.h
+++ b/paddle/fluid/framework/details/graph_test_base.h
@ -68,11 +68,11 @@ class SplitOpMaker : public OpProtoAndCheckerMaker {
 class DummyVarTypeInference : public VarTypeInference {
 public:
-  void operator()(const OpDesc& op_desc, BlockDesc* block) const override {
+  void operator()(framework::InferVarTypeContext* ctx) const override {
-    auto& inputs = op_desc.Input("X");
+    auto& inputs = ctx->Input("X");
-    auto type = block->Var(inputs.front())->GetType();
+    auto type = ctx->GetType(inputs.front());
-    auto out_var_name = op_desc.Output("Out").front();
+    auto out_var_name = ctx->Output("Out").front();
-    block->Var(out_var_name)->SetType(type);
+    ctx->SetType(out_var_name, type);
  }
 };
--- a/paddle/fluid/framework/details/memory_optimize_helper.cc
+++ b/paddle/fluid/framework/details/memory_optimize_helper.cc
@ -337,7 +337,6 @@ bool NodeCanReused(const VarDesc& node) {
  auto type = node.GetType();
  // only these types holds bulk of gpu memory
  if (!(type == proto::VarType::LOD_TENSOR ||
        type == proto::VarType::SELECTED_ROWS ||
        type == proto::VarType::LOD_TENSOR_ARRAY)) {
    return false;
  }
--- a/paddle/fluid/framework/details/op_registry.h
+++ b/paddle/fluid/framework/details/op_registry.h
@ -16,6 +16,8 @@ limitations under the License. */
 #include <string>
 #include <tuple>
 #include <unordered_map>
 #include <unordered_set>
 #include <vector>
 #include "paddle/fluid/framework/grad_op_desc_maker.h"
 #include "paddle/fluid/framework/inplace_op_inference.h"
@ -127,9 +129,9 @@ struct OpInfoFiller<T, kGradOpDescMaker> {
 template <typename T>
 struct OpInfoFiller<T, kVarTypeInference> {
  void operator()(const char* op_type, OpInfo* info) const {
-    info->infer_var_type_ = [](const OpDesc& fwd_op, BlockDesc* block) {
+    info->infer_var_type_ = [](InferVarTypeContext* context) {
      T inference;
-      inference(fwd_op, block);
+      inference(context);
    };
  }
 };
--- a/paddle/fluid/framework/grad_op_desc_maker.h
+++ b/paddle/fluid/framework/grad_op_desc_maker.h
@ -14,7 +14,9 @@ limitations under the License. */
 #pragma once
 #include <algorithm>
 #include <memory>
 #include <string>
 #include <unordered_map>
 #include <unordered_set>
 #include <vector>
 #include "paddle/fluid/framework/op_desc.h"
@ -55,11 +57,11 @@ class GradOpDescMakerBase {
                   std::back_inserter(ret_val),
                   [this](const std::string& fwd_var_name) -> std::string {
                     auto g_name = GradVarName(fwd_var_name);
-                     if (no_grad_set_.count(g_name)) {
+                     if (no_grad_set_.empty() || !no_grad_set_.count(g_name)) {
                       return kEmptyVarName;
                     } else {
                       (*this->grad_to_var_)[g_name] = fwd_var_name;
                       return g_name;
                     } else {
                       return kEmptyVarName;
                     }
                   });
    if (!drop_empty_grad) {
--- a/paddle/fluid/framework/ir/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/CMakeLists.txt
@ -46,6 +46,9 @@ cc_library(fuse_pass_base SRCS fuse_pass_base.cc DEPS pass)
 pass_library(graph_to_program_pass base)
 pass_library(graph_viz_pass base)
 pass_library(lock_free_optimize_pass base)
 pass_library(cpu_quantize_placement_pass base)
 pass_library(cpu_quantize_pass inference)
 pass_library(cpu_quantize_squash_pass inference)
 pass_library(fc_fuse_pass inference)
 pass_library(attention_lstm_fuse_pass inference)
 pass_library(infer_clean_graph_pass inference)
@ -66,6 +69,8 @@ pass_library(conv_elementwise_add_fuse_pass inference)
 pass_library(conv_affine_channel_fuse_pass inference)
 pass_library(transpose_flatten_concat_fuse_pass inference)
 pass_library(identity_scale_op_clean_pass base)
 pass_library(sync_batch_norm_pass base)
 pass_library(runtime_context_cache_pass base)
 # There may be many transpose-flatten structures in a model, and the output of
 # these structures will be used as inputs to the concat Op. This pattern will
@ -100,6 +105,12 @@ cc_test(test_graph_pattern_detector SRCS graph_pattern_detector_tester.cc DEPS g
 cc_test(test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass framework_proto)
 cc_test(test_seqpool_concat_fuse_pass SRCS seqpool_concat_fuse_pass_tester.cc DEPS seqpool_concat_fuse_pass framework_proto)
 cc_test(test_is_test_pass SRCS is_test_pass_tester.cc DEPS is_test_pass)
 cc_test(test_cpu_quantize_placement_pass SRCS cpu_quantize_placement_pass_tester.cc DEPS cpu_quantize_placement_pass)
 cc_test(test_cpu_quantize_pass SRCS cpu_quantize_pass_tester.cc DEPS cpu_quantize_pass naive_executor)
 cc_test(test_cpu_quantize_squash_pass SRCS cpu_quantize_squash_pass_tester.cc DEPS cpu_quantize_squash_pass naive_executor)
 if(NOT WIN32)
    cc_test(test_sync_batch_norm_pass SRCS sync_batch_norm_pass_tester.cc DEPS sync_batch_norm_pass)
 endif()
 if (WITH_MKLDNN)
    cc_test(test_depthwise_conv_mkldnn_pass SRCS mkldnn/depthwise_conv_mkldnn_pass_tester.cc DEPS depthwise_conv_mkldnn_pass)
    cc_test(test_conv_bias_mkldnn_fuse_pass SRCS mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc DEPS conv_bias_mkldnn_fuse_pass naive_executor)
--- a/paddle/fluid/framework/ir/cpu_quantize_pass.cc
+++ b/paddle/fluid/framework/ir/cpu_quantize_pass.cc
@ -0,0 +1,239 @@
 // Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "paddle/fluid/framework/ir/cpu_quantize_pass.h"
 #include <utility>
 #include <vector>
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/string/pretty_log.h"
 namespace paddle {
 namespace framework {
 namespace ir {
 namespace {
 void UnlinkNodes(ir::Node* a, ir::Node* b) {
  a->outputs.erase(std::remove(a->outputs.begin(), a->outputs.end(), b),
                   a->outputs.end());
  b->inputs.erase(std::remove(b->inputs.begin(), b->inputs.end(), a),
                  b->inputs.end());
 }
 }  // namespace
 enum { U8_MAX = 255, S8_MAX = 127 };
 using EigenVectorArrayMap = Eigen::Map<Eigen::Array<double, Eigen::Dynamic, 1>>;
 using string::PrettyLogDetail;
 void CPUQuantizePass::QuantizeInput(Graph* g, Node* op, Node* input,
                                    std::string input_name, double scale_to_one,
                                    bool is_unsigned,
                                    std::string scale_attr_name) const {
  unsigned max = is_unsigned ? U8_MAX : S8_MAX;
  float scale = scale_to_one * max;
  // Create quantize output variable
  VarDesc quantize_out_desc(patterns::PDNodeName("quantize", "out"));
  auto* quantize_out_node = g->CreateVarNode(&quantize_out_desc);
  // create a quantize op node
  OpDesc q_desc;
  q_desc.SetType("quantize");
  q_desc.SetInput("Input", std::vector<std::string>({input->Name()}));
  q_desc.SetOutput("Output",
                   std::vector<std::string>({quantize_out_node->Name()}));
  q_desc.SetAttr("Scale", scale);
  q_desc.SetAttr("is_negative_input", !is_unsigned);
  auto quantize_op = g->CreateOpNode(&q_desc);  // OpDesc will be copied.
  // update op's input
  op->Op()->SetInput(input_name,
                     std::vector<std::string>({quantize_out_node->Name()}));
  // link quantize op
  UnlinkNodes(input, op);
  IR_NODE_LINK_TO(input, quantize_op);
  IR_NODE_LINK_TO(quantize_op, quantize_out_node);
  IR_NODE_LINK_TO(quantize_out_node, op);
  if (!scale_attr_name.empty()) op->Op()->SetAttr(scale_attr_name, scale);
 }
 void CPUQuantizePass::DequantizeOutput(Graph* g, Node* op, Node* output,
                                       std::string output_name,
                                       double scale_to_one, bool is_unsigned,
                                       std::string scale_attr_name) const {
  unsigned max = is_unsigned ? U8_MAX : S8_MAX;
  float scale = scale_to_one * max;
  // Create dequantize input variable
  VarDesc dequantize_in_desc(patterns::PDNodeName("dequantize", "in"));
  auto* dequantize_in_node = g->CreateVarNode(&dequantize_in_desc);
  // create a dequantize op node for output.
  OpDesc deq_desc;
  deq_desc.SetType("dequantize");
  deq_desc.SetInput("Input",
                    std::vector<std::string>({dequantize_in_node->Name()}));
  deq_desc.SetOutput("Output", std::vector<std::string>({output->Name()}));
  deq_desc.SetAttr("Scale", scale);
  auto dequantize_op = g->CreateOpNode(&deq_desc);  // OpDesc will be copied.
  // update op's output
  op->Op()->SetOutput(output_name,
                      std::vector<std::string>({dequantize_in_node->Name()}));
  // link dequantize op
  UnlinkNodes(op, output);
  IR_NODE_LINK_TO(op, dequantize_in_node);
  IR_NODE_LINK_TO(dequantize_in_node, dequantize_op);
  IR_NODE_LINK_TO(dequantize_op, output);
  if (!scale_attr_name.empty()) op->Op()->SetAttr(scale_attr_name, scale);
 }
 void CPUQuantizePass::QuantizeConv(Graph* graph,
                                   bool with_residual_data) const {
  GraphPatternDetector gpd;
  auto pattern = gpd.mutable_pattern();
  patterns::ConvResidual conv_pattern{pattern, name_scope_};
  conv_pattern(with_residual_data);
  int quantize_conv_count = 0;
  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* g) {
    VLOG(4) << "Quantize conv2d op";
    GET_IR_NODE_FROM_SUBGRAPH(conv_op, conv_op, conv_pattern);
    auto* conv_op_desc = conv_op->Op();
    // skip if should not be quantized
    if (!conv_op_desc->HasAttr("use_quantizer") ||
        !boost::get<bool>(conv_op_desc->GetAttr("use_quantizer")))
      return;
    GET_IR_NODE_FROM_SUBGRAPH(conv_filter, conv_filter, conv_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(conv_input, conv_input, conv_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(conv_output, conv_output, conv_pattern);
    // get scales calculated after warmup, they scale variables to MAX=1.0
    auto scales = Get<VarQuantScale>("quant_var_scales");
    auto input_scale = scales[conv_input->Name()].second.data<double>()[0];
    bool is_input_unsigned = scales[conv_input->Name()].first;
    QuantizeInput(g, conv_op, conv_input, "Input", input_scale,
                  is_input_unsigned, "Scale_in");
    auto filter_scale_tensor = scales[conv_filter->Name()].second;
    EigenVectorArrayMap eigen_tensor{filter_scale_tensor.data<double>(),
                                     filter_scale_tensor.numel(), 1};
    eigen_tensor *= static_cast<double>(S8_MAX);
    std::vector<float> filter_scale{
        filter_scale_tensor.data<double>(),
        filter_scale_tensor.data<double>() + filter_scale_tensor.numel()};
    conv_op->Op()->SetAttr("Scale_weights", filter_scale);
    if (with_residual_data) {
      GET_IR_NODE_FROM_SUBGRAPH(conv_residual_data, conv_residual_data,
                                conv_pattern);
      auto residual_scale =
          scales[conv_residual_data->Name()].second.data<double>()[0];
      bool is_residual_unsigned = scales[conv_residual_data->Name()].first;
      QuantizeInput(g, conv_op, conv_residual_data, "ResidualData",
                    residual_scale, is_residual_unsigned, "Scale_in_eltwise");
    }
    auto output_scale = scales[conv_output->Name()].second.data<double>()[0];
    bool is_output_unsigned = scales[conv_output->Name()].first;
    DequantizeOutput(g, conv_op, conv_output, "Output", output_scale,
                     is_output_unsigned, "Scale_out");
    ++quantize_conv_count;
  };
  gpd(graph, handler);
  AddStatis(quantize_conv_count);
  std::stringstream msg_ss;
  msg_ss << "---    quantized " << quantize_conv_count << " conv2d ops";
  if (with_residual_data) msg_ss << " with residual connection";
  PrettyLogDetail(msg_ss.str().c_str());
 }
 void CPUQuantizePass::QuantizePool(Graph* graph) const {
  GraphPatternDetector gpd;
  auto pattern = gpd.mutable_pattern();
  patterns::Pool pool_pattern{pattern, name_scope_};
  pool_pattern();
  int quantize_pool_count = 0;
  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* g) {
    VLOG(4) << "Quantize pool2d op";
    GET_IR_NODE_FROM_SUBGRAPH(pool_op, pool_op, pool_pattern);
    auto* pool_op_desc = pool_op->Op();
    // skip if should not be quantized
    if (!pool_op_desc->HasAttr("use_quantizer") ||
        !boost::get<bool>(pool_op_desc->GetAttr("use_quantizer")))
      return;
    GET_IR_NODE_FROM_SUBGRAPH(pool_input, pool_input, pool_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(pool_output, pool_output, pool_pattern);
    // get scales calculated after warmup, they scale variables to MAX=1.0
    auto scales = Get<VarQuantScale>("quant_var_scales");
    auto input_scale = scales[pool_input->Name()].second.data<double>()[0];
    bool is_input_unsigned = scales[pool_input->Name()].first;
    QuantizeInput(g, pool_op, pool_input, "X", input_scale, is_input_unsigned);
    auto output_scale = scales[pool_output->Name()].second.data<double>()[0];
    bool is_output_unsigned = scales[pool_output->Name()].first;
    DequantizeOutput(g, pool_op, pool_output, "Out", output_scale,
                     is_output_unsigned);
    ++quantize_pool_count;
  };
  gpd(graph, handler);
  AddStatis(quantize_pool_count);
  PrettyLogDetail("---    quantized %d pool2d ops", quantize_pool_count);
 }
 std::unique_ptr<ir::Graph> CPUQuantizePass::ApplyImpl(
    std::unique_ptr<ir::Graph> graph) const {
  VLOG(3) << "Quantizing the graph.";
  PADDLE_ENFORCE(graph.get());
  FusePassBase::Init(name_scope_, graph.get());
  PADDLE_ENFORCE(param_scope());
  QuantizeConv(graph.get(), true /* with_residual_data */);
  QuantizeConv(graph.get());
  QuantizePool(graph.get());
  return graph;
 }
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
 REGISTER_PASS(cpu_quantize_pass, paddle::framework::ir::CPUQuantizePass)
    .RequirePassAttr("quant_var_scales");
--- a/paddle/fluid/framework/ir/cpu_quantize_pass.h
+++ b/paddle/fluid/framework/ir/cpu_quantize_pass.h
@ -0,0 +1,66 @@
 // Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #pragma once
 #include <memory>
 #include <string>
 #include <unordered_map>
 #include <utility>
 #include "paddle/fluid/framework/ir/fuse_pass_base.h"
 #include "paddle/fluid/framework/ir/graph.h"
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 namespace paddle {
 namespace framework {
 namespace ir {
 /*
 * Map variable name to tensor of scaling factors scaling it to MAX=1.0.
 * bool denotes whether quantization of the variable should be done to unsigned
 * type.
 */
 using VarQuantScale =
    std::unordered_map<std::string, std::pair<bool, LoDTensor>>;
 /*
 * Quantize all supported operators.
 */
 class CPUQuantizePass : public FusePassBase {
 public:
  virtual ~CPUQuantizePass() {}
 protected:
  std::unique_ptr<ir::Graph> ApplyImpl(
      std::unique_ptr<ir::Graph> graph) const override;
  void QuantizeConv(Graph* graph, bool with_residual_data = false) const;
  void QuantizePool(Graph* graph) const;
  void QuantizeInput(Graph* g, Node* op, Node* input, std::string input_name,
                     double scale_to_one, bool is_unsigned,
                     std::string scale_attr_name = "") const;
  void DequantizeOutput(Graph* g, Node* op, Node* output,
                        std::string output_name, double scale_to_one,
                        bool is_unsigned,
                        std::string scale_attr_name = "") const;
  const std::string name_scope_{"quantize"};
 };
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
--- a/paddle/fluid/framework/ir/cpu_quantize_pass_tester.cc
+++ b/paddle/fluid/framework/ir/cpu_quantize_pass_tester.cc
@ -0,0 +1,211 @@
 // Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "paddle/fluid/framework/ir/cpu_quantize_pass.h"
 #include <gtest/gtest.h>
 #include "paddle/fluid/framework/naive_executor.h"
 #include "paddle/fluid/platform/place.h"
 namespace paddle {
 namespace framework {
 namespace ir {
 void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
           const std::vector<std::string>& inputs,
           const std::vector<std::string>& outputs, bool use_mkldnn,
           bool use_quantizer = false) {
  auto* op = prog->MutableBlock(0)->AppendOp();
  op->SetType(type);
  op->SetAttr("use_mkldnn", use_mkldnn);
  op->SetAttr("name", name);
  if (type == "conv2d") {
    op->SetInput("Input", {inputs[0]});
    op->SetInput("Filter", {inputs[1]});
    if (inputs.size() > 2)
      op->SetInput("Bias", {inputs[2]});
    else
      op->SetInput("Bias", {});
    if (inputs.size() > 3) {
      op->SetInput("ResidualData", {inputs[3]});
      op->SetAttr("fuse_residual_connection", true);
    } else {
      op->SetInput("ResidualData", {});
      op->SetAttr("fuse_residual_connection", false);
    }
    op->SetOutput("Output", {outputs[0]});
    op->SetAttr("use_quantizer", use_quantizer);
    op->SetAttr("Scale_in", 1.0f);
    op->SetAttr("Scale_out", 1.0f);
    op->SetAttr("Scale_weights", std::vector<float>{1.0f});
  } else if (type == "pool2d") {
    op->SetInput("X", {inputs[0]});
    op->SetOutput("Out", {outputs[0]});
    op->SetAttr("use_quantizer", use_quantizer);
  } else if (type == "dropout") {
    op->SetInput("X", {inputs[0]});
    op->SetOutput("Out", {outputs[0]});
  } else if (type == "fc") {
    op->SetInput("Input", {inputs[0]});
    if (inputs.size() > 1) op->SetInput("W", {inputs[1]});
    if (inputs.size() > 2) op->SetInput("Bias", {inputs[2]});
    op->SetOutput("Out", {outputs[0]});
  }
 }
 static const std::initializer_list<std::string> variable_names{
    "a", "w1", "c",  "d", "w2", "e",  "f", "g",
    "h", "w3", "b1", "i", "j",  "w4", "b2"};
 // (a,w1)->Conv1->c and c->Pool1->d
 //
 // (d,w2)->Conv2->e and e->Pool2->f
 //
 // d->Dropout1->g and g->Fc1->h and (h,w3,b1,i)->Conv3->j
 //
 // (d,w4, b2)->Conv4->i
 ProgramDesc BuildProgramDesc(bool use_mkldnn, bool use_quantizer) {
  ProgramDesc prog;
  for (auto& v : variable_names) {
    auto* var = prog.MutableBlock(0)->Var(v);
    if (v.find("w") == 0 || v.find("b") == 0) {
      var->SetPersistable(true);
    }
  }
  SetOp(&prog, "conv2d", "Conv1", {"a", "w1"}, {"c"}, use_mkldnn,
        use_quantizer);
  SetOp(&prog, "pool2d", "Pool1", {"c"}, {"d"}, use_mkldnn, use_quantizer);
  SetOp(&prog, "conv2d", "Conv2", {"d", "w2"}, {"e"}, use_mkldnn,
        use_quantizer);
  SetOp(&prog, "pool2d", "Pool2", {"e"}, {"f"}, use_mkldnn, use_quantizer);
  SetOp(&prog, "dropout", "Dropout1", {"d"}, {"g"}, use_mkldnn);
  SetOp(&prog, "fc", "Fc1", {"g"}, {"h"}, use_mkldnn);
  SetOp(&prog, "conv2d", "Conv3", {"h", "w3", "b1", "i"}, {"j"}, use_mkldnn,
        use_quantizer);
  SetOp(&prog, "conv2d", "Conv4", {"c", "w4", "b2"}, {"i"}, use_mkldnn,
        use_quantizer);
  return prog;
 }
 void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
                      const char* var_name) {
  auto x = scope->Var(var_name);
  auto tensor = x->GetMutable<LoDTensor>();
  tensor->mutable_data(place, proto::VarType::FP32,
                       ::paddle::memory::Allocator::kDefault, 1);
 }
 void MainTest(const ProgramDesc& prog, int conv_count, int pool_count,
              int quant_count, int dequant_count, int added_nodes_count,
              float scale) {
  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
  // Init scope, as it is used in pass
  auto place = paddle::platform::CPUPlace();
  NaiveExecutor exe{place};
  Scope scope;
  exe.CreateVariables(prog, 0, true, &scope);
  auto* scales = new VarQuantScale();
  for (auto& v : variable_names) {
    InitTensorHolder(&scope, place, v.c_str());
    LoDTensor tensor;
    tensor.Resize({1});
    auto* ptr = tensor.mutable_data<double>(place);
    ptr[0] = 2.0;
    (*scales)[v] = std::make_pair(false, std::move(tensor));
  }
  graph->Set(kParamScopeAttr, new framework::Scope*(&scope));
  auto pass = PassRegistry::Instance().Get("cpu_quantize_pass");
  pass->Set("quant_var_scales", scales);
  int original_nodes_num = graph->Nodes().size();
  graph = pass->Apply(std::move(graph));
  int current_nodes_num = graph->Nodes().size();
  int quantize_nodes_count = 0;
  int dequantize_nodes_count = 0;
  int conv2d_nodes_count = 0;
  int pool2d_nodes_count = 0;
  for (auto* node : graph->Nodes()) {
    if (node->IsOp()) {
      auto* op = node->Op();
      if (op->Type() == "conv2d") {
        conv2d_nodes_count++;
        auto op_name = boost::get<std::string>(op->GetAttr("name"));
        EXPECT_EQ(boost::get<float>(op->GetAttr("Scale_in")), scale)
            << "Scale_in for node '" + op_name + "'.";
        EXPECT_EQ(boost::get<float>(op->GetAttr("Scale_out")), scale)
            << "Scale_out for node '" + op_name + "'.";
        EXPECT_EQ(
            boost::get<std::vector<float>>(op->GetAttr("Scale_weights"))[0],
            scale)
            << "Scale_weights for node '" + op_name + "'.";
      } else if (op->Type() == "pool2d") {
        pool2d_nodes_count++;
      } else if (op->Type() == "quantize") {
        quantize_nodes_count++;
      } else if (op->Type() == "dequantize") {
        dequantize_nodes_count++;
      }
    }
  }
  EXPECT_EQ(conv2d_nodes_count, conv_count);
  EXPECT_EQ(pool2d_nodes_count, pool_count);
  EXPECT_EQ(quantize_nodes_count, quant_count);
  EXPECT_EQ(dequantize_nodes_count, dequant_count);
  EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num);
 }
 TEST(CpuQuantizePass, quantize) {
  bool use_mkldnn = true;
  bool use_quantizer = true;
  // (a->QUANT1->IN1,w1)->Conv1->OUT1->DEQUANT1->c and
  // c->QUANT2->IN2->Pool1->OUT2->DEQUANT2->d
  //
  // (d->QUANT3->IN3,w2)->Conv2->OUT3->DEQUANT3->e and
  // e->QUANT4->IN4->Pool2->OUT4->DEQUANT4->f
  //
  // d->Dropout1->g and g->Fc1->h and
  // (h->QUANT5->IN5,w3,b1,i->QUANT6->IN6)->Conv3->OUT5->DEQUANT5->j
  //
  // (d->QUANT7->IN7,w4, b2)->Conv4->DEQUANT6->OUT6->i
  // Insert nodes: 7 Quant + 7 IN + 6 OUT + 6 DEQUANT
  int added_nodes = 7 + 7 + 6 + 6;
  MainTest(BuildProgramDesc(use_mkldnn, use_quantizer), 4, 2, 7, 6, added_nodes,
           2.0f * 127);
 }
 TEST(CpuQuantizePass, do_not_quantize) {
  bool use_mkldnn = true;
  bool use_quantizer = false;
  int added_nodes = 0;
  MainTest(BuildProgramDesc(use_mkldnn, use_quantizer), 4, 2, 0, 0, added_nodes,
           1.0f);
 }
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
 USE_PASS(cpu_quantize_pass);
--- a/paddle/fluid/framework/ir/cpu_quantize_placement_pass.cc
+++ b/paddle/fluid/framework/ir/cpu_quantize_placement_pass.cc
@ -0,0 +1,58 @@
 /* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/framework/ir/cpu_quantize_placement_pass.h"
 #include <string>
 #include <unordered_set>
 namespace paddle {
 namespace framework {
 namespace ir {
 std::unique_ptr<ir::Graph> CPUQuantizePlacementPass::ApplyImpl(
    std::unique_ptr<ir::Graph> graph) const {
  VLOG(3) << "Marks operators which are to be quantized.";
  const auto& excluded_ids_list =
      Get<std::unordered_set<int>>("quantize_excluded_op_ids");
  const auto& op_types_list =
      Get<std::unordered_set<std::string>>("quantize_enabled_op_types");
  for (const Node* n : graph->Nodes()) {
    if (n->IsOp()) {
      if (std::find(excluded_ids_list.begin(), excluded_ids_list.end(),
                    n->id()) != excluded_ids_list.end())
        continue;
      auto* op = n->Op();
      if (op->HasAttr("use_quantizer") || op->HasProtoAttr("use_quantizer")) {
        if (op_types_list.empty()) {
          op->SetAttr("use_quantizer", true);
        } else if (std::find(op_types_list.begin(), op_types_list.end(),
                             n->Name()) != op_types_list.end()) {
          op->SetAttr("use_quantizer", true);
        }
      }
    }
  }
  return graph;
 }
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
 REGISTER_PASS(cpu_quantize_placement_pass,
              paddle::framework::ir::CPUQuantizePlacementPass)
    // a vector of operator type names to be quantized ("conv2d" etc.)
    .RequirePassAttr("quantize_enabled_op_types")
    // a vector of operator ids that are to be excluded from quantization
    .RequirePassAttr("quantize_excluded_op_ids");
--- a/paddle/fluid/framework/ir/cpu_quantize_placement_pass.h
+++ b/paddle/fluid/framework/ir/cpu_quantize_placement_pass.h
@ -0,0 +1,34 @@
 /* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
 #include <memory>
 #include "paddle/fluid/framework/ir/pass.h"
 namespace paddle {
 namespace framework {
 namespace ir {
 /*
 * Specifies which operators should be quantized.
 */
 class CPUQuantizePlacementPass : public Pass {
 protected:
  std::unique_ptr<ir::Graph> ApplyImpl(
      std::unique_ptr<ir::Graph> graph) const override;
 };
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
--- a/paddle/fluid/framework/ir/cpu_quantize_placement_pass_tester.cc
+++ b/paddle/fluid/framework/ir/cpu_quantize_placement_pass_tester.cc
@ -0,0 +1,129 @@
 // Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "paddle/fluid/framework/ir/cpu_quantize_placement_pass.h"
 #include <gtest/gtest.h>
 #include <boost/logic/tribool.hpp>
 namespace paddle {
 namespace framework {
 namespace ir {
 void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
           const std::vector<std::string>& inputs,
           const std::vector<std::string>& outputs,
           boost::tribool use_quantizer) {
  auto* op = prog->MutableBlock(0)->AppendOp();
  op->SetType(type);
  if (!boost::indeterminate(use_quantizer))
    op->SetAttr("use_quantizer", use_quantizer);
  if (type == "conv2d") {
    op->SetAttr("name", name);
    op->SetInput("Input", {inputs[0]});
    op->SetInput("Filter", {inputs[1]});
    op->SetInput("Bias", {inputs[2]});
  } else if (type == "relu") {
    op->SetInput("X", inputs);
  } else if (type == "concat") {
    op->SetAttr("axis", 1);
    op->SetInput("X", {inputs[0], inputs[1]});
  } else if (type == "pool2d") {
    op->SetInput("X", {inputs[0]});
  } else {
    FAIL() << "Unexpected operator type.";
  }
  op->SetOutput("Out", {outputs[0]});
 }
 // operator                      use_quantizer
 // ---------------------------------------
 // (a,b)->concat->c              none
 // (c,weights,bias)->conv->f     false
 // f->relu->g                    none
 // g->pool->h                    false
 // (h,weights2,bias2)->conv->k   false
 // k->pool->l                    false
 ProgramDesc BuildProgramDesc() {
  ProgramDesc prog;
  for (auto& v :
       std::vector<std::string>({"a", "b", "c", "weights", "bias", "f", "g",
                                 "h", "weights2", "bias2", "k", "l"})) {
    auto* var = prog.MutableBlock(0)->Var(v);
    var->SetType(proto::VarType::SELECTED_ROWS);
    if (v == "weights" || v == "bias") {
      var->SetPersistable(true);
    }
  }
  SetOp(&prog, "concat", "concat1", {"a", "b"}, {"c"}, boost::indeterminate);
  SetOp(&prog, "conv2d", "conv1", {"c", "weights", "bias"}, {"f"}, false);
  SetOp(&prog, "relu", "relu1", {"f"}, {"g"}, boost::indeterminate);
  SetOp(&prog, "pool2d", "pool1", {"g"}, {"h"}, false);
  SetOp(&prog, "conv2d", "conv2", {"h", "weights2", "bias2"}, {"k"}, false);
  SetOp(&prog, "pool2d", "pool2", {"k"}, {"l"}, false);
  return prog;
 }
 void MainTest(std::initializer_list<std::string> quantize_enabled_op_types,
              std::initializer_list<int> quantize_excluded_op_ids,
              unsigned expected_use_quantizer_true_count) {
  auto prog = BuildProgramDesc();
  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
  auto pass = PassRegistry::Instance().Get("cpu_quantize_placement_pass");
  pass->Set("quantize_enabled_op_types",
            new std::unordered_set<std::string>(quantize_enabled_op_types));
  pass->Set("quantize_excluded_op_ids",
            new std::unordered_set<int>(quantize_excluded_op_ids));
  graph = pass->Apply(std::move(graph));
  unsigned use_quantizer_true_count = 0;
  for (auto* node : graph->Nodes()) {
    if (node->IsOp()) {
      auto* op = node->Op();
      if (op->HasAttr("use_quantizer") &&
          boost::get<bool>(op->GetAttr("use_quantizer"))) {
        ++use_quantizer_true_count;
      }
    }
  }
  EXPECT_EQ(use_quantizer_true_count, expected_use_quantizer_true_count);
 }
 TEST(QuantizerPlacementPass, enabled_pool) { MainTest({"pool2d"}, {}, 2); }
 TEST(QuantizerPlacementPass, enabled_conv_excluded_one) {
  MainTest({"conv2d"}, {4}, 1);
 }
 TEST(QuantizerPlacementPass, excluded_none) {
  // 2 conv + 2 pool
  MainTest({}, {}, 4);
 }
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
 USE_PASS(cpu_quantize_placement_pass);
--- a/paddle/fluid/framework/ir/cpu_quantize_squash_pass.cc
+++ b/paddle/fluid/framework/ir/cpu_quantize_squash_pass.cc
@ -0,0 +1,146 @@
 // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file eint8_outcept in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either eint8_outpress or
 // implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "paddle/fluid/framework/ir/cpu_quantize_squash_pass.h"
 #include <string>
 #include <vector>
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/string/pretty_log.h"
 namespace paddle {
 namespace framework {
 namespace ir {
 using string::PrettyLogDetail;
 void CPUQuantizeSquashPass::FindNodesToKeep(
    Graph* graph,
    std::unordered_map<const Node*, int>* nodes_keep_counter) const {
  GraphPatternDetector gpd;
  patterns::DequantAny deq_any_pattern{gpd.mutable_pattern(), "deqant_any"};
  deq_any_pattern();
  int found_count = 0;
  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* g) {
    GET_IR_NODE_FROM_SUBGRAPH(dequant_out, dequant_out, deq_any_pattern);
    if (nodes_keep_counter->find(dequant_out) == nodes_keep_counter->end())
      (*nodes_keep_counter)[dequant_out] = 1;
    else
      (*nodes_keep_counter)[dequant_out] += 1;
    found_count++;
  };
  gpd(graph, handler);
  AddStatis(found_count);
 }
 void CPUQuantizeSquashPass::Squash(
    Graph* graph,
    std::unordered_map<const Node*, int>* nodes_keep_counter) const {
  GraphPatternDetector gpd;
  patterns::DequantQuantAny squash_pattern{gpd.mutable_pattern(), "squash"};
  squash_pattern();
  int found_squash_count = 0;
  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* g) {
    VLOG(4) << "squash requantize-quantize ops pair";
    GET_IR_NODE_FROM_SUBGRAPH(dequant_in, dequant_in, squash_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(dequant_op, dequant_op, squash_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(dequant_out, dequant_out, squash_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(quant_op, quant_op, squash_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(quant_out, quant_out, squash_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(next_op, next_op, squash_pattern);
    auto* next_op_desc = next_op->Op();
    float dequant_scale = boost::get<float>(dequant_op->Op()->GetAttr("Scale"));
    float quant_scale = boost::get<float>(quant_op->Op()->GetAttr("Scale"));
    PADDLE_ENFORCE(nodes_keep_counter->find(dequant_out) !=
                   nodes_keep_counter->end());
    // check if dequantize op should be kept or removed, decrease the counter
    bool keep_dequant = (*nodes_keep_counter)[dequant_out]-- > 1;
    if (dequant_scale == quant_scale) {
      // squash dequantize-quantize to nothing
      auto quant_out_var_name = quant_out->Name();
      auto next_op_inputs = next_op_desc->InputNames();
      for (const auto& name : next_op_inputs) {
        auto var_name = next_op_desc->Input(name)[0];
        if (var_name.compare(quant_out_var_name) == 0) {
          next_op_desc->SetInput(
              name, std::vector<std::string>({dequant_in->Name()}));
          break;
        }
      }
      if (keep_dequant)
        GraphSafeRemoveNodes(graph, {quant_op, quant_out});
      else
        GraphSafeRemoveNodes(graph,
                             {dequant_op, quant_op, dequant_out, quant_out});
      IR_NODE_LINK_TO(dequant_in, next_op);
      found_squash_count++;
    } else {
      // squash dequantize-quantize to requantize op
      OpDesc desc;
      desc.SetType("requantize");
      desc.SetInput("Input", std::vector<std::string>({dequant_in->Name()}));
      desc.SetOutput("Output", std::vector<std::string>({quant_out->Name()}));
      desc.SetAttr("Scale_in", dequant_scale);
      desc.SetAttr("Scale_out", quant_scale);
      auto requant_op = g->CreateOpNode(&desc);
      if (keep_dequant)
        GraphSafeRemoveNodes(graph, {quant_op});
      else
        GraphSafeRemoveNodes(graph, {dequant_op, quant_op, dequant_out});
      IR_NODE_LINK_TO(dequant_in, requant_op);
      IR_NODE_LINK_TO(requant_op, quant_out);
      found_squash_count++;
    }
  };
  gpd(graph, handler);
  AddStatis(found_squash_count);
  PrettyLogDetail("---    squashed %d dequantize-quantize pairs",
                  found_squash_count);
 }
 std::unique_ptr<ir::Graph> CPUQuantizeSquashPass::ApplyImpl(
    std::unique_ptr<ir::Graph> graph) const {
  PADDLE_ENFORCE(graph.get());
  FusePassBase::Init("cpu_quantize_squash_pass", graph.get());
  std::unordered_map<const Node*, int> nodes_keep_counter;
  FindNodesToKeep(graph.get(), &nodes_keep_counter);
  Squash(graph.get(), &nodes_keep_counter);
  return graph;
 }
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
 REGISTER_PASS(cpu_quantize_squash_pass,
              paddle::framework::ir::CPUQuantizeSquashPass);
--- a/paddle/fluid/framework/ir/cpu_quantize_squash_pass.h
+++ b/paddle/fluid/framework/ir/cpu_quantize_squash_pass.h
@ -0,0 +1,58 @@
 // Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #pragma once
 #include <memory>
 #include <string>
 #include <unordered_map>
 #include "paddle/fluid/framework/ir/fuse_pass_base.h"
 #include "paddle/fluid/framework/ir/graph.h"
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/framework/ir/pass.h"
 namespace paddle {
 namespace framework {
 namespace ir {
 /*
 * Squash dequantize->quantize pair pattern into requantize op
 */
 class CPUQuantizeSquashPass : public FusePassBase {
 public:
  virtual ~CPUQuantizeSquashPass() {}
 protected:
  std::unique_ptr<ir::Graph> ApplyImpl(
      std::unique_ptr<ir::Graph> graph) const override;
  /*
   * For each dequantize's output find the number of operators it is an input to
   */
  void FindNodesToKeep(
      Graph* graph,
      std::unordered_map<const Node*, int>* nodes_keep_counter) const;
  /*
   * Squash dequantize-quantize ops pairs into requantize or nothing
   */
  void Squash(Graph* graph,
              std::unordered_map<const Node*, int>* nodes_keep_counter) const;
  const std::string name_scope_{"squash"};
 };
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
--- a/paddle/fluid/framework/ir/cpu_quantize_squash_pass_tester.cc
+++ b/paddle/fluid/framework/ir/cpu_quantize_squash_pass_tester.cc
@ -0,0 +1,179 @@
 // Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "paddle/fluid/framework/ir/cpu_quantize_squash_pass.h"
 #include <gtest/gtest.h>
 #include "paddle/fluid/framework/naive_executor.h"
 #include "paddle/fluid/platform/place.h"
 namespace paddle {
 namespace framework {
 namespace ir {
 void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
           const std::vector<std::string>& inputs,
           const std::vector<std::string>& outputs, bool use_mkldnn,
           float scale = 0) {
  auto* op = prog->MutableBlock(0)->AppendOp();
  op->SetType(type);
  op->SetAttr("use_mkldnn", use_mkldnn);
  op->SetAttr("name", name);
  if (type == "conv2d") {
    op->SetInput("Input", {inputs[0]});
    if (inputs.size() > 1) op->SetInput("Filter", {inputs[1]});
    if (inputs.size() > 2) op->SetInput("Bias", {inputs[2]});
    op->SetOutput("Output", {outputs[0]});
  } else if (type == "quantize") {
    op->SetInput("Input", {inputs[0]});
    op->SetOutput("Output", {outputs[0]});
    op->SetAttr("Scale", scale);
  } else if (type == "dequantize") {
    op->SetInput("Input", {inputs[0]});
    op->SetOutput("Output", {outputs[0]});
    op->SetAttr("Scale", scale);
  }
 }
 // (a,w1,b1)->Conv1->d
 // d->Dequant->e
 // e->Quant->f
 // (f,w2,b2)->Conv2->i
 ProgramDesc BuildProgramDesc(bool use_mkldnn, float scale1, float scale2) {
  ProgramDesc prog;
  for (auto& v : std::initializer_list<std::string>(
           {"a", "w1", "b1", "d", "e", "f", "w2", "b2", "i"})) {
    auto* var = prog.MutableBlock(0)->Var(v);
    if (v.find("w") == 0 || v.find("b") == 0) {
      var->SetPersistable(true);
    }
  }
  SetOp(&prog, "conv2d", "Conv1", {"a", "w1", "b1"}, {"d"}, use_mkldnn);
  SetOp(&prog, "dequantize", "Dequant", {"d"}, {"e"}, use_mkldnn, scale1);
  SetOp(&prog, "quantize", "Quant", {"e"}, {"f"}, use_mkldnn, scale2);
  SetOp(&prog, "conv2d", "Conv2", {"f", "w2", "b2"}, {"i"}, use_mkldnn);
  return prog;
 }
 static const std::initializer_list<std::string> variable_names{
    "a", "b", "c", "d", "e", "f", "g", "h"};
 // a->Conv1->b
 // b->Dequant->c
 //
 // c->Quant1->d and d->Conv2->e
 //
 // c->Conv3->f
 //
 // c->Quant2->g and g->Conv4->h
 //
 ProgramDesc BuildProgramDesc2(bool use_mkldnn, float scale1, float scale2,
                              float scale3) {
  ProgramDesc prog;
  for (auto& v : variable_names) {
    prog.MutableBlock(0)->Var(v);
  }
  SetOp(&prog, "conv2d", "Conv1", {"a"}, {"b"}, use_mkldnn);
  SetOp(&prog, "dequantize", "Dequant", {"b"}, {"c"}, use_mkldnn, scale1);
  SetOp(&prog, "quantize", "Quant1", {"c"}, {"d"}, use_mkldnn, scale2);
  SetOp(&prog, "conv2d", "Conv2", {"d"}, {"e"}, use_mkldnn);
  SetOp(&prog, "conv2d", "Conv3", {"c"}, {"f"}, use_mkldnn);
  SetOp(&prog, "quantize", "Quant2", {"c"}, {"g"}, use_mkldnn, scale3);
  SetOp(&prog, "conv2d", "Conv4", {"g"}, {"h"}, use_mkldnn);
  return prog;
 }
 void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
                      const char* var_name) {
  auto x = scope->Var(var_name);
  auto tensor = x->GetMutable<LoDTensor>();
  tensor->mutable_data(place, proto::VarType::FP32,
                       ::paddle::memory::Allocator::kDefault, 1);
 }
 void MainTest(const ProgramDesc& prog, int removed_nodes_num) {
  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
  // Init scope, as it is used in pass
  auto place = paddle::platform::CPUPlace();
  NaiveExecutor exe{place};
  Scope scope;
  exe.CreateVariables(prog, 0, true, &scope);
  for (auto& v : variable_names) {
    InitTensorHolder(&scope, place, v.c_str());
  }
  graph->Set(kParamScopeAttr, new framework::Scope*(&scope));
  auto pass = PassRegistry::Instance().Get("cpu_quantize_squash_pass");
  int original_nodes_num = graph->Nodes().size();
  graph = pass->Apply(std::move(graph));
  int current_nodes_num = graph->Nodes().size();
  EXPECT_EQ(original_nodes_num - removed_nodes_num, current_nodes_num);
 }
 TEST(CpuQuantizeSquashPass, equal_scales) {
  auto scale = 1.2345f;
  auto use_mkldnn = true;
  // Remove 4 nodes: Dequant, Quant, e, f
  auto remove_nodes = 4;
  MainTest(BuildProgramDesc(use_mkldnn, scale, scale), remove_nodes);
  use_mkldnn = !use_mkldnn;
  MainTest(BuildProgramDesc(use_mkldnn, scale, scale), remove_nodes);
 }
 TEST(CpuQuantizeSquashPass, inequal_scales) {
  auto scale1 = 1.2345f;
  auto scale2 = 21.0f;
  auto use_mkldnn = true;
  // Remove 3 nodes: Dequant, Quant, e
  // Insert 1 node: requantize
  auto remove_nodes = 2;
  MainTest(BuildProgramDesc(use_mkldnn, scale1, scale2), remove_nodes);
  use_mkldnn = !use_mkldnn;
  MainTest(BuildProgramDesc(use_mkldnn, scale1, scale2), remove_nodes);
 }
 TEST(CpuQuantizeSquashPass, branch_to_equal_inequal_and_fp32) {
  // Delete both quantize ops,
  // bypass dequantize in both branches,
  // insert requantize on one branch
  auto scale = 1.2345f;
  auto scale2 = 21.0f;
  auto use_mkldnn = true;
  // Remove 3 nodes: Quant1, Quant2, g
  // Insert 1 node: requantize
  auto remove_nodes = 2;
  MainTest(BuildProgramDesc2(use_mkldnn, scale, scale, scale2), remove_nodes);
  use_mkldnn = !use_mkldnn;
  MainTest(BuildProgramDesc2(use_mkldnn, scale, scale, scale2), remove_nodes);
 }
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
 USE_PASS(cpu_quantize_squash_pass);
--- a/paddle/fluid/framework/ir/graph_pattern_detector.cc
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc
@ -90,7 +90,8 @@ void GraphPatternDetector::operator()(Graph *graph,
  ValidateByNodeRole(&subgraphs);
  if (subgraphs.empty()) return;
-  PrettyLogEndl(Style::detail(), "---  detect %d subgraphs", subgraphs.size());
+  PrettyLogEndl(Style::detail(), "---  detected %d subgraphs",
                subgraphs.size());
  int id = 0;
  for (auto &g : subgraphs) {
    VLOG(3) << "optimizing #" << id++ << " subgraph";
@ -1074,9 +1075,53 @@ PDNode *patterns::Conv::operator()() {
                        ->AsOutput()
                        ->assert_is_op_output("conv2d", "Output");
-  conv_op->LinksFrom({input_var, filter_var});
+  conv_op->LinksFrom({input_var, filter_var}).LinksTo({output_var});
-  conv_op->LinksTo({output_var});
+  return output_var;
 }
 PDNode *patterns::ConvResidual::operator()(bool with_residual_data) {
  auto conv_op = pattern->NewNode(conv_op_repr())->assert_is_op("conv2d");
  if (!with_residual_data)
    conv_op->assert_op_attr("fuse_residual_connection", false);
  auto input_var = pattern->NewNode(conv_input_repr())
                       ->AsInput()
                       ->assert_is_op_input("conv2d", "Input");
  auto filter_var = pattern->NewNode(conv_filter_repr())
                        ->AsInput()
                        ->assert_is_op_input("conv2d", "Filter");
  auto output_var = pattern->NewNode(conv_output_repr())
                        ->AsOutput()
                        ->assert_is_op_output("conv2d", "Output");
  std::vector<PDNode *> links_from{input_var, filter_var};
  if (with_residual_data) {
    auto res_conn_var = pattern->NewNode(conv_residual_data_repr())
                            ->AsInput()
                            ->assert_is_op_input("conv2d", "ResidualData");
    links_from.push_back(res_conn_var);
  }
  conv_op->LinksFrom(links_from).LinksTo({output_var});
  return output_var;
 }
 PDNode *patterns::Pool::operator()() {
  auto pool_op = pattern->NewNode(pool_op_repr())->assert_is_op("pool2d");
  auto input_var = pattern->NewNode(pool_input_repr())
                       ->AsInput()
                       ->assert_is_op_input("pool2d", "X");
  auto output_var = pattern->NewNode(pool_output_repr())
                        ->AsOutput()
                        ->assert_is_op_output("pool2d", "Out");
  pool_op->LinksFrom({input_var}).LinksTo({output_var});
  return output_var;
 }
@ -1301,6 +1346,51 @@ PDNode *patterns::ConvAffineChannel::operator()(
  return ac_out_var;
 }
 PDNode *patterns::DequantQuantAny::operator()() {
  auto *dequant_in = pattern->NewNode(dequant_in_repr())
                         ->AsInput()
                         ->assert_is_op_input("dequantize", "Input");
  auto *dequant_op =
      pattern->NewNode(dequant_op_repr())->assert_is_op("dequantize");
  auto *dequant_out = pattern->NewNode(dequant_out_repr())
                          ->AsOutput()
                          ->assert_is_op_output("dequantize", "Output");
  auto *quant_op = pattern->NewNode(quant_op_repr())
                       ->assert_is_op("quantize")
                       ->AsIntermediate();
  auto *quant_out = pattern->NewNode(quant_out_repr())
                        ->AsOutput()
                        ->assert_is_op_output("quantize");
  auto *next_op = pattern->NewNode(next_op_repr())->assert_is_op();
  dequant_op->LinksFrom({dequant_in}).LinksTo({dequant_out});
  quant_op->LinksFrom({dequant_out}).LinksTo({quant_out});
  next_op->LinksFrom({quant_out});
  return quant_out;
 }
 PDNode *patterns::DequantAny::operator()() {
  auto *dequant_op =
      pattern->NewNode(dequant_op_repr())->assert_is_op("dequantize");
  auto *dequant_out = pattern->NewNode(dequant_out_repr())
                          ->AsOutput()
                          ->assert_is_op_output("dequantize", "Output");
  auto *next_op = pattern->NewNode(next_op_repr())->assert_is_op();
  dequant_op->LinksTo({dequant_out});
  next_op->LinksFrom({dequant_out});
  return dequant_out;
 }
 // a -> transpose_op(1) -> transpose_out_a -> flatten_op(1) -> flatten_out_a
 // b -> transpose_op(2) -> transpose_out_b -> flatten_op(2) -> flatten_out_b
 // ...
--- a/paddle/fluid/framework/ir/graph_pattern_detector.h
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.h
@ -18,8 +18,11 @@
 #include <gtest/gtest_prod.h>
 #endif
 #include <memory>
 #include <numeric>
 #include <string>
 #include <unordered_map>
 #include <unordered_set>
 #include <utility>
 #include <vector>
 #include "paddle/fluid/framework/ir/graph.h"
@ -656,6 +659,35 @@ struct Conv : public PatternBase {
  PATTERN_DECL_NODE(conv_output);
 };
 // Convolution op with residual data
 struct ConvResidual : public PatternBase {
  ConvResidual(PDPattern* pattern, const std::string& name_scope)
      : PatternBase(pattern, name_scope, "conv_residual") {}
  PDNode* operator()(bool with_residual_data);
  PATTERN_DECL_NODE(conv_op);
  PATTERN_DECL_NODE(conv_input);
  PATTERN_DECL_NODE(conv_filter);
  PATTERN_DECL_NODE(conv_residual_data);
  PATTERN_DECL_NODE(conv_output);
 };
 // Pool op
 // Forward pass for pooling.
 // pool_input is the input.
 // pool_output is a result of the operator.
 struct Pool : public PatternBase {
  Pool(PDPattern* pattern, const std::string& name_scope)
      : PatternBase(pattern, name_scope, "pooling") {}
  PDNode* operator()();
  PATTERN_DECL_NODE(pool_op);
  PATTERN_DECL_NODE(pool_input);
  PATTERN_DECL_NODE(pool_output);
 };
 // ElementwiseAdd used in residual connections.
 // y_var is used and convolution output.
 // The operator is removed, when residual
@ -766,6 +798,34 @@ struct ConvAffineChannel : public PatternBase {
  PATTERN_DECL_NODE(ac_out);  // Out
 };
 // Dequantize + Quantize + anyOP
 // This pattern is used for squashing the dequantize-quantize pairs.
 struct DequantQuantAny : public PatternBase {
  DequantQuantAny(PDPattern* pattern, const std::string& name_scope)
      : PatternBase(pattern, name_scope, "dequant_quant_any") {}
  PDNode* operator()();
  PATTERN_DECL_NODE(dequant_in);
  PATTERN_DECL_NODE(dequant_op);
  PATTERN_DECL_NODE(dequant_out);
  PATTERN_DECL_NODE(quant_op);
  PATTERN_DECL_NODE(quant_out);
  PATTERN_DECL_NODE(next_op);
 };
 // Dequantize + anyOP
 // This quantize is used for getting number of ops the Dequantize's
 // output is an input to.
 struct DequantAny : public PatternBase {
  DequantAny(PDPattern* pattern, const std::string& name_scope)
      : PatternBase(pattern, name_scope, "dequant_any") {}
  PDNode* operator()();
  PATTERN_DECL_NODE(dequant_op);
  PATTERN_DECL_NODE(dequant_out);
  PATTERN_DECL_NODE(next_op);
 };
 struct TransposeFlattenConcat : public PatternBase {
  TransposeFlattenConcat(PDPattern* pattern, const std::string& name_scope)
      : PatternBase(pattern, name_scope, "transpose_flatten_concat") {}
--- a/paddle/fluid/framework/ir/graph_test.cc
+++ b/paddle/fluid/framework/ir/graph_test.cc
@ -43,20 +43,20 @@ class SumOpMaker : public OpProtoAndCheckerMaker {
 class SumOpVarTypeInference : public VarTypeInference {
 public:
-  void operator()(const OpDesc &op_desc, BlockDesc *block) const override {
+  void operator()(InferVarTypeContext *ctx) const override {
-    auto &inputs = op_desc.Input("X");
+    auto &inputs = ctx->Input("X");
    auto default_var_type = proto::VarType::SELECTED_ROWS;
    bool any_input_is_lod_tensor = std::any_of(
-        inputs.begin(), inputs.end(), [block](const std::string &name) {
+        inputs.begin(), inputs.end(), [&ctx](const std::string &name) {
-          return block->Var(name)->GetType() == proto::VarType::LOD_TENSOR;
+          return ctx->GetType(name) == proto::VarType::LOD_TENSOR;
        });
    if (any_input_is_lod_tensor) {
      default_var_type = proto::VarType::LOD_TENSOR;
    }
-    auto out_var_name = op_desc.Output("Out").front();
+    auto out_var_name = ctx->Output("Out").front();
-    block->Var(out_var_name)->SetType(default_var_type);
+    ctx->SetType(out_var_name, default_var_type);
  }
 };
@ -71,7 +71,7 @@ class DummyOpMaker : public OpProtoAndCheckerMaker {
 class DummyOpVarTypeInference : public VarTypeInference {
 public:
-  void operator()(const OpDesc &op_desc, BlockDesc *block) const override {}
+  void operator()(framework::InferVarTypeContext *ctx) const override {}
 };
 }  // namespace framework
 }  // namespace paddle
--- a/paddle/fluid/framework/ir/runtime_context_cache_pass.cc
+++ b/paddle/fluid/framework/ir/runtime_context_cache_pass.cc
@ -0,0 +1,39 @@
 /* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/framework/ir/runtime_context_cache_pass.h"
 #include <memory>
 #include "paddle/fluid/framework/operator.h"
 namespace paddle {
 namespace framework {
 namespace ir {
 std::unique_ptr<ir::Graph> RuntimeContextCachePass::ApplyImpl(
    std::unique_ptr<ir::Graph> graph) const {
  VLOG(3) << "Applies Runtime Context Cache strategy.";
  for (const Node* n : graph->Nodes()) {
    if (n->IsOp()) {
      n->Op()->SetAttr(kEnableCacheRuntimeContext, true);
    }
  }
  return graph;
 }
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
 REGISTER_PASS(runtime_context_cache_pass,
              paddle::framework::ir::RuntimeContextCachePass);
--- a/paddle/fluid/framework/ir/runtime_context_cache_pass.h
+++ b/paddle/fluid/framework/ir/runtime_context_cache_pass.h
@ -0,0 +1,32 @@
 /* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
 #include <memory>
 #include "paddle/fluid/framework/ir/pass.h"
 namespace paddle {
 namespace framework {
 namespace ir {
 class RuntimeContextCachePass : public Pass {
 protected:
  std::unique_ptr<ir::Graph> ApplyImpl(
      std::unique_ptr<ir::Graph> graph) const override;
 };
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
--- a/paddle/fluid/framework/ir/sync_batch_norm_pass.cc
+++ b/paddle/fluid/framework/ir/sync_batch_norm_pass.cc
@ -0,0 +1,45 @@
 /* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/framework/ir/sync_batch_norm_pass.h"
 #include <memory>
 #include <string>
 #include <utility>
 namespace paddle {
 namespace framework {
 namespace ir {
 std::unique_ptr<ir::Graph> SyncBatchNormPass::ApplyImpl(
    std::unique_ptr<ir::Graph> graph) const {
  VLOG(3) << "Use synchronous batch norm";
  for (const Node* n : graph->Nodes()) {
    if (n->IsOp()) {
      auto* op = n->Op();
      if (op->Type() == "batch_norm") {
        op->SetType("sync_batch_norm");
      }
      if (op->Type() == "batch_norm_grad") {
        op->SetType("sync_batch_norm_grad");
      }
    }
  }
  return graph;
 }
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
 REGISTER_PASS(sync_batch_norm_pass, paddle::framework::ir::SyncBatchNormPass);
--- a/Show More
+++ b/Show More