Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into imperative_mnist

test=develop
7 years ago · 9e3155e01d
parent 6bb84490af 3e8408429d
commit 9e3155e01d
78 changed files with 2230 additions and 1127 deletions
--- a/76
+++ b/76
@ -94,52 +94,52 @@ RUN localedef -i en_US -f UTF-8 en_US.UTF-8
 # specify sphinx version as 1.5.6 and remove -U option for [pip install -U
 # sphinx-rtd-theme] since -U option will cause sphinx being updated to newest
 # version(1.7.1 for now), which causes building documentation failed.
-RUN pip3 install -U wheel && \
+RUN pip3 --no-cache-dir install -U wheel && \
-    pip3 install -U docopt PyYAML sphinx==1.5.6 && \
+    pip3 --no-cache-dir install -U docopt PyYAML sphinx==1.5.6 && \
-    pip3 install sphinx-rtd-theme==0.1.9 recommonmark && \
+    pip3 --no-cache-dir install sphinx-rtd-theme==0.1.9 recommonmark && \
-    pip3.6 install -U wheel && \
+    pip3.6 --no-cache-dir install -U wheel && \
-    pip3.6 install -U docopt PyYAML sphinx==1.5.6 && \
+    pip3.6 --no-cache-dir install -U docopt PyYAML sphinx==1.5.6 && \
-    pip3.6 install sphinx-rtd-theme==0.1.9 recommonmark && \
+    pip3.6 --no-cache-dir install sphinx-rtd-theme==0.1.9 recommonmark && \
-    pip3.7 install -U wheel && \
+    pip3.7 --no-cache-dir install -U wheel && \
-    pip3.7 install -U docopt PyYAML sphinx==1.5.6 && \
+    pip3.7 --no-cache-dir install -U docopt PyYAML sphinx==1.5.6 && \
-    pip3.7 install sphinx-rtd-theme==0.1.9 recommonmark && \
+    pip3.7 --no-cache-dir install sphinx-rtd-theme==0.1.9 recommonmark && \
    easy_install -U pip && \
-    pip install -U pip setuptools wheel && \
+    pip --no-cache-dir install -U pip setuptools wheel && \
-    pip install -U docopt PyYAML sphinx==1.5.6 && \
+    pip --no-cache-dir install -U docopt PyYAML sphinx==1.5.6 && \
-    pip install sphinx-rtd-theme==0.1.9 recommonmark
+    pip --no-cache-dir install sphinx-rtd-theme==0.1.9 recommonmark
-
+
-RUN pip3 install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
+RUN pip3 --no-cache-dir install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
-    pip3 install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
+    pip3 --no-cache-dir install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
-    pip3 install opencv-python && \
+    pip3 --no-cache-dir install opencv-python && \
-    pip3.6 install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
+    pip3.6 --no-cache-dir install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
-    pip3.6 install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
+    pip3.6 --no-cache-dir install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
-    pip3.6 install opencv-python && \
+    pip3.6 --no-cache-dir install opencv-python && \
-    pip3.7 install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
+    pip3.7 --no-cache-dir install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
-    pip3.7 install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
+    pip3.7 --no-cache-dir install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
-    pip3.7 install opencv-python && \
+    pip3.7 --no-cache-dir install opencv-python && \
-    pip install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
+    pip --no-cache-dir install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
-    pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
+    pip --no-cache-dir install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
-    pip install opencv-python
+    pip --no-cache-dir install opencv-python
 #For docstring checker
-RUN pip3 install pylint pytest astroid isort
+RUN pip3 --no-cache-dir install pylint pytest astroid isort
-RUN pip3.6 install pylint pytest astroid isort
+RUN pip3.6 --no-cache-dir install pylint pytest astroid isort
-RUN pip3.7 install pylint pytest astroid isort
+RUN pip3.7 --no-cache-dir install pylint pytest astroid isort
-RUN pip install pylint pytest astroid isort LinkChecker
+RUN pip --no-cache-dir install pylint pytest astroid isort LinkChecker
 COPY ./python/requirements.txt /root/
-RUN pip3 install -r /root/requirements.txt
+RUN pip3 --no-cache-dir install -r /root/requirements.txt
-RUN pip3.6 install -r /root/requirements.txt
+RUN pip3.6 --no-cache-dir install -r /root/requirements.txt
-RUN pip3.7 install -r /root/requirements.txt
+RUN pip3.7 --no-cache-dir install -r /root/requirements.txt
-RUN pip install -r /root/requirements.txt
+RUN pip --no-cache-dir install -r /root/requirements.txt
 # To fix https://github.com/PaddlePaddle/Paddle/issues/1954, we use
 # the solution in https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl-py2
-RUN apt-get install -y libssl-dev libffi-dev
+RUN apt-get install -y libssl-dev libffi-dev && apt-get clean -y
-RUN pip3 install certifi urllib3[secure]
+RUN pip3 --no-cache-dir install certifi urllib3[secure]
-RUN pip3.6 install certifi urllib3[secure]
+RUN pip3.6 --no-cache-dir install certifi urllib3[secure]
-RUN pip3.7 install certifi urllib3[secure]
+RUN pip3.7 --no-cache-dir install certifi urllib3[secure]
-RUN pip install certifi urllib3[secure]
+RUN pip --no-cache-dir install certifi urllib3[secure]
 # Install woboq_codebrowser to /woboq
--- a/cmake/external/mkldnn.cmake
+++ b/cmake/external/mkldnn.cmake
@ -106,10 +106,10 @@ else(WIN32)
    SET(MKLDNN_SHARED_LIB ${MKLDNN_INSTALL_DIR}/libmkldnn.so.0)
    ADD_CUSTOM_COMMAND(OUTPUT ${MKLDNN_SHARED_LIB}
            COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_LIB} ${MKLDNN_SHARED_LIB}
-            DEPENDS mkldnn)
+            DEPENDS mkldnn shared_mkldnn)
 endif(WIN32)
 ADD_CUSTOM_TARGET(mkldnn_shared_lib ALL DEPENDS ${MKLDNN_SHARED_LIB})
-
+ADD_DEPENDENCIES(mkldnn_shared_lib ${MKLDNN_PROJECT} mkldnn)
 IF(WITH_C_API)
  INSTALL(FILES ${MKLDNN_SHARED_LIB} DESTINATION lib)
 ENDIF()
--- a/cmake/inference_lib.cmake
+++ b/cmake/inference_lib.cmake
@ -136,7 +136,7 @@ if (WITH_MKLDNN)
    copy(mkldnn_lib
            SRCS ${MKLDNN_INC_DIR} ${MKLDNN_SHARED_LIB}
            DSTS ${dst_dir} ${dst_dir}/lib
-            DEPS mkldnn
+            DEPS mkldnn_shared_lib
            )
 endif ()
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@ -68,18 +68,23 @@ cc_library(garbage_collector SRCS garbage_collector.cc DEPS device_context memor
 cc_library(reader SRCS reader.cc DEPS lod_tensor ddim)
 cc_test(reader_test SRCS reader_test.cc DEPS reader)
 cc_test(variable_test SRCS variable_test.cc)
 cc_library(threadpool SRCS threadpool.cc DEPS enforce)
 cc_test(threadpool_test SRCS threadpool_test.cc DEPS threadpool)
-cc_library(scope SRCS scope.cc DEPS glog threadpool)
+# Name the source with its explicit .cc extension, like every other target here.
+cc_library(var_type_traits SRCS var_type_traits.cc DEPS lod_tensor selected_rows framework_proto)
 if (WITH_GPU)
  target_link_libraries(var_type_traits dynload_cuda)
 endif()
 cc_test(var_type_traits_test SRCS var_type_traits_test.cc DEPS var_type_traits)
 cc_library(scope SRCS scope.cc DEPS glog threadpool var_type_traits)
 cc_library(scope_pool SRCS scope_pool.cc DEPS scope)
 cc_test(scope_test SRCS scope_test.cc DEPS scope)
 cc_test(variable_test SRCS variable_test.cc DEPS tensor var_type_traits)
 cc_library(data_device_transform SRCS data_device_transform.cc DEPS tensor)
 nv_test(data_device_transform_test SRCS data_device_transform_test.cu
-        DEPS operator op_registry device_context math_function)
+        DEPS operator op_registry device_context math_function scope)
 if(WITH_GPU)
  if (WIN32)
--- a/paddle/fluid/framework/data_device_transform_test.cu
+++ b/paddle/fluid/framework/data_device_transform_test.cu
@ -17,6 +17,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/op_info.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
 #include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/device_context.h"
--- a/paddle/fluid/framework/details/eager_deletion_op_handle.cc
+++ b/paddle/fluid/framework/details/eager_deletion_op_handle.cc
@ -88,7 +88,7 @@ void EagerDeletionOpHandle::RunImpl() {
      }
    } else {
      PADDLE_THROW("Type %s of %s is not supported eager deletion",
-                   var->Type().name(), name);
+                   framework::ToTypeName(var->Type()), name);
    }
  }
--- a/paddle/fluid/framework/details/multi_devices_graph_pass.cc
+++ b/paddle/fluid/framework/details/multi_devices_graph_pass.cc
--- a/paddle/fluid/framework/details/multi_devices_graph_pass.h
+++ b/paddle/fluid/framework/details/multi_devices_graph_pass.h
@ -45,7 +45,7 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
 #endif
  int GetVarDeviceID(
-      const ir::Graph &graph, const std::string &varname,
+      const std::string &varname,
      const std::unordered_map<std::string, int> &sharded_var_device) const;
  bool IsScaleLossOp(ir::Node *node) const;
@ -57,12 +57,6 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
      ir::Graph *result, ir::Node *node,
      std::unordered_map<std::string, int> *sharded_var_device) const;
  std::vector<std::string> FindDistTrainSendVars(
      const std::vector<ir::Node *> &nodes) const;
  std::vector<std::string> FindDistTrainRecvVars(
      const std::vector<ir::Node *> &nodes) const;
  void CreateComputationalOps(ir::Graph *result, ir::Node *node,
                              size_t num_places) const;
@ -77,7 +71,7 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
                             int dev_id) const;
  int GetOpDeviceID(
-      const ir::Graph &graph, ir::Node *node,
+      ir::Node *node,
      const std::unordered_map<std::string, int> &sharded_var_device) const;
  void InsertAllReduceOp(ir::Graph *result, const std::string &og) const;
@ -100,6 +94,15 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
  void SetCommunicationContext(OpHandleBase *op_handle,
                               const platform::Place &p) const;
  std::vector<ir::Node *> SortForReduceMode(
      const std::vector<ir::Node *> &) const;
  int GetOpDeviceID(
      ir::Node *node,
      const std::unordered_map<std::string, int> &shared_var_device,
      std::unordered_map<std::string, std::vector<ir::Node *>> *delay_ops)
      const;
  mutable std::string loss_var_name_;
  mutable std::vector<platform::Place> places_;
  mutable std::vector<Scope *> local_scopes_;
--- a/paddle/fluid/framework/details/variable_visitor.cc
+++ b/paddle/fluid/framework/details/variable_visitor.cc
@ -24,7 +24,7 @@ static void VisitVariable(Variable* var, Func* func) {
  } else if (var->IsType<SelectedRows>()) {
    (*func)(var->GetMutable<SelectedRows>());
  } else {
-    PADDLE_THROW("Not supported type %s", var->Type().name());
+    PADDLE_THROW("Not supported type %s", ToTypeName(var->Type()));
  }
 }
@ -35,7 +35,7 @@ static void VisitVariable(const Variable& var, Func* func) {
  } else if (var.IsType<SelectedRows>()) {
    (*func)(var.Get<SelectedRows>());
  } else {
-    PADDLE_THROW("Not supported type %s", var.Type().name());
+    PADDLE_THROW("Not supported type %s", ToTypeName(var.Type()));
  }
 }
--- a/paddle/fluid/framework/executor.cc
+++ b/paddle/fluid/framework/executor.cc
@ -119,7 +119,7 @@ static void DeleteUnusedTensors(
          }
        } else {
          PADDLE_THROW("Type %s of %s is not supported eager deletion",
-                       var->Type().name(), name);
+                       framework::ToTypeName(var->Type()), name);
        }
      }
    }
--- a/paddle/fluid/framework/ir/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/CMakeLists.txt
@ -45,6 +45,7 @@ pass_library(is_test_pass base)
 pass_library(conv_elementwise_add_act_fuse_pass inference)
 pass_library(conv_elementwise_add2_act_fuse_pass inference)
 pass_library(conv_elementwise_add_fuse_pass inference)
 pass_library(conv_affine_channel_fuse_pass inference)
 if(WITH_MKLDNN)
    pass_library(mkldnn_placement_pass base)
    pass_library(depthwise_conv_mkldnn_pass base)
--- a/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc
@ -0,0 +1,222 @@
 // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.h"
 #include <functional>
 #include <string>
 #include <vector>
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/operators/math/cpu_vec.h"
 #include "paddle/fluid/platform/enforce.h"
 namespace paddle {
 namespace framework {
 namespace ir {
 // Fetches every node of a matched conv2d + affine_channel subgraph from
 // `pattern_name` via GET_IR_NODE_FROM_SUBGRAPH. The callers below then use the
 // first argument of each invocation (conv, affine_channel, conv_weight,
 // conv_out, ac_scale, ac_bias, ac_out) as a local node pointer.
 // NOTE(review): despite the "BN" in its name, this macro collects
 // affine_channel nodes, not batch_norm nodes.
 #define GET_CONV_BN_NODES(pattern_name)                                    \
  /* OPERATORS */                                                          \
  GET_IR_NODE_FROM_SUBGRAPH(conv, conv, pattern_name);                     \
  GET_IR_NODE_FROM_SUBGRAPH(affine_channel, affine_channel, pattern_name); \
  /* CONV inputs */                                                        \
  GET_IR_NODE_FROM_SUBGRAPH(conv_weight, conv_weight, pattern_name);       \
  /* CONV outputs */                                                       \
  GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, pattern_name);             \
  /* Affine Channel inputs */                                              \
  GET_IR_NODE_FROM_SUBGRAPH(ac_scale, ac_scale, pattern_name);             \
  GET_IR_NODE_FROM_SUBGRAPH(ac_bias, ac_bias, pattern_name);               \
  /* Affine channel outputs */                                             \
  GET_IR_NODE_FROM_SUBGRAPH(ac_out, ac_out, pattern_name); /* Out */
 void recompute_bias_and_weights(const Scope* scope, ir::Node* conv_weight,
                                const ir::Node& ac_scale,
                                const LoDTensor& ac_bias_tensor,
                                LoDTensor* eltwise_y_in_tensor) {
  using EigenVectorArrayMap =
      Eigen::Map<Eigen::Array<float, Eigen::Dynamic, 1>>;
  using ConstEigenVectorArrayMap =
      Eigen::Map<const Eigen::Array<float, Eigen::Dynamic, 1>>;
  using EigenMatrixArrayMap = Eigen::Map<
      Eigen::Array<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>;
  // Re-compute bias of conv2d from AffineChannel
  PADDLE_ENFORCE_EQ(eltwise_y_in_tensor->dims(), ac_bias_tensor.dims());
  auto* scale_tensor = scope->FindVar(ac_scale.Name())->GetMutable<LoDTensor>();
  ConstEigenVectorArrayMap scale_array(scale_tensor->data<float>(),
                                       scale_tensor->numel(), 1);
  ConstEigenVectorArrayMap ac_bias_array(ac_bias_tensor.data<float>(),
                                         ac_bias_tensor.numel(), 1);
  EigenVectorArrayMap eltwise_y_in_array(
      eltwise_y_in_tensor->mutable_data<float>(platform::CPUPlace()),
      eltwise_y_in_tensor->numel(), 1);
  eltwise_y_in_array = (eltwise_y_in_array * scale_array) + ac_bias_array;
  // Re-compute weight of conv2d from AffineChannel
  auto* weights = scope->FindVar(conv_weight->Name())->GetMutable<LoDTensor>();
  auto weights_shape = weights->dims();
  auto weights_shape_2d = flatten_to_2d(weights_shape, 1);
  EigenMatrixArrayMap weights_array_2d(
      weights->mutable_data<float>(platform::CPUPlace()), weights_shape_2d[0],
      weights_shape_2d[1]);
  weights_array_2d.colwise() *= scale_array;
 }
 // Rewrites every matched conv2d -> affine_channel pair into
 // conv2d -> elementwise_add. The AffineChannel scale is folded into the conv
 // filter and the AffineChannel bias becomes the Y input of the new
 // elementwise_add. Returns the (mutated) input graph.
 std::unique_ptr<ir::Graph> ConvAffineChannelFusePass::ApplyImpl(
    std::unique_ptr<ir::Graph> graph) const {
  PADDLE_ENFORCE(graph.get());
  FusePassBase::Init(name_scope_, graph.get());
  auto* scope = param_scope();
  PADDLE_ENFORCE(scope);
  GraphPatternDetector gpd;
  // The pattern is anchored on the variable feeding conv2d's "Input" slot.
  auto* conv_input =
      gpd.mutable_pattern()
          ->NewNode(patterns::PDNodeName(name_scope_, "conv_input"))
          ->AsInput()
          ->assert_is_op_input("conv2d", "Input");
  patterns::ConvAffineChannel conv_ac_pattern(gpd.mutable_pattern(),
                                              name_scope_);
  conv_ac_pattern(conv_input, false /*with_eltwise_add*/);
  int found_conv_ac_count = 0;
  // Invoked once per matched subgraph; performs the rewrite in place.
  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* g) {
    VLOG(4) << "handle ConvAffineChannel fuse";
    GET_CONV_BN_NODES(conv_ac_pattern);
    // check if fuse can be done and if MKL-DNN should be used
    FuseOptions fuse_option = FindFuseOption(*conv, *affine_channel);
    if (fuse_option == DO_NOT_FUSE) {
      VLOG(3) << "do not perform conv+affinechannel fuse";
      return;
    }
    // Create eltwise_y (conv bias) variable
    VarDesc eltwise_y_in_desc(
        patterns::PDNodeName(name_scope_, "eltwise_y_in"));
    eltwise_y_in_desc.SetPersistable(true);
    auto* eltwise_y_in_node = g->CreateVarNode(&eltwise_y_in_desc);
    auto* eltwise_y_in_tensor =
        scope->Var(eltwise_y_in_node->Name())->GetMutable<LoDTensor>();
    // Get affine_channel bias
    auto* ac_bias_tensor =
        scope->FindVar(ac_bias->Name())->GetMutable<LoDTensor>();
    // Initialize eltwise_y
    // It starts as all zeros, so after recompute_bias_and_weights
    // (0 * scale + ac_bias) it holds exactly the AffineChannel bias.
    eltwise_y_in_tensor->Resize(ac_bias_tensor->dims());
    std::fill_n(eltwise_y_in_tensor->mutable_data<float>(platform::CPUPlace()),
                eltwise_y_in_tensor->numel(), 0.0f);
    // update weights and biases
    recompute_bias_and_weights(scope, conv_weight, *ac_scale, *ac_bias_tensor,
                               eltwise_y_in_tensor);
    // create an elementwise add node.
    // It writes straight to the old affine_channel output variable.
    OpDesc desc;
    desc.SetInput("X", std::vector<std::string>({conv_out->Name()}));
    desc.SetInput("Y", std::vector<std::string>({eltwise_y_in_node->Name()}));
    desc.SetOutput("Out", std::vector<std::string>({ac_out->Name()}));
    desc.SetType("elementwise_add");
    desc.SetAttr("axis", 1);
    auto eltwise_op = g->CreateOpNode(&desc);  // OpDesc will be copied.
    // Drop the affine_channel op and its two parameter nodes, then wire the
    // new op between conv_out / eltwise_y_in and ac_out.
    GraphSafeRemoveNodes(graph.get(), {ac_scale, ac_bias, affine_channel});
    IR_NODE_LINK_TO(conv_out, eltwise_op);
    IR_NODE_LINK_TO(eltwise_y_in_node, eltwise_op);
    IR_NODE_LINK_TO(eltwise_op, ac_out);
    found_conv_ac_count++;
  };
  gpd(graph.get(), handler);
  // Report how many subgraphs were fused.
  AddStatis(found_conv_ac_count);
  return graph;
 }
 // Rewrites every matched conv2d -> elementwise_add -> affine_channel chain
 // into conv2d -> elementwise_add. The AffineChannel scale is folded into the
 // conv filter and into the existing elementwise_add bias, the AffineChannel
 // bias is added to that bias, and the elementwise_add is redirected to write
 // the old affine_channel output. Returns the (mutated) input graph.
 std::unique_ptr<ir::Graph> ConvEltwiseAddAffineChannelFusePass::ApplyImpl(
    std::unique_ptr<ir::Graph> graph) const {
  PADDLE_ENFORCE(graph.get());
  FusePassBase::Init(name_scope_, graph.get());
  auto* scope = param_scope();
  PADDLE_ENFORCE(scope);
  GraphPatternDetector gpd;
  // The pattern is anchored on the variable feeding conv2d's "Input" slot.
  auto* conv_input =
      gpd.mutable_pattern()
          ->NewNode(patterns::PDNodeName(name_scope_, "conv_input"))
          ->AsInput()
          ->assert_is_op_input("conv2d", "Input");
  patterns::ConvAffineChannel conv_ac_pattern(gpd.mutable_pattern(),
                                              name_scope_);
  conv_ac_pattern(conv_input, true /*with_eltwise_add*/);
  int found_conv_ac_count = 0;
  // Invoked once per matched subgraph; performs the rewrite in place.
  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* g) {
    VLOG(4) << "handle ConvEltwiseAddAffineChannel fuse";
    GET_CONV_BN_NODES(conv_ac_pattern);
    // OPERATORS
    GET_IR_NODE_FROM_SUBGRAPH(eltwise, eltwise, conv_ac_pattern);
    // BIAS inputs
    GET_IR_NODE_FROM_SUBGRAPH(eltwise_y_in, eltwise_y_in, conv_ac_pattern);
    // BIAS outputs
    GET_IR_NODE_FROM_SUBGRAPH(eltwise_out, eltwise_out, conv_ac_pattern);
    // Get eltwise_y (conv bias) variable.
    // The pattern does not require this input to be persistable, so it may be
    // missing from the parameter scope. FindVar then yields a null pointer;
    // leave such a subgraph untouched instead of dereferencing it.
    auto* eltwise_y_in_var = scope->FindVar(eltwise_y_in->Name());
    if (eltwise_y_in_var == nullptr) {
      VLOG(3) << "do not perform conv+eltwiseadd+affinechannel fuse";
      return;
    }
    auto* eltwise_y_in_tensor = eltwise_y_in_var->GetMutable<LoDTensor>();
    // Get affine_channel bias
    auto* ac_bias_tensor =
        scope->FindVar(ac_bias->Name())->GetMutable<LoDTensor>();
    // Fold scale/bias into the conv filter and the existing bias tensor.
    recompute_bias_and_weights(scope, conv_weight, *ac_scale, *ac_bias_tensor,
                               eltwise_y_in_tensor);
    // Update the elementwise_add node
    eltwise->Op()->SetAttr("axis", 1);
    eltwise->Op()->SetOutput("Out", std::vector<std::string>({ac_out->Name()}));
    // The old elementwise_add output is no longer produced; remove it together
    // with the affine_channel op and its parameter nodes.
    GraphSafeRemoveNodes(graph.get(),
                         {ac_scale, ac_bias, affine_channel, eltwise_out});
    IR_NODE_LINK_TO(eltwise, ac_out);
    found_conv_ac_count++;
  };
  gpd(graph.get(), handler);
  // Report how many subgraphs were fused.
  AddStatis(found_conv_ac_count);
  return graph;
 }
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
 REGISTER_PASS(conv_affine_channel_fuse_pass,
              paddle::framework::ir::ConvAffineChannelFusePass);
 REGISTER_PASS(conv_eltwiseadd_affine_channel_fuse_pass,
              paddle::framework::ir::ConvEltwiseAddAffineChannelFusePass);
--- a/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.h
+++ b/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.h
@ -0,0 +1,49 @@
 // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #pragma once
 #include <string>
 #include "paddle/fluid/framework/ir/fuse_pass_base.h"
 #include "paddle/fluid/framework/ir/graph.h"
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 namespace paddle {
 namespace framework {
 namespace ir {
 /*
 * Fuse a conv2d op with the affine_channel op that consumes its output.
 * ApplyImpl folds the affine_channel scale into the conv filter and replaces
 * the affine_channel op with an elementwise_add that adds its bias.
 */
 class ConvAffineChannelFusePass : public FusePassBase {
 public:
  virtual ~ConvAffineChannelFusePass() {}
 protected:
  // Runs the fusion on `graph` and returns the rewritten graph.
  std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const;
  // Name scope passed to FusePassBase::Init and used to name pattern nodes.
  const std::string name_scope_{"conv_affine_channel_fuse"};
 };
 /*
 * Fuse conv2d + elementwise_add + affine_channel. ApplyImpl folds the
 * affine_channel scale and bias into the conv filter and the existing
 * elementwise_add bias, then removes the affine_channel op.
 */
 class ConvEltwiseAddAffineChannelFusePass : public FusePassBase {
 public:
  virtual ~ConvEltwiseAddAffineChannelFusePass() {}
 protected:
  // Runs the fusion on `graph` and returns the rewritten graph.
  std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const;
  // Name scope passed to FusePassBase::Init and used to name pattern nodes.
  const std::string name_scope_{"conv_eltwiseadd_affine_channel_fuse"};
 };
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
--- a/paddle/fluid/framework/ir/graph.cc
+++ b/paddle/fluid/framework/ir/graph.cc
@ -23,66 +23,8 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 namespace ir {
 namespace {
 void CheckProgram(const ProgramDesc &program) {
 #define _INT(role) static_cast<int>(role)
  std::map<int, bool> visit;
  for (OpDesc *op : program.Block(0).AllOps()) {
    // For backward compatibility, some program doesn't have role added.
    if (!op->HasAttr(OpProtoAndCheckerMaker::OpRoleAttrName())) continue;
    int role_id =
        boost::get<int>(op->GetAttr(OpProtoAndCheckerMaker::OpRoleAttrName()));
    visit[role_id] = true;
    switch (role_id) {
      case _INT(OpRole::kForward):
        if (visit.find(_INT(OpRole::kBackward)) != visit.end()) {
          LOG(ERROR) << "Cannot add backward operator before forward operator "
                     << op->Type();
        }
        break;
      case _INT(OpRole::kBackward):
      case _INT(OpRole::kBackward) | _INT(OpRole::kLoss):
        PADDLE_ENFORCE(
            visit.find(_INT(OpRole::kOptimize)) == visit.end(),
            "Cannot add backward operator %s after optimize operator.",
            op->Type());
        break;
      case _INT(OpRole::kForward) | _INT(OpRole::kLoss):
        PADDLE_ENFORCE(visit.find(_INT(OpRole::kBackward) |
                                  _INT(OpRole::kLoss)) == visit.end(),
                       "Cannot add backward|loss operator before "
                       "forward|loss operator %s.",
                       op->Type());
        PADDLE_ENFORCE(
            visit.find(_INT(OpRole::kOptimize)) == visit.end(),
            "Cannot add forward|loss operator %s after optimize operator.",
            op->Type());
        break;
      case _INT(OpRole::kOptimize):
      case _INT(OpRole::kOptimize) | _INT(OpRole::kLRSched):
        PADDLE_ENFORCE(visit.find(_INT(OpRole::kBackward)) != visit.end(),
                       "Optimize operators %s must follow backward operator.",
                       op->Type());
        break;
      case _INT(OpRole::kLRSched):
      case _INT(OpRole::kDist):
      case _INT(OpRole::kRPC):
      case _INT(OpRole::kNotSpecified):
        break;
      default:
        LOG(FATAL) << "Unknown operator role. Don't add new role because "
                      "you don't know what you are doing.";
    }
  }
 #undef _INT
 }
 }  // namespace
 Graph::Graph(const ProgramDesc &program) : program_(program) {
  CheckProgram(program_);
  auto var_nodes = InitFromProgram(program_);
  ResolveHazard(var_nodes);
 }
--- a/paddle/fluid/framework/ir/graph_pattern_detector.cc
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc
@ -1234,6 +1234,78 @@ PDNode *patterns::ConvElementwiseadd::operator()(PDNode *conv_in) {
  return elementwise_add_out;
 }
 // Builds the pattern conv2d -> [elementwise_add ->] affine_channel.
 //
 //   conv_input       - pattern node for the variable feeding conv2d "Input".
 //   with_eltwise_add - when true, an elementwise_add (conv_out as X, a bias as
 //                      Y) is required between the conv2d and the
 //                      affine_channel; when false, conv_out must feed the
 //                      affine_channel "X" input directly.
 //
 // Returns the pattern node for the affine_channel output variable.
 PDNode *patterns::ConvAffineChannel::operator()(
    paddle::framework::ir::PDNode *conv_input, bool with_eltwise_add) {
  // Create Operators
  conv_input->assert_is_op_input("conv2d", "Input");
  auto *conv_op = pattern->NewNode(conv_repr())->assert_is_op("conv2d");
  PDNode *eltwise_op = nullptr;
  if (with_eltwise_add) {
    eltwise_op =
        pattern->NewNode(eltwise_repr())->assert_is_op("elementwise_add");
  }
  auto *affine_channel_op =
      pattern->NewNode(affine_channel_repr())->assert_is_op("affine_channel");
  // Create variables
  // Conv Filter
  auto *conv_weight_var = pattern->NewNode(conv_weight_repr())
                              ->AsInput()
                              ->assert_is_persistable_var()
                              ->assert_is_op_input("conv2d", "Filter");
  auto *conv_out_var = pattern->NewNode(conv_out_repr())
                           ->AsIntermediate()
                           ->assert_is_only_output_of_op("conv2d");
  // These two stay null unless with_eltwise_add is set.
  PDNode *eltwise_y_in_var = nullptr;
  PDNode *eltwise_out_var = nullptr;
  if (with_eltwise_add) {
    // Conv output as Bias input
    conv_out_var->assert_is_op_input("elementwise_add", "X");
    // Bias
    // NOTE(review): unlike the filter, scale and bias nodes, this node is not
    // asserted to be a persistable variable.
    eltwise_y_in_var = pattern->NewNode(eltwise_y_in_repr())
                           ->assert_is_op_input("elementwise_add", "Y")
                           ->AsInput();
    eltwise_out_var = pattern->NewNode(eltwise_out_repr())
                          ->AsIntermediate()
                          ->assert_is_only_output_of_op("elementwise_add");
  } else {
    // Conv output as AffineChannel input
    conv_out_var->assert_is_op_input("affine_channel", "X");
  }
  // AC Scale
  auto *ac_scale_var = pattern->NewNode(ac_scale_repr())
                           ->AsInput()
                           ->assert_is_persistable_var()
                           ->assert_is_op_input("affine_channel", "Scale");
  // AC Bias
  auto *ac_bias_var = pattern->NewNode(ac_bias_repr())
                          ->AsInput()
                          ->assert_is_persistable_var()
                          ->assert_is_op_input("affine_channel", "Bias");
  // AC output
  auto *ac_out_var = pattern->NewNode(ac_out_repr())
                         ->AsOutput()
                         ->assert_is_op_output("affine_channel");
  // Wire the edges between the operator and variable nodes created above.
  conv_op->LinksFrom({conv_input, conv_weight_var}).LinksTo({conv_out_var});
  if (with_eltwise_add) {
    eltwise_op->LinksFrom({conv_out_var, eltwise_y_in_var})
        .LinksTo({eltwise_out_var});
    affine_channel_op->LinksFrom({eltwise_out_var, ac_scale_var, ac_bias_var})
        .LinksTo({ac_out_var});
  } else {
    affine_channel_op->LinksFrom({conv_out_var, ac_scale_var, ac_bias_var})
        .LinksTo({ac_out_var});
  }
  return ac_out_var;
 }
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
--- a/paddle/fluid/framework/ir/graph_pattern_detector.h
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.h
@ -734,6 +734,38 @@ struct ConvElementwiseadd : public PatternBase {
  PATTERN_DECL_NODE(elementwise_add_out);
 };
 // Conv with affine_channel
 // op: conv + (elementwise_add +) affine_channel
 // named nodes:
 // conv, eltwise, affine_channel,
 // conv_weight, conv_out,
 // eltwise_y_in, eltwise_out,
 // ac_scale, ac_bias, ac_out
 struct ConvAffineChannel : public PatternBase {
  ConvAffineChannel(PDPattern* pattern, const std::string& name_scope)
      : PatternBase(pattern, name_scope, "conv_affine_channel") {}
  // Builds the pattern starting at `conv_input`; `with_eltwise_add` selects
  // whether an elementwise_add sits between the conv and the affine_channel.
  PDNode* operator()(PDNode* conv_input, bool with_eltwise_add);
  // declare operator node's name
  PATTERN_DECL_NODE(conv);
  PATTERN_DECL_NODE(affine_channel);
  PATTERN_DECL_NODE(eltwise);  // ELEMENTWISE_ADD
  // CONV inputs
  PATTERN_DECL_NODE(conv_weight);  // Filter
  // CONV outputs
  PATTERN_DECL_NODE(conv_out);  // tmp
  // ELTWISE inputs
  PATTERN_DECL_NODE(eltwise_y_in);
  // ELTWISE outputs
  PATTERN_DECL_NODE(eltwise_out);  // tmp
  // AC(Affine_Channel) inputs
  PATTERN_DECL_NODE(ac_scale);
  PATTERN_DECL_NODE(ac_bias);
  // AC outputs
  PATTERN_DECL_NODE(ac_out);  // Out
 };
 }  // namespace patterns
 // Link two ir::Nodes from each other.
--- a/paddle/fluid/framework/ir/multi_batch_merge_pass.cc
+++ b/paddle/fluid/framework/ir/multi_batch_merge_pass.cc
@ -75,6 +75,7 @@ std::unique_ptr<Graph> BatchMergePass::ApplyImpl(
  std::vector<Node*> optimize_ops;
  std::vector<Node*> lr_ops;  // ops other than forward/backward/optimize
  std::unordered_set<std::string> grad_names;
  std::unordered_map<std::string, std::string> gradname2paramname;
  std::vector<ir::Node*> nodes = TopologySortOperations(*graph);
  auto origin_nodes = graph->ReleaseNodes();
@ -99,6 +100,7 @@ std::unique_ptr<Graph> BatchMergePass::ApplyImpl(
      auto op_role_vars = boost::get<std::vector<std::string>>(op_role_var);
      for (size_t i = 0; i < op_role_vars.size(); i += 2) {
        grad_names.insert(op_role_vars[i + 1]);
        gradname2paramname[op_role_vars[i + 1]] = op_role_vars[i];
      }
    } else if (op_role & static_cast<int>(framework::OpRole::kLRSched)) {
      lr_ops.push_back(node);
@ -109,7 +111,7 @@ std::unique_ptr<Graph> BatchMergePass::ApplyImpl(
  // 2. copy forward backward
  ir::Node* prev_repeat_last_op_node = nullptr;
-  // record origin_grad -> repeated grad list map.
+  // record origin_grad -> repeated_grad_list map.
  std::map<ir::Node*, std::vector<ir::Node*>> grad_repeated_map;
  std::map<std::string, std::vector<ir::Node*>> created;
  std::unordered_set<std::string> bn_vars_need_rename;
@ -124,10 +126,16 @@ std::unique_ptr<Graph> BatchMergePass::ApplyImpl(
        if (grad_names.find(outname) != grad_names.end()) {
          std::string new_gname = string::Sprintf("%s.repeat.%d", outname, i);
          repeated_op.RenameOutput(outname, new_gname);
          // remove op_role_var for backward ops that outputs grad for a
          // parameter.
          repeated_op.SetAttr(OpProtoAndCheckerMaker::OpRoleVarAttrName(),
                              std::vector<std::string>());
        }
      }
      // 3.5 let batch_norm ops use independent vars, note batch_norm_grad do
-      // not need this update
+      // not need this update, because only moving mean and variance should be
      // differ, trainable parameter scale and bias is the same as other
      // parameters.
      if (node->Name() == "batch_norm") {
        // NOTE: assume bn op created by layers use save var as output mean and
        // variance
@ -224,16 +232,25 @@ std::unique_ptr<Graph> BatchMergePass::ApplyImpl(
        var->inputs.push_back(repeated_node);
      }
    }
-  }
+  }  // end copy forward backward
-  // 5. create GRAD merge op node
+  // 5. create GRAD merge op node: sum(repeat.0...repeat.n) ->
  // scale(1/num_repeats)
  for (auto kv : grad_repeated_map) {
    OpDesc sum_op;
    sum_op.SetType("sum");
    std::vector<std::string> repeated_grad_names;
    std::vector<std::string> param_grad_op_role_var;
    for (auto r : kv.second) {
      repeated_grad_names.push_back(r->Var()->Name());
    }
    // NOTE: use op_role_var to control allreduce op appending in
    //       multi_devices_graph_pass, we want to append op_role_var
    //       only once for the merged gradient, so break after first call.
    param_grad_op_role_var.push_back(
        gradname2paramname.at(kv.first->Var()->Name()));        // param
    param_grad_op_role_var.push_back(kv.first->Var()->Name());  // grad
    sum_op.SetInput("X", repeated_grad_names);
    sum_op.SetOutput("Out", {kv.first->Var()->Name()});
    sum_op.SetAttr(OpProtoAndCheckerMaker::OpRoleAttrName(),
@ -256,6 +273,10 @@ std::unique_ptr<Graph> BatchMergePass::ApplyImpl(
    scale_op.SetAttr("scale", static_cast<float>(1.0f / num_repeats));
    scale_op.SetAttr(OpProtoAndCheckerMaker::OpRoleAttrName(),
                     static_cast<int>(OpRole::kBackward));
    scale_op.SetAttr(OpProtoAndCheckerMaker::OpRoleVarAttrName(),
                     param_grad_op_role_var);
    auto scale_op_node = result.CreateOpNode(&scale_op);
    scale_op_node->inputs.push_back(sum_out_var_node);
    sum_out_var_node->outputs.push_back(scale_op_node);
--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@ -16,7 +16,6 @@ limitations under the License. */
 #include <glog/logging.h>
 #include <algorithm>
 #include "paddle/fluid/framework/data_transform.h"
 #include "paddle/fluid/framework/executor.h"
 #include "paddle/fluid/framework/lod_tensor.h"
@ -380,7 +379,7 @@ const Tensor* GetLoDTensorOrSelectedRowsValueFromVar(const Variable& var) {
    return &(var.Get<SelectedRows>().value());
  } else {
    PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.",
-                 var.Type().name());
+                 ToTypeName(var.Type()));
  }
 }
@ -391,7 +390,7 @@ Tensor* GetMutableLoDTensorOrSelectedRowsValueFromVar(Variable* var) {
    return var->GetMutable<SelectedRows>()->mutable_value();
  } else {
    PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.",
-                 var->Type().name());
+                 ToTypeName(var->Type()));
  }
 }
@ -485,7 +484,7 @@ const std::vector<const Tensor*> ExecutionContext::MultiInput<Tensor>(
                   PADDLE_ENFORCE(
                       var->IsType<LoDTensor>(),
                       "should be LoDTensor, but the received type is %s",
-                       var->Type().name());
+                       ToTypeName(var->Type()));
                   return &(var->Get<LoDTensor>());
                 });
  return res;
@ -504,7 +503,7 @@ const std::vector<const Tensor*> ExecutionContext::LegacyMultiInput<Tensor>(
                   PADDLE_ENFORCE(
                       var->IsType<LoDTensor>(),
                       "%s should be LoDTensor, but the received type is %s",
-                       sub_name, var->Type().name());
+                       sub_name, ToTypeName(var->Type()));
                   return &(var->Get<LoDTensor>());
                 });
  return res;
@ -533,7 +532,7 @@ std::vector<Tensor*> ExecutionContext::MultiOutput<Tensor>(
                   PADDLE_ENFORCE(
                       var->IsType<LoDTensor>(),
                       "%s should be LoDTensor, but the received type is %s",
-                       sub_name, var->Type().name());
+                       sub_name, ToTypeName(var->Type()));
                   return var->GetMutable<LoDTensor>();
                 });
  return res;
@ -775,7 +774,7 @@ class RuntimeInferShapeContext : public InferShapeContext {
      PADDLE_THROW(
          "Only LoDTensor/SelectedRows support 'GetDim', but Variables "
          "type_id is %s.",
-          var->Type().name());
+          ToTypeName(var->Type()));
    }
  }
@ -798,7 +797,7 @@ class RuntimeInferShapeContext : public InferShapeContext {
      var->GetMutable<SelectedRows>()->set_height(dim[0]);
    } else {
      PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.",
-                   var->Type().name());
+                   ToTypeName(var->Type()));
    }
  }
@ -1041,12 +1040,11 @@ Scope* OperatorWithKernel::PrepareData(
 proto::VarType::Type OperatorWithKernel::IndicateDataType(
    const ExecutionContext& ctx) const {
  auto& scope = ctx.scope();
  int data_type = -1;
  std::string last_input_name;
  for (auto& input : this->inputs_) {
-    for (auto& ipt_name : input.second) {
+    const std::vector<const Variable*> vars = ctx.MultiInputVar(input.first);
-      auto* var = scope.FindVar(ipt_name);
+    for (size_t i = 0; i < vars.size(); ++i) {
      const Variable* var = vars[i];
      if (var != nullptr) {
        const Tensor* t = nullptr;
        if (var->IsType<Tensor>()) {
@ -1057,15 +1055,14 @@ proto::VarType::Type OperatorWithKernel::IndicateDataType(
          t = &(var->Get<SelectedRows>().value());
        }
        if (t != nullptr) {
-          PADDLE_ENFORCE(t->IsInitialized(), "Input %s is not initialized",
+          PADDLE_ENFORCE(t->IsInitialized(), "Input %s(%lu)is not initialized",
-                         ipt_name);
+                         input.first, i);
          int tmp = static_cast<int>(t->type());
          PADDLE_ENFORCE(
              tmp == data_type || data_type == -1,
-              "DataType of Paddle Op %s must be the same. Get %s(%d) != %s(%d)",
+              "DataType of Paddle Op %s must be the same. Get (%d) != (%d)",
-              Type(), last_input_name, data_type, ipt_name, tmp);
+              Type(), data_type, tmp);
          data_type = tmp;
          last_input_name = ipt_name;
        }
      }
    }
--- a/paddle/fluid/framework/operator.h
+++ b/paddle/fluid/framework/operator.h
@ -81,6 +81,10 @@ class RuntimeContext {
  RuntimeContext(const VariableNameMap& innames,
                 const VariableNameMap& outnames, const Scope& scope);
  RuntimeContext(const VariableValueMap& invars,
                 const VariableValueMap& outvars)
      : inputs(invars), outputs(outvars) {}
  VariableValueMap inputs;
  VariableValueMap outputs;
 };
@ -447,8 +451,9 @@ class OperatorWithKernel : public OperatorBase {
  void RuntimeInferShape(const Scope& scope, const platform::Place& place,
                         const RuntimeContext& ctx) const override;
 protected:
  virtual OpKernelType GetExpectedKernelType(const ExecutionContext& ctx) const;
 protected:
  virtual OpKernelType GetKernelTypeForVar(
      const std::string& var_name, const Tensor& tensor,
      const OpKernelType& expected_kernel_type) const;
--- a/paddle/fluid/framework/parallel_executor.cc
+++ b/paddle/fluid/framework/parallel_executor.cc
@ -320,6 +320,7 @@ void ParallelExecutor::BCastParamsToDevices(
    if (paddle::platform::is_gpu_place(main_tensor.place())) {
 #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
      std::vector<void *> buffers;
      buffers.reserve(member_->places_.size());
      size_t numel = main_tensor.numel();
      ncclDataType_t data_type = platform::ToNCCLDataType(main_tensor.type());
      for (size_t i = 0; i < member_->places_.size(); ++i) {
@ -353,9 +354,7 @@ void ParallelExecutor::BCastParamsToDevices(
 #endif
    } else {
      platform::CPUPlace cpu;
-      for (size_t i = 0; i < member_->places_.size(); ++i) {
+      for (size_t i = 1; i < member_->places_.size(); ++i) {
        if (i == 0) continue;
        auto local_scope = member_->local_scopes_[i];
        auto *t = local_scope->Var(var)->GetMutable<LoDTensor>();
--- a/paddle/fluid/framework/scope.cc
+++ b/paddle/fluid/framework/scope.cc
@ -165,11 +165,9 @@ std::string Scope::Rename(const std::string& origin_name) const {
 Variable* Scope::VarInternal(const std::string& name) {
  auto* v = FindVarLocally(name);
  if (v != nullptr) return v;
  v = new Variable();
-  vars_[name].reset(v);
+  vars_.emplace(name, std::unique_ptr<Variable>(v));
  VLOG(3) << "Create variable " << name;
  v->name_ = &(vars_.find(name)->first);
  return v;
 }
--- a/paddle/fluid/framework/var_type.h
+++ b/paddle/fluid/framework/var_type.h
@ -19,52 +19,50 @@ limitations under the License. */
 #include "paddle/fluid/framework/lod_tensor_array.h"
 #include "paddle/fluid/framework/reader.h"
 #include "paddle/fluid/framework/selected_rows.h"
 #include "paddle/fluid/framework/var_type_traits.h"
 #include "paddle/fluid/framework/variable.h"
 namespace paddle {
 namespace framework {
 template <typename T>
-inline bool IsType(const std::type_index& type_index) {
+inline bool IsType(const std::type_index& type) {
-  return type_index == std::type_index(typeid(T));
+  return type == typeid(T);
 }
-inline proto::VarType::Type ToVarType(std::type_index type) {
+inline proto::VarType::Type ToVarType(int type) {
-  if (IsType<LoDTensor>(type)) {
+  switch (type) {
-    return proto::VarType_Type_LOD_TENSOR;
+    case proto::VarType::LOD_TENSOR:
-  } else if (IsType<LoDRankTable>(type)) {
+    case proto::VarType::SELECTED_ROWS:
-    return proto::VarType_Type_LOD_RANK_TABLE;
+    case proto::VarType::LOD_RANK_TABLE:
-  } else if (IsType<LoDTensorArray>(type)) {
+    case proto::VarType::LOD_TENSOR_ARRAY:
-    return proto::VarType_Type_LOD_TENSOR_ARRAY;
+    case proto::VarType::READER:
-  } else if (IsType<SelectedRows>(type)) {
+      return static_cast<proto::VarType::Type>(type);
-    return proto::VarType_Type_SELECTED_ROWS;
+    default:
-  } else if (IsType<ReaderHolder>(type)) {
+      PADDLE_THROW("ToVarType:Unsupported type %d", type);
    return proto::VarType_Type_READER;
  } else {
    PADDLE_THROW("ToVarType:Unsupported type %s", type.name());
  }
 }
 template <typename Visitor>
 inline void VisitVarType(const framework::Variable& var, Visitor visitor) {
-  switch (ToVarType(var.Type())) {
+  switch (var.Type()) {
-    case proto::VarType_Type_LOD_TENSOR:
+    case proto::VarType::LOD_TENSOR:
      visitor(var.Get<LoDTensor>());
      return;
-    case proto::VarType_Type_LOD_RANK_TABLE:
+    case proto::VarType::LOD_RANK_TABLE:
      visitor(var.Get<LoDRankTable>());
      return;
-    case proto::VarType_Type_LOD_TENSOR_ARRAY:
+    case proto::VarType::LOD_TENSOR_ARRAY:
      visitor(var.Get<LoDTensorArray>());
      return;
-    case proto::VarType_Type_SELECTED_ROWS:
+    case proto::VarType::SELECTED_ROWS:
      visitor(var.Get<SelectedRows>());
      return;
-    case proto::VarType_Type_READER:
+    case proto::VarType::READER:
      visitor(var.Get<ReaderHolder>());
      return;
    default:
-      PADDLE_THROW("Not supported visit type, %d", ToVarType(var.Type()));
+      PADDLE_THROW("Not supported visit type, %s", ToTypeName(var.Type()));
  }
 }
--- a/paddle/fluid/framework/var_type_inference_test.cc
+++ b/paddle/fluid/framework/var_type_inference_test.cc
@ -108,7 +108,7 @@ TEST(InferVarType, sum_op_without_infer_var_type) {
  op->InferVarType(prog.MutableBlock(0));
-  ASSERT_EQ(proto::VarType_Type_LOD_TENSOR,
+  ASSERT_EQ(proto::VarType::LOD_TENSOR,
            prog.MutableBlock(0)->Var("test2_out")->GetType());
 }
--- a/paddle/fluid/framework/var_type_traits.cc
+++ b/paddle/fluid/framework/var_type_traits.cc
@ -0,0 +1,119 @@
 // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "paddle/fluid/framework/var_type_traits.h"
 #include "paddle/fluid/framework/lod_rank_table.h"
 #include "paddle/fluid/framework/reader.h"
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/framework/selected_rows.h"
 #include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
 #include "paddle/fluid/platform/macros.h"
 #ifdef PADDLE_WITH_CUDA
 #ifndef _WIN32
 #include "paddle/fluid/operators/nccl/nccl_gpu_common.h"
 #endif
 #include <cudnn.h>
 #include "paddle/fluid/operators/conv_cudnn_op_cache.h"
 #include "paddle/fluid/operators/cudnn_rnn_cache.h"
 #endif
 namespace paddle {
 namespace framework {
 // Besides registering variable type id, it is helpful to register a
 // var_id -> std::type_index map (for example, get type names according to id)
 namespace detail {
 template <int kStart, int kEnd, bool kStop>
 struct VarIdToTypeIndexMapInitializerImpl {
  template <typename MapType1, typename MapType2>
  static void Init(MapType1 *id_to_type, MapType2 *type_to_id) {
    using Type =
        typename std::tuple_element<kStart, VarTypeRegistry::ArgTuple>::type;
    static_assert(!std::is_same<Type, void>::value, "Type cannot be void");
    constexpr int kId = VarTypeTrait<Type>::kId;
    auto type = std::type_index(typeid(Type));
    PADDLE_ENFORCE(id_to_type->count(kId) == 0,
                   "Registered duplicate type id %d for type %s", kId,
                   type.name());
    PADDLE_ENFORCE(type_to_id->count(type) == 0,
                   "Registered duplicate type_index %s for id %d", type.name(),
                   kId);
    id_to_type->emplace(kId, type);
    type_to_id->emplace(type, kId);
    VarIdToTypeIndexMapInitializerImpl<kStart + 1, kEnd,
                                       kStart + 1 == kEnd>::Init(id_to_type,
                                                                 type_to_id);
  }
 };
 // Terminating case of the registration walk (kStop == true): there is no
 // further registered type to process, so Init() intentionally does nothing
 // and leaves both maps untouched.
 template <int kStart, int kEnd>
 struct VarIdToTypeIndexMapInitializerImpl<kStart, kEnd, true> {
  template <typename MapType1, typename MapType2>
  static void Init(MapType1 *, MapType2 *) {}
 };
 // VarIdToTypeIndexMapInitializer fills the var_id -> std::type_index map and
 // the std::type_index -> var_id map. The walk starts at index 0 and covers
 // VarTypeRegistry::kRegisteredTypeNum types; when no type is registered the
 // third argument is true, which selects the no-op terminator directly.
 using VarIdToTypeIndexMapInitializer =
    VarIdToTypeIndexMapInitializerImpl<0, VarTypeRegistry::kRegisteredTypeNum,
                                       VarTypeRegistry::kRegisteredTypeNum ==
                                           0>;
 // Owner of the two lookup tables between variable type ids and
 // std::type_index values. The only instance is the function-local static in
 // Instance(); its constructor fills both tables through
 // VarIdToTypeIndexMapInitializer. Copying is disabled.
 struct VarIdToTypeIndexMapHolder {
  DISABLE_COPY_AND_ASSIGN(VarIdToTypeIndexMapHolder);
 public:
  // Returns the std::type_index stored for var_id. PADDLE_ENFORCE fails when
  // var_id has no entry in the id -> type table.
  static const std::type_index &ToTypeIndex(int var_id) {
    const auto &id_map = Instance().id_to_type_map_;
    auto found = id_map.find(var_id);
    PADDLE_ENFORCE(found != id_map.end(), "VarId %d is not registered.",
                   var_id);
    return found->second;
  }
  // Returns the id stored for type. PADDLE_ENFORCE fails when type has no
  // entry in the type -> id table.
  static int ToTypeId(const std::type_index &type) {
    const auto &type_map = Instance().type_to_id_map_;
    auto found = type_map.find(type);
    PADDLE_ENFORCE(found != type_map.end(), "VarType %s is not registered.",
                   type.name());
    return found->second;
  }
 private:
  // Populates both tables from the registered type list.
  VarIdToTypeIndexMapHolder() {
    VarIdToTypeIndexMapInitializer::Init(&id_to_type_map_, &type_to_id_map_);
  }
  // Accessor for the single shared holder.
  static const VarIdToTypeIndexMapHolder &Instance() {
    static const VarIdToTypeIndexMapHolder holder;
    return holder;
  }
  std::unordered_map<int, std::type_index> id_to_type_map_;
  std::unordered_map<std::type_index, int> type_to_id_map_;
 };
 }  // namespace detail
 // Looks up the std::type_index registered for var_id by forwarding to
 // detail::VarIdToTypeIndexMapHolder::ToTypeIndex, which enforces that the id
 // is registered. The returned reference refers to an entry of the holder's
 // id -> type table.
 const std::type_index &ToTypeIndex(int var_id) {
  return detail::VarIdToTypeIndexMapHolder::ToTypeIndex(var_id);
 }
 // Returns std::type_index::name() of the type registered for var_id. The
 // text is implementation-defined (it may be a mangled name), so it is meant
 // for diagnostics rather than for comparison.
 const char *ToTypeName(int var_id) {
  const std::type_index &index = ToTypeIndex(var_id);
  return index.name();
 }
 // Looks up the variable type id registered for `type` by forwarding to
 // detail::VarIdToTypeIndexMapHolder::ToTypeId, which enforces that the type
 // is registered.
 int ToTypeId(const std::type_index &type) {
  return detail::VarIdToTypeIndexMapHolder::ToTypeId(type);
 }
 }  // namespace framework
 }  // namespace paddle
--- a/paddle/fluid/framework/var_type_traits.h
+++ b/paddle/fluid/framework/var_type_traits.h
@ -0,0 +1,195 @@
 // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #pragma once
 #include <map>
 #include <string>
 #include <tuple>
 #include <typeindex>
 #include <vector>
 #include "paddle/fluid/framework/framework.pb.h"
 #include "paddle/fluid/framework/lod_tensor_array.h"
 #include "paddle/fluid/platform/place.h"
 #ifdef PADDLE_WITH_CUDA
 #include <cudnn.h>
 #ifndef _WIN32
 #include <nccl.h>
 #endif
 #endif
 // Users should add forward declarations here
 namespace paddle {
 namespace platform {
 #ifdef PADDLE_WITH_CUDA
 #ifndef _WIN32
 class Communicator;
 #endif
 #endif
 }  // namespace platform
 namespace framework {
 class Tensor;
 class LoDTensor;
 class SelectedRows;
 class LoDRankTable;
 class ReaderHolder;
 class Scope;
 }  // namespace framework
 namespace operators {
 template <typename T>
 class AlgorithmsCache;
 class CudnnRNNCache;
 namespace reader {
 class LoDTensorBlockingQueueHolder;
 }  // namespace reader
 }  // namespace operators
 }  // namespace paddle
 namespace paddle {
 namespace framework {
 const char *ToTypeName(int var_id);
 const std::type_index &ToTypeIndex(int var_id);
 int ToTypeId(const std::type_index &type);
 namespace detail {
 // TypePosFinderImpl walks the type list "T2, Args..." one element per
 // instantiation while searching for T1.
 //   kStop:  true when T2 is the last element of the full list.
 //   kStart: index of T2 inside the full list.
 //   kEnd:   length of the full list.
 // kPos is the index of the first element equal to T1, or -1 if none matches.
 template <bool kStop, int kStart, int kEnd, typename T1, typename T2,
           typename... Args>
 struct TypePosFinderImpl {
  static constexpr int kPos =
      std::is_same<T1, T2>::value
          ? kStart
          : TypePosFinderImpl<kStart + 2 == kEnd, kStart + 1, kEnd, T1,
                              Args...>::kPos;
 };
 // Last element of the list: either it matches T1 or T1 is absent.
 template <int kStart, int kEnd, typename T1, typename T2>
 struct TypePosFinderImpl<true, kStart, kEnd, T1, T2> {
  static constexpr int kPos = std::is_same<T1, T2>::value ? kStart : -1;
 };
 // TypePosFinder helps to find the position in which T is inside Args...
 // If T is not inside Args..., kPos would be -1
 template <typename T, typename... Args>
 struct TypePosFinder {
  static constexpr int kPos =
      TypePosFinderImpl<sizeof...(Args) == 1, 0, sizeof...(Args), T,
                        Args...>::kPos;
 };
 // Empty type list: T cannot be inside it, so kPos is -1. Without this
 // specialization the primary template would instantiate TypePosFinderImpl
 // without its mandatory T2 argument and fail to compile.
 template <typename T>
 struct TypePosFinder<T> {
  static constexpr int kPos = -1;
 };
 template <typename... Args>
 struct VarTypeRegistryImpl {
  static constexpr size_t kRegisteredTypeNum = sizeof...(Args);
  using ArgTuple = std::tuple<Args...>;
  // TypePos() returns the position in which T is inside Args...
  // If T is not inside Args..., return -1
  template <typename T>
  static constexpr int TypePos() {
    return TypePosFinder<T, Args...>::kPos;
  }
  // IsRegistered() returns whether T is registered inside RegistryImpl
  template <typename T>
  static constexpr bool IsRegistered() {
    return TypePos<T>() >= 0;
  }
 };
 }  // namespace detail
 // REG_PROTO_VAR_TYPE_TRAIT(type, proto_id) emits an explicit specialization
 // VarTypeTrait<type> whose kId is static_cast<int>(proto_id) instead of the
 // auto-generated id of the primary template. The specialization still
 // static_asserts that `type` is listed in VarTypeRegistry. The macro is
 // expanded further down in this header, after VarTypeRegistry and the primary
 // VarTypeTrait template have been declared, and is #undef'ed afterwards.
 #define REG_PROTO_VAR_TYPE_TRAIT(type, proto_id)           \
  template <>                                              \
  struct VarTypeTrait<type> {                              \
    static_assert(VarTypeRegistry::IsRegistered<type>(),   \
                  "Must be registered type");              \
    using Type = type;                                     \
    static constexpr int kId = static_cast<int>(proto_id); \
  }
 /**
 * The following code registers the variable types.
 * Only registered types can be stored in a Variable.
 * This registry mechanism is designed to speed up Variable.
 *
 * Caution: before adding another variable type, please consider carefully
 * whether it is really needed.
 */
 // Users should add other variable types below.
 // A unique id is generated for each registered variable type from its
 // position in this list (see VarTypeTrait), so the order of the entries
 // matters. The CUDA/NCCL-related entries are only present when
 // PADDLE_WITH_CUDA is defined (and, for NCCL, when _WIN32 is not defined).
 using VarTypeRegistry = detail::VarTypeRegistryImpl<
    Tensor, LoDTensor, SelectedRows, std::vector<Scope *>, LoDRankTable,
    LoDTensorArray, platform::PlaceList, ReaderHolder, std::string, Scope *,
    std::map<size_t, Tensor>, operators::reader::LoDTensorBlockingQueueHolder,
 #ifdef PADDLE_WITH_CUDA
 #ifndef _WIN32
    ncclUniqueId, platform::Communicator,
 #endif
    operators::AlgorithmsCache<cudnnConvolutionFwdAlgo_t>,
    operators::AlgorithmsCache<cudnnConvolutionBwdDataAlgo_t>,
    operators::AlgorithmsCache<cudnnConvolutionBwdFilterAlgo_t>,
    operators::CudnnRNNCache,
 #endif
    int, float>;
 // Primary VarTypeTrait template: maps a registered variable type T to an
 // auto-generated integer id. Types that must carry an id defined in
 // framework.proto get an explicit specialization instead.
 template <typename T>
 struct VarTypeTrait {
  static_assert(VarTypeRegistry::IsRegistered<T>(), "Must be registered type");
  using Type = T;
  /**
   * Auto-generated unique id of T.
   *
   * The id must not coincide with any protobuf id defined in
   * framework.proto. Offsetting the position of T in the registry by the
   * maximum protobuf id (taken here to be proto::VarType::TUPLE) would be
   * enough today, but more protobuf ids may be added in the future. To avoid
   * changing this scheme frequently, the offset is twice that maximum:
   *
   *   kId = 2 * proto::VarType::TUPLE + (position of T in VarTypeRegistry)
   */
  static constexpr int kId = 2 * static_cast<int>(proto::VarType::TUPLE) +
                             VarTypeRegistry::TypePos<T>();
 };
 // Variable types whose id must equal an id defined in framework.proto are
 // listed below. Each line specializes VarTypeTrait for the given type so that
 // its kId is the stated proto::VarType value rather than the auto-generated
 // one. Registered types without a line here keep the auto-generated id.
 REG_PROTO_VAR_TYPE_TRAIT(LoDTensor, proto::VarType::LOD_TENSOR);
 REG_PROTO_VAR_TYPE_TRAIT(SelectedRows, proto::VarType::SELECTED_ROWS);
 REG_PROTO_VAR_TYPE_TRAIT(std::vector<Scope *>, proto::VarType::STEP_SCOPES);
 REG_PROTO_VAR_TYPE_TRAIT(LoDRankTable, proto::VarType::LOD_RANK_TABLE);
 REG_PROTO_VAR_TYPE_TRAIT(LoDTensorArray, proto::VarType::LOD_TENSOR_ARRAY);
 REG_PROTO_VAR_TYPE_TRAIT(platform::PlaceList, proto::VarType::PLACE_LIST);
 REG_PROTO_VAR_TYPE_TRAIT(ReaderHolder, proto::VarType::READER);
 REG_PROTO_VAR_TYPE_TRAIT(int, proto::VarType::INT32);
 REG_PROTO_VAR_TYPE_TRAIT(float, proto::VarType::FP32);
 /** End of variable type registration */
 // Compile-time query: returns true when T is one of the types listed in
 // VarTypeRegistry, false otherwise. Usable in constant expressions such as
 // static_assert.
 template <typename T>
 inline constexpr bool IsRegisteredVarType() {
  return VarTypeRegistry::IsRegistered<T>();
 }
 #undef REG_PROTO_VAR_TYPE_TRAIT
 }  // namespace framework
 }  // namespace paddle
--- a/Show More
+++ b/Show More