remove anakin from code, test=develop (#22420)

5 years ago · e1b0d7cbb1
parent 0a678ca045
commit e1b0d7cbb1
114 changed files with 10 additions and 7532 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -151,7 +151,6 @@ if(NOT WIN32)
    include(cupti)
 endif()
 include(anakin_subgraph)
 include(flags)              # set paddle compile flags
 include(cudnn)              # set cudnn libraries, must before configure
--- a/cmake/anakin_subgraph.cmake
+++ b/cmake/anakin_subgraph.cmake
@ -1,45 +0,0 @@
 # Locate a pre-built Anakin tree and publish the result through ANAKIN_FOUND
 # and ANAKIN_SUBGRAPH.  ANAKIN_ROOT (cache path, default "/usr") and the
 # ANAKIN_ROOT environment variable are the only places searched.
 set(ANAKIN_ROOT "/usr" CACHE PATH "ANAKIN ROOT")

 # Header probe: anakin_config.h directly under the root or under <root>/include.
 find_path(ANAKIN_INCLUDE_DIR
     NAMES anakin_config.h
     PATHS
         ${ANAKIN_ROOT}
         ${ANAKIN_ROOT}/include
         $ENV{ANAKIN_ROOT}
         $ENV{ANAKIN_ROOT}/include
     NO_DEFAULT_PATH)

 # Library probe: either the saber common library or the main anakin library.
 find_library(ANAKIN_LIBRARY
     NAMES libanakin_saber_common.so libanakin.so
     PATHS
         ${ANAKIN_ROOT}
         $ENV{ANAKIN_ROOT}
         $ENV{ANAKIN_ROOT}/lib
     NO_DEFAULT_PATH
     DOC "Path to ANAKIN library.")

 # Anakin counts as found only when both the header and a library were located.
 set(ANAKIN_FOUND OFF)
 if(ANAKIN_INCLUDE_DIR AND ANAKIN_LIBRARY)
     set(ANAKIN_FOUND ON)
 endif()

 if(ANAKIN_FOUND)
     message(STATUS "Current ANAKIN header is ${ANAKIN_INCLUDE_DIR}/anakin_config.h. ")

     # NOTE(review): these paths are derived from ANAKIN_ROOT even when the
     # header/library were found through $ENV{ANAKIN_ROOT} - confirm intended.
     include_directories(${ANAKIN_ROOT})
     include_directories(${ANAKIN_ROOT}/include)
     include_directories(${ANAKIN_ROOT}/saber)
     link_directories(${ANAKIN_ROOT})
     add_definitions(-DPADDLE_WITH_ANAKIN)

     # Select at most one device "place" definition.  MLU and BM are only
     # honoured for builds without GPU and without ANAKIN_X86.
     if(ANAKIN_MLU AND NOT WITH_GPU AND NOT ANAKIN_X86)
         message(STATUS "Compile with anakin mlu place.")
         add_definitions(-DANAKIN_MLU_PLACE)
     elseif(ANAKIN_BM AND NOT WITH_GPU AND NOT ANAKIN_X86)
         message(STATUS "Compile with anakin bm place.")
         add_definitions(-DANAKIN_BM_PLACE)
     elseif(ANAKIN_X86)
         message(STATUS "Compile with anakin x86 place.")
         add_definitions(-DANAKIN_X86_PLACE)
     endif()

     # The subgraph engine additionally needs a GPU build with WITH_DSO on.
     if(WITH_GPU AND WITH_DSO)
         message(STATUS "Compile with anakin subgraph.")
         set(ANAKIN_SUBGRAPH ON)
     endif()
 endif()
--- a/cmake/configure.cmake
+++ b/cmake/configure.cmake
@ -109,16 +109,6 @@ if(WITH_GPU)
        endif()
        include_directories(${TENSORRT_INCLUDE_DIR})
    endif()
    # Anakin is only kept enabled when CUDA >= 8 and cuDNN >= 7; otherwise
    # ANAKIN_FOUND is forced to OFF in the cache and a warning is printed.
    #
    # The version variables are expanded inside quotes on purpose: unquoted,
    # an empty CUDA_VERSION_MAJOR / CUDNN_MAJOR_VERSION collapses the condition
    # to `if( VERSION_LESS 8)`, which aborts configuration with a syntax
    # error.  Quoted, the empty string compares as version 0, so the build
    # degrades to "Anakin disabled" with the warning below.
    #
    # NOTE(review): ANAKIN_SUBGRAPH is not reset here; if it was already turned
    # ON before this file runs it stays ON - confirm the include order.
    if(ANAKIN_FOUND)
        if("${CUDA_VERSION_MAJOR}" VERSION_LESS 8)
            message(WARNING "Anakin needs CUDA >= 8.0 to compile. Force ANAKIN_FOUND = OFF")
            set(ANAKIN_FOUND OFF CACHE STRING "Anakin is valid only when CUDA >= 8.0." FORCE)
        endif()
        if("${CUDNN_MAJOR_VERSION}" VERSION_LESS 7)
            message(WARNING "Anakin needs CUDNN >= 7.0 to compile. Force ANAKIN_FOUND = OFF")
            set(ANAKIN_FOUND OFF CACHE STRING "Anakin is valid only when CUDNN >= 7.0." FORCE)
        endif()
    endif()
 elseif(WITH_AMD_GPU)
    add_definitions(-DPADDLE_WITH_HIP)
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D__HIP_PLATFORM_HCC__")
--- a/cmake/inference_lib.cmake
+++ b/cmake/inference_lib.cmake
@ -137,13 +137,6 @@ function(copy_part_of_thrid_party TARGET DST)
                SRCS ${LITE_BINARY_DIR}/inference_lite_lib/*
                DSTS ${dst_dir})
    endif()
    if (ANAKIN_FOUND)
        set(dst_dir "${DST}/third_party/install/anakin")
        copy(${TARGET}
                SRCS ${ANAKIN_ROOT}/*
                DSTS ${dst_dir})
    endif ()
 endfunction()
 # inference library for only inference
--- a/paddle/fluid/framework/ir/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/CMakeLists.txt
@ -71,7 +71,6 @@ pass_library(identity_scale_op_clean_pass base)
 pass_library(sync_batch_norm_pass base)
 pass_library(runtime_context_cache_pass base)
 pass_library(quant_conv2d_dequant_fuse_pass inference)
 pass_library(fillconstant_elementwisemul_fuse inference)
 pass_library(shuffle_channel_detect_pass inference)
 pass_library(delete_quant_dequant_op_pass inference)
 pass_library(simplify_with_basic_ops_pass base)
@ -81,10 +80,6 @@ if(WITH_GPU)
    pass_library(cudnn_placement_pass base DEPS placement_pass_base)
 endif()
 # The Anakin detection-simplification pass is only added to the build when
 # the Anakin subgraph engine is enabled (ANAKIN_SUBGRAPH).
 if(ANAKIN_SUBGRAPH)
 pass_library(simplify_anakin_priorbox_detection_out_pass inference)
 endif()
 if(WITH_MKLDNN)
    pass_library(mkldnn_placement_pass base DEPS placement_pass_base DIR mkldnn)
    pass_library(depthwise_conv_mkldnn_pass base DIR mkldnn)
--- a/paddle/fluid/framework/ir/fillconstant_elementwisemul_fuse.cc
+++ b/paddle/fluid/framework/ir/fillconstant_elementwisemul_fuse.cc
@ -1,83 +0,0 @@
 // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include <memory>
 #include <string>
 #include "paddle/fluid/framework/ir/fillconstant_elementwisemul_fuse.h"
 #include "paddle/fluid/framework/ir/graph_viz_pass.h"
 namespace paddle {
 namespace framework {
 namespace ir {
 // GET_IR_NODE(n) forwards to GET_IR_NODE_FROM_SUBGRAPH using the same
 // identifier for both of its first two arguments and the pattern object
 // named `pattern`, so a variable called `pattern` must be in scope where it
 // is used.
 #define GET_IR_NODE(node__) GET_IR_NODE_FROM_SUBGRAPH(node__, node__, pattern);
 // GET_NODES applies GET_IR_NODE to the four nodes of the
 // fill_constant -> elementwise_mul pattern.
 #define GET_NODES                 \
  GET_IR_NODE(fill_constant);     \
  GET_IR_NODE(fill_constant_out); \
  GET_IR_NODE(elementwise_mul);   \
  GET_IR_NODE(elementwise_mul_out);
 // Replaces every `elementwise_mul(X, fill_constant(value))` match in `graph`
 // with one `scale` op that reads X, writes the original elementwise_mul
 // output variable and uses `value` as its scale (bias 0, applied after
 // scaling).
 void FillconstantElementwisemulFuse::ApplyImpl(ir::Graph* graph) const {
  const std::string pattern_name = "fillconstant_elementwisemul_fuse";
  FusePassBase::Init(pattern_name, graph);

  GraphPatternDetector gpd;
  auto* pd_pattern = gpd.mutable_pattern();

  // External input of the pattern: whatever feeds elementwise_mul's "X" slot.
  auto* x = pd_pattern->NewNode("x")
                ->assert_is_op_input("elementwise_mul", "X")
                ->AsInput();

  patterns::FillConstantElementWiseMulFuse pattern(pd_pattern, pattern_name);
  pattern(x);

  auto fuse_one_match = [&](const GraphPatternDetector::subgraph_t& subgraph,
                            Graph* g) {
    // Binds fill_constant, fill_constant_out, elementwise_mul and
    // elementwise_mul_out for this match.
    GET_NODES;
    PADDLE_ENFORCE(subgraph.count(x));
    auto* mul_input = subgraph.at(x);

    // The constant multiplier becomes the scale factor of the fused op.
    float scale_factor =
        boost::get<float>(fill_constant->Op()->GetAttr("value"));

    framework::OpDesc scale_desc;
    scale_desc.SetType("scale");
    scale_desc.SetInput("X", {mul_input->Name()});
    scale_desc.SetOutput("Out", {elementwise_mul_out->Name()});
    scale_desc.SetAttr("scale", scale_factor);
    scale_desc.SetAttr("bias", 0.0f);
    scale_desc.SetAttr("bias_after_scale", true);
    scale_desc.Flush();

    // Insert the fused op and wire it between the input and the old output.
    auto* fused_scale = graph->CreateOpNode(&scale_desc);
    IR_NODE_LINK_TO(mul_input, fused_scale);            // Input
    IR_NODE_LINK_TO(fused_scale, elementwise_mul_out);  // Output

    // The original ops and the intermediate constant are no longer needed.
    GraphSafeRemoveNodes(graph,
                         {fill_constant, fill_constant_out, elementwise_mul});
  };

  gpd(graph, fuse_one_match);
 }
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
 // Registers FillconstantElementwisemulFuse under the pass name
 // "fillconstant_elementwisemul_fuse".
 REGISTER_PASS(fillconstant_elementwisemul_fuse,
              paddle::framework::ir::FillconstantElementwisemulFuse);
--- a/paddle/fluid/framework/ir/graph_pattern_detector.cc
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc
@ -1878,173 +1878,6 @@ PDNode *patterns::TransposeFlattenConcat::operator()(
  return concat_out;
 }
 // Builds the detection-head pattern: `times` parallel prior-box branches
 // whose box and variance outputs are each reshaped/flattened and
 // concatenated, followed by box_coder, a transpose2 and multiclass_nms.
 //
 // conv_in must contain at least `times + 2` nodes (it is indexed without a
 // bounds check):
 //   conv_in[0 .. times-1]  input of the i-th prior-box op,
 //   conv_in[times]         third input of box_coder,
 //   conv_in[times + 1]     input of the transpose2 that feeds multiclass_nms.
 // priorbox_type is the op type asserted for the prior-box nodes; is_reshape
 // selects "reshape2" (true) or "flatten2" (false) as the op that follows
 // each prior-box output.
 // Returns the pattern node for the multiclass_nms output.
 PDNode *patterns::AnakinDetectionPattern::operator()(
    std::vector<PDNode *> conv_in, int times, std::string priorbox_type,
    bool is_reshape) {
  // `times` is the number of repetitions of the per-branch chain
  // {prior_box, box_out, reshape1, reshape1_out, box_var_out, reshape2,
  //  reshape2_out}. Each repetition appends kNumFields entries to `nodes` in
  // exactly that order; the k*Offset constants index inside one repetition.
  const int kNumFields = 7;
  const int kPriorBoxLocOffset = 1;
  const int kReshape1Offset = 2;
  const int kReshape1OutOffset = 3;
  const int kPriorBoxVarOffset = 4;
  const int kReshape2Offset = 5;
  const int kReshape2OutOffset = 6;
  // Positions inside conv_in of the two extra (non prior-box) inputs.
  const int kBoxCoderThirdInputOffset = times;
  const int kMultiClassSecondInputNmsOffset = times + 1;
  std::vector<PDNode *> nodes;
  std::string op_after_priorbox = is_reshape ? "reshape2" : "flatten2";
  // Create the seven pattern nodes of every branch (links are added later).
  for (int i = 0; i < times; i++) {
    nodes.push_back(
        pattern->NewNode(GetNodeName("prior_box" + std::to_string(i)))
            ->assert_is_op(priorbox_type));
    nodes.push_back(pattern->NewNode(GetNodeName("box_out" + std::to_string(i)))
                        ->assert_is_op_output(priorbox_type, "Boxes")
                        ->assert_is_op_input(op_after_priorbox, "X")
                        ->AsIntermediate());
    nodes.push_back(
        pattern->NewNode(GetNodeName("reshape1" + std::to_string(i)))
            ->assert_is_op(op_after_priorbox));
    // The i-th branch must be the i-th "X" input of its concat.
    nodes.push_back(
        pattern->NewNode(GetNodeName("reshape1_out" + std::to_string(i)))
            ->assert_is_op_output(op_after_priorbox)
            ->assert_is_op_nth_input("concat", "X", i)
            ->AsIntermediate());
    nodes.push_back(
        pattern->NewNode(GetNodeName("box_var_out" + std::to_string(i)))
            ->assert_is_op_output(priorbox_type, "Variances")
            ->assert_is_op_input(op_after_priorbox, "X")
            ->AsIntermediate());
    nodes.push_back(
        pattern->NewNode(GetNodeName("reshape2" + std::to_string(i)))
            ->assert_is_op(op_after_priorbox));
    nodes.push_back(
        pattern->NewNode(GetNodeName("reshape2_out" + std::to_string(i)))
            ->assert_is_op_output(op_after_priorbox)
            ->assert_is_op_nth_input("concat", "X", i)
            ->AsIntermediate());
  }
  // Two concat ops, each required to have exactly `times` inputs: concat1
  // gathers the reshape1 outputs, concat2 the reshape2 outputs (see links
  // below).
  auto concat_op1 = pattern->NewNode(GetNodeName("concat1"))
                        ->assert_is_op("concat")
                        ->assert_op_has_n_inputs("concat", times);
  auto concat_out1 = pattern->NewNode(GetNodeName("concat1_out"))
                         ->assert_is_op_output("concat")
                         ->AsIntermediate();
  auto concat_op2 = pattern->NewNode(GetNodeName("concat2"))
                        ->assert_is_op("concat")
                        ->assert_op_has_n_inputs("concat", times);
  auto concat_out2 = pattern->NewNode(GetNodeName("concat2_out"))
                         ->assert_is_op_output("concat")
                         ->AsIntermediate();
  // box_coder must have exactly three inputs.
  auto box_coder_op = pattern->NewNode(GetNodeName("box_coder"))
                          ->assert_is_op("box_coder")
                          ->assert_op_has_n_inputs("box_coder", 3);
  auto box_coder_out = pattern->NewNode(GetNodeName("box_coder_out"))
                           ->assert_is_op_output("box_coder")
                           ->AsIntermediate();
  // transpose2 whose output is the "Scores" input of multiclass_nms.
  auto transpose_before_nms =
      pattern->NewNode(GetNodeName("transpose_before_nms"))
          ->assert_is_op("transpose2");
  auto transpose_before_nms_out =
      pattern->NewNode(GetNodeName("transpose_before_nms_out"))
          ->assert_is_op_output("transpose2")
          ->assert_is_op_input("multiclass_nms", "Scores")
          ->AsIntermediate();
  // multiclass_nms must have exactly two inputs; its output is the pattern
  // output.
  auto multiclass_nms_op = pattern->NewNode(GetNodeName("multiclass_nms"))
                               ->assert_is_op("multiclass_nms")
                               ->assert_op_has_n_inputs("multiclass_nms", 2);
  auto multiclass_nms_out = pattern->NewNode(GetNodeName("multiclass_nms_out"))
                                ->assert_is_op_output("multiclass_nms")
                                ->AsOutput();
  std::vector<PDNode *> reshape1_outs;
  std::vector<PDNode *> reshape2_outs;
  // Wire every branch: conv_in[i] -> prior_box -> {box_out -> reshape1 ->
  // reshape1_out, box_var_out -> reshape2 -> reshape2_out}.
  for (int i = 0; i < times; i++) {
    conv_in[i]->AsInput();
    // prior_box
    nodes[i * kNumFields]->LinksFrom({conv_in[i]});
    // prior_box box out
    nodes[i * kNumFields + kPriorBoxLocOffset]->LinksFrom(
        {nodes[i * kNumFields]});
    // reshape
    nodes[i * kNumFields + kReshape1Offset]->LinksFrom(
        {nodes[i * kNumFields + kPriorBoxLocOffset]});
    // reshape_out
    nodes[i * kNumFields + kReshape1OutOffset]->LinksFrom(
        {nodes[i * kNumFields + kReshape1Offset]});
    // prior_box variances out
    nodes[i * kNumFields + kPriorBoxVarOffset]->LinksFrom(
        {nodes[i * kNumFields]});
    // reshape
    nodes[i * kNumFields + kReshape2Offset]->LinksFrom(
        {nodes[i * kNumFields + kPriorBoxVarOffset]});
    // reshape_out
    nodes[i * kNumFields + kReshape2OutOffset]->LinksFrom(
        {nodes[i * kNumFields + kReshape2Offset]});
    reshape1_outs.push_back(nodes[i * kNumFields + kReshape1OutOffset]);
    reshape2_outs.push_back(nodes[i * kNumFields + kReshape2OutOffset]);
  }
  concat_op1->LinksFrom(reshape1_outs);
  concat_op2->LinksFrom(reshape2_outs);
  concat_out1->LinksFrom({concat_op1});
  concat_out2->LinksFrom({concat_op2});
  // The two trailing entries of conv_in are external inputs of the pattern.
  conv_in[kBoxCoderThirdInputOffset]->AsInput();
  conv_in[kMultiClassSecondInputNmsOffset]->AsInput();
  box_coder_op->LinksFrom(
      {concat_out1, concat_out2, conv_in[kBoxCoderThirdInputOffset]});
  box_coder_out->LinksFrom({box_coder_op});
  transpose_before_nms->LinksFrom({conv_in[kMultiClassSecondInputNmsOffset]});
  transpose_before_nms_out->LinksFrom({transpose_before_nms});
  multiclass_nms_op->LinksFrom({box_coder_out, transpose_before_nms_out})
      .LinksTo({multiclass_nms_out});
  return multiclass_nms_out;
 }
 // Describes `elementwise_mul(elementwise_op_input, fill_constant())`:
 // a fill_constant op whose output is the "Y" input of an elementwise_mul op.
 // Returns the pattern node for the elementwise_mul output.
 PDNode *patterns::FillConstantElementWiseMulFuse::operator()(
    PDNode *elementwise_op_input) {
  // Producer of the constant operand and its (intermediate) output variable.
  auto *const_op =
      pattern->NewNode(fill_constant_repr())->assert_is_op("fill_constant");
  auto *const_var = pattern->NewNode(fill_constant_out_repr())
                        ->assert_is_op_output("fill_constant")
                        ->assert_is_op_input("elementwise_mul", "Y")
                        ->AsIntermediate();

  // The multiplication and its result, which is the output of the pattern.
  auto *mul_op =
      pattern->NewNode(elementwise_mul_repr())->assert_is_op("elementwise_mul");
  auto *mul_result = pattern->NewNode(elementwise_mul_out_repr())
                         ->assert_is_op_output("elementwise_mul")
                         ->AsOutput();

  // fill_constant -> const_var; {input, const_var} -> mul -> mul_result.
  const_var->LinksFrom({const_op});
  mul_op->LinksFrom({elementwise_op_input, const_var});
  mul_result->LinksFrom({mul_op});

  return mul_result;
 }
 void patterns::QuantDequantOpFuse::operator()(PDNode *quant_op_input,
                                              const std::string &op_type,
                                              const std::string &weight_name,
--- a/paddle/fluid/framework/ir/graph_pattern_detector.h
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.h
@ -1093,37 +1093,6 @@ struct TransposeFlattenConcat : public PatternBase {
  }
 };
 // Pattern for the detection head matched by the Anakin simplify pass.
 // Unlike patterns that declare their nodes with PATTERN_DECL_NODE, the node
 // names here are built at run time with GetNodeName(), so they can carry a
 // per-branch suffix.
 struct AnakinDetectionPattern : public PatternBase {
  AnakinDetectionPattern(PDPattern* pattern, const std::string& name_scope)
      : PatternBase(pattern, name_scope, "anakin_detect_pattern") {}
  // Builds the pattern. conv_inputs supplies the external input nodes, times
  // is the number of repeated prior-box branches, priorbox_type is the op
  // type of the prior-box ops, and is_reshape selects which op follows them.
  // Returns a pattern node (defined in graph_pattern_detector.cc).
  PDNode* operator()(std::vector<PDNode*> conv_inputs, int times,
                     std::string priorbox_type, bool is_reshape);
  // Full pattern-node name for `op_type`, derived from this pattern's
  // name scope, repr and id.
  std::string GetNodeName(const std::string& op_type) {
    return PDNodeName(name_scope_, repr_, id_, op_type);
  }
  // Looks up the pattern node previously created under GetNodeName(op_type).
  PDNode* GetPDNode(const std::string& op_type) {
    return pattern->RetrieveNode(GetNodeName(op_type));
  }
 };
 // Pattern for an elementwise_mul whose second operand comes from a
 // fill_constant op. It is registered with the repr string
 // "anakin_fillconstant_elementwisemul_fuse".
 struct FillConstantElementWiseMulFuse : public PatternBase {
  FillConstantElementWiseMulFuse(PDPattern* pattern,
                                 const std::string& name_scope)
      : PatternBase(pattern, name_scope,
                    "anakin_fillconstant_elementwisemul_fuse") {}
  // Builds the pattern around the given elementwise_mul input node and
  // returns a pattern node (defined in graph_pattern_detector.cc).
  PDNode* operator()(PDNode* elementwise_op_input);
  // declare operator node's name
  PATTERN_DECL_NODE(fill_constant);
  PATTERN_DECL_NODE(fill_constant_out);
  PATTERN_DECL_NODE(elementwise_mul);
  PATTERN_DECL_NODE(elementwise_mul_out);
 };
 struct QuantDequantOpFuse : public PatternBase {
  QuantDequantOpFuse(PDPattern* pattern, const std::string& name_scope)
      : PatternBase(pattern, name_scope, "quant_dequant_fuse") {}
--- a/paddle/fluid/framework/ir/simplify_anakin_priorbox_detection_out_pass.cc
+++ b/paddle/fluid/framework/ir/simplify_anakin_priorbox_detection_out_pass.cc
@ -1,233 +0,0 @@
 // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include <string>
 #include <vector>
 #include "paddle/fluid/framework/ir/graph_viz_pass.h"
 #include "paddle/fluid/framework/ir/node.h"
 #include "paddle/fluid/framework/ir/simplify_anakin_priorbox_detection_out_pass.h"
 namespace paddle {
 namespace framework {
 namespace ir {
 // Finds detection heads with exactly `times` prior-box branches in `graph`
 // and rewrites each match into one new concat op (over the raw prior-box
 // "Boxes" outputs) followed by one `detection_out` op.
 //
 // is_density selects "density_prior_box" (true) or "prior_box" (false) as the
 // prior-box op type; is_reshape is forwarded to the pattern and selects
 // which op type follows each prior-box output.
 void RunSimplifyAnakinDetection(ir::Graph *graph, int times, bool is_density,
                                bool is_reshape) {
  const std::string pattern_name =
      "simplify_anakin_detection_pattern_pass" + std::to_string(times);
  std::string priorbox_type = is_density ? "density_prior_box" : "prior_box";
  GraphPatternDetector gpd;
  // External inputs of the pattern: x0..x{times-1} feed the prior-box ops,
  // x{times} is box_coder's "TargetBox", x{times+1} feeds a transpose2.
  std::vector<PDNode *> input_nodes;
  for (int i = 0; i < times; i++) {
    input_nodes.push_back(gpd.mutable_pattern()
                              ->NewNode("x" + std::to_string(i))
                              ->assert_is_op_input(priorbox_type, "Input")
                              ->AsInput());
  }
  input_nodes.push_back(gpd.mutable_pattern()
                            ->NewNode("x" + std::to_string(times))
                            ->assert_is_op_input("box_coder", "TargetBox")
                            ->AsInput());
  input_nodes.push_back(gpd.mutable_pattern()
                            ->NewNode("x" + std::to_string(times + 1))
                            ->assert_is_op_input("transpose2")
                            ->AsInput());
  patterns::AnakinDetectionPattern pattern(gpd.mutable_pattern(), pattern_name);
  pattern(input_nodes, times, priorbox_type, is_reshape);
  // Invoked once per matched subgraph.
  auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
                     Graph *g) {
    // Layout of `nodes`: kNumFields consecutive entries per branch, in the
    // order prior_box, box_out, reshape1, reshape1_out, box_var_out,
    // reshape2, reshape2_out. These constants repeat the ones used when the
    // pattern is built and must stay in sync with them.
    const int kNumFields = 7;
    const int kPriorBoxLocOffset = 1;
    const int kReshape1Offset = 2;
    const int kReshape1OutOffset = 3;
    const int kPriorBoxVarOffset = 4;
    const int kReshape2Offset = 5;
    const int kReshape2OutOffset = 6;
    std::vector<Node *> nodes;
    for (int i = 0; i < times; i++) {
      // Every per-branch node must have been bound to a non-null graph node.
      PADDLE_ENFORCE(
          subgraph.at(pattern.GetPDNode("prior_box" + std::to_string(i))));
      PADDLE_ENFORCE(
          subgraph.at(pattern.GetPDNode("box_out" + std::to_string(i))));
      PADDLE_ENFORCE(
          subgraph.at(pattern.GetPDNode("reshape1" + std::to_string(i))));
      PADDLE_ENFORCE(
          subgraph.at(pattern.GetPDNode("reshape1_out" + std::to_string(i))));
      PADDLE_ENFORCE(
          subgraph.at(pattern.GetPDNode("reshape2" + std::to_string(i))));
      PADDLE_ENFORCE(
          subgraph.at(pattern.GetPDNode("reshape2_out" + std::to_string(i))));
      PADDLE_ENFORCE(
          subgraph.at(pattern.GetPDNode("box_var_out" + std::to_string(i))));
      // Push in the order the k*Offset constants above expect.
      nodes.push_back(
          subgraph.at(pattern.GetPDNode("prior_box" + std::to_string(i))));
      nodes.push_back(
          subgraph.at(pattern.GetPDNode("box_out" + std::to_string(i))));
      nodes.push_back(
          subgraph.at(pattern.GetPDNode("reshape1" + std::to_string(i))));
      nodes.push_back(
          subgraph.at(pattern.GetPDNode("reshape1_out" + std::to_string(i))));
      nodes.push_back(
          subgraph.at(pattern.GetPDNode("box_var_out" + std::to_string(i))));
      nodes.push_back(
          subgraph.at(pattern.GetPDNode("reshape2" + std::to_string(i))));
      nodes.push_back(
          subgraph.at(pattern.GetPDNode("reshape2_out" + std::to_string(i))));
    }
    // Nodes shared by all branches.
    Node *concat_op1 = subgraph.at(pattern.GetPDNode("concat1"));
    Node *concat_out1 = subgraph.at(pattern.GetPDNode("concat1_out"));
    Node *concat_op2 = subgraph.at(pattern.GetPDNode("concat2"));
    Node *concat_out2 = subgraph.at(pattern.GetPDNode("concat2_out"));
    Node *box_coder_third_input = subgraph.at(input_nodes[times]);
    Node *box_coder_op = subgraph.at(pattern.GetPDNode("box_coder"));
    Node *box_coder_out = subgraph.at(pattern.GetPDNode("box_coder_out"));
    Node *multiclass_nms_second_input = subgraph.at(input_nodes[times + 1]);
    Node *transpose_before_nms =
        subgraph.at(pattern.GetPDNode("transpose_before_nms"));
    Node *transpose_before_nms_out =
        subgraph.at(pattern.GetPDNode("transpose_before_nms_out"));
    Node *multiclass_nms = subgraph.at(pattern.GetPDNode("multiclass_nms"));
    Node *multiclass_nms_out =
        subgraph.at(pattern.GetPDNode("multiclass_nms_out"));
    // Attributes copied from box_coder and multiclass_nms onto the fused op.
    std::string code_type =
        boost::get<std::string>(box_coder_op->Op()->GetAttr("code_type"));
    bool box_normalized =
        boost::get<bool>(box_coder_op->Op()->GetAttr("box_normalized"));
    int background_label =
        boost::get<int>(multiclass_nms->Op()->GetAttr("background_label"));
    float score_threshold =
        boost::get<float>(multiclass_nms->Op()->GetAttr("score_threshold"));
    int nms_top_k = boost::get<int>(multiclass_nms->Op()->GetAttr("nms_top_k"));
    float nms_threshold =
        boost::get<float>(multiclass_nms->Op()->GetAttr("nms_threshold"));
    float nms_eta = boost::get<float>(multiclass_nms->Op()->GetAttr("nms_eta"));
    int keep_top_k =
        boost::get<int>(multiclass_nms->Op()->GetAttr("keep_top_k"));
    // New concat op: joins the raw prior-box "Boxes" outputs on axis 2 and
    // writes to the variable that concat1 used to produce.
    std::vector<std::string> concat1_input_names;
    for (int i = 0; i < times; i++) {
      concat1_input_names.push_back(
          nodes[i * kNumFields + kPriorBoxLocOffset]->Name());
    }
    framework::OpDesc concat1_desc;
    concat1_desc.SetType("concat");
    concat1_desc.SetInput("X", concat1_input_names);
    concat1_desc.SetAttr("axis", 2);
    concat1_desc.SetOutput("Out", {concat_out1->Name()});
    // NOTE(review): unlike new_op_desc below, concat1_desc is not Flush()ed
    // before the op node is created - confirm this is intentional.
    auto *new_add_concat_op = graph->CreateOpNode(&concat1_desc);
    for (int i = 0; i < times; i++) {
      nodes[i * kNumFields + kPriorBoxLocOffset]->outputs.push_back(
          new_add_concat_op);
      new_add_concat_op->inputs.push_back(
          nodes[i * kNumFields + kPriorBoxLocOffset]);
    }
    // Fused op that replaces box_coder + transpose2 + multiclass_nms.
    framework::OpDesc new_op_desc;
    new_op_desc.SetType("detection_out");
    new_op_desc.SetInput("PriorBox", {concat_out1->Name()});
    new_op_desc.SetInput("TargetBox", {box_coder_third_input->Name()});
    new_op_desc.SetInput("Scores", {multiclass_nms_second_input->Name()});
    new_op_desc.SetAttr("code_type", code_type);
    new_op_desc.SetAttr("box_normalized", box_normalized);
    new_op_desc.SetAttr("background_label", background_label);
    new_op_desc.SetAttr("score_threshold", score_threshold);
    new_op_desc.SetAttr("nms_top_k", nms_top_k);
    new_op_desc.SetAttr("nms_threshold", nms_threshold);
    new_op_desc.SetAttr("nms_eta", nms_eta);
    new_op_desc.SetAttr("keep_top_k", keep_top_k);
    new_op_desc.SetOutput("Out", {multiclass_nms_out->Name()});
    new_op_desc.Flush();
    // Create a new node for the fused op.
    auto *detection_out_op = graph->CreateOpNode(&new_op_desc);
    // Collect everything that becomes dead. The prior-box ops, their "Boxes"
    // outputs and concat_out1 are kept.
    std::unordered_set<const Node *> delete_nodes;
    for (int i = 0; i < times; i++) {
      // NOTE(review): this adds an edge from box_out to the old concat1 op,
      // which is itself queued for deletion a few lines below; presumably
      // GraphSafeRemoveNodes drops the edge again - confirm it is needed.
      nodes[i * kNumFields + kPriorBoxLocOffset]->outputs.push_back(concat_op1);
      delete_nodes.insert(nodes[i * kNumFields + kReshape1Offset]);
      delete_nodes.insert(nodes[i * kNumFields + kReshape1OutOffset]);
      delete_nodes.insert(nodes[i * kNumFields + kPriorBoxVarOffset]);
      delete_nodes.insert(nodes[i * kNumFields + kReshape2Offset]);
      delete_nodes.insert(nodes[i * kNumFields + kReshape2OutOffset]);
    }
    delete_nodes.insert(concat_op1);
    delete_nodes.insert(concat_op2);
    delete_nodes.insert(concat_out2);
    delete_nodes.insert(box_coder_op);
    delete_nodes.insert(box_coder_out);
    delete_nodes.insert(transpose_before_nms);
    delete_nodes.insert(transpose_before_nms_out);
    delete_nodes.insert(multiclass_nms);
    // Wire: new concat -> concat_out1 -> detection_out -> multiclass_nms_out,
    // with the two external inputs also feeding detection_out.
    new_add_concat_op->outputs.push_back(concat_out1);
    concat_out1->inputs.push_back(new_add_concat_op);
    detection_out_op->inputs.push_back(concat_out1);
    detection_out_op->inputs.push_back(box_coder_third_input);
    detection_out_op->inputs.push_back(multiclass_nms_second_input);
    detection_out_op->outputs.push_back(multiclass_nms_out);
    concat_out1->outputs.push_back(detection_out_op);
    box_coder_third_input->outputs.push_back(detection_out_op);
    multiclass_nms_second_input->outputs.push_back(detection_out_op);
    multiclass_nms_out->inputs.push_back(detection_out_op);
    // Delete the unneeded nodes.
    GraphSafeRemoveNodes(graph, delete_nodes);
  };
  gpd(graph, handler);
 }
 // Runs the detection-head simplification for every combination of prior-box
 // flavour (density first, then plain), follow-up op (reshape first, then
 // flatten) and branch count 1..6, in that nesting order.
 void SimplifyAnakinDetectionPatternPass::ApplyImpl(ir::Graph *graph) const {
  const std::string pattern_name = "simplify_anakin_detection_pattern_pass";
  FusePassBase::Init(pattern_name, graph);

  const int kMaxBranches = 6;
  const bool kFlagValues[] = {true, false};

  for (bool is_density : kFlagValues) {
    for (bool is_reshape : kFlagValues) {
      for (int branches = 1; branches <= kMaxBranches; ++branches) {
        RunSimplifyAnakinDetection(graph, branches, is_density, is_reshape);
      }
    }
  }
 }
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
 // Short alias so the registration macro receives a single identifier.
 using priorbox_pattern =
    paddle::framework::ir::SimplifyAnakinDetectionPatternPass;
 REGISTER_PASS(simplify_anakin_priorbox_detection_out_pass, priorbox_pattern);
--- a/paddle/fluid/framework/ir/simplify_anakin_priorbox_detection_out_pass.h
+++ b/paddle/fluid/framework/ir/simplify_anakin_priorbox_detection_out_pass.h
@ -1,39 +0,0 @@
 // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #pragma once
 #include <memory>
 #include <unordered_set>
 #include "paddle/fluid/framework/ir/fuse_pass_base.h"
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 namespace paddle {
 namespace framework {
 namespace ir {
 // Fuse pass for detection heads. A model may contain several parallel
 // prior-box branches whose outputs are reshaped/flattened and concatenated
 // before box_coder and multiclass_nms. The implementation searches for that
 // structure with 1 to 6 repeated branches (the "times" of the pattern) and
 // replaces each match with a concat op followed by a single detection_out
 // op.
 class SimplifyAnakinDetectionPatternPass : public FusePassBase {
  public:
  virtual ~SimplifyAnakinDetectionPatternPass() {}
  protected:
  // Applies the rewrite to `graph`.
  void ApplyImpl(ir::Graph* graph) const override;
 };
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
--- a/paddle/fluid/framework/ir/subgraph_detector.h
+++ b/paddle/fluid/framework/ir/subgraph_detector.h
@ -65,7 +65,7 @@ class SubGraphFuser {
  using NodeInsideSubgraphTeller = SubgraphDetector::NodeInsideSubgraphTeller;
  SubGraphFuser(Graph *graph, const NodeInsideSubgraphTeller &teller,
-                int min_subgraph_size, std::string name = "anakin_engine")
+                int min_subgraph_size, std::string name = "tensorrt_engine")
      : graph_(graph),
        node_inside_subgraph_teller_(teller),
        min_subgraph_size_{min_subgraph_size},
--- a/paddle/fluid/inference/CMakeLists.txt
+++ b/paddle/fluid/inference/CMakeLists.txt
@ -30,10 +30,6 @@ if (TENSORRT_FOUND)
  add_subdirectory(tensorrt)
 endif()
 # The anakin/ subdirectory is only built when the Anakin subgraph engine is
 # enabled.
 if (ANAKIN_SUBGRAPH)
  add_subdirectory(anakin)
 endif()
 if (WITH_LITE)
  add_subdirectory(lite)
 endif()
@ -68,9 +64,6 @@ if(NOT APPLE)
  set_target_properties(paddle_fluid PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
 endif()
 # When Anakin was found, remember the Anakin API source so it can be added
 # to the shared inference source list; otherwise the variable stays unset.
 if(ANAKIN_FOUND)
    set(ANAKIN_SHARED_INFERENCE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/api/api_anakin_engine.cc)
 endif()
 set(SHARED_INFERENCE_SRCS
    io.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/../framework/data_feed.cc
@ -80,8 +73,7 @@ set(SHARED_INFERENCE_SRCS
    ${CMAKE_CURRENT_SOURCE_DIR}/api/api_impl.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/api/analysis_predictor.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/api/details/zero_copy_tensor.cc
-    ${mkldnn_quantizer_src_file}
+    ${mkldnn_quantizer_src_file})
    ${ANAKIN_SHARED_INFERENCE_SRCS})
 # Create shared inference library defaultly
 cc_library(paddle_fluid_shared SHARED SRCS ${SHARED_INFERENCE_SRCS}
--- a/paddle/fluid/inference/anakin/CMakeLists.txt
+++ b/paddle/fluid/inference/anakin/CMakeLists.txt
@ -1,5 +0,0 @@
 # Engine library, linked against the external anakin and anakin_saber_common
 # libraries.
 cc_library(anakin_engine
     SRCS engine.cc
     DEPS framework_proto boost)
 target_link_libraries(anakin_engine anakin anakin_saber_common)

 # Op teller library.
 cc_library(anakin_op_teller
     SRCS op_teller.cc
     DEPS framework_proto device_context boost)

 # Test for the engine library.
 cc_test(test_anakin_engine
     SRCS test_anakin_engine.cc
     DEPS anakin_engine)

 # Per-operator sources live in convert/.
 add_subdirectory(convert)
--- a/paddle/fluid/inference/anakin/convert/CMakeLists.txt
+++ b/paddle/fluid/inference/anakin/convert/CMakeLists.txt
@ -1,23 +0,0 @@
 # anakin_op_converter: one library built from all per-operator sources in
 # this directory.
 cc_library(anakin_op_converter
     SRCS
         fc.cc
         conv2d.cc
         conv2d_fusion.cc
         elementwise.cc
         activation.cc
         pool2d.cc
         concat.cc
         split.cc
         relu.cc
         softmax.cc
         batch_norm.cc
         reshape.cc
         flatten.cc
         transpose.cc
         density_prior_box.cc
         detection_out.cc
         scale.cc
         dropout.cc
         im2sequence.cc
         sum.cc
         affine_channel.cc
         roi_align.cc
         shuffle_channel.cc
         helper.cc
     DEPS
         anakin_engine
         framework_proto
         scope
         op_registry
         gtest
         gflags)

 # One test per operator; each links anakin_op_converter plus the Paddle
 # operator libraries listed in its DEPS.
 cc_test(test_anakin_fc
     SRCS test_fc_op.cc
     DEPS anakin_op_converter mul_op)
 cc_test(test_anakin_conv2d
     SRCS test_conv2d_op.cc
     DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv)
 cc_test(test_anakin_activation
     SRCS test_activation_op.cc
     DEPS activation_op anakin_op_converter)
 cc_test(test_anakin_pool2d
     SRCS test_pool2d_op.cc
     DEPS anakin_op_converter pool_op pooling)
 cc_test(test_anakin_concat
     SRCS test_concat_op.cc
     DEPS anakin_op_converter concat_op concat_and_split)
 cc_test(test_anakin_split
     SRCS test_split_op.cc
     DEPS anakin_op_converter split_op concat_and_split)
 cc_test(test_anakin_elementwise
     SRCS test_elementwise_op.cc
     DEPS anakin_op_converter elementwise_add_op elementwise_mul_op)
 cc_test(test_anakin_relu
     SRCS test_relu_op.cc
     DEPS activation_op anakin_op_converter)
 cc_test(test_anakin_softmax
     SRCS test_softmax_op.cc
     DEPS anakin_op_converter softmax_op softmax)
 cc_test(test_anakin_reshape
     SRCS test_reshape_op.cc
     DEPS anakin_op_converter reshape_op)
 cc_test(test_anakin_flatten
     SRCS test_flatten_op.cc
     DEPS anakin_op_converter flatten_op reshape_op)
 cc_test(test_anakin_transpose
     SRCS test_transpose_op.cc
     DEPS anakin_op_converter transpose_op)
 cc_test(test_anakin_batch_norm
     SRCS test_batch_norm_op.cc
     DEPS anakin_op_converter batch_norm_op)
 cc_test(test_anakin_dropout
     SRCS test_dropout_op.cc
     DEPS anakin_op_converter dropout_op)
 cc_test(test_anakin_sum
     SRCS test_sum_op.cc
     DEPS anakin_op_converter sum_op selected_rows_functor)
 cc_test(test_anakin_affine_channel
     SRCS test_affine_channel_op.cc
     DEPS anakin_op_converter affine_channel_op)
--- a/paddle/fluid/inference/anakin/convert/activation.cc
+++ b/paddle/fluid/inference/anakin/convert/activation.cc
@ -1,64 +0,0 @@
 // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "paddle/fluid/inference/anakin/convert/activation.h"
 #include <algorithm>
 #include <map>
 namespace paddle {
 namespace inference {
 namespace anakin {
 // Builds a converter for the Paddle activation `op_type`. The matching Anakin
 // activation type is looked up in anakin_op_types_ once, here; an op type that
 // is missing from that table fails the enforce below.
 template <typename TargetT, ::anakin::Precision PrecisionT>
 ActivationOpConverter<TargetT, PrecisionT>::ActivationOpConverter(
     const std::string &op_type)
     : op_type_(op_type) {
   auto entry = anakin_op_types_.find(op_type_);
   PADDLE_ENFORCE(entry != anakin_op_types_.end(),
                  "activation op type is not support");
   // Cache the Anakin-side name so operator() does not repeat the lookup.
   anakin_op_type_ = entry->second;
 }
 // Adds one Anakin "Activation" op for the given Paddle activation op.
 //
 // The op must have exactly one "X" input and one "Out" output. The Anakin op
 // is named "<paddle op type>:<output name>" and carries the Anakin activation
 // type resolved in the constructor. For swish ("beta") and relu6
 // ("threshold") the Paddle attribute is forwarded as "clip_relu_num".
 // block_desc, scope and test_mode are not used by this converter.
 template <typename TargetT, ::anakin::Precision PrecisionT>
 void ActivationOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
   PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);

   auto input_name = op_desc.Input("X").front();
   auto output_name = op_desc.Output("Out").front();
   auto op_name = op_desc.Type() + ":" + output_name;

   this->engine_->AddOp(op_name, "Activation", {input_name}, {output_name});
   this->engine_->AddOpAttr(op_name, "type", anakin_op_type_);

   // op_type_ holds a single value, so the two cases are mutually exclusive.
   if (op_type_ == "swish") {
     float beta = boost::get<float>(op_desc.GetAttr("beta"));
     this->engine_->AddOpAttr(op_name, "clip_relu_num", beta);
   } else if (op_type_ == "relu6") {
     float threshold = boost::get<float>(op_desc.GetAttr("threshold"));
     this->engine_->AddOpAttr(op_name, "clip_relu_num", threshold);
   }
 }
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 // Register one converter class per supported activation op type. The macro is
 // defined outside this file; presumably the first argument is the Paddle op
 // type the converter is looked up by -- confirm against op_converter.h.
 REGISTER_ANAKIN_OP_CONVERTER(sigmoid, SigmoidOpConverter);
 REGISTER_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter);
 REGISTER_ANAKIN_OP_CONVERTER(swish, SwishOpConverter);
 REGISTER_ANAKIN_OP_CONVERTER(relu6, Relu6OpConverter);
--- a/paddle/fluid/inference/anakin/convert/activation.h
+++ b/paddle/fluid/inference/anakin/convert/activation.h
@ -1,72 +0,0 @@
 // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #pragma once
 #include <map>
 #include <string>
 #include "paddle/fluid/inference/anakin/convert/op_converter.h"
 namespace paddle {
 namespace inference {
 namespace anakin {
 // Common base for the activation converters. A concrete converter passes its
 // Paddle op type to the constructor, which resolves the corresponding Anakin
 // activation type through anakin_op_types_.
 template <typename TargetT, ::anakin::Precision PrecisionT>
 class ActivationOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
 public:
  explicit ActivationOpConverter(const std::string &op_type);

  // Converts `op` into an Anakin op; see the definition for details.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const framework::Scope &scope, bool test_mode) override;

  virtual ~ActivationOpConverter() {}

 private:
  // Paddle op type handled by this instance.
  std::string op_type_;
  // Anakin activation type matching op_type_.
  std::string anakin_op_type_;
  // Supported Paddle op type -> Anakin activation type.
  std::map<std::string, std::string> anakin_op_types_{
      {"relu6", "ClippedRelu"},
      {"sigmoid", "Sigmoid"},
      {"swish", "Swish"},
      {"tanh", "TanH"}};
 };
 // Activation converter bound to the Paddle "tanh" op type.
 template <typename TargetT, ::anakin::Precision PrecisionT>
 class TanhOpConverter : public ActivationOpConverter<TargetT, PrecisionT> {
 public:
  TanhOpConverter()
      : ActivationOpConverter<TargetT, PrecisionT>("tanh") {}
 };

 // Activation converter bound to the Paddle "sigmoid" op type.
 template <typename TargetT, ::anakin::Precision PrecisionT>
 class SigmoidOpConverter : public ActivationOpConverter<TargetT, PrecisionT> {
 public:
  SigmoidOpConverter()
      : ActivationOpConverter<TargetT, PrecisionT>("sigmoid") {}
 };

 // Activation converter bound to the Paddle "relu6" op type.
 template <typename TargetT, ::anakin::Precision PrecisionT>
 class Relu6OpConverter : public ActivationOpConverter<TargetT, PrecisionT> {
 public:
  Relu6OpConverter()
      : ActivationOpConverter<TargetT, PrecisionT>("relu6") {}
 };

 // Activation converter bound to the Paddle "swish" op type.
 template <typename TargetT, ::anakin::Precision PrecisionT>
 class SwishOpConverter : public ActivationOpConverter<TargetT, PrecisionT> {
 public:
  SwishOpConverter()
      : ActivationOpConverter<TargetT, PrecisionT>("swish") {}
 };
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
--- a/paddle/fluid/inference/anakin/convert/affine_channel.cc
+++ b/paddle/fluid/inference/anakin/convert/affine_channel.cc
@ -1,55 +0,0 @@
 // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "paddle/fluid/inference/anakin/convert/affine_channel.h"
 #include <algorithm>
 #include <string>
 #include <vector>
 #include "paddle/fluid/inference/anakin/convert/helper.h"
 namespace paddle {
 namespace inference {
 namespace anakin {
 // Converts a Paddle affine_channel op into one Anakin "AffineChannel" op.
 //
 // The op must have exactly one "X", "Scale" and "Bias" input and one "Out"
 // output. Scale and Bias are looked up in `scope` and attached to the Anakin
 // op as "weight_1" and "weight_2". block_desc and test_mode are not used.
 template <typename TargetT, ::anakin::Precision PrecisionT>
 void AffineChannelOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
   PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
   // Scale and Bias are read with front() below; calling front() on an empty
   // list is undefined behaviour, so reject malformed ops up front.
   PADDLE_ENFORCE_EQ(op_desc.Input("Scale").size(), 1UL);
   PADDLE_ENFORCE_EQ(op_desc.Input("Bias").size(), 1UL);

   auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
   auto input_name = op_desc.Input("X").front();
   auto output_name = op_desc.Output("Out").front();
   this->engine_->AddOp(op_name, "AffineChannel", {input_name}, {output_name});

   // Copy the Scale to CPUPlace and get the pointer.
   auto *scale_v = scope.FindVar(op_desc.Input("Scale").front());
   PADDLE_ENFORCE_NOT_NULL(scale_v);
   auto weight1 = pblock_from_var<TargetT, PrecisionT>(*scale_v, this->engine_);
   this->engine_->AddOpAttr(op_name, "weight_1", *weight1);

   // Copy the Bias to CPUPlace and get the pointer.
   auto *bias_v = scope.FindVar(op_desc.Input("Bias").front());
   PADDLE_ENFORCE_NOT_NULL(bias_v);
   auto weight2 = pblock_from_var<TargetT, PrecisionT>(*bias_v, this->engine_);
   this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
 }
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 // Register the converter for the affine_channel op type (macro defined
 // outside this file).
 REGISTER_ANAKIN_OP_CONVERTER(affine_channel, AffineChannelOpConverter);
--- a/paddle/fluid/inference/anakin/convert/affine_channel.h
+++ b/paddle/fluid/inference/anakin/convert/affine_channel.h
@ -1,40 +0,0 @@
 // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #pragma once
 #include <memory>
 #include "paddle/fluid/inference/anakin/convert/op_converter.h"
 namespace paddle {
 namespace inference {
 namespace anakin {
 // Converter for the Paddle affine_channel op; the conversion itself is
 // implemented in operator().
 template <typename TargetT, ::anakin::Precision PrecisionT>
 class AffineChannelOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
 public:
  AffineChannelOpConverter() = default;

  void operator()(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const framework::Scope &scope, bool test_mode) override;

  virtual ~AffineChannelOpConverter() {}
 };
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
--- a/paddle/fluid/inference/anakin/convert/batch_norm.cc
+++ b/paddle/fluid/inference/anakin/convert/batch_norm.cc
@ -1,85 +0,0 @@
 // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "paddle/fluid/inference/anakin/convert/batch_norm.h"
 #include <math.h>
 #include <algorithm>
 #include <map>
 #include <string>
 #include <vector>
 #include "paddle/fluid/inference/anakin/convert/helper.h"
 namespace paddle {
 namespace inference {
 namespace anakin {
 // Converts a Paddle batch_norm op into two chained Anakin ops:
 //   "<op>:bn"    BatchNorm  X -> "<op>:bn_output"  (Mean, Variance, epsilon)
 //   "<op>:scale" Scale      "<op>:bn_output" -> Y  (Scale, Bias)
 // where <op> is "<paddle op type>:<Y output name>".
 //
 // The op must have exactly one X, Scale, Bias, Mean and Variance input and
 // one Y output; the four parameter variables are looked up in `scope`.
 // block_desc and test_mode are not used.
 template <typename TargetT, ::anakin::Precision PrecisionT>
 void BatchNormOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Output("Y").size(), 1UL);
   for (auto k : {"X", "Scale", "Bias", "Mean", "Variance"}) {
     PADDLE_ENFORCE_EQ(op_desc.Input(k).size(), 1UL);
   }

   auto input = op_desc.Input("X").front();
   auto output = op_desc.Output("Y").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Y").front();
   auto epsilon = boost::get<float>(op_desc.GetAttr("epsilon"));

   // First stage: BatchNorm writes to an intermediate tensor.
   auto bn_op_name = op_name + ":bn";
   auto bn_output = bn_op_name + "_output";
   this->engine_->AddOp(bn_op_name, "BatchNorm", {input}, {bn_output});
   this->engine_->AddOpAttr(bn_op_name, "epsilon", epsilon);
   this->engine_->AddOpAttr(bn_op_name, "momentum", static_cast<float>(1.0));

   // Second stage: Scale consumes the intermediate tensor and produces Y.
   auto scale_op_name = op_name + ":scale";
   this->engine_->AddOp(scale_op_name, "Scale", {bn_output}, {output});
   this->engine_->AddOpAttr(scale_op_name, "axis", 1);
   this->engine_->AddOpAttr(scale_op_name, "num_axes", 1);
   this->engine_->AddOpAttr(scale_op_name, "bias_term", true);

   // BatchNorm weights: weight_1 = Mean, weight_2 = Variance, weight_3 = {1}.
   auto *mean_v = scope.FindVar(op_desc.Input("Mean").front());
   PADDLE_ENFORCE_NOT_NULL(mean_v);
   auto weight1 = pblock_from_var<TargetT, PrecisionT>(*mean_v, this->engine_);
   this->engine_->AddOpAttr(bn_op_name, "weight_1", *weight1);

   auto *variance_v = scope.FindVar(op_desc.Input("Variance").front());
   PADDLE_ENFORCE_NOT_NULL(variance_v);
   auto weight2 =
       pblock_from_var<TargetT, PrecisionT>(*variance_v, this->engine_);
   this->engine_->AddOpAttr(bn_op_name, "weight_2", *weight2);

   auto *weight3 = pblock_from_vector<TargetT, PrecisionT>(
       std::vector<float>({1}), this->engine_);
   this->engine_->AddOpAttr(bn_op_name, "weight_3", *weight3);

   // Scale weights: weight_1 = Scale, weight_2 = Bias.
   auto *scale_v = scope.FindVar(op_desc.Input("Scale").front());
   PADDLE_ENFORCE_NOT_NULL(scale_v);
   auto scale = pblock_from_var<TargetT, PrecisionT>(*scale_v, this->engine_);
   this->engine_->AddOpAttr(scale_op_name, "weight_1", *scale);

   auto *bias_v = scope.FindVar(op_desc.Input("Bias").front());
   PADDLE_ENFORCE_NOT_NULL(bias_v);
   auto bias = pblock_from_var<TargetT, PrecisionT>(*bias_v, this->engine_);
   this->engine_->AddOpAttr(scale_op_name, "weight_2", *bias);
 }
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 // Register the converter for the batch_norm op type (macro defined outside
 // this file).
 REGISTER_ANAKIN_OP_CONVERTER(batch_norm, BatchNormOpConverter);
--- a/paddle/fluid/inference/anakin/convert/batch_norm.h
+++ b/paddle/fluid/inference/anakin/convert/batch_norm.h
@ -1,37 +0,0 @@
 // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #pragma once
 #include "paddle/fluid/inference/anakin/convert/op_converter.h"
 namespace paddle {
 namespace inference {
 namespace anakin {
 // Converter for the Paddle batch_norm op; the conversion itself is
 // implemented in operator().
 template <typename TargetT, ::anakin::Precision PrecisionT>
 class BatchNormOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
 public:
  BatchNormOpConverter() = default;

  void operator()(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const framework::Scope &scope, bool test_mode) override;

  virtual ~BatchNormOpConverter() {}
 };
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
--- a/paddle/fluid/inference/anakin/convert/concat.cc
+++ b/paddle/fluid/inference/anakin/convert/concat.cc
@ -1,41 +0,0 @@
 // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "paddle/fluid/inference/anakin/convert/concat.h"
 #include <algorithm>
 namespace paddle {
 namespace inference {
 namespace anakin {
 // Converts a Paddle concat op into one Anakin "Concat" op.
 //
 // All "X" inputs are forwarded as the Anakin op's inputs, the single "Out"
 // output becomes its output, and the "axis" attribute is copied over.
 // block_desc, scope and test_mode are not used.
 template <typename TargetT, ::anakin::Precision PrecisionT>
 void ConcatOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   // Out is read with front() below; calling front() on an empty list is
   // undefined behaviour, so check it the way the other converters do.
   PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1UL);

   int axis = boost::get<int>(op_desc.GetAttr("axis"));
   auto input_names = op_desc.Input("X");
   auto y_name = op_desc.Output("Out").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();

   this->engine_->AddOp(op_name, "Concat", input_names, {y_name});
   this->engine_->AddOpAttr(op_name, "axis", axis);
 }
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 // Register the converter for the concat op type (macro defined outside this
 // file).
 REGISTER_ANAKIN_OP_CONVERTER(concat, ConcatOpConverter);
--- a/paddle/fluid/inference/anakin/convert/concat.h
+++ b/paddle/fluid/inference/anakin/convert/concat.h
@ -1,39 +0,0 @@
 // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #pragma once
 #include "paddle/fluid/inference/anakin/convert/op_converter.h"
 namespace paddle {
 namespace inference {
 namespace anakin {
 // Converter for the Paddle concat op; the conversion itself is implemented in
 // operator().
 template <typename TargetT, ::anakin::Precision PrecisionT>
 class ConcatOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
 public:
  ConcatOpConverter() = default;

  void operator()(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const framework::Scope &scope, bool test_mode) override;

  virtual ~ConcatOpConverter() {}
 };
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
--- a/paddle/fluid/inference/anakin/convert/conv2d.cc
+++ b/paddle/fluid/inference/anakin/convert/conv2d.cc
@ -1,109 +0,0 @@
 // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "paddle/fluid/inference/anakin/convert/conv2d.h"
 #include <algorithm>
 #include <memory>
 #include <vector>
 #include "paddle/fluid/inference/anakin/convert/helper.h"
 using anakin::PTuple;
 namespace paddle {
 namespace inference {
 namespace anakin {
 // Converts a Paddle conv2d op into one bias-free Anakin "Convolution" op.
 //
 // The op must have exactly one "Input", "Filter" and "Output". The filter
 // variable is looked up in `scope` and must be 4-D; dims [0], [2] and [3] are
 // used as filter count, kernel height and kernel width. strides, paddings,
 // dilations and groups are copied from the Paddle attributes.
 //
 // When the op carries an "enable_int8" attribute the filter values are
 // narrowed to 8-bit and the op is marked AK_INT8, using the "input_scale"
 // and "weight_scale" attributes; otherwise the filter is attached as-is.
 // block_desc and test_mode are not used.
 template <typename TargetT, ::anakin::Precision PrecisionT>
 void Conv2dOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1UL);
   PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1UL);
   PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1UL);

   auto input_name = op_desc.Input("Input").front();
   auto output_name = op_desc.Output("Output").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Output").front();
   this->engine_->AddOp(op_name, "Convolution", {input_name}, {output_name});

   auto *filter_v = scope.FindVar(op_desc.Input("Filter").front());
   PADDLE_ENFORCE_NOT_NULL(filter_v);
   auto weight_tensor = tensor_from_var(*filter_v, platform::CPUPlace());
   auto weight_shape = framework::vectorize<int>(weight_tensor->dims());

   PADDLE_ENFORCE_EQ(weight_tensor->dims().size(), 4UL);

   const int filter_h = weight_tensor->dims()[2];
   const int filter_w = weight_tensor->dims()[3];
   auto filter_num = weight_tensor->dims()[0];
   this->engine_->template AddOpAttr<int>(op_name, "filter_num", filter_num);
   this->engine_->template AddOpAttr<PTuple<int>>(op_name, "kernel_size",
                                                  {filter_h, filter_w});
   auto strides = boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
   this->engine_->template AddOpAttr<PTuple<int>>(op_name, "strides", strides);
   auto paddings = boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
   this->engine_->template AddOpAttr<PTuple<int>>(op_name, "padding", paddings);
   auto dilations = boost::get<std::vector<int>>(op_desc.GetAttr("dilations"));
   this->engine_->template AddOpAttr<PTuple<int>>(op_name, "dilation_rate",
                                                  dilations);
   const int groups = boost::get<int>(op_desc.GetAttr("groups"));
   this->engine_->AddOpAttr(op_name, "group", groups);
   this->engine_->AddOpAttr(op_name, "axis", 1);
   this->engine_->AddOpAttr(op_name, "bias_term", false);

   ::anakin::saber::Shape anakin_shape(weight_shape);
   // HasAttr() already yields a bool, so no boost::get is needed. Note that
   // the presence of the attribute, not its value, selects the int8 path.
   bool enable_int8 = op_desc.HasAttr("enable_int8");

   if (enable_int8) {
     const float int8_range = 127.;
     float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
     auto weight_scale =
         boost::get<std::vector<float>>(op_desc.GetAttr("weight_scale"));
     // Only weight_scale[0] is consumed below; guard the access.
     PADDLE_ENFORCE(!weight_scale.empty(),
                    "We are in anakin subgraph int8 mode, the weight_scale of "
                    "conv should not be empty");
     // NOTE(review): the block is handed to RegistBlock right after `new`;
     // presumably the engine takes ownership -- confirm.
     PBlock<TargetT> *weight1 =
         new PBlock<TargetT>(anakin_shape, ::anakin::AK_INT8);
     this->engine_->RegistBlock(weight1);
     float *weight_data = weight_tensor->data<float>();
     const int weight_num = weight_tensor->numel();
     // signed char keeps negative weights well-defined on platforms where
     // plain char is unsigned.
     std::vector<signed char> weight_int8;
     weight_int8.reserve(weight_num);
     for (int i = 0; i < weight_num; i++) {
       bool is_valid_int8 =
           ((weight_data[i] >= -128) && (weight_data[i] <= 127));
       PADDLE_ENFORCE(is_valid_int8,
                      "We are in anakin subgraph int8 mode, the weight of conv "
                      "should be in range [-128, 127]");
       weight_int8.push_back(static_cast<signed char>(weight_data[i]));
     }
     memcpy(static_cast<void *>(weight1->h_tensor().mutable_data()),
            static_cast<void *>(weight_int8.data()),
            sizeof(signed char) * weight_num);
     weight1->d_tensor().set_shape(anakin_shape);
     weight1->d_tensor().copy_from(weight1->h_tensor());
     this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
     this->engine_->Graph()->SetOpPrec(op_name, ::anakin::AK_INT8);
     this->engine_->Graph()->SetWeightsScale(
         op_name, {weight_scale[0] / int8_range}, false);
     this->engine_->AddTensorScale(input_name, in_scale / int8_range);
   } else {
     auto *weight1 = pblock_from_tensor<TargetT, PrecisionT>(
         *weight_tensor, weight_shape, this->engine_);
     this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
   }
 }
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 // Register the converter for the conv2d op type (macro defined outside this
 // file).
 REGISTER_ANAKIN_OP_CONVERTER(conv2d, Conv2dOpConverter);
--- a/paddle/fluid/inference/anakin/convert/conv2d.h
+++ b/paddle/fluid/inference/anakin/convert/conv2d.h
@ -1,37 +0,0 @@
 // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #pragma once
 #include "paddle/fluid/inference/anakin/convert/op_converter.h"
 namespace paddle {
 namespace inference {
 namespace anakin {
 // Converter for the Paddle conv2d op; the conversion itself is implemented in
 // operator().
 template <typename TargetT, ::anakin::Precision PrecisionT>
 class Conv2dOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
 public:
  Conv2dOpConverter() = default;

  void operator()(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const framework::Scope &scope, bool test_mode) override;

  virtual ~Conv2dOpConverter() {}
 };
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
--- a/paddle/fluid/inference/anakin/convert/conv2d_fusion.cc
+++ b/paddle/fluid/inference/anakin/convert/conv2d_fusion.cc
@ -1,115 +0,0 @@
 // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "paddle/fluid/inference/anakin/convert/conv2d_fusion.h"
 #include <algorithm>
 #include <memory>
 #include <vector>
 #include "paddle/fluid/inference/anakin/convert/helper.h"
 using anakin::PTuple;
 namespace paddle {
 namespace inference {
 namespace anakin {
 // Converts a Paddle conv2d_fusion op into one Anakin "Convolution" op with
 // bias_term enabled.
 //
 // The op must have exactly one "Input", "Filter", "Bias" and "Output". The
 // filter and bias variables are looked up in `scope`; the filter must be 4-D
 // and dims [0], [2] and [3] are used as filter count, kernel height and
 // kernel width. strides, paddings, dilations and groups are copied from the
 // Paddle attributes.
 //
 // When the op carries an "enable_int8" attribute the filter values are
 // narrowed to 8-bit and the op is marked AK_INT8, using the "input_scale"
 // and "weight_scale" attributes; otherwise filter and bias are attached as
 // "weight_1" and "weight_2". block_desc and test_mode are not used.
 template <typename TargetT, ::anakin::Precision PrecisionT>
 void Conv2dFusionOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1UL);
   PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1UL);
   PADDLE_ENFORCE_EQ(op_desc.Input("Bias").size(), 1UL);
   PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1UL);

   auto input_name = op_desc.Input("Input").front();
   auto output_name = op_desc.Output("Output").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Output").front();
   this->engine_->AddOp(op_name, "Convolution", {input_name}, {output_name});

   auto *filter_v = scope.FindVar(op_desc.Input("Filter").front());
   PADDLE_ENFORCE_NOT_NULL(filter_v);

   auto weight_tensor = tensor_from_var(*filter_v, platform::CPUPlace());
   auto weight_shape = framework::vectorize<int>(weight_tensor->dims());

   auto *b_v = scope.FindVar(op_desc.Input("Bias").front());
   PADDLE_ENFORCE_NOT_NULL(b_v);

   PADDLE_ENFORCE_EQ(weight_tensor->dims().size(), 4UL);
   const int filter_h = weight_tensor->dims()[2];
   const int filter_w = weight_tensor->dims()[3];
   auto filter_num = weight_tensor->dims()[0];
   this->engine_->template AddOpAttr<int>(op_name, "filter_num", filter_num);
   this->engine_->template AddOpAttr<PTuple<int>>(op_name, "kernel_size",
                                                  {filter_h, filter_w});
   auto strides = boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
   this->engine_->template AddOpAttr<PTuple<int>>(op_name, "strides", strides);
   auto paddings = boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
   this->engine_->template AddOpAttr<PTuple<int>>(op_name, "padding", paddings);
   auto dilations = boost::get<std::vector<int>>(op_desc.GetAttr("dilations"));
   this->engine_->template AddOpAttr<PTuple<int>>(op_name, "dilation_rate",
                                                  dilations);
   const int groups = boost::get<int>(op_desc.GetAttr("groups"));
   this->engine_->AddOpAttr(op_name, "group", groups);
   this->engine_->AddOpAttr(op_name, "axis", 1);
   this->engine_->AddOpAttr(op_name, "bias_term", true);

   ::anakin::saber::Shape anakin_shape(weight_shape);
   // HasAttr() already yields a bool, so no boost::get is needed. Note that
   // the presence of the attribute, not its value, selects the int8 path.
   bool enable_int8 = op_desc.HasAttr("enable_int8");

   if (enable_int8) {
     const float int8_range = 127.;
     float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
     auto weight_scale =
         boost::get<std::vector<float>>(op_desc.GetAttr("weight_scale"));
     // Only weight_scale[0] is consumed below; guard the access.
     PADDLE_ENFORCE(!weight_scale.empty(),
                    "We are in anakin subgraph int8 mode, the weight_scale of "
                    "conv should not be empty");
     // NOTE(review): the block is handed to RegistBlock right after `new`;
     // presumably the engine takes ownership -- confirm.
     PBlock<TargetT> *weight1 =
         new PBlock<TargetT>(anakin_shape, ::anakin::AK_INT8);
     this->engine_->RegistBlock(weight1);
     float *weight_data = weight_tensor->data<float>();
     const int weight_num = weight_tensor->numel();
     // signed char keeps negative weights well-defined on platforms where
     // plain char is unsigned.
     std::vector<signed char> weight_int8;
     weight_int8.reserve(weight_num);
     for (int i = 0; i < weight_num; i++) {
       bool is_valid_int8 =
           ((weight_data[i] >= -128) && (weight_data[i] <= 127));
       PADDLE_ENFORCE(is_valid_int8,
                      "We are in anakin subgraph int8 mode, the weight of conv "
                      "should be in range [-128, 127]");
       weight_int8.push_back(static_cast<signed char>(weight_data[i]));
     }
     memcpy(static_cast<void *>(weight1->h_tensor().mutable_data()),
            static_cast<void *>(weight_int8.data()),
            sizeof(signed char) * weight_num);
     weight1->d_tensor().set_shape(anakin_shape);
     weight1->d_tensor().copy_from(weight1->h_tensor());
     this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
     this->engine_->Graph()->SetOpPrec(op_name, ::anakin::AK_INT8);
     this->engine_->Graph()->SetWeightsScale(
         op_name, {weight_scale[0] / int8_range}, false);
     this->engine_->AddTensorScale(input_name, in_scale / int8_range);
     // NOTE(review): bias_term is true but this branch never sets "weight_2"
     // from b_v, unlike the float branch below -- confirm whether the bias is
     // intentionally omitted in int8 mode.
   } else {
     // Reuse the filter tensor and shape computed above instead of converting
     // the variable a second time.
     auto *weight1 = pblock_from_tensor<TargetT, PrecisionT>(
         *weight_tensor, weight_shape, this->engine_);
     this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
     auto weight2 = pblock_from_var<TargetT, PrecisionT>(*b_v, this->engine_);
     this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
   }
 }
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 // Register the converter for the conv2d_fusion op type (macro defined outside
 // this file).
 REGISTER_ANAKIN_OP_CONVERTER(conv2d_fusion, Conv2dFusionOpConverter);
--- a/Show More
+++ b/Show More