remove anakin from code, test=develop (#22420)

revert-22710-feature/integrated_ps_api
石晓伟 5 years ago committed by GitHub
parent 0a678ca045
commit e1b0d7cbb1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -151,7 +151,6 @@ if(NOT WIN32)
include(cupti) include(cupti)
endif() endif()
include(anakin_subgraph)
include(flags) # set paddle compile flags include(flags) # set paddle compile flags
include(cudnn) # set cudnn libraries, must before configure include(cudnn) # set cudnn libraries, must before configure

@ -1,45 +0,0 @@
set(ANAKIN_ROOT "/usr" CACHE PATH "ANAKIN ROOT")
find_path(ANAKIN_INCLUDE_DIR anakin_config.h
PATHS ${ANAKIN_ROOT} ${ANAKIN_ROOT}/include
$ENV{ANAKIN_ROOT} $ENV{ANAKIN_ROOT}/include
NO_DEFAULT_PATH
)
find_library(ANAKIN_LIBRARY NAMES libanakin_saber_common.so libanakin.so
PATHS ${ANAKIN_ROOT}
$ENV{ANAKIN_ROOT} $ENV{ANAKIN_ROOT}/lib
NO_DEFAULT_PATH
DOC "Path to ANAKIN library.")
if(ANAKIN_INCLUDE_DIR AND ANAKIN_LIBRARY)
set(ANAKIN_FOUND ON)
else()
set(ANAKIN_FOUND OFF)
endif()
if(ANAKIN_FOUND)
message(STATUS "Current ANAKIN header is ${ANAKIN_INCLUDE_DIR}/anakin_config.h. ")
include_directories(${ANAKIN_ROOT})
include_directories(${ANAKIN_ROOT}/include)
include_directories(${ANAKIN_ROOT}/saber)
link_directories(${ANAKIN_ROOT})
add_definitions(-DPADDLE_WITH_ANAKIN)
endif()
if(ANAKIN_FOUND)
if (ANAKIN_MLU AND NOT WITH_GPU AND NOT ANAKIN_X86)
message(STATUS "Compile with anakin mlu place.")
add_definitions(-DANAKIN_MLU_PLACE)
elseif(ANAKIN_BM AND NOT WITH_GPU AND NOT ANAKIN_X86)
message(STATUS "Compile with anakin bm place.")
add_definitions(-DANAKIN_BM_PLACE)
elseif(ANAKIN_X86)
message(STATUS "Compile with anakin x86 place.")
add_definitions(-DANAKIN_X86_PLACE)
endif()
endif()
if(ANAKIN_FOUND AND WITH_GPU AND WITH_DSO)
message(STATUS "Compile with anakin subgraph.")
set(ANAKIN_SUBGRAPH ON)
endif()

@ -109,16 +109,6 @@ if(WITH_GPU)
endif() endif()
include_directories(${TENSORRT_INCLUDE_DIR}) include_directories(${TENSORRT_INCLUDE_DIR})
endif() endif()
if(ANAKIN_FOUND)
if(${CUDA_VERSION_MAJOR} VERSION_LESS 8)
message(WARNING "Anakin needs CUDA >= 8.0 to compile. Force ANAKIN_FOUND = OFF")
set(ANAKIN_FOUND OFF CACHE STRING "Anakin is valid only when CUDA >= 8.0." FORCE)
endif()
if(${CUDNN_MAJOR_VERSION} VERSION_LESS 7)
message(WARNING "Anakin needs CUDNN >= 7.0 to compile. Force ANAKIN_FOUND = OFF")
set(ANAKIN_FOUND OFF CACHE STRING "Anakin is valid only when CUDNN >= 7.0." FORCE)
endif()
endif()
elseif(WITH_AMD_GPU) elseif(WITH_AMD_GPU)
add_definitions(-DPADDLE_WITH_HIP) add_definitions(-DPADDLE_WITH_HIP)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D__HIP_PLATFORM_HCC__") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D__HIP_PLATFORM_HCC__")

@ -137,13 +137,6 @@ function(copy_part_of_thrid_party TARGET DST)
SRCS ${LITE_BINARY_DIR}/inference_lite_lib/* SRCS ${LITE_BINARY_DIR}/inference_lite_lib/*
DSTS ${dst_dir}) DSTS ${dst_dir})
endif() endif()
if (ANAKIN_FOUND)
set(dst_dir "${DST}/third_party/install/anakin")
copy(${TARGET}
SRCS ${ANAKIN_ROOT}/*
DSTS ${dst_dir})
endif ()
endfunction() endfunction()
# inference library for only inference # inference library for only inference

@ -71,7 +71,6 @@ pass_library(identity_scale_op_clean_pass base)
pass_library(sync_batch_norm_pass base) pass_library(sync_batch_norm_pass base)
pass_library(runtime_context_cache_pass base) pass_library(runtime_context_cache_pass base)
pass_library(quant_conv2d_dequant_fuse_pass inference) pass_library(quant_conv2d_dequant_fuse_pass inference)
pass_library(fillconstant_elementwisemul_fuse inference)
pass_library(shuffle_channel_detect_pass inference) pass_library(shuffle_channel_detect_pass inference)
pass_library(delete_quant_dequant_op_pass inference) pass_library(delete_quant_dequant_op_pass inference)
pass_library(simplify_with_basic_ops_pass base) pass_library(simplify_with_basic_ops_pass base)
@ -81,10 +80,6 @@ if(WITH_GPU)
pass_library(cudnn_placement_pass base DEPS placement_pass_base) pass_library(cudnn_placement_pass base DEPS placement_pass_base)
endif() endif()
if(ANAKIN_SUBGRAPH)
pass_library(simplify_anakin_priorbox_detection_out_pass inference)
endif()
if(WITH_MKLDNN) if(WITH_MKLDNN)
pass_library(mkldnn_placement_pass base DEPS placement_pass_base DIR mkldnn) pass_library(mkldnn_placement_pass base DEPS placement_pass_base DIR mkldnn)
pass_library(depthwise_conv_mkldnn_pass base DIR mkldnn) pass_library(depthwise_conv_mkldnn_pass base DIR mkldnn)

@ -1,83 +0,0 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <string>
#include "paddle/fluid/framework/ir/fillconstant_elementwisemul_fuse.h"
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
namespace paddle {
namespace framework {
namespace ir {
#define GET_IR_NODE(node__) GET_IR_NODE_FROM_SUBGRAPH(node__, node__, pattern);
#define GET_NODES \
GET_IR_NODE(fill_constant); \
GET_IR_NODE(fill_constant_out); \
GET_IR_NODE(elementwise_mul); \
GET_IR_NODE(elementwise_mul_out);
void FillconstantElementwisemulFuse::ApplyImpl(ir::Graph* graph) const {
const std::string pattern_name = "fillconstant_elementwisemul_fuse";
FusePassBase::Init(pattern_name, graph);
GraphPatternDetector gpd;
auto* x = gpd.mutable_pattern()
->NewNode("x")
->assert_is_op_input("elementwise_mul", "X")
->AsInput();
patterns::FillConstantElementWiseMulFuse pattern(gpd.mutable_pattern(),
pattern_name);
pattern(x);
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
GET_NODES;
PADDLE_ENFORCE(subgraph.count(x));
auto* elementwise_in = subgraph.at(x);
float constant_value =
boost::get<float>(fill_constant->Op()->GetAttr("value"));
framework::OpDesc new_op_desc;
new_op_desc.SetType("scale");
new_op_desc.SetInput("X", {elementwise_in->Name()});
new_op_desc.SetAttr("scale", constant_value);
new_op_desc.SetAttr("bias", static_cast<float>(0.0));
new_op_desc.SetAttr("bias_after_scale", true);
new_op_desc.SetOutput("Out", {elementwise_mul_out->Name()});
new_op_desc.Flush();
// Create a new node for the fused op.
auto* scale_op = graph->CreateOpNode(&new_op_desc);
IR_NODE_LINK_TO(elementwise_in, scale_op); // Input
IR_NODE_LINK_TO(scale_op, elementwise_mul_out); // Output
// Delete the unneeded nodes.
GraphSafeRemoveNodes(graph,
{fill_constant, fill_constant_out, elementwise_mul});
};
gpd(graph, handler);
}
} // namespace ir
} // namespace framework
} // namespace paddle
REGISTER_PASS(fillconstant_elementwisemul_fuse,
paddle::framework::ir::FillconstantElementwisemulFuse);

@ -1878,173 +1878,6 @@ PDNode *patterns::TransposeFlattenConcat::operator()(
return concat_out; return concat_out;
} }
PDNode *patterns::AnakinDetectionPattern::operator()(
std::vector<PDNode *> conv_in, int times, std::string priorbox_type,
bool is_reshape) {
// The times represents the repeat times of the
// {prior_box, prior_box_loc_out, flatten, prior_box_var_out, reshape}
const int kNumFields = 7;
const int kPriorBoxLocOffset = 1;
const int kReshape1Offset = 2;
const int kReshape1OutOffset = 3;
const int kPriorBoxVarOffset = 4;
const int kReshape2Offset = 5;
const int kReshape2OutOffset = 6;
const int kBoxCoderThirdInputOffset = times;
const int kMultiClassSecondInputNmsOffset = times + 1;
std::vector<PDNode *> nodes;
std::string op_after_priorbox = is_reshape ? "reshape2" : "flatten2";
for (int i = 0; i < times; i++) {
nodes.push_back(
pattern->NewNode(GetNodeName("prior_box" + std::to_string(i)))
->assert_is_op(priorbox_type));
nodes.push_back(pattern->NewNode(GetNodeName("box_out" + std::to_string(i)))
->assert_is_op_output(priorbox_type, "Boxes")
->assert_is_op_input(op_after_priorbox, "X")
->AsIntermediate());
nodes.push_back(
pattern->NewNode(GetNodeName("reshape1" + std::to_string(i)))
->assert_is_op(op_after_priorbox));
nodes.push_back(
pattern->NewNode(GetNodeName("reshape1_out" + std::to_string(i)))
->assert_is_op_output(op_after_priorbox)
->assert_is_op_nth_input("concat", "X", i)
->AsIntermediate());
nodes.push_back(
pattern->NewNode(GetNodeName("box_var_out" + std::to_string(i)))
->assert_is_op_output(priorbox_type, "Variances")
->assert_is_op_input(op_after_priorbox, "X")
->AsIntermediate());
nodes.push_back(
pattern->NewNode(GetNodeName("reshape2" + std::to_string(i)))
->assert_is_op(op_after_priorbox));
nodes.push_back(
pattern->NewNode(GetNodeName("reshape2_out" + std::to_string(i)))
->assert_is_op_output(op_after_priorbox)
->assert_is_op_nth_input("concat", "X", i)
->AsIntermediate());
}
auto concat_op1 = pattern->NewNode(GetNodeName("concat1"))
->assert_is_op("concat")
->assert_op_has_n_inputs("concat", times);
auto concat_out1 = pattern->NewNode(GetNodeName("concat1_out"))
->assert_is_op_output("concat")
->AsIntermediate();
auto concat_op2 = pattern->NewNode(GetNodeName("concat2"))
->assert_is_op("concat")
->assert_op_has_n_inputs("concat", times);
auto concat_out2 = pattern->NewNode(GetNodeName("concat2_out"))
->assert_is_op_output("concat")
->AsIntermediate();
auto box_coder_op = pattern->NewNode(GetNodeName("box_coder"))
->assert_is_op("box_coder")
->assert_op_has_n_inputs("box_coder", 3);
auto box_coder_out = pattern->NewNode(GetNodeName("box_coder_out"))
->assert_is_op_output("box_coder")
->AsIntermediate();
auto transpose_before_nms =
pattern->NewNode(GetNodeName("transpose_before_nms"))
->assert_is_op("transpose2");
auto transpose_before_nms_out =
pattern->NewNode(GetNodeName("transpose_before_nms_out"))
->assert_is_op_output("transpose2")
->assert_is_op_input("multiclass_nms", "Scores")
->AsIntermediate();
auto multiclass_nms_op = pattern->NewNode(GetNodeName("multiclass_nms"))
->assert_is_op("multiclass_nms")
->assert_op_has_n_inputs("multiclass_nms", 2);
auto multiclass_nms_out = pattern->NewNode(GetNodeName("multiclass_nms_out"))
->assert_is_op_output("multiclass_nms")
->AsOutput();
std::vector<PDNode *> reshape1_outs;
std::vector<PDNode *> reshape2_outs;
for (int i = 0; i < times; i++) {
conv_in[i]->AsInput();
// prior_box
nodes[i * kNumFields]->LinksFrom({conv_in[i]});
// prior_box box out
nodes[i * kNumFields + kPriorBoxLocOffset]->LinksFrom(
{nodes[i * kNumFields]});
// reshape
nodes[i * kNumFields + kReshape1Offset]->LinksFrom(
{nodes[i * kNumFields + kPriorBoxLocOffset]});
// reshape_out
nodes[i * kNumFields + kReshape1OutOffset]->LinksFrom(
{nodes[i * kNumFields + kReshape1Offset]});
nodes[i * kNumFields + kPriorBoxVarOffset]->LinksFrom(
{nodes[i * kNumFields]});
// reshape
nodes[i * kNumFields + kReshape2Offset]->LinksFrom(
{nodes[i * kNumFields + kPriorBoxVarOffset]});
// reshape_out
nodes[i * kNumFields + kReshape2OutOffset]->LinksFrom(
{nodes[i * kNumFields + kReshape2Offset]});
reshape1_outs.push_back(nodes[i * kNumFields + kReshape1OutOffset]);
reshape2_outs.push_back(nodes[i * kNumFields + kReshape2OutOffset]);
}
concat_op1->LinksFrom(reshape1_outs);
concat_op2->LinksFrom(reshape2_outs);
concat_out1->LinksFrom({concat_op1});
concat_out2->LinksFrom({concat_op2});
conv_in[kBoxCoderThirdInputOffset]->AsInput();
conv_in[kMultiClassSecondInputNmsOffset]->AsInput();
box_coder_op->LinksFrom(
{concat_out1, concat_out2, conv_in[kBoxCoderThirdInputOffset]});
box_coder_out->LinksFrom({box_coder_op});
transpose_before_nms->LinksFrom({conv_in[kMultiClassSecondInputNmsOffset]});
transpose_before_nms_out->LinksFrom({transpose_before_nms});
multiclass_nms_op->LinksFrom({box_coder_out, transpose_before_nms_out})
.LinksTo({multiclass_nms_out});
return multiclass_nms_out;
}
PDNode *patterns::FillConstantElementWiseMulFuse::operator()(
PDNode *elementwise_op_input) {
auto fill_constant =
pattern->NewNode(fill_constant_repr())->assert_is_op("fill_constant");
auto fill_constant_out = pattern->NewNode(fill_constant_out_repr())
->assert_is_op_output("fill_constant")
->assert_is_op_input("elementwise_mul", "Y")
->AsIntermediate();
auto elementwise_mul_op =
pattern->NewNode(elementwise_mul_repr())->assert_is_op("elementwise_mul");
auto elementwise_mul_out = pattern->NewNode(elementwise_mul_out_repr())
->assert_is_op_output("elementwise_mul")
->AsOutput();
fill_constant_out->LinksFrom({fill_constant});
elementwise_mul_op->LinksFrom({elementwise_op_input, fill_constant_out});
elementwise_mul_out->LinksFrom({elementwise_mul_op});
return elementwise_mul_out;
}
void patterns::QuantDequantOpFuse::operator()(PDNode *quant_op_input, void patterns::QuantDequantOpFuse::operator()(PDNode *quant_op_input,
const std::string &op_type, const std::string &op_type,
const std::string &weight_name, const std::string &weight_name,

@ -1093,37 +1093,6 @@ struct TransposeFlattenConcat : public PatternBase {
} }
}; };
struct AnakinDetectionPattern : public PatternBase {
AnakinDetectionPattern(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "anakin_detect_pattern") {}
PDNode* operator()(std::vector<PDNode*> conv_inputs, int times,
std::string priorbox_type, bool is_reshape);
std::string GetNodeName(const std::string& op_type) {
return PDNodeName(name_scope_, repr_, id_, op_type);
}
PDNode* GetPDNode(const std::string& op_type) {
return pattern->RetrieveNode(GetNodeName(op_type));
}
};
struct FillConstantElementWiseMulFuse : public PatternBase {
FillConstantElementWiseMulFuse(PDPattern* pattern,
const std::string& name_scope)
: PatternBase(pattern, name_scope,
"anakin_fillconstant_elementwisemul_fuse") {}
PDNode* operator()(PDNode* elementwise_op_input);
// declare operator node's name
PATTERN_DECL_NODE(fill_constant);
PATTERN_DECL_NODE(fill_constant_out);
PATTERN_DECL_NODE(elementwise_mul);
PATTERN_DECL_NODE(elementwise_mul_out);
};
struct QuantDequantOpFuse : public PatternBase { struct QuantDequantOpFuse : public PatternBase {
QuantDequantOpFuse(PDPattern* pattern, const std::string& name_scope) QuantDequantOpFuse(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "quant_dequant_fuse") {} : PatternBase(pattern, name_scope, "quant_dequant_fuse") {}

@ -1,233 +0,0 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include <vector>
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
#include "paddle/fluid/framework/ir/node.h"
#include "paddle/fluid/framework/ir/simplify_anakin_priorbox_detection_out_pass.h"
namespace paddle {
namespace framework {
namespace ir {
void RunSimplifyAnakinDetection(ir::Graph *graph, int times, bool is_density,
bool is_reshape) {
const std::string pattern_name =
"simplify_anakin_detection_pattern_pass" + std::to_string(times);
std::string priorbox_type = is_density ? "density_prior_box" : "prior_box";
GraphPatternDetector gpd;
std::vector<PDNode *> input_nodes;
for (int i = 0; i < times; i++) {
input_nodes.push_back(gpd.mutable_pattern()
->NewNode("x" + std::to_string(i))
->assert_is_op_input(priorbox_type, "Input")
->AsInput());
}
input_nodes.push_back(gpd.mutable_pattern()
->NewNode("x" + std::to_string(times))
->assert_is_op_input("box_coder", "TargetBox")
->AsInput());
input_nodes.push_back(gpd.mutable_pattern()
->NewNode("x" + std::to_string(times + 1))
->assert_is_op_input("transpose2")
->AsInput());
patterns::AnakinDetectionPattern pattern(gpd.mutable_pattern(), pattern_name);
pattern(input_nodes, times, priorbox_type, is_reshape);
auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
Graph *g) {
const int kNumFields = 7;
const int kPriorBoxLocOffset = 1;
const int kReshape1Offset = 2;
const int kReshape1OutOffset = 3;
const int kPriorBoxVarOffset = 4;
const int kReshape2Offset = 5;
const int kReshape2OutOffset = 6;
std::vector<Node *> nodes;
for (int i = 0; i < times; i++) {
PADDLE_ENFORCE(
subgraph.at(pattern.GetPDNode("prior_box" + std::to_string(i))));
PADDLE_ENFORCE(
subgraph.at(pattern.GetPDNode("box_out" + std::to_string(i))));
PADDLE_ENFORCE(
subgraph.at(pattern.GetPDNode("reshape1" + std::to_string(i))));
PADDLE_ENFORCE(
subgraph.at(pattern.GetPDNode("reshape1_out" + std::to_string(i))));
PADDLE_ENFORCE(
subgraph.at(pattern.GetPDNode("reshape2" + std::to_string(i))));
PADDLE_ENFORCE(
subgraph.at(pattern.GetPDNode("reshape2_out" + std::to_string(i))));
PADDLE_ENFORCE(
subgraph.at(pattern.GetPDNode("box_var_out" + std::to_string(i))));
nodes.push_back(
subgraph.at(pattern.GetPDNode("prior_box" + std::to_string(i))));
nodes.push_back(
subgraph.at(pattern.GetPDNode("box_out" + std::to_string(i))));
nodes.push_back(
subgraph.at(pattern.GetPDNode("reshape1" + std::to_string(i))));
nodes.push_back(
subgraph.at(pattern.GetPDNode("reshape1_out" + std::to_string(i))));
nodes.push_back(
subgraph.at(pattern.GetPDNode("box_var_out" + std::to_string(i))));
nodes.push_back(
subgraph.at(pattern.GetPDNode("reshape2" + std::to_string(i))));
nodes.push_back(
subgraph.at(pattern.GetPDNode("reshape2_out" + std::to_string(i))));
}
Node *concat_op1 = subgraph.at(pattern.GetPDNode("concat1"));
Node *concat_out1 = subgraph.at(pattern.GetPDNode("concat1_out"));
Node *concat_op2 = subgraph.at(pattern.GetPDNode("concat2"));
Node *concat_out2 = subgraph.at(pattern.GetPDNode("concat2_out"));
Node *box_coder_third_input = subgraph.at(input_nodes[times]);
Node *box_coder_op = subgraph.at(pattern.GetPDNode("box_coder"));
Node *box_coder_out = subgraph.at(pattern.GetPDNode("box_coder_out"));
Node *multiclass_nms_second_input = subgraph.at(input_nodes[times + 1]);
Node *transpose_before_nms =
subgraph.at(pattern.GetPDNode("transpose_before_nms"));
Node *transpose_before_nms_out =
subgraph.at(pattern.GetPDNode("transpose_before_nms_out"));
Node *multiclass_nms = subgraph.at(pattern.GetPDNode("multiclass_nms"));
Node *multiclass_nms_out =
subgraph.at(pattern.GetPDNode("multiclass_nms_out"));
std::string code_type =
boost::get<std::string>(box_coder_op->Op()->GetAttr("code_type"));
bool box_normalized =
boost::get<bool>(box_coder_op->Op()->GetAttr("box_normalized"));
int background_label =
boost::get<int>(multiclass_nms->Op()->GetAttr("background_label"));
float score_threshold =
boost::get<float>(multiclass_nms->Op()->GetAttr("score_threshold"));
int nms_top_k = boost::get<int>(multiclass_nms->Op()->GetAttr("nms_top_k"));
float nms_threshold =
boost::get<float>(multiclass_nms->Op()->GetAttr("nms_threshold"));
float nms_eta = boost::get<float>(multiclass_nms->Op()->GetAttr("nms_eta"));
int keep_top_k =
boost::get<int>(multiclass_nms->Op()->GetAttr("keep_top_k"));
std::vector<std::string> concat1_input_names;
for (int i = 0; i < times; i++) {
concat1_input_names.push_back(
nodes[i * kNumFields + kPriorBoxLocOffset]->Name());
}
framework::OpDesc concat1_desc;
concat1_desc.SetType("concat");
concat1_desc.SetInput("X", concat1_input_names);
concat1_desc.SetAttr("axis", 2);
concat1_desc.SetOutput("Out", {concat_out1->Name()});
auto *new_add_concat_op = graph->CreateOpNode(&concat1_desc);
for (int i = 0; i < times; i++) {
nodes[i * kNumFields + kPriorBoxLocOffset]->outputs.push_back(
new_add_concat_op);
new_add_concat_op->inputs.push_back(
nodes[i * kNumFields + kPriorBoxLocOffset]);
}
framework::OpDesc new_op_desc;
new_op_desc.SetType("detection_out");
new_op_desc.SetInput("PriorBox", {concat_out1->Name()});
new_op_desc.SetInput("TargetBox", {box_coder_third_input->Name()});
new_op_desc.SetInput("Scores", {multiclass_nms_second_input->Name()});
new_op_desc.SetAttr("code_type", code_type);
new_op_desc.SetAttr("box_normalized", box_normalized);
new_op_desc.SetAttr("background_label", background_label);
new_op_desc.SetAttr("score_threshold", score_threshold);
new_op_desc.SetAttr("nms_top_k", nms_top_k);
new_op_desc.SetAttr("nms_threshold", nms_threshold);
new_op_desc.SetAttr("nms_eta", nms_eta);
new_op_desc.SetAttr("keep_top_k", keep_top_k);
new_op_desc.SetOutput("Out", {multiclass_nms_out->Name()});
new_op_desc.Flush();
// Create a new node for the fused op.
auto *detection_out_op = graph->CreateOpNode(&new_op_desc);
std::unordered_set<const Node *> delete_nodes;
for (int i = 0; i < times; i++) {
nodes[i * kNumFields + kPriorBoxLocOffset]->outputs.push_back(concat_op1);
delete_nodes.insert(nodes[i * kNumFields + kReshape1Offset]);
delete_nodes.insert(nodes[i * kNumFields + kReshape1OutOffset]);
delete_nodes.insert(nodes[i * kNumFields + kPriorBoxVarOffset]);
delete_nodes.insert(nodes[i * kNumFields + kReshape2Offset]);
delete_nodes.insert(nodes[i * kNumFields + kReshape2OutOffset]);
}
delete_nodes.insert(concat_op1);
delete_nodes.insert(concat_op2);
delete_nodes.insert(concat_out2);
delete_nodes.insert(box_coder_op);
delete_nodes.insert(box_coder_out);
delete_nodes.insert(transpose_before_nms);
delete_nodes.insert(transpose_before_nms_out);
delete_nodes.insert(multiclass_nms);
new_add_concat_op->outputs.push_back(concat_out1);
concat_out1->inputs.push_back(new_add_concat_op);
detection_out_op->inputs.push_back(concat_out1);
detection_out_op->inputs.push_back(box_coder_third_input);
detection_out_op->inputs.push_back(multiclass_nms_second_input);
detection_out_op->outputs.push_back(multiclass_nms_out);
concat_out1->outputs.push_back(detection_out_op);
box_coder_third_input->outputs.push_back(detection_out_op);
multiclass_nms_second_input->outputs.push_back(detection_out_op);
multiclass_nms_out->inputs.push_back(detection_out_op);
// Delete the unneeded nodes.
GraphSafeRemoveNodes(graph, delete_nodes);
};
gpd(graph, handler);
}
void SimplifyAnakinDetectionPatternPass::ApplyImpl(ir::Graph *graph) const {
const int pattern_nums = 6;
const std::string pattern_name = "simplify_anakin_detection_pattern_pass";
FusePassBase::Init(pattern_name, graph);
std::vector<bool> options = {true, false};
for (const auto &is_density : options) {
for (const auto &is_reshape : options) {
for (int i = 1; i <= pattern_nums; i++) {
RunSimplifyAnakinDetection(graph, i, is_density, is_reshape);
}
}
}
}
} // namespace ir
} // namespace framework
} // namespace paddle
typedef paddle::framework::ir::SimplifyAnakinDetectionPatternPass
priorbox_pattern;
REGISTER_PASS(simplify_anakin_priorbox_detection_out_pass, priorbox_pattern);

@ -1,39 +0,0 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <unordered_set>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
namespace paddle {
namespace framework {
namespace ir {
// There may be many transpose-flatten structures in a model, and the output of
// these structures will be used as inputs to the concat Op. This pattern will
// be detected by our pass. The times here represents the repeat times of this
// structure.
class SimplifyAnakinDetectionPatternPass : public FusePassBase {
public:
virtual ~SimplifyAnakinDetectionPatternPass() {}
protected:
void ApplyImpl(ir::Graph* graph) const override;
};
} // namespace ir
} // namespace framework
} // namespace paddle

@ -65,7 +65,7 @@ class SubGraphFuser {
using NodeInsideSubgraphTeller = SubgraphDetector::NodeInsideSubgraphTeller; using NodeInsideSubgraphTeller = SubgraphDetector::NodeInsideSubgraphTeller;
SubGraphFuser(Graph *graph, const NodeInsideSubgraphTeller &teller, SubGraphFuser(Graph *graph, const NodeInsideSubgraphTeller &teller,
int min_subgraph_size, std::string name = "anakin_engine") int min_subgraph_size, std::string name = "tensorrt_engine")
: graph_(graph), : graph_(graph),
node_inside_subgraph_teller_(teller), node_inside_subgraph_teller_(teller),
min_subgraph_size_{min_subgraph_size}, min_subgraph_size_{min_subgraph_size},

@ -30,10 +30,6 @@ if (TENSORRT_FOUND)
add_subdirectory(tensorrt) add_subdirectory(tensorrt)
endif() endif()
if (ANAKIN_SUBGRAPH)
add_subdirectory(anakin)
endif()
if (WITH_LITE) if (WITH_LITE)
add_subdirectory(lite) add_subdirectory(lite)
endif() endif()
@ -68,9 +64,6 @@ if(NOT APPLE)
set_target_properties(paddle_fluid PROPERTIES LINK_FLAGS "${LINK_FLAGS}") set_target_properties(paddle_fluid PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
endif() endif()
if(ANAKIN_FOUND)
set(ANAKIN_SHARED_INFERENCE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/api/api_anakin_engine.cc)
endif()
set(SHARED_INFERENCE_SRCS set(SHARED_INFERENCE_SRCS
io.cc io.cc
${CMAKE_CURRENT_SOURCE_DIR}/../framework/data_feed.cc ${CMAKE_CURRENT_SOURCE_DIR}/../framework/data_feed.cc
@ -80,8 +73,7 @@ set(SHARED_INFERENCE_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/api/api_impl.cc ${CMAKE_CURRENT_SOURCE_DIR}/api/api_impl.cc
${CMAKE_CURRENT_SOURCE_DIR}/api/analysis_predictor.cc ${CMAKE_CURRENT_SOURCE_DIR}/api/analysis_predictor.cc
${CMAKE_CURRENT_SOURCE_DIR}/api/details/zero_copy_tensor.cc ${CMAKE_CURRENT_SOURCE_DIR}/api/details/zero_copy_tensor.cc
${mkldnn_quantizer_src_file} ${mkldnn_quantizer_src_file})
${ANAKIN_SHARED_INFERENCE_SRCS})
# Create shared inference library defaultly # Create shared inference library defaultly
cc_library(paddle_fluid_shared SHARED SRCS ${SHARED_INFERENCE_SRCS} cc_library(paddle_fluid_shared SHARED SRCS ${SHARED_INFERENCE_SRCS}

@ -1,5 +0,0 @@
cc_library(anakin_engine SRCS engine.cc DEPS framework_proto boost)
cc_library(anakin_op_teller SRCS op_teller.cc DEPS framework_proto device_context boost)
target_link_libraries(anakin_engine anakin anakin_saber_common)
cc_test(test_anakin_engine SRCS test_anakin_engine.cc DEPS anakin_engine)
add_subdirectory(convert)

@ -1,23 +0,0 @@
cc_library(anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc
elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc
batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc
detection_out.cc scale.cc dropout.cc im2sequence.cc sum.cc affine_channel.cc
roi_align.cc shuffle_channel.cc helper.cc DEPS anakin_engine framework_proto
scope op_registry gtest gflags)
cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op)
cc_test(test_anakin_conv2d SRCS test_conv2d_op.cc DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv)
cc_test(test_anakin_activation SRCS test_activation_op.cc DEPS activation_op anakin_op_converter)
cc_test(test_anakin_pool2d SRCS test_pool2d_op.cc DEPS anakin_op_converter pool_op pooling)
cc_test(test_anakin_concat SRCS test_concat_op.cc DEPS anakin_op_converter concat_op concat_and_split)
cc_test(test_anakin_split SRCS test_split_op.cc DEPS anakin_op_converter split_op concat_and_split)
cc_test(test_anakin_elementwise SRCS test_elementwise_op.cc DEPS anakin_op_converter elementwise_add_op elementwise_mul_op)
cc_test(test_anakin_relu SRCS test_relu_op.cc DEPS activation_op anakin_op_converter)
cc_test(test_anakin_softmax SRCS test_softmax_op.cc DEPS anakin_op_converter softmax_op softmax)
cc_test(test_anakin_reshape SRCS test_reshape_op.cc DEPS anakin_op_converter reshape_op)
cc_test(test_anakin_flatten SRCS test_flatten_op.cc DEPS anakin_op_converter flatten_op reshape_op)
cc_test(test_anakin_transpose SRCS test_transpose_op.cc DEPS anakin_op_converter transpose_op)
cc_test(test_anakin_batch_norm SRCS test_batch_norm_op.cc DEPS anakin_op_converter batch_norm_op)
cc_test(test_anakin_dropout SRCS test_dropout_op.cc DEPS anakin_op_converter dropout_op)
cc_test(test_anakin_sum SRCS test_sum_op.cc DEPS anakin_op_converter sum_op selected_rows_functor)
cc_test(test_anakin_affine_channel SRCS test_affine_channel_op.cc DEPS anakin_op_converter affine_channel_op)

@ -1,64 +0,0 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/activation.h"
#include <algorithm>
#include <map>
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
ActivationOpConverter<TargetT, PrecisionT>::ActivationOpConverter(
const std::string &op_type)
: op_type_(op_type) {
auto it = anakin_op_types_.find(op_type_);
PADDLE_ENFORCE(it != anakin_op_types_.end(),
"activation op type is not support");
anakin_op_type_ = it->second;
}
template <typename TargetT, ::anakin::Precision PrecisionT>
void ActivationOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
auto input_name = op_desc.Input("X").front();
auto output_name = op_desc.Output("Out").front();
this->engine_->AddOp(op_name, "Activation", {input_name}, {output_name});
this->engine_->AddOpAttr(op_name, "type", anakin_op_type_);
if (op_type_ == "swish") {
float beta = boost::get<float>(op_desc.GetAttr("beta"));
this->engine_->AddOpAttr(op_name, "clip_relu_num", beta);
}
if (op_type_ == "relu6") {
float threshold = boost::get<float>(op_desc.GetAttr("threshold"));
this->engine_->AddOpAttr(op_name, "clip_relu_num", threshold);
}
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(sigmoid, SigmoidOpConverter);
REGISTER_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter);
REGISTER_ANAKIN_OP_CONVERTER(swish, SwishOpConverter);
REGISTER_ANAKIN_OP_CONVERTER(relu6, Relu6OpConverter);

@ -1,72 +0,0 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
class ActivationOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
public:
explicit ActivationOpConverter(const std::string &op_type);
virtual void operator()(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const framework::Scope &scope,
bool test_mode) override;
virtual ~ActivationOpConverter() {}
private:
std::string op_type_;
std::string anakin_op_type_;
std::map<std::string, std::string> anakin_op_types_{{"tanh", "TanH"},
{"sigmoid", "Sigmoid"},
{"relu6", "ClippedRelu"},
{"swish", "Swish"}};
};
template <typename TargetT, ::anakin::Precision PrecisionT>
class TanhOpConverter : public ActivationOpConverter<TargetT, PrecisionT> {
public:
TanhOpConverter() : ActivationOpConverter<TargetT, PrecisionT>("tanh") {}
};
template <typename TargetT, ::anakin::Precision PrecisionT>
class SigmoidOpConverter : public ActivationOpConverter<TargetT, PrecisionT> {
public:
SigmoidOpConverter()
: ActivationOpConverter<TargetT, PrecisionT>("sigmoid") {}
};
template <typename TargetT, ::anakin::Precision PrecisionT>
class Relu6OpConverter : public ActivationOpConverter<TargetT, PrecisionT> {
public:
Relu6OpConverter() : ActivationOpConverter<TargetT, PrecisionT>("relu6") {}
};
template <typename TargetT, ::anakin::Precision PrecisionT>
class SwishOpConverter : public ActivationOpConverter<TargetT, PrecisionT> {
public:
SwishOpConverter() : ActivationOpConverter<TargetT, PrecisionT>("swish") {}
};
} // namespace anakin
} // namespace inference
} // namespace paddle

@ -1,55 +0,0 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/affine_channel.h"
#include <algorithm>
#include <string>
#include <vector>
#include "paddle/fluid/inference/anakin/convert/helper.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
void AffineChannelOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
auto input_name = op_desc.Input("X").front();
auto output_name = op_desc.Output("Out").front();
this->engine_->AddOp(op_name, "AffineChannel", {input_name}, {output_name});
// Copy the Scale to CPUPlace and get the pointer.
auto *scale_v = scope.FindVar(op_desc.Input("Scale").front());
PADDLE_ENFORCE_NOT_NULL(scale_v);
auto weight1 = pblock_from_var<TargetT, PrecisionT>(*scale_v, this->engine_);
this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
// Copy the Bias to CPUPlace and get the pointer.
auto *bias_v = scope.FindVar(op_desc.Input("Bias").front());
PADDLE_ENFORCE_NOT_NULL(bias_v);
auto weight2 = pblock_from_var<TargetT, PrecisionT>(*bias_v, this->engine_);
this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(affine_channel, AffineChannelOpConverter);

@ -1,40 +0,0 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
class AffineChannelOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
public:
AffineChannelOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const framework::Scope &scope,
bool test_mode) override;
virtual ~AffineChannelOpConverter() {}
private:
};
} // namespace anakin
} // namespace inference
} // namespace paddle

@ -1,85 +0,0 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/batch_norm.h"
#include <math.h>
#include <algorithm>
#include <map>
#include <string>
#include <vector>
#include "paddle/fluid/inference/anakin/convert/helper.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
void BatchNormOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Output("Y").size(), 1);
std::map<std::string, std::string> inputs;
for (auto k : {"X", "Scale", "Bias", "Mean", "Variance"}) {
PADDLE_ENFORCE_EQ(op_desc.Input(k).size(), 1UL);
}
auto input = op_desc.Input("X").front();
auto output = op_desc.Output("Y").front();
auto op_name = op_desc.Type() + ":" + op_desc.Output("Y").front();
auto epsilon = boost::get<float>(op_desc.GetAttr("epsilon"));
auto bn_op_name = op_name + ":bn";
auto bn_output = bn_op_name + "_output";
this->engine_->AddOp(bn_op_name, "BatchNorm", {input}, {bn_output});
this->engine_->AddOpAttr(bn_op_name, "epsilon", epsilon);
this->engine_->AddOpAttr(bn_op_name, "momentum", static_cast<float>(1.0));
auto scale_op_name = op_name + ":scale";
this->engine_->AddOp(scale_op_name, "Scale", {bn_output}, {output});
this->engine_->AddOpAttr(scale_op_name, "axis", 1);
this->engine_->AddOpAttr(scale_op_name, "num_axes", 1);
this->engine_->AddOpAttr(scale_op_name, "bias_term", true);
auto *mean_v = scope.FindVar(op_desc.Input("Mean").front());
PADDLE_ENFORCE_NOT_NULL(mean_v);
auto weight1 = pblock_from_var<TargetT, PrecisionT>(*mean_v, this->engine_);
this->engine_->AddOpAttr(bn_op_name, "weight_1", *weight1);
auto *variance_v = scope.FindVar(op_desc.Input("Variance").front());
PADDLE_ENFORCE_NOT_NULL(variance_v);
auto weight2 =
pblock_from_var<TargetT, PrecisionT>(*variance_v, this->engine_);
this->engine_->AddOpAttr(bn_op_name, "weight_2", *weight2);
auto *weight3 = pblock_from_vector<TargetT, PrecisionT>(
std::vector<float>({1}), this->engine_);
this->engine_->AddOpAttr(bn_op_name, "weight_3", *weight3);
auto *scale_v = scope.FindVar(op_desc.Input("Scale").front());
PADDLE_ENFORCE_NOT_NULL(scale_v);
auto scale = pblock_from_var<TargetT, PrecisionT>(*scale_v, this->engine_);
this->engine_->AddOpAttr(scale_op_name, "weight_1", *scale);
auto *bias_v = scope.FindVar(op_desc.Input("Bias").front());
PADDLE_ENFORCE_NOT_NULL(bias_v);
auto bias = pblock_from_var<TargetT, PrecisionT>(*bias_v, this->engine_);
this->engine_->AddOpAttr(scale_op_name, "weight_2", *bias);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(batch_norm, BatchNormOpConverter);

@ -1,37 +0,0 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
class BatchNormOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
public:
BatchNormOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const framework::Scope &scope,
bool test_mode) override;
virtual ~BatchNormOpConverter() {}
};
} // namespace anakin
} // namespace inference
} // namespace paddle

@ -1,41 +0,0 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/concat.h"
#include <algorithm>
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
void ConcatOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
int axis = boost::get<int>(op_desc.GetAttr("axis"));
auto input_names = op_desc.Input("X");
auto y_name = op_desc.Output("Out").front();
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
this->engine_->AddOp(op_name, "Concat", input_names, {y_name});
this->engine_->AddOpAttr(op_name, "axis", axis);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(concat, ConcatOpConverter);

@ -1,39 +0,0 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
class ConcatOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
public:
ConcatOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const framework::Scope &scope,
bool test_mode) override;
virtual ~ConcatOpConverter() {}
private:
};
} // namespace anakin
} // namespace inference
} // namespace paddle

@ -1,109 +0,0 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/conv2d.h"
#include <algorithm>
#include <memory>
#include <vector>
#include "paddle/fluid/inference/anakin/convert/helper.h"
using anakin::PTuple;
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
void Conv2dOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1UL);
PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1UL);
PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1UL);
auto input_name = op_desc.Input("Input").front();
auto output_name = op_desc.Output("Output").front();
auto op_name = op_desc.Type() + ":" + op_desc.Output("Output").front();
this->engine_->AddOp(op_name, "Convolution", {input_name}, {output_name});
auto *filter_v = scope.FindVar(op_desc.Input("Filter").front());
PADDLE_ENFORCE_NOT_NULL(filter_v);
auto weight_tensor = tensor_from_var(*filter_v, platform::CPUPlace());
auto weight_shape = framework::vectorize<int>(weight_tensor->dims());
PADDLE_ENFORCE_EQ(weight_tensor->dims().size(), 4UL);
const int filter_h = weight_tensor->dims()[2];
const int filter_w = weight_tensor->dims()[3];
auto filter_num = weight_tensor->dims()[0];
this->engine_->template AddOpAttr<int>(op_name, "filter_num", filter_num);
this->engine_->template AddOpAttr<PTuple<int>>(op_name, "kernel_size",
{filter_h, filter_w});
auto strides = boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
this->engine_->template AddOpAttr<PTuple<int>>(op_name, "strides", strides);
auto paddings = boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
this->engine_->template AddOpAttr<PTuple<int>>(op_name, "padding", paddings);
auto dilations = boost::get<std::vector<int>>(op_desc.GetAttr("dilations"));
this->engine_->template AddOpAttr<PTuple<int>>(op_name, "dilation_rate",
dilations);
const int groups = boost::get<int>(op_desc.GetAttr("groups"));
this->engine_->AddOpAttr(op_name, "group", groups);
this->engine_->AddOpAttr(op_name, "axis", 1);
this->engine_->AddOpAttr(op_name, "bias_term", false);
::anakin::saber::Shape anakin_shape(weight_shape);
bool enable_int8 = boost::get<bool>(op_desc.HasAttr("enable_int8"));
if (enable_int8) {
const float int8_range = 127.;
float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
auto weight_scale =
boost::get<std::vector<float>>(op_desc.GetAttr("weight_scale"));
PBlock<TargetT> *weight1 =
new PBlock<TargetT>(anakin_shape, ::anakin::AK_INT8);
this->engine_->RegistBlock(weight1);
float *weight_data = weight_tensor->data<float>();
std::vector<char> weight_int8;
int weight_num = weight_tensor->numel();
for (int i = 0; i < weight_tensor->numel(); i++) {
bool is_valid_int8 =
((weight_data[i] >= -128) && (weight_data[i] <= 127));
PADDLE_ENFORCE(is_valid_int8,
"We are in anakin subgraph int8 mode, the weight of conv "
"should be in range [-128, 127]");
weight_int8.push_back(static_cast<char>(weight_data[i]));
}
memcpy(static_cast<void *>(weight1->h_tensor().mutable_data()),
static_cast<void *>(weight_int8.data()), sizeof(char) * weight_num);
weight1->d_tensor().set_shape(anakin_shape);
weight1->d_tensor().copy_from(weight1->h_tensor());
this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
this->engine_->Graph()->SetOpPrec(op_name, ::anakin::AK_INT8);
this->engine_->Graph()->SetWeightsScale(
op_name, {weight_scale[0] / int8_range}, false);
this->engine_->AddTensorScale(input_name, in_scale / int8_range);
} else {
auto *weight1 = pblock_from_tensor<TargetT, PrecisionT>(
*weight_tensor, weight_shape, this->engine_);
this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
}
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(conv2d, Conv2dOpConverter);

@ -1,37 +0,0 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
class Conv2dOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
public:
Conv2dOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const framework::Scope &scope,
bool test_mode) override;
virtual ~Conv2dOpConverter() {}
};
} // namespace anakin
} // namespace inference
} // namespace paddle

@ -1,115 +0,0 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/conv2d_fusion.h"
#include <algorithm>
#include <memory>
#include <vector>
#include "paddle/fluid/inference/anakin/convert/helper.h"
using anakin::PTuple;
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
void Conv2dFusionOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1UL);
PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1UL);
PADDLE_ENFORCE_EQ(op_desc.Input("Bias").size(), 1UL);
PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1UL);
auto input_name = op_desc.Input("Input").front();
auto output_name = op_desc.Output("Output").front();
auto op_name = op_desc.Type() + ":" + op_desc.Output("Output").front();
this->engine_->AddOp(op_name, "Convolution", {input_name}, {output_name});
auto *filter_v = scope.FindVar(op_desc.Input("Filter").front());
PADDLE_ENFORCE_NOT_NULL(filter_v);
auto weight_tensor = tensor_from_var(*filter_v, platform::CPUPlace());
auto weight_shape = framework::vectorize<int>(weight_tensor->dims());
auto *b_v = scope.FindVar(op_desc.Input("Bias").front());
PADDLE_ENFORCE_NOT_NULL(b_v);
PADDLE_ENFORCE_EQ(weight_tensor->dims().size(), 4UL);
const int filter_h = weight_tensor->dims()[2];
const int filter_w = weight_tensor->dims()[3];
auto filter_num = weight_tensor->dims()[0];
this->engine_->template AddOpAttr<int>(op_name, "filter_num", filter_num);
this->engine_->template AddOpAttr<PTuple<int>>(op_name, "kernel_size",
{filter_h, filter_w});
auto strides = boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
this->engine_->template AddOpAttr<PTuple<int>>(op_name, "strides", strides);
auto paddings = boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
this->engine_->template AddOpAttr<PTuple<int>>(op_name, "padding", paddings);
auto dilations = boost::get<std::vector<int>>(op_desc.GetAttr("dilations"));
this->engine_->template AddOpAttr<PTuple<int>>(op_name, "dilation_rate",
dilations);
const int groups = boost::get<int>(op_desc.GetAttr("groups"));
this->engine_->AddOpAttr(op_name, "group", groups);
this->engine_->AddOpAttr(op_name, "axis", 1);
this->engine_->AddOpAttr(op_name, "bias_term", true);
::anakin::saber::Shape anakin_shape(weight_shape);
bool enable_int8 = boost::get<bool>(op_desc.HasAttr("enable_int8"));
if (enable_int8) {
const float int8_range = 127.;
float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
auto weight_scale =
boost::get<std::vector<float>>(op_desc.GetAttr("weight_scale"));
PBlock<TargetT> *weight1 =
new PBlock<TargetT>(anakin_shape, ::anakin::AK_INT8);
this->engine_->RegistBlock(weight1);
float *weight_data = weight_tensor->data<float>();
std::vector<char> weight_int8;
int weight_num = weight_tensor->numel();
for (int i = 0; i < weight_tensor->numel(); i++) {
bool is_valid_int8 =
((weight_data[i] >= -128) && (weight_data[i] <= 127));
PADDLE_ENFORCE(is_valid_int8,
"We are in anakin subgraph int8 mode, the weight of conv "
"should be in range [-128, 127]");
weight_int8.push_back(static_cast<char>(weight_data[i]));
}
memcpy(static_cast<void *>(weight1->h_tensor().mutable_data()),
static_cast<void *>(weight_int8.data()), sizeof(char) * weight_num);
weight1->d_tensor().set_shape(anakin_shape);
weight1->d_tensor().copy_from(weight1->h_tensor());
this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
this->engine_->Graph()->SetOpPrec(op_name, ::anakin::AK_INT8);
this->engine_->Graph()->SetWeightsScale(
op_name, {weight_scale[0] / int8_range}, false);
this->engine_->AddTensorScale(input_name, in_scale / int8_range);
} else {
auto weight_tensor = tensor_from_var(*filter_v, platform::CPUPlace());
auto weight_shape = framework::vectorize<int>(weight_tensor->dims());
auto *weight1 = pblock_from_tensor<TargetT, PrecisionT>(
*weight_tensor, weight_shape, this->engine_);
this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
auto weight2 = pblock_from_var<TargetT, PrecisionT>(*b_v, this->engine_);
this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
}
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(conv2d_fusion, Conv2dFusionOpConverter);

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save