Merge pull request #16330 from NHZlX/merge_anakin_branch_to_dev
Cherry-pick from PaddlePaddle:feature/anakin-engine: Anakin subgraph support.move-code
commit
fa1796a30a
@ -0,0 +1,32 @@
|
||||
# Locates a prebuilt Anakin library and, when it is usable, exposes its
# headers, its link directory and the PADDLE_WITH_ANAKIN compile definition.
# Nothing is configured for builds without GPU support.
if(NOT WITH_GPU)
  return()
endif()

set(ANAKIN_ROOT "/usr" CACHE PATH "ANAKIN ROOT")

# Only the configured root (cache variable or environment variable) is
# searched; NO_DEFAULT_PATH keeps CMake from picking up a system-wide copy.
find_path(ANAKIN_INCLUDE_DIR anakin_config.h
  PATHS ${ANAKIN_ROOT} ${ANAKIN_ROOT}/include
        $ENV{ANAKIN_ROOT} $ENV{ANAKIN_ROOT}/include
  NO_DEFAULT_PATH
)

find_library(ANAKIN_LIBRARY NAMES libanakin_saber_common.so libanakin.so
  PATHS ${ANAKIN_ROOT}
        $ENV{ANAKIN_ROOT} $ENV{ANAKIN_ROOT}/lib
  NO_DEFAULT_PATH
  DOC "Path to ANAKIN library.")

# Start from a known state so that ANAKIN_FOUND is always defined, including
# the case where the library is present but WITH_DSO is disabled.
set(ANAKIN_FOUND OFF)
if(ANAKIN_INCLUDE_DIR AND ANAKIN_LIBRARY AND WITH_DSO)
  set(ANAKIN_FOUND ON)
endif()

if(ANAKIN_FOUND)
  message(STATUS "Current ANAKIN header is ${ANAKIN_INCLUDE_DIR}/anakin_config.h. ")
  include_directories(${ANAKIN_ROOT}/include)
  include_directories(${ANAKIN_ROOT}/include/saber)
  link_directories(${ANAKIN_ROOT})
  add_definitions(-DPADDLE_WITH_ANAKIN)
endif()
|
@ -0,0 +1,85 @@
|
||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.h"
|
||||
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace framework {
|
||||
namespace ir {
|
||||
|
||||
// Shorthand for GET_IR_NODE_FROM_SUBGRAPH when the local variable and the
// pattern node share the same name. The expansion relies on a variable named
// `pattern` being in scope at the use site. No trailing semicolon here: the
// caller supplies it, which avoids stray empty statements.
#define GET_IR_NODE(node__) GET_IR_NODE_FROM_SUBGRAPH(node__, node__, pattern)
// Introduces the locals `fill_constant`, `fill_constant_out`,
// `elementwise_mul` and `elementwise_mul_out` used by the fuse handler.
#define GET_NODES                 \
  GET_IR_NODE(fill_constant);     \
  GET_IR_NODE(fill_constant_out); \
  GET_IR_NODE(elementwise_mul);   \
  GET_IR_NODE(elementwise_mul_out)
|
||||
|
||||
std::unique_ptr<ir::Graph> AnakinFillconstantElementwisemulFuse::ApplyImpl(
|
||||
std::unique_ptr<ir::Graph> graph) const {
|
||||
const std::string pattern_name = "anakin_fillconstant_elementwisemul_fuse";
|
||||
FusePassBase::Init(pattern_name, graph.get());
|
||||
|
||||
GraphPatternDetector gpd;
|
||||
auto* x = gpd.mutable_pattern()
|
||||
->NewNode("x")
|
||||
->assert_is_op_input("elementwise_mul", "X")
|
||||
->AsInput();
|
||||
|
||||
patterns::AnakinFillConstantElementWiseMulFuse pattern(gpd.mutable_pattern(),
|
||||
pattern_name);
|
||||
pattern(x);
|
||||
|
||||
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
|
||||
Graph* g) {
|
||||
GET_NODES;
|
||||
|
||||
PADDLE_ENFORCE(subgraph.count(x));
|
||||
auto* elementwise_in = subgraph.at(x);
|
||||
float constant_value =
|
||||
boost::get<float>(fill_constant->Op()->GetAttr("value"));
|
||||
|
||||
framework::OpDesc new_op_desc;
|
||||
new_op_desc.SetType("scale");
|
||||
new_op_desc.SetInput("X", {elementwise_in->Name()});
|
||||
new_op_desc.SetAttr("scale", constant_value);
|
||||
new_op_desc.SetAttr("bias", static_cast<float>(0.0));
|
||||
new_op_desc.SetAttr("bias_after_scale", true);
|
||||
new_op_desc.SetOutput("Out", {elementwise_mul_out->Name()});
|
||||
new_op_desc.Flush();
|
||||
|
||||
// Create a new node for the fused op.
|
||||
auto* scale_op = graph->CreateOpNode(&new_op_desc);
|
||||
|
||||
IR_NODE_LINK_TO(elementwise_in, scale_op); // Input
|
||||
IR_NODE_LINK_TO(scale_op, elementwise_mul_out); // Output
|
||||
|
||||
// Delete the unneeded nodes.
|
||||
GraphSafeRemoveNodes(graph.get(),
|
||||
{fill_constant, fill_constant_out, elementwise_mul});
|
||||
};
|
||||
|
||||
gpd(graph.get(), handler);
|
||||
return graph;
|
||||
}
|
||||
|
||||
} // namespace ir
|
||||
} // namespace framework
|
||||
} // namespace paddle
|
||||
|
||||
// Makes the pass available under the name
// "anakin_fillconstant_elementwisemul_fuse".
REGISTER_PASS(anakin_fillconstant_elementwisemul_fuse,
              paddle::framework::ir::AnakinFillconstantElementwisemulFuse);
|
@ -0,0 +1,35 @@
|
||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
|
||||
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace framework {
|
||||
namespace ir {
|
||||
|
||||
class AnakinFillconstantElementwisemulFuse : public FusePassBase {
|
||||
public:
|
||||
virtual ~AnakinFillconstantElementwisemulFuse() {}
|
||||
|
||||
protected:
|
||||
std::unique_ptr<ir::Graph> ApplyImpl(
|
||||
std::unique_ptr<ir::Graph> graph) const override;
|
||||
};
|
||||
|
||||
} // namespace ir
|
||||
} // namespace framework
|
||||
} // namespace paddle
|
@ -0,0 +1,244 @@
|
||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
|
||||
#include "paddle/fluid/framework/ir/node.h"
|
||||
#include "paddle/fluid/framework/ir/simplify_anakin_detection_pattern_pass.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace framework {
|
||||
namespace ir {
|
||||
|
||||
template <int times>
|
||||
std::unique_ptr<ir::Graph> SimplifyAnakinDetectionPatternPass<times>::ApplyImpl(
|
||||
std::unique_ptr<ir::Graph> graph) const {
|
||||
const std::string pattern_name =
|
||||
"simplify_anakin_detection_pattern_pass" + std::to_string(times);
|
||||
FusePassBase::Init(pattern_name, graph.get());
|
||||
|
||||
GraphPatternDetector gpd;
|
||||
std::vector<PDNode *> input_nodes;
|
||||
for (int i = 0; i < times; i++) {
|
||||
input_nodes.push_back(gpd.mutable_pattern()
|
||||
->NewNode("x" + std::to_string(i))
|
||||
->assert_is_op_input("density_prior_box", "Input")
|
||||
->AsInput());
|
||||
}
|
||||
input_nodes.push_back(gpd.mutable_pattern()
|
||||
->NewNode("x" + std::to_string(times))
|
||||
->assert_is_op_input("box_coder", "TargetBox")
|
||||
->AsInput());
|
||||
|
||||
input_nodes.push_back(gpd.mutable_pattern()
|
||||
->NewNode("x" + std::to_string(times + 1))
|
||||
->assert_is_op_input("transpose2")
|
||||
->AsInput());
|
||||
|
||||
patterns::AnakinDetectionPattern pattern(gpd.mutable_pattern(), pattern_name);
|
||||
pattern(input_nodes, times);
|
||||
|
||||
auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
|
||||
Graph *g) {
|
||||
const int kNumFields = 7;
|
||||
const int kPriorBoxLocOffset = 1;
|
||||
const int kReshape1Offset = 2;
|
||||
const int kReshape1OutOffset = 3;
|
||||
const int kPriorBoxVarOffset = 4;
|
||||
const int kReshape2Offset = 5;
|
||||
const int kReshape2OutOffset = 6;
|
||||
std::vector<Node *> nodes;
|
||||
|
||||
for (int i = 0; i < times; i++) {
|
||||
PADDLE_ENFORCE(
|
||||
subgraph.at(pattern.GetPDNode("prior_box" + std::to_string(i))));
|
||||
PADDLE_ENFORCE(
|
||||
subgraph.at(pattern.GetPDNode("box_out" + std::to_string(i))));
|
||||
PADDLE_ENFORCE(
|
||||
subgraph.at(pattern.GetPDNode("reshape1" + std::to_string(i))));
|
||||
PADDLE_ENFORCE(
|
||||
subgraph.at(pattern.GetPDNode("reshape1_out" + std::to_string(i))));
|
||||
PADDLE_ENFORCE(
|
||||
subgraph.at(pattern.GetPDNode("reshape2" + std::to_string(i))));
|
||||
PADDLE_ENFORCE(
|
||||
subgraph.at(pattern.GetPDNode("reshape2_out" + std::to_string(i))));
|
||||
|
||||
PADDLE_ENFORCE(
|
||||
subgraph.at(pattern.GetPDNode("box_var_out" + std::to_string(i))));
|
||||
|
||||
nodes.push_back(
|
||||
subgraph.at(pattern.GetPDNode("prior_box" + std::to_string(i))));
|
||||
nodes.push_back(
|
||||
subgraph.at(pattern.GetPDNode("box_out" + std::to_string(i))));
|
||||
nodes.push_back(
|
||||
subgraph.at(pattern.GetPDNode("reshape1" + std::to_string(i))));
|
||||
nodes.push_back(
|
||||
subgraph.at(pattern.GetPDNode("reshape1_out" + std::to_string(i))));
|
||||
nodes.push_back(
|
||||
subgraph.at(pattern.GetPDNode("box_var_out" + std::to_string(i))));
|
||||
nodes.push_back(
|
||||
subgraph.at(pattern.GetPDNode("reshape2" + std::to_string(i))));
|
||||
nodes.push_back(
|
||||
subgraph.at(pattern.GetPDNode("reshape2_out" + std::to_string(i))));
|
||||
}
|
||||
|
||||
Node *concat_op1 = subgraph.at(pattern.GetPDNode("concat1"));
|
||||
Node *concat_out1 = subgraph.at(pattern.GetPDNode("concat1_out"));
|
||||
|
||||
Node *concat_op2 = subgraph.at(pattern.GetPDNode("concat2"));
|
||||
Node *concat_out2 = subgraph.at(pattern.GetPDNode("concat2_out"));
|
||||
|
||||
Node *box_coder_third_input = subgraph.at(input_nodes[times]);
|
||||
Node *box_coder_op = subgraph.at(pattern.GetPDNode("box_coder"));
|
||||
Node *box_coder_out = subgraph.at(pattern.GetPDNode("box_coder_out"));
|
||||
|
||||
Node *multiclass_nms_second_input = subgraph.at(input_nodes[times + 1]);
|
||||
Node *transpose_before_nms =
|
||||
subgraph.at(pattern.GetPDNode("transpose_before_nms"));
|
||||
Node *transpose_before_nms_out =
|
||||
subgraph.at(pattern.GetPDNode("transpose_before_nms_out"));
|
||||
|
||||
Node *multiclass_nms = subgraph.at(pattern.GetPDNode("multiclass_nms"));
|
||||
Node *multiclass_nms_out =
|
||||
subgraph.at(pattern.GetPDNode("multiclass_nms_out"));
|
||||
|
||||
std::string code_type =
|
||||
boost::get<std::string>(box_coder_op->Op()->GetAttr("code_type"));
|
||||
bool box_normalized =
|
||||
boost::get<bool>(box_coder_op->Op()->GetAttr("box_normalized"));
|
||||
// auto variance =
|
||||
// boost::get<std::vector<float>>(box_coder_op->Op()->GetAttr("variance"));
|
||||
int background_label =
|
||||
boost::get<int>(multiclass_nms->Op()->GetAttr("background_label"));
|
||||
float score_threshold =
|
||||
boost::get<float>(multiclass_nms->Op()->GetAttr("score_threshold"));
|
||||
int nms_top_k = boost::get<int>(multiclass_nms->Op()->GetAttr("nms_top_k"));
|
||||
float nms_threshold =
|
||||
boost::get<float>(multiclass_nms->Op()->GetAttr("nms_threshold"));
|
||||
float nms_eta = boost::get<float>(multiclass_nms->Op()->GetAttr("nms_eta"));
|
||||
int keep_top_k =
|
||||
boost::get<int>(multiclass_nms->Op()->GetAttr("keep_top_k"));
|
||||
|
||||
std::vector<std::string> concat1_input_names;
|
||||
for (int i = 0; i < times; i++) {
|
||||
concat1_input_names.push_back(
|
||||
nodes[i * kNumFields + kPriorBoxLocOffset]->Name());
|
||||
}
|
||||
|
||||
// int axis = boost::get<int>(concat_op1->Op()->GetAttr("axis"));
|
||||
framework::OpDesc concat1_desc;
|
||||
concat1_desc.SetType("concat");
|
||||
concat1_desc.SetInput("X", concat1_input_names);
|
||||
concat1_desc.SetAttr("axis", 2);
|
||||
concat1_desc.SetOutput("Out", {concat_out1->Name()});
|
||||
|
||||
auto *new_add_concat_op = graph->CreateOpNode(&concat1_desc);
|
||||
|
||||
for (int i = 0; i < times; i++) {
|
||||
nodes[i * kNumFields + kPriorBoxLocOffset]->outputs.push_back(
|
||||
new_add_concat_op);
|
||||
new_add_concat_op->inputs.push_back(
|
||||
nodes[i * kNumFields + kPriorBoxLocOffset]);
|
||||
}
|
||||
|
||||
framework::OpDesc new_op_desc;
|
||||
new_op_desc.SetType("detection_out");
|
||||
new_op_desc.SetInput("PriorBox", {concat_out1->Name()});
|
||||
new_op_desc.SetInput("TargetBox", {box_coder_third_input->Name()});
|
||||
new_op_desc.SetInput("Scores", {multiclass_nms_second_input->Name()});
|
||||
new_op_desc.SetAttr("code_type", code_type);
|
||||
new_op_desc.SetAttr("box_normalized", box_normalized);
|
||||
new_op_desc.SetAttr("background_label", background_label);
|
||||
new_op_desc.SetAttr("score_threshold", score_threshold);
|
||||
new_op_desc.SetAttr("nms_top_k", nms_top_k);
|
||||
new_op_desc.SetAttr("nms_threshold", nms_threshold);
|
||||
new_op_desc.SetAttr("nms_eta", nms_eta);
|
||||
new_op_desc.SetAttr("keep_top_k", keep_top_k);
|
||||
new_op_desc.SetOutput("Out", {multiclass_nms_out->Name()});
|
||||
new_op_desc.Flush();
|
||||
|
||||
// Create a new node for the fused op.
|
||||
auto *detection_out_op = graph->CreateOpNode(&new_op_desc);
|
||||
|
||||
std::unordered_set<const Node *> delete_nodes;
|
||||
|
||||
for (int i = 0; i < times; i++) {
|
||||
nodes[i * kNumFields + kPriorBoxLocOffset]->outputs.push_back(concat_op1);
|
||||
delete_nodes.insert(nodes[i * kNumFields + kReshape1Offset]);
|
||||
delete_nodes.insert(nodes[i * kNumFields + kReshape1OutOffset]);
|
||||
delete_nodes.insert(nodes[i * kNumFields + kPriorBoxVarOffset]);
|
||||
delete_nodes.insert(nodes[i * kNumFields + kReshape2Offset]);
|
||||
delete_nodes.insert(nodes[i * kNumFields + kReshape2OutOffset]);
|
||||
}
|
||||
|
||||
delete_nodes.insert(concat_op1);
|
||||
delete_nodes.insert(concat_op2);
|
||||
delete_nodes.insert(concat_out2);
|
||||
delete_nodes.insert(box_coder_op);
|
||||
delete_nodes.insert(box_coder_out);
|
||||
delete_nodes.insert(transpose_before_nms);
|
||||
delete_nodes.insert(transpose_before_nms_out);
|
||||
delete_nodes.insert(multiclass_nms);
|
||||
|
||||
new_add_concat_op->outputs.push_back(concat_out1);
|
||||
concat_out1->inputs.push_back(new_add_concat_op);
|
||||
|
||||
detection_out_op->inputs.push_back(concat_out1);
|
||||
detection_out_op->inputs.push_back(box_coder_third_input);
|
||||
detection_out_op->inputs.push_back(multiclass_nms_second_input);
|
||||
detection_out_op->outputs.push_back(multiclass_nms_out);
|
||||
|
||||
concat_out1->outputs.push_back(detection_out_op);
|
||||
box_coder_third_input->outputs.push_back(detection_out_op);
|
||||
multiclass_nms_second_input->outputs.push_back(detection_out_op);
|
||||
multiclass_nms_out->inputs.push_back(detection_out_op);
|
||||
|
||||
// Delete the unneeded nodes.
|
||||
GraphSafeRemoveNodes(graph.get(), delete_nodes);
|
||||
};
|
||||
|
||||
gpd(graph.get(), handler);
|
||||
return graph;
|
||||
}
|
||||
|
||||
// The member definitions live in this translation unit, so each branch count
// that gets registered as a pass (1 through 6) is instantiated explicitly.
template class SimplifyAnakinDetectionPatternPass<1>;
template class SimplifyAnakinDetectionPatternPass<2>;
template class SimplifyAnakinDetectionPatternPass<3>;
template class SimplifyAnakinDetectionPatternPass<4>;
template class SimplifyAnakinDetectionPatternPass<5>;
template class SimplifyAnakinDetectionPatternPass<6>;
|
||||
|
||||
} // namespace ir
|
||||
} // namespace framework
|
||||
} // namespace paddle
|
||||
|
||||
// One registered pass per supported branch count. The single-branch variant
// carries no numeric suffix; the others are suffixed with their branch count.
REGISTER_PASS(simplify_anakin_detection_pattern_pass,
              paddle::framework::ir::SimplifyAnakinDetectionPatternPass<1>);

REGISTER_PASS(simplify_anakin_detection_pattern_pass2,
              paddle::framework::ir::SimplifyAnakinDetectionPatternPass<2>);

REGISTER_PASS(simplify_anakin_detection_pattern_pass3,
              paddle::framework::ir::SimplifyAnakinDetectionPatternPass<3>);

REGISTER_PASS(simplify_anakin_detection_pattern_pass4,
              paddle::framework::ir::SimplifyAnakinDetectionPatternPass<4>);

REGISTER_PASS(simplify_anakin_detection_pattern_pass5,
              paddle::framework::ir::SimplifyAnakinDetectionPatternPass<5>);

REGISTER_PASS(simplify_anakin_detection_pattern_pass6,
              paddle::framework::ir::SimplifyAnakinDetectionPatternPass<6>);
|
@ -0,0 +1,41 @@
|
||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <unordered_set>
|
||||
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
|
||||
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace framework {
|
||||
namespace ir {
|
||||
|
||||
// A detection model may contain several density_prior_box branches that are
// combined through reshape, concat, box_coder, transpose2 and multiclass_nms
// ops. This pass detects that pattern and replaces it with a single concat op
// followed by a detection_out op. The times here represents the number of
// density_prior_box branches in the pattern.
|
||||
template <int times>
|
||||
class SimplifyAnakinDetectionPatternPass : public FusePassBase {
|
||||
public:
|
||||
virtual ~SimplifyAnakinDetectionPatternPass() {}
|
||||
|
||||
protected:
|
||||
std::unique_ptr<ir::Graph> ApplyImpl(
|
||||
std::unique_ptr<ir::Graph> graph) const override;
|
||||
};
|
||||
|
||||
} // namespace ir
|
||||
} // namespace framework
|
||||
} // namespace paddle
|
@ -1,4 +1,5 @@
|
||||
cc_library(anakin_engine SRCS engine.cc)
|
||||
# Engine wrapper and op teller; both depend on framework_proto.
cc_library(anakin_engine
  SRCS engine.cc
  DEPS framework_proto)
cc_library(anakin_op_teller
  SRCS op_teller.cc
  DEPS framework_proto)
# The engine links against the prebuilt Anakin libraries.
target_link_libraries(anakin_engine anakin anakin_saber_common)
cc_test(test_anakin_engine
  SRCS test_anakin_engine.cc
  DEPS anakin_engine)
add_subdirectory(convert)
|
||||
|
@ -1,2 +1,19 @@
|
||||
cc_library(anakin_op_converter SRCS fc.cc registrar.cc DEPS anakin_engine framework_proto scope)
|
||||
cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op)
|
||||
# Paddle-op -> Anakin-op converters, one source file per supported op.
cc_library(anakin_op_converter
  SRCS fc.cc conv2d.cc conv2d_fusion.cc elementwise.cc activation.cc pool2d.cc
       concat.cc split.cc relu.cc softmax.cc batch_norm.cc reshape.cc
       flatten.cc transpose.cc density_prior_box.cc detection_out.cc scale.cc
       dropout.cc im2sequence.cc sum.cc
  DEPS anakin_engine framework_proto scope op_registry)

# Converter unit tests. Every test is marked SERIAL (exactly once).
cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op SERIAL)
cc_test(test_anakin_conv2d SRCS test_conv2d_op.cc DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv SERIAL)
cc_test(test_anakin_activation SRCS test_activation_op.cc DEPS activation_op anakin_op_converter SERIAL)
cc_test(test_anakin_pool2d SRCS test_pool2d_op.cc DEPS anakin_op_converter pool_op pooling SERIAL)
cc_test(test_anakin_concat SRCS test_concat_op.cc DEPS anakin_op_converter concat_op concat_and_split SERIAL)
cc_test(test_anakin_split SRCS test_split_op.cc DEPS anakin_op_converter split_op concat_and_split SERIAL)
cc_test(test_anakin_elementwise SRCS test_elementwise_op.cc DEPS anakin_op_converter elementwise_add_op elementwise_mul_op SERIAL)
cc_test(test_anakin_relu SRCS test_relu_op.cc DEPS activation_op anakin_op_converter SERIAL)
cc_test(test_anakin_softmax SRCS test_softmax_op.cc DEPS anakin_op_converter softmax_op softmax SERIAL)
cc_test(test_anakin_reshape SRCS test_reshape_op.cc DEPS anakin_op_converter reshape_op SERIAL)
cc_test(test_anakin_flatten SRCS test_flatten_op.cc DEPS anakin_op_converter flatten_op reshape_op SERIAL)
cc_test(test_anakin_transpose SRCS test_transpose_op.cc DEPS anakin_op_converter transpose_op SERIAL)
cc_test(test_anakin_batch_norm SRCS test_batch_norm_op.cc DEPS anakin_op_converter batch_norm_op SERIAL)
cc_test(test_anakin_dropout SRCS test_dropout_op.cc DEPS anakin_op_converter dropout_op SERIAL)
# The im2sequence test is currently disabled.
#cc_test(test_anakin_im2sequence SRCS test_im2sequence_op.cc DEPS anakin_op_converter im2sequence_op im2col)
cc_test(test_anakin_sum SRCS test_sum_op.cc DEPS anakin_op_converter sum_op selected_rows_functor SERIAL)
|
||||
|
@ -0,0 +1,55 @@
|
||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "paddle/fluid/inference/anakin/convert/activation.h"
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
|
||||
using anakin::graph::GraphGlobalMem;
|
||||
using anakin::AK_FLOAT;
|
||||
using anakin::saber::NV;
|
||||
using anakin::saber::Shape;
|
||||
|
||||
namespace paddle {
|
||||
namespace inference {
|
||||
namespace anakin {
|
||||
|
||||
// Stores the Paddle activation name and resolves it, once, to the matching
// Anakin activation type. Names missing from anakin_op_types_ fail the
// enforce check.
ActivationOpConverter::ActivationOpConverter(const std::string &op_type)
    : op_type_(op_type) {
  auto entry = anakin_op_types_.find(op_type_);
  PADDLE_ENFORCE(entry != anakin_op_types_.end(),
                 "activation op type is not support");
  anakin_op_type_ = entry->second;
}
|
||||
|
||||
// Adds one Anakin "Activation" op for the given Paddle op. The op must have
// exactly one "X" input and one "Out" output; the Anakin op is named
// "<paddle op type>:<output name>" and its "type" attribute is the activation
// type resolved in the constructor. `scope` and `test_mode` are not used.
void ActivationOpConverter::operator()(const framework::proto::OpDesc &op,
                                       const framework::Scope &scope,
                                       bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
  PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);

  auto src_name = op_desc.Input("X").front();
  auto dst_name = op_desc.Output("Out").front();
  auto anakin_name = op_desc.Type() + ":" + dst_name;

  engine_->AddOp(anakin_name, "Activation", {src_name}, {dst_name});
  engine_->AddOpAttr(anakin_name, "type", anakin_op_type_);
}
|
||||
|
||||
} // namespace anakin
|
||||
} // namespace inference
|
||||
} // namespace paddle
|
||||
|
||||
// Register a converter for each supported activation op.
REGISTER_ANAKIN_OP_CONVERTER(sigmoid, SigmoidOpConverter);
REGISTER_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter);
|
@ -0,0 +1,52 @@
|
||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace inference {
|
||||
namespace anakin {
|
||||
|
||||
class ActivationOpConverter : public AnakinOpConverter {
|
||||
public:
|
||||
explicit ActivationOpConverter(const std::string &op_type);
|
||||
|
||||
virtual void operator()(const framework::proto::OpDesc &op,
|
||||
const framework::Scope &scope,
|
||||
bool test_mode) override;
|
||||
virtual ~ActivationOpConverter() {}
|
||||
|
||||
private:
|
||||
std::string op_type_;
|
||||
std::string anakin_op_type_;
|
||||
std::map<std::string, std::string> anakin_op_types_{{"tanh", "TanH"},
|
||||
{"sigmoid", "Sigmoid"}};
|
||||
};
|
||||
|
||||
class TanhOpConverter : public ActivationOpConverter {
|
||||
public:
|
||||
TanhOpConverter() : ActivationOpConverter("tanh") {}
|
||||
};
|
||||
|
||||
class SigmoidOpConverter : public ActivationOpConverter {
|
||||
public:
|
||||
SigmoidOpConverter() : ActivationOpConverter("sigmoid") {}
|
||||
};
|
||||
} // namespace anakin
|
||||
} // namespace inference
|
||||
} // namespace paddle
|
@ -0,0 +1,127 @@
|
||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "paddle/fluid/inference/anakin/convert/batch_norm.h"
|
||||
#include <math.h>
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
using anakin::graph::GraphGlobalMem;
|
||||
using anakin::AK_FLOAT;
|
||||
using anakin::saber::NV;
|
||||
using anakin::saber::Shape;
|
||||
|
||||
namespace paddle {
|
||||
namespace inference {
|
||||
namespace anakin {
|
||||
|
||||
// Converts a Paddle batch_norm op into two chained Anakin ops:
//   "<type>:<Y>:bn"    -- "BatchNorm" carrying mean (weight_1), variance
//                         (weight_2) and a one-element block holding 1
//                         (weight_3);
//   "<type>:<Y>:scale" -- "Scale" carrying Scale (weight_1) and Bias
//                         (weight_2), writing to the original output Y.
// Parameter tensors are read from `scope` and copied to host memory before
// being placed into Anakin weight blocks. `test_mode` is not used.
void BatchNormOpConverter::operator()(const framework::proto::OpDesc &op,
                                      const framework::Scope &scope,
                                      bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
  PADDLE_ENFORCE_EQ(op_desc.Output("Y").size(), 1);

  // Each input slot must hold exactly one variable name.
  std::map<std::string, std::string> inputs;
  for (auto slot : {"X", "Scale", "Bias", "Mean", "Variance"}) {
    PADDLE_ENFORCE_EQ(op_desc.Input(slot).size(), 1UL);
    auto var_name = op_desc.Input(slot).front();
    inputs.insert({slot, var_name});
  }

  auto y_name = op_desc.Output("Y").front();
  auto base_name = op_desc.Type() + ":" + op_desc.Output("Y").front();
  auto eps = boost::get<float>(op_desc.GetAttr("epsilon"));

  // First stage: BatchNorm writing to an intermediate output.
  auto bn_op_name = base_name + ":bn";
  auto bn_output = bn_op_name + "_output";
  engine_->AddOp(bn_op_name, "BatchNorm", {inputs["X"]}, {bn_output});
  engine_->AddOpAttr(bn_op_name, "epsilon", eps);
  engine_->AddOpAttr(bn_op_name, "momentum", 1.0f);

  auto scale_op_name = base_name + ":scale";

  // Copies the named scope variable into `dst` on the CPU.
  auto load_to_cpu = [&scope](const std::string &var_name,
                              framework::LoDTensor *dst) {
    auto *var = scope.FindVar(var_name);
    PADDLE_ENFORCE_NOT_NULL(var);
    auto *src = var->GetMutable<framework::LoDTensor>();
    dst->Resize(src->dims());
    TensorCopySync(*src, platform::CPUPlace(), dst);
  };

  framework::LoDTensor bias_t;
  framework::LoDTensor mean_t;
  framework::LoDTensor scale_t;
  framework::LoDTensor variance_t;
  load_to_cpu(inputs["Bias"], &bias_t);
  load_to_cpu(inputs["Mean"], &mean_t);
  load_to_cpu(inputs["Scale"], &scale_t);
  load_to_cpu(inputs["Variance"], &variance_t);

  // Prepends a leading 1 and then pads with trailing 1s up to `n` dims.
  auto fill_shape = [](size_t n, std::vector<int> shape) {
    shape.insert(shape.begin(), 1);
    if (shape.size() < n) {
      shape.insert(shape.end(), n - shape.size(), 1);
    }
    return shape;
  };

  // Allocates an Anakin float block shaped after `src` (padded to 4 dims)
  // and fills its host tensor with the contents of `src`.
  auto make_block = [&fill_shape](const framework::LoDTensor &src) {
    Shape block_shape(fill_shape(4, framework::vectorize2int(src.dims())));
    auto *block =
        GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(block_shape);
    auto *host = static_cast<float *>(block->h_tensor().mutable_data());
    std::copy_n(src.data<float>(), src.numel(), host);
    return block;
  };

  auto *mean_block = make_block(mean_t);
  engine_->AddOpAttr(bn_op_name, "weight_1", *mean_block);

  auto *variance_block = make_block(variance_t);
  engine_->AddOpAttr(bn_op_name, "weight_2", *variance_block);

  // weight_3 is a single-element block holding 1.
  Shape unit_shape(std::vector<int>({1, 1, 1, 1}));
  auto *unit_block =
      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(unit_shape);
  auto *unit_data = static_cast<float *>(unit_block->h_tensor().mutable_data());
  unit_data[0] = 1;
  engine_->AddOpAttr(bn_op_name, "weight_3", *unit_block);

  auto *scale_block = make_block(scale_t);
  auto *bias_block = make_block(bias_t);

  // Second stage: Scale applies the learned scale and bias.
  engine_->AddOp(scale_op_name, "Scale", {bn_output}, {y_name});
  engine_->AddOpAttr(scale_op_name, "axis", 1);
  engine_->AddOpAttr(scale_op_name, "num_axes", 1);
  engine_->AddOpAttr(scale_op_name, "bias_term", true);
  engine_->AddOpAttr(scale_op_name, "weight_1", *scale_block);
  engine_->AddOpAttr(scale_op_name, "weight_2", *bias_block);
}
|
||||
|
||||
} // namespace anakin
|
||||
} // namespace inference
|
||||
} // namespace paddle
|
||||
|
||||
// Register the converter for the batch_norm op.
REGISTER_ANAKIN_OP_CONVERTER(batch_norm, BatchNormOpConverter);
|
@ -0,0 +1,35 @@
|
||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace inference {
|
||||
namespace anakin {
|
||||
|
||||
class BatchNormOpConverter : public AnakinOpConverter {
|
||||
public:
|
||||
BatchNormOpConverter() = default;
|
||||
|
||||
virtual void operator()(const framework::proto::OpDesc &op,
|
||||
const framework::Scope &scope,
|
||||
bool test_mode) override;
|
||||
virtual ~BatchNormOpConverter() {}
|
||||
};
|
||||
|
||||
} // namespace anakin
|
||||
} // namespace inference
|
||||
} // namespace paddle
|
@ -0,0 +1,51 @@
|
||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "paddle/fluid/inference/anakin/convert/concat.h"
|
||||
#include <algorithm>
|
||||
|
||||
using anakin::graph::GraphGlobalMem;
|
||||
using anakin::AK_FLOAT;
|
||||
using anakin::Precision;
|
||||
using anakin::saber::NV;
|
||||
using anakin::saber::X86;
|
||||
using anakin::saber::Shape;
|
||||
using anakin::PBlock;
|
||||
using anakin::PTuple;
|
||||
|
||||
namespace paddle {
|
||||
namespace inference {
|
||||
namespace anakin {
|
||||
|
||||
// Adds an Anakin "Concat" op to the engine for the given Paddle op.
//
//   op:        proto description of the op; its "X" inputs become the Anakin
//              op's inputs and its first "Out" entry becomes the output.
//   scope:     not read by this converter.
//   test_mode: not read by this converter.
//
// The Anakin op is named "<op type>:<output name>" and receives the Paddle
// "axis" attribute unchanged.
void ConcatOpConverter::operator()(const framework::proto::OpDesc &op,
                                   const framework::Scope &scope,
                                   bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
  const int concat_axis = boost::get<int>(op_desc.GetAttr("axis"));
  auto inputs = op_desc.Input("X");
  // NOTE(review): an `axis > 0` PADDLE_ENFORCE used to sit here but is
  // disabled; its message still mentioned "trt". Confirm whether Anakin needs
  // an equivalent restriction.

  const auto out_name = op_desc.Output("Out").front();
  const auto anakin_op_name = op_desc.Type() + ":" + out_name;

  engine_->AddOp(anakin_op_name, "Concat", inputs, {out_name});
  engine_->AddOpAttr(anakin_op_name, "axis", concat_axis);
}
|
||||
|
||||
} // namespace anakin
|
||||
} // namespace inference
|
||||
} // namespace paddle
|
||||
|
||||
REGISTER_ANAKIN_OP_CONVERTER(concat, ConcatOpConverter);
|
@ -0,0 +1,37 @@
|
||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace inference {
|
||||
namespace anakin {
|
||||
|
||||
class ConcatOpConverter : public AnakinOpConverter {
|
||||
public:
|
||||
ConcatOpConverter() = default;
|
||||
|
||||
virtual void operator()(const framework::proto::OpDesc &op,
|
||||
const framework::Scope &scope,
|
||||
bool test_mode) override;
|
||||
virtual ~ConcatOpConverter() {}
|
||||
|
||||
private:
|
||||
};
|
||||
|
||||
} // namespace anakin
|
||||
} // namespace inference
|
||||
} // namespace paddle
|
@ -0,0 +1,87 @@
|
||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "paddle/fluid/inference/anakin/convert/conv2d.h"
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
using anakin::graph::GraphGlobalMem;
|
||||
using anakin::AK_FLOAT;
|
||||
using anakin::saber::NV;
|
||||
using anakin::saber::Shape;
|
||||
using anakin::PTuple;
|
||||
|
||||
namespace paddle {
|
||||
namespace inference {
|
||||
namespace anakin {
|
||||
|
||||
// Adds an Anakin "Convolution" op (without bias) to the engine for a Paddle
// conv2d op.
//
//   op:        proto description of the op; it must carry exactly one
//              "Input", one "Filter" and one "Output" entry.
//   scope:     scope in which the filter variable is looked up.
//   test_mode: not read by this converter.
//
// The Anakin op is named "<op type>:<output name>". The filter is copied to a
// CPU tensor first and then into a freshly allocated Anakin weight block.
void Conv2dOpConverter::operator()(const framework::proto::OpDesc &op,
                                   const framework::Scope &scope,
                                   bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
  PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1UL);
  PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1UL);
  PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1UL);

  const auto in_name = op_desc.Input("Input").front();
  const auto out_name = op_desc.Output("Output").front();
  const auto conv_name = op_desc.Type() + ":" + out_name;
  engine_->AddOp(conv_name, "Convolution", {in_name}, {out_name});

  auto *filter_var = scope.FindVar(op_desc.Input("Filter").front());
  PADDLE_ENFORCE_NOT_NULL(filter_var);
  auto *filter_t = filter_var->GetMutable<framework::LoDTensor>();

  // Work on a CPU-resident copy so the raw float data can be read directly.
  framework::LoDTensor cpu_filter;
  cpu_filter.Resize(filter_t->dims());
  TensorCopySync(*filter_t, platform::CPUPlace(), &cpu_filter);

  PADDLE_ENFORCE_EQ(cpu_filter.dims().size(), 4UL);

  // Per the original author's notes, dims()[0] / dims()[1] are the output and
  // input channel counts; dims()[2] / dims()[3] are the kernel height/width.
  const int filter_h = cpu_filter.dims()[2];
  const int filter_w = cpu_filter.dims()[3];
  const int filter_num = static_cast<int>(cpu_filter.dims()[0]);
  engine_->AddOpAttr<int>(conv_name, "filter_num", filter_num);
  engine_->AddOpAttr<PTuple<int>>(conv_name, "kernel_size",
                                  {filter_h, filter_w});

  auto strides = boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
  engine_->AddOpAttr<PTuple<int>>(conv_name, "strides", strides);

  auto paddings = boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
  engine_->AddOpAttr<PTuple<int>>(conv_name, "padding", paddings);

  auto dilations = boost::get<std::vector<int>>(op_desc.GetAttr("dilations"));
  engine_->AddOpAttr<PTuple<int>>(conv_name, "dilation_rate", dilations);

  const int groups = boost::get<int>(op_desc.GetAttr("groups"));
  engine_->AddOpAttr(conv_name, "group", groups);
  engine_->AddOpAttr(conv_name, "axis", 1);
  engine_->AddOpAttr(conv_name, "bias_term", false);

  // Allocate an Anakin block shaped like the filter and fill it from the CPU
  // copy. h_tensor()/d_tensor() are presumably the host/device sides of the
  // block -- TODO confirm against the Anakin API.
  Shape filter_shape(framework::vectorize2int(filter_t->dims()));
  auto *filter_block =
      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(filter_shape);
  float *block_data =
      static_cast<float *>(filter_block->h_tensor().mutable_data());
  std::copy_n(cpu_filter.data<float>(), cpu_filter.numel(), block_data);
  filter_block->d_tensor().set_shape(filter_shape);
  filter_block->d_tensor().copy_from(filter_block->h_tensor());
  engine_->AddOpAttr(conv_name, "weight_1", *filter_block);
}
|
||||
|
||||
} // namespace anakin
|
||||
} // namespace inference
|
||||
} // namespace paddle
|
||||
|
||||
REGISTER_ANAKIN_OP_CONVERTER(conv2d, Conv2dOpConverter);
|
@ -0,0 +1,113 @@
|
||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "paddle/fluid/inference/anakin/convert/conv2d_fusion.h"
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
using anakin::graph::GraphGlobalMem;
|
||||
using anakin::AK_FLOAT;
|
||||
using anakin::saber::NV;
|
||||
using anakin::saber::Shape;
|
||||
using anakin::PTuple;
|
||||
|
||||
namespace paddle {
|
||||
namespace inference {
|
||||
namespace anakin {
|
||||
|
||||
// Adds an Anakin "Convolution" op with a bias term to the engine for a Paddle
// conv2d_fusion op.
//
//   op:        proto description of the op; it must carry exactly one
//              "Input", "Filter", "Bias" and "Output" entry.
//   scope:     scope in which the filter and bias variables are looked up.
//   test_mode: not read by this converter.
//
// The Anakin op is named "<op type>:<output name>". Filter and bias are each
// copied to a CPU tensor and then into freshly allocated Anakin weight blocks
// ("weight_1" and "weight_2" respectively).
void Conv2dFusionOpConverter::operator()(const framework::proto::OpDesc &op,
                                         const framework::Scope &scope,
                                         bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
  PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1UL);
  PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1UL);
  PADDLE_ENFORCE_EQ(op_desc.Input("Bias").size(), 1UL);
  PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1UL);

  const auto in_name = op_desc.Input("Input").front();
  const auto out_name = op_desc.Output("Output").front();
  const auto conv_name = op_desc.Type() + ":" + out_name;
  engine_->AddOp(conv_name, "Convolution", {in_name}, {out_name});

  auto *filter_var = scope.FindVar(op_desc.Input("Filter").front());
  PADDLE_ENFORCE_NOT_NULL(filter_var);
  auto *filter_t = filter_var->GetMutable<framework::LoDTensor>();

  auto *bias_var = scope.FindVar(op_desc.Input("Bias").front());
  PADDLE_ENFORCE_NOT_NULL(bias_var);
  auto *bias_t = bias_var->GetMutable<framework::LoDTensor>();

  // Work on a CPU-resident copy so the raw float data can be read directly.
  framework::LoDTensor cpu_filter;
  cpu_filter.Resize(filter_t->dims());
  TensorCopySync(*filter_t, platform::CPUPlace(), &cpu_filter);

  PADDLE_ENFORCE_EQ(cpu_filter.dims().size(), 4UL);

  // Per the original author's notes, dims()[0] / dims()[1] are the output and
  // input channel counts; dims()[2] / dims()[3] are the kernel height/width.
  const int filter_h = cpu_filter.dims()[2];
  const int filter_w = cpu_filter.dims()[3];
  const int filter_num = static_cast<int>(cpu_filter.dims()[0]);
  engine_->AddOpAttr<int>(conv_name, "filter_num", filter_num);
  engine_->AddOpAttr<PTuple<int>>(conv_name, "kernel_size",
                                  {filter_h, filter_w});

  auto strides = boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
  engine_->AddOpAttr<PTuple<int>>(conv_name, "strides", strides);

  auto paddings = boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
  engine_->AddOpAttr<PTuple<int>>(conv_name, "padding", paddings);

  auto dilations = boost::get<std::vector<int>>(op_desc.GetAttr("dilations"));
  engine_->AddOpAttr<PTuple<int>>(conv_name, "dilation_rate", dilations);

  const int groups = boost::get<int>(op_desc.GetAttr("groups"));
  engine_->AddOpAttr(conv_name, "group", groups);
  engine_->AddOpAttr(conv_name, "axis", 1);
  engine_->AddOpAttr(conv_name, "bias_term", true);

  // Filter -> "weight_1". h_tensor()/d_tensor() are presumably the host/device
  // sides of the block -- TODO confirm against the Anakin API.
  Shape filter_shape(framework::vectorize2int(filter_t->dims()));
  auto *filter_block =
      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(filter_shape);
  float *filter_dst =
      static_cast<float *>(filter_block->h_tensor().mutable_data());
  std::copy_n(cpu_filter.data<float>(), cpu_filter.numel(), filter_dst);
  filter_block->d_tensor().set_shape(filter_shape);
  filter_block->d_tensor().copy_from(filter_block->h_tensor());
  engine_->AddOpAttr(conv_name, "weight_1", *filter_block);

  // Bias -> "weight_2".
  auto bias_dims = framework::vectorize2int(bias_t->dims());
  framework::LoDTensor cpu_bias;
  cpu_bias.Resize(bias_t->dims());
  TensorCopySync(*bias_t, platform::CPUPlace(), &cpu_bias);
  auto *bias_src = cpu_bias.data<float>();
  // Prepend three unit dimensions to the bias shape (the alternative of
  // appending trailing ones was left disabled by the original author).
  bias_dims.insert(bias_dims.begin(), 3UL, 1);
  Shape bias_shape(bias_dims);

  auto *bias_block =
      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(bias_shape);
  float *bias_dst = static_cast<float *>(bias_block->h_tensor().mutable_data());
  std::copy_n(bias_src, cpu_bias.numel(), bias_dst);
  bias_block->d_tensor().set_shape(bias_shape);
  bias_block->d_tensor().copy_from(bias_block->h_tensor());
  engine_->AddOpAttr(conv_name, "weight_2", *bias_block);
}
|
||||
|
||||
} // namespace anakin
|
||||
} // namespace inference
|
||||
} // namespace paddle
|
||||
|
||||
REGISTER_ANAKIN_OP_CONVERTER(conv2d_fusion, Conv2dFusionOpConverter);
|
@ -0,0 +1,35 @@
|
||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace inference {
|
||||
namespace anakin {
|
||||
|
||||
class Conv2dFusionOpConverter : public AnakinOpConverter {
|
||||
public:
|
||||
Conv2dFusionOpConverter() = default;
|
||||
|
||||
virtual void operator()(const framework::proto::OpDesc &op,
|
||||
const framework::Scope &scope,
|
||||
bool test_mode) override;
|
||||
virtual ~Conv2dFusionOpConverter() {}
|
||||
};
|
||||
|
||||
} // namespace anakin
|
||||
} // namespace inference
|
||||
} // namespace paddle
|
@ -0,0 +1,90 @@
|
||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "paddle/fluid/inference/anakin/convert/density_prior_box.h"
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
using anakin::graph::GraphGlobalMem;
|
||||
using anakin::AK_FLOAT;
|
||||
using anakin::saber::NV;
|
||||
using anakin::saber::Shape;
|
||||
using anakin::PTuple;
|
||||
|
||||
namespace paddle {
|
||||
namespace inference {
|
||||
namespace anakin {
|
||||
|
||||
// Adds an Anakin "PriorBox" op to the engine for a Paddle density_prior_box
// op.
//
//   op:        proto description of the op. Its first "Input" and "Image"
//              entries become the Anakin op's two inputs and its first
//              "Boxes" entry becomes the output.
//   scope:     not read by this converter.
//   test_mode: not read by this converter.
//
// The Anakin op is named "<op type>:<output name>". Attributes forwarded from
// the Paddle op: fixed_sizes, fixed_ratios, densities (converted to float),
// variances, step_h, step_w and offset. min_size, max_size and aspect_ratio
// are passed as empty tuples; is_flip/is_clip are false; img_h/img_w are 0.
void DensityPriorBoxOpConverter::operator()(const framework::proto::OpDesc& op,
                                            const framework::Scope& scope,
                                            bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
  auto input_name = op_desc.Input("Input").front();
  auto image_name = op_desc.Input("Image").front();
  auto output_name = op_desc.Output("Boxes").front();

  auto op_name = op_desc.Type() + ":" + output_name;

  auto fixed_sizes =
      boost::get<std::vector<float>>(op_desc.GetAttr("fixed_sizes"));
  auto fixed_ratios =
      boost::get<std::vector<float>>(op_desc.GetAttr("fixed_ratios"));
  auto densities = boost::get<std::vector<int>>(op_desc.GetAttr("densities"));
  // The "density" attribute is handed to Anakin as a float tuple, so convert
  // the integer densities element-wise in a single allocation.
  std::vector<float> dens(densities.begin(), densities.end());

  // The original author noted that this op lacks a flip attribute.
  // NOTE(review): a read of the "clip" attribute was left commented out and
  // is_clip is hard-coded to false below -- confirm whether it should be
  // forwarded.
  auto variances = boost::get<std::vector<float>>(op_desc.GetAttr("variances"));

  // The original author noted that img_h / img_w are not available here; 0 is
  // passed for both below.
  auto step_h = boost::get<float>(op_desc.GetAttr("step_h"));
  auto step_w = boost::get<float>(op_desc.GetAttr("step_w"));
  auto offset = boost::get<float>(op_desc.GetAttr("offset"));

  PTuple<std::string> t_order;
  t_order.push_back("MIN");
  t_order.push_back("COM");
  t_order.push_back("MAX");

  // Placeholder for the size/ratio attributes this op does not provide.
  std::vector<float> empty_sizes = {};

  engine_->AddOp(op_name, "PriorBox", {input_name, image_name}, {output_name});
  engine_->AddOpAttr<PTuple<float>>(op_name, "min_size", empty_sizes);
  engine_->AddOpAttr<PTuple<float>>(op_name, "max_size", empty_sizes);
  engine_->AddOpAttr<PTuple<float>>(op_name, "aspect_ratio", empty_sizes);
  engine_->AddOpAttr<PTuple<float>>(op_name, "fixed_size", fixed_sizes);
  engine_->AddOpAttr<PTuple<float>>(op_name, "fixed_ratio", fixed_ratios);
  engine_->AddOpAttr<PTuple<float>>(op_name, "density", dens);
  engine_->AddOpAttr(op_name, "is_flip", false);
  engine_->AddOpAttr(op_name, "is_clip", false);
  engine_->AddOpAttr<PTuple<float>>(op_name, "variance", variances);
  engine_->AddOpAttr(op_name, "img_h", 0);
  engine_->AddOpAttr(op_name, "img_w", 0);
  engine_->AddOpAttr(op_name, "step_h", step_h);
  engine_->AddOpAttr(op_name, "step_w", step_w);
  engine_->AddOpAttr(op_name, "offset", offset);
  engine_->AddOpAttr<PTuple<std::string>>(op_name, "order", t_order);
}
|
||||
|
||||
} // namespace anakin
|
||||
} // namespace inference
|
||||
} // namespace paddle
|
||||
|
||||
REGISTER_ANAKIN_OP_CONVERTER(density_prior_box, DensityPriorBoxOpConverter);
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue