merge develop

revert-12646-feature/jit/xbyak
nhzlx 7 years ago
commit d50f776b27

@ -8,6 +8,7 @@ set(ANAKIN_INCLUDE "${ANAKIN_INSTALL_DIR}" CACHE STRING "root of Anakin header f
set(ANAKIN_LIBRARY "${ANAKIN_INSTALL_DIR}" CACHE STRING "path of Anakin library")
set(ANAKIN_COMPILE_EXTRA_FLAGS
-Wno-error=unused-but-set-variable -Wno-unused-but-set-variable
-Wno-error=unused-variable -Wno-unused-variable
-Wno-error=format-extra-args -Wno-format-extra-args
-Wno-error=comment -Wno-comment
@ -19,7 +20,7 @@ set(ANAKIN_COMPILE_EXTRA_FLAGS
-Wno-reorder
-Wno-error=cpp)
set(ANAKIN_LIBRARY_URL "https://github.com/pangge/Anakin/releases/download/3.0/anakin_release_simple.tar.gz")
set(ANAKIN_LIBRARY_URL "https://github.com/pangge/Anakin/releases/download/Version0.1.0/anakin.tar.gz")
# A helper function used in Anakin, currently, to use it, one need to recursively include
# nearly all the header files.
@ -41,9 +42,9 @@ if (NOT EXISTS "${ANAKIN_INSTALL_DIR}")
message(STATUS "Download Anakin library from ${ANAKIN_LIBRARY_URL}")
execute_process(COMMAND bash -c "mkdir -p ${ANAKIN_INSTALL_DIR}")
execute_process(COMMAND bash -c "rm -rf ${ANAKIN_INSTALL_DIR}/*")
execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; wget -q ${ANAKIN_LIBRARY_URL}")
execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; wget --no-check-certificate -q ${ANAKIN_LIBRARY_URL}")
execute_process(COMMAND bash -c "mkdir -p ${ANAKIN_INSTALL_DIR}")
execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; tar xzf anakin_release_simple.tar.gz")
execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; tar xzf anakin.tar.gz")
endif()
if (WITH_ANAKIN)

@ -263,9 +263,7 @@ paddle.fluid.layers.gaussian_random_batch_size_like ArgSpec(args=[], varargs='ar
paddle.fluid.layers.scatter ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.sum ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.slice ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.polygon_box_transform ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.shape ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.iou_similarity ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.maxout ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.sigmoid ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.logsigmoid ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
@ -306,7 +304,9 @@ paddle.fluid.layers.ssd_loss ArgSpec(args=['location', 'confidence', 'gt_box', '
paddle.fluid.layers.detection_map ArgSpec(args=['detect_res', 'label', 'class_num', 'background_label', 'overlap_threshold', 'evaluate_difficult', 'has_state', 'input_states', 'out_states', 'ap_version'], varargs=None, keywords=None, defaults=(0, 0.3, True, None, None, None, 'integral'))
paddle.fluid.layers.rpn_target_assign ArgSpec(args=['loc', 'scores', 'anchor_box', 'gt_box', 'rpn_batch_size_per_im', 'fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap'], varargs=None, keywords=None, defaults=(256, 0.25, 0.7, 0.3))
paddle.fluid.layers.anchor_generator ArgSpec(args=['input', 'anchor_sizes', 'aspect_ratios', 'variance', 'stride', 'offset', 'name'], varargs=None, keywords=None, defaults=(None, None, [0.1, 0.1, 0.2, 0.2], None, 0.5, None))
paddle.fluid.layers.iou_similarity ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.box_coder ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.polygon_box_transform ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.accuracy ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None))
paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk'], varargs=None, keywords=None, defaults=('ROC', 200, 1))
paddle.fluid.layers.exponential_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))

@ -7,6 +7,7 @@ cc_library(ddim SRCS ddim.cc DEPS eigen3 boost)
cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
nv_test(dim_test SRCS dim_test.cu DEPS ddim)
cc_library(data_type SRCS data_type.cc DEPS framework_proto ddim device_context)
cc_test(data_type_test SRCS data_type_test.cc DEPS data_type place tensor)
if(WITH_GPU)
nv_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type device_context)
else()

@ -17,6 +17,8 @@
#include <string>
#include <unordered_map>
using float16 = paddle::platform::float16;
namespace paddle {
namespace framework {
@ -53,7 +55,7 @@ static DataTypeMap* InitDataTypeMap() {
RegisterType<cc_type>(retv, proto_type, #cc_type)
// NOTE: Add your customize type here.
RegType(platform::float16, proto::VarType::FP16);
RegType(float16, proto::VarType::FP16);
RegType(float, proto::VarType::FP32);
RegType(double, proto::VarType::FP64);
RegType(int, proto::VarType::INT32);

@ -0,0 +1,40 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/data_type.h"
#include <string>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/tensor.h"
TEST(DataType, float16) {
using paddle::framework::Tensor;
using paddle::platform::CPUPlace;
using paddle::platform::float16;
namespace f = paddle::framework;
f::proto::VarType::Type dtype = f::proto::VarType::FP16;
Tensor tensor;
CPUPlace cpu;
tensor.mutable_data(cpu, f::ToTypeIndex(dtype));
// test fp16 tensor
EXPECT_EQ(tensor.type(), std::type_index(typeid(float16)));
// test fp16 size
EXPECT_EQ(f::SizeOfType(f::ToTypeIndex(dtype)), 2u);
// test debug info
std::string type = "float16";
EXPECT_STREQ(f::DataTypeToString(dtype).c_str(), type.c_str());
}

@ -116,8 +116,8 @@ TEST(GraphHelperTest, Basic) {
for (size_t i = 0; i < sorted.size(); ++i) {
node_map[sorted[i]->Name()] = i;
}
ASSERT_EQ(node_map.at("op1"), 0);
ASSERT_EQ(node_map.at("op2"), 1);
ASSERT_EQ(node_map.at("op1"), 0UL);
ASSERT_EQ(node_map.at("op2"), 1UL);
ASSERT_TRUE(node_map.at("op3") < node_map.at("op5"));
}
} // namespace ir

@ -97,15 +97,15 @@ TEST(GraphTest, Basic) {
std::vector<ir::Node *> nodes(g->Nodes().begin(), g->Nodes().end());
for (ir::Node *n : nodes) {
if (n->Name() == "sum") {
ASSERT_EQ(n->inputs.size(), 3);
ASSERT_EQ(n->outputs.size(), 1);
ASSERT_EQ(n->inputs.size(), 3UL);
ASSERT_EQ(n->outputs.size(), 1UL);
} else if (n->Name() == "test_a" || n->Name() == "test_b" ||
n->Name() == "test_c") {
ASSERT_EQ(n->inputs.size(), 0);
ASSERT_EQ(n->outputs.size(), 1);
ASSERT_EQ(n->inputs.size(), 0UL);
ASSERT_EQ(n->outputs.size(), 1UL);
} else if (n->Name() == "test_out") {
ASSERT_EQ(n->inputs.size(), 1);
ASSERT_EQ(n->outputs.size(), 0);
ASSERT_EQ(n->inputs.size(), 1UL);
ASSERT_EQ(n->outputs.size(), 0UL);
}
}
ASSERT_EQ(nodes.size(), 5);

@ -29,6 +29,13 @@ TEST(OpKernelType, ToString) {
ASSERT_EQ(paddle::framework::KernelTypeToString(op_kernel_type),
"data_type[float]:data_layout[NCHW]:place[CPUPlace]:library_type["
"CUDNN]");
using CUDAPlace = paddle::platform::CUDAPlace;
OpKernelType op_kernel_type2(DataType::FP16, CUDAPlace(0), DataLayout::kNCHW,
LibraryType::kCUDNN);
ASSERT_EQ(paddle::framework::KernelTypeToString(op_kernel_type2),
"data_type[float16]:data_layout[NCHW]:place[CUDAPlace(0)]:library_"
"type[CUDNN]");
}
TEST(OpKernelType, Hash) {

@ -40,6 +40,40 @@ OpProtoAndCheckerMaker::VariableBuilder OpProtoAndCheckerMaker::AddOutput(
return OpProtoAndCheckerMaker::VariableBuilder{output};
}
void OpProtoAndCheckerMaker::Reuse(const std::string& name,
const std::string& reused_name) {
bool found = false;
proto::OpProto::Var* var;
for (auto& var : proto_->inputs()) {
if (var.name() == reused_name) {
found = true;
break;
}
}
PADDLE_ENFORCE(found == true,
"Input/Output name: %s reused_name: %s, one of them is not "
"exists or not matched.",
name, reused_name);
found = false;
for (int i = 0; i < proto_->outputs().size(); ++i) {
var = proto_->mutable_outputs()->Mutable(i);
if (var->name() == name) {
PADDLE_ENFORCE(!var->has_reuse(),
"Output(%s) has been set reused var of %s", name,
var->reuse());
found = true;
var->set_reuse(reused_name);
break;
}
}
PADDLE_ENFORCE(found == true,
"Input/Output name: %s reused_name: %s, one of them is not "
"exists or not matched.",
name, reused_name);
}
void OpProtoAndCheckerMaker::CheckNoDuplicatedInOutAttrs() {
std::unordered_set<std::string> names;
auto checker = [&](const std::string& name) {

@ -78,6 +78,8 @@ class OpProtoAndCheckerMaker {
VariableBuilder AddOutput(const std::string &name,
const std::string &comment);
void Reuse(const std::string &name, const std::string &reused_name);
template <typename T>
TypedAttrChecker<T> &AddAttr(const std::string &name,
const std::string &comment,

@ -49,6 +49,15 @@ TEST(ProtoMaker, DuplicatedInOut) {
}
class TestInplaceProtoMaker : public paddle::framework::OpProtoAndCheckerMaker {
public:
void Make() {
AddInput("X", "input of test op");
AddOutput("XOut", "output of test op").Reuse("X");
}
};
class TestInplaceProtoMaker2
: public paddle::framework::OpProtoAndCheckerMaker {
public:
void Make() {
AddInput("X", "input of test op");
@ -58,12 +67,100 @@ class TestInplaceProtoMaker : public paddle::framework::OpProtoAndCheckerMaker {
};
TEST(ProtoMaker, InplaceOutput) {
paddle::framework::proto::OpProto op_proto;
paddle::framework::proto::OpProto op_proto, op_proto2;
paddle::framework::OpAttrChecker op_checker;
TestInplaceProtoMaker proto_maker;
ASSERT_THROW(proto_maker(&op_proto, &op_checker),
TestInplaceProtoMaker2 proto_maker2;
proto_maker(&op_proto, &op_checker);
ASSERT_THROW(proto_maker2(&op_proto2, &op_checker),
paddle::platform::EnforceNotMet);
// proto_maker(&op_proto, &op_checker);
// proto_maker.Make();
// ASSERT_THROW(proto_maker.Validate(), paddle::platform::EnforceNotMet);
}
// normal reuse
class TestReuseProtoMaker : public paddle::framework::OpProtoAndCheckerMaker {
public:
void Make() {
AddInput("X", "input of test op");
AddInput("Y", "input of test op");
AddOutput("Out", "output of test op");
AddOutput("XOut", "output of test op");
// avoid destructor exception.
// Validate();
TestReuse();
}
virtual void TestReuse() {}
};
// test duplicate reuse error
class TestReuseProtoMaker2 : public TestReuseProtoMaker {
public:
void TestReuse() {
Reuse("Out", "X");
Reuse("Out", "Y");
}
};
// NotExists Input
class TestReuseProtoMaker3 : public TestReuseProtoMaker {
public:
void TestReuse() {
Reuse("Out", "NotExists");
Reuse("XOut", "X");
}
};
// NotExists Output
class TestReuseProtoMaker4 : public TestReuseProtoMaker {
public:
void TestReuse() { Reuse("NotExists", "X"); }
};
TEST(ProtoMaker, Reuse) {
paddle::framework::proto::OpProto op_proto;
paddle::framework::OpAttrChecker op_checker;
TestReuseProtoMaker proto_maker;
proto_maker(&op_proto, &op_checker);
}
// NOTE(dzhwinter):
// There is a Fatal CHECK on base class destructor, which will call abort inside
// instead of
// throw an exception. If we throw an exception in Make(), we will trigger the
// CHECK and terminate the tests.
//
// I had tried to replace the default CHECK with a exception, however, it's
// still not supported by glog.
// the details:
// https://github.com/google/glog/issues/249
// https://github.com/facebookresearch/TensorComprehensions/issues/351
/*
TEST(ProtoMaker, ReuseWithException) {
paddle::framework::proto::OpProto op_proto2, op_proto3, op_proto4;
paddle::framework::OpAttrChecker op_checker;
TestReuseProtoMaker2 proto_maker2;
TestReuseProtoMaker3 proto_maker3;
TestReuseProtoMaker4 proto_maker4;
EXPECT_THROW(proto_maker2(&op_proto2, &op_checker),
paddle::platform::EnforceNotMet);
EXPECT_THROW(proto_maker3(&op_proto3, &op_checker),
paddle::platform::EnforceNotMet);
EXPECT_THROW(proto_maker4(&op_proto4, &op_checker),
paddle::platform::EnforceNotMet);
}
void FailureFunction() {
throw std::runtime_error("Check failed in destructor.");
// return 0;
}
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
google::InstallFailureFunction(&FailureFunction);
return RUN_ALL_TESTS();
}
*/

@ -69,6 +69,21 @@ static DDim GetDims(const Scope& scope, const std::string& name,
}
}
static std::string GetDtype(const Scope& scope, const std::string& name) {
Variable* var = scope.FindVar(name);
if (var == nullptr) {
return "";
}
if (var->IsType<LoDTensor>()) {
return DataTypeToString(ToDataType(var->Get<LoDTensor>().type()));
} else if (var->IsType<SelectedRows>()) {
return DataTypeToString(
ToDataType(var->Get<SelectedRows>().value().type()));
} else {
return "";
}
}
static int GetRowSize(const Scope& scope, const std::string& name) {
Variable* var = scope.FindVar(name);
if (var == nullptr) {
@ -172,6 +187,8 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const {
if (row_size >= 0) {
ss << "[row_size=" << row_size << "]";
}
std::string dtype = GetDtype(*scope, input.second[i]);
ss << ":" << dtype;
ss << "[" << GetDims(*scope, input.second[i], true) << "]";
ss << "(" << GetLoD(*scope, input.second[i]) << ")";
}

@ -15,6 +15,7 @@
#include "paddle/fluid/framework/tensor.h"
#include <gtest/gtest.h>
#include <string>
#include "paddle/fluid/platform/float16.h"
namespace framework = paddle::framework;
namespace platform = paddle::platform;
@ -213,3 +214,17 @@ TEST(Tensor, Layout) {
src.set_layout(framework::DataLayout::kAnyLayout);
ASSERT_EQ(src.layout(), framework::DataLayout::kAnyLayout);
}
TEST(Tensor, FP16) {
using platform::float16;
framework::Tensor src;
float16* src_ptr = src.mutable_data<float16>({2, 3}, platform::CPUPlace());
for (int i = 0; i < 2 * 3; ++i) {
src_ptr[i] = static_cast<float16>(i);
}
EXPECT_EQ(src.memory_size(), 2 * 3 * sizeof(float16));
// EXPECT a human readable error message
// src.data<uint8_t>();
// Tensor holds the wrong type, it holds N6paddle8platform7float16E at
// [/paddle/Paddle/paddle/fluid/framework/tensor_impl.h:43]
}

@ -23,6 +23,7 @@
#pragma once
#include <string>
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/inference/analysis/data_flow_graph.h"

@ -176,7 +176,7 @@ struct GraphTraits<DataFlowGraph> {
// sub-graph is the inputs nodes and output nodes that doesn't inside the
// sub-graph.
std::pair<std::vector<Node *>, std::vector<Node *>>
ExtractInputAndOutputOfSubGraph(std::vector<Node *> &graph);
ExtractInputAndOutputOfSubGraph(std::vector<Node *> &graph); // NOLINT
} // namespace analysis
} // namespace inference

@ -12,11 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/model_store_pass.h"
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/analysis/argument.h"
#include "paddle/fluid/inference/analysis/model_store_pass.h"
namespace paddle {
namespace inference {

@ -17,6 +17,8 @@
* model in the disk, and that model can be reloaded for prediction.
*/
#pragma once
#include <string>
#include "paddle/fluid/inference/analysis/pass.h"
namespace paddle {

@ -19,6 +19,7 @@ endif(APPLE)
set(inference_deps paddle_inference_api paddle_fluid_api)
if(WITH_GPU AND TENSORRT_FOUND)
set(inference_deps ${inference_deps} paddle_inference_tensorrt_subgraph_engine)
endif()
@ -63,6 +64,8 @@ endif()
if (WITH_ANAKIN) # only needed in CI
# Due to Anakin do not have official library releases and the versions of protobuf and cuda do not match Paddle's,
# so anakin library will not be merged to our official inference library. To use anakin prediction API, one need to
# compile the libinference_anakin_api.a and compile with anakin.so.
fetch_include_recursively(${ANAKIN_INCLUDE})
# compile the libinference_anakin_api.a and anakin.so.
nv_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc)
nv_library(inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc)
@ -73,7 +76,7 @@ if (WITH_ANAKIN) # only needed in CI
if (WITH_TESTING)
cc_test(inference_anakin_test SRCS api_anakin_engine_tester.cc
ARGS --model=${ANAKIN_INSTALL_DIR}/mobilenet_v2.anakin.bin
DEPS inference_anakin_api)
DEPS inference_anakin_api_shared)
target_compile_options(inference_anakin_test BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS})
endif(WITH_TESTING)
endif()

@ -18,26 +18,36 @@
namespace paddle {
PaddleInferenceAnakinPredictor::PaddleInferenceAnakinPredictor(
template <typename Target>
PaddleInferenceAnakinPredictor<Target>::PaddleInferenceAnakinPredictor(
const AnakinConfig &config) {
CHECK(Init(config));
}
bool PaddleInferenceAnakinPredictor::Init(const AnakinConfig &config) {
template <typename Target>
bool PaddleInferenceAnakinPredictor<Target>::Init(const AnakinConfig &config) {
if (!(graph_.load(config.model_file))) {
LOG(FATAL) << "fail to load graph from " << config.model_file;
return false;
}
graph_.ResetBatchSize("input_0", config.max_batch_size);
auto inputs = graph_.get_ins();
for (auto &input_str : inputs) {
graph_.ResetBatchSize(input_str, config.max_batch_size);
}
// optimization for graph
if (!(graph_.Optimize())) {
return false;
}
// construct executer
executor_.init(graph_);
if (executor_p_ == nullptr) {
executor_p_ = new anakin::Net<Target, anakin::saber::AK_FLOAT,
anakin::Precision::FP32>(graph_, true);
}
return true;
}
bool PaddleInferenceAnakinPredictor::Run(
template <typename Target>
bool PaddleInferenceAnakinPredictor<Target>::Run(
const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data, int batch_size) {
for (const auto &input : inputs) {
@ -46,7 +56,29 @@ bool PaddleInferenceAnakinPredictor::Run(
<< "'s type is not float";
return false;
}
auto d_tensor_in_p = executor_.get_in(input.name);
auto d_tensor_in_p = executor_p_->get_in(input.name);
auto net_shape = d_tensor_in_p->valid_shape();
if (net_shape.size() != input.shape.size()) {
LOG(ERROR) << " input " << input.name
<< "'s shape size should be equal to that of net";
return false;
}
int sum = 1;
for_each(input.shape.begin(), input.shape.end(), [&](int n) { sum *= n; });
if (sum > net_shape.count()) {
graph_.Reshape(input.name, input.shape);
delete executor_p_;
executor_p_ = new anakin::Net<Target, anakin::saber::AK_FLOAT,
anakin::Precision::FP32>(graph_, true);
d_tensor_in_p = executor_p_->get_in(input.name);
}
anakin::saber::Shape tmp_shape;
for (auto s : input.shape) {
tmp_shape.push_back(s);
}
d_tensor_in_p->reshape(tmp_shape);
float *d_data_p = d_tensor_in_p->mutable_data();
if (cudaMemcpy(d_data_p, static_cast<float *>(input.data.data()),
d_tensor_in_p->valid_size() * sizeof(float),
@ -56,16 +88,17 @@ bool PaddleInferenceAnakinPredictor::Run(
}
cudaStreamSynchronize(NULL);
}
executor_.prediction();
cudaDeviceSynchronize();
executor_p_->prediction();
cudaDeviceSynchronize();
if (output_data->empty()) {
LOG(ERROR) << "At least one output should be set with tensors' names.";
return false;
}
for (auto &output : *output_data) {
auto *tensor = executor_.get_out(output.name);
output.shape = tensor->shape();
auto *tensor = executor_p_->get_out(output.name);
output.shape = tensor->valid_shape();
if (output.data.length() < tensor->valid_size() * sizeof(float)) {
output.data.Resize(tensor->valid_size() * sizeof(float));
}
@ -81,19 +114,23 @@ bool PaddleInferenceAnakinPredictor::Run(
return true;
}
anakin::Net<anakin::NV, anakin::saber::AK_FLOAT, anakin::Precision::FP32>
&PaddleInferenceAnakinPredictor::get_executer() {
return executor_;
template <typename Target>
anakin::Net<Target, anakin::saber::AK_FLOAT, anakin::Precision::FP32>
&PaddleInferenceAnakinPredictor<Target>::get_executer() {
return *executor_p_;
}
// the cloned new Predictor of anakin share the same net weights from original
// Predictor
std::unique_ptr<PaddlePredictor> PaddleInferenceAnakinPredictor::Clone() {
template <typename Target>
std::unique_ptr<PaddlePredictor>
PaddleInferenceAnakinPredictor<Target>::Clone() {
VLOG(3) << "Anakin Predictor::clone";
std::unique_ptr<PaddlePredictor> cls(new PaddleInferenceAnakinPredictor());
std::unique_ptr<PaddlePredictor> cls(
new PaddleInferenceAnakinPredictor<Target>());
// construct executer from other graph
auto anakin_predictor_p =
dynamic_cast<PaddleInferenceAnakinPredictor *>(cls.get());
dynamic_cast<PaddleInferenceAnakinPredictor<Target> *>(cls.get());
if (!anakin_predictor_p) {
LOG(ERROR) << "fail to call Init";
return nullptr;
@ -103,14 +140,28 @@ std::unique_ptr<PaddlePredictor> PaddleInferenceAnakinPredictor::Clone() {
return std::move(cls);
}
template class PaddleInferenceAnakinPredictor<anakin::NV>;
template class PaddleInferenceAnakinPredictor<anakin::X86>;
// A factory to help create difference predictor.
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
AnakinConfig, PaddleEngineKind::kAnakin>(const AnakinConfig &config) {
VLOG(3) << "Anakin Predictor create.";
std::unique_ptr<PaddlePredictor> x(
new PaddleInferenceAnakinPredictor(config));
return x;
}
if (config.target_type == AnakinConfig::NVGPU) {
VLOG(3) << "Anakin Predictor create on [ NVIDIA GPU ].";
std::unique_ptr<PaddlePredictor> x(
new PaddleInferenceAnakinPredictor<anakin::NV>(config));
return x;
} else if (config.target_type == AnakinConfig::X86) {
VLOG(3) << "Anakin Predictor create on [ Intel X86 ].";
std::unique_ptr<PaddlePredictor> x(
new PaddleInferenceAnakinPredictor<anakin::X86>(config));
return x;
} else {
VLOG(3) << "Anakin Predictor create on unknown platform.";
return nullptr;
}
};
} // namespace paddle

@ -20,14 +20,16 @@ limitations under the License. */
#pragma once
#include <vector>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
// from anakin
#include "framework/core/net/net.h"
#include "framework/graph/graph.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "saber/core/shape.h"
#include "saber/saber_types.h"
namespace paddle {
template <typename Target>
class PaddleInferenceAnakinPredictor : public PaddlePredictor {
public:
PaddleInferenceAnakinPredictor() {}
@ -42,19 +44,21 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor {
std::unique_ptr<PaddlePredictor> Clone() override;
anakin::Net<anakin::NV, anakin::saber::AK_FLOAT, anakin::Precision::FP32>&
anakin::Net<Target, anakin::saber::AK_FLOAT, anakin::Precision::FP32>&
get_executer();
~PaddleInferenceAnakinPredictor() override{};
~PaddleInferenceAnakinPredictor() override {
delete executor_p_;
executor_p_ = nullptr;
};
private:
bool Init(const AnakinConfig& config);
anakin::graph::Graph<anakin::NV, anakin::saber::AK_FLOAT,
anakin::Precision::FP32>
anakin::graph::Graph<Target, anakin::saber::AK_FLOAT, anakin::Precision::FP32>
graph_;
anakin::Net<anakin::NV, anakin::saber::AK_FLOAT, anakin::Precision::FP32>
executor_;
anakin::Net<Target, anakin::saber::AK_FLOAT, anakin::Precision::FP32>*
executor_p_{nullptr};
AnakinConfig config_;
};

@ -12,18 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include "gflags/gflags.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
DEFINE_string(model, "", "Directory of the inference model.");
DEFINE_string(model, "", "Directory of the inference model(mobile_v2).");
namespace paddle {
AnakinConfig GetConfig() {
AnakinConfig config;
// using AnakinConfig::X86 if you need to use cpu to do inference
config.target_type = AnakinConfig::NVGPU;
config.model_file = FLAGS_model;
config.device = 0;
config.max_batch_size = 1;
@ -36,7 +38,6 @@ TEST(inference, anakin) {
CreatePaddlePredictor<AnakinConfig, PaddleEngineKind::kAnakin>(config);
float data[1 * 3 * 224 * 224] = {1.0f};
PaddleTensor tensor;
tensor.name = "input_0";
tensor.shape = std::vector<int>({1, 3, 224, 224});
@ -44,22 +45,20 @@ TEST(inference, anakin) {
tensor.dtype = PaddleDType::FLOAT32;
// For simplicity, we set all the slots with the same data.
std::vector<PaddleTensor> paddle_tensor_feeds;
paddle_tensor_feeds.emplace_back(std::move(tensor));
std::vector<PaddleTensor> paddle_tensor_feeds(1, tensor);
PaddleTensor tensor_out;
tensor_out.name = "prob_out";
tensor_out.shape = std::vector<int>({1000, 1});
tensor_out.shape = std::vector<int>({});
tensor_out.data = PaddleBuf();
tensor_out.dtype = PaddleDType::FLOAT32;
std::vector<PaddleTensor> outputs;
outputs.emplace_back(std::move(tensor_out));
std::vector<PaddleTensor> outputs(1, tensor_out);
ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs));
float* data_o = static_cast<float*>(outputs[0].data.data());
for (size_t j = 0; j < 1000; ++j) {
for (size_t j = 0; j < outputs[0].data.length(); ++j) {
LOG(INFO) << "output[" << j << "]: " << data_o[j];
}
}

@ -20,8 +20,8 @@ limitations under the License. */
#include <glog/logging.h> // use glog instead of PADDLE_ENFORCE to avoid importing other paddle header files.
#include <fstream>
#include <iostream>
#include "paddle/fluid/inference/demo_ci/utils.h"
#include "paddle/fluid/platform/enforce.h"
#include "utils.h"
#ifdef PADDLE_WITH_CUDA
DECLARE_double(fraction_of_gpu_memory_to_use);

@ -44,7 +44,7 @@ class PaddleBuf {
PaddleBuf(void* data, size_t length)
: data_(data), length_(length), memory_owned_{false} {}
// Own memory.
PaddleBuf(size_t length)
explicit PaddleBuf(size_t length)
: data_(new char[length]), length_(length), memory_owned_(true) {}
// Resize to `length` bytes.
void Resize(size_t length);
@ -126,9 +126,11 @@ struct NativeConfig : public PaddlePredictor::Config {
// Configurations for Anakin engine.
struct AnakinConfig : public PaddlePredictor::Config {
enum TargetType { NVGPU = 0, X86 };
int device;
std::string model_file;
int max_batch_size{-1};
TargetType target_type;
};
struct TensorRTConfig : public NativeConfig {

@ -13,7 +13,8 @@ nv_test(test_trt_fc_op SRCS test_fc_op.cc fc_op.cc
DEPS ${FLUID_CORE_MODULES} tensorrt_engine mul_op SERIAL)
nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc
DEPS ${FLUID_CORE_MODULES} tensorrt_engine activation_op SERIAL)
nv_test(test_trt_conv_op SRCS test_conv2d_op.cc conv2d_op.cc
DEPS ${FLUID_CORE_MODULES} tensorrt_engine conv_op SERIAL)
nv_test(test_trt_pool2d_op SRCS test_pool2d_op.cc pool2d_op.cc
DEPS ${FLUID_CORE_MODULES} tensorrt_engine pool_op SERIAL)

@ -20,11 +20,60 @@ namespace tensorrt {
class Conv2dOpConverter : public OpConverter {
public:
Conv2dOpConverter() {}
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope, bool test_mode) override {
LOG(INFO)
<< "convert a fluid conv2d op to tensorrt conv layer without bias";
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1); // Y is a weight
PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1);
auto* X = engine_->GetITensor(op_desc.Input("Input").front());
// Declare weights
auto* Y_v = scope.FindVar(op_desc.Input("Filter").front());
PADDLE_ENFORCE_NOT_NULL(Y_v);
auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
auto* weight_data = Y_t->mutable_data<float>(platform::CPUPlace());
PADDLE_ENFORCE_EQ(Y_t->dims().size(), 4UL);
const int n_output = Y_t->dims()[0];
const int filter_h = Y_t->dims()[2];
const int filter_w = Y_t->dims()[3];
const int groups = boost::get<int>(op_desc.GetAttr("groups"));
const std::vector<int> dilations =
boost::get<std::vector<int>>(op_desc.GetAttr("dilations"));
const std::vector<int> strides =
boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
const std::vector<int> paddings =
boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
nvinfer1::DimsHW nv_ksize(filter_h, filter_w);
nvinfer1::DimsHW nv_dilations(dilations[0], dilations[1]);
nvinfer1::DimsHW nv_strides(strides[0], strides[1]);
nvinfer1::DimsHW nv_paddings(paddings[0], paddings[1]);
TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
static_cast<void*>(weight_data),
Y_t->memory_size() / sizeof(float)};
TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT, nullptr, 0};
auto* layer = TRT_ENGINE_ADD_LAYER(
engine_, Convolution, *const_cast<nvinfer1::ITensor*>(X), n_output,
nv_ksize, weight.get(), bias.get());
PADDLE_ENFORCE(layer != nullptr);
layer->setStride(nv_strides);
layer->setPadding(nv_paddings);
layer->setDilation(nv_dilations);
layer->setNbGroups(groups);
auto output_name = op_desc.Output("Output").front();
engine_->SetITensor(output_name, layer->getOutput(0));
if (test_mode) {
engine_->DeclareOutput(output_name);
}
}
};

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save