1. Add ANAKIN_ROOT compile option

2. refine trt code
test=develop
move-code
nhzlx 6 years ago
parent 4f4daa4b66
commit f3a2e4b3d8

@ -66,7 +66,6 @@ option(WITH_CONTRIB "Compile the third-party contributation" OFF)
option(REPLACE_ENFORCE_GLOG "Replace PADDLE_ENFORCE with glog/CHECK for better debug." OFF)
# TODO(Superjomn) Remove WITH_ANAKIN option if not needed later.
option(WITH_ANAKIN "Compile with Anakin library" OFF)
option(WITH_ANAKIN_SUBGRAPH "Compile with Anakin subgraph library" OFF)
option(ANAKIN_BUILD_FAT_BIN "Build anakin cuda fat-bin lib for all device plantform, ignored when WITH_ANAKIN=OFF" OFF)
option(ANAKIN_BUILD_CROSS_PLANTFORM "Build anakin lib for any nvidia device plantform. ignored when WITH_ANAKIN=OFF" ON)
option(WITH_GRPC "Use grpc as the default rpc framework" ${WITH_DISTRIBUTE})
@ -192,6 +191,7 @@ include(configure) # add paddle env configuration
if(WITH_GPU)
include(cuda)
include(tensorrt)
include(anakin_subgraph)
endif()
if(WITH_MKL OR WITH_MKLML)
include(external/anakin)

@ -0,0 +1,32 @@
# Locate a pre-built Anakin installation for the Anakin subgraph engine.
#
# Inputs:
#   WITH_GPU     - this file returns immediately unless it is enabled.
#   WITH_DSO     - Anakin is only reported as found when it is enabled.
#   ANAKIN_ROOT  - cache path (default "/usr") to search; the ANAKIN_ROOT
#                  environment variable is searched as well.
#
# Results:
#   ANAKIN_INCLUDE_DIR - directory that contains anakin_config.h.
#   ANAKIN_LIBRARY     - full path of the Anakin shared library that was found.
#   ANAKIN_FOUND       - ON only when header and library were found and
#                        WITH_DSO is enabled; OFF otherwise.
#
# When ANAKIN_FOUND is ON, the include/link directories are registered for the
# current directory scope and PADDLE_WITH_ANAKIN is defined.
if(NOT WITH_GPU)
  return()
endif()

set(ANAKIN_ROOT "/usr" CACHE PATH "ANAKIN ROOT")

# NO_DEFAULT_PATH: only the explicitly listed locations are searched.
find_path(ANAKIN_INCLUDE_DIR anakin_config.h
  PATHS ${ANAKIN_ROOT} ${ANAKIN_ROOT}/include
        $ENV{ANAKIN_ROOT} $ENV{ANAKIN_ROOT}/include
  NO_DEFAULT_PATH
)

# Search <root> and <root>/lib for both the cache variable and the environment
# variable, so both sources of ANAKIN_ROOT accept the same layouts.
find_library(ANAKIN_LIBRARY NAMES libanakin_saber_common.so libanakin.so
  PATHS ${ANAKIN_ROOT} ${ANAKIN_ROOT}/lib
        $ENV{ANAKIN_ROOT} $ENV{ANAKIN_ROOT}/lib
  NO_DEFAULT_PATH
  DOC "Path to ANAKIN library.")

# Always give ANAKIN_FOUND a definite value so later `if(ANAKIN_FOUND)` checks
# never depend on a stale or undefined variable.
set(ANAKIN_FOUND OFF)
if(ANAKIN_INCLUDE_DIR AND ANAKIN_LIBRARY AND WITH_DSO)
  set(ANAKIN_FOUND ON)
endif()

if(ANAKIN_FOUND)
  message(STATUS "Current ANAKIN header is ${ANAKIN_INCLUDE_DIR}/anakin_config.h. ")

  # Use the locations that were actually discovered; they differ from
  # ANAKIN_ROOT when the installation was found via $ENV{ANAKIN_ROOT}.
  get_filename_component(ANAKIN_LIBRARY_DIR "${ANAKIN_LIBRARY}" DIRECTORY)

  include_directories(${ANAKIN_INCLUDE_DIR})
  include_directories(${ANAKIN_INCLUDE_DIR}/saber)
  # Kept from the original so existing ANAKIN_ROOT-based setups are unchanged.
  include_directories(${ANAKIN_ROOT}/include)
  include_directories(${ANAKIN_ROOT}/include/saber)
  link_directories(${ANAKIN_LIBRARY_DIR})
  add_definitions(-DPADDLE_WITH_ANAKIN)
endif()

@ -33,5 +33,6 @@ if(TENSORRT_FOUND)
message(STATUS "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. "
"Current TensorRT version is v${TENSORRT_MAJOR_VERSION}. ")
include_directories(${TENSORRT_INCLUDE_DIR})
link_directories(${TENSORRT_LIBRARY})
add_definitions(-DPADDLE_WITH_TENSORRT)
endif()

@ -17,7 +17,7 @@ if (TENSORRT_FOUND)
add_subdirectory(tensorrt)
endif()
if (WITH_ANAKIN_SUBGRAPH)
if (ANAKIN_FOUND)
add_subdirectory(anakin)
endif()

@ -1,4 +1,4 @@
cc_library(anakin_engine SRCS engine.cc)
cc_library(anakin_engine SRCS engine.cc DEPS framework_proto)
cc_library(anakin_op_teller SRCS op_teller.cc DEPS framework_proto)
target_link_libraries(anakin_engine anakin anakin_saber_common)
cc_test(test_anakin_engine SRCS test_anakin_engine.cc DEPS anakin_engine)

@ -1,19 +1,19 @@
cc_library(anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc
elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc detection_out.cc scale.cc dropout.cc im2sequence.cc sum.cc DEPS anakin_engine framework_proto scope op_registry)
cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op)
cc_test(test_anakin_conv2d SRCS test_conv2d_op.cc DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv)
cc_test(test_anakin_activation SRCS test_activation_op.cc DEPS activation_op anakin_op_converter)
cc_test(test_anakin_pool2d SRCS test_pool2d_op.cc DEPS anakin_op_converter pool_op pooling)
cc_test(test_anakin_concat SRCS test_concat_op.cc DEPS anakin_op_converter concat_op concat_and_split)
cc_test(test_anakin_split SRCS test_split_op.cc DEPS anakin_op_converter split_op concat_and_split)
cc_test(test_anakin_elementwise SRCS test_elementwise_op.cc DEPS anakin_op_converter elementwise_add_op elementwise_mul_op)
cc_test(test_anakin_relu SRCS test_relu_op.cc DEPS activation_op anakin_op_converter SERIAL)
cc_test(test_anakin_softmax SRCS test_softmax_op.cc DEPS anakin_op_converter softmax_op softmax)
cc_test(test_anakin_reshape SRCS test_reshape_op.cc DEPS anakin_op_converter reshape_op)
cc_test(test_anakin_flatten SRCS test_flatten_op.cc DEPS anakin_op_converter flatten_op reshape_op)
cc_test(test_anakin_transpose SRCS test_transpose_op.cc DEPS anakin_op_converter transpose_op)
cc_test(test_anakin_batch_norm SRCS test_batch_norm_op.cc DEPS anakin_op_converter batch_norm_op)
cc_test(test_anakin_dropout SRCS test_dropout_op.cc DEPS anakin_op_converter dropout_op)
cc_test(test_anakin_im2sequence SRCS test_im2sequence_op.cc DEPS anakin_op_converter im2sequence_op im2col)
cc_test(test_anakin_sum SRCS test_sum_op.cc DEPS anakin_op_converter sum_op selected_rows_functor)
cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op SERIAL)
cc_test(test_anakin_conv2d SRCS test_conv2d_op.cc DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv SERIAL)
cc_test(test_anakin_activation SRCS test_activation_op.cc DEPS activation_op anakin_op_converter SERIAL)
cc_test(test_anakin_pool2d SRCS test_pool2d_op.cc DEPS anakin_op_converter pool_op pooling SERIAL)
cc_test(test_anakin_concat SRCS test_concat_op.cc DEPS anakin_op_converter concat_op concat_and_split SERIAL)
cc_test(test_anakin_split SRCS test_split_op.cc DEPS anakin_op_converter split_op concat_and_split SERIAL)
cc_test(test_anakin_elementwise SRCS test_elementwise_op.cc DEPS anakin_op_converter elementwise_add_op elementwise_mul_op SERIAL)
cc_test(test_anakin_relu SRCS test_relu_op.cc DEPS activation_op anakin_op_converter SERIAL SERIAL)
cc_test(test_anakin_softmax SRCS test_softmax_op.cc DEPS anakin_op_converter softmax_op softmax SERIAL)
cc_test(test_anakin_reshape SRCS test_reshape_op.cc DEPS anakin_op_converter reshape_op SERIAL)
cc_test(test_anakin_flatten SRCS test_flatten_op.cc DEPS anakin_op_converter flatten_op reshape_op SERIAL)
cc_test(test_anakin_transpose SRCS test_transpose_op.cc DEPS anakin_op_converter transpose_op SERIAL)
cc_test(test_anakin_batch_norm SRCS test_batch_norm_op.cc DEPS anakin_op_converter batch_norm_op SERIAL)
cc_test(test_anakin_dropout SRCS test_dropout_op.cc DEPS anakin_op_converter dropout_op SERIAL)
#cc_test(test_anakin_im2sequence SRCS test_im2sequence_op.cc DEPS anakin_op_converter im2sequence_op im2col)
cc_test(test_anakin_sum SRCS test_sum_op.cc DEPS anakin_op_converter sum_op selected_rows_functor SERIAL)

@ -26,7 +26,7 @@ static void test_activation_op(const std::string &op_type) {
PADDLE_ENFORCE(converter != nullptr);
std::unordered_set<std::string> parameters;
framework::Scope scope;
AnakinConvertValidation validator(parameters, scope);
AnakinConvertValidation validator(parameters, &scope);
validator.DeclInputVar("act-X", {10, 6, 1, 1});
validator.DeclOutputVar("act-Out", {10, 6, 1, 1});
framework::OpDesc desc;

@ -24,7 +24,7 @@ TEST(batch_norm_op, test) {
{"batch_norm_scale", "batch_norm_bias", "batch_norm_mean",
"batch_norm_variance"});
framework::Scope scope;
AnakinConvertValidation validator(parameters, scope);
AnakinConvertValidation validator(parameters, &scope);
std::vector<int> param_shape{2};
validator.DeclInputVar("batch_norm_X", {1, 2, 5, 5});

@ -24,7 +24,7 @@ namespace anakin {
TEST(concat_op, test) {
std::unordered_set<std::string> parameters({""});
framework::Scope scope;
AnakinConvertValidation validator(parameters, scope);
AnakinConvertValidation validator(parameters, &scope);
validator.DeclInputVar("concat_x1", {1, 2, 1, 1});
validator.DeclInputVar("concat_x2", {1, 3, 1, 1});
validator.DeclInputVar("concat_x3", {1, 1, 1, 1});
@ -47,7 +47,7 @@ TEST(concat_op, test) {
TEST(concat_op, test2) {
std::unordered_set<std::string> parameters({""});
framework::Scope scope;
AnakinConvertValidation validator(parameters, scope);
AnakinConvertValidation validator(parameters, &scope);
validator.DeclInputVar("concat_x1", {1, 4});
validator.DeclInputVar("concat_x2", {3, 4});
validator.DeclInputVar("concat_x3", {2, 4});

@ -27,7 +27,7 @@ TEST(conv2d_op, test) {
ASSERT_TRUE(conv2d_converter != nullptr);
std::unordered_set<std::string> parameters({"conv2d-Y"});
framework::Scope scope;
AnakinConvertValidation validator(parameters, scope);
AnakinConvertValidation validator(parameters, &scope);
validator.DeclInputVar("conv2d-X", {1, 3, 3, 3});
validator.DeclParamVar("conv2d-Y", {4, 3, 1, 1});
validator.DeclOutputVar("conv2d-Out", {1, 4, 3, 3});

@ -24,7 +24,7 @@ namespace anakin {
TEST(dropout_op, native) {
std::unordered_set<std::string> parameters;
framework::Scope scope;
AnakinConvertValidation validator(parameters, scope);
AnakinConvertValidation validator(parameters, &scope);
validator.DeclInputVar("x", {1, 1, 2, 2});
validator.DeclOutputVar("out", {1, 1, 2, 2});
validator.DeclOutputVar("mask", {1, 1, 2, 2});

@ -24,7 +24,7 @@ namespace anakin {
static void test_elementwise_op(const std::string &op_type) {
std::unordered_set<std::string> parameters;
framework::Scope scope;
AnakinConvertValidation validator(parameters, scope);
AnakinConvertValidation validator(parameters, &scope);
validator.DeclInputVar("x", {1, 1, 2, 2});
validator.DeclInputVar("y", {1, 1, 2, 2});
validator.DeclOutputVar("out", {1, 1, 2, 2});

@ -26,7 +26,7 @@ TEST(fc_op, test) {
std::unordered_set<std::string> parameters({"mul_y"});
framework::Scope scope;
AnakinConvertValidation validator(parameters, scope);
AnakinConvertValidation validator(parameters, &scope);
validator.DeclInputVar("mul_x", {1, 1, 2, 2});
validator.DeclParamVar("mul_y", {4, 2});
validator.DeclOutputVar("mul_out", {1, 2});

@ -26,7 +26,7 @@ TEST(flatten_op, test) {
std::unordered_set<std::string> parameters;
framework::Scope scope;
AnakinConvertValidation validator(parameters, scope);
AnakinConvertValidation validator(parameters, &scope);
validator.DeclInputVar("flatten-X", {3, 10, 10, 4});
validator.DeclOutputVar("flatten-Out", {3, 400, 1, 1});
framework::OpDesc desc;

@ -24,7 +24,7 @@ namespace anakin {
TEST(im2sequence_op, native) {
std::unordered_set<std::string> parameters;
framework::Scope scope;
AnakinConvertValidation validator(parameters, scope);
AnakinConvertValidation validator(parameters, &scope);
std::vector<int> kernels = {6, 1};
std::vector<int> strides = {1, 1};

@ -27,7 +27,7 @@ void test_pool2d(bool global_pooling, bool ceil_mode,
framework::Scope scope;
std::unordered_set<std::string> parameters;
AnakinConvertValidation validator(parameters, scope);
AnakinConvertValidation validator(parameters, &scope);
// The ITensor's Dims should not contain the batch size.
// So, the ITensor's Dims of input and output should be C * H * W.
@ -72,7 +72,7 @@ void test_pool2d2(bool global_pooling, bool ceil_mode,
framework::Scope scope;
std::unordered_set<std::string> parameters;
AnakinConvertValidation validator(parameters, scope);
AnakinConvertValidation validator(parameters, &scope);
// The ITensor's Dims should not contain the batch size.
// So, the ITensor's Dims of input and output should be C * H * W.

@ -26,7 +26,7 @@ static void test_activation_op(const std::string &op_type) {
PADDLE_ENFORCE(converter != nullptr);
std::unordered_set<std::string> parameters;
framework::Scope scope;
AnakinConvertValidation validator(parameters, scope);
AnakinConvertValidation validator(parameters, &scope);
validator.DeclInputVar("act-X", {10, 6, 1, 1});
validator.DeclOutputVar("act-Out", {10, 6, 1, 1});
framework::OpDesc desc;

@ -25,7 +25,7 @@ TEST(reshape, test) {
ASSERT_TRUE(converter);
framework::Scope scope;
std::unordered_set<std::string> parameters;
AnakinConvertValidation validator(parameters, scope);
AnakinConvertValidation validator(parameters, &scope);
// validator.DeclInputVar("reshape-X", {2, 3, 3, 1});
// validator.DeclOutputVar("reshape-Out", {3, 2, 1, 3});
@ -48,7 +48,7 @@ TEST(reshape, test) {
TEST(reshape, test2) {
framework::Scope scope;
std::unordered_set<std::string> parameters;
AnakinConvertValidation validator(parameters, scope);
AnakinConvertValidation validator(parameters, &scope);
validator.DeclInputVar("reshape-X", {1, 2, 4});
validator.DeclOutputVar("reshape-Out", {1, 4, 2});

@ -25,10 +25,10 @@ TEST(softmax, test) {
ASSERT_TRUE(converter);
framework::Scope scope;
std::unordered_set<std::string> parameters;
AnakinConvertValidation validator(parameters, scope);
AnakinConvertValidation validator(parameters, &scope);
validator.DeclInputVar("softmax-X", {1, 10});
validator.DeclOutputVar("softmax-Out", {1, 10});
validator.DeclInputVar("softmax-X", {1, 10, 2});
validator.DeclOutputVar("softmax-Out", {1, 10, 2});
framework::OpDesc desc;
desc.SetType("softmax");

@ -26,7 +26,7 @@ void AnakinSliceTest(const std::vector<int> &in_shape,
const std::vector<int> &sections) {
std::unordered_set<std::string> parameters({""});
framework::Scope scope;
AnakinConvertValidation validator(parameters, scope);
AnakinConvertValidation validator(parameters, &scope);
validator.DeclInputVar("split_input", in_shape);
std::vector<std::string> output_vars;

@ -25,7 +25,7 @@ namespace anakin {
TEST(sum, native) {
std::unordered_set<std::string> parameters;
framework::Scope scope;
AnakinConvertValidation validator(parameters, scope);
AnakinConvertValidation validator(parameters, &scope);
validator.DeclInputVar("sum_x1", {1, 2, 1, 2});
validator.DeclInputVar("sum_x2", {1, 2, 1, 2});
validator.DeclOutputVar("sum_out", {1, 2, 1, 2});

@ -25,7 +25,7 @@ TEST(transpose_op, test) {
ASSERT_TRUE(converter != nullptr);
std::unordered_set<std::string> parameters;
framework::Scope scope;
AnakinConvertValidation validator(parameters, scope);
AnakinConvertValidation validator(parameters, &scope);
validator.DeclInputVar("transpose-X", {2, 3, 4, 5});
validator.DeclOutputVar("transpose-Out", {4, 2, 5, 3});
@ -47,7 +47,7 @@ TEST(transpose_op, test) {
TEST(transpose_op, test2) {
std::unordered_set<std::string> parameters;
framework::Scope scope;
AnakinConvertValidation validator(parameters, scope);
AnakinConvertValidation validator(parameters, &scope);
validator.DeclInputVar("transpose-X", {3, 4, 5});
validator.DeclOutputVar("transpose-Out", {3, 5, 4});

@ -84,7 +84,7 @@ class AnakinConvertValidation {
AnakinConvertValidation() = delete;
AnakinConvertValidation(const std::unordered_set<std::string>& parameters,
framework::Scope& scope)
framework::Scope* scope)
: parameters_(parameters), scope_(scope), place_(0) {
PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0);
engine_.reset(new AnakinEngine<NV, Precision::FP32>(true));
@ -108,7 +108,7 @@ class AnakinConvertValidation {
void DeclVar(const std::string& name, const std::vector<int> dim_vec) {
platform::CUDADeviceContext ctx(place_);
auto* x = scope_.Var(name);
auto* x = scope_->Var(name);
auto* x_tensor = x->GetMutable<framework::LoDTensor>();
x_tensor->Resize(framework::make_ddim(dim_vec));
RandomizeTensor(x_tensor, place_, ctx);
@ -120,13 +120,13 @@ class AnakinConvertValidation {
// should init anakin engine here.
Singleton<AnakinOpConverter>::Global().ConvertOp(
desc, parameters_, scope_, engine_.get(), true /*test_mode*/);
desc, parameters_, *scope_, engine_.get(), true /*test_mode*/);
engine_->Freeze();
std::map<std::string, std::vector<int>> temp_max_input_shape;
for (const auto& input : op_desc_->InputArgumentNames()) {
if (parameters_.count(input)) continue;
auto& t = inference::analysis::GetFromScope<framework::LoDTensor>(scope_,
auto& t = inference::analysis::GetFromScope<framework::LoDTensor>(*scope_,
input);
auto t_shape = framework::vectorize2int(t.dims());
while (t_shape.size() < 4) {
@ -147,14 +147,14 @@ class AnakinConvertValidation {
std::unordered_set<std::string> neglected_output = {}) {
// Execute Fluid Op
platform::CUDADeviceContext ctx(place_);
op_->Run(scope_, place_);
op_->Run(*scope_, place_);
// std::vector<framework::LoDTensor> input_vector;
// std::vector<framework::LoDTensor> output_vector;
std::map<std::string, framework::LoDTensor*> inputs;
for (const auto& input : op_desc_->InputArgumentNames()) {
if (parameters_.count(input)) continue;
auto* var = scope_.FindVar(input);
auto* var = scope_->FindVar(input);
auto tensor = var->GetMutable<framework::LoDTensor>();
inputs.insert({input, tensor});
}
@ -164,7 +164,7 @@ class AnakinConvertValidation {
for (const auto& output : op_desc_->OutputArgumentNames()) {
if (neglected_output.count(output)) continue;
std::vector<float> fluid_out;
auto* var = scope_.FindVar(output);
auto* var = scope_->FindVar(output);
auto tensor = var->GetMutable<framework::LoDTensor>();
framework::TensorToVector(*tensor, ctx, &fluid_out);
fluid_outputs.push_back(fluid_out);
@ -177,7 +177,7 @@ class AnakinConvertValidation {
for (const auto& output : op_desc_->OutputArgumentNames()) {
if (neglected_output.count(output)) continue;
std::vector<float> anakin_out;
auto* var = scope_.FindVar(output);
auto* var = scope_->FindVar(output);
auto tensor = var->GetMutable<framework::LoDTensor>();
framework::TensorToVector(*tensor, ctx, &anakin_out);
@ -189,15 +189,13 @@ class AnakinConvertValidation {
}
}
framework::Scope& scope() { return scope_; }
private:
std::unique_ptr<AnakinNvEngineT> engine_{nullptr};
cudaStream_t stream_;
std::unique_ptr<framework::OperatorBase> op_;
std::unique_ptr<framework::OpDesc> op_desc_;
const std::unordered_set<std::string>& parameters_;
framework::Scope& scope_;
framework::Scope* scope_;
platform::CUDAPlace place_;
};

@ -97,7 +97,11 @@ void IRPassManager::CreatePasses(Argument *argument,
bool use_static_engine = argument->tensorrt_use_static_engine();
bool model_from_memory = argument->model_from_memory();
if ((!model_from_memory && use_static_engine)) {
bool int8_valid = !(model_from_memory && enable_int8);
PADDLE_ENFORCE(int8_valid,
"TRT INT8 Now don't support model load from memory.");
if ((!model_from_memory && use_static_engine) || enable_int8) {
std::string model_opt_cache_dir =
argument->Has("model_dir")
? argument->model_dir()

@ -1,4 +1,4 @@
cc_library(subgraph_detector SRCS subgraph_detector.cc DEPS proto_desc)
cc_library(subgraph_detector SRCS subgraph_detector.cc subgraph_util.cc DEPS proto_desc)
if(WITH_TESTING)
add_dependencies(subgraph_detector gtest)
endif()
@ -15,7 +15,7 @@ if (WITH_GPU AND TENSORRT_FOUND)
set(INFER_IR_PASSES ${INFER_IR_PASSES} tensorrt_subgraph_pass CACHE INTERNAL "")
endif()
if (WITH_ANAKIN_SUBGRAPH)
if (ANAKIN_FOUND)
cc_library(anakin_subgraph_pass SRCS anakin_subgraph_pass.cc DEPS subgraph_detector anakin_op_teller)
set(analysis_deps ${analysis_deps}

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save