1. Add ANAKIN_ROOT compile option

2. refine trt code
test=develop
move-code
nhzlx 6 years ago
parent 4f4daa4b66
commit f3a2e4b3d8

@ -66,7 +66,6 @@ option(WITH_CONTRIB "Compile the third-party contributation" OFF)
option(REPLACE_ENFORCE_GLOG "Replace PADDLE_ENFORCE with glog/CHECK for better debug." OFF) option(REPLACE_ENFORCE_GLOG "Replace PADDLE_ENFORCE with glog/CHECK for better debug." OFF)
# TODO(Superjomn) Remove WITH_ANAKIN option if not needed later. # TODO(Superjomn) Remove WITH_ANAKIN option if not needed later.
option(WITH_ANAKIN "Compile with Anakin library" OFF) option(WITH_ANAKIN "Compile with Anakin library" OFF)
option(WITH_ANAKIN_SUBGRAPH "Compile with Anakin subgraph library" OFF)
option(ANAKIN_BUILD_FAT_BIN "Build anakin cuda fat-bin lib for all device plantform, ignored when WITH_ANAKIN=OFF" OFF) option(ANAKIN_BUILD_FAT_BIN "Build anakin cuda fat-bin lib for all device plantform, ignored when WITH_ANAKIN=OFF" OFF)
option(ANAKIN_BUILD_CROSS_PLANTFORM "Build anakin lib for any nvidia device plantform. ignored when WITH_ANAKIN=OFF" ON) option(ANAKIN_BUILD_CROSS_PLANTFORM "Build anakin lib for any nvidia device plantform. ignored when WITH_ANAKIN=OFF" ON)
option(WITH_GRPC "Use grpc as the default rpc framework" ${WITH_DISTRIBUTE}) option(WITH_GRPC "Use grpc as the default rpc framework" ${WITH_DISTRIBUTE})
@ -192,6 +191,7 @@ include(configure) # add paddle env configuration
if(WITH_GPU) if(WITH_GPU)
include(cuda) include(cuda)
include(tensorrt) include(tensorrt)
include(anakin_subgraph)
endif() endif()
if(WITH_MKL OR WITH_MKLML) if(WITH_MKL OR WITH_MKLML)
include(external/anakin) include(external/anakin)

@ -0,0 +1,32 @@
# Locate a pre-built Anakin installation for the Anakin subgraph engine.
#
# Inputs:
#   WITH_GPU    - nothing is probed (and nothing is set) unless this is on.
#   WITH_DSO    - Anakin is only reported as found when this is on.
#   ANAKIN_ROOT - cache path (default "/usr") that is searched; the
#                 environment variable of the same name is searched as well.
#
# Outputs:
#   ANAKIN_INCLUDE_DIR - directory that contains anakin_config.h.
#   ANAKIN_LIBRARY     - full path of the Anakin shared library that was found.
#   ANAKIN_LIBRARY_DIR - directory of ANAKIN_LIBRARY (only set when found).
#   ANAKIN_FOUND       - ON when header and library were found and WITH_DSO
#                        is on, OFF otherwise.
#
# When found, the Anakin include/link directories and the
# PADDLE_WITH_ANAKIN definition are added at directory scope.
if(NOT WITH_GPU)
  return()
endif()

set(ANAKIN_ROOT "/usr" CACHE PATH "ANAKIN ROOT")

# Only the user-supplied roots are searched (NO_DEFAULT_PATH), never the
# system default locations.
find_path(ANAKIN_INCLUDE_DIR anakin_config.h
  PATHS ${ANAKIN_ROOT} ${ANAKIN_ROOT}/include
        $ENV{ANAKIN_ROOT} $ENV{ANAKIN_ROOT}/include
  NO_DEFAULT_PATH
)

# ${ANAKIN_ROOT}/lib is searched too, mirroring the $ENV{ANAKIN_ROOT}/lib entry.
find_library(ANAKIN_LIBRARY NAMES libanakin_saber_common.so libanakin.so
  PATHS ${ANAKIN_ROOT} ${ANAKIN_ROOT}/lib
        $ENV{ANAKIN_ROOT} $ENV{ANAKIN_ROOT}/lib
  NO_DEFAULT_PATH
  DOC "Path to ANAKIN library.")

# Start from a definite OFF so ANAKIN_FOUND can never keep a stale value when
# the files exist but WITH_DSO is disabled.
set(ANAKIN_FOUND OFF)
if(ANAKIN_INCLUDE_DIR AND ANAKIN_LIBRARY AND WITH_DSO)
  set(ANAKIN_FOUND ON)
endif()

if(ANAKIN_FOUND)
  message(STATUS "Current ANAKIN header is ${ANAKIN_INCLUDE_DIR}/anakin_config.h. ")

  # Use the locations that were actually discovered: the hit may have come
  # from $ENV{ANAKIN_ROOT} rather than from the ANAKIN_ROOT cache entry.
  get_filename_component(ANAKIN_LIBRARY_DIR "${ANAKIN_LIBRARY}" DIRECTORY)
  include_directories("${ANAKIN_INCLUDE_DIR}")
  link_directories("${ANAKIN_LIBRARY_DIR}")

  # Directories the module has always exported, kept for compatibility.
  # NOTE(review): the saber headers are still only looked up under the cache
  # ANAKIN_ROOT; confirm the layout before deriving them from the found paths.
  include_directories(${ANAKIN_ROOT}/include)
  include_directories(${ANAKIN_ROOT}/include/saber)
  link_directories(${ANAKIN_ROOT})

  add_definitions(-DPADDLE_WITH_ANAKIN)
endif()

@ -33,5 +33,6 @@ if(TENSORRT_FOUND)
message(STATUS "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. " message(STATUS "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. "
"Current TensorRT version is v${TENSORRT_MAJOR_VERSION}. ") "Current TensorRT version is v${TENSORRT_MAJOR_VERSION}. ")
include_directories(${TENSORRT_INCLUDE_DIR}) include_directories(${TENSORRT_INCLUDE_DIR})
link_directories(${TENSORRT_LIBRARY})
add_definitions(-DPADDLE_WITH_TENSORRT) add_definitions(-DPADDLE_WITH_TENSORRT)
endif() endif()

@ -17,7 +17,7 @@ if (TENSORRT_FOUND)
add_subdirectory(tensorrt) add_subdirectory(tensorrt)
endif() endif()
if (WITH_ANAKIN_SUBGRAPH) if (ANAKIN_FOUND)
add_subdirectory(anakin) add_subdirectory(anakin)
endif() endif()

@ -1,4 +1,4 @@
cc_library(anakin_engine SRCS engine.cc) cc_library(anakin_engine SRCS engine.cc DEPS framework_proto)
cc_library(anakin_op_teller SRCS op_teller.cc DEPS framework_proto) cc_library(anakin_op_teller SRCS op_teller.cc DEPS framework_proto)
target_link_libraries(anakin_engine anakin anakin_saber_common) target_link_libraries(anakin_engine anakin anakin_saber_common)
cc_test(test_anakin_engine SRCS test_anakin_engine.cc DEPS anakin_engine) cc_test(test_anakin_engine SRCS test_anakin_engine.cc DEPS anakin_engine)

@ -1,19 +1,19 @@
cc_library(anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc cc_library(anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc
elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc detection_out.cc scale.cc dropout.cc im2sequence.cc sum.cc DEPS anakin_engine framework_proto scope op_registry) elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc detection_out.cc scale.cc dropout.cc im2sequence.cc sum.cc DEPS anakin_engine framework_proto scope op_registry)
cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op) cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op SERIAL)
cc_test(test_anakin_conv2d SRCS test_conv2d_op.cc DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv) cc_test(test_anakin_conv2d SRCS test_conv2d_op.cc DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv SERIAL)
cc_test(test_anakin_activation SRCS test_activation_op.cc DEPS activation_op anakin_op_converter) cc_test(test_anakin_activation SRCS test_activation_op.cc DEPS activation_op anakin_op_converter SERIAL)
cc_test(test_anakin_pool2d SRCS test_pool2d_op.cc DEPS anakin_op_converter pool_op pooling) cc_test(test_anakin_pool2d SRCS test_pool2d_op.cc DEPS anakin_op_converter pool_op pooling SERIAL)
cc_test(test_anakin_concat SRCS test_concat_op.cc DEPS anakin_op_converter concat_op concat_and_split) cc_test(test_anakin_concat SRCS test_concat_op.cc DEPS anakin_op_converter concat_op concat_and_split SERIAL)
cc_test(test_anakin_split SRCS test_split_op.cc DEPS anakin_op_converter split_op concat_and_split) cc_test(test_anakin_split SRCS test_split_op.cc DEPS anakin_op_converter split_op concat_and_split SERIAL)
cc_test(test_anakin_elementwise SRCS test_elementwise_op.cc DEPS anakin_op_converter elementwise_add_op elementwise_mul_op) cc_test(test_anakin_elementwise SRCS test_elementwise_op.cc DEPS anakin_op_converter elementwise_add_op elementwise_mul_op SERIAL)
cc_test(test_anakin_relu SRCS test_relu_op.cc DEPS activation_op anakin_op_converter SERIAL) cc_test(test_anakin_relu SRCS test_relu_op.cc DEPS activation_op anakin_op_converter SERIAL SERIAL)
cc_test(test_anakin_softmax SRCS test_softmax_op.cc DEPS anakin_op_converter softmax_op softmax) cc_test(test_anakin_softmax SRCS test_softmax_op.cc DEPS anakin_op_converter softmax_op softmax SERIAL)
cc_test(test_anakin_reshape SRCS test_reshape_op.cc DEPS anakin_op_converter reshape_op) cc_test(test_anakin_reshape SRCS test_reshape_op.cc DEPS anakin_op_converter reshape_op SERIAL)
cc_test(test_anakin_flatten SRCS test_flatten_op.cc DEPS anakin_op_converter flatten_op reshape_op) cc_test(test_anakin_flatten SRCS test_flatten_op.cc DEPS anakin_op_converter flatten_op reshape_op SERIAL)
cc_test(test_anakin_transpose SRCS test_transpose_op.cc DEPS anakin_op_converter transpose_op) cc_test(test_anakin_transpose SRCS test_transpose_op.cc DEPS anakin_op_converter transpose_op SERIAL)
cc_test(test_anakin_batch_norm SRCS test_batch_norm_op.cc DEPS anakin_op_converter batch_norm_op) cc_test(test_anakin_batch_norm SRCS test_batch_norm_op.cc DEPS anakin_op_converter batch_norm_op SERIAL)
cc_test(test_anakin_dropout SRCS test_dropout_op.cc DEPS anakin_op_converter dropout_op) cc_test(test_anakin_dropout SRCS test_dropout_op.cc DEPS anakin_op_converter dropout_op SERIAL)
cc_test(test_anakin_im2sequence SRCS test_im2sequence_op.cc DEPS anakin_op_converter im2sequence_op im2col) #cc_test(test_anakin_im2sequence SRCS test_im2sequence_op.cc DEPS anakin_op_converter im2sequence_op im2col)
cc_test(test_anakin_sum SRCS test_sum_op.cc DEPS anakin_op_converter sum_op selected_rows_functor) cc_test(test_anakin_sum SRCS test_sum_op.cc DEPS anakin_op_converter sum_op selected_rows_functor SERIAL)

@ -26,7 +26,7 @@ static void test_activation_op(const std::string &op_type) {
PADDLE_ENFORCE(converter != nullptr); PADDLE_ENFORCE(converter != nullptr);
std::unordered_set<std::string> parameters; std::unordered_set<std::string> parameters;
framework::Scope scope; framework::Scope scope;
AnakinConvertValidation validator(parameters, scope); AnakinConvertValidation validator(parameters, &scope);
validator.DeclInputVar("act-X", {10, 6, 1, 1}); validator.DeclInputVar("act-X", {10, 6, 1, 1});
validator.DeclOutputVar("act-Out", {10, 6, 1, 1}); validator.DeclOutputVar("act-Out", {10, 6, 1, 1});
framework::OpDesc desc; framework::OpDesc desc;

@ -24,7 +24,7 @@ TEST(batch_norm_op, test) {
{"batch_norm_scale", "batch_norm_bias", "batch_norm_mean", {"batch_norm_scale", "batch_norm_bias", "batch_norm_mean",
"batch_norm_variance"}); "batch_norm_variance"});
framework::Scope scope; framework::Scope scope;
AnakinConvertValidation validator(parameters, scope); AnakinConvertValidation validator(parameters, &scope);
std::vector<int> param_shape{2}; std::vector<int> param_shape{2};
validator.DeclInputVar("batch_norm_X", {1, 2, 5, 5}); validator.DeclInputVar("batch_norm_X", {1, 2, 5, 5});

@ -24,7 +24,7 @@ namespace anakin {
TEST(concat_op, test) { TEST(concat_op, test) {
std::unordered_set<std::string> parameters({""}); std::unordered_set<std::string> parameters({""});
framework::Scope scope; framework::Scope scope;
AnakinConvertValidation validator(parameters, scope); AnakinConvertValidation validator(parameters, &scope);
validator.DeclInputVar("concat_x1", {1, 2, 1, 1}); validator.DeclInputVar("concat_x1", {1, 2, 1, 1});
validator.DeclInputVar("concat_x2", {1, 3, 1, 1}); validator.DeclInputVar("concat_x2", {1, 3, 1, 1});
validator.DeclInputVar("concat_x3", {1, 1, 1, 1}); validator.DeclInputVar("concat_x3", {1, 1, 1, 1});
@ -47,7 +47,7 @@ TEST(concat_op, test) {
TEST(concat_op, test2) { TEST(concat_op, test2) {
std::unordered_set<std::string> parameters({""}); std::unordered_set<std::string> parameters({""});
framework::Scope scope; framework::Scope scope;
AnakinConvertValidation validator(parameters, scope); AnakinConvertValidation validator(parameters, &scope);
validator.DeclInputVar("concat_x1", {1, 4}); validator.DeclInputVar("concat_x1", {1, 4});
validator.DeclInputVar("concat_x2", {3, 4}); validator.DeclInputVar("concat_x2", {3, 4});
validator.DeclInputVar("concat_x3", {2, 4}); validator.DeclInputVar("concat_x3", {2, 4});

@ -27,7 +27,7 @@ TEST(conv2d_op, test) {
ASSERT_TRUE(conv2d_converter != nullptr); ASSERT_TRUE(conv2d_converter != nullptr);
std::unordered_set<std::string> parameters({"conv2d-Y"}); std::unordered_set<std::string> parameters({"conv2d-Y"});
framework::Scope scope; framework::Scope scope;
AnakinConvertValidation validator(parameters, scope); AnakinConvertValidation validator(parameters, &scope);
validator.DeclInputVar("conv2d-X", {1, 3, 3, 3}); validator.DeclInputVar("conv2d-X", {1, 3, 3, 3});
validator.DeclParamVar("conv2d-Y", {4, 3, 1, 1}); validator.DeclParamVar("conv2d-Y", {4, 3, 1, 1});
validator.DeclOutputVar("conv2d-Out", {1, 4, 3, 3}); validator.DeclOutputVar("conv2d-Out", {1, 4, 3, 3});

@ -24,7 +24,7 @@ namespace anakin {
TEST(dropout_op, native) { TEST(dropout_op, native) {
std::unordered_set<std::string> parameters; std::unordered_set<std::string> parameters;
framework::Scope scope; framework::Scope scope;
AnakinConvertValidation validator(parameters, scope); AnakinConvertValidation validator(parameters, &scope);
validator.DeclInputVar("x", {1, 1, 2, 2}); validator.DeclInputVar("x", {1, 1, 2, 2});
validator.DeclOutputVar("out", {1, 1, 2, 2}); validator.DeclOutputVar("out", {1, 1, 2, 2});
validator.DeclOutputVar("mask", {1, 1, 2, 2}); validator.DeclOutputVar("mask", {1, 1, 2, 2});

@ -24,7 +24,7 @@ namespace anakin {
static void test_elementwise_op(const std::string &op_type) { static void test_elementwise_op(const std::string &op_type) {
std::unordered_set<std::string> parameters; std::unordered_set<std::string> parameters;
framework::Scope scope; framework::Scope scope;
AnakinConvertValidation validator(parameters, scope); AnakinConvertValidation validator(parameters, &scope);
validator.DeclInputVar("x", {1, 1, 2, 2}); validator.DeclInputVar("x", {1, 1, 2, 2});
validator.DeclInputVar("y", {1, 1, 2, 2}); validator.DeclInputVar("y", {1, 1, 2, 2});
validator.DeclOutputVar("out", {1, 1, 2, 2}); validator.DeclOutputVar("out", {1, 1, 2, 2});

@ -26,7 +26,7 @@ TEST(fc_op, test) {
std::unordered_set<std::string> parameters({"mul_y"}); std::unordered_set<std::string> parameters({"mul_y"});
framework::Scope scope; framework::Scope scope;
AnakinConvertValidation validator(parameters, scope); AnakinConvertValidation validator(parameters, &scope);
validator.DeclInputVar("mul_x", {1, 1, 2, 2}); validator.DeclInputVar("mul_x", {1, 1, 2, 2});
validator.DeclParamVar("mul_y", {4, 2}); validator.DeclParamVar("mul_y", {4, 2});
validator.DeclOutputVar("mul_out", {1, 2}); validator.DeclOutputVar("mul_out", {1, 2});

@ -26,7 +26,7 @@ TEST(flatten_op, test) {
std::unordered_set<std::string> parameters; std::unordered_set<std::string> parameters;
framework::Scope scope; framework::Scope scope;
AnakinConvertValidation validator(parameters, scope); AnakinConvertValidation validator(parameters, &scope);
validator.DeclInputVar("flatten-X", {3, 10, 10, 4}); validator.DeclInputVar("flatten-X", {3, 10, 10, 4});
validator.DeclOutputVar("flatten-Out", {3, 400, 1, 1}); validator.DeclOutputVar("flatten-Out", {3, 400, 1, 1});
framework::OpDesc desc; framework::OpDesc desc;

@ -24,7 +24,7 @@ namespace anakin {
TEST(im2sequence_op, native) { TEST(im2sequence_op, native) {
std::unordered_set<std::string> parameters; std::unordered_set<std::string> parameters;
framework::Scope scope; framework::Scope scope;
AnakinConvertValidation validator(parameters, scope); AnakinConvertValidation validator(parameters, &scope);
std::vector<int> kernels = {6, 1}; std::vector<int> kernels = {6, 1};
std::vector<int> strides = {1, 1}; std::vector<int> strides = {1, 1};

@ -27,7 +27,7 @@ void test_pool2d(bool global_pooling, bool ceil_mode,
framework::Scope scope; framework::Scope scope;
std::unordered_set<std::string> parameters; std::unordered_set<std::string> parameters;
AnakinConvertValidation validator(parameters, scope); AnakinConvertValidation validator(parameters, &scope);
// The ITensor's Dims should not contain the batch size. // The ITensor's Dims should not contain the batch size.
// So, the ITensor's Dims of input and output should be C * H * W. // So, the ITensor's Dims of input and output should be C * H * W.
@ -72,7 +72,7 @@ void test_pool2d2(bool global_pooling, bool ceil_mode,
framework::Scope scope; framework::Scope scope;
std::unordered_set<std::string> parameters; std::unordered_set<std::string> parameters;
AnakinConvertValidation validator(parameters, scope); AnakinConvertValidation validator(parameters, &scope);
// The ITensor's Dims should not contain the batch size. // The ITensor's Dims should not contain the batch size.
// So, the ITensor's Dims of input and output should be C * H * W. // So, the ITensor's Dims of input and output should be C * H * W.

@ -26,7 +26,7 @@ static void test_activation_op(const std::string &op_type) {
PADDLE_ENFORCE(converter != nullptr); PADDLE_ENFORCE(converter != nullptr);
std::unordered_set<std::string> parameters; std::unordered_set<std::string> parameters;
framework::Scope scope; framework::Scope scope;
AnakinConvertValidation validator(parameters, scope); AnakinConvertValidation validator(parameters, &scope);
validator.DeclInputVar("act-X", {10, 6, 1, 1}); validator.DeclInputVar("act-X", {10, 6, 1, 1});
validator.DeclOutputVar("act-Out", {10, 6, 1, 1}); validator.DeclOutputVar("act-Out", {10, 6, 1, 1});
framework::OpDesc desc; framework::OpDesc desc;

@ -25,7 +25,7 @@ TEST(reshape, test) {
ASSERT_TRUE(converter); ASSERT_TRUE(converter);
framework::Scope scope; framework::Scope scope;
std::unordered_set<std::string> parameters; std::unordered_set<std::string> parameters;
AnakinConvertValidation validator(parameters, scope); AnakinConvertValidation validator(parameters, &scope);
// validator.DeclInputVar("reshape-X", {2, 3, 3, 1}); // validator.DeclInputVar("reshape-X", {2, 3, 3, 1});
// validator.DeclOutputVar("reshape-Out", {3, 2, 1, 3}); // validator.DeclOutputVar("reshape-Out", {3, 2, 1, 3});
@ -48,7 +48,7 @@ TEST(reshape, test) {
TEST(reshape, test2) { TEST(reshape, test2) {
framework::Scope scope; framework::Scope scope;
std::unordered_set<std::string> parameters; std::unordered_set<std::string> parameters;
AnakinConvertValidation validator(parameters, scope); AnakinConvertValidation validator(parameters, &scope);
validator.DeclInputVar("reshape-X", {1, 2, 4}); validator.DeclInputVar("reshape-X", {1, 2, 4});
validator.DeclOutputVar("reshape-Out", {1, 4, 2}); validator.DeclOutputVar("reshape-Out", {1, 4, 2});

@ -25,10 +25,10 @@ TEST(softmax, test) {
ASSERT_TRUE(converter); ASSERT_TRUE(converter);
framework::Scope scope; framework::Scope scope;
std::unordered_set<std::string> parameters; std::unordered_set<std::string> parameters;
AnakinConvertValidation validator(parameters, scope); AnakinConvertValidation validator(parameters, &scope);
validator.DeclInputVar("softmax-X", {1, 10}); validator.DeclInputVar("softmax-X", {1, 10, 2});
validator.DeclOutputVar("softmax-Out", {1, 10}); validator.DeclOutputVar("softmax-Out", {1, 10, 2});
framework::OpDesc desc; framework::OpDesc desc;
desc.SetType("softmax"); desc.SetType("softmax");

@ -26,7 +26,7 @@ void AnakinSliceTest(const std::vector<int> &in_shape,
const std::vector<int> &sections) { const std::vector<int> &sections) {
std::unordered_set<std::string> parameters({""}); std::unordered_set<std::string> parameters({""});
framework::Scope scope; framework::Scope scope;
AnakinConvertValidation validator(parameters, scope); AnakinConvertValidation validator(parameters, &scope);
validator.DeclInputVar("split_input", in_shape); validator.DeclInputVar("split_input", in_shape);
std::vector<std::string> output_vars; std::vector<std::string> output_vars;

@ -25,7 +25,7 @@ namespace anakin {
TEST(sum, native) { TEST(sum, native) {
std::unordered_set<std::string> parameters; std::unordered_set<std::string> parameters;
framework::Scope scope; framework::Scope scope;
AnakinConvertValidation validator(parameters, scope); AnakinConvertValidation validator(parameters, &scope);
validator.DeclInputVar("sum_x1", {1, 2, 1, 2}); validator.DeclInputVar("sum_x1", {1, 2, 1, 2});
validator.DeclInputVar("sum_x2", {1, 2, 1, 2}); validator.DeclInputVar("sum_x2", {1, 2, 1, 2});
validator.DeclOutputVar("sum_out", {1, 2, 1, 2}); validator.DeclOutputVar("sum_out", {1, 2, 1, 2});

@ -25,7 +25,7 @@ TEST(transpose_op, test) {
ASSERT_TRUE(converter != nullptr); ASSERT_TRUE(converter != nullptr);
std::unordered_set<std::string> parameters; std::unordered_set<std::string> parameters;
framework::Scope scope; framework::Scope scope;
AnakinConvertValidation validator(parameters, scope); AnakinConvertValidation validator(parameters, &scope);
validator.DeclInputVar("transpose-X", {2, 3, 4, 5}); validator.DeclInputVar("transpose-X", {2, 3, 4, 5});
validator.DeclOutputVar("transpose-Out", {4, 2, 5, 3}); validator.DeclOutputVar("transpose-Out", {4, 2, 5, 3});
@ -47,7 +47,7 @@ TEST(transpose_op, test) {
TEST(transpose_op, test2) { TEST(transpose_op, test2) {
std::unordered_set<std::string> parameters; std::unordered_set<std::string> parameters;
framework::Scope scope; framework::Scope scope;
AnakinConvertValidation validator(parameters, scope); AnakinConvertValidation validator(parameters, &scope);
validator.DeclInputVar("transpose-X", {3, 4, 5}); validator.DeclInputVar("transpose-X", {3, 4, 5});
validator.DeclOutputVar("transpose-Out", {3, 5, 4}); validator.DeclOutputVar("transpose-Out", {3, 5, 4});

@ -84,7 +84,7 @@ class AnakinConvertValidation {
AnakinConvertValidation() = delete; AnakinConvertValidation() = delete;
AnakinConvertValidation(const std::unordered_set<std::string>& parameters, AnakinConvertValidation(const std::unordered_set<std::string>& parameters,
framework::Scope& scope) framework::Scope* scope)
: parameters_(parameters), scope_(scope), place_(0) { : parameters_(parameters), scope_(scope), place_(0) {
PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0); PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0);
engine_.reset(new AnakinEngine<NV, Precision::FP32>(true)); engine_.reset(new AnakinEngine<NV, Precision::FP32>(true));
@ -108,7 +108,7 @@ class AnakinConvertValidation {
void DeclVar(const std::string& name, const std::vector<int> dim_vec) { void DeclVar(const std::string& name, const std::vector<int> dim_vec) {
platform::CUDADeviceContext ctx(place_); platform::CUDADeviceContext ctx(place_);
auto* x = scope_.Var(name); auto* x = scope_->Var(name);
auto* x_tensor = x->GetMutable<framework::LoDTensor>(); auto* x_tensor = x->GetMutable<framework::LoDTensor>();
x_tensor->Resize(framework::make_ddim(dim_vec)); x_tensor->Resize(framework::make_ddim(dim_vec));
RandomizeTensor(x_tensor, place_, ctx); RandomizeTensor(x_tensor, place_, ctx);
@ -120,13 +120,13 @@ class AnakinConvertValidation {
// should init anakin engine here. // should init anakin engine here.
Singleton<AnakinOpConverter>::Global().ConvertOp( Singleton<AnakinOpConverter>::Global().ConvertOp(
desc, parameters_, scope_, engine_.get(), true /*test_mode*/); desc, parameters_, *scope_, engine_.get(), true /*test_mode*/);
engine_->Freeze(); engine_->Freeze();
std::map<std::string, std::vector<int>> temp_max_input_shape; std::map<std::string, std::vector<int>> temp_max_input_shape;
for (const auto& input : op_desc_->InputArgumentNames()) { for (const auto& input : op_desc_->InputArgumentNames()) {
if (parameters_.count(input)) continue; if (parameters_.count(input)) continue;
auto& t = inference::analysis::GetFromScope<framework::LoDTensor>(scope_, auto& t = inference::analysis::GetFromScope<framework::LoDTensor>(*scope_,
input); input);
auto t_shape = framework::vectorize2int(t.dims()); auto t_shape = framework::vectorize2int(t.dims());
while (t_shape.size() < 4) { while (t_shape.size() < 4) {
@ -147,14 +147,14 @@ class AnakinConvertValidation {
std::unordered_set<std::string> neglected_output = {}) { std::unordered_set<std::string> neglected_output = {}) {
// Execute Fluid Op // Execute Fluid Op
platform::CUDADeviceContext ctx(place_); platform::CUDADeviceContext ctx(place_);
op_->Run(scope_, place_); op_->Run(*scope_, place_);
// std::vector<framework::LoDTensor> input_vector; // std::vector<framework::LoDTensor> input_vector;
// std::vector<framework::LoDTensor> output_vector; // std::vector<framework::LoDTensor> output_vector;
std::map<std::string, framework::LoDTensor*> inputs; std::map<std::string, framework::LoDTensor*> inputs;
for (const auto& input : op_desc_->InputArgumentNames()) { for (const auto& input : op_desc_->InputArgumentNames()) {
if (parameters_.count(input)) continue; if (parameters_.count(input)) continue;
auto* var = scope_.FindVar(input); auto* var = scope_->FindVar(input);
auto tensor = var->GetMutable<framework::LoDTensor>(); auto tensor = var->GetMutable<framework::LoDTensor>();
inputs.insert({input, tensor}); inputs.insert({input, tensor});
} }
@ -164,7 +164,7 @@ class AnakinConvertValidation {
for (const auto& output : op_desc_->OutputArgumentNames()) { for (const auto& output : op_desc_->OutputArgumentNames()) {
if (neglected_output.count(output)) continue; if (neglected_output.count(output)) continue;
std::vector<float> fluid_out; std::vector<float> fluid_out;
auto* var = scope_.FindVar(output); auto* var = scope_->FindVar(output);
auto tensor = var->GetMutable<framework::LoDTensor>(); auto tensor = var->GetMutable<framework::LoDTensor>();
framework::TensorToVector(*tensor, ctx, &fluid_out); framework::TensorToVector(*tensor, ctx, &fluid_out);
fluid_outputs.push_back(fluid_out); fluid_outputs.push_back(fluid_out);
@ -177,7 +177,7 @@ class AnakinConvertValidation {
for (const auto& output : op_desc_->OutputArgumentNames()) { for (const auto& output : op_desc_->OutputArgumentNames()) {
if (neglected_output.count(output)) continue; if (neglected_output.count(output)) continue;
std::vector<float> anakin_out; std::vector<float> anakin_out;
auto* var = scope_.FindVar(output); auto* var = scope_->FindVar(output);
auto tensor = var->GetMutable<framework::LoDTensor>(); auto tensor = var->GetMutable<framework::LoDTensor>();
framework::TensorToVector(*tensor, ctx, &anakin_out); framework::TensorToVector(*tensor, ctx, &anakin_out);
@ -189,15 +189,13 @@ class AnakinConvertValidation {
} }
} }
framework::Scope& scope() { return scope_; }
private: private:
std::unique_ptr<AnakinNvEngineT> engine_{nullptr}; std::unique_ptr<AnakinNvEngineT> engine_{nullptr};
cudaStream_t stream_; cudaStream_t stream_;
std::unique_ptr<framework::OperatorBase> op_; std::unique_ptr<framework::OperatorBase> op_;
std::unique_ptr<framework::OpDesc> op_desc_; std::unique_ptr<framework::OpDesc> op_desc_;
const std::unordered_set<std::string>& parameters_; const std::unordered_set<std::string>& parameters_;
framework::Scope& scope_; framework::Scope* scope_;
platform::CUDAPlace place_; platform::CUDAPlace place_;
}; };

@ -97,7 +97,11 @@ void IRPassManager::CreatePasses(Argument *argument,
bool use_static_engine = argument->tensorrt_use_static_engine(); bool use_static_engine = argument->tensorrt_use_static_engine();
bool model_from_memory = argument->model_from_memory(); bool model_from_memory = argument->model_from_memory();
if ((!model_from_memory && use_static_engine)) { bool int8_valid = !(model_from_memory && enable_int8);
PADDLE_ENFORCE(int8_valid,
"TRT INT8 Now don't support model load from memory.");
if ((!model_from_memory && use_static_engine) || enable_int8) {
std::string model_opt_cache_dir = std::string model_opt_cache_dir =
argument->Has("model_dir") argument->Has("model_dir")
? argument->model_dir() ? argument->model_dir()

@ -1,4 +1,4 @@
cc_library(subgraph_detector SRCS subgraph_detector.cc DEPS proto_desc) cc_library(subgraph_detector SRCS subgraph_detector.cc subgraph_util.cc DEPS proto_desc)
if(WITH_TESTING) if(WITH_TESTING)
add_dependencies(subgraph_detector gtest) add_dependencies(subgraph_detector gtest)
endif() endif()
@ -15,7 +15,7 @@ if (WITH_GPU AND TENSORRT_FOUND)
set(INFER_IR_PASSES ${INFER_IR_PASSES} tensorrt_subgraph_pass CACHE INTERNAL "") set(INFER_IR_PASSES ${INFER_IR_PASSES} tensorrt_subgraph_pass CACHE INTERNAL "")
endif() endif()
if (WITH_ANAKIN_SUBGRAPH) if (ANAKIN_FOUND)
cc_library(anakin_subgraph_pass SRCS anakin_subgraph_pass.cc DEPS subgraph_detector anakin_op_teller) cc_library(anakin_subgraph_pass SRCS anakin_subgraph_pass.cc DEPS subgraph_detector anakin_op_teller)
set(analysis_deps ${analysis_deps} set(analysis_deps ${analysis_deps}

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save