Update the Anakin interfaces for content-dnn and MLU (#17890)

* update anakin-engine interfaces for content-dnn

test=develop

* support GPU-only mode of Anakin

modify eltwise parsing

test=develop

* modification for thread safety

test=develop

* Integrated template instance

test=develop

* increase template parameters

test=develop

* support MLU predictor

test=develop

* update anakin cmake files

test=develop

* update TargetWrapper::set_device

* update the initialization of anakin subgraph

test=develop

* use the default constructor of base class

test=develop
lite
石晓伟 6 years ago committed by GitHub
parent 410907f624
commit bce259e5bf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1,7 +1,3 @@
if(NOT WITH_GPU)
return()
endif()
set(ANAKIN_ROOT "/usr" CACHE PATH "ANAKIN ROOT")
find_path(ANAKIN_INCLUDE_DIR anakin_config.h
PATHS ${ANAKIN_ROOT} ${ANAKIN_ROOT}/include
@ -16,9 +12,7 @@ find_library(ANAKIN_LIBRARY NAMES libanakin_saber_common.so libanakin.so
DOC "Path to ANAKIN library.")
if(ANAKIN_INCLUDE_DIR AND ANAKIN_LIBRARY)
if(WITH_DSO)
set(ANAKIN_FOUND ON)
endif(WITH_DSO)
else()
set(ANAKIN_FOUND OFF)
endif()
@ -31,3 +25,8 @@ if(ANAKIN_FOUND)
link_directories(${ANAKIN_ROOT})
add_definitions(-DPADDLE_WITH_ANAKIN)
endif()
if(ANAKIN_FOUND AND WITH_GPU AND WITH_DSO)
message(STATUS "Compile with anakin subgraph.")
set(ANAKIN_SUBGRAPH ON)
endif()

@ -77,7 +77,7 @@ pass_library(fillconstant_elementwisemul_fuse inference)
pass_library(shuffle_channel_detect_pass inference)
pass_library(delete_quant_dequant_op_pass inference)
if(ANAKIN_FOUND)
if(ANAKIN_SUBGRAPH)
pass_library(simplify_anakin_priorbox_detection_out_pass inference)
endif()

@ -17,7 +17,7 @@ if (TENSORRT_FOUND)
add_subdirectory(tensorrt)
endif()
if (ANAKIN_FOUND)
if (ANAKIN_SUBGRAPH)
add_subdirectory(anakin)
endif()
@ -43,11 +43,15 @@ if(WITH_MKLDNN)
endif()
set(STATIC_INFERENCE_APIS paddle_fluid_api paddle_inference_api analysis_predictor)
if (ANAKIN_FOUND)
set(ANAKIN_SHARED_INFERENCE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/api/api_anakin_engine.cc)
endif()
set(SHARED_INFERENCE_SRCS
io.cc ${CMAKE_CURRENT_SOURCE_DIR}/api/api.cc ${CMAKE_CURRENT_SOURCE_DIR}/api/api_impl.cc
${CMAKE_CURRENT_SOURCE_DIR}/api/analysis_predictor.cc
${mkldnn_quantizer_src}
${CMAKE_CURRENT_SOURCE_DIR}/api/details/zero_copy_tensor.cc)
${CMAKE_CURRENT_SOURCE_DIR}/api/details/zero_copy_tensor.cc
${ANAKIN_SHARED_INFERENCE_SRCS})
if(WIN32)
sep_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_APIS} zero_copy_tensor reset_tensor_array

@ -60,7 +60,7 @@ void ElementwiseMulOpConverter<TargetT, PrecisionT>::operator()(
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
this->engine_->AddOp(op_name, "Eltwise", {x_name, y_name}, {out_name});
std::string elementwise_type = "Prod";
std::string elementwise_type = "Mul";
this->engine_->template AddOpAttr<std::string>(op_name, "type",
elementwise_type);
std::vector<float> coeff = {1.0, 1.0};

@ -153,11 +153,12 @@ template class AnakinOpConverter<::anakin::saber::NV,
::anakin::Precision::FP32>;
template class AnakinOpConverter<::anakin::saber::NV,
::anakin::Precision::INT8>;
#ifdef ANAKIN_X86_PLACE
template class AnakinOpConverter<::anakin::saber::X86,
::anakin::Precision::FP32>;
template class AnakinOpConverter<::anakin::saber::X86,
::anakin::Precision::INT8>;
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
@ -203,16 +204,16 @@ template class AnakinOpConverter<::anakin::saber::X86,
CPU, ::anakin::saber::X86, precision_type__, \
::anakin::Precision::precision_type__)
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && defined(ANAKIN_X86_PLACE)
#define REGISTER_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__, FP32); \
REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__, INT8); \
REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__, FP32); \
REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__, INT8)
#else
#define REGISTER_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__, FP32); \
REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__, INT8)
#elif defined(PADDLE_WITH_CUDA)
#define REGISTER_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__, FP32); \
REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__, INT8)
#endif
#define USE_ANAKIN_CONVERTER_BASE(op_type__, place_type__, precision_type__) \
@ -221,12 +222,16 @@ template class AnakinOpConverter<::anakin::saber::X86,
__attribute__((unused)) = \
Touch_anakin_##op_type__##_##place_type__##_##precision_type__();
#if defined(PADDLE_WITH_CUDA) && defined(ANAKIN_X86_PLACE)
#define USE_ANAKIN_CONVERTER(op_type__) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA, FP32) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CPU, FP32)
#define USE_INT8_ANAKIN_CONVERTER(op_type__) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA, INT8) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CPU, INT8)
#elif defined(PADDLE_WITH_CUDA)
#define USE_ANAKIN_CONVERTER(op_type__) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA, FP32)
#define USE_INT8_ANAKIN_CONVERTER(op_type__) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA, INT8)
#define USE_CPU_ANAKIN_CONVERTER(op_type__) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CPU, FP32)
#define USE_CPU_INT8_ANAKIN_CONVERTER(op_type__) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CPU, INT8)
#endif

@ -77,32 +77,6 @@ TEST(swish_op, gpu) {
}
#endif
/*
TEST(sigm_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_activation_op<::anakin::saber::X86>("sigmoid", ctx, false);
}
TEST(tanh_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_activation_op<::anakin::saber::X86>("tanh", ctx, false);
}
TEST(relu6_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_activation_op<::anakin::saber::X86>("relu6", ctx, false);
}
TEST(swish_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_activation_op<::anakin::saber::X86>("swish", ctx, false);
}
*/
} // namespace anakin
} // namespace inference
} // namespace paddle
@ -112,13 +86,7 @@ USE_OP(tanh);
USE_OP(relu6);
USE_OP(swish);
USE_CPU_ANAKIN_CONVERTER(sigmoid);
USE_CPU_ANAKIN_CONVERTER(tanh);
USE_CPU_ANAKIN_CONVERTER(relu6);
USE_CPU_ANAKIN_CONVERTER(swish);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(sigmoid);
USE_ANAKIN_CONVERTER(tanh);
USE_ANAKIN_CONVERTER(relu6);
USE_ANAKIN_CONVERTER(swish);
#endif

@ -57,19 +57,16 @@ TEST(affine_channel_op, gpu) {
test_affine_channel_op<::anakin::saber::NV>(ctx, true);
}
#endif
#ifdef ANAKIN_X86_PLACE
// CPU variant of the affine_channel converter test: builds a CPU device
// context and runs test_affine_channel_op on the ::anakin::saber::X86 target.
// The trailing `false` mirrors the `true` passed by the GPU variant
// (presumably a use-GPU flag -- confirm against the helper's signature).
TEST(affine_channel_op, cpu) {
  platform::CPUPlace host_place;
  platform::CPUDeviceContext host_ctx(host_place);
  test_affine_channel_op<::anakin::saber::X86>(host_ctx, false);
}
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(affine_channel);
USE_CPU_ANAKIN_CONVERTER(affine_channel);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(affine_channel);
#endif

@ -73,19 +73,15 @@ TEST(batch_norm_op, gpu) {
test_batchnorm_op<::anakin::saber::NV>(ctx, true);
}
#endif
#ifdef ANAKIN_X86_PLACE
// CPU variant of the batch_norm converter test: runs test_batchnorm_op on the
// ::anakin::saber::X86 target with a CPU device context. The second argument
// is `false` here and `true` in the GPU variant (presumably a use-GPU flag --
// confirm against the helper's signature).
TEST(batch_norm_op, cpu) {
  platform::CPUPlace host_place;
  platform::CPUDeviceContext host_ctx(host_place);
  test_batchnorm_op<::anakin::saber::X86>(host_ctx, false);
}
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(batch_norm);
USE_CPU_ANAKIN_CONVERTER(batch_norm);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(batch_norm);
#endif

@ -53,19 +53,15 @@ TEST(concat_op, gpu) {
test_concat_op<::anakin::saber::NV>(ctx, true);
}
#endif
#ifdef ANAKIN_X86_PLACE
// CPU variant of the concat converter test: runs test_concat_op on the
// ::anakin::saber::X86 target with a CPU device context. The second argument
// is `false` here and `true` in the GPU variant (presumably a use-GPU flag --
// confirm against the helper's signature).
TEST(concat_op, cpu) {
  platform::CPUPlace host_place;
  platform::CPUDeviceContext host_ctx(host_place);
  test_concat_op<::anakin::saber::X86>(host_ctx, false);
}
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(concat);
USE_CPU_ANAKIN_CONVERTER(concat);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(concat);
#endif

@ -60,20 +60,16 @@ TEST(conv2d_op, gpu) {
test_conv2d_op<::anakin::saber::NV>(ctx, true);
}
#endif
#ifdef ANAKIN_X86_PLACE
// CPU variant of the conv2d converter test: runs test_conv2d_op on the
// ::anakin::saber::X86 target with a CPU device context. The second argument
// is `false` here and `true` in the GPU variant (presumably a use-GPU flag --
// confirm against the helper's signature).
TEST(conv2d_op, cpu) {
  platform::CPUPlace host_place;
  platform::CPUDeviceContext host_ctx(host_place);
  test_conv2d_op<::anakin::saber::X86>(host_ctx, false);
}
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(conv2d);
USE_CPU_ANAKIN_CONVERTER(conv2d);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(conv2d);
#endif

@ -54,19 +54,16 @@ TEST(dropout_op, gpu) {
test_dropout_op<::anakin::saber::NV>(ctx, true);
}
#endif
#ifdef ANAKIN_X86_PLACE
// CPU variant of the dropout converter test: runs test_dropout_op on the
// ::anakin::saber::X86 target with a CPU device context. The second argument
// is `false` here and `true` in the GPU variant (presumably a use-GPU flag --
// confirm against the helper's signature).
TEST(dropout_op, cpu) {
  platform::CPUPlace host_place;
  platform::CPUDeviceContext host_ctx(host_place);
  test_dropout_op<::anakin::saber::X86>(host_ctx, false);
}
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(dropout);
USE_CPU_ANAKIN_CONVERTER(dropout);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(dropout);
#endif

@ -59,29 +59,23 @@ TEST(elementwise_op, native_mul_gpu) {
test_elementwise_op<::anakin::saber::NV>("elementwise_mul", ctx, true);
}
#endif
#ifdef ANAKIN_X86_PLACE
// CPU variant of the elementwise_add converter test: runs test_elementwise_op
// for the "elementwise_add" op type on the ::anakin::saber::X86 target with a
// CPU device context. The last argument is `false` here and `true` in the GPU
// variants (presumably a use-GPU flag -- confirm against the helper).
TEST(elementwise_op, native_add_cpu) {
  platform::CPUPlace host_place;
  platform::CPUDeviceContext host_ctx(host_place);
  test_elementwise_op<::anakin::saber::X86>("elementwise_add", host_ctx,
                                            false);
}
// CPU variant of the elementwise_mul converter test: runs test_elementwise_op
// for the "elementwise_mul" op type on the ::anakin::saber::X86 target with a
// CPU device context. The last argument is `false` here and `true` in the GPU
// variant (presumably a use-GPU flag -- confirm against the helper).
TEST(elementwise_op, native_mul_cpu) {
  platform::CPUPlace host_place;
  platform::CPUDeviceContext host_ctx(host_place);
  test_elementwise_op<::anakin::saber::X86>("elementwise_mul", host_ctx,
                                            false);
}
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(elementwise_add);
USE_OP(elementwise_mul);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(elementwise_add);
USE_ANAKIN_CONVERTER(elementwise_mul);
#endif
USE_CPU_ANAKIN_CONVERTER(elementwise_add);
USE_CPU_ANAKIN_CONVERTER(elementwise_mul);

@ -49,19 +49,16 @@ TEST(mul_op, gpu) {
test_mul_op<::anakin::saber::NV>(ctx, true);
}
#endif
#ifdef ANAKIN_X86_PLACE
// CPU variant of the mul converter test: runs test_mul_op on the
// ::anakin::saber::X86 target with a CPU device context. The second argument
// is `false` here and `true` in the GPU variant (presumably a use-GPU flag --
// confirm against the helper's signature).
TEST(mul_op, cpu) {
  platform::CPUPlace host_place;
  platform::CPUDeviceContext host_ctx(host_place);
  test_mul_op<::anakin::saber::X86>(host_ctx, false);
}
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(mul);
USE_CPU_ANAKIN_CONVERTER(fc);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(fc);
#endif

@ -48,20 +48,17 @@ TEST(flatten_op, gpu) {
test_flatten_op<::anakin::saber::NV>(ctx, true);
}
#endif
#ifdef ANAKIN_X86_PLACE
// CPU variant of the flatten converter test: runs test_flatten_op on the
// ::anakin::saber::X86 target with a CPU device context. The second argument
// is `false` here and `true` in the GPU variant (presumably a use-GPU flag --
// confirm against the helper's signature).
TEST(flatten_op, cpu) {
  platform::CPUPlace host_place;
  platform::CPUDeviceContext host_ctx(host_place);
  test_flatten_op<::anakin::saber::X86>(host_ctx, false);
}
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(reshape);
USE_OP_ITSELF(flatten);
USE_CPU_ANAKIN_CONVERTER(flatten);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(flatten);
#endif

@ -87,7 +87,7 @@ TEST(Pool2dOpConverter, avg_ceil_test) {
test_pool2d<::anakin::saber::NV>(ctx, true, false, true, "avg");
}
#endif
#ifdef ANAKIN_X86_PLACE
TEST(Pool2dOpConverter, normal_cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
@ -110,14 +110,10 @@ TEST(Pool2dOpConverter, avg_ceil_test_cpu) {
platform::CPUDeviceContext ctx(cpu_place);
test_pool2d<::anakin::saber::X86>(ctx, false, false, true, "avg");
}
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(pool2d);
USE_CPU_ANAKIN_CONVERTER(pool2d);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(pool2d);
#endif

@ -66,10 +66,5 @@ TEST(leaky_relu_op, gpu) {
USE_OP(relu);
USE_OP(leaky_relu);
USE_CPU_ANAKIN_CONVERTER(relu);
USE_CPU_ANAKIN_CONVERTER(leaky_relu);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(relu);
USE_ANAKIN_CONVERTER(leaky_relu);
#endif

@ -81,7 +81,7 @@ TEST(reshape2_op, gpu) {
test_reshape2_op<::anakin::saber::NV>(ctx, true);
}
#endif
#ifdef ANAKIN_X86_PLACE
TEST(reshape1_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
@ -93,14 +93,10 @@ TEST(reshape2_op, cpu) {
platform::CPUDeviceContext ctx(cpu_place);
test_reshape2_op<::anakin::saber::X86>(ctx, false);
}
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(reshape);
USE_CPU_ANAKIN_CONVERTER(reshape);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(reshape);
#endif

@ -48,20 +48,16 @@ TEST(softmax_op, gpu) {
test_softmax_op<::anakin::saber::NV>(ctx, true);
}
#endif
#ifdef ANAKIN_X86_PLACE
// CPU variant of the softmax converter test: runs test_softmax_op on the
// ::anakin::saber::X86 target with a CPU device context.
//
// The suite name was previously `relu_op`, although the body exercises
// test_softmax_op and the GPU sibling is registered as TEST(softmax_op, gpu).
// It is registered under `softmax_op` here so that test reports and
// --gtest_filter patterns group the CPU and GPU softmax cases together.
// The second argument is `false` here and `true` in the GPU variant
// (presumably a use-GPU flag -- confirm against the helper's signature).
TEST(softmax_op, cpu) {
  platform::CPUPlace cpu_place;
  platform::CPUDeviceContext ctx(cpu_place);
  test_softmax_op<::anakin::saber::X86>(ctx, false);
}
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(softmax);
USE_CPU_ANAKIN_CONVERTER(softmax);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(softmax);
#endif

@ -92,7 +92,7 @@ TEST(split_op, test_different_shape_axis3_batch1) {
platform::CUDADeviceContext ctx(gpu_place);
AnakinSliceTest<::anakin::saber::NV, 3>(ctx, true, {1, 3, 2, 3}, {2, 1});
}
#ifdef ANAKIN_X86_PLACE
TEST(split_op, test_different_shape_axis1_batch1_cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
@ -110,13 +110,10 @@ TEST(split_op, test_different_shape_axis3_batch1_cpu) {
platform::CPUDeviceContext ctx(cpu_place);
AnakinSliceTest<::anakin::saber::X86, 3>(ctx, false, {1, 3, 2, 4}, {2, 2});
}
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(split);
USE_CPU_ANAKIN_CONVERTER(split);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(split);
#endif

@ -49,19 +49,16 @@ TEST(sum_op, gpu) {
test_sum_op<::anakin::saber::NV>(ctx, true);
}
#endif
#ifdef ANAKIN_X86_PLACE
// CPU variant of the sum converter test: runs test_sum_op on the
// ::anakin::saber::X86 target with a CPU device context. The second argument
// is `false` here and `true` in the GPU variant (presumably a use-GPU flag --
// confirm against the helper's signature).
TEST(sum_op, cpu) {
  platform::CPUPlace host_place;
  platform::CPUDeviceContext host_ctx(host_place);
  test_sum_op<::anakin::saber::X86>(host_ctx, false);
}
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(sum);
USE_CPU_ANAKIN_CONVERTER(sum);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(sum);
#endif

@ -79,7 +79,7 @@ TEST(transpose2_op, gpu) {
test_transpose2_op<::anakin::saber::NV>(ctx, true);
}
#endif
#ifdef ANAKIN_X86_PLACE
TEST(transpose1_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
@ -91,13 +91,10 @@ TEST(transpose2_op, cpu) {
platform::CPUDeviceContext ctx(cpu_place);
test_transpose2_op<::anakin::saber::X86>(ctx, false);
}
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(transpose);
USE_CPU_ANAKIN_CONVERTER(transpose);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(transpose);
#endif

@ -33,7 +33,6 @@ limitations under the License. */
#include "paddle/fluid/platform/enforce.h"
using anakin::Precision;
using anakin::saber::X86;
namespace paddle {
namespace inference {
@ -215,13 +214,14 @@ class AnakinConvertValidation {
template class AnakinConvertValidation<::anakin::saber::NV,
::anakin::Precision::FP32>;
template class AnakinConvertValidation<::anakin::saber::X86,
::anakin::Precision::FP32>;
template class AnakinConvertValidation<::anakin::saber::NV,
::anakin::Precision::INT8>;
#ifdef ANAKIN_X86_PLACE
template class AnakinConvertValidation<::anakin::saber::X86,
::anakin::Precision::FP32>;
template class AnakinConvertValidation<::anakin::saber::X86,
::anakin::Precision::INT8>;
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle

@ -46,10 +46,9 @@ AnakinEngine<TargetT, PrecisionType, RunType>::AnakinEngine(
max_input_shape_(max_input_shape),
program_inputs_(program_inputs),
auto_config_layout_(auto_config_layout) {
std::call_once(init_anakin_, [this]() {
::anakin::TargetWrapper<TargetT>::set_device(device_);
::anakin::Env<TargetT>::env_init();
});
::anakin::TargetWrapper<TargetT>::set_device(device_);
std::call_once(init_anakin_,
[this]() { ::anakin::Env<TargetT>::env_init(); });
graph_.reset(new AnakinGraphT<TargetT, PrecisionType>());
net_.reset(new AnakinNetT<TargetT, PrecisionType, RunType>(need_summary));
}
@ -194,14 +193,14 @@ template class AnakinEngine<::anakin::saber::NV, ::anakin::Precision::INT8>;
template class AnakinEngineManager<::anakin::saber::NV,
::anakin::Precision::INT8>;
#endif
#ifdef ANAKIN_X86_PLACE
template class AnakinEngine<::anakin::saber::X86, ::anakin::Precision::FP32>;
template class AnakinEngineManager<::anakin::saber::X86,
::anakin::Precision::FP32>;
template class AnakinEngine<::anakin::saber::X86, ::anakin::Precision::INT8>;
template class AnakinEngineManager<::anakin::saber::X86,
::anakin::Precision::INT8>;
#endif
// template class AnakinEngine<::anakin::saber::X86, ::anakin::Precision::FP32>;
} // namespace anakin
} // namespace inference

@ -24,7 +24,9 @@
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/engine.h"
#include "paddle/fluid/inference/utils/singleton.h"
#ifdef EXIT // NOLINT
#undef EXIT // NOLINT
#endif // NOLINT
#include "framework/core/net/net.h"
#include "framework/core/types.h"
#include "framework/graph/graph.h"

@ -22,7 +22,6 @@ limitations under the License. */
using anakin::AK_FLOAT;
using anakin::Precision;
using anakin::saber::NV;
using anakin::saber::X86;
using anakin::saber::Shape;
using anakin::PBlock;
using anakin::PTuple;

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save