Optimize emb_eltwise_layernorm_plugin and support fp16 (#27128)

Author: Pei Yang
Parent: 4c5cfdea1b
Commit: a5ef246cac

@@ -107,6 +107,9 @@ function(select_nvcc_arch_flags out_variable)
   elseif(${CUDA_ARCH_NAME} STREQUAL "Maxwell")
     set(cuda_arch_bin "50")
   elseif(${CUDA_ARCH_NAME} STREQUAL "Pascal")
+    if (NOT ${CMAKE_CUDA_COMPILER_VERSION} LESS 10.0)
+      add_definitions("-DSUPPORTS_CUDA_FP16")
+    endif()
     set(cuda_arch_bin "60 61")
   elseif(${CUDA_ARCH_NAME} STREQUAL "Volta")
     if (NOT ${CMAKE_CUDA_COMPILER_VERSION} LESS 10.0)
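Note: once CMake defines SUPPORTS_CUDA_FP16 (here extended to Pascal builds on CUDA 10.0+), C++ sources can guard their half-precision paths at compile time, the same way the ERNIE test further below does. A minimal, self-contained sketch of that pattern, assuming nothing about this repo (RunErnie is a made-up placeholder, not a function from the codebase):

#include <cstdio>

void RunErnie(bool with_fp16);  // hypothetical helper, for illustration only

int main() {
  RunErnie(false);   // fp32 path is always built
#ifdef SUPPORTS_CUDA_FP16
  RunErnie(true);    // half-precision path compiled only when CMake defined the macro
#endif
  return 0;
}

void RunErnie(bool with_fp16) {
  std::printf("running the %s path\n", with_fp16 ? "fp16" : "fp32");
}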

@@ -80,10 +80,10 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter {
     nvinfer1::ILayer* layer = nullptr;
     if (engine_->with_dynamic_shape()) {
-      plugin::DynamicPluginTensorRT* plugin = nullptr;
-      plugin = new plugin::EmbEltwiseLayernormPluginDynamic<float>(
+      auto use_fp16 = engine_->WithFp16();
+      auto plugin = new plugin::EmbEltwiseLayernormPluginDynamic(
           input_embs, bias, scale, emb_sizes, bias_size, scale_size, hidden,
-          eps);
+          eps, use_fp16);
       layer = engine_->AddPluginV2(input_ids.data(), input_num, plugin);
     } else {
       PADDLE_THROW(platform::errors::Fatal(
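The design change in this hunk: the plugin is no longer instantiated as the template EmbEltwiseLayernormPluginDynamic<float>; it now receives a runtime use_fp16 flag taken from engine_->WithFp16() and picks its compute path itself. A rough, self-contained sketch of that flag-based dispatch (EmbLayernormSketch is a made-up stand-in, not the actual plugin class):

#include <cstdio>
#include <vector>

// Made-up stand-in for the real plugin, showing only the branch the flag adds.
class EmbLayernormSketch {
 public:
  EmbLayernormSketch(float eps, bool with_fp16) : eps_(eps), with_fp16_(with_fp16) {}

  void Enqueue(const std::vector<float>& input) const {
    // The real plugin would launch a half-precision CUDA kernel here.
    if (with_fp16_) {
      std::printf("fp16 kernel over %zu elements, eps=%g\n", input.size(), eps_);
    } else {
      std::printf("fp32 kernel over %zu elements, eps=%g\n", input.size(), eps_);
    }
  }

 private:
  float eps_;
  bool with_fp16_;  // corresponds to engine_->WithFp16() in the converter above
};

int main() {
  EmbLayernormSketch plugin(1e-5f, /*with_fp16=*/true);
  plugin.Enqueue({0.1f, 0.2f, 0.3f});
  return 0;
}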

@@ -151,7 +151,7 @@ void trt_ernie(bool with_fp16, std::vector<float> result) {
   run(config, &out_data);         // serialize
   run(*config_deser, &out_data);  // deserialize
   for (size_t i = 0; i < out_data.size(); i++) {
-    EXPECT_NEAR(result[i], out_data[i], 1e-6);
+    EXPECT_NEAR(result[i], out_data[i], 1e-2);
   }
 }
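The tolerance is presumably relaxed from 1e-6 to 1e-2 because the same helper now also checks the fp16 run, and half precision carries only a 10-bit mantissa, so agreement much beyond the third decimal place cannot be expected for outputs of this magnitude (the reasoning is an assumption; the commit does not state it). A small illustration of the fp16 value spacing near one of the expected outputs:

#include <cmath>
#include <cstdio>

int main() {
  float x = 0.598336f;  // magnitude of the expected outputs in the test
  int e = static_cast<int>(std::floor(std::log2(x)));
  float fp16_ulp = std::ldexp(1.0f, e - 10);  // fp16 has a 10-bit mantissa
  std::printf("fp16 spacing near %.6f is about %.6f\n", x, fp16_ulp);
  return 0;
}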
@@ -159,13 +159,11 @@ TEST(AnalysisPredictor, no_fp16) {
   std::vector<float> result = {0.597841, 0.219972, 0.182187};
   trt_ernie(false, result);
 }
-TEST(AnalysisPredictor, fp16) {
 #ifdef SUPPORTS_CUDA_FP16
-  std::vector<float> result = {0.598336, 0.219558, 0.182106};
+TEST(AnalysisPredictor, fp16) {
+  std::vector<float> result = {0.59923654, 0.21923761, 0.18152587};
   trt_ernie(true, result);
-#endif
 }
+#endif  // SUPPORTS_CUDA_FP16
 }  // namespace inference
 }  // namespace paddle
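One note on the test restructuring: moving the #ifdef outside the TEST block means that, on toolchains without SUPPORTS_CUDA_FP16, the fp16 test is not registered at all rather than being registered with an empty body that silently passes (this reading of intent is an assumption). A minimal standalone sketch of the guarded-test pattern:

#include <gtest/gtest.h>

#ifdef SUPPORTS_CUDA_FP16
// Registered only when the build defines SUPPORTS_CUDA_FP16.
TEST(Fp16Guard, runs_only_when_supported) {
  EXPECT_TRUE(true);  // stands in for trt_ernie(true, result)
}
#endif  // SUPPORTS_CUDA_FP16

int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}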
