You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Paddle/paddle/fluid/inference/api/paddle_pass_builder.cc

225 lines
8.0 KiB

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/api/paddle_pass_builder.h"
#ifdef PADDLE_WITH_CUDA
#include <cudnn.h>
#endif
#include <glog/logging.h>
namespace paddle {
void PaddlePassBuilder::AppendPass(const std::string &pass_type) {
passes_.push_back(pass_type);
}
void PaddlePassBuilder::TurnOnDebug() {
std::vector<std::string> passes;
auto it = std::begin(passes_);
while (it != std::end(passes_)) {
if (*it != "graph_viz_pass") {
it = passes_.insert(it + 1, "graph_viz_pass");
} else {
++it;
}
}
}
std::string PaddlePassBuilder::DebugString() {
std::stringstream ss;
ss << "Passes to apply:\n";
for (auto &pass : passes_) {
ss << " - " << pass << '\n';
}
return ss.str();
}
void PaddlePassBuilder::DeletePass(const std::string &pass_type) {
auto it = std::begin(passes_);
while (it != std::end(passes_)) {
if (*it == pass_type) {
it = passes_.erase(it);
} else {
++it;
}
}
}
void PaddlePassBuilder::InsertPass(size_t idx, const std::string &pass_type) {
passes_.insert(std::begin(passes_) + idx, pass_type);
}
void PaddlePassBuilder::DeletePass(size_t idx) {
passes_.erase(std::begin(passes_) + idx);
}
void PaddlePassBuilder::AppendAnalysisPass(const std::string &pass) {
analysis_passes_.push_back(pass);
}
void PaddlePassBuilder::ClearPasses() { passes_.clear(); }
const std::vector<std::string> kTRTSubgraphPasses({
"conv_affine_channel_fuse_pass", //
"conv_eltwiseadd_affine_channel_fuse_pass", //
"shuffle_channel_detect_pass", //
"quant_conv2d_dequant_fuse_pass", //
"delete_quant_dequant_op_pass", //
// "fc_fuse_pass", //
"simplify_with_basic_ops_pass", //
"embedding_eltwise_layernorm_fuse_pass", //
"multihead_matmul_fuse_pass_v2", //
"skip_layernorm_fuse_pass", //
"conv_bn_fuse_pass", //
"fc_fuse_pass", //
"tensorrt_subgraph_pass", //
"conv_bn_fuse_pass", //
#if CUDNN_VERSION >= 7100 // To run conv_fusion, the version of cudnn must be
// guaranteed at least v7
"conv_elementwise_add_act_fuse_pass", //
"conv_elementwise_add2_act_fuse_pass", //
"conv_elementwise_add_fuse_pass", //
#endif //
"transpose_flatten_concat_fuse_pass",
});
const std::vector<std::string> kLiteSubgraphPasses({
#ifdef PADDLE_WITH_LITE
"lite_subgraph_pass",
#endif
});
GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
passes_.assign({
// "identity_scale_op_clean_pass", //
"is_test_pass", //
"simplify_with_basic_ops_pass", //
"conv_affine_channel_fuse_pass", //
"conv_eltwiseadd_affine_channel_fuse_pass", //
"conv_bn_fuse_pass", //
"conv_eltwiseadd_bn_fuse_pass", //
"embedding_eltwise_layernorm_fuse_pass", //
"multihead_matmul_fuse_pass_v2", //
"fc_fuse_pass", //
"fc_elementwise_layernorm_fuse_pass", //
#if CUDNN_VERSION >= 7100 // To run conv_fusion, the version of cudnn must be
// guaranteed at least v7
"conv_elementwise_add_act_fuse_pass", //
"conv_elementwise_add2_act_fuse_pass", //
"conv_elementwise_add_fuse_pass", //
#endif //
"transpose_flatten_concat_fuse_pass", //
// following pass should be located in the last, since it will
// work on all fused ops.
"runtime_context_cache_pass"
});
use_gpu_ = true;
}
void GpuPassStrategy::EnableCUDNN() {
if (!use_cudnn_) {
passes_.insert(passes_.begin(), "cudnn_placement_pass");
}
use_cudnn_ = true;
}
void GpuPassStrategy::EnableMKLDNN() {
LOG(ERROR) << "GPU not support MKLDNN yet";
}
void GpuPassStrategy::EnableMkldnnQuantizer() {
LOG(ERROR) << "GPU not support MKL-DNN quantization";
}
CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) {
// NOTE the large fusions should be located in the front, so that they will
// not be damaged by smaller ones.
passes_.assign({"simplify_with_basic_ops_pass", //
"attention_lstm_fuse_pass", //
"seqconv_eltadd_relu_fuse_pass", //
// "seqpool_concat_fuse_pass", //
"seqpool_cvm_concat_fuse_pass", //
// "embedding_fc_lstm_fuse_pass", //
"fc_lstm_fuse_pass", //
"mul_lstm_fuse_pass", //
"fc_gru_fuse_pass", //
"mul_gru_fuse_pass", //
"seq_concat_fc_fuse_pass", //
"fc_fuse_pass", //
"repeated_fc_relu_fuse_pass", //
"squared_mat_sub_fuse_pass", //
"conv_bn_fuse_pass", //
"conv_eltwiseadd_bn_fuse_pass", //
"conv_transpose_bn_fuse_pass", //
"conv_transpose_eltwiseadd_bn_fuse_pass", //
"is_test_pass", //
// following pass should be located in the last, since
// it will work on all fused ops.
"runtime_context_cache_pass"});
use_gpu_ = false;
}
void CpuPassStrategy::EnableCUDNN() { LOG(ERROR) << "CPU not support cuDNN"; }
void CpuPassStrategy::EnableMKLDNN() {
// TODO(Superjomn) Consider the way to mix CPU with GPU.
#ifdef PADDLE_WITH_MKLDNN
if (!use_mkldnn_) {
passes_.insert(passes_.begin(), "mkldnn_placement_pass");
for (auto &pass : std::vector<std::string>({
"depthwise_conv_mkldnn_pass", //
"conv_bn_fuse_pass", // Execute BN passes again to
"conv_eltwiseadd_bn_fuse_pass", // preserve correct pass order
"conv_transpose_bn_fuse_pass", //
"conv_transpose_eltwiseadd_bn_fuse_pass", //
"conv_bias_mkldnn_fuse_pass", //
"conv_transpose_bias_mkldnn_fuse_pass",
"conv3d_bias_mkldnn_fuse_pass", //
"conv_elementwise_add_mkldnn_fuse_pass",
"conv_concat_relu_mkldnn_fuse_pass",
"conv_relu_mkldnn_fuse_pass", //
"conv_leaky_relu_mkldnn_fuse_pass", //
"conv_relu6_mkldnn_fuse_pass", //
"conv_swish_mkldnn_fuse_pass", //
"scale_matmul_fuse_pass", //
// Disabled due to topology-dependent speed-up
// "fc_mkldnn_pass",
"mkldnn_inplace_pass", // This pass should be activated after
// fuses
})) {
passes_.push_back(pass);
}
}
use_mkldnn_ = true;
#else
use_mkldnn_ = false;
#endif
}
void CpuPassStrategy::EnableMkldnnQuantizer() {
#ifdef PADDLE_WITH_MKLDNN
if (!use_mkldnn_quantizer_) {
passes_.push_back("cpu_quantize_placement_pass");
}
use_mkldnn_quantizer_ = true;
#else
use_mkldnn_quantizer_ = false;
#endif
}
} // namespace paddle