C-API quantization core 2 (#16396)

* C-API quantization core

test=develop

Co-authored-by: Sylwester Fraczek <sylwester.fraczek@intel.com>

* Decouple Quantizer from AnalysisPredictor

test=develop

* fixes after review

test=develop

* renamed mkldnn quantize stuff

test=develop

* remove ifdef from header file

test=develop
move-code
Wojciech Uss 6 years ago committed by GitHub
parent e41d581304
commit 09dfc7a2aa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -37,18 +37,24 @@ endif(WIN32)
add_subdirectory(api)
if(WITH_MKLDNN)
set(mkldnn_quantizer_src ${CMAKE_CURRENT_SOURCE_DIR}/api/mkldnn_quantizer.cc)
set(mkldnn_quantizer_cfg mkldnn_quantizer_config)
endif()
set(STATIC_INFERENCE_APIS paddle_fluid_api paddle_inference_api analysis_predictor)
set(SHARED_INFERENCE_SRCS
io.cc ${CMAKE_CURRENT_SOURCE_DIR}/api/api.cc ${CMAKE_CURRENT_SOURCE_DIR}/api/api_impl.cc
${CMAKE_CURRENT_SOURCE_DIR}/api/analysis_predictor.cc
${mkldnn_quantizer_src}
${CMAKE_CURRENT_SOURCE_DIR}/api/details/zero_copy_tensor.cc)
if(WIN32)
sep_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_APIS} zero_copy_tensor reset_tensor_array
analysis_config paddle_pass_builder)
analysis_config ${mkldnn_quantizer_cfg} paddle_pass_builder)
else(WIN32)
cc_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_APIS}
zero_copy_tensor reset_tensor_array analysis_config paddle_pass_builder)
zero_copy_tensor reset_tensor_array analysis_config ${mkldnn_quantizer_cfg} paddle_pass_builder)
endif(WIN32)
if(NOT APPLE)
@ -61,11 +67,11 @@ endif()
if(WIN32)
sep_library(paddle_fluid_shared SHARED SRCS ${SHARED_INFERENCE_SRCS}
DEPS ${fluid_modules} paddle_fluid_api reset_tensor_array
analysis_config paddle_pass_builder)
analysis_config ${mkldnn_quantizer_cfg} paddle_pass_builder)
else(WIN32)
cc_library(paddle_fluid_shared SHARED SRCS ${SHARED_INFERENCE_SRCS}
DEPS ${fluid_modules} paddle_fluid_api reset_tensor_array
analysis_config paddle_pass_builder)
analysis_config ${mkldnn_quantizer_cfg} paddle_pass_builder)
endif()
get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
target_link_libraries(paddle_fluid_shared ${os_dependency_modules})

@ -33,13 +33,19 @@ endif()
add_subdirectory(details)
cc_library(analysis_config SRCS analysis_config.cc DEPS lod_tensor paddle_pass_builder)
if(WITH_MKLDNN)
set(mkldnn_quantizer_src mkldnn_quantizer.cc)
set(mkldnn_quantizer_cfg mkldnn_quantizer_config)
cc_library(${mkldnn_quantizer_cfg} SRCS mkldnn_quantizer_config.cc DEPS lod_tensor paddle_pass_builder)
endif()
cc_library(analysis_config SRCS analysis_config.cc DEPS ${mkldnn_quantizer_cfg} lod_tensor paddle_pass_builder)
cc_library(paddle_pass_builder SRCS paddle_pass_builder.cc)
cc_library(analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api zero_copy_tensor
cc_library(analysis_predictor SRCS analysis_predictor.cc ${mkldnn_quantizer_src} DEPS paddle_inference_api zero_copy_tensor
reset_tensor_array analysis_config paddle_pass_builder ir_pass_manager ${inference_deps})
cc_library(paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS
lod_tensor scope paddle_pass_builder reset_tensor_array analysis_config
analysis_config paddle_pass_builder zero_copy_tensor
paddle_pass_builder zero_copy_tensor
reset_tensor_array)
cc_test(test_paddle_inference_api

@ -108,6 +108,9 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
// MKLDNN related.
CP_MEMBER(use_mkldnn_);
CP_MEMBER(mkldnn_enabled_op_types_);
// Quantization related.
CP_MEMBER(use_mkldnn_quantizer_);
CP_MEMBER(mkldnn_quantizer_config_);
CP_MEMBER(use_anakin_);
CP_MEMBER(anakin_max_batchsize_);
@ -148,6 +151,26 @@ void AnalysisConfig::EnableMKLDNN() {
Update();
}
// Requests MKL-DNN quantization for this configuration.
//
// In a build with PADDLE_WITH_MKLDNN the quantizer settings object is created
// on first use (an already existing one is kept) and the quantizer flag is
// raised. In any other build the request is refused: an error is logged and
// the flag is forced off. Update() is called in both cases.
void AnalysisConfig::EnableMkldnnQuantizer() {
#ifdef PADDLE_WITH_MKLDNN
  // Lazily allocate the settings so repeated calls do not discard
  // customizations already made through mkldnn_quantizer_config().
  if (mkldnn_quantizer_config_ == nullptr) {
    mkldnn_quantizer_config_ = std::make_shared<MkldnnQuantizerConfig>();
  }
  use_mkldnn_quantizer_ = true;
#else
  LOG(ERROR) << "Please compile with MKLDNN first to use MkldnnQuantizer";
  use_mkldnn_quantizer_ = false;
#endif
  Update();
}
// Returns the shared MKL-DNN quantizer settings object.
// The stored pointer is checked with PADDLE_ENFORCE_NOT_NULL before it is
// returned; it is only ever created by EnableMkldnnQuantizer() (in builds with
// PADDLE_WITH_MKLDNN), so that method is expected to have been called first.
std::shared_ptr<MkldnnQuantizerConfig> AnalysisConfig::mkldnn_quantizer_config()
const {
PADDLE_ENFORCE_NOT_NULL(mkldnn_quantizer_config_,
"MkldnnQuantizer was not enabled yet.");
return mkldnn_quantizer_config_;
}
void AnalysisConfig::EnableTensorRtEngine(
int workspace_size, int max_batch_size, int min_subgraph_size,
AnalysisConfig::Precision precision_mode, bool use_static) {
@ -224,15 +247,27 @@ void AnalysisConfig::Update() {
#endif
}
if (enable_memory_optim_) {
auto analysis_passes = pass_builder()->AnalysisPasses();
auto memory_opti_pass_name = "memory_optimize_pass";
bool already_exists =
std::find(analysis_passes.begin(), analysis_passes.end(),
memory_opti_pass_name) != analysis_passes.end();
if (!already_exists) {
pass_builder()->AppendAnalysisPass(memory_opti_pass_name);
// Quantization passes must come after all other optimization passes
if (use_mkldnn_quantizer_) {
if (!enable_ir_optim_) {
LOG(ERROR) << "EnableMkldnnQuantizer() only works when IR optimization "
"is enabled.";
}
#ifdef PADDLE_WITH_MKLDNN
pass_builder()->EnableMkldnnQuantizer();
#else
LOG(ERROR) << "Please compile with MKLDNN first to use MkldnnQuantizer";
use_mkldnn_quantizer_ = false;
#endif
}
#ifdef PADDLE_WITH_MKLDNN
// Do not optimize before quantization
if (enable_memory_optim_ && !use_mkldnn_quantizer_) {
#else
if (enable_memory_optim_) {
#endif
pass_builder()->AppendAnalysisPass("memory_optimize_pass");
}
if (use_anakin_) {
@ -277,6 +312,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
for (auto &item : mkldnn_enabled_op_types_) ss << item;
ss << ";";
ss << use_mkldnn_quantizer_;
ss << model_from_memory_;
ss << enable_ir_optim_;

@ -18,6 +18,7 @@
#include <fstream>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/feed_fetch_type.h"
@ -35,8 +36,13 @@
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/cpu_helper.h"
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/inference/api/mkldnn_quantizer.h"
#endif
#if PADDLE_WITH_TENSORRT
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/inference/tensorrt/trt_int8_calibrator.h"
@ -341,10 +347,7 @@ bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
return true;
}
// NOTE All the members in AnalysisConfig should be copied to Argument.
void AnalysisPredictor::OptimizeInferenceProgram() {
status_program_optimized_ = true;
void AnalysisPredictor::PrepareArgument() {
argument_.SetUseGPU(config_.use_gpu());
argument_.SetGPUDeviceId(config_.gpu_device_id());
argument_.SetEnableMemoryOptim(config_.enable_memory_optim());
@ -390,6 +393,16 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
argument_.SetMKLDNNEnabledOpTypes(config_.mkldnn_enabled_op_types_);
}
#ifdef PADDLE_WITH_MKLDNN
if (config_.mkldnn_quantizer_enabled()) {
LOG(INFO) << "Quantization is enabled";
argument_.SetQuantizeEnabledOpTypes(
config_.mkldnn_quantizer_config()->enabled_op_types());
argument_.SetQuantizeExcludedOpIds(
config_.mkldnn_quantizer_config()->excluded_op_ids());
}
#endif
auto passes = config_.pass_builder()->AllPasses();
if (!config_.ir_optim()) {
passes.clear();
@ -398,6 +411,13 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
argument_.SetIrAnalysisPasses(passes);
argument_.SetAnalysisPasses(config_.pass_builder()->AnalysisPasses());
argument_.SetScopeNotOwned(scope_.get());
}
// NOTE All the members in AnalysisConfig should be copied to Argument.
void AnalysisPredictor::OptimizeInferenceProgram() {
status_program_optimized_ = true;
PrepareArgument();
Analyzer().Run(&argument_);
PADDLE_ENFORCE(argument_.scope_valid());
@ -439,12 +459,31 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
}
std::unique_ptr<PaddlePredictor> predictor(new AnalysisPredictor(config));
if (!dynamic_cast<AnalysisPredictor *>(predictor.get())->Init(nullptr)) {
auto predictor_p = dynamic_cast<AnalysisPredictor *>(predictor.get());
if (!predictor_p->Init(nullptr)) {
return nullptr;
}
if (config.mkldnn_quantizer_enabled() && !predictor_p->MkldnnQuantize()) {
return nullptr;
}
return predictor;
}
// Runs MKL-DNN quantization on this predictor.
//
// With MKL-DNN compiled in, the helper quantizer is created on the first call
// (bound to this predictor and to the quantizer settings taken from the
// config) and reused afterwards; the result of its Quantize() call is
// returned. Without MKL-DNN an error is logged and false is returned.
bool AnalysisPredictor::MkldnnQuantize() {
#if PADDLE_WITH_MKLDNN
  if (mkldnn_quantizer_ == nullptr) {
    // Raw owning pointer: released in ~AnalysisPredictor().
    mkldnn_quantizer_ = new AnalysisPredictor::MkldnnQuantizer(
        *this, config_.mkldnn_quantizer_config());
  }
  const bool quantized = mkldnn_quantizer_->Quantize();
  return quantized;
#else
  LOG(ERROR) << "Please compile with MKLDNN first to use MkldnnQuantizer";
  return false;
#endif
}
void AnalysisPredictor::PrepareFeedFetch() {
PADDLE_ENFORCE_NOT_NULL(sub_scope_);
CreateFeedFetchVar(sub_scope_);
@ -703,6 +742,13 @@ AnalysisPredictor::~AnalysisPredictor() {
scope_->DeleteScope(sub_scope_);
}
#if PADDLE_WITH_MKLDNN
if (mkldnn_quantizer_) {
delete mkldnn_quantizer_;
mkldnn_quantizer_ = nullptr;
}
#endif
// TODO(Superjomn) deduce the directory path.
std::string out_path = inference::analysis::GetMemoryCachePath(
config_.model_dir(), config_.prog_file());

@ -70,6 +70,7 @@ class AnalysisPredictor : public PaddlePredictor {
void CreateFeedFetchVar(framework::Scope *scope);
void PrepareFeedFetch();
void PrepareArgument();
void OptimizeInferenceProgram();
Argument &analysis_argument() { return argument_; }
@ -83,6 +84,8 @@ class AnalysisPredictor : public PaddlePredictor {
std::string GetSerializedProgram() const override;
bool MkldnnQuantize();
protected:
// For memory optimization.
bool need_collect_var_shapes_for_memory_optim();
@ -143,6 +146,16 @@ class AnalysisPredictor : public PaddlePredictor {
std::vector<framework::OpDesc *> fetches_;
std::map<size_t, std::string> idx2fetches_;
#if PADDLE_WITH_MKLDNN
// Helper class to perform quantization
class MkldnnQuantizer;
MkldnnQuantizer *mkldnn_quantizer_{nullptr};
#if PADDLE_WITH_TESTING
friend class MkldnnQuantizerTest;
#endif
#endif
// Memory buffer for feed inputs. The temporary LoDTensor will cause serious
// concurrency problems, wrong results and memory leak, so cache them.
std::vector<framework::LoDTensor> feed_tensors_;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,104 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/details/reset_tensor_array.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/string/printf.h"
#ifdef PADDLE_WITH_TESTING
#include <gtest/gtest.h>
#include <gtest/gtest_prod.h>
#endif
namespace paddle {
/*
* Map variable name to tensor of scaling factors scaling it to MAX=1.0.
* bool denotes whether quantization of the variable should be done to unsigned
* type.
*/
using VarQuantScale =
std::unordered_map<std::string, std::pair<bool, framework::LoDTensor>>;
// Helper class, nested in AnalysisPredictor, that performs MKL-DNN
// quantization of the predictor's program. Only declarations are visible in
// this header; the member definitions live elsewhere (presumably
// mkldnn_quantizer.cc -- confirm).
class AnalysisPredictor::MkldnnQuantizer {
public:
// Binds the quantizer to `predictor` (kept by reference, so the predictor
// must outlive this object) and to the shared quantizer settings `qconfig`.
explicit MkldnnQuantizer(
AnalysisPredictor& predictor, // NOLINT
const std::shared_ptr<MkldnnQuantizerConfig>& qconfig)
: predictor_(predictor), qconfig_(qconfig) {}
// Execute full quantization procedure.
// The boolean result is what AnalysisPredictor::MkldnnQuantize() returns to
// its caller.
bool Quantize();
#if PADDLE_WITH_TESTING
// Lets the unit-test fixture reach the private helpers below.
friend class MkldnnQuantizerTest;
#endif
private:
// Run single warmup iteration
bool RunWarmup() const;
// Gather data from variables and calculate scales for them.
bool CalculateScales();
// Calculate a scale for tensor based on ScaleAlgo rules.
// NOTE(review): op_name/conn_name are presumably the keys used to look up the
// rule in the quantizer config -- confirm against the definition.
void CalculateSingleScale(const std::string& op_name,
const std::string& conn_name,
const std::string& var_name,
const framework::LoDTensor& var_tensor,
bool is_unsigned);
// NOTE(review): presumably prepares the predictor's analysis Argument for the
// quantization passes; the definition is not visible here -- confirm.
void PrepareArgument() const;
// NOTE(review): presumably runs the quantization passes and reports success;
// the definition is not visible here -- confirm.
bool RunQuantizePasses() const;
// Helper working on histogram bins (both vectors are taken by value).
// NOTE(review): exact expansion semantics are defined in the .cc -- confirm.
std::vector<int> ExpandQuantizedBins(std::vector<int> quantized_bins,
std::vector<int> reference_bins) const;
// Using the KL-divergence method get the most precise scaling factor.
// The returned pair has the same layout as a VarQuantScale entry:
// (quantize-to-unsigned flag, tensor of scaling factors).
std::pair<bool, framework::LoDTensor> GetKLScalingFactor(
const framework::LoDTensor& var_tensor, bool is_unsigned) const;
// NOTE(review): presumably the per-channel maximum-absolute-value scale
// (ScaleAlgo::MAX_CH) -- confirm against the definition.
std::pair<bool, framework::LoDTensor> GetMaxChScalingFactor(
const framework::LoDTensor& var_tensor, bool is_unsigned) const;
// NOTE(review): presumably the maximum-absolute-value scale (ScaleAlgo::MAX)
// -- confirm against the definition.
std::pair<bool, framework::LoDTensor> GetMaxScalingFactor(
const framework::LoDTensor& var_tensor, bool is_unsigned) const;
// Returns histogram and bin width
// num_bins defaults to 2048 when the caller does not pass it.
std::pair<std::vector<int>, float> Histogram(
const framework::LoDTensor& var_tensor, float min_val, float max_val,
size_t num_bins = 2048) const;
// Calculate the entropy.
// Both distributions are passed by value together with their sums.
float SafeEntropy(std::vector<int> reference_distr_P, int P_sum,
std::vector<int> candidate_distr_Q, int Q_sum) const;
private:
// Predictor being quantized; not owned.
AnalysisPredictor& predictor_;
// Quantizer settings shared with the AnalysisConfig that created them.
const std::shared_ptr<MkldnnQuantizerConfig> qconfig_;
// A map: variable name -> scale
VarQuantScale scales_;
};
} // namespace paddle

@ -0,0 +1,40 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/api/paddle_mkldnn_quantizer_config.h"
namespace paddle {
// Installs the built-in rules: for each supported operator type, the
// scale-computing algorithm used for each of its connections (inputs/outputs).
// Connections without a rule fall back to the default algorithm returned by
// scale_algo().
MkldnnQuantizerConfig::MkldnnQuantizerConfig() {
// The default configuration of scale computing algorithms
rules_["conv2d"]["Input"] = ScaleAlgo::KL;
rules_["conv2d"]["Filter"] = ScaleAlgo::MAX_CH;
rules_["conv2d"]["Bias"] = ScaleAlgo::NONE; // do not compute scale
rules_["conv2d"]["ResidualData"] = ScaleAlgo::KL;
rules_["conv2d"]["Output"] = ScaleAlgo::KL; // scale based on KL divergence
rules_["pool2d"]["X"] = ScaleAlgo::KL;
rules_["pool2d"]["Out"] = ScaleAlgo::KL; // scale based on KL divergence
}
// Looks up the scale-computing algorithm configured for connection
// `conn_name` of operator type `op_type_name`.
//
// Returns the matching rule when one exists; otherwise (unknown operator type
// or unknown connection) returns the default algorithm.
ScaleAlgo MkldnnQuantizerConfig::scale_algo(
    const std::string& op_type_name, const std::string& conn_name) const {
  // Single lookup per level and no copy of the inner map: the previous
  // `auto op_rule = rules_.at(...)` duplicated the whole per-operator map on
  // every call.
  const auto op_rules = rules_.find(op_type_name);
  if (op_rules != rules_.end()) {
    const auto& conn_rules = op_rules->second;
    const auto rule = conn_rules.find(conn_name);
    if (rule != conn_rules.end()) return rule->second;
  }
  return default_scale_algo_;
}
} // namespace paddle

@ -27,10 +27,14 @@
// the abstract path of this header file will be changed.
#include "paddle_api.h" // NOLINT
#include "paddle_pass_builder.h" // NOLINT
#ifdef PADDLE_WITH_MKLDNN
#include "paddle_mkldnn_quantizer_config.h" // NOLINT
#endif
namespace paddle {
class AnalysisPredictor;
struct MkldnnQuantizerConfig;
// NOTE WIP, not stable yet.
struct AnalysisConfig {
@ -186,6 +190,16 @@ struct AnalysisConfig {
mkldnn_enabled_op_types_ = op_list;
}
/** Turn on quantization.
*/
void EnableMkldnnQuantizer();
/** A boolean state telling whether the quantization is enabled.
*/
bool mkldnn_quantizer_enabled() const { return use_mkldnn_quantizer_; }
std::shared_ptr<MkldnnQuantizerConfig> mkldnn_quantizer_config() const;
/** Specify the memory buffer of program and parameter
* @param prog_buffer the memory buffer of program.
* @param prog_buffer_size the size of the data.
@ -271,10 +285,14 @@ struct AnalysisConfig {
std::string serialized_info_cache_;
mutable std::unique_ptr<PassStrategy> pass_builder_;
bool use_anakin_{false};
int anakin_max_batchsize_;
std::map<std::string, std::vector<int>> anakin_max_input_shape_;
std::map<std::string, std::string> engine_opt_info_;
bool use_mkldnn_quantizer_{false};
std::shared_ptr<MkldnnQuantizerConfig> mkldnn_quantizer_config_;
};
} // namespace paddle

@ -0,0 +1,105 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cassert>
#include <map>
#include <memory>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>
#include "paddle_api.h" // NOLINT
namespace paddle {
// Algorithms for finding the scale of quantized tensors. A rule in
// MkldnnQuantizerConfig maps a connection (input/output) of an operator type
// to one of these values; see MkldnnQuantizerConfig::SetScaleAlgo.
enum class ScaleAlgo {
NONE, // Do not compute scale
MAX, // Find scale based on the maximum absolute value
MAX_CH, // Find scale based on the maximum absolute value per channel
KL, // Find scale based on KL Divergence
};
struct MkldnnQuantizerConfig {
MkldnnQuantizerConfig();
/** Specify a quantization algorithm for a connection (input/output) of the
* operator type.
* @param op_type_name the operator's name.
* @param conn_name name of the connection (input/output) of the operator.
* @param algo the algorithm for computing scale.
*/
void SetScaleAlgo(std::string op_type_name, std::string conn_name,
ScaleAlgo algo) {
rules_[op_type_name][conn_name] = algo;
}
/** Get the quantization algorithm for a connection (input/output) of the
* operator type.
* @param op_type_name the operator's name.
* @param conn_name name of the connection (input/output) of the operator.
* @return the algorithm for computing scale.
*/
ScaleAlgo scale_algo(const std::string& op_type_name,
const std::string& conn_name) const;
/** Set the batch of data to be used for warm-up iteration.
* @param data batch of data.
*/
void SetWarmupData(std::shared_ptr<std::vector<PaddleTensor>> data) {
warmup_data_ = data;
}
/** Get the batch of data used for warm-up iteration.
* @return batch of data.
*/
std::shared_ptr<std::vector<PaddleTensor>> warmup_data() const {
return warmup_data_;
}
void SetWarmupBatchSize(int batch_size) { warmup_bs_ = batch_size; }
int warmup_batch_size() const { return warmup_bs_; }
void SetEnabledOpTypes(std::unordered_set<std::string> op_list) {
enabled_op_types_ = op_list;
}
const std::unordered_set<std::string>& enabled_op_types() const {
return enabled_op_types_;
}
void SetExcludedOpIds(std::unordered_set<int> op_ids_list) {
excluded_op_ids_ = op_ids_list;
}
const std::unordered_set<int>& excluded_op_ids() const {
return excluded_op_ids_;
}
void SetDefaultScaleAlgo(ScaleAlgo algo) { default_scale_algo_ = algo; }
ScaleAlgo default_scale_algo() const { return default_scale_algo_; }
protected:
std::map<std::string, std::map<std::string, ScaleAlgo>> rules_;
std::unordered_set<std::string> enabled_op_types_;
std::unordered_set<int> excluded_op_ids_;
std::shared_ptr<std::vector<PaddleTensor>> warmup_data_;
int warmup_bs_{1};
ScaleAlgo default_scale_algo_{ScaleAlgo::MAX};
};
} // namespace paddle

@ -107,8 +107,8 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
use_gpu_ = true;
}
void GpuPassStrategy::EnableQuantizer() {
LOG(ERROR) << "GPU not support quantization yet";
void GpuPassStrategy::EnableMkldnnQuantizer() {
LOG(ERROR) << "GPU not support MKL-DNN quantization";
}
void PaddlePassBuilder::AppendAnalysisPass(const std::string &pass) {

@ -30,6 +30,10 @@ class PaddlePassBuilder {
explicit PaddlePassBuilder(const std::vector<std::string> &passes)
: passes_(passes) {}
/** Replace the current list of passes with the given ones. */
void SetPasses(std::initializer_list<std::string> passes) {
passes_ = passes;
}
/** Append a pass to the end of the passes. */
void AppendPass(const std::string &pass_type);
@ -85,9 +89,9 @@ class PassStrategy : public PaddlePassBuilder {
*/
virtual void EnableMKLDNN() {}
/** Enable quantize optimization
/** Enable MKLDNN quantize optimization
*/
virtual void EnableQuantizer() {}
virtual void EnableMkldnnQuantizer() {}
bool use_gpu() const { return use_gpu_; }
@ -130,15 +134,19 @@ class CpuPassStrategy : public PassStrategy {
#endif
}
void EnableQuantizer() override {
if (!use_quantizer_) {
void EnableMkldnnQuantizer() override {
#ifdef PADDLE_WITH_MKLDNN
if (!use_mkldnn_quantizer_) {
passes_.push_back("cpu_quantize_placement_pass");
}
use_quantizer_ = true;
use_mkldnn_quantizer_ = true;
#else
use_mkldnn_quantizer_ = false;
#endif
}
protected:
bool use_quantizer_{false};
bool use_mkldnn_quantizer_{false};
};
/** The GPU passes strategy, it is used in AnalysisPredictor with GPU mode.
@ -153,7 +161,7 @@ class GpuPassStrategy : public PassStrategy {
}
void EnableMKLDNN() override;
void EnableQuantizer() override;
void EnableMkldnnQuantizer() override;
virtual ~GpuPassStrategy() = default;
};

Loading…
Cancel
Save