From: @xutianchun
Reviewed-by: 
Signed-off-by:
pull/11040/MERGE
mindspore-ci-bot 5 years ago committed by Gitee
commit 1908d6b8c4

@ -21,7 +21,7 @@
#include "include/errorcode.h" #include "include/errorcode.h"
#include "src/common/log_adapter.h" #include "src/common/log_adapter.h"
#ifdef PRIMITIVE_WRITEABLE #ifdef PRIMITIVE_WRITEABLE
#include "tools/converter/quantizer/quantize_util.h" #include "src/param_value_lite.h"
#endif #endif
#ifndef PRIMITIVE_WRITEABLE #ifndef PRIMITIVE_WRITEABLE

@ -24,7 +24,7 @@
#include "src/common/log_adapter.h" #include "src/common/log_adapter.h"
#ifdef PRIMITIVE_WRITEABLE #ifdef PRIMITIVE_WRITEABLE
#include <float.h> #include <float.h>
#include "tools/converter/quantizer/quantize_util.h" #include "src/param_value_lite.h"
#endif #endif
#ifndef PRIMITIVE_WRITEABLE #ifndef PRIMITIVE_WRITEABLE

@ -21,8 +21,7 @@
#include "src/common/log_adapter.h" #include "src/common/log_adapter.h"
#ifdef PRIMITIVE_WRITEABLE #ifdef PRIMITIVE_WRITEABLE
#include <float.h> #include <float.h>
#include "src/param_value_lite.h"
#include "tools/converter/quantizer/quantize_util.h"
#endif #endif
#ifndef PRIMITIVE_WRITEABLE #ifndef PRIMITIVE_WRITEABLE

@ -19,7 +19,7 @@
#include <memory> #include <memory>
#include <string> #include <string>
#ifdef PRIMITIVE_WRITEABLE #ifdef PRIMITIVE_WRITEABLE
#include "tools/converter/quantizer/quantize_util.h" #include "src/param_value_lite.h"
#endif #endif
#ifndef PRIMITIVE_WRITEABLE #ifndef PRIMITIVE_WRITEABLE
#include "src/ops/ops_register.h" #include "src/ops/ops_register.h"

@ -18,7 +18,7 @@
#include <memory> #include <memory>
#include <utility> #include <utility>
#ifdef PRIMITIVE_WRITEABLE #ifdef PRIMITIVE_WRITEABLE
#include "tools/converter/quantizer/quantize_util.h" #include "src/param_value_lite.h"
#endif #endif
#ifndef PRIMITIVE_WRITEABLE #ifndef PRIMITIVE_WRITEABLE

@ -19,8 +19,7 @@
#include "src/common/log_adapter.h" #include "src/common/log_adapter.h"
#ifdef PRIMITIVE_WRITEABLE #ifdef PRIMITIVE_WRITEABLE
#include <float.h> #include <float.h>
#include "src/param_value_lite.h"
#include "tools/converter/quantizer/quantize_util.h"
#endif #endif
#ifndef PRIMITIVE_WRITEABLE #ifndef PRIMITIVE_WRITEABLE

@ -19,7 +19,7 @@
#include "src/common/log_adapter.h" #include "src/common/log_adapter.h"
#ifdef PRIMITIVE_WRITEABLE #ifdef PRIMITIVE_WRITEABLE
#include <float.h> #include <float.h>
#include "tools/converter/quantizer/quantize_util.h" #include "src/param_value_lite.h"
#endif #endif
#ifndef PRIMITIVE_WRITEABLE #ifndef PRIMITIVE_WRITEABLE

@ -19,7 +19,7 @@
#include "src/common/log_adapter.h" #include "src/common/log_adapter.h"
#ifdef PRIMITIVE_WRITEABLE #ifdef PRIMITIVE_WRITEABLE
#include <float.h> #include <float.h>
#include "tools/converter/quantizer/quantize_util.h" #include "src/param_value_lite.h"
#endif #endif
#ifndef PRIMITIVE_WRITEABLE #ifndef PRIMITIVE_WRITEABLE

@ -389,7 +389,9 @@ void PrimitiveC::set_input_quant_params(const std::vector<std::vector<schema::Qu
} }
void PrimitiveC::set_input_quant_param(const size_t &index, const std::vector<schema::QuantParamT> &input_quant_param) { void PrimitiveC::set_input_quant_param(const size_t &index, const std::vector<schema::QuantParamT> &input_quant_param) {
MS_ASSERT(index < this->input_quant_param_.size()); if (index >= this->input_quant_param_.size()) {
this->input_quant_param_.resize(index + 1);
}
this->input_quant_param_.at(index) = input_quant_param; this->input_quant_param_.at(index) = input_quant_param;
} }
@ -495,7 +497,7 @@ std::shared_ptr<PrimitiveC> GetTupleGetItemPrim() {
} }
template <typename T, typename = std::enable_if<std::is_base_of<PrimitiveC, T>::value>> template <typename T, typename = std::enable_if<std::is_base_of<PrimitiveC, T>::value>>
std::shared_ptr<PrimitiveC> NewPrimitiveC(const Primitive &prim, const std::vector<AnfNodePtr> &inputs, std::shared_ptr<PrimitiveC> NewPrimitiveC(const mindspore::Primitive &prim, const std::vector<AnfNodePtr> &inputs,
const schema::QuantType &quantType) { const schema::QuantType &quantType) {
auto primc = std::make_shared<T>(); auto primc = std::make_shared<T>();
if (primc == nullptr) { if (primc == nullptr) {

@ -204,7 +204,7 @@ FuncGraphPtr AnfTransform::TransformSingleFuncGraph(const FuncGraphPtr &old_grap
ReturnCode::GetSingleReturnCode()->UpdateReturnCode(RET_ERROR); ReturnCode::GetSingleReturnCode()->UpdateReturnCode(RET_ERROR);
return nullptr; return nullptr;
} }
this->mQuantizer = std::make_unique<quant::WeightQuantizer>(new_graph, config->quantWeightSize, this->mQuantizer = std::make_unique<quant::WeightQuantizer>(new_graph, config->configFile, config->quantWeightSize,
config->quantWeightChannel, config->bitNum); config->quantWeightChannel, config->bitNum);
if (mQuantizer == nullptr) { if (mQuantizer == nullptr) {
MS_LOG(ERROR) << "New WeightQuantizer failed"; MS_LOG(ERROR) << "New WeightQuantizer failed";

@ -32,8 +32,6 @@
#include "tools/anf_exporter/anf_exporter.h" #include "tools/anf_exporter/anf_exporter.h"
#include "tools/anf_importer/import_from_mindir.h" #include "tools/anf_importer/import_from_mindir.h"
#include "proto/onnx.pb.h" #include "proto/onnx.pb.h"
#include "tools/converter/quantizer/post_training_quantizer.h"
#include "tools/converter/quantizer/quant_cast.h"
#include "include/version.h" #include "include/version.h"
namespace mindspore { namespace mindspore {

@ -16,7 +16,6 @@
#include "tools/converter/legacy_optimizer/graph/tensor_name_pass.h" #include "tools/converter/legacy_optimizer/graph/tensor_name_pass.h"
#include "tools/converter/converter_context.h" #include "tools/converter/converter_context.h"
#include "tools/converter/quantizer/quantize_util.h"
#include "tools/common/tensor_util.h" #include "tools/common/tensor_util.h"
namespace mindspore::lite { namespace mindspore::lite {

@ -6,7 +6,6 @@ include_directories(${3RD_DIR}/opencv/build/include/opencv4)
file(GLOB QUANTIZER file(GLOB QUANTIZER
${CMAKE_CURRENT_SOURCE_DIR}/calc_quant_param.cc ${CMAKE_CURRENT_SOURCE_DIR}/calc_quant_param.cc
${CMAKE_CURRENT_SOURCE_DIR}/quantizer.cc ${CMAKE_CURRENT_SOURCE_DIR}/quantizer.cc
${CMAKE_CURRENT_SOURCE_DIR}/aware_quantizer.cc
${CMAKE_CURRENT_SOURCE_DIR}/quantize_util.cc ${CMAKE_CURRENT_SOURCE_DIR}/quantize_util.cc
${CMAKE_CURRENT_SOURCE_DIR}/post_training_quantizer.cc ${CMAKE_CURRENT_SOURCE_DIR}/post_training_quantizer.cc
${CMAKE_CURRENT_SOURCE_DIR}/quant_cast.cc ${CMAKE_CURRENT_SOURCE_DIR}/quant_cast.cc

@ -28,6 +28,8 @@
#include "tools/converter/quantizer/quantizer.h" #include "tools/converter/quantizer/quantizer.h"
#include "tools/converter/converter.h" #include "tools/converter/converter.h"
#include "include/ms_tensor.h" #include "include/ms_tensor.h"
#include "tools/converter/quantizer/quantize_util.h"
#include "tools/converter/quantizer/weight_quantizer.h"
namespace mindspore::lite::quant { namespace mindspore::lite::quant {
class Calibrator; class Calibrator;
@ -38,19 +40,8 @@ struct MaxMin {
float max; float max;
}; };
// Method-name strings. kMethodKL is the default for ConfigParam::method_x.
constexpr char kMethodMaxMin[] = "MAX_MIN";
constexpr char kMethodKL[] = "KL";
constexpr char kMethodOutlier[] = "RemovalOutlier";
constexpr int kDefaultBinNumber = 2048; constexpr int kDefaultBinNumber = 2048;
// Plain settings record; every scalar field carries an in-class default.
// NOTE(review): the meaning of each field is inferred from its name only —
// confirm against the code that fills this struct.
struct ConfigParam {
  std::vector<std::string> image_paths;  // empty until populated
  uint32_t batch_count = 100;
  std::string method_x = kMethodKL;  // default method name is kMethodKL
  uint32_t thread_num = 1;
  bool bias_correction = false;
};
class PostTrainingQuantizer : public Quantizer { class PostTrainingQuantizer : public Quantizer {
public: public:
PostTrainingQuantizer(FuncGraphPtr graph, std::string path, int bit_num, TypeId target_type = kNumberTypeInt8, PostTrainingQuantizer(FuncGraphPtr graph, std::string path, int bit_num, TypeId target_type = kNumberTypeInt8,
@ -64,14 +55,16 @@ class PostTrainingQuantizer : public Quantizer {
int quant_min{INT8_MIN}; int quant_min{INT8_MIN};
private: private:
std::map<std::string, int> opname_bit_;
bool per_channel_{true}; bool per_channel_{true};
TypeId target_type_{kNumberTypeInt8}; TypeId target_type_{kNumberTypeInt8};
std::unique_ptr<Calibrator> calibrator_; std::unique_ptr<Calibrator> calibrator_;
mindspore::lite::LiteSession *fp32_session_; session::LiteSession *fp32_session_{nullptr};
mindspore::lite::LiteSession *int8_session_; session::LiteSession *int8_session_{nullptr};
std::map<std::string, std::vector<float>> fp32_op_input_map; // concurrency std::map<std::string, std::vector<float>> fp32_op_input_map; // concurrency
std::map<std::string, std::vector<float>> fp32_op_output_ch_mean_map; // concurrency std::map<std::string, std::vector<float>> fp32_op_output_ch_mean_map; // concurrency
@ -112,7 +105,8 @@ class PostTrainingQuantizer : public Quantizer {
STATUS DoQuantOutput(double scale, int32_t zeropoint, struct MaxMin *max_min, STATUS DoQuantOutput(double scale, int32_t zeropoint, struct MaxMin *max_min,
const std::shared_ptr<PrimitiveC> &) const; const std::shared_ptr<PrimitiveC> &) const;
STATUS DoWeightQuant(const AnfNodePtr &weight, std::shared_ptr<PrimitiveC> primitive_c, bool perchannel) const; STATUS DoWeightQuant(const std::string &op_name, const AnfNodePtr &weight, std::shared_ptr<PrimitiveC> primitive_c,
bool perchannel) const;
STATUS DoBiasQuant(const AnfNodePtr &bias, const std::shared_ptr<PrimitiveC> &primitive_c); STATUS DoBiasQuant(const AnfNodePtr &bias, const std::shared_ptr<PrimitiveC> &primitive_c);
STATUS Int8Inference(); STATUS Int8Inference();
@ -213,13 +207,13 @@ class Calibrator {
std::unordered_map<std::string, std::vector<std::unique_ptr<DivergInfo>>> *GetOutputDivergInfo(); std::unordered_map<std::string, std::vector<std::unique_ptr<DivergInfo>>> *GetOutputDivergInfo();
PostQuantConfig config_param_;
private: private:
std::vector<std::vector<std::string>> images_; // multi_input, each input has multi input data std::vector<std::vector<std::string>> images_; // multi_input, each input has multi input data
std::string config_path_; std::string config_path_;
ConfigParam config_param_;
std::unordered_map<std::string, std::vector<std::unique_ptr<DivergInfo>>> inputs_diverg_info_; std::unordered_map<std::string, std::vector<std::unique_ptr<DivergInfo>>> inputs_diverg_info_;
std::unordered_map<std::string, std::vector<std::unique_ptr<DivergInfo>>> outputs_diverg_info_; std::unordered_map<std::string, std::vector<std::unique_ptr<DivergInfo>>> outputs_diverg_info_;
@ -227,8 +221,6 @@ class Calibrator {
size_t bit_num_; size_t bit_num_;
int quant_max_; int quant_max_;
int quant_min_; int quant_min_;
void AddImage(const std::string &file, size_t index);
}; };
} // namespace mindspore::lite::quant } // namespace mindspore::lite::quant
#endif // MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_POSTRAINING_QUANTIZER_H #endif // MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_POSTRAINING_QUANTIZER_H

File diff suppressed because it is too large Load Diff

@ -17,6 +17,8 @@
#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_QUANTIZER_UTIL_H #ifndef MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_QUANTIZER_UTIL_H
#define MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_QUANTIZER_UTIL_H #define MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_QUANTIZER_UTIL_H
#include <dirent.h>
#include <sys/stat.h>
#include <memory> #include <memory>
#include <string> #include <string>
#include <cmath> #include <cmath>
@ -35,11 +37,29 @@
#include "ir/primitive.h" #include "ir/primitive.h"
#include "abstract/dshape.h" #include "abstract/dshape.h"
#include "tools/converter/quantizer/bitpacking.h" #include "tools/converter/quantizer/bitpacking.h"
#include "src/lite_session.h"
#include "tools/converter/graphdef_transform.h"
#include "src/common/file_utils.h"
namespace mindspore::lite::quant { namespace mindspore::lite::quant {
static constexpr size_t UINT8_QUANTIZATION = 8; static constexpr size_t UINT8_QUANTIZATION = 8;
static constexpr size_t WEIGHT_INDEX = 1; static constexpr size_t WEIGHT_INDEX = 1;
// Method-name strings. kMethodKL is the default for PostQuantConfig::method_x.
constexpr char kMethodMaxMin[] = "MAX_MIN";
constexpr char kMethodKL[] = "KL";
constexpr char kMethodOutlier[] = "RemovalOutlier";

// Plain settings record; every scalar field carries an in-class default.
// NOTE(review): the meaning of each field is inferred from its name only —
// confirm against ParseConfigFile, which receives a pointer to this struct.
struct PostQuantConfig {
  std::vector<std::string> image_paths;  // empty until populated
  uint32_t batch_count = 100;
  std::string method_x = kMethodKL;  // default method name is "KL"
  uint32_t thread_num = 1;
  bool bias_correction = false;
  bool mixed = false;
  float mean_error_threshold = 0.04f;
  bool inited = false;
};
/** /**
* 1. when op's weight size > mWeightSize just skip * 1. when op's weight size > mWeightSize just skip
* 2. only do conv/deconv/convdepthwise/deconvdepthwise/mul/matmul/batchmatmul quantization * 2. only do conv/deconv/convdepthwise/deconvdepthwise/mul/matmul/batchmatmul quantization
@ -320,6 +340,21 @@ STATUS QuantFilter(const ParamValueLitePtr &weight, const std::shared_ptr<Primit
return RET_OK; return RET_OK;
} }
// utils
schema::PrimitiveType NodePrimitiveType(const CNodePtr &cnode); schema::PrimitiveType NodePrimitiveType(const CNodePtr &cnode);
STATUS ParseConfigFile(std::string config_file, PostQuantConfig *post_quant_config);
session::LiteSession *CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, const converter::Flags &flags,
int thread_num);
STATUS CollectCalibInputs(const std::vector<std::string> &input_dirs, size_t count_limited,
std::vector<std::vector<std::string>> *inputs);
STATUS CopyInputDataToTensor(size_t input_index, size_t image_index,
const std::vector<std::vector<std::string>> &images, mindspore::tensor::MSTensor *tensor);
FuncGraphPtr CopyFuncGraph(const FuncGraphPtr &);
} // namespace mindspore::lite::quant } // namespace mindspore::lite::quant
#endif #endif

@ -17,9 +17,12 @@
#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_WEIGHT_QUANTIZER_H #ifndef MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_WEIGHT_QUANTIZER_H
#define MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_WEIGHT_QUANTIZER_H #define MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_WEIGHT_QUANTIZER_H
#include <future>
#include <memory> #include <memory>
#include <map>
#include <list> #include <list>
#include <string> #include <string>
#include <vector>
#include "tools/converter/quantizer/quantizer.h" #include "tools/converter/quantizer/quantizer.h"
#include "tools/converter/quantizer/quantize_util.h" #include "tools/converter/quantizer/quantize_util.h"
#include "ir/func_graph.h" #include "ir/func_graph.h"
@ -27,27 +30,37 @@
#include "include/model.h" #include "include/model.h"
#include "base/base.h" #include "base/base.h"
#include "abstract/dshape.h" #include "abstract/dshape.h"
#include "src/lite_session.h"
namespace mindspore::lite::quant { namespace mindspore::lite::quant {
class WeightQuantizer : public Quantizer { class WeightQuantizer : public Quantizer {
public: public:
WeightQuantizer(FuncGraphPtr graph, const std::string &weightSize, const std::string &covWeightChannelThreshold, WeightQuantizer(FuncGraphPtr graph, const std::string &config_file, const std::string &weightSize,
const std::string &bitNum); const std::string &covWeightChannelThreshold, const std::string &bitNum);
WeightQuantizer(FuncGraphPtr graph, const PostQuantConfig &config);
~WeightQuantizer();
~WeightQuantizer() = default; STATUS DoQuantize(FuncGraphPtr func_graph) override;
STATUS DoQuantize(FuncGraphPtr funcGraph) override;
STATUS DoConvQuantize(const std::list<CNodePtr> &nodes); STATUS DoConvQuantize(const std::list<CNodePtr> &nodes);
STATUS DoMulQuantize(const std::list<CNodePtr> &nodes); STATUS DoMulQuantize(const std::list<CNodePtr> &nodes);
static STATUS WeightQuantInputCheck(const converter::Flags *config); static STATUS WeightQuantInputCheck(const converter::Flags *config);
static bool IsPosNum(const std::string &str); static bool IsPosNum(const std::string &str);
int quant_max; int quant_max;
int quant_min; int quant_min;
TypeId type_id{kTypeUnknown}; TypeId type_id{kTypeUnknown};
std::map<std::string, int> opname_bit_;
private: private:
std::unique_ptr<QuantStrategy> mStrategy; std::unique_ptr<QuantStrategy> quant_strategy_;
size_t bitNum; size_t bit_num_;
std::string config_file_;
PostQuantConfig config_param_;
std::vector<std::vector<std::string>> images_; // multi_input, [[model_input_0], [model_input_1]...]
session::LiteSession *fp32_session_ = nullptr;
STATUS DoMiexedQuant(FuncGraphPtr);
STATUS SetAbstract(ParamValueLitePtr param_value, ParameterPtr param_node, std::shared_ptr<PrimitiveC> primitive_c);
}; };
} // namespace mindspore::lite::quant } // namespace mindspore::lite::quant
#endif #endif

Loading…
Cancel
Save