From: @xutianchun
Reviewed-by: 
Signed-off-by:
pull/11040/MERGE
mindspore-ci-bot 5 years ago committed by Gitee
commit 1908d6b8c4

@ -21,7 +21,7 @@
#include "include/errorcode.h"
#include "src/common/log_adapter.h"
#ifdef PRIMITIVE_WRITEABLE
#include "tools/converter/quantizer/quantize_util.h"
#include "src/param_value_lite.h"
#endif
#ifndef PRIMITIVE_WRITEABLE

@ -24,7 +24,7 @@
#include "src/common/log_adapter.h"
#ifdef PRIMITIVE_WRITEABLE
#include <float.h>
#include "tools/converter/quantizer/quantize_util.h"
#include "src/param_value_lite.h"
#endif
#ifndef PRIMITIVE_WRITEABLE

@ -21,8 +21,7 @@
#include "src/common/log_adapter.h"
#ifdef PRIMITIVE_WRITEABLE
#include <float.h>
#include "tools/converter/quantizer/quantize_util.h"
#include "src/param_value_lite.h"
#endif
#ifndef PRIMITIVE_WRITEABLE

@ -19,7 +19,7 @@
#include <memory>
#include <string>
#ifdef PRIMITIVE_WRITEABLE
#include "tools/converter/quantizer/quantize_util.h"
#include "src/param_value_lite.h"
#endif
#ifndef PRIMITIVE_WRITEABLE
#include "src/ops/ops_register.h"

@ -18,7 +18,7 @@
#include <memory>
#include <utility>
#ifdef PRIMITIVE_WRITEABLE
#include "tools/converter/quantizer/quantize_util.h"
#include "src/param_value_lite.h"
#endif
#ifndef PRIMITIVE_WRITEABLE

@ -19,8 +19,7 @@
#include "src/common/log_adapter.h"
#ifdef PRIMITIVE_WRITEABLE
#include <float.h>
#include "tools/converter/quantizer/quantize_util.h"
#include "src/param_value_lite.h"
#endif
#ifndef PRIMITIVE_WRITEABLE

@ -19,7 +19,7 @@
#include "src/common/log_adapter.h"
#ifdef PRIMITIVE_WRITEABLE
#include <float.h>
#include "tools/converter/quantizer/quantize_util.h"
#include "src/param_value_lite.h"
#endif
#ifndef PRIMITIVE_WRITEABLE

@ -19,7 +19,7 @@
#include "src/common/log_adapter.h"
#ifdef PRIMITIVE_WRITEABLE
#include <float.h>
#include "tools/converter/quantizer/quantize_util.h"
#include "src/param_value_lite.h"
#endif
#ifndef PRIMITIVE_WRITEABLE

@ -389,7 +389,9 @@ void PrimitiveC::set_input_quant_params(const std::vector<std::vector<schema::Qu
}
// Sets the quantization parameters for the input tensor at `index`.
// The container is grown on demand, so callers may set params for an index
// beyond the current size; intermediate slots are default-constructed empty
// vectors. (The old MS_ASSERT(index < size) was removed: it contradicted the
// resize branch below and would abort debug builds in exactly the case the
// resize is meant to handle.)
void PrimitiveC::set_input_quant_param(const size_t &index, const std::vector<schema::QuantParamT> &input_quant_param) {
  if (index >= this->input_quant_param_.size()) {
    this->input_quant_param_.resize(index + 1);
  }
  this->input_quant_param_.at(index) = input_quant_param;
}
@ -495,7 +497,7 @@ std::shared_ptr<PrimitiveC> GetTupleGetItemPrim() {
}
template <typename T, typename = std::enable_if<std::is_base_of<PrimitiveC, T>::value>>
std::shared_ptr<PrimitiveC> NewPrimitiveC(const Primitive &prim, const std::vector<AnfNodePtr> &inputs,
std::shared_ptr<PrimitiveC> NewPrimitiveC(const mindspore::Primitive &prim, const std::vector<AnfNodePtr> &inputs,
const schema::QuantType &quantType) {
auto primc = std::make_shared<T>();
if (primc == nullptr) {

@ -204,7 +204,7 @@ FuncGraphPtr AnfTransform::TransformSingleFuncGraph(const FuncGraphPtr &old_grap
ReturnCode::GetSingleReturnCode()->UpdateReturnCode(RET_ERROR);
return nullptr;
}
this->mQuantizer = std::make_unique<quant::WeightQuantizer>(new_graph, config->quantWeightSize,
this->mQuantizer = std::make_unique<quant::WeightQuantizer>(new_graph, config->configFile, config->quantWeightSize,
config->quantWeightChannel, config->bitNum);
if (mQuantizer == nullptr) {
MS_LOG(ERROR) << "New WeightQuantizer failed";

@ -32,8 +32,6 @@
#include "tools/anf_exporter/anf_exporter.h"
#include "tools/anf_importer/import_from_mindir.h"
#include "proto/onnx.pb.h"
#include "tools/converter/quantizer/post_training_quantizer.h"
#include "tools/converter/quantizer/quant_cast.h"
#include "include/version.h"
namespace mindspore {

@ -16,7 +16,6 @@
#include "tools/converter/legacy_optimizer/graph/tensor_name_pass.h"
#include "tools/converter/converter_context.h"
#include "tools/converter/quantizer/quantize_util.h"
#include "tools/common/tensor_util.h"
namespace mindspore::lite {

@ -6,7 +6,6 @@ include_directories(${3RD_DIR}/opencv/build/include/opencv4)
file(GLOB QUANTIZER
${CMAKE_CURRENT_SOURCE_DIR}/calc_quant_param.cc
${CMAKE_CURRENT_SOURCE_DIR}/quantizer.cc
${CMAKE_CURRENT_SOURCE_DIR}/aware_quantizer.cc
${CMAKE_CURRENT_SOURCE_DIR}/quantize_util.cc
${CMAKE_CURRENT_SOURCE_DIR}/post_training_quantizer.cc
${CMAKE_CURRENT_SOURCE_DIR}/quant_cast.cc

@ -28,6 +28,8 @@
#include "tools/converter/quantizer/quantizer.h"
#include "tools/converter/converter.h"
#include "include/ms_tensor.h"
#include "tools/converter/quantizer/quantize_util.h"
#include "tools/converter/quantizer/weight_quantizer.h"
namespace mindspore::lite::quant {
class Calibrator;
@ -38,19 +40,8 @@ struct MaxMin {
float max;
};
const char kMethodMaxMin[] = "MAX_MIN";
const char kMethodKL[] = "KL";
const char kMethodOutlier[] = "RemovalOutlier";
constexpr int kDefaultBinNumber = 2048;
// Calibration settings for post-training quantization, parsed from the
// user-supplied config file (see Calibrator's config_path_).
struct ConfigParam {
  // Paths to calibration input data — presumably one directory per model
  // input; TODO confirm against the config-file parser.
  std::vector<std::string> image_paths;
  // Number of calibration batches to run (default 100).
  uint32_t batch_count{100};
  // Activation-range statistic: kMethodMaxMin ("MAX_MIN"), kMethodKL ("KL"),
  // or kMethodOutlier ("RemovalOutlier"); defaults to KL divergence.
  std::string method_x{kMethodKL};
  // Number of threads used during calibration inference.
  uint32_t thread_num{1};
  // NOTE(review): presumably enables a bias-correction pass after
  // quantization — confirm against the quantizer implementation.
  bool bias_correction{false};
};
class PostTrainingQuantizer : public Quantizer {
public:
PostTrainingQuantizer(FuncGraphPtr graph, std::string path, int bit_num, TypeId target_type = kNumberTypeInt8,
@ -64,14 +55,16 @@ class PostTrainingQuantizer : public Quantizer {
int quant_min{INT8_MIN};
private:
std::map<std::string, int> opname_bit_;
bool per_channel_{true};
TypeId target_type_{kNumberTypeInt8};
std::unique_ptr<Calibrator> calibrator_;
mindspore::lite::LiteSession *fp32_session_;
mindspore::lite::LiteSession *int8_session_;
session::LiteSession *fp32_session_{nullptr};
session::LiteSession *int8_session_{nullptr};
std::map<std::string, std::vector<float>> fp32_op_input_map;          // concurrency
std::map<std::string, std::vector<float>> fp32_op_output_ch_mean_map;  // concurrency
@ -112,7 +105,8 @@ class PostTrainingQuantizer : public Quantizer {
STATUS DoQuantOutput(double scale, int32_t zeropoint, struct MaxMin *max_min,
const std::shared_ptr<PrimitiveC> &) const;
STATUS DoWeightQuant(const AnfNodePtr &weight, std::shared_ptr<PrimitiveC> primitive_c, bool perchannel) const;
STATUS DoWeightQuant(const std::string &op_name, const AnfNodePtr &weight, std::shared_ptr<PrimitiveC> primitive_c,
bool perchannel) const;
STATUS DoBiasQuant(const AnfNodePtr &bias, const std::shared_ptr<PrimitiveC> &primitive_c);
STATUS Int8Inference();
@ -213,13 +207,13 @@ class Calibrator {
std::unordered_map<std::string, std::vector<std::unique_ptr<DivergInfo>>> *GetOutputDivergInfo();
PostQuantConfig config_param_;
private:
std::vector<std::vector<std::string>> images_;  // multi_input: each input has multiple data files
std::string config_path_;
ConfigParam config_param_;
std::unordered_map<std::string, std::vector<std::unique_ptr<DivergInfo>>> inputs_diverg_info_;
std::unordered_map<std::string, std::vector<std::unique_ptr<DivergInfo>>> outputs_diverg_info_;
@ -227,8 +221,6 @@ class Calibrator {
size_t bit_num_;
int quant_max_;
int quant_min_;
void AddImage(const std::string &file, size_t index);
};
} // namespace mindspore::lite::quant
#endif // MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_POSTRAINING_QUANTIZER_H

File diff suppressed because it is too large Load Diff

@ -17,6 +17,8 @@
#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_QUANTIZER_UTIL_H
#define MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_QUANTIZER_UTIL_H
#include <dirent.h>
#include <sys/stat.h>
#include <memory>
#include <string>
#include <cmath>
@ -35,11 +37,29 @@
#include "ir/primitive.h"
#include "abstract/dshape.h"
#include "tools/converter/quantizer/bitpacking.h"
#include "src/lite_session.h"
#include "tools/converter/graphdef_transform.h"
#include "src/common/file_utils.h"
namespace mindspore::lite::quant {
static constexpr size_t UINT8_QUANTIZATION = 8;
static constexpr size_t WEIGHT_INDEX = 1;
const char kMethodMaxMin[] = "MAX_MIN";
const char kMethodKL[] = "KL";
const char kMethodOutlier[] = "RemovalOutlier";
// Post-training quantization settings, filled in by ParseConfigFile().
struct PostQuantConfig {
  // Calibration input paths consumed via CollectCalibInputs() — presumably
  // one directory per model input; TODO confirm.
  std::vector<std::string> image_paths;
  // Number of calibration batches to run (default 100).
  uint32_t batch_count{100};
  // Activation-range statistic: kMethodMaxMin ("MAX_MIN"), kMethodKL ("KL"),
  // or kMethodOutlier ("RemovalOutlier"); defaults to KL divergence.
  std::string method_x{kMethodKL};
  // Number of threads used during calibration inference.
  uint32_t thread_num{1};
  // NOTE(review): presumably enables a bias-correction pass after
  // quantization — confirm against the quantizer implementation.
  bool bias_correction{false};
  // Enables mixed-bit weight quantization when true.
  bool mixed{false};
  // Assumed to be the tolerated mean error when choosing bit widths in mixed
  // mode — TODO confirm semantics and units.
  float mean_error_threshold{0.04};
  // Set once the config has been successfully parsed/initialized.
  bool inited{false};
};
/**
* 1. when op's weight size > mWeightSize just skip
* 2. only do conv/deconv/convdepthwise/deconvdepthwise/mul/matmul/batchmatmul quantization
@ -320,6 +340,21 @@ STATUS QuantFilter(const ParamValueLitePtr &weight, const std::shared_ptr<Primit
return RET_OK;
}
// utils
schema::PrimitiveType NodePrimitiveType(const CNodePtr &cnode);
STATUS ParseConfigFile(std::string config_file, PostQuantConfig *post_quant_config);
session::LiteSession *CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, const converter::Flags &flags,
int thread_num);
STATUS CollectCalibInputs(const std::vector<std::string> &input_dirs, size_t count_limited,
std::vector<std::vector<std::string>> *inputs);
STATUS CopyInputDataToTensor(size_t input_index, size_t image_index,
const std::vector<std::vector<std::string>> &images, mindspore::tensor::MSTensor *tensor);
FuncGraphPtr CopyFuncGraph(const FuncGraphPtr &);
} // namespace mindspore::lite::quant
#endif

@ -17,9 +17,12 @@
#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_WEIGHT_QUANTIZER_H
#define MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_WEIGHT_QUANTIZER_H
#include <future>
#include <memory>
#include <map>
#include <list>
#include <string>
#include <vector>
#include "tools/converter/quantizer/quantizer.h"
#include "tools/converter/quantizer/quantize_util.h"
#include "ir/func_graph.h"
@ -27,27 +30,37 @@
#include "include/model.h"
#include "base/base.h"
#include "abstract/dshape.h"
#include "src/lite_session.h"
namespace mindspore::lite::quant {
class WeightQuantizer : public Quantizer {
public:
WeightQuantizer(FuncGraphPtr graph, const std::string &weightSize, const std::string &covWeightChannelThreshold,
const std::string &bitNum);
WeightQuantizer(FuncGraphPtr graph, const std::string &config_file, const std::string &weightSize,
const std::string &covWeightChannelThreshold, const std::string &bitNum);
WeightQuantizer(FuncGraphPtr graph, const PostQuantConfig &config);
~WeightQuantizer();
~WeightQuantizer() = default;
STATUS DoQuantize(FuncGraphPtr funcGraph) override;
STATUS DoQuantize(FuncGraphPtr func_graph) override;
STATUS DoConvQuantize(const std::list<CNodePtr> &nodes);
STATUS DoMulQuantize(const std::list<CNodePtr> &nodes);
static STATUS WeightQuantInputCheck(const converter::Flags *config);
static bool IsPosNum(const std::string &str);
int quant_max;
int quant_min;
TypeId type_id{kTypeUnknown};
std::map<std::string, int> opname_bit_;
private:
std::unique_ptr<QuantStrategy> mStrategy;
size_t bitNum;
std::unique_ptr<QuantStrategy> quant_strategy_;
size_t bit_num_;
std::string config_file_;
PostQuantConfig config_param_;
std::vector<std::vector<std::string>> images_; // multi_input, [[mode_input_0], [model_input_1]...]
session::LiteSession *fp32_session_ = nullptr;
STATUS DoMiexedQuant(FuncGraphPtr);
STATUS SetAbstract(ParamValueLitePtr param_value, ParameterPtr param_node, std::shared_ptr<PrimitiveC> primitive_c);
};
} // namespace mindspore::lite::quant
#endif

Loading…
Cancel
Save