diff --git a/mindspore/ccsrc/pipeline/jit/pipeline.cc b/mindspore/ccsrc/pipeline/jit/pipeline.cc
index 5a768184a3..a17d01b58c 100644
--- a/mindspore/ccsrc/pipeline/jit/pipeline.cc
+++ b/mindspore/ccsrc/pipeline/jit/pipeline.cc
@@ -383,6 +383,63 @@ ExecutorPy::~ExecutorPy() {
   ConfigManager::GetInstance().ResetConfig();
 }
 
+void ExecutorPy::GetWeightInfo(const CNodePtr &root_node, const AnfNodePtr &weight_node,
+                               std::map<std::string, std::pair<PrimitivePyPtr, std::string>> *fake_quant_table) {
+  std::string weight_name;
+  auto x = root_node->input(1);
+  if (IsPrimitiveCNode(weight_node, prim::kPrimLoad)) {
+    weight_name = weight_node->cast<CNodePtr>()->input(1)->cast<ParameterPtr>()->name();
+  } else {
+    weight_name = weight_node->cast<ParameterPtr>()->name();
+  }
+  // find the fakequant from input
+  int64_t count = 0;
+  const int64_t max_depth = 5;
+  CNodePtr cnode = nullptr;
+  auto is_quant_cnode = [](const AnfNodePtr &node) {
+    return IsPrimitiveCNode(node, prim::kPrimFakeQuantPerLayer) ||
+           IsPrimitiveCNode(node, prim::kPrimFakeQuantPerChannel);
+  };
+  while (!is_quant_cnode(x)) {
+    if (count >= max_depth) {
+      break;
+    }
+    cnode = x->cast<CNodePtr>();
+    if (cnode == nullptr || cnode->size() <= 1) {
+      break;
+    }
+    x = cnode->input(1);
+    count += 1;
+  }
+  if (x->isa<Parameter>() || IsPrimitiveCNode(x, prim::kPrimLoad)) {
+    (*fake_quant_table)[weight_name] = std::make_pair(nullptr, "input");
+  }
+  // get the fakequant parameter minq's name
+  if (!is_quant_cnode(x)) {
+    return;
+  }
+  cnode = x->cast<CNodePtr>();
+  if (cnode == nullptr || IsPrimitiveCNode(cnode, prim::kPrimLoad) || cnode->size() != 4) {
+    return;
+  }
+  auto fakequant_min_node = cnode->input(2);
+  if (!fakequant_min_node->isa<Parameter>() && !IsPrimitiveCNode(fakequant_min_node, prim::kPrimLoad)) {
+    return;
+  }
+  std::string fakequant_min_node_name;
+  if (IsPrimitiveCNode(fakequant_min_node, prim::kPrimLoad)) {
+    fakequant_min_node_name = fakequant_min_node->cast<CNodePtr>()->input(1)->cast<ParameterPtr>()->name();
+  } else {
+    fakequant_min_node_name = fakequant_min_node->cast<ParameterPtr>()->name();
+  }
+  auto quant_op_value = cnode->input(0)->cast<ValueNodePtr>()->value();
+  if (!quant_op_value->isa<PrimitivePy>()) {
+    return;
+  }
+  auto quant_op = quant_op_value->cast<PrimitivePyPtr>();
+  (*fake_quant_table)[weight_name] = std::make_pair(quant_op, fakequant_min_node_name);
+}
+
 std::map<std::string, std::pair<PrimitivePyPtr, std::string>> ExecutorPy::FetchInfoForQuantExport(
   const std::string &phase_s) {
   FuncGraphPtr func_graph = info_[phase_s]->resource->func_graph();
@@ -399,58 +456,21 @@ std::map<std::string, std::pair<PrimitivePyPtr, std::string>> ExecutorPy::FetchI
            IsPrimitiveCNode(node, prim::kPrimFakeQuantPerChannel);
   };
   for (const auto &node : nodes) {
-    auto cnode = node->cast<CNodePtr>();
-    if (cnode == nullptr || cnode->size() != 3) {
+    auto root_node = node->cast<CNodePtr>();
+    if (root_node == nullptr || root_node->size() != 3) {
       continue;
     }
-    auto x = cnode->input(1);
-    auto weight = cnode->input(2);
+    auto weight = root_node->input(2);
     if (!is_quant_cnode(weight)) {
       continue;
     }
     // get parameter weight's name
-    cnode = weight->cast<CNodePtr>();
+    auto cnode = weight->cast<CNodePtr>();
     auto weight_node = cnode->input(2);
-    if (!weight_node->isa<Parameter>()) {
-      continue;
-    }
-    auto weight_name = weight_node->cast<ParameterPtr>()->name();
-    // find the fakequant from input
-    int64_t count = 0;
-    const int64_t max_depth = 5;
-    while (!is_quant_cnode(x)) {
-      if (count >= max_depth) {
-        break;
-      }
-      cnode = x->cast<CNodePtr>();
-      if (cnode == nullptr || cnode->size() <= 1) {
-        break;
-      }
-      x = cnode->input(1);
-      count += 1;
-    }
-    if (x->isa<Parameter>()) {
-      fake_quant_table[weight_name] = std::make_pair(nullptr, "input");
-    }
-    // get the fakequant parameter minq's name
-    if (!is_quant_cnode(x)) {
-      continue;
-    }
-    cnode = x->cast<CNodePtr>();
-    if (cnode == nullptr || cnode->size() != 4) {
-      continue;
-    }
-    auto fakequant_min_node = cnode->input(2);
-    if (!fakequant_min_node->isa<Parameter>()) {
-      continue;
-    }
-    auto fakequant_min_node_name = fakequant_min_node->cast<ParameterPtr>()->name();
-    auto quant_op_value = cnode->input(0)->cast<ValueNodePtr>()->value();
-    if (!quant_op_value->isa<PrimitivePy>()) {
+    if (!weight_node->isa<Parameter>() && !IsPrimitiveCNode(weight_node, prim::kPrimLoad)) {
       continue;
     }
-    auto quant_op = quant_op_value->cast<PrimitivePyPtr>();
-    fake_quant_table[weight_name] = std::make_pair(quant_op, fakequant_min_node_name);
+    GetWeightInfo(root_node, weight_node, &fake_quant_table);
   }
 
   return fake_quant_table;
diff --git a/mindspore/ccsrc/pipeline/jit/pipeline.h b/mindspore/ccsrc/pipeline/jit/pipeline.h
index 6ead471423..7588f762c0 100644
--- a/mindspore/ccsrc/pipeline/jit/pipeline.h
+++ b/mindspore/ccsrc/pipeline/jit/pipeline.h
@@ -110,6 +110,8 @@ class ExecutorPy : public std::enable_shared_from_this<ExecutorPy> {
  private:
   ExecutorPy();
   void ConvertObjectToTensors(const py::dict &dict, std::map<std::string, tensor::TensorPtr> *tensors);
+  void GetWeightInfo(const CNodePtr &root_node, const AnfNodePtr &weight_node,
+                     std::map<std::string, std::pair<PrimitivePyPtr, std::string>> *fake_quant_table);
   void GetGeBackendPolicy() const;
   // filter some pipeline actions according to phase, e.g. when exporting onnx, it is no need to execute actions after
   // 'validate' stage
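Note on the pipeline.cc/pipeline.h hunks above: the per-weight lookup moves into a dedicated GetWeightInfo helper, which now also sees through the Load nodes that MindSpore's auto-monad pass inserts around parameter reads (a Load's input(1) is the underlying Parameter). The sketch below is a rough Python re-expression of that bounded upward walk, using a hypothetical Node class rather than the real AnfNode/CNode API; it is illustrative only, not MindSpore code.

# Illustrative sketch only: a toy re-expression of GetWeightInfo's bounded walk.
# Node is a hypothetical stand-in for MindSpore's AnfNode/CNode classes.
MAX_DEPTH = 5  # mirrors `const int64_t max_depth = 5` above

class Node:
    def __init__(self, op, inputs=None, name=None):
        self.op = op                # e.g. "FakeQuantPerLayer", "Load", "Conv2D"
        self.inputs = inputs or []  # inputs[0] is the primitive slot, as in a CNode
        self.name = name

def is_quant_node(node):
    return node.op in ("FakeQuantPerLayer", "FakeQuantPerChannel")

def unwrap_load(node):
    """A Load node reads a Parameter; the real parameter sits at input(1)."""
    return node.inputs[1] if node.op == "Load" else node

def find_activation_quant(x):
    """Climb input(1) for at most MAX_DEPTH hops, stopping at a fake-quant node."""
    for _ in range(MAX_DEPTH):
        if is_quant_node(x):
            return x
        if len(x.inputs) <= 1:      # mirrors `cnode->size() <= 1`
            return x
        x = x.inputs[1]             # mirrors `x = cnode->input(1)`
    return x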
diff --git a/mindspore/compression/export/quant_export.py b/mindspore/compression/export/quant_export.py
index 5ded0bb29f..979ad23864 100644
--- a/mindspore/compression/export/quant_export.py
+++ b/mindspore/compression/export/quant_export.py
@@ -76,7 +76,7 @@ class ExportToQuantInferNetwork:
         return network
 
     def _get_quant_block(self, cell_core, activation, fake_quant_a_out):
-        """convet network's quant subcell to deploy subcell"""
+        """convert network's quant subcell to deploy subcell"""
         # Calculate the scale and zero point
         w_minq_name = cell_core.fake_quant_weight.minq.name
         np_type = mstype.dtype_to_nptype(self.data_type)
@@ -129,7 +129,7 @@ class ExportToQuantInferNetwork:
         if isinstance(cell_core, (quant.DenseQuant, quant.Conv2dQuant)):
             if cell_core.has_bias:
                 bias = cell_core.bias.data.asnumpy()
-        elif isinstance(cell_core, quant.Conv2dBnFoldQuant):
+        elif isinstance(cell_core, (quant.Conv2dBnFoldQuant, quant.Conv2dBnFoldQuantOneConv)):
             weight, bias = quant_utils.fold_batchnorm(weight, cell_core)
         elif isinstance(cell_core, quant.Conv2dBnWithoutFoldQuant):
             weight, bias = quant_utils.without_fold_batchnorm(weight, cell_core)
diff --git a/mindspore/nn/layer/quant.py b/mindspore/nn/layer/quant.py
index d5c98462c2..4b9de7aae5 100644
--- a/mindspore/nn/layer/quant.py
+++ b/mindspore/nn/layer/quant.py
@@ -1381,11 +1381,14 @@ class QuantBlock(Cell):
         self.activation = activation
         self.has_act = activation is not None
         self.bias_add = P.BiasAdd()
+        self.sub = P.Sub()
+        self.weight_offset = Parameter(np.zeros(shape=weight.shape, dtype=np.int8), name='weight_offset')
 
     def construct(self, x):
         x = self.quant(x)
         if self.has_bias:
-            x = self.core_op(x, self.weight)
+            weight = self.sub(self.weight, self.weight_offset)
+            x = self.core_op(x, weight)
             x = self.bias_add(x, self.bias)
         else:
             x = self.core_op(x, self.weight)
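Note on the QuantBlock hunk: construct() now subtracts a per-element weight_offset (an int8 zero point, initialized to zeros, so existing symmetric exports are unchanged) from the weight before the core op, which is what asymmetric weight quantization needs. A small numpy sketch of the arithmetic, with assumed scale/zero-point values; this is not the MindSpore kernel, just the underlying identity:

# With asymmetric quantization w_q = round(w / scale) + zp, the dequantized
# weight is scale * (w_q - zp), so the integer zero point must be subtracted
# before the matmul/conv -- exactly what P.Sub does in construct() above.
import numpy as np

scale, zp = 0.05, 3                               # assumed quantization params
w = np.array([-0.30, 0.00, 0.25])                 # real-valued weights
w_q = np.round(w / scale).astype(np.int8) + zp    # quantize with offset
w_hat = scale * (w_q.astype(np.float32) - zp)     # dequantize: subtract zp first
print(np.allclose(w, w_hat, atol=scale / 2))      # True: recovered within 1/2 LSB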
+parser.add_argument("--file_format", type=str, choices=["AIR", "MINDIR"], default="MINDIR", help="file format") parser.add_argument('--device_target', type=str, default=None, help='Run device target') args_opt = parser.parse_args() @@ -46,5 +47,9 @@ if __name__ == '__main__': # export network print("============== Starting export ==============") inputs = Tensor(np.ones([1, 3, cfg.image_height, cfg.image_width]), mindspore.float32) - export(network, inputs, file_name="mobilenet_quant", file_format='MINDIR', quant_mode='AUTO') + if args_opt.file_format == 'MINDIR': + export(network, inputs, file_name="mobilenet_quant", file_format='MINDIR', quant_mode='AUTO') + else: + export(network, inputs, file_name="mobilenet_quant", file_format='AIR', + quant_mode='AUTO', mean=0., std_dev=48.106) print("============== End export ==============")
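Note on the export script: the new --file_format flag defaults to MINDIR, and only the AIR branch passes the extra mean/std_dev options that AIR quant export takes. A typical AIR export run (the checkpoint filename here is illustrative) would look like:

python export.py --device_target Ascend --checkpoint_path mobilenetv2_quant.ckpt --file_format AIR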