diff --git a/mindspore/ccsrc/pipeline/jit/pipeline.cc b/mindspore/ccsrc/pipeline/jit/pipeline.cc
index 5a768184a3..a17d01b58c 100644
--- a/mindspore/ccsrc/pipeline/jit/pipeline.cc
+++ b/mindspore/ccsrc/pipeline/jit/pipeline.cc
@@ -383,6 +383,63 @@ ExecutorPy::~ExecutorPy() {
   ConfigManager::GetInstance().ResetConfig();
 }
 
+void ExecutorPy::GetWeightInfo(const CNodePtr &root_node, const AnfNodePtr &weight_node,
+                               std::map<std::string, std::pair<PrimitivePyPtr, std::string>> *fake_quant_table) {
+  std::string weight_name;
+  auto x = root_node->input(1);
+  if (IsPrimitiveCNode(weight_node, prim::kPrimLoad)) {
+    weight_name = weight_node->cast<CNodePtr>()->input(1)->cast<ParameterPtr>()->name();
+  } else {
+    weight_name = weight_node->cast<ParameterPtr>()->name();
+  }
+  // find the fakequant from input
+  int64_t count = 0;
+  const int64_t max_depth = 5;
+  CNodePtr cnode = nullptr;
+  auto is_quant_cnode = [](const AnfNodePtr &node) {
+    return IsPrimitiveCNode(node, prim::kPrimFakeQuantPerLayer) ||
+           IsPrimitiveCNode(node, prim::kPrimFakeQuantPerChannel);
+  };
+  while (!is_quant_cnode(x)) {
+    if (count >= max_depth) {
+      break;
+    }
+    cnode = x->cast<CNodePtr>();
+    if (cnode == nullptr || cnode->size() <= 1) {
+      break;
+    }
+    x = cnode->input(1);
+    count += 1;
+  }
+  if (x->isa<Parameter>() || IsPrimitiveCNode(x, prim::kPrimLoad)) {
+    (*fake_quant_table)[weight_name] = std::make_pair(nullptr, "input");
+  }
+  // get the fakequant parameter minq's name
+  if (!is_quant_cnode(x)) {
+    return;
+  }
+  cnode = x->cast<CNodePtr>();
+  if (cnode == nullptr || IsPrimitiveCNode(cnode, prim::kPrimLoad) || cnode->size() != 4) {
+    return;
+  }
+  auto fakequant_min_node = cnode->input(2);
+  if (!fakequant_min_node->isa<Parameter>() && !IsPrimitiveCNode(fakequant_min_node, prim::kPrimLoad)) {
+    return;
+  }
+  std::string fakequant_min_node_name;
+  if (IsPrimitiveCNode(fakequant_min_node, prim::kPrimLoad)) {
+    fakequant_min_node_name = fakequant_min_node->cast<CNodePtr>()->input(1)->cast<ParameterPtr>()->name();
+  } else {
+    fakequant_min_node_name = fakequant_min_node->cast<ParameterPtr>()->name();
+  }
+  auto quant_op_value = cnode->input(0)->cast<ValueNodePtr>()->value();
+  if (!quant_op_value->isa<PrimitivePy>()) {
+    return;
+  }
+  auto quant_op = quant_op_value->cast<PrimitivePyPtr>();
+  (*fake_quant_table)[weight_name] = std::make_pair(quant_op, fakequant_min_node_name);
+}
+
 std::map<std::string, std::pair<PrimitivePyPtr, std::string>> ExecutorPy::FetchInfoForQuantExport(
   const std::string &phase_s) {
   FuncGraphPtr func_graph = info_[phase_s]->resource->func_graph();
@@ -399,58 +456,21 @@ std::map<std::string, std::pair<PrimitivePyPtr, std::string>> ExecutorPy::FetchI
            IsPrimitiveCNode(node, prim::kPrimFakeQuantPerChannel);
   };
   for (const auto &node : nodes) {
-    auto cnode = node->cast<CNodePtr>();
-    if (cnode == nullptr || cnode->size() != 3) {
+    auto root_node = node->cast<CNodePtr>();
+    if (root_node == nullptr || root_node->size() != 3) {
       continue;
     }
-    auto x = cnode->input(1);
-    auto weight = cnode->input(2);
+    auto weight = root_node->input(2);
     if (!is_quant_cnode(weight)) {
       continue;
     }
     // get parameter weight's name
-    cnode = weight->cast<CNodePtr>();
+    auto cnode = weight->cast<CNodePtr>();
     auto weight_node = cnode->input(2);
-    if (!weight_node->isa<Parameter>()) {
-      continue;
-    }
-    auto weight_name = weight_node->cast<ParameterPtr>()->name();
-    // find the fakequant from input
-    int64_t count = 0;
-    const int64_t max_depth = 5;
-    while (!is_quant_cnode(x)) {
-      if (count >= max_depth) {
-        break;
-      }
-      cnode = x->cast<CNodePtr>();
-      if (cnode == nullptr || cnode->size() <= 1) {
-        break;
-      }
-      x = cnode->input(1);
-      count += 1;
-    }
-    if (x->isa<Parameter>()) {
-      fake_quant_table[weight_name] = std::make_pair(nullptr, "input");
-    }
-    // get the fakequant parameter minq's name
-    if (!is_quant_cnode(x)) {
-      continue;
-    }
-    cnode = x->cast<CNodePtr>();
-    if (cnode == nullptr || cnode->size() != 4) {
-      continue;
-    }
-    auto fakequant_min_node = cnode->input(2);
-    if (!fakequant_min_node->isa<Parameter>()) {
-      continue;
-    }
-    auto fakequant_min_node_name = fakequant_min_node->cast<ParameterPtr>()->name();
-    auto quant_op_value = cnode->input(0)->cast<ValueNodePtr>()->value();
-    if (!quant_op_value->isa<PrimitivePy>()) {
+    if (!weight_node->isa<Parameter>() && !IsPrimitiveCNode(weight_node, prim::kPrimLoad)) {
       continue;
     }
-    auto quant_op = quant_op_value->cast<PrimitivePyPtr>();
-    fake_quant_table[weight_name] = std::make_pair(quant_op, fakequant_min_node_name);
+    GetWeightInfo(root_node, weight_node, &fake_quant_table);
   }
 
   return fake_quant_table;
diff --git a/mindspore/ccsrc/pipeline/jit/pipeline.h b/mindspore/ccsrc/pipeline/jit/pipeline.h
index 6ead471423..7588f762c0 100644
--- a/mindspore/ccsrc/pipeline/jit/pipeline.h
+++ b/mindspore/ccsrc/pipeline/jit/pipeline.h
@@ -110,6 +110,8 @@ class ExecutorPy : public std::enable_shared_from_this<ExecutorPy> {
  private:
   ExecutorPy();
   void ConvertObjectToTensors(const py::dict &dict, std::map<std::string, tensor::TensorPtr> *tensors);
+  void GetWeightInfo(const CNodePtr &root_node, const AnfNodePtr &weight_node,
+                     std::map<std::string, std::pair<PrimitivePyPtr, std::string>> *fake_quant_table);
   void GetGeBackendPolicy() const;
   // filter some pipeline actions according to phase, e.g. when exporting onnx, it is no need to execute actions after
   // 'validate' stage
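Note on the pipeline.cc/pipeline.h hunks above: the per-weight lookup moves into a dedicated GetWeightInfo helper, which now also sees through the Load nodes that MindSpore's auto-monad pass inserts around parameter reads (a Load's input(1) is the underlying Parameter). The sketch below is a rough Python re-expression of that bounded upward walk, using a hypothetical Node class rather than the real AnfNode/CNode API; it is illustrative only, not MindSpore code.

# Illustrative sketch only: a toy re-expression of GetWeightInfo's bounded walk.
# Node is a hypothetical stand-in for MindSpore's AnfNode/CNode classes.
MAX_DEPTH = 5  # mirrors `const int64_t max_depth = 5` above

class Node:
    def __init__(self, op, inputs=None, name=None):
        self.op = op                # e.g. "FakeQuantPerLayer", "Load", "Conv2D"
        self.inputs = inputs or []  # inputs[0] is the primitive slot, as in a CNode
        self.name = name

def is_quant_node(node):
    return node.op in ("FakeQuantPerLayer", "FakeQuantPerChannel")

def unwrap_load(node):
    """A Load node reads a Parameter; the real parameter sits at input(1)."""
    return node.inputs[1] if node.op == "Load" else node

def find_activation_quant(x):
    """Climb input(1) for at most MAX_DEPTH hops, stopping at a fake-quant node."""
    for _ in range(MAX_DEPTH):
        if is_quant_node(x):
            return x
        if len(x.inputs) <= 1:      # mirrors `cnode->size() <= 1`
            return x
        x = x.inputs[1]             # mirrors `x = cnode->input(1)`
    return x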
diff --git a/mindspore/compression/export/quant_export.py b/mindspore/compression/export/quant_export.py
index 5ded0bb29f..979ad23864 100644
--- a/mindspore/compression/export/quant_export.py
+++ b/mindspore/compression/export/quant_export.py
@@ -76,7 +76,7 @@ class ExportToQuantInferNetwork:
         return network
 
     def _get_quant_block(self, cell_core, activation, fake_quant_a_out):
-        """convet network's quant subcell to deploy subcell"""
+        """convert network's quant subcell to deploy subcell"""
         # Calculate the scale and zero point
         w_minq_name = cell_core.fake_quant_weight.minq.name
         np_type = mstype.dtype_to_nptype(self.data_type)
@@ -129,7 +129,7 @@ class ExportToQuantInferNetwork:
         if isinstance(cell_core, (quant.DenseQuant, quant.Conv2dQuant)):
             if cell_core.has_bias:
                 bias = cell_core.bias.data.asnumpy()
-        elif isinstance(cell_core, quant.Conv2dBnFoldQuant):
+        elif isinstance(cell_core, (quant.Conv2dBnFoldQuant, quant.Conv2dBnFoldQuantOneConv)):
             weight, bias = quant_utils.fold_batchnorm(weight, cell_core)
         elif isinstance(cell_core, quant.Conv2dBnWithoutFoldQuant):
             weight, bias = quant_utils.without_fold_batchnorm(weight, cell_core)
diff --git a/mindspore/nn/layer/quant.py b/mindspore/nn/layer/quant.py
index d5c98462c2..4b9de7aae5 100644
--- a/mindspore/nn/layer/quant.py
+++ b/mindspore/nn/layer/quant.py
@@ -1381,11 +1381,14 @@ class QuantBlock(Cell):
         self.activation = activation
         self.has_act = activation is not None
         self.bias_add = P.BiasAdd()
+        self.sub = P.Sub()
+        self.weight_offset = Parameter(np.zeros(shape=weight.shape, dtype=np.int8), name='weight_offset')
 
     def construct(self, x):
         x = self.quant(x)
         if self.has_bias:
-            x = self.core_op(x, self.weight)
+            weight = self.sub(self.weight, self.weight_offset)
+            x = self.core_op(x, weight)
             x = self.bias_add(x, self.bias)
         else:
             x = self.core_op(x, self.weight)
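Note on the QuantBlock hunk: construct() now subtracts a per-element weight_offset (an int8 zero point, initialized to zeros, so existing symmetric exports are unchanged) from the weight before the core op, which is what asymmetric weight quantization needs. A small numpy sketch of the arithmetic, with assumed scale/zero-point values; this is not the MindSpore kernel, just the underlying identity:

# With asymmetric quantization w_q = round(w / scale) + zp, the dequantized
# weight is scale * (w_q - zp), so the integer zero point must be subtracted
# before the matmul/conv -- exactly what P.Sub does in construct() above.
import numpy as np

scale, zp = 0.05, 3                               # assumed quantization params
w = np.array([-0.30, 0.00, 0.25])                 # real-valued weights
w_q = np.round(w / scale).astype(np.int8) + zp    # quantize with offset
w_hat = scale * (w_q.astype(np.float32) - zp)     # dequantize: subtract zp first
print(np.allclose(w, w_hat, atol=scale / 2))      # True: recovered within 1/2 LSB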
+parser.add_argument("--file_format", type=str, choices=["AIR", "MINDIR"], default="MINDIR", help="file format") parser.add_argument('--device_target', type=str, default=None, help='Run device target') args_opt = parser.parse_args() @@ -46,5 +47,9 @@ if __name__ == '__main__': # export network print("============== Starting export ==============") inputs = Tensor(np.ones([1, 3, cfg.image_height, cfg.image_width]), mindspore.float32) - export(network, inputs, file_name="mobilenet_quant", file_format='MINDIR', quant_mode='AUTO') + if args_opt.file_format == 'MINDIR': + export(network, inputs, file_name="mobilenet_quant", file_format='MINDIR', quant_mode='AUTO') + else: + export(network, inputs, file_name="mobilenet_quant", file_format='AIR', + quant_mode='AUTO', mean=0., std_dev=48.106) print("============== End export ==============")
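Note on the export script: the new --file_format flag defaults to MINDIR, and only the AIR branch passes the extra mean/std_dev options that AIR quant export takes. A typical AIR export run (the checkpoint filename here is illustrative) would look like:

python export.py --device_target Ascend --checkpoint_path mobilenetv2_quant.ckpt --file_format AIR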