diff --git a/mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.cc b/mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.cc
index 23ec9da87b..687bc12f05 100644
--- a/mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.cc
+++ b/mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.cc
@@ -35,8 +35,8 @@ bool StepAllreduceFusion(const FuncGraphPtr &root, const opt::OptimizerPtr &optimizer) {
   // assume no change to graph
   bool changes = false;
   // control whether use model_parallel mode
-  if (((parallel_mode != AUTO_PARALLEL) && (parallel_mode != SEMI_AUTO_PARALLEL)) || (!enable_all_reduce_fusion) ||
-      (root->has_flag(ALLREDUCE_FUSION_RUN_ONCE_ONLY))) {
+  if (!root->has_flag(AUTO_PARALLEL) || ((parallel_mode != AUTO_PARALLEL) && (parallel_mode != SEMI_AUTO_PARALLEL)) ||
+      (!enable_all_reduce_fusion) || (root->has_flag(ALLREDUCE_FUSION_RUN_ONCE_ONLY))) {
     return changes;
   }
 #if defined(_WIN32) || defined(_WIN64)
diff --git a/mindspore/ccsrc/parallel/step_auto_parallel.cc b/mindspore/ccsrc/parallel/step_auto_parallel.cc
index b16108a279..3c538002e6 100644
--- a/mindspore/ccsrc/parallel/step_auto_parallel.cc
+++ b/mindspore/ccsrc/parallel/step_auto_parallel.cc
@@ -121,7 +121,8 @@ bool StepAutoParallel(const FuncGraphPtr &root, const opt::OptimizerPtr &) {
   // assume no change to graph
   bool changes = false;
   // control whether use model_parallel mode
-  if ((parallel_mode != AUTO_PARALLEL) || root->flags()[AUTO_PARALLEL_RUN_ONCE_ONLY]) {
+  if (!root->has_flag(AUTO_PARALLEL) || (parallel_mode != AUTO_PARALLEL) ||
+      root->has_flag(AUTO_PARALLEL_RUN_ONCE_ONLY)) {
     return changes;
   }
   // check whether strategy_search_mode is valid
diff --git a/mindspore/ccsrc/parallel/step_parallel.cc b/mindspore/ccsrc/parallel/step_parallel.cc
index 21a515ff85..d445231e8e 100644
--- a/mindspore/ccsrc/parallel/step_parallel.cc
+++ b/mindspore/ccsrc/parallel/step_parallel.cc
@@ -2220,7 +2220,7 @@ bool StepParallel(const FuncGraphPtr &root, const opt::OptimizerPtr &optimizer)
   // assume no change to graph
   bool changes = false;
   // control whether use model_parallel mode
-  if (((parallel_mode != AUTO_PARALLEL) && (parallel_mode != SEMI_AUTO_PARALLEL)) ||
+  if (!root->has_flag(AUTO_PARALLEL) || ((parallel_mode != AUTO_PARALLEL) && (parallel_mode != SEMI_AUTO_PARALLEL)) ||
       (root->has_flag(SEMI_AUTO_PARALLEL_RUN_ONCE_ONLY))) {
     return changes;
   }
diff --git a/mindspore/ccsrc/pipeline/pipeline.cc b/mindspore/ccsrc/pipeline/pipeline.cc
index b0d4c0ece1..a20767f77d 100644
--- a/mindspore/ccsrc/pipeline/pipeline.cc
+++ b/mindspore/ccsrc/pipeline/pipeline.cc
@@ -281,7 +281,7 @@ void ExecutorPy::SaveCompiledGraph(const std::string &phase_s) {
   MS_LOG(INFO) << "Save compiled func graph(" << func_graph->ToString() << ") phase(" << phase_s << ")!";
   info_[phase_s]->func_graph = func_graph;
-  if ((func_graph != nullptr) &&
+  if ((func_graph != nullptr) && func_graph->has_flag(parallel::AUTO_PARALLEL) &&
       ((parallel_mode == parallel::AUTO_PARALLEL) || (parallel_mode == parallel::SEMI_AUTO_PARALLEL))) {
     MS_LOG(DEBUG) << "Save model parallel parameter layout graph!";
     func_graph = info_[phase_s]->resource->results()[kStepParallelGraph].cast<FuncGraphPtr>();
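All four gates above now share one extra precondition: besides the parallel_mode check, the root graph must carry the AUTO_PARALLEL flag, which is only set from Python by the new Cell.set_auto_parallel() introduced further down in cell.py. A simplified Python sketch of the combined check — the helper name and string flags are illustrative, not part of the patch, and the individual passes differ in detail (StepAutoParallel accepts only auto_parallel, the fusion pass also checks enable_all_reduce_fusion):

# Illustrative only: mirrors the shape of the gate added to StepParallel,
# StepAutoParallel and StepAllreduceFusion. 'auto_parallel' stands for the
# graph flag that Cell.set_auto_parallel() sets from Python.
def should_run_parallel_pass(root_flags, parallel_mode, already_ran):
    if "auto_parallel" not in root_flags:   # the new requirement in this patch
        return False
    if parallel_mode not in ("auto_parallel", "semi_auto_parallel"):
        return False
    return not already_ran                  # each pass still runs at most once per graph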
diff --git a/mindspore/common/api.py b/mindspore/common/api.py
index 3710e40996..0300908f1b 100755
--- a/mindspore/common/api.py
+++ b/mindspore/common/api.py
@@ -20,7 +20,6 @@ from collections import OrderedDict
 from functools import wraps
 from mindspore import context
 from mindspore import log as logger
-from mindspore.parallel._utils import _get_parallel_mode
 from .._c_expression import generate_key, Executor_, Tensor, MetaTensor
 from .._c_expression import verify_inputs_signature, init_exec_dataset, _set_dataset_mode_config, init_backend
 from .tensor import Tensor as MsTensor
@@ -327,7 +326,7 @@ class _Executor:
             raise TypeError('Parameters need OrderedDict type, but got {}'.
                             format(type(params)))

-    def compile(self, obj, *args, phase='predict', params=None, do_convert=True):
+    def compile(self, obj, *args, phase='predict', params=None, do_convert=True, auto_parallel_mode=False):
         """
         Compiles graph.

@@ -337,6 +336,7 @@ class _Executor:
             phase (str): The name of compile phase. Default: 'predict'.
             params (OrderedDict): The parameters dictionary used for init data graph. Default: None.
             do_convert (bool): When set to True, convert ME graph to GE graph after compiling graph.
+            auto_parallel_mode (bool): When set to True, use auto parallel mode to compile graph. Default: False.

         Return:
             Str, the full phase of the cell.
@@ -370,8 +370,9 @@ class _Executor:
                 logger.error("%r graph compile failed.", phase)
         if not do_convert:
             return phase, True
+
         if not enable_debug_runtime or enable_ge:
-            if _get_parallel_mode() in ["auto_parallel", "semi_auto_parallel"]:
+            if auto_parallel_mode:
                 obj.parameter_layout_dict = self._executor.get_parameter_layout(phase)
                 obj.load_parameter_slice(params)
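The compile entry point now takes the parallel decision from its caller rather than reading the global parallel mode. A hedged usage sketch of the new keyword (assumes a MindSpore build contemporary with this patch; the cell and shapes are arbitrary):

import numpy as np
import mindspore as ms
import mindspore.nn as nn
from mindspore import Tensor, context
from mindspore.common.api import _executor

context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8)
net = nn.Dense(32, 16)
x = Tensor(np.ones([64, 32]), dtype=ms.float32)

# The caller states the mode explicitly; compile() no longer calls _get_parallel_mode().
_executor.compile(net, x, phase='train', auto_parallel_mode=False)

Cell.__call__ (next file) forwards its own _auto_parallel_mode attribute here, so the per-cell opt-in drives the parameter-layout handling.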
+ """ + self._auto_parallel_mode = True + self.add_flags(auto_parallel=True) + self._get_construct_inputs_number_and_name() diff --git a/mindspore/parallel/_utils.py b/mindspore/parallel/_utils.py index cb3a0c0ac7..3301c3c970 100644 --- a/mindspore/parallel/_utils.py +++ b/mindspore/parallel/_utils.py @@ -16,8 +16,7 @@ from mindspore._c_expression import reset_op_id from mindspore.communication.management import get_group_size, get_rank -from mindspore.parallel._auto_parallel_context import auto_parallel_context, _set_auto_parallel_context,\ - _reset_auto_parallel_context +from mindspore.parallel._auto_parallel_context import auto_parallel_context def _get_parallel_mode(): @@ -108,102 +107,6 @@ def _parameter_broadcast_check(parallel_mode, parameter_broadcast): .format(parallel_mode, parameter_broadcast)) -_parallel_mode = None -_device_num = None -_global_rank = None -_parameter_broadcast = None -_mirror_mean = None -_cast_before_mirror = None -_loss_repeated_mean = None -_communication_backend = None -_has_checkpointed = False -_enable_all_reduce_fusion = None - - -def _checkpoint_auto_parallel_context(): - """checkpoint auto parallel context""" - global _has_checkpointed - if _has_checkpointed is True: - return - - global _parallel_mode - global _device_num - global _global_rank - global _parameter_broadcast - global _mirror_mean - global _cast_before_mirror - global _loss_repeated_mean - global _communication_backend - global _enable_all_reduce_fusion - _parallel_mode = auto_parallel_context().get_parallel_mode() - _device_num = _get_device_num() - _global_rank = _get_global_rank() - _parameter_broadcast = auto_parallel_context().get_parameter_broadcast() - _mirror_mean = auto_parallel_context().get_mirror_mean() - _cast_before_mirror = auto_parallel_context().get_cast_before_mirror() - _loss_repeated_mean = auto_parallel_context().get_loss_repeated_mean() - _communication_backend = auto_parallel_context().get_communication_backend() - _enable_all_reduce_fusion = auto_parallel_context().get_enable_all_reduce_fusion() - _has_checkpointed = True - - -def _restore_auto_parallel_context(): - """restore auto parallel context""" - global _parallel_mode - global _device_num - global _global_rank - global _parameter_broadcast - global _mirror_mean - global _cast_before_mirror - global _loss_repeated_mean - global _communication_backend - global _enable_all_reduce_fusion - _set_auto_parallel_context(parallel_mode=_parallel_mode, device_num=_device_num, global_rank=_global_rank, - parameter_broadcast=_parameter_broadcast, mirror_mean=_mirror_mean, - cast_before_mirror=_cast_before_mirror, loss_repeated_mean=_loss_repeated_mean) - auto_parallel_context().set_communication_backend(_communication_backend) - auto_parallel_context().set_enable_all_reduce_fusion(_enable_all_reduce_fusion) - - -def _reset_checkpoint_auto_parallel_context(): - """reset the _has_checkpointed""" - global _has_checkpointed - _has_checkpointed = False - - -def _callback_wrapper(list_callback, run_context, callback_type): - """ - reset the context for callback of model train - - Raises: - ValueError: If the type keyword is not recognized - """ - _callback_func_map = { - "begin": list_callback.begin, - "epoch_begin": list_callback.epoch_begin, - "step_begin": list_callback.step_begin, - "step_end": list_callback.step_end, - "epoch_end": list_callback.epoch_end, - "end": list_callback.end} - - if callback_type not in _callback_func_map: - raise ValueError("Get type keyword %s is not recognized!" 
diff --git a/mindspore/parallel/_utils.py b/mindspore/parallel/_utils.py
index cb3a0c0ac7..3301c3c970 100644
--- a/mindspore/parallel/_utils.py
+++ b/mindspore/parallel/_utils.py
@@ -16,8 +16,7 @@
 from mindspore._c_expression import reset_op_id
 from mindspore.communication.management import get_group_size, get_rank
-from mindspore.parallel._auto_parallel_context import auto_parallel_context, _set_auto_parallel_context,\
-    _reset_auto_parallel_context
+from mindspore.parallel._auto_parallel_context import auto_parallel_context


 def _get_parallel_mode():
@@ -108,102 +107,6 @@ def _parameter_broadcast_check(parallel_mode, parameter_broadcast):
                          .format(parallel_mode, parameter_broadcast))


-_parallel_mode = None
-_device_num = None
-_global_rank = None
-_parameter_broadcast = None
-_mirror_mean = None
-_cast_before_mirror = None
-_loss_repeated_mean = None
-_communication_backend = None
-_has_checkpointed = False
-_enable_all_reduce_fusion = None
-
-
-def _checkpoint_auto_parallel_context():
-    """checkpoint auto parallel context"""
-    global _has_checkpointed
-    if _has_checkpointed is True:
-        return
-
-    global _parallel_mode
-    global _device_num
-    global _global_rank
-    global _parameter_broadcast
-    global _mirror_mean
-    global _cast_before_mirror
-    global _loss_repeated_mean
-    global _communication_backend
-    global _enable_all_reduce_fusion
-    _parallel_mode = auto_parallel_context().get_parallel_mode()
-    _device_num = _get_device_num()
-    _global_rank = _get_global_rank()
-    _parameter_broadcast = auto_parallel_context().get_parameter_broadcast()
-    _mirror_mean = auto_parallel_context().get_mirror_mean()
-    _cast_before_mirror = auto_parallel_context().get_cast_before_mirror()
-    _loss_repeated_mean = auto_parallel_context().get_loss_repeated_mean()
-    _communication_backend = auto_parallel_context().get_communication_backend()
-    _enable_all_reduce_fusion = auto_parallel_context().get_enable_all_reduce_fusion()
-    _has_checkpointed = True
-
-
-def _restore_auto_parallel_context():
-    """restore auto parallel context"""
-    global _parallel_mode
-    global _device_num
-    global _global_rank
-    global _parameter_broadcast
-    global _mirror_mean
-    global _cast_before_mirror
-    global _loss_repeated_mean
-    global _communication_backend
-    global _enable_all_reduce_fusion
-    _set_auto_parallel_context(parallel_mode=_parallel_mode, device_num=_device_num, global_rank=_global_rank,
-                               parameter_broadcast=_parameter_broadcast, mirror_mean=_mirror_mean,
-                               cast_before_mirror=_cast_before_mirror, loss_repeated_mean=_loss_repeated_mean)
-    auto_parallel_context().set_communication_backend(_communication_backend)
-    auto_parallel_context().set_enable_all_reduce_fusion(_enable_all_reduce_fusion)
-
-
-def _reset_checkpoint_auto_parallel_context():
-    """reset the _has_checkpointed"""
-    global _has_checkpointed
-    _has_checkpointed = False
-
-
-def _callback_wrapper(list_callback, run_context, callback_type):
-    """
-    reset the context for callback of model train
-
-    Raises:
-        ValueError: If the type keyword is not recognized
-    """
-    _callback_func_map = {
-        "begin": list_callback.begin,
-        "epoch_begin": list_callback.epoch_begin,
-        "step_begin": list_callback.step_begin,
-        "step_end": list_callback.step_end,
-        "epoch_end": list_callback.epoch_end,
-        "end": list_callback.end}
-
-    if callback_type not in _callback_func_map:
-        raise ValueError("Get type keyword %s is not recognized!"
-                         % callback_type)
-    func = _callback_func_map[callback_type]
-
-    if callback_type == "begin":
-        _reset_checkpoint_auto_parallel_context()
-
-    _checkpoint_auto_parallel_context()
-    global _parallel_mode
-    if _parallel_mode == "stand_alone":
-        func(run_context)
-        return
-
-    _reset_auto_parallel_context()
-    func(run_context)
-    _restore_auto_parallel_context()
-
-
 PARAMETER_CLONED_INDEX = 0
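With the wrapper and its checkpoint/reset/restore of the auto parallel context gone, Model (next file) calls the callback list directly. A self-contained sketch of the call-shape change — the stub classes are stand-ins, not MindSpore APIs:

class _RunContextStub:
    """Stand-in for mindspore.train.callback.RunContext."""

class _CallbackListStub:
    """Stand-in exposing the begin/epoch_begin/... methods Model invokes."""
    def begin(self, run_context):
        print("begin")
    def epoch_begin(self, run_context):
        print("epoch_begin")

list_callback, run_context = _CallbackListStub(), _RunContextStub()

# Before: _callback_wrapper(list_callback, run_context, "epoch_begin")
#         (string-keyed dispatch plus context checkpoint/reset/restore on every call)
# After: a direct method call that leaves the auto parallel context untouched:
list_callback.epoch_begin(run_context)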
diff --git a/mindspore/train/model.py b/mindspore/train/model.py
index c943252e43..1017b1daa1 100755
--- a/mindspore/train/model.py
+++ b/mindspore/train/model.py
@@ -22,7 +22,7 @@ from .._checkparam import check_input_data, check_output_data, check_int_positive
 from .callback import _InternalCallbackParam, RunContext, _build_callbacks
 from .. import context
 from ..parallel._utils import _get_parallel_mode, _get_device_num, _get_global_rank, \
-    _get_parameter_broadcast, _device_number_check, _parameter_broadcast_check, _callback_wrapper
+    _get_parameter_broadcast, _device_number_check, _parameter_broadcast_check
 from ..nn.metrics import Loss
 from .. import nn
 from ..nn.wrap.cell_wrapper import _VirtualDatasetCell
@@ -144,6 +144,9 @@ class Model:
         elif self._loss_fn:
             network = nn.WithLossCell(network, self._loss_fn)
         # If need to check if loss_fn is not None, but optimizer is None
+
+        if self._parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL):
+            network.set_auto_parallel()
         return network

     def _build_eval_network(self, metrics, eval_network, eval_indexes):
@@ -165,11 +168,15 @@
                 self._eval_network = nn.WithEvalCell(self._network, self._loss_fn, self._amp_level == "O2")
                 self._eval_indexes = [0, 1, 2]

+        if self._parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL):
+            self._eval_network.set_auto_parallel()
+
     def _build_predict_network(self):
         """Build the network for prediction."""
         self._predict_network = self._network
         if self._parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL):
             self._predict_network = _VirtualDatasetCell(self._network)
+            self._predict_network.set_auto_parallel()

     def _clear_metrics(self):
         """Clear metrics local values."""
@@ -287,28 +294,28 @@
         cb_params.cur_step_num = 0
         loop_size = dataset_helper.loop_size()
         run_context = RunContext(cb_params)
-        _callback_wrapper(list_callback, run_context, "begin")
+        list_callback.begin(run_context)

         # used to stop training for early stop, such as stopAtTime or stopAtStep
         should_stop = False
         for i in range(epoch):
             cb_params.cur_epoch_num = i + 1
-            _callback_wrapper(list_callback, run_context, "epoch_begin")
+            list_callback.epoch_begin(run_context)

             # for data sink dataset_helper only iter once, otherwise iter epoch_size times.
             for inputs in dataset_helper:
                 cb_params.cur_step_num += loop_size
-                _callback_wrapper(list_callback, run_context, "step_begin")
+                list_callback.step_begin(run_context)
                 outputs = self._train_network(*inputs)
                 cb_params.net_outputs = outputs
-                _callback_wrapper(list_callback, run_context, "step_end")
+                list_callback.step_end(run_context)

-            _callback_wrapper(list_callback, run_context, "epoch_end")
+            list_callback.epoch_end(run_context)
             should_stop = should_stop or run_context.get_stop_requested()
             if should_stop:
                 break

-        _callback_wrapper(list_callback, run_context, "end")
+        list_callback.end(run_context)

     def _train_process(self, epoch, train_dataset, list_callback=None, cb_params=None):
         """
@@ -327,14 +334,14 @@
         dataset_helper = DatasetHelper(train_dataset, dataset_sink_mode=False)
         cb_params.cur_step_num = 0
         run_context = RunContext(cb_params)
-        _callback_wrapper(list_callback, run_context, "begin")
+        list_callback.begin(run_context)

         # used to stop training for early stop, such as stopAtTime or stopAtStep
         should_stop = False
         for i in range(epoch):
             cb_params.cur_epoch_num = i + 1
-            _callback_wrapper(list_callback, run_context, "epoch_begin")
+            list_callback.epoch_begin(run_context)

             for next_element in dataset_helper:
                 len_element = len(next_element)
@@ -342,7 +349,7 @@
                     raise ValueError("when loss_fn is not None, train_dataset should"
                                      "return two elements, but got {}".format(len_element))
                 cb_params.cur_step_num += 1
-                _callback_wrapper(list_callback, run_context, "step_begin")
+                list_callback.step_begin(run_context)

                 overflow = False
                 if self._loss_scale_manager and self._loss_scale_manager.get_drop_overflow_update():
@@ -356,19 +363,19 @@
                     overflow = np.all(overflow.asnumpy())
                     self._loss_scale_manager.update_loss_scale(overflow)

-                _callback_wrapper(list_callback, run_context, "step_end")
+                list_callback.step_end(run_context)
                 should_stop = should_stop or run_context.get_stop_requested()
                 if should_stop:
                     break

             train_dataset.reset()
-            _callback_wrapper(list_callback, run_context, "epoch_end")
+            list_callback.epoch_end(run_context)
             should_stop = should_stop or run_context.get_stop_requested()
             if should_stop:
                 break

-        _callback_wrapper(list_callback, run_context, "end")
+        list_callback.end(run_context)

     def train(self, epoch, train_dataset, callbacks=None, dataset_sink_mode=True):
         """
diff --git a/tests/ut/python/parallel/parallel_end_to_end/add_relu/_test_add_relu_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/add_relu/_test_add_relu_parallel_4p.py
index 2d69efa27c..8a24a2190c 100644
--- a/tests/ut/python/parallel/parallel_end_to_end/add_relu/_test_add_relu_parallel_4p.py
+++ b/tests/ut/python/parallel/parallel_end_to_end/add_relu/_test_add_relu_parallel_4p.py
@@ -92,6 +92,7 @@
     def forward_mindspore_parallel_impl(self):
         net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1)
         context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+        net.set_auto_parallel()
         x = Tensor(self.input_np1)
         y = Tensor(self.input_np2, ms.float32)
         inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
@@ -118,6 +119,7 @@
         net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1)
         grad_net = Grad(net)
         context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+        grad_net.set_auto_parallel()
         grad_net.set_train()
         inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
         x1 = Tensor(inputs_x[self.x_id])
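Everything below is the matching test-suite migration: any cell compiled under auto_parallel or semi_auto_parallel must now call set_auto_parallel() first, because the mode alone no longer triggers the parallel passes (Model does this wiring automatically, as shown above). The recurring one-line pattern, sketched once with an arbitrary cell:

import numpy as np
import mindspore as ms
import mindspore.nn as nn
from mindspore import Tensor, context
from mindspore.common.api import _executor

context.set_auto_parallel_context(device_num=4, global_rank=0,
                                  parallel_mode="semi_auto_parallel")
net = nn.ReLU()
net.set_auto_parallel()  # without this line, compilation now skips the parallel passes

x = Tensor(np.ones([128, 32]), dtype=ms.float32)
_executor.compile(net, x)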
diff --git a/tests/ut/python/parallel/parallel_end_to_end/batch_parallel/_test_conv2d_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/batch_parallel/_test_conv2d_parallel_4p.py
index 9499e2dd15..5d22db972e 100644
--- a/tests/ut/python/parallel/parallel_end_to_end/batch_parallel/_test_conv2d_parallel_4p.py
+++ b/tests/ut/python/parallel/parallel_end_to_end/batch_parallel/_test_conv2d_parallel_4p.py
@@ -249,6 +249,7 @@
                                  padding=self.padding, dilation=self.dilation, group=self.group,
                                  has_bias=False, weight_init=weight,
                                  strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1]))
         context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+        net.set_auto_parallel()
         out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
         return out.asnumpy()
@@ -307,7 +308,8 @@
         grad_net = Grad(net)

         context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
-        grad_net.set_train()
+        grad_net.set_train()
+        grad_net.set_auto_parallel()
         out_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1],
                             parallel_inputs_run=[x1, y1, output_grad1])
         return out_grad
diff --git a/tests/ut/python/parallel/parallel_end_to_end/dropout/_test_dropout_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/dropout/_test_dropout_parallel_4p.py
index da0fc5a9b6..c225776a66 100644
--- a/tests/ut/python/parallel/parallel_end_to_end/dropout/_test_dropout_parallel_4p.py
+++ b/tests/ut/python/parallel/parallel_end_to_end/dropout/_test_dropout_parallel_4p.py
@@ -95,6 +95,7 @@
         x1 = Tensor(inputs_x[self.x_id])
         net = Net(0.4, 0, 0, strategy=self.strategy0)
         context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+        net.set_auto_parallel()
         out = net(x, parallel_inputs_compile=[x], parallel_inputs_run=[x1])
         return out.asnumpy()
diff --git a/tests/ut/python/parallel/parallel_end_to_end/l2normalize/_test_l2normalize_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/l2normalize/_test_l2normalize_parallel_4p.py
index d9ab842b3a..2c5c91a3cc 100644
--- a/tests/ut/python/parallel/parallel_end_to_end/l2normalize/_test_l2normalize_parallel_4p.py
+++ b/tests/ut/python/parallel/parallel_end_to_end/l2normalize/_test_l2normalize_parallel_4p.py
@@ -118,6 +118,7 @@
         y1 = Tensor(inputs_y[self.y_id])
         net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1)
         context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+        net.set_auto_parallel()
         out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
         return out.asnumpy()
@@ -144,6 +145,7 @@
         net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1)
         grad_net = Grad(net)
         context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+        grad_net.set_auto_parallel()
         grad_net.set_train()
         input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1],
                               parallel_inputs_run=[x1, y1, output_grad1])
         return input_grad
diff --git a/tests/ut/python/parallel/parallel_end_to_end/loss/_test_loss_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/loss/_test_loss_parallel_4p.py
index 592352e825..8701d2ae10 100644
--- a/tests/ut/python/parallel/parallel_end_to_end/loss/_test_loss_parallel_4p.py
+++ b/tests/ut/python/parallel/parallel_end_to_end/loss/_test_loss_parallel_4p.py
@@ -140,6 +140,7 @@
         net_with_loss = NetWithLoss(net, strategy2=self.strategy2)
         grad_net = Grad(net_with_loss)
         context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+        grad_net.set_auto_parallel()
         grad_net.set_train()
         input_grads = []
         for i in range(0, 3):
diff --git a/tests/ut/python/parallel/parallel_end_to_end/matmul/_test_matmul_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/matmul/_test_matmul_parallel_4p.py
index 6810204bbf..e7171c1862 100644
--- a/tests/ut/python/parallel/parallel_end_to_end/matmul/_test_matmul_parallel_4p.py
+++ b/tests/ut/python/parallel/parallel_end_to_end/matmul/_test_matmul_parallel_4p.py
@@ -229,6 +229,7 @@
         y1 = Tensor(ys[self.y_id])  # needs to be derived from the device matrix
         z1 = Tensor(zs[self.x_id])
         matmul.set_train()
+        matmul.set_auto_parallel()
         out_me = matmul(x, y, z, parallel_inputs_compile=[x, y, z], parallel_inputs_run=[x1, y1, z1])
         return out_me.asnumpy()
@@ -267,6 +268,7 @@
         out_grad1 = Tensor(out_grads[self.out_id])
         net_me = Grad(matmul)
         context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+        net_me.set_auto_parallel()
         net_me.set_train()
         out_grad = net_me(x, y, z, out_grad_me, parallel_inputs_compile = [x, y, z, out_grad1],
                           parallel_inputs_run = [x1, y1, z1, out_grad1])
diff --git a/tests/ut/python/parallel/parallel_end_to_end/max/_test_max_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/max/_test_max_parallel_4p.py
index 64479a1afe..f2b1252499 100644
--- a/tests/ut/python/parallel/parallel_end_to_end/max/_test_max_parallel_4p.py
+++ b/tests/ut/python/parallel/parallel_end_to_end/max/_test_max_parallel_4p.py
@@ -119,6 +119,7 @@
         y1 = Tensor(ys[self.y_id])
         net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1)
         context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+        net.set_auto_parallel()
         out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
         return out.asnumpy()
@@ -144,6 +145,7 @@
         net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1)
         grad_net = Grad(net)
         context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+        grad_net.set_auto_parallel()
         grad_net.set_train()
         input_grad = grad_net(x, y, out_grad, parallel_inputs_compile=[x, y, out_grad],
                               parallel_inputs_run=[x1, y1, out_grad])
         return input_grad
diff --git a/tests/ut/python/parallel/parallel_end_to_end/mul_softmax/need_fix_test_mul_softmax_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/mul_softmax/need_fix_test_mul_softmax_parallel_4p.py
index 359736e605..efb0f92677 100644
--- a/tests/ut/python/parallel/parallel_end_to_end/mul_softmax/need_fix_test_mul_softmax_parallel_4p.py
+++ b/tests/ut/python/parallel/parallel_end_to_end/mul_softmax/need_fix_test_mul_softmax_parallel_4p.py
@@ -93,6 +93,7 @@
     def forward_mindspore_parallel_impl(self):
         net = MulSoftmax(strategy0=self.strategy0, strategy1=self.strategy1)
         context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+        net.set_auto_parallel()
         x = Tensor(self.input_np1)
         y = Tensor(self.input_np2, ms.float32)
         inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
@@ -120,6 +121,7 @@
         grad_net = Grad(net)
         context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
         grad_net.set_train()
+        grad_net.set_auto_parallel()
         inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
         x1 = Tensor(inputs_x[self.x_id])
         y1 = Tensor(self.input_np2, ms.float32)
diff --git a/tests/ut/python/parallel/parallel_end_to_end/onehot/_test_onehot_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/onehot/_test_onehot_parallel_4p.py
index ca749437fa..f60061e3ac 100644
--- a/tests/ut/python/parallel/parallel_end_to_end/onehot/_test_onehot_parallel_4p.py
+++ b/tests/ut/python/parallel/parallel_end_to_end/onehot/_test_onehot_parallel_4p.py
@@ -113,6 +113,7 @@
                   on_value=self.on_value,
                   off_value=self.off_value,
                   strategy=self.strategy0)
         context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+        net.set_auto_parallel()
         out = net(x, parallel_inputs_compile=[x], parallel_inputs_run=[x1])
         return out.asnumpy()
diff --git a/tests/ut/python/parallel/parallel_end_to_end/prelu/_test_prelu_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/prelu/_test_prelu_parallel_4p.py
index 44d1b49c90..4a0e8157b6 100644
--- a/tests/ut/python/parallel/parallel_end_to_end/prelu/_test_prelu_parallel_4p.py
+++ b/tests/ut/python/parallel/parallel_end_to_end/prelu/_test_prelu_parallel_4p.py
@@ -86,6 +86,7 @@
     def forward_mindspore_parallel_impl(self):
         net = PReLU(channel=self.channel, w=self.weight, strategy_=self.strategy,
                     strategy1_=(self.strategy[0], self.strategy[1], self.strategy[1]))
         context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+        net.set_auto_parallel()
         x = Tensor(self.input_np)
         z = Tensor(np.zeros(self.input_np.shape), ms.float32)
         w = Tensor(self.weight)
@@ -122,6 +123,7 @@
         net = PReLU(channel=self.channel, w=self.weight, strategy_=self.strategy,
                     strategy1_=(self.strategy[0], self.strategy[1], self.strategy[1]))
         grad_net = Grad(net)
         context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+        grad_net.set_auto_parallel()
         grad_net.set_train()

         inputs = self.get_parallel_blocks(self.input_np, self.strategy[1])
diff --git a/tests/ut/python/parallel/parallel_end_to_end/reducemean/_test_reducemean_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/reducemean/_test_reducemean_parallel_4p.py
index bfb885919c..e07c473f03 100644
--- a/tests/ut/python/parallel/parallel_end_to_end/reducemean/_test_reducemean_parallel_4p.py
+++ b/tests/ut/python/parallel/parallel_end_to_end/reducemean/_test_reducemean_parallel_4p.py
@@ -176,6 +176,7 @@
         y1 = Tensor(inputs_y[self.y_id])
         net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis, strategy0=self.strategy0, strategy1=self.strategy1)
         context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+        net.set_auto_parallel()
         out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
         return out.asnumpy()
@@ -202,6 +203,7 @@
         net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis, strategy0=self.strategy0, strategy1=self.strategy1)
         grad_net = Grad(net)
         context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+        grad_net.set_auto_parallel()
         grad_net.set_train()
         input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1],
                               parallel_inputs_run=[x1, y1, output_grad1])
diff --git a/tests/ut/python/parallel/parallel_end_to_end/reshape/_test_reshape_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/reshape/_test_reshape_parallel_4p.py
index c1c90e833d..64582d02e8 100644
--- a/tests/ut/python/parallel/parallel_end_to_end/reshape/_test_reshape_parallel_4p.py
+++ b/tests/ut/python/parallel/parallel_end_to_end/reshape/_test_reshape_parallel_4p.py
@@ -121,6 +121,7 @@
         y1 = Tensor(inputs_y[self.y_id])
         net = Reshape(self.target_shape, strategy0=self.strategy0, strategy1=self.strategy1)
         context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+        net.set_auto_parallel()
         out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
         return out.asnumpy()
@@ -147,6 +148,7 @@
         net = Reshape(self.target_shape, strategy0=self.strategy0, strategy1=self.strategy1)
         grad_net = Grad(net)
         context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+        grad_net.set_auto_parallel()
         grad_net.set_train()
         input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1],
                               parallel_inputs_run=[x1, y1, output_grad1])
         return input_grad
diff --git a/tests/ut/python/parallel/parallel_end_to_end/transpose/_test_transpose_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/transpose/_test_transpose_parallel_4p.py
index 3e3f372e5a..8a77d23c2b 100644
--- a/tests/ut/python/parallel/parallel_end_to_end/transpose/_test_transpose_parallel_4p.py
+++ b/tests/ut/python/parallel/parallel_end_to_end/transpose/_test_transpose_parallel_4p.py
@@ -148,6 +148,7 @@
         y1 = Tensor(inputs_y[self.y_id])
         net = Net(self.perm_in, strategy0=self.strategy0, strategy1=self.strategy1)
         context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+        net.set_auto_parallel()
         out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
         return out.asnumpy()
@@ -174,6 +175,7 @@
         net = Net(self.perm_in, strategy0=self.strategy0, strategy1=self.strategy1)
         grad_net = Grad(net)
         context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+        grad_net.set_auto_parallel()
         grad_net.set_train()
         input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1],
                               parallel_inputs_run=[x1, y1, output_grad1])
         return input_grad
diff --git a/tests/ut/python/parallel/test_add_relu_redistribution.py b/tests/ut/python/parallel/test_add_relu_redistribution.py
index 901b69728d..51f04b14d0 100644
--- a/tests/ut/python/parallel/test_add_relu_redistribution.py
+++ b/tests/ut/python/parallel/test_add_relu_redistribution.py
@@ -49,6 +49,12 @@ class Grad(nn.Cell):
     def construct(self, x, y):
         return C.grad_all(self.network)(x, y)

+
+def compile(net, x, y):
+    net.set_auto_parallel()
+    _executor.compile(net, x, y)
+
+
 def test_add_relu_stride_slice():
     context.set_auto_parallel_context(device_num=8, global_rank=7)
@@ -59,7 +65,7 @@
     x = Tensor(np.ones([128, 32]), dtype=ms.float32)
     y = Tensor(np.ones([128, 32]), dtype=ms.float32)
-    _executor.compile(net, x, y)
+    compile(net, x, y)

 def test_add_relu_all_gather():
     context.set_auto_parallel_context(device_num=8, global_rank=7)
@@ -71,4 +77,4 @@
     x = Tensor(np.ones([128, 32]), dtype=ms.float32)
     y = Tensor(np.ones([128, 32]), dtype=ms.float32)
-    _executor.compile(net, x, y)
\ No newline at end of file
+    compile(net, x, y)
\ No newline at end of file
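Several UT modules factor the new call into a small compile() helper so each test changes in one place. Note the helper shadows Python's builtin compile() at module scope, which is harmless in these test files. A generalized sketch — the variadic signature is an editorial generalization; the real helpers fix their positional arguments per file:

from mindspore.common.api import _executor

def compile(net, *inputs, phase='predict'):  # shadows the builtin, as the tests do
    net.set_auto_parallel()
    _executor.compile(net, *inputs, phase=phase)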
diff --git a/tests/ut/python/parallel/test_arithmetic.py b/tests/ut/python/parallel/test_arithmetic.py
index 4c34c0371e..5b5e2e1144 100644
--- a/tests/ut/python/parallel/test_arithmetic.py
+++ b/tests/ut/python/parallel/test_arithmetic.py
@@ -42,6 +42,11 @@ class GradWrap(nn.Cell):
         return C.grad_all(self.network)(x, y, b)

+
+def compile(net, x, y, b):
+    net.set_auto_parallel()
+    _executor.compile(net, x, y, b)
+
+
 def test_matmul_sub():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
@@ -64,7 +69,7 @@ def test_matmul_sub():
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)
     b = Tensor(np.ones([64, 64]), dtype=ms.float32)
-    _executor.compile(net, x, y, b)
+    compile(net, x, y, b)

 def test_matmul_add():
@@ -88,7 +93,7 @@
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)
     b = Tensor(np.ones([64, 64]), dtype=ms.float32)
-    _executor.compile(net, x, y, b)
+    compile(net, x, y, b)

 def test_matmul_mul():
@@ -112,7 +117,7 @@
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)
     b = Tensor(np.ones([64, 64]), dtype=ms.float32)
-    _executor.compile(net, x, y, b)
+    compile(net, x, y, b)

 def test_matmul_div():
@@ -136,7 +141,7 @@
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)
     b = Tensor(np.ones([64, 64]), dtype=ms.float32)
-    _executor.compile(net, x, y, b)
+    compile(net, x, y, b)

 def test_matmul_greater():
     class Net(nn.Cell):
@@ -159,7 +164,7 @@
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)
     b = Tensor(np.ones([64, 64]), dtype=ms.float32)
-    _executor.compile(net, x, y, b)
+    compile(net, x, y, b)

 def test_matmul_add_broadcast():
     class Net(nn.Cell):
@@ -182,7 +187,7 @@
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)
     b = Tensor(np.ones([64]), dtype=ms.float32)
-    _executor.compile(net, x, y, b)
+    compile(net, x, y, b)

 def test_matmul_add_broadcast2():
@@ -206,7 +211,7 @@
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 1]), dtype=ms.float32)
     b = Tensor(np.ones([1, 64]), dtype=ms.float32)
-    _executor.compile(net, x, y, b)
+    compile(net, x, y, b)

 def test_matmul_sub_broadcast():
@@ -230,7 +235,7 @@
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)
     b = Tensor(np.ones([64]), dtype=ms.float32)
-    _executor.compile(net, x, y, b)
+    compile(net, x, y, b)

 def test_matmul_sub_broadcast2():
@@ -254,7 +259,7 @@
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 1]), dtype=ms.float32)
     b = Tensor(np.ones([1, 64]), dtype=ms.float32)
-    _executor.compile(net, x, y, b)
+    compile(net, x, y, b)

 def test_matmul_mul_broadcast():
@@ -278,7 +283,7 @@
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)
     b = Tensor(np.ones([64]), dtype=ms.float32)
-    _executor.compile(net, x, y, b)
+    compile(net, x, y, b)

 def test_matmul_mul_broadcast2():
@@ -302,7 +307,7 @@
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 1]), dtype=ms.float32)
     b = Tensor(np.ones([1, 64]), dtype=ms.float32)
-    _executor.compile(net, x, y, b)
+    compile(net, x, y, b)

 def test_matmul_div_broadcast():
@@ -326,7 +331,7 @@
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)
     b = Tensor(np.ones([64]), dtype=ms.float32)
-    _executor.compile(net, x, y, b)
+    compile(net, x, y, b)

 def test_matmul_div_broadcast2():
@@ -350,7 +355,7 @@
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 1]), dtype=ms.float32)
     b = Tensor(np.ones([1, 64]), dtype=ms.float32)
-    _executor.compile(net, x, y, b)
+    compile(net, x, y, b)

 def test_matmul_greater_broadcast():
     class Net(nn.Cell):
@@ -373,7 +378,7 @@
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)
     b = Tensor(np.ones([64]), dtype=ms.float32)
-    _executor.compile(net, x, y, b)
+    compile(net, x, y, b)

 def test_matmul_greater_broadcast2():
@@ -397,7 +402,7 @@
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 1]), dtype=ms.float32)
     b = Tensor(np.ones([1, 64]), dtype=ms.float32)
-    _executor.compile(net, x, y, b)
+    compile(net, x, y, b)

 def test_matmul_floordiv():
     class Net(nn.Cell):
@@ -420,7 +425,7 @@
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)
     b = Tensor(np.ones([64, 64]), dtype=ms.float32)
-    _executor.compile(net, x, y, b)
+    compile(net, x, y, b)

 def test_matmul_floordiv_broadcast():
@@ -444,7 +449,7 @@
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)
     b = Tensor(np.ones([64]), dtype=ms.float32)
-    _executor.compile(net, x, y, b)
+    compile(net, x, y, b)

 def test_matmul_floordiv_broadcast2():
@@ -468,7 +473,7 @@
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 1]), dtype=ms.float32)
     b = Tensor(np.ones([1, 64]), dtype=ms.float32)
-    _executor.compile(net, x, y, b)
+    compile(net, x, y, b)

 def test_assign_sub():
@@ -495,4 +500,4 @@
     x = Tensor(np.ones([128, 32]), dtype=ms.float32)
     y = Tensor(np.ones([128, 32]), dtype=ms.float32)
     z = Tensor(np.ones([128, 32]), dtype=ms.float32)
-    _executor.compile(net, x, y, z)
+    compile(net, x, y, z)
diff --git a/tests/ut/python/parallel/test_auto_parallel_BN_PReLU.py b/tests/ut/python/parallel/test_auto_parallel_BN_PReLU.py
old mode 100755
new mode 100644
index 3a49719235..77f1386e8c
--- a/tests/ut/python/parallel/test_auto_parallel_BN_PReLU.py
+++ b/tests/ut/python/parallel/test_auto_parallel_BN_PReLU.py
@@ -66,4 +66,5 @@ def test_auto_parallel_bn_with_prelu():
     net = GradWrap(NetWithLoss(Net()))
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
+    net.set_auto_parallel()
     _executor.compile(net, x)
diff --git a/tests/ut/python/parallel/test_auto_parallel_arithmetic.py b/tests/ut/python/parallel/test_auto_parallel_arithmetic.py
index 7c928be376..00473a5c7e 100644
--- a/tests/ut/python/parallel/test_auto_parallel_arithmetic.py
+++ b/tests/ut/python/parallel/test_auto_parallel_arithmetic.py
@@ -43,6 +43,12 @@ class GradWrap(nn.Cell):
     def construct(self, x, y, b):
         return C.grad_all(self.network)(x, y, b)

+
+def compile(net, x, y, b, phase):
+    net.set_auto_parallel()
+    _executor.compile(net, x, y, b, phase=phase)
+
+
 def test_auto_parallel_arithmetic():
     class Net(nn.Cell):
         def __init__(self):
@@ -63,7 +69,7 @@
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 128]), dtype=ms.float32)
     b = Tensor(np.ones([64, 128]), dtype=ms.float32)
-    _executor.compile(net, x, y, b, phase='train')
+    compile(net, x, y, b, phase='train')
     strategies = _executor._get_strategy(net)
     expected_strategies = {'Default/network-Net/FloorDiv-op0': [[2, 4], [2, 4]],
                            'Default/network-Net/MatMul-op1': [[2, 1], [1, 4]]}
@@ -89,7 +95,7 @@ def test_auto_parallel_arithmetic_broadcast_both():
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 1]), dtype=ms.float32)
     b = Tensor(np.ones([1, 64]), dtype=ms.float32)
-    _executor.compile(net, x, y, b, phase='train')
+    compile(net, x, y, b, phase='train')
     strategies = _executor._get_strategy(net)
     expected_strategies = {'Default/network-Net/FloorDiv-op0': [[8, 1], [1, 1]],
                            'Default/network-Net/MatMul-op1': [[8, 1], [1, 1]]}
@@ -116,7 +122,7 @@
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 32]), dtype=ms.float32)
     b = Tensor(np.ones([32]), dtype=ms.float32)
-    _executor.compile(net, x, y, b, phase='train')
+    compile(net, x, y, b, phase='train')
     strategies = _executor._get_strategy(net)
     expected_strategies = {'Default/network-Net/FloorDiv-op0': [[4, 2], [2]],
                            'Default/network-Net/MatMul-op1': [[4, 1], [1, 2]]}
@@ -143,7 +149,7 @@
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 32]), dtype=ms.float32)
     b = Tensor(np.ones([128, 64, 32]), dtype=ms.float32)
-    _executor.compile(net, x, y, b, phase="train")
+    compile(net, x, y, b, phase="train")
     strategies = _executor._get_strategy(net)
     expected_strategies = {'Default/network-Net/FloorDiv-op0': [[4, 2], [1, 4, 2]],
                            'Default/network-Net/MatMul-op1': [[4, 1], [1, 2]]}
diff --git a/tests/ut/python/parallel/test_auto_parallel_assign_sub_with_ref_key.py b/tests/ut/python/parallel/test_auto_parallel_assign_sub_with_ref_key.py
old mode 100755
new mode 100644
index 5364263695..3de0596a1d
--- a/tests/ut/python/parallel/test_auto_parallel_assign_sub_with_ref_key.py
+++ b/tests/ut/python/parallel/test_auto_parallel_assign_sub_with_ref_key.py
@@ -52,6 +52,7 @@

     net = NetWithLoss(nn.PReLU(4))
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
+    net.set_auto_parallel()
     reset_op_id()

     _executor.compile(net, x, phase="train")
diff --git a/tests/ut/python/parallel/test_auto_parallel_cast.py b/tests/ut/python/parallel/test_auto_parallel_cast.py
index 4cfeb59fc1..841d28c9aa 100644
--- a/tests/ut/python/parallel/test_auto_parallel_cast.py
+++ b/tests/ut/python/parallel/test_auto_parallel_cast.py
@@ -71,6 +71,7 @@ def test_double_star_graph():
     net = NetWithLoss(Net())
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
+    net.set_auto_parallel()
     reset_op_id()

     _executor.compile(net, x, y, z, w, phase='train')
diff --git a/tests/ut/python/parallel/test_auto_parallel_common_parameter.py b/tests/ut/python/parallel/test_auto_parallel_common_parameter.py
index 89d73855c1..cf454ccc49 100644
--- a/tests/ut/python/parallel/test_auto_parallel_common_parameter.py
+++ b/tests/ut/python/parallel/test_auto_parallel_common_parameter.py
@@ -63,4 +63,5 @@
     net = GradWrap(NetWithLoss(Net()))
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
+    net.set_auto_parallel()
     _executor.compile(net, x, y, z)
\ No newline at end of file
diff --git a/tests/ut/python/parallel/test_auto_parallel_double_star.py b/tests/ut/python/parallel/test_auto_parallel_double_star.py
index 275046b6a6..852726ad45 100644
--- a/tests/ut/python/parallel/test_auto_parallel_double_star.py
+++ b/tests/ut/python/parallel/test_auto_parallel_double_star.py
@@ -74,4 +74,5 @@
     net = GradWrap(NetWithLoss(Net()))
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
+    net.set_auto_parallel()
     _executor.compile(net, x, y, z, w, a, b, c)
diff --git a/tests/ut/python/parallel/test_auto_parallel_double_subgraphs.py b/tests/ut/python/parallel/test_auto_parallel_double_subgraphs.py
index 5e8c89de25..d842bca6f4 100644
--- a/tests/ut/python/parallel/test_auto_parallel_double_subgraphs.py
+++ b/tests/ut/python/parallel/test_auto_parallel_double_subgraphs.py
@@ -88,6 +88,7 @@ def test_double_subgraphs():
     context.set_auto_parallel_context(device_num=8, global_rank=0)
     net = TrainStepWarp(NetWithLoss(Net()))
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
+    net.set_auto_parallel()
     x = Tensor(np.ones([8, 8, 8, 8]), dtype=ms.float32)

     reset_op_id()
diff --git a/tests/ut/python/parallel/test_auto_parallel_fc_nobias.py b/tests/ut/python/parallel/test_auto_parallel_fc_nobias.py
index b5f5df905c..a4e127693c 100644
--- a/tests/ut/python/parallel/test_auto_parallel_fc_nobias.py
+++ b/tests/ut/python/parallel/test_auto_parallel_fc_nobias.py
@@ -61,4 +61,5 @@ def test_two_matmul():
     net = GradWrap(NetWithLoss(Net()))
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
+    net.set_auto_parallel()
     _executor.compile(net, x, y, b)
diff --git a/tests/ut/python/parallel/test_auto_parallel_four_matmul.py b/tests/ut/python/parallel/test_auto_parallel_four_matmul.py
index adabdbfdc4..5efcda9cbb 100644
--- a/tests/ut/python/parallel/test_auto_parallel_four_matmul.py
+++ b/tests/ut/python/parallel/test_auto_parallel_four_matmul.py
@@ -40,6 +40,12 @@ class GradWrap(nn.Cell):
     def construct(self, x, y, z, w, b):
         return C.grad_all(self.network)(x, y, z, w, b)

+
+def compile(net, x, y, z, w, b):
+    net.set_auto_parallel()
+    _executor.compile(net, x, y, z, w, b)
+
+
 # model_parallel test
 def test_four_matmul_linear():
     class Net(nn.Cell):
@@ -67,7 +73,7 @@
     net = GradWrap(NetWithLoss(Net()))
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
-    _executor.compile(net, x, y, z, w, b)
+    compile(net, x, y, z, w, b)

 def test_four_matmul1():
@@ -93,7 +99,7 @@
     net = GradWrap(NetWithLoss(Net()))
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
-    _executor.compile(net, x, y, z, w, b)
+    compile(net, x, y, z, w, b)

 def test_four_matmul2():
@@ -120,4 +126,4 @@
     net = GradWrap(NetWithLoss(Net()))
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
-    _executor.compile(net, x, y, z, w, b)
+    compile(net, x, y, z, w, b)
diff --git a/tests/ut/python/parallel/test_auto_parallel_l2normalize.py b/tests/ut/python/parallel/test_auto_parallel_l2normalize.py
index 40b6707d8e..dbe32cd825 100644
--- a/tests/ut/python/parallel/test_auto_parallel_l2normalize.py
+++ b/tests/ut/python/parallel/test_auto_parallel_l2normalize.py
@@ -63,6 +63,7 @@ def test_auto_parallel_l2normalize():
     context.set_auto_parallel_context(device_num=8, global_rank=0)
     net = NetWithLoss(Net())
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
+    net.set_auto_parallel()
     reset_op_id()

     x = Tensor(np.ones([128, 64, 64]), dtype=ms.float32)
diff --git a/tests/ut/python/parallel/test_auto_parallel_matmul_drop.py b/tests/ut/python/parallel/test_auto_parallel_matmul_drop.py
index 550ad11ca9..51360cfaf9 100644
--- a/tests/ut/python/parallel/test_auto_parallel_matmul_drop.py
+++ b/tests/ut/python/parallel/test_auto_parallel_matmul_drop.py
@@ -61,6 +61,7 @@ def test_two_matmul_dropout():
     context.set_auto_parallel_context(device_num=8, global_rank=0)
     net = GradWrap(NetWithLoss(Net()))
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
+    net.set_auto_parallel()
     x = Tensor(np.ones([128, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)
diff --git a/tests/ut/python/parallel/test_auto_parallel_matmul_prelu.py b/tests/ut/python/parallel/test_auto_parallel_matmul_prelu.py
index 4d68a7f883..6c7e5ba6f3 100644
--- a/tests/ut/python/parallel/test_auto_parallel_matmul_prelu.py
+++ b/tests/ut/python/parallel/test_auto_parallel_matmul_prelu.py
@@ -63,6 +63,7 @@ def test_matmul_prelu():
     net = NetWithLoss(Net())
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
+    net.set_auto_parallel()
     reset_op_id()

     _executor.compile(net, x, y, b, phase='train')
diff --git a/tests/ut/python/parallel/test_auto_parallel_onehot.py b/tests/ut/python/parallel/test_auto_parallel_onehot.py
index d82e3705f5..ebf9e40546 100644
--- a/tests/ut/python/parallel/test_auto_parallel_onehot.py
+++ b/tests/ut/python/parallel/test_auto_parallel_onehot.py
@@ -89,6 +89,7 @@ def test_auto_parallel_arithmetic():
     context.set_auto_parallel_context(device_num=8, global_rank=0)
     net = GradWrap(NetWithLoss(Net()))
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
+    net.set_auto_parallel()

     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)
diff --git a/tests/ut/python/parallel/test_auto_parallel_parameter_cast.py b/tests/ut/python/parallel/test_auto_parallel_parameter_cast.py
index 29e81f7f90..8b2840a9b0 100644
--- a/tests/ut/python/parallel/test_auto_parallel_parameter_cast.py
+++ b/tests/ut/python/parallel/test_auto_parallel_parameter_cast.py
@@ -76,6 +76,7 @@ def test_common_parameter():
     net = NetWithLoss(Net())
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
+    net.set_auto_parallel()
     reset_op_id()

     _executor.compile(net, x, y, z, w, phase='train')
diff --git a/tests/ut/python/parallel/test_auto_parallel_partial_strategy.py b/tests/ut/python/parallel/test_auto_parallel_partial_strategy.py
index e5fda638e0..217e13586d 100644
--- a/tests/ut/python/parallel/test_auto_parallel_partial_strategy.py
+++ b/tests/ut/python/parallel/test_auto_parallel_partial_strategy.py
@@ -68,5 +68,5 @@ def test_four_matmul_linear():
     net = GradWrap(NetWithLoss(Net(strategy1)))
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
-    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+    net.set_auto_parallel()
     _executor.compile(net, x, y, z, w, b)
diff --git a/tests/ut/python/parallel/test_auto_parallel_reduce_method.py b/tests/ut/python/parallel/test_auto_parallel_reduce_method.py
index 6e5576ed53..560e5abc14 100644
--- a/tests/ut/python/parallel/test_auto_parallel_reduce_method.py
+++ b/tests/ut/python/parallel/test_auto_parallel_reduce_method.py
@@ -42,6 +42,12 @@ class GradWrap(nn.Cell):
     def construct(self, x, y, b):
         return C.grad_all(self.network)(x, y, b)

+
+def compile(net, x, y, b):
+    net.set_auto_parallel()
+    _executor.compile(net, x, y, b)
+
+
 # model_parallel test
 def test_sum_mul():
     class Net(nn.Cell):
@@ -64,7 +70,7 @@
     x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
     y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
     b = Tensor(np.ones([32, 64]), dtype=ms.float32)
-    _executor.compile(net, x, y, b)
+    compile(net, x, y, b)

 def test_sum_mul2():
     class Net(nn.Cell):
@@ -87,7 +93,7 @@
     x = Tensor(np.ones([128, 128, 64, 64]), dtype=ms.float32)
     y = Tensor(np.ones([128, 128, 64, 64]), dtype=ms.float32)
     b = Tensor(np.ones([64, 64]), dtype=ms.float32)
-    _executor.compile(net, x, y, b)
+    compile(net, x, y, b)

 def test_sum_mul3():
     class Net(nn.Cell):
@@ -110,4 +116,4 @@
     x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
     y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
     b = Tensor(np.ones([128, 32]), dtype=ms.float32)
-    _executor.compile(net, x, y, b)
+    compile(net, x, y, b)
diff --git a/tests/ut/python/parallel/test_auto_parallel_reshape.py b/tests/ut/python/parallel/test_auto_parallel_reshape.py
index ed9f24cc0b..09769776a9 100644
--- a/tests/ut/python/parallel/test_auto_parallel_reshape.py
+++ b/tests/ut/python/parallel/test_auto_parallel_reshape.py
@@ -62,6 +62,7 @@ def test_reshape_matmul():
     net = GradWrap(NetWithLoss(Net()))
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
+    net.set_auto_parallel()
     _executor.compile(net, x)
diff --git a/tests/ut/python/parallel/test_auto_parallel_rhombus.py b/tests/ut/python/parallel/test_auto_parallel_rhombus.py
index 830504d727..1d66aa3b5d 100644
--- a/tests/ut/python/parallel/test_auto_parallel_rhombus.py
+++ b/tests/ut/python/parallel/test_auto_parallel_rhombus.py
@@ -40,6 +40,12 @@ class GradWrap(nn.Cell):
     def construct(self, x, y, b):
         return C.grad_all(self.network)(x, y, b)

+
+def compile(net, x, y, b):
+    net.set_auto_parallel()
+    _executor.compile(net, x, y, b)
+
+
 def test_rhombus1():
     class Net(nn.Cell):
         def __init__(self):
@@ -63,7 +69,7 @@
     net = GradWrap(NetWithLoss(Net()))
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
-    _executor.compile(net, x, y, b)
+    compile(net, x, y, b)

 def test_rhombus2():
     class Net(nn.Cell):
@@ -93,7 +99,7 @@
     net = GradWrap(NetWithLoss(Net()))
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
-    _executor.compile(net, x, y, b)
+    compile(net, x, y, b)

 def test_rhombus3():
     class Net(nn.Cell):
@@ -123,4 +129,4 @@
     net = GradWrap(NetWithLoss(Net()))
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
-    _executor.compile(net, x, y, z)
\ No newline at end of file
+    compile(net, x, y, z)
diff --git a/tests/ut/python/parallel/test_auto_parallel_softmax_loss.py b/tests/ut/python/parallel/test_auto_parallel_softmax_loss.py
index 4167311c77..712e4c19f7 100644
--- a/tests/ut/python/parallel/test_auto_parallel_softmax_loss.py
+++ b/tests/ut/python/parallel/test_auto_parallel_softmax_loss.py
@@ -57,6 +57,7 @@ def test_softmax_cross_entropy_loss_auto_parallel():
     context.set_auto_parallel_context(device_num=8, global_rank=0)
     net = GradWrap(NetWithLoss(Net()))
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
+    net.set_auto_parallel()

     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([64, 32]), dtype=ms.float32)
diff --git a/tests/ut/python/parallel/test_auto_parallel_transformer.py b/tests/ut/python/parallel/test_auto_parallel_transformer.py
index a572d31ba1..88cd322c77 100644
--- a/tests/ut/python/parallel/test_auto_parallel_transformer.py
+++ b/tests/ut/python/parallel/test_auto_parallel_transformer.py
@@ -102,4 +102,5 @@ def test_dmnet_train_step():
     input = Tensor(np.ones([4096, 4096]).astype(np.float32) * 0.01)
     net = GradWrap(NetWithLoss(MultiTransformer()))
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
+    net.set_auto_parallel()
     _executor.compile(net, input)
diff --git a/tests/ut/python/parallel/test_auto_parallel_transpose.py b/tests/ut/python/parallel/test_auto_parallel_transpose.py
index 2d4dcbae81..821546330c 100644
--- a/tests/ut/python/parallel/test_auto_parallel_transpose.py
+++ b/tests/ut/python/parallel/test_auto_parallel_transpose.py
@@ -67,6 +67,7 @@ def test_two_matmul_transpose():
     net = NetWithLoss(Net())
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
+    net.set_auto_parallel()
     reset_op_id()

     _executor.compile(net, x, y, b, phase='train')
diff --git a/tests/ut/python/parallel/test_auto_parallel_tuple_depend.py b/tests/ut/python/parallel/test_auto_parallel_tuple_depend.py
index 0f8949d3fc..fba9f7855a 100644
--- a/tests/ut/python/parallel/test_auto_parallel_tuple_depend.py
+++ b/tests/ut/python/parallel/test_auto_parallel_tuple_depend.py
@@ -69,6 +69,7 @@ def test_virtual_dataset_3_input():
     net = GradWrap(NetWithLoss(Net()))
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
     context.set_auto_parallel_context(device_num=8, global_rank=0)
+    net.set_auto_parallel()
     x = Tensor(np.ones([128, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)
     b = Tensor(np.ones([64, 2048]), dtype=ms.float32)
diff --git a/tests/ut/python/parallel/test_auto_parallel_two_bn.py b/tests/ut/python/parallel/test_auto_parallel_two_bn.py
index 8eb6074f9f..41219a1e42 100644
--- a/tests/ut/python/parallel/test_auto_parallel_two_bn.py
+++ b/tests/ut/python/parallel/test_auto_parallel_two_bn.py
@@ -54,6 +54,7 @@ def test_two_bn():
     context.set_context(save_graphs=True)
     context.set_auto_parallel_context(device_num=8, global_rank=0)
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
+    net.set_auto_parallel()
     set_algo_parameters(elementwise_op_strategy_follow=True)
     reset_op_id()
diff --git a/tests/ut/python/parallel/test_auto_parallel_two_matmul.py b/tests/ut/python/parallel/test_auto_parallel_two_matmul.py
index 2e2ddd8f32..443567da58 100644
--- a/tests/ut/python/parallel/test_auto_parallel_two_matmul.py
+++ b/tests/ut/python/parallel/test_auto_parallel_two_matmul.py
@@ -124,6 +124,7 @@ def test_two_matmul():
     net = NetWithLoss(Net())
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
+    net.set_auto_parallel()
     reset_op_id()

     _executor.compile(net, x, y, b, phase='train')
diff --git a/tests/ut/python/parallel/test_auto_parallel_two_partial_matmul.py b/tests/ut/python/parallel/test_auto_parallel_two_partial_matmul.py
index 77973924b2..8e6b006db8 100644
--- a/tests/ut/python/parallel/test_auto_parallel_two_partial_matmul.py
+++ b/tests/ut/python/parallel/test_auto_parallel_two_partial_matmul.py
@@ -62,4 +62,5 @@ def test_four_matmul_linear():
     net = GradWrap(NetWithLoss(Net(strategy1)))
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
+    net.set_auto_parallel()
     _executor.compile(net, x, y)
\ No newline at end of file
diff --git a/tests/ut/python/parallel/test_auto_parallel_zig_zag.py b/tests/ut/python/parallel/test_auto_parallel_zig_zag.py
index 72af3ccbe5..5184d308ee 100644
--- a/tests/ut/python/parallel/test_auto_parallel_zig_zag.py
+++ b/tests/ut/python/parallel/test_auto_parallel_zig_zag.py
@@ -68,4 +68,5 @@ def test_zig_zag_graph():
     net = GradWrap(NetWithLoss(Net()))
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
+    net.set_auto_parallel()
     _executor.compile(net, x, y, z, w, a)
diff --git a/tests/ut/python/parallel/test_auto_star_elimination.py b/tests/ut/python/parallel/test_auto_star_elimination.py
index db3b434676..100f24dda3 100644
--- a/tests/ut/python/parallel/test_auto_star_elimination.py
+++ b/tests/ut/python/parallel/test_auto_star_elimination.py
@@ -85,4 +85,5 @@ def test_marin_loss():
     net = GradWrap(NetWithLoss(MarginCE()))
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
+    net.set_auto_parallel()
     _executor.compile(net, x, y)
\ No newline at end of file
diff --git a/tests/ut/python/parallel/test_batch_matmul.py b/tests/ut/python/parallel/test_batch_matmul.py
index 88ba818c91..e230105ed9 100644
--- a/tests/ut/python/parallel/test_batch_matmul.py
+++ b/tests/ut/python/parallel/test_batch_matmul.py
@@ -43,6 +43,7 @@ _b = Tensor(np.ones([128, 64, 16]), dtype=ms.float32)

 def compile(net):
     optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
     train_net = TrainOneStepCell(net, optimizer)
+    train_net.set_auto_parallel()
     _executor.compile(train_net, _x, _b)
     context.reset_auto_parallel_context()
diff --git a/tests/ut/python/parallel/test_batch_parallel.py b/tests/ut/python/parallel/test_batch_parallel.py
index 44670bf7e4..9e09d66f45 100644
--- a/tests/ut/python/parallel/test_batch_parallel.py
+++ b/tests/ut/python/parallel/test_batch_parallel.py
@@ -100,6 +100,7 @@ def test_batch():
     net = GradWrap(NetWithLoss(Net(strategy1, strategy2, strategy3)))
     context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+    net.set_auto_parallel()

     x = Tensor(np.ones([128, 16, 34, 34]), dtype=ms.float32)
     w1 = Tensor(np.ones([128, 8, 32, 32]), dtype=ms.float32)
diff --git a/tests/ut/python/parallel/test_batch_parallel_dropout.py b/tests/ut/python/parallel/test_batch_parallel_dropout.py
index 08db155cca..7aa2a049bb 100644
--- a/tests/ut/python/parallel/test_batch_parallel_dropout.py
+++ b/tests/ut/python/parallel/test_batch_parallel_dropout.py
@@ -61,6 +61,7 @@ def test_batch_parallel_dropout():
     context.set_auto_parallel_context(device_num=8, global_rank=0)
     net = GradWrap(NetWithLoss(Net()))
     context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+    net.set_auto_parallel()

     x = Tensor(np.ones([128, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)
diff --git a/tests/ut/python/parallel/test_batch_parallel_tensoradd.py b/tests/ut/python/parallel/test_batch_parallel_tensoradd.py
index d16496a933..9918d72bec 100644
--- a/tests/ut/python/parallel/test_batch_parallel_tensoradd.py
+++ b/tests/ut/python/parallel/test_batch_parallel_tensoradd.py
@@ -58,6 +58,7 @@ def test_matmul_add():
     net = GradWrap(NetWithLoss(Net()))
     context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+    net.set_auto_parallel()

     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)
diff --git a/tests/ut/python/parallel/test_comparison_function_info.py b/tests/ut/python/parallel/test_comparison_function_info.py
index 93ec5e5981..6f5ea4e26a 100644
--- a/tests/ut/python/parallel/test_comparison_function_info.py
+++ b/tests/ut/python/parallel/test_comparison_function_info.py
@@ -42,6 +42,12 @@ class GradWrap(nn.Cell):
     def construct(self, x, y, b):
         return C.grad_all(self.network)(x, y, b)

+
+def compile(net, x, y, b):
+    net.set_auto_parallel()
+    _executor.compile(net, x, y, b)
+
+
 def test_matmul_equal():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
@@ -62,7 +68,7 @@ def test_matmul_equal():
     x = Tensor(np.ones([128, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)
     b = Tensor(np.ones([128, 64]), dtype=ms.float32)
-    _executor.compile(net, x, y, b)
+    compile(net, x, y, b)

 def test_matmul_not_equal():
@@ -85,7 +91,7 @@
     x = Tensor(np.ones([128, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)
     b = Tensor(np.ones([128, 64]), dtype=ms.float32)
-    _executor.compile(net, x, y, b)
+    compile(net, x, y, b)

test_matmul_not_equal_repeated_calculation(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_matmul_maximum(): @@ -131,7 +137,7 @@ def test_matmul_maximum(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_matmul_maximum_broadcast(): @@ -154,7 +160,7 @@ def test_matmul_maximum_broadcast(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_matmul_maximum_broadcast2(): @@ -177,7 +183,7 @@ def test_matmul_maximum_broadcast2(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 1]), dtype=ms.float32) b = Tensor(np.ones([1, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_matmul_minimum(): @@ -200,7 +206,7 @@ def test_matmul_minimum(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_matmul_minimum_broadcast(): @@ -223,7 +229,7 @@ def test_matmul_minimum_broadcast(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_matmul_minimum_broadcast2(): @@ -246,7 +252,7 @@ def test_matmul_minimum_broadcast2(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 1]), dtype=ms.float32) b = Tensor(np.ones([1, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_matmul_minimum_auto_parallel(): @@ -267,4 +273,4 @@ def test_matmul_minimum_auto_parallel(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 1]), dtype=ms.float32) b = Tensor(np.ones([1, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) diff --git a/tests/ut/python/parallel/test_different_type_for_div_op.py b/tests/ut/python/parallel/test_different_type_for_div_op.py index 9e9bbbe05e..2268437c82 100644 --- a/tests/ut/python/parallel/test_different_type_for_div_op.py +++ b/tests/ut/python/parallel/test_different_type_for_div_op.py @@ -31,6 +31,11 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y, bias) +def compile(net, x, y, bias): + net.set_auto_parallel() + _executor.compile(net, x, y, bias) + + def test_sum_as_loss_float16(): class Net(nn.Cell): def __init__(self, strategy0, strategy1): @@ -52,7 +57,7 @@ def test_sum_as_loss_float16(): x = Tensor(np.ones([64, 32]), dtype=ms.float16) y = Tensor(np.ones([64, 32]), dtype=ms.float16) bias = Tensor(np.ones([64]), dtype=ms.float16) - _executor.compile(net, x, y, bias) + compile(net, x, y, bias) def test_sum_as_loss_float32(): @@ -76,7 +81,7 @@ def test_sum_as_loss_float32(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 32]), dtype=ms.float32) bias = Tensor(np.ones([64]), dtype=ms.float32) - _executor.compile(net, x, y, bias) + compile(net, x, y, bias) def test_sum_as_loss_int32(): @@ -100,4 +105,4 @@ def test_sum_as_loss_int32(): x = Tensor(np.ones([64, 32]), dtype=ms.int32) y = Tensor(np.ones([64, 
32]), dtype=ms.int32) bias = Tensor(np.ones([64]), dtype=ms.int32) - _executor.compile(net, x, y, bias) + compile(net, x, y, bias) diff --git a/tests/ut/python/parallel/test_dropout_do_mask.py b/tests/ut/python/parallel/test_dropout_do_mask.py index cfa7f50135..c22a07fc31 100644 --- a/tests/ut/python/parallel/test_dropout_do_mask.py +++ b/tests/ut/python/parallel/test_dropout_do_mask.py @@ -52,6 +52,7 @@ _b = Tensor(np.ones([128, 64]), dtype=ms.float32) def compile(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) + train_net.set_auto_parallel() _executor.compile(train_net, _x, _b) context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_element_wise_function.py b/tests/ut/python/parallel/test_element_wise_function.py index 641eb19f20..7cf2253184 100644 --- a/tests/ut/python/parallel/test_element_wise_function.py +++ b/tests/ut/python/parallel/test_element_wise_function.py @@ -43,6 +43,11 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y, b) +def compile(net, x, y, b): + net.set_auto_parallel() + _executor.compile(net, x, y, b) + + def test_matmul_pow(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): @@ -66,7 +71,7 @@ def test_matmul_pow(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_matmul_exp(): @@ -92,7 +97,7 @@ def test_matmul_exp(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_matmul_log(): @@ -118,7 +123,7 @@ def test_matmul_log(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_matmul_logical_not(): @@ -145,7 +150,7 @@ def test_matmul_logical_not(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_matmul_cast(): class Net(nn.Cell): @@ -171,7 +176,7 @@ def test_matmul_cast(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.int32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_cast_before_mirror(): @@ -195,7 +200,7 @@ def test_cast_before_mirror(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float16) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_cast_before_mirror1(): @@ -219,7 +224,7 @@ def test_cast_before_mirror1(): x = Tensor(np.ones([128, 32]), dtype=ms.float16) y = Tensor(np.ones([32, 64]), dtype=ms.float16) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_cast_before_mirror2(): @@ -243,7 +248,7 @@ def test_cast_before_mirror2(): x = Tensor(np.ones([128, 32]), dtype=ms.float16) y = Tensor(np.ones([32, 64]), dtype=ms.float16) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_cast_before_mirror3(): @@ -267,7 +272,7 @@ def test_cast_before_mirror3(): x = 
Tensor(np.ones([128, 32]), dtype=ms.float16) y = Tensor(np.ones([32, 64]), dtype=ms.float16) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_mul_two_cast(): @@ -296,4 +301,4 @@ def test_mul_two_cast(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([128, 32]), dtype=ms.float32) b = Tensor(np.ones([128, 32]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) diff --git a/tests/ut/python/parallel/test_expand_dims.py b/tests/ut/python/parallel/test_expand_dims.py index 676e9ed523..b35c36b7fa 100644 --- a/tests/ut/python/parallel/test_expand_dims.py +++ b/tests/ut/python/parallel/test_expand_dims.py @@ -56,6 +56,7 @@ _b = Tensor(np.ones([128, 64, 32, 1]), dtype=ms.float32) def compile(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) + train_net.set_auto_parallel() _executor.compile(train_net, _x, _b) context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_forward_graph.py b/tests/ut/python/parallel/test_forward_graph.py index 76cd5b4178..944cb5532c 100644 --- a/tests/ut/python/parallel/test_forward_graph.py +++ b/tests/ut/python/parallel/test_forward_graph.py @@ -39,6 +39,7 @@ _b = Tensor(np.ones([128, 64, 32]), dtype=ms.float32) def compile(net): + net.set_auto_parallel() _executor.compile(net, _x, _b) context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_get_next.py b/tests/ut/python/parallel/test_get_next.py index 86bf3b4226..84789db10e 100644 --- a/tests/ut/python/parallel/test_get_next.py +++ b/tests/ut/python/parallel/test_get_next.py @@ -52,6 +52,12 @@ class GradWrap(nn.Cell): def construct(self): return C.grad_by_list(self.network, self.weights)() + +def compile(net): + net.set_auto_parallel() + _executor.compile(net) + + def test_get_next_single(): class Net(nn.Cell): def __init__(self, channel=1, w=0.25): @@ -87,7 +93,7 @@ def test_get_next_semi_auto_parallel(): net_with_loss = NetWithLoss(network, [ms.float32, ms.int32],[[32,64], [32]], 2, strategy3=strategy3, strategy4=strategy4) net = GradWrap(net_with_loss) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - _executor.compile(net) + compile(net) def test_get_next_semi_auto_parallel1(): class Net(nn.Cell): @@ -109,7 +115,7 @@ def test_get_next_semi_auto_parallel1(): net_with_loss = NetWithLoss(network, [ms.float32, ms.int32],[[32,64], [32]], 2, strategy3=strategy3, strategy4=strategy4) net = GradWrap(net_with_loss) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - _executor.compile(net) + compile(net) def test_get_next_auto_parallel(): class Net(nn.Cell): @@ -129,7 +135,7 @@ def test_get_next_auto_parallel(): net_with_loss = NetWithLoss(network, [ms.float32, ms.int32],[[32,64], [32]], 2) net = GradWrap(net_with_loss) context.set_auto_parallel_context(parallel_mode="auto_parallel") - _executor.compile(net) + compile(net) def test_only_one_get_next(): @@ -145,4 +151,4 @@ def test_only_one_get_next(): context.set_auto_parallel_context(device_num=4, global_rank=0) net = Net() context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - _executor.compile(net) + compile(net) diff --git a/tests/ut/python/parallel/test_get_parameter_layout.py b/tests/ut/python/parallel/test_get_parameter_layout.py index 82f2fc44f9..b390ce9b33 100644 --- a/tests/ut/python/parallel/test_get_parameter_layout.py +++ 
b/tests/ut/python/parallel/test_get_parameter_layout.py @@ -45,13 +45,14 @@ def test_get_parameter_layout(): weight = Tensor(np.ones([64, 32]), dtype=ms.float32) net = Net(strategy1, strategy2, weight) + net.set_auto_parallel() exe = me._executor - exe.compile(net, x) + exe.compile(net, x, auto_parallel_mode=True) x_layout = ([2, 4], [1, -1]) # device_arrangement = [2, 4], tensor_map = [1, -1] weight_layout = ([2, 4], [0, -1]) # device_arrangement = [2, 4], tensor_map = [0, -1] expect_dict = {'x': x_layout, 'w1': weight_layout} # to be resovled: static local variable count_p is used in step_parallel.cc, it needs to be reset between each ut - assert (net._parameter_layout_dict == expect_dict) + assert (net.parameter_layout_dict == expect_dict) if __name__ == '__main__': diff --git a/tests/ut/python/parallel/test_hybird_parallel_activation.py b/tests/ut/python/parallel/test_hybird_parallel_activation.py index c4d7d1d302..98f5b76b7d 100644 --- a/tests/ut/python/parallel/test_hybird_parallel_activation.py +++ b/tests/ut/python/parallel/test_hybird_parallel_activation.py @@ -43,6 +43,10 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y, b) +def compile(net, x, y, b): + net.set_auto_parallel() + _executor.compile(net, x, y, b) + def test_matmul_tanh(): class Net(nn.Cell): def __init__(self, strategy1, strategy2, strategy3): @@ -66,7 +70,7 @@ def test_matmul_tanh(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_matmul_activation(): @@ -92,7 +96,7 @@ def test_matmul_activation(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_matmul_softmax(): @@ -118,7 +122,7 @@ def test_matmul_softmax(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_matmul_logsoftmax(): @@ -144,7 +148,7 @@ def test_matmul_logsoftmax(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_activations(): @@ -173,7 +177,7 @@ def test_activations(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_activations_repeated_calculation(): class Net(nn.Cell): @@ -204,7 +208,7 @@ def test_activations_repeated_calculation(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_activations_axis_tuple(): @@ -236,4 +240,4 @@ def test_activations_axis_tuple(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) diff --git a/tests/ut/python/parallel/test_l2normalize.py b/tests/ut/python/parallel/test_l2normalize.py index ae0ee29e83..327abf71eb 100644 --- a/tests/ut/python/parallel/test_l2normalize.py +++ 
b/tests/ut/python/parallel/test_l2normalize.py @@ -65,6 +65,7 @@ def test_l2normalize_matmul(): strategy3 = ((1, 1, 8), (1, 1, 8)) net = GradWrap(NetWithLoss(Net(strategy1, strategy2, strategy3))) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net.set_auto_parallel() x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) diff --git a/tests/ut/python/parallel/test_layer_norm.py b/tests/ut/python/parallel/test_layer_norm.py index 50f49dcc77..92bb26f346 100644 --- a/tests/ut/python/parallel/test_layer_norm.py +++ b/tests/ut/python/parallel/test_layer_norm.py @@ -50,6 +50,7 @@ _b = Tensor(np.ones([128, 64, 32, 16]), dtype=ms.float32) def compile(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) + train_net.set_auto_parallel() _executor.compile(train_net, _x, _b) context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_linear.py b/tests/ut/python/parallel/test_linear.py index ca31dd8721..6b40fc8963 100644 --- a/tests/ut/python/parallel/test_linear.py +++ b/tests/ut/python/parallel/test_linear.py @@ -62,6 +62,7 @@ def test_linear(): strategy3 = ((16, 1), (16, 1)) net = GradWrap(NetWithLoss(Net(strategy0, strategy1, strategy2), strategy3)) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net.set_auto_parallel() x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 32]), dtype=ms.float32) diff --git a/tests/ut/python/parallel/test_loop_two_matmul.py b/tests/ut/python/parallel/test_loop_two_matmul.py index 9d72406977..7716d14930 100644 --- a/tests/ut/python/parallel/test_loop_two_matmul.py +++ b/tests/ut/python/parallel/test_loop_two_matmul.py @@ -90,6 +90,7 @@ def test_two_matmul(): print(strategy1, strategy2) net = GradWrap(NetWithLoss(Net(strategy1, strategy2))) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net.set_auto_parallel() _executor.compile(net, x, y, b) count = count + 1 diff --git a/tests/ut/python/parallel/test_loss_and_optimizer.py b/tests/ut/python/parallel/test_loss_and_optimizer.py index b87f2a556a..123184fef5 100644 --- a/tests/ut/python/parallel/test_loss_and_optimizer.py +++ b/tests/ut/python/parallel/test_loss_and_optimizer.py @@ -35,6 +35,11 @@ class NetWithLoss(nn.Cell): return self.loss(predict, b)[0] +def compile(net, x, b): + net.set_auto_parallel() + _executor.compile(net, x, b) + + def test_momentum(): class Net(nn.Cell): def __init__(self, strategy1, strategy2, weight): @@ -66,7 +71,7 @@ def test_momentum(): train_net = TrainOneStepCell(net_with_loss, optimizer) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - _executor.compile(train_net, x, b) + compile(train_net, x, b) def test_momentum_with_loss_scale(): @@ -100,7 +105,7 @@ def test_momentum_with_loss_scale(): train_net = TrainOneStepCell(net_with_loss, optimizer) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - _executor.compile(train_net, x, b) + compile(train_net, x, b) def test_momentum_with_dynamic_lr(): @@ -135,7 +140,7 @@ def test_momentum_with_dynamic_lr(): train_net = TrainOneStepCell(net_with_loss, optimizer) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - _executor.compile(train_net, x, b) + compile(train_net, x, b) def test_momentum_with_loss_scale_and_dynamic_lr(): @@ -171,7 +176,7 @@ def test_momentum_with_loss_scale_and_dynamic_lr(): train_net = TrainOneStepCell(net_with_loss, optimizer) 
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - _executor.compile(train_net, x, b) + compile(train_net, x, b) def test_lars(): class Net(nn.Cell): @@ -205,4 +210,4 @@ def test_lars(): train_net = TrainOneStepCell(net_with_loss, optimizer) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - _executor.compile(train_net, x, b) + compile(train_net, x, b) diff --git a/tests/ut/python/parallel/test_matmul_dropout.py b/tests/ut/python/parallel/test_matmul_dropout.py index 536bffdbfa..436690d179 100644 --- a/tests/ut/python/parallel/test_matmul_dropout.py +++ b/tests/ut/python/parallel/test_matmul_dropout.py @@ -66,7 +66,7 @@ def test_two_matmul_dropout(): strategy3 = ((1, 8), (8, 1)) net = GradWrap(NetWithLoss(Net(strategy1, strategy2, strategy3))) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - + net.set_auto_parallel() x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) diff --git a/tests/ut/python/parallel/test_matmul_tensor.py b/tests/ut/python/parallel/test_matmul_tensor.py index 3987d66a5b..39bb2c6a37 100644 --- a/tests/ut/python/parallel/test_matmul_tensor.py +++ b/tests/ut/python/parallel/test_matmul_tensor.py @@ -45,6 +45,11 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y) +def compile(net, x, y): + net.set_auto_parallel() + _executor.compile(net, x, y) + + # model_parallel test def test_two_matmul(): class Net(nn.Cell): @@ -73,7 +78,7 @@ def test_two_matmul(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 128]), dtype=ms.float32) - _executor.compile(net, x, y) + compile(net, x, y) def test_matmul_mul_broadcast2(): @@ -97,8 +102,8 @@ def test_matmul_mul_broadcast2(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 1]), dtype=ms.float32) - - _executor.compile(net, x, y) + compile(net, x, y) + def test_two_matmul1(): class Net(nn.Cell): @@ -127,7 +132,8 @@ def test_two_matmul1(): x = Tensor(np.ones([128, 128]), dtype=ms.float32) y = Tensor(np.ones([128, 128]), dtype=ms.float32) - _executor.compile(net, x, y) + compile(net, x, y) + def test_matmul_add_tensor(): class Net(nn.Cell): @@ -151,4 +157,4 @@ def test_matmul_add_tensor(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) - _executor.compile(net, x, y) + compile(net, x, y) diff --git a/tests/ut/python/parallel/test_mix_precision_hybrid_parallel.py b/tests/ut/python/parallel/test_mix_precision_hybrid_parallel.py index e619363437..f67b854e79 100644 --- a/tests/ut/python/parallel/test_mix_precision_hybrid_parallel.py +++ b/tests/ut/python/parallel/test_mix_precision_hybrid_parallel.py @@ -76,6 +76,7 @@ def test_two_matmul(): strategy4 = ((2, 4), (4, 1)) net = GradWrap(NetWithLoss(Net2(strategy1, strategy2, strategy3, strategy4).add_flags_recursive(fp16=True))) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net.set_auto_parallel() x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 32]), dtype=ms.float32) diff --git a/tests/ut/python/parallel/test_model_callback.py b/tests/ut/python/parallel/test_model_callback.py deleted file mode 100644 index f48b10ef7f..0000000000 --- a/tests/ut/python/parallel/test_model_callback.py +++ /dev/null @@ -1,135 +0,0 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from mindspore.train import Model, ParallelMode -from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits -from mindspore.nn.optim.momentum import Momentum -from mindspore import Tensor -import mindspore as ms -import numpy as np -from mindspore.ops import operations as P -import mindspore.nn as nn -from mindspore.common.parameter import Parameter -from tests.dataset_mock import MindData -from mindspore import context -from mindspore.parallel._utils import _reset_op_id -from mindspore.train.callback import Callback - - -context.set_context(mode=context.GRAPH_MODE) - - -class Dataset(MindData): - def __init__(self, predict, label, length=3): - super(Dataset, self).__init__(size=length) - self.predict = predict - self.label = label - self.index = 0 - self.length = length - - def __iter__(self): - return self - - def __next__(self): - if self.index >= self.length: - raise StopIteration - self.index += 1 - return self.predict, self.label - - def reset(self): - self.index = 0 - - -class AllToAllNet(nn.Cell): - def __init__(self, strategy1): - super(AllToAllNet, self).__init__() - self.matmul = P.MatMul().set_strategy(((1, 1), (1, 8))) - self.matmul_weight = Parameter(Tensor(np.ones([128, 256]), dtype=ms.float32), name="weight") - self.transpose1 = P.Transpose().set_strategy(strategy1) - - def construct(self, x): - x = self.matmul(x, self.matmul_weight) - x = self.transpose1(x, (1, 0)) - return x - - -def all_to_all_net(strategy1): - return AllToAllNet(strategy1=strategy1) - - -class ContextCallback(Callback): - def begin(self, run_context): - parallel_mode = context.get_auto_parallel_context("parallel_mode") - assert parallel_mode == ParallelMode.STAND_ALONE - - def epoch_begin(self, run_context): - parallel_mode = context.get_auto_parallel_context("parallel_mode") - assert parallel_mode == ParallelMode.STAND_ALONE - - def epoch_end(self, run_context): - parallel_mode = context.get_auto_parallel_context("parallel_mode") - assert parallel_mode == ParallelMode.STAND_ALONE - - def step_begin(self, run_context): - parallel_mode = context.get_auto_parallel_context("parallel_mode") - assert parallel_mode == ParallelMode.STAND_ALONE - - def step_end(self, run_context): - parallel_mode = context.get_auto_parallel_context("parallel_mode") - assert parallel_mode == ParallelMode.STAND_ALONE - - def end(self, run_context): - parallel_mode = context.get_auto_parallel_context("parallel_mode") - assert parallel_mode == ParallelMode.STAND_ALONE - - -def all_to_all_common(strategy1): - learning_rate = 0.1 - momentum = 0.9 - epoch_size = 2 - - context.reset_auto_parallel_context() - context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, device_num=8) - predict = Tensor(np.ones([32, 128]), dtype=ms.float32) - label = Tensor(np.ones([32]), dtype=ms.int32) - dataset = Dataset(predict, label, 2) - net = all_to_all_net(strategy1) - - loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) - loss.softmax_cross_entropy.set_strategy(((8, 1), (8, 1))) - opt = Momentum(net.trainable_params(), learning_rate, momentum) - model = Model(net, loss, opt) - - context_callback = 
ContextCallback() - - model.train(epoch_size, dataset, dataset_sink_mode=False, callbacks=[context_callback]) - - parallel_mode = context.get_auto_parallel_context("parallel_mode") - assert parallel_mode == ParallelMode.SEMI_AUTO_PARALLEL - - context.set_auto_parallel_context(parallel_mode=ParallelMode.AUTO_PARALLEL, device_num=8) - model.train(epoch_size, dataset, dataset_sink_mode=False, callbacks=[context_callback]) - parallel_mode = context.get_auto_parallel_context("parallel_mode") - assert parallel_mode == ParallelMode.AUTO_PARALLEL - - context.reset_auto_parallel_context() - - -def test_model_callback(): - strategy1 = ((8, 1), ) - _reset_op_id() - all_to_all_common(strategy1) - - - diff --git a/tests/ut/python/parallel/test_neg.py b/tests/ut/python/parallel/test_neg.py index 0e08e8c096..4d9e16fd6f 100644 --- a/tests/ut/python/parallel/test_neg.py +++ b/tests/ut/python/parallel/test_neg.py @@ -41,6 +41,7 @@ _b = Tensor(np.ones([128, 64, 32]), dtype=ms.float32) def compile(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) + train_net.set_auto_parallel() _executor.compile(train_net, _x, _b) context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_one_hot_net.py b/tests/ut/python/parallel/test_one_hot_net.py index 87b4acfe37..fd06535b66 100644 --- a/tests/ut/python/parallel/test_one_hot_net.py +++ b/tests/ut/python/parallel/test_one_hot_net.py @@ -271,6 +271,7 @@ def test_bn_reshape_dense_bn_train_loss(): net = GradWrap(NetWithLoss(BNReshapeDenseBNNet())) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net.set_auto_parallel() _executor.compile(net, input, label) @@ -284,6 +285,7 @@ def test_semi_one_hot_net_batch(): net = SemiAutoOneHotNet(args=Args(), strategy=StrategyBatch()) net = GradWrap(NetWithLoss(net)) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net.set_auto_parallel() _executor.compile(net, input, label) diff --git a/tests/ut/python/parallel/test_one_weight_parameter.py b/tests/ut/python/parallel/test_one_weight_parameter.py index 03dd4f2920..5c7f324c47 100644 --- a/tests/ut/python/parallel/test_one_weight_parameter.py +++ b/tests/ut/python/parallel/test_one_weight_parameter.py @@ -63,11 +63,11 @@ def test_one_weight_parameter(): b = Tensor(np.ones([64, 64]), dtype=ms.float32) net = Net(strategy1, weight) - print ("======================================dict", net.__dict__) net_with_loss = NetWithLoss(net, strategy3) train_net = OneStepCell(net_with_loss) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + train_net.set_auto_parallel() _executor.compile(train_net, x, b) diff --git a/tests/ut/python/parallel/test_onehot.py b/tests/ut/python/parallel/test_onehot.py index b41e3b2188..6ed93f5241 100644 --- a/tests/ut/python/parallel/test_onehot.py +++ b/tests/ut/python/parallel/test_onehot.py @@ -64,6 +64,7 @@ class Net(nn.Cell): def compile_graph(strategy1, strategy2, strategy3, strategy4, auto=False, onthot_axis=-1): net = GradWrap(NetWithLoss(Net(strategy1, strategy2), strategy3, strategy4, axis=onthot_axis)) + net.set_auto_parallel() if auto: context.set_auto_parallel_context(parallel_mode="auto_parallel") else: diff --git a/tests/ut/python/parallel/test_optimizer.py b/tests/ut/python/parallel/test_optimizer.py index fea31fc984..395adda179 100644 --- a/tests/ut/python/parallel/test_optimizer.py +++ b/tests/ut/python/parallel/test_optimizer.py @@ -58,6 +58,6 @@ def test_dense_gen_graph(): predict = 
Tensor(np.ones([64, 512]).astype(np.float32) * 0.01) label = Tensor(np.zeros([64, 32]).astype(np.float32)) - + network.set_auto_parallel() _executor.compile(network, predict, label) - + diff --git a/tests/ut/python/parallel/test_optimizer_clone_weight.py b/tests/ut/python/parallel/test_optimizer_clone_weight.py index 84bda61dc1..969c2f0cff 100644 --- a/tests/ut/python/parallel/test_optimizer_clone_weight.py +++ b/tests/ut/python/parallel/test_optimizer_clone_weight.py @@ -34,6 +34,11 @@ class NetWithLoss(nn.Cell): return self.loss(predict, b)[0] +def compile(net, x, b): + net.set_auto_parallel() + _Executor().compile(net, x, b) + + def test_optimizer_clone_weight(): class Net(nn.Cell): def __init__(self, strategy1, strategy2, weight): @@ -66,7 +71,7 @@ def test_optimizer_clone_weight(): train_net = TrainOneStepCell(net_with_loss, optimizer) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - _Executor().compile(train_net, x, b) + compile(train_net, x, b) def test_optimizer_clone_weight2(): @@ -101,4 +106,4 @@ def test_optimizer_clone_weight2(): train_net = TrainOneStepCell(net_with_loss, optimizer) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - _Executor().compile(train_net, x, b) + compile(train_net, x, b) diff --git a/tests/ut/python/parallel/test_prelu.py b/tests/ut/python/parallel/test_prelu.py old mode 100755 new mode 100644 index 5638c9cdbd..67bdb3de04 --- a/tests/ut/python/parallel/test_prelu.py +++ b/tests/ut/python/parallel/test_prelu.py @@ -43,6 +43,11 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y) +def compile(net, x, y): + net.set_auto_parallel() + _executor.compile(net, x, y) + + def test_prelu_single_success1(): class Net(nn.Cell): def __init__(self): @@ -57,7 +62,8 @@ def test_prelu_single_success1(): net = GradWrap(NetWithLoss(Net())) x = Tensor(np.random.rand(1, 33, 4, 4), ms.float32) w = Tensor(np.random.rand(33), ms.float32) - _executor.compile(net, x, w) + compile(net, x, w) + def test_prelu_single_success2(): class Net(nn.Cell): @@ -73,7 +79,8 @@ def test_prelu_single_success2(): net = GradWrap(NetWithLoss(Net())) x = Tensor(np.random.rand(1, 33, 4, 4), ms.float32) w = Tensor([0.1], ms.float32) - _executor.compile(net, x, w) + compile(net, x, w) + def test_prelu_parallel_success1(): class Net(nn.Cell): @@ -90,7 +97,8 @@ def test_prelu_parallel_success1(): x = Tensor(np.random.rand(4, 4, 32, 64),dtype=ms.float32) w = Tensor(np.random.rand(4),dtype=ms.float32) net = GradWrap(NetWithLoss(Net(strategy))) - _executor.compile(net, x, w) + compile(net, x, w) + def test_prelu_parallel_success2(): class Net(nn.Cell): @@ -107,7 +115,8 @@ def test_prelu_parallel_success2(): x = Tensor(np.random.rand(4, 4, 32, 64),dtype=ms.float32) w = Tensor(np.random.rand(4),dtype=ms.float32) net = GradWrap(NetWithLoss(Net(strategy))) - _executor.compile(net, x, w) + compile(net, x, w) + def test_prelu_parallel_success3(): class NetWithLoss(nn.Cell): @@ -148,8 +157,10 @@ def test_prelu_parallel_success3(): y = Tensor(np.random.rand(64, 16),dtype=ms.float32) w = Tensor(np.random.rand(16),dtype=ms.float32) net = GradWrap(NetWithLoss(Net(strategy1, strategy2))) + net.set_auto_parallel() _executor.compile(net, x, y, w) + def test_prelu_parallel_success4(): class Net(nn.Cell): def __init__(self, strategy): @@ -165,7 +176,8 @@ def test_prelu_parallel_success4(): x = Tensor(np.random.rand(4, 16, 32, 64),dtype=ms.float32) w = Tensor(np.random.rand(16),dtype=ms.float32) net = GradWrap(NetWithLoss(Net(strategy))) - _executor.compile(net, x, 
w) + compile(net, x, w) + def test_prelu_parallel_success5(): class Net(nn.Cell): @@ -182,5 +194,4 @@ def test_prelu_parallel_success5(): x = Tensor(np.random.rand(4, 16, 32, 64),dtype=ms.float32) w = Tensor(np.random.rand(1),dtype=ms.float32) net = GradWrap(NetWithLoss(Net(strategy))) - _executor.compile(net, x, w) - + compile(net, x, w) diff --git a/tests/ut/python/parallel/test_reduce_method_info.py b/tests/ut/python/parallel/test_reduce_method_info.py index 57a8e54fd2..2686e593ac 100644 --- a/tests/ut/python/parallel/test_reduce_method_info.py +++ b/tests/ut/python/parallel/test_reduce_method_info.py @@ -42,6 +42,12 @@ class GradWrap(nn.Cell): def construct(self, x, y, b): return C.grad_all(self.network)(x, y, b) + +def compile(net, x, y, b): + net.set_auto_parallel() + _executor.compile(net, x, y, b) + + # model_parallel test def test_sum_mul(): class Net(nn.Cell): @@ -67,7 +73,8 @@ def test_sum_mul(): x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) + def test_sum_mul2(): class Net(nn.Cell): @@ -93,7 +100,8 @@ def test_sum_mul2(): x = Tensor(np.ones([128, 128, 64, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 128, 64, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) + def test_sum_mul3(): class Net(nn.Cell): @@ -119,7 +127,8 @@ def test_sum_mul3(): x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 32]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) + def test_sum_mul4(): class Net(nn.Cell): @@ -145,7 +154,7 @@ def test_sum_mul4(): x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 32, 1]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_sum_mul5(): @@ -169,7 +178,7 @@ def test_sum_mul5(): x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([1, 32, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_sum_mul6(): @@ -193,7 +202,7 @@ def test_sum_mul6(): x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 1, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_sum_mul7(): @@ -217,7 +226,7 @@ def test_sum_mul7(): x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([1, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_max_mul(): @@ -244,7 +253,7 @@ def test_max_mul(): x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 32]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_min_mul(): @@ -271,7 +280,7 @@ def test_min_mul(): x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([32, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_reduce_mean_mul_float32(): @@ -299,7 +308,7 @@ def test_reduce_mean_mul_float32(): y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) 
b = Tensor(np.ones([32, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) class ArgMaxWithValueNet(nn.Cell): @@ -334,7 +343,7 @@ def gen_inputs_and_compile(net): x = Tensor(np.ones([128, 64, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 64, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def tobefixed_test_arg_max_with_value_mul_semi_axis_parallel(): @@ -467,7 +476,7 @@ def test_cross_batch(): x = Tensor(np.ones([32, 64]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([32, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_cross_batch2(): @@ -495,7 +504,7 @@ def test_cross_batch2(): x = Tensor(np.ones([32, 64]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([32, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_cross_batch_auto(): @@ -515,12 +524,11 @@ def test_cross_batch_auto(): context.set_auto_parallel_context(device_num=8, global_rank=0) net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") - x = Tensor(np.ones([32, 64]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([32, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_max_empty_tuple(): @@ -548,4 +556,4 @@ def test_max_empty_tuple(): y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 32]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) diff --git a/tests/ut/python/parallel/test_reshape.py b/tests/ut/python/parallel/test_reshape.py index f72e5f909b..b3386a49ce 100644 --- a/tests/ut/python/parallel/test_reshape.py +++ b/tests/ut/python/parallel/test_reshape.py @@ -303,6 +303,11 @@ class ReshapeNet6(nn.Cell): return matmul2_o +def compile(net, input): + net.set_auto_parallel() + _executor.compile(net, input) + + def reshape_net2(backbone): batch_size = 16 device_num = 16 @@ -312,7 +317,7 @@ def reshape_net2(backbone): net = GradWrap(NetWithLoss(backbone)) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - _executor.compile(net, input) + compile(net, input) def test_reshape_net1_1(): @@ -475,7 +480,7 @@ def test_batchnorm_reshape_train(): net = GradWrap(NetWithLoss(BatchNormReshapeNet())) - _executor.compile(net, input) + compile(net, input) def bn_with_initialize(out_channels): @@ -513,7 +518,7 @@ def test_bn_reshape_dense_bn_train(): net = GradWrap(NetWithLoss(BNReshapeDenseBNNet())) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - _executor.compile(net, input) + compile(net, input) class ParallelReduceMeanNet(nn.Cell): diff --git a/tests/ut/python/parallel/test_reshape_parameter.py b/tests/ut/python/parallel/test_reshape_parameter.py index be35e8f43a..bfd35d9c0d 100644 --- a/tests/ut/python/parallel/test_reshape_parameter.py +++ b/tests/ut/python/parallel/test_reshape_parameter.py @@ -57,13 +57,18 @@ class Net(nn.Cell): return out +def compile(net, x, y): + net.set_auto_parallel() + _executor.compile(net, x, y) + + def test_reshape_parameter_data_parallel(): context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel") strategy = ((8, 1, 1), (8, 1, 1)) net = GradWrap(NetWithLoss(Net(strategy))) x = Tensor(np.ones([10000, 36]), dtype=ms.float32) y = Tensor(np.ones([10000, 36, 1]), dtype=ms.float32) - 
_executor.compile(net, x, y) + compile(net, x, y) def test_reshape_parameter_model_parallel(): @@ -72,4 +77,4 @@ def test_reshape_parameter_model_parallel(): net = GradWrap(NetWithLoss(Net(strategy))) x = Tensor(np.ones([10000, 36]), dtype=ms.float32) y = Tensor(np.ones([10000, 36, 1]), dtype=ms.float32) - _executor.compile(net, x, y) + compile(net, x, y) diff --git a/tests/ut/python/parallel/test_scalar_loss.py b/tests/ut/python/parallel/test_scalar_loss.py index b52d08cba1..cf9b9a9599 100644 --- a/tests/ut/python/parallel/test_scalar_loss.py +++ b/tests/ut/python/parallel/test_scalar_loss.py @@ -51,6 +51,7 @@ def test_sum_as_loss(): strategy1 = ((4, 1), ) net = GradWrap(Net(strategy0, strategy1)) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net.set_auto_parallel() x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 32]), dtype=ms.float32) diff --git a/tests/ut/python/parallel/test_semi_auto_two_subgraphs.py b/tests/ut/python/parallel/test_semi_auto_two_subgraphs.py index b572968a4f..3c287a25d3 100644 --- a/tests/ut/python/parallel/test_semi_auto_two_subgraphs.py +++ b/tests/ut/python/parallel/test_semi_auto_two_subgraphs.py @@ -105,4 +105,5 @@ def test_two_subgraphs(): context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel") net = TrainStepWrap(NetWithLoss(Net())) input_x = Tensor(np.ones([8, 8, 8, 8]), dtype=ms.float32) + net.set_auto_parallel() _executor.compile(net, input_x) diff --git a/tests/ut/python/parallel/test_sigmoid_cross_entropy_with_logits.py b/tests/ut/python/parallel/test_sigmoid_cross_entropy_with_logits.py index d59d053b07..0f890ced5e 100644 --- a/tests/ut/python/parallel/test_sigmoid_cross_entropy_with_logits.py +++ b/tests/ut/python/parallel/test_sigmoid_cross_entropy_with_logits.py @@ -41,6 +41,7 @@ _b = Tensor(np.ones([128, 64]), dtype=ms.float32) def compile(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) + train_net.set_auto_parallel() _executor.compile(train_net, _x, _b) context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_softmax_cross_entropy_expand.py b/tests/ut/python/parallel/test_softmax_cross_entropy_expand.py index c4b106edf5..ef4c2b53c3 100644 --- a/tests/ut/python/parallel/test_softmax_cross_entropy_expand.py +++ b/tests/ut/python/parallel/test_softmax_cross_entropy_expand.py @@ -25,5 +25,5 @@ def test_SoftmaxCrossEntropy(): logit = Tensor(np.ones([64, 512]), dtype=mstype.float32) label = Tensor(np.ones([64]), dtype=mstype.int32) context.set_auto_parallel_context(device_num=8, global_rank=0) - + net.set_auto_parallel() _executor.compile(net, logit, label) diff --git a/tests/ut/python/parallel/test_softmax_cross_entropy_loss.py b/tests/ut/python/parallel/test_softmax_cross_entropy_loss.py index ecfe00bd82..a097197a8b 100644 --- a/tests/ut/python/parallel/test_softmax_cross_entropy_loss.py +++ b/tests/ut/python/parallel/test_softmax_cross_entropy_loss.py @@ -42,6 +42,11 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y, b) +def compile(net, x, y, b): + net.set_auto_parallel() + _executor.compile(net, x, y, b) + + def test_softmax_cross_entropy_loss(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): @@ -64,7 +69,7 @@ def test_softmax_cross_entropy_loss(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 32]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + 
compile(net, x, y, b) def test_softmax_cross_entropy_loss_repeated_calculation(): @@ -89,7 +94,7 @@ def test_softmax_cross_entropy_loss_repeated_calculation(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 32]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_softmax_cross_entropy_loss_auto_batch_parallel(): @@ -111,4 +116,4 @@ def test_softmax_cross_entropy_loss_auto_batch_parallel(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 32]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) diff --git a/tests/ut/python/parallel/test_split_grad_sens.py b/tests/ut/python/parallel/test_split_grad_sens.py index 5e2d052ed5..fe7f4085ec 100644 --- a/tests/ut/python/parallel/test_split_grad_sens.py +++ b/tests/ut/python/parallel/test_split_grad_sens.py @@ -53,6 +53,11 @@ class GradWrap3(nn.Cell): return C.grad_all(self.network)(x, y, bias) +def compile(net, x, y, b): + net.set_auto_parallel() + _executor.compile(net, x, y, b) + + def test_no_grad(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): @@ -75,7 +80,7 @@ def test_no_grad(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_grad_sens_parameter_type(): @@ -103,6 +108,7 @@ def test_grad_sens_parameter_type(): sens = Tensor(np.ones([128, 64]), dtype=ms.float32) # net(x, y, b, sens) + net.set_auto_parallel() _executor.compile(net, x, y, b, sens) @@ -128,7 +134,7 @@ def test_grad_sens_tensor_type(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_grad_sens_scalar_broadcast(): @@ -152,4 +158,4 @@ def test_grad_sens_scalar_broadcast(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 32]), dtype=ms.float32) bias = Tensor(np.ones([64]), dtype=ms.float32) - _executor.compile(net, x, y, bias) + compile(net, x, y, bias) diff --git a/tests/ut/python/parallel/test_square.py b/tests/ut/python/parallel/test_square.py index e9c182a439..a2797f7333 100644 --- a/tests/ut/python/parallel/test_square.py +++ b/tests/ut/python/parallel/test_square.py @@ -43,6 +43,7 @@ _b = Tensor(np.ones([128, 64, 32]), dtype=ms.float32) def compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) + train_net.set_auto_parallel() _executor.compile(train_net, _x, _b) context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_squeeze_info.py b/tests/ut/python/parallel/test_squeeze_info.py index 3169e2fb1b..fb726055fe 100644 --- a/tests/ut/python/parallel/test_squeeze_info.py +++ b/tests/ut/python/parallel/test_squeeze_info.py @@ -37,6 +37,7 @@ _b = Tensor(np.ones([64, 32]), dtype=ms.float32) def compile(net): + net.set_auto_parallel() _executor.compile(net, _x, _b) context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_step_parallel.py b/tests/ut/python/parallel/test_step_parallel.py index 140e34a0d1..4f65e8e9d6 100644 --- a/tests/ut/python/parallel/test_step_parallel.py +++ b/tests/ut/python/parallel/test_step_parallel.py @@ -71,5 +71,5 @@ def test_two_matmul(): y = Tensor(np.ones([32, 128]), 
dtype=ms.float32) b = Tensor(np.ones([128, 128]), dtype=ms.float32) a = Tensor(np.ones([128, 128]), dtype=ms.float32) - + net.set_auto_parallel() _executor.compile(net, x, y, b, a) diff --git a/tests/ut/python/parallel/test_strategy_checkpoint.py b/tests/ut/python/parallel/test_strategy_checkpoint.py index d95b13f435..8edf6dbc72 100644 --- a/tests/ut/python/parallel/test_strategy_checkpoint.py +++ b/tests/ut/python/parallel/test_strategy_checkpoint.py @@ -81,7 +81,7 @@ def test_six_matmul_save(): strategy6 = ((4, 1), (1, 2)) net = GradWrap(NetWithLoss(Net(strategy1, strategy2, strategy3, strategy4, strategy5, strategy6))) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - + net.set_auto_parallel() x1 = Tensor(np.ones([32, 32]), dtype=ms.float32) x6 = Tensor(np.ones([128, 32]), dtype=ms.float32) _executor.compile(net, x1, x6) @@ -142,7 +142,7 @@ def test_six_matmul_load(): strategy7 = ((8, 1), (1, 1)) net = GradWrap(NetWithLoss(Net(strategy1, strategy3, strategy4, strategy5, strategy6, strategy7))) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - + net.set_auto_parallel() x1 = Tensor(np.ones([32, 32]), dtype=ms.float32) x6 = Tensor(np.ones([128, 32]), dtype=ms.float32) x7 = Tensor(np.ones([32, 32]), dtype=ms.float32) @@ -199,7 +199,7 @@ def test_six_matmul_save_auto(): set_auto_parallel_context(device_num=8, global_rank=0, strategy_ckpt_save_file="./strategy_stage1_auto.ckpt") net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") - + net.set_auto_parallel() x1 = Tensor(np.ones([32, 32]), dtype=ms.float32) x6 = Tensor(np.ones([128, 32]), dtype=ms.float32) _executor.compile(net, x1, x6) @@ -258,7 +258,7 @@ def test_six_matmul_load_auto(): strategy5 = ((2, 2), (2, 2)) net = GradWrap(NetWithLoss(Net(strategy1, strategy3, strategy4, strategy5))) context.set_auto_parallel_context(parallel_mode="auto_parallel") - + net.set_auto_parallel() x1 = Tensor(np.ones([32, 32]), dtype=ms.float32) x6 = Tensor(np.ones([128, 32]), dtype=ms.float32) x7 = Tensor(np.ones([32, 32]), dtype=ms.float32) diff --git a/tests/ut/python/parallel/test_sum_as_loss.py b/tests/ut/python/parallel/test_sum_as_loss.py index 3f33584b95..b5dc332eba 100644 --- a/tests/ut/python/parallel/test_sum_as_loss.py +++ b/tests/ut/python/parallel/test_sum_as_loss.py @@ -31,6 +31,12 @@ class GradWrap(nn.Cell): def construct(self, x, y, bias): return C.grad_all(self.network)(x, y, bias) + +def compile(net, x, y, bias): + net.set_auto_parallel() + _executor.compile(net, x, y, bias) + + def test_sum_as_loss(): class Net(nn.Cell): def __init__(self, strategy0, strategy1): @@ -53,7 +59,7 @@ def test_sum_as_loss(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 32]), dtype=ms.float32) bias = Tensor(np.ones([64]), dtype=ms.float32) - _executor.compile(net, x, y, bias) + compile(net, x, y, bias) def test_sum_as_loss2(): @@ -78,4 +84,4 @@ def test_sum_as_loss2(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 32]), dtype=ms.float32) bias = Tensor(np.ones([64]), dtype=ms.float32) - _executor.compile(net, x, y, bias) + compile(net, x, y, bias) diff --git a/tests/ut/python/parallel/test_two_matmul.py b/tests/ut/python/parallel/test_two_matmul.py index b2b48f0424..c32f46a269 100644 --- a/tests/ut/python/parallel/test_two_matmul.py +++ b/tests/ut/python/parallel/test_two_matmul.py @@ -43,6 +43,11 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y, b) +def compile(net, x, y, b): + net.set_auto_parallel() + 
_executor.compile(net, x, y, b) + + # model_parallel test def test_two_matmul(): class Net(nn.Cell): @@ -66,7 +71,8 @@ def test_two_matmul(): y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) + def test_two_matmul_repeated_calculation1(): class Net(nn.Cell): @@ -89,7 +95,7 @@ def test_two_matmul_repeated_calculation1(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) def test_two_matmul_repeated_calculation2(): @@ -113,4 +119,4 @@ def test_two_matmul_repeated_calculation2(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - _executor.compile(net, x, y, b) + compile(net, x, y, b) diff --git a/tests/ut/python/parallel/test_two_weights_parameter.py b/tests/ut/python/parallel/test_two_weights_parameter.py index 8104bf3b52..df61f3543f 100644 --- a/tests/ut/python/parallel/test_two_weights_parameter.py +++ b/tests/ut/python/parallel/test_two_weights_parameter.py @@ -74,5 +74,5 @@ def test_two_weights_parameter(): train_net = OneStepCell(net_with_loss) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - + train_net.set_auto_parallel() _executor.compile(train_net, x, b) diff --git a/tests/ut/python/parallel/test_virtual_dataset_3_input.py b/tests/ut/python/parallel/test_virtual_dataset_3_input.py index 36c94ae1c6..382195e3b9 100644 --- a/tests/ut/python/parallel/test_virtual_dataset_3_input.py +++ b/tests/ut/python/parallel/test_virtual_dataset_3_input.py @@ -70,6 +70,7 @@ def test_virtual_dataset_3_input(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 2048]), dtype=ms.float32) + net.set_auto_parallel() _executor.compile(net, x, y, b)
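Every test change above follows the same pattern: the top-level cell under test must now opt in to auto/semi-auto parallel compilation explicitly by calling set_auto_parallel() before the graph is compiled. A minimal sketch of the resulting test idiom, assembled only from calls that appear in this diff (Net is a hypothetical nn.Cell; the shapes and device numbers are illustrative):

    import numpy as np
    import mindspore as ms
    from mindspore import Tensor, context
    from mindspore.common.api import _executor

    context.set_auto_parallel_context(device_num=8, global_rank=0)
    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
    net = Net()                  # hypothetical cell under test
    net.set_auto_parallel()      # new: opt the cell into (semi_)auto_parallel compilation
    x = Tensor(np.ones([64, 32]), dtype=ms.float32)
    _executor.compile(net, x)    # compile with parallel strategies applied
    context.reset_auto_parallel_context()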