From 14fe72f38379fe769c4dc920028f327de47d302d Mon Sep 17 00:00:00 2001 From: Yi Huaijie Date: Fri, 22 May 2020 09:13:09 +0800 Subject: [PATCH] fix pylint warnings --- .../st/auto_parallel/onehot_model_parallel.py | 308 ++++---- .../soft_entropy_loss_expand_parallel.py | 551 +++++++------- tests/st/auto_parallel/test_expand_loss.py | 52 +- .../test_model_parallel_onehot.py | 43 +- .../test_resnet50_expand_loss_2p.py | 35 +- tests/ut/python/communication/__init__.py | 34 +- tests/ut/python/communication/test_comm.py | 1 - .../communication/test_data_parallel_lenet.py | 3 +- tests/ut/python/parallel/__init__.py | 2 +- .../add_relu/_test_add_relu_parallel_4p.py | 356 ++++----- .../_test_conv2d_parallel_4p.py | 712 +++++++++--------- .../dropout/_test_dropout_parallel_4p.py | 240 +++--- .../hcom/_test_allgather_4p.py | 308 ++++---- .../hcom/_test_allreduce_4p.py | 350 ++++----- .../_test_l2normalize_parallel_4p.py | 413 +++++----- .../loss/_test_loss_parallel_4p.py | 391 +++++----- .../matmul/_test_matmul_parallel_4p.py | 658 ++++++++-------- .../max/_test_max_parallel_4p.py | 427 ++++++----- .../need_fix_test_mul_softmax_parallel_4p.py | 401 +++++----- .../onehot/_test_onehot_parallel_4p.py | 296 ++++---- .../prelu/_test_prelu_parallel_4p.py | 412 +++++----- .../_test_reducemean_parallel_4p.py | 505 +++++++------ .../reshape/_test_reshape_parallel_4p.py | 412 +++++----- .../transpose/_test_transpose_parallel_4p.py | 471 ++++++------ .../parallel/test_add_relu_redistribution.py | 6 +- .../python/parallel/test_allreduce_fusion.py | 41 +- tests/ut/python/parallel/test_alltoall.py | 3 +- tests/ut/python/parallel/test_arithmetic.py | 40 +- .../parallel/test_auto_parallel_BN_PReLU.py | 1 - .../parallel/test_auto_parallel_arithmetic.py | 11 +- ...t_auto_parallel_assign_sub_with_ref_key.py | 3 +- .../test_auto_parallel_double_subgraphs.py | 1 - .../test_auto_parallel_four_matmul.py | 8 +- .../parallel/test_auto_parallel_inference.py | 2 +- .../test_auto_parallel_matmul_prelu.py | 2 +- .../test_auto_parallel_parameter_cast.py | 1 - .../test_auto_parallel_reduce_method.py | 8 +- .../parallel/test_auto_parallel_reshape.py | 4 +- .../parallel/test_auto_parallel_rhombus.py | 8 +- .../test_auto_parallel_softmax_loss.py | 1 - .../test_auto_parallel_transformer.py | 4 +- .../parallel/test_auto_parallel_two_bn.py | 22 +- .../parallel/test_auto_parallel_two_matmul.py | 12 +- .../parallel/test_auto_star_elimination.py | 5 +- tests/ut/python/parallel/test_batch_matmul.py | 14 +- .../parallel/test_batchnorm_batch_parallel.py | 6 +- .../ut/python/parallel/test_bn_prelu_cell.py | 13 +- tests/ut/python/parallel/test_bool_grad.py | 2 +- .../ut/python/parallel/test_broadcast_dict.py | 4 +- .../parallel/test_comparison_function_info.py | 22 +- tests/ut/python/parallel/test_dataset_util.py | 8 +- tests/ut/python/parallel/test_dense_matmul.py | 4 +- .../test_different_type_for_div_op.py | 8 +- .../python/parallel/test_dropout_do_mask.py | 12 +- .../parallel/test_element_wise_function.py | 22 +- tests/ut/python/parallel/test_expand_dims.py | 14 +- .../ut/python/parallel/test_forward_graph.py | 12 +- tests/ut/python/parallel/test_gather_v2.py | 1 - .../parallel/test_gather_v2_primitive.py | 20 +- tests/ut/python/parallel/test_get_next.py | 22 +- .../parallel/test_get_parameter_layout.py | 4 +- .../test_hybird_parallel_activation.py | 16 +- tests/ut/python/parallel/test_layer_norm.py | 14 +- tests/ut/python/parallel/test_linear.py | 1 - .../parallel/test_loss_and_optimizer.py | 19 +- 
.../ut/python/parallel/test_matmul_tensor.py | 10 +- tests/ut/python/parallel/test_neg.py | 12 +- tests/ut/python/parallel/test_one_dev.py | 2 +- tests/ut/python/parallel/test_one_hot_net.py | 17 +- .../parallel/test_one_weight_parameter.py | 1 - tests/ut/python/parallel/test_onehot.py | 9 - .../parallel/test_operator_model_parallel.py | 48 +- .../parallel/test_optimizer_clone_weight.py | 6 +- .../ut/python/parallel/test_parameter_init.py | 2 +- tests/ut/python/parallel/test_prelu.py | 14 +- tests/ut/python/parallel/test_prelu_cell.py | 8 +- .../parallel/test_reduce_method_info.py | 58 +- tests/ut/python/parallel/test_reshape.py | 23 +- .../python/parallel/test_reshape_parameter.py | 6 +- tests/ut/python/parallel/test_scalar_loss.py | 1 - .../test_set_auto_parallel_context.py | 24 +- .../test_sigmoid_cross_entropy_with_logits.py | 12 +- .../test_softmax_cross_entropy_loss.py | 9 +- .../python/parallel/test_split_grad_sens.py | 9 +- tests/ut/python/parallel/test_squeeze_info.py | 16 +- tests/ut/python/parallel/test_sum_as_loss.py | 7 +- tests/ut/python/parallel/test_transpose.py | 2 - tests/ut/python/parallel/test_two_matmul.py | 8 +- .../parallel/test_two_weights_parameter.py | 1 - .../parallel/test_virtual_dataset_3_input.py | 4 +- 90 files changed, 4030 insertions(+), 4081 deletions(-) diff --git a/tests/st/auto_parallel/onehot_model_parallel.py b/tests/st/auto_parallel/onehot_model_parallel.py index d553bb15ee..b104e7d8f8 100644 --- a/tests/st/auto_parallel/onehot_model_parallel.py +++ b/tests/st/auto_parallel/onehot_model_parallel.py @@ -1,154 +1,154 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -import numpy as np -import os -import pytest - -import mindspore as ms -import mindspore.communication.management as distributedTool -import mindspore.context as context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P - -device_num = 2 -device_id = int(os.getenv('DEVICE_ID')) -rank_id = 0 - - -def setup_module(): - global device_num - global rank_id - np.random.seed(0) - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") - context.set_context(device_id=device_id) - distributedTool.init() - device_num = distributedTool.get_group_size() - rank_id = distributedTool.get_rank() - context.set_auto_parallel_context(device_num=device_num, - global_rank=rank_id) - - -def teardown_module(): - distributedTool.release() - - -class Onehot(Cell): - def __init__(self, axis=-1, depth=1, on_value=1.0, off_value=0.0, strategy=None): - super(Onehot, self).__init__() - trans_stra = None - if strategy: - trans_stra = (strategy[0],) - self.onehot = P.OneHot().set_strategy(strategy=strategy) - self.depth = depth - self.on_value = Tensor(on_value, ms.float32) - self.off_value = Tensor(off_value, ms.float32) - self.transpose = P.Transpose().set_strategy(strategy=trans_stra) - self.sub = P.Sub().set_strategy(strategy=((1, 1), (1, 1))) - - def construct(self, input, indices): - x = self.onehot(indices, self.depth, self.on_value, self.off_value) - x = self.transpose(x, (1, 0)) - x = self.sub(input, x) - return x - - -class DataGenerator(): - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def generate_data(self, shape): - data = np.random.rand(*shape) - return data - - def input_data(self, shape): - data = (self.generate_data(shape) * 2).astype(np.float32) - stra = [1] * len(shape) - stra[0] = device_num - datas = self.get_parallel_blocks(data, stra) - return Tensor(data), Tensor(datas[rank_id]) - - def label_data(self, shape, classes): - data = (self.generate_data(shape) * (classes - 1)).astype(np.int32) - stra = [1] * len(shape) - stra[0] = device_num - datas = self.get_parallel_blocks(data, stra) - return Tensor(data), Tensor(datas[rank_id]) - - -class OneHotFactory: - def __init__(self, batch_size, classes, on_value=1.0, off_value=0.0, axis=None, strategy=None): - dataGen = DataGenerator() - self.input_full, self.input_part = dataGen.input_data((classes, batch_size)) - self.label_full, self.label_part = dataGen.label_data((batch_size,), classes) - self.depth = classes - self.on_value = on_value - self.off_value = off_value - self.axis = axis - self.strategy = strategy - - def forward_mindspore_single_impl(self): - net = Onehot(axis=self.axis, - depth=self.depth, - on_value=self.on_value, - off_value=self.off_value) - out = net(self.input_full, self.label_full) - return out - - def forward_mindspore_parallel_impl(self): - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net = Onehot(axis=self.axis, - depth=self.depth, - on_value=self.on_value, - off_value=self.off_value, strategy=self.strategy) - out = net.compile_and_run(self.input_full, self.label_full) - return out - - def forward_cmp(self): - out_mindspore_single = self.forward_mindspore_single_impl().asnumpy() - context.reset_auto_parallel_context() - 
out_mindspore_parallel = self.forward_mindspore_parallel_impl().asnumpy() - context.reset_auto_parallel_context() - assert np.allclose(out_mindspore_single, out_mindspore_parallel, 0.0001, 0.0001) - - -def test_reid_onehot_forward_int32_128_depth1024_model_parallel(): - fact = OneHotFactory(batch_size=128, - classes=1024, - on_value=1.000000, - off_value=0.000000, - axis=-1, - strategy=((1, device_num), (), ())) - fact.forward_cmp() - - -def test_reid_onehot_forward_int32_1024_depth128_model_parallel(): - fact = OneHotFactory(batch_size=1024, - classes=128, - on_value=1.000000, - off_value=0.000000, - axis=-1, - strategy=((1, device_num), (), ())) - fact.forward_cmp() +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import os +import numpy as np + +import mindspore as ms +import mindspore.communication.management as distributedTool +import mindspore.context as context +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.ops import operations as P + +device_num = 2 +device_id = int(os.getenv('DEVICE_ID')) +rank_id = 0 + + +def setup_module(): + global device_num + global rank_id + np.random.seed(0) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(device_id=device_id) + distributedTool.init() + device_num = distributedTool.get_group_size() + rank_id = distributedTool.get_rank() + context.set_auto_parallel_context(device_num=device_num, + global_rank=rank_id) + + +def teardown_module(): + distributedTool.release() + + +class Onehot(Cell): + def __init__(self, axis=-1, depth=1, on_value=1.0, off_value=0.0, strategy=None): + super(Onehot, self).__init__() + trans_stra = None + if strategy: + trans_stra = (strategy[0],) + self.onehot = P.OneHot().set_strategy(strategy=strategy) + self.depth = depth + self.on_value = Tensor(on_value, ms.float32) + self.off_value = Tensor(off_value, ms.float32) + self.transpose = P.Transpose().set_strategy(strategy=trans_stra) + self.sub = P.Sub().set_strategy(strategy=((1, 1), (1, 1))) + self.axis = axis + + def construct(self, input_, indices): + x = self.onehot(indices, self.depth, self.on_value, self.off_value) + x = self.transpose(x, (1, 0)) + x = self.sub(input_, x) + return x + + +class DataGenerator(): + def get_parallel_blocks(self, input_, strategy): + blocks = [input_] + i = 0 + for stra in strategy: + temp = [] + while len(blocks) > 0: + block = blocks.pop(0) + temp.extend(np.split(block, stra, axis=i)) + blocks.extend(temp) + i += 1 + return blocks + + def generate_data(self, shape): + data = np.random.rand(*shape) + return data + + def input_data(self, shape): + data = (self.generate_data(shape) * 2).astype(np.float32) + stra = [1] * len(shape) + stra[0] = device_num + datas = self.get_parallel_blocks(data, stra) + return Tensor(data), Tensor(datas[rank_id]) + + def label_data(self, shape, classes): + data = (self.generate_data(shape) * (classes - 
1)).astype(np.int32) + stra = [1] * len(shape) + stra[0] = device_num + datas = self.get_parallel_blocks(data, stra) + return Tensor(data), Tensor(datas[rank_id]) + + +class OneHotFactory: + def __init__(self, batch_size, classes, on_value=1.0, off_value=0.0, axis=None, strategy=None): + data_gen = DataGenerator() + self.input_full, self.input_part = data_gen.input_data((classes, batch_size)) + self.label_full, self.label_part = data_gen.label_data((batch_size,), classes) + self.depth = classes + self.on_value = on_value + self.off_value = off_value + self.axis = axis + self.strategy = strategy + + def forward_mindspore_single_impl(self): + net = Onehot(axis=self.axis, + depth=self.depth, + on_value=self.on_value, + off_value=self.off_value) + out = net(self.input_full, self.label_full) + return out + + def forward_mindspore_parallel_impl(self): + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net = Onehot(axis=self.axis, + depth=self.depth, + on_value=self.on_value, + off_value=self.off_value, strategy=self.strategy) + out = net.compile_and_run(self.input_full, self.label_full) + return out + + def forward_cmp(self): + out_mindspore_single = self.forward_mindspore_single_impl().asnumpy() + context.reset_auto_parallel_context() + out_mindspore_parallel = self.forward_mindspore_parallel_impl().asnumpy() + context.reset_auto_parallel_context() + assert np.allclose(out_mindspore_single, out_mindspore_parallel, 0.0001, 0.0001) + + +def test_reid_onehot_forward_int32_128_depth1024_model_parallel(): + fact = OneHotFactory(batch_size=128, + classes=1024, + on_value=1.000000, + off_value=0.000000, + axis=-1, + strategy=((1, device_num), (), ())) + fact.forward_cmp() + + +def test_reid_onehot_forward_int32_1024_depth128_model_parallel(): + fact = OneHotFactory(batch_size=1024, + classes=128, + on_value=1.000000, + off_value=0.000000, + axis=-1, + strategy=((1, device_num), (), ())) + fact.forward_cmp() diff --git a/tests/st/auto_parallel/soft_entropy_loss_expand_parallel.py b/tests/st/auto_parallel/soft_entropy_loss_expand_parallel.py index b58e713a1d..f0f3d2b23a 100644 --- a/tests/st/auto_parallel/soft_entropy_loss_expand_parallel.py +++ b/tests/st/auto_parallel/soft_entropy_loss_expand_parallel.py @@ -1,275 +1,276 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -import numpy as np -import os -import pytest -from numpy import allclose - -import mindspore as ms -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common import dtype as mstype -from mindspore.common.parameter import ParameterTuple, Parameter -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.nn.optim.momentum import Momentum -from mindspore.ops import composite as C -from mindspore.ops import functional as F -from mindspore.ops import operations as P -from mindspore.train import Model, ParallelMode -from mindspore.train.callback import Callback - -np.set_printoptions(threshold=np.inf) -device_num = 2 -device_id = int(os.getenv('DEVICE_ID')) -rank_id = 0 -embed = 128 -classes = 32 -batch_size = 32 * 2 -MatmulParamShape = (classes, embed) - - -def setup_module(): - global device_num - global rank_id - np.random.seed(0) - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") - context.set_context(device_id=device_id) - distributedTool.init() - rank_id = distributedTool.get_rank() - device_num = distributedTool.get_group_size() - context.set_auto_parallel_context(device_num=device_num, - global_rank=device_id) - - -def teardown_module(): - distributedTool.release() - - -class DataGenerator(): - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def generate_data(self, shape): - size = np.cumprod(shape)[-1] - num_range = min(size, 1000) - data = (np.arange(0, size) % num_range) / num_range - data = np.reshape(data, shape) - return data - - def input_data(self, shape): - data = (self.generate_data(shape) * 0.1).astype(np.float32) - stra = [1] * len(shape) - stra[0] = device_num - datas = self.get_parallel_blocks(data, stra) - return Tensor(data), Tensor(datas[rank_id]) - - def label_data(self, shape, embed): - data = (self.generate_data(shape) * (embed - 1)).astype(np.int32) - stra = [1] * len(shape) - stra[0] = device_num - datas = self.get_parallel_blocks(data, stra) - return Tensor(data), Tensor(datas[rank_id]) - - -class Dataset(): - def __init__(self, predict, label, length=1, input_num=2): - self.predict = predict - self.label = label - self.index = 0 - self.length = length - self.input_num = input_num - - def __iter__(self): - return self - - def __next__(self): - if self.index >= self.length: - raise StopIteration - self.index += 1 - if self.input_num == 2: - return self.predict, self.label - else: - return self.predict, - - def reset(self): - self.index = 0 - - def get_dataset_size(self): - return self.length - - def get_repeat_count(self): - return self.length - - -class ModelCallback(Callback): - def __init__(self): - super(ModelCallback, self).__init__() - self.loss_list = [] - - def epoch_end(self, run_context, *args): - cb_params = run_context.original_args() - result = cb_params.net_outputs - self.loss_list.append(result.asnumpy().mean()) - - -class SoftmaxCrossEntropyExpand(Cell): - def __init__(self, sparse=False, stra_list=[]): - super(SoftmaxCrossEntropyExpand, self).__init__() - if len(stra_list) < 11: - stra_list = [None] * 11 - self.exp = P.Exp() - self.reduce_sum = P.ReduceSum(keep_dims=True).set_strategy(strategy=stra_list[1]) - self.onehot = 
P.OneHot().set_strategy(strategy=stra_list[2]) - self.on_value = Tensor(1.0, mstype.float32) - self.off_value = Tensor(0.0, mstype.float32) - self.div = P.Div().set_strategy(strategy=stra_list[3]) - self.log = P.Log().set_strategy(strategy=stra_list[4]) - self.sum_cross_entropy = P.ReduceSum(keep_dims=False).set_strategy(strategy=stra_list[5]) - self.mul = P.Mul().set_strategy(strategy=stra_list[6]) - self.mul2 = P.Mul().set_strategy(strategy=stra_list[7]) - self.cast = P.Cast() - self.reduce_mean = P.ReduceMean(keep_dims=False).set_strategy(strategy=stra_list[8]) - self.sparse = sparse - self.reduce_max = P.ReduceMax(keep_dims=True).set_strategy(strategy=stra_list[9]) - self.sub = P.Sub().set_strategy(strategy=stra_list[10]) - - def construct(self, logit, label): - logit_max = self.reduce_max(logit, -1) - exp = self.exp(self.sub(logit, logit_max)) - exp_sum = self.reduce_sum(exp, -1) - softmax_result = self.div(exp, exp_sum) - if self.sparse: - label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value) - softmax_result_log = self.log(softmax_result) - loss = self.sum_cross_entropy((self.mul(softmax_result_log, label)), -1) - loss = self.mul2(F.scalar_to_array(-1.0), loss) - loss = self.reduce_mean(loss, -1) - return loss - - -class MatmulNet(Cell): - def __init__(self, matmul_stra=None, loss_stra_list=[]): - super(MatmulNet, self).__init__() - self.matmul = P.MatMul(transpose_b=True).set_strategy(strategy=matmul_stra) - self.loss = SoftmaxCrossEntropyExpand(sparse=True, stra_list=loss_stra_list) - self.weight = Parameter(Tensor(np.ones(MatmulParamShape), dtype=ms.float32), name="weight") - - def construct(self, x, label): - loss_input = self.matmul(x, self.weight) - out = self.loss(loss_input, label) - return out - - -class LossFactory(): - def __init__(self): - dataGen = DataGenerator() - self.input_full, self.input_part = dataGen.input_data((batch_size, embed)) - self.label_full, self.label_part = dataGen.label_data((batch_size,), embed) - - def single_matmul_trains(self): - single_callback = ModelCallback() - net = MatmulNet() - optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) - model = Model(net, optimizer=optimizer) - epoch_size = 6 - dataset = Dataset(self.input_full, self.label_full) - model.train(epoch_size, dataset, callbacks=single_callback, dataset_sink_mode=False) - loss_value = np.array(single_callback.loss_list) - return loss_value - - def data_parallel_matmul_trains(self): - parallel_callback = ModelCallback() - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net = MatmulNet() - optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) - model = Model(net, optimizer=optimizer) - epoch_size = 6 - dataset = Dataset(self.input_part, self.label_part) - model.train(epoch_size, dataset, callbacks=parallel_callback, dataset_sink_mode=False) - loss_value = np.array(parallel_callback.loss_list) - return loss_value - - def model_parallel_matmul_trains(self): - parallel_callback = ModelCallback() - matmul_stra = ((1, 1), (device_num, 1)) - reduce_max_stra = ((1, device_num),) - sub_stra = ((1, device_num), (1, 1)) - exp_stra = ((1, device_num),) - reduce_sum_stra = ((1, device_num),) - div_stra = ((1, device_num), (1, 1)) - log_stra = ((1, device_num),) - mul_stra = ((1, device_num), (1, device_num)) - sum_cross_entropy_stra = ((1, device_num),) - mul2_stra = ((), (device_num,)) - reduce_mean_stra = ((device_num,),) - onehot_stra = ((1, device_num), (), ()) - loss_stra_list = [exp_stra, 
reduce_sum_stra, onehot_stra, div_stra, log_stra, - sum_cross_entropy_stra, mul_stra, mul2_stra, reduce_mean_stra, reduce_max_stra, sub_stra] - context.set_auto_parallel_context(parallel_mode="auto_parallel") - net = MatmulNet(matmul_stra=matmul_stra, loss_stra_list=loss_stra_list) - optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) - model = Model(net, optimizer=optimizer) - epoch_size = 6 - dataset = Dataset(self.input_part, self.label_part) - model.train(epoch_size, dataset, callbacks=parallel_callback, dataset_sink_mode=False) - loss_value = np.array(parallel_callback.loss_list) - return loss_value - - def mix_parallel_matmul_trains(self): - parallel_callback = ModelCallback() - matmul_stra = ((device_num, 1), (1, 1)) - reduce_max_stra = ((1, device_num),) - sub_stra = ((device_num, 1), (device_num, 1)) - exp_stra = ((1, device_num),) - reduce_sum_stra = ((1, device_num),) - div_stra = ((1, device_num), (1, 1)) - log_stra = ((1, device_num),) - mul_stra = ((1, device_num), (1, device_num)) - sum_cross_entropy_stra = ((1, device_num),) - mul2_stra = ((), (device_num,)) - reduce_mean_stra = ((device_num,),) - onehot_stra = ((1, device_num), (), ()) - loss_stra_list = [exp_stra, reduce_sum_stra, onehot_stra, div_stra, log_stra, - sum_cross_entropy_stra, mul_stra, mul2_stra, reduce_mean_stra, reduce_max_stra, sub_stra] - context.set_auto_parallel_context(parallel_mode="auto_parallel") - net = MatmulNet(matmul_stra=matmul_stra, loss_stra_list=loss_stra_list) - optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) - model = Model(net, optimizer=optimizer) - epoch_size = 6 - dataset = Dataset(self.input_part, self.label_part) - model.train(epoch_size, dataset, callbacks=parallel_callback, dataset_sink_mode=False) - loss_value = np.array(parallel_callback.loss_list) - return loss_value - - -def test_all_trains(): - loss_factory = LossFactory() - context.reset_auto_parallel_context() - single_loss = loss_factory.single_matmul_trains() - model_parallel_loss = loss_factory.model_parallel_matmul_trains() - mix_parallel_loss = loss_factory.mix_parallel_matmul_trains() - assert allclose(single_loss, model_parallel_loss) - assert allclose(single_loss, mix_parallel_loss) +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import os +import numpy as np +from numpy import allclose + +import mindspore as ms +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore.common import dtype as mstype +from mindspore.common.parameter import Parameter +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.nn.optim.momentum import Momentum +from mindspore.ops import functional as F +from mindspore.ops import operations as P +from mindspore.train import Model +from mindspore.train.callback import Callback + +np.set_printoptions(threshold=np.inf) +device_num = 2 +device_id = int(os.getenv('DEVICE_ID')) +rank_id = 0 +embed = 128 +classes = 32 +batch_size = 32 * 2 +MatmulParamShape = (classes, embed) + + +def setup_module(): + global device_num + global rank_id + np.random.seed(0) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(device_id=device_id) + distributedTool.init() + rank_id = distributedTool.get_rank() + device_num = distributedTool.get_group_size() + context.set_auto_parallel_context(device_num=device_num, + global_rank=device_id) + + +def teardown_module(): + distributedTool.release() + + +class DataGenerator(): + def get_parallel_blocks(self, input_, strategy): + blocks = [input_] + i = 0 + for stra in strategy: + temp = [] + while len(blocks) > 0: + block = blocks.pop(0) + temp.extend(np.split(block, stra, axis=i)) + blocks.extend(temp) + i += 1 + return blocks + + def generate_data(self, shape): + size = np.cumprod(shape)[-1] + num_range = min(size, 1000) + data = (np.arange(0, size) % num_range) / num_range + data = np.reshape(data, shape) + return data + + def input_data(self, shape): + data = (self.generate_data(shape) * 0.1).astype(np.float32) + stra = [1] * len(shape) + stra[0] = device_num + datas = self.get_parallel_blocks(data, stra) + return Tensor(data), Tensor(datas[rank_id]) + + def label_data(self, shape, embed_): + data = (self.generate_data(shape) * (embed_ - 1)).astype(np.int32) + stra = [1] * len(shape) + stra[0] = device_num + datas = self.get_parallel_blocks(data, stra) + return Tensor(data), Tensor(datas[rank_id]) + + +class Dataset(): + def __init__(self, predict, label, length=1, input_num=2): + self.predict = predict + self.label = label + self.index = 0 + self.length = length + self.input_num = input_num + + def __iter__(self): + return self + + def __next__(self): + if self.index >= self.length: + raise StopIteration + self.index += 1 + if self.input_num == 2: + return (self.predict, self.label) + return (self.predict,) + + def reset(self): + self.index = 0 + + def get_dataset_size(self): + return self.length + + def get_repeat_count(self): + return self.length + + +class ModelCallback(Callback): + def __init__(self): + super(ModelCallback, self).__init__() + self.loss_list = [] + + def epoch_end(self, run_context): + cb_params = run_context.original_args() + result = cb_params.net_outputs + self.loss_list.append(result.asnumpy().mean()) + + +class SoftmaxCrossEntropyExpand(Cell): + def __init__(self, sparse=False, stra_list=None): + super(SoftmaxCrossEntropyExpand, self).__init__() + if stra_list is None: + stra_list = [] + if len(stra_list) < 11: + stra_list = [None] * 11 + self.exp = P.Exp() + self.reduce_sum = P.ReduceSum(keep_dims=True).set_strategy(strategy=stra_list[1]) + self.onehot = P.OneHot().set_strategy(strategy=stra_list[2]) + self.on_value = Tensor(1.0, 
mstype.float32) + self.off_value = Tensor(0.0, mstype.float32) + self.div = P.Div().set_strategy(strategy=stra_list[3]) + self.log = P.Log().set_strategy(strategy=stra_list[4]) + self.sum_cross_entropy = P.ReduceSum(keep_dims=False).set_strategy(strategy=stra_list[5]) + self.mul = P.Mul().set_strategy(strategy=stra_list[6]) + self.mul2 = P.Mul().set_strategy(strategy=stra_list[7]) + self.cast = P.Cast() + self.reduce_mean = P.ReduceMean(keep_dims=False).set_strategy(strategy=stra_list[8]) + self.sparse = sparse + self.reduce_max = P.ReduceMax(keep_dims=True).set_strategy(strategy=stra_list[9]) + self.sub = P.Sub().set_strategy(strategy=stra_list[10]) + + def construct(self, logit, label): + logit_max = self.reduce_max(logit, -1) + exp = self.exp(self.sub(logit, logit_max)) + exp_sum = self.reduce_sum(exp, -1) + softmax_result = self.div(exp, exp_sum) + if self.sparse: + label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value) + softmax_result_log = self.log(softmax_result) + loss = self.sum_cross_entropy((self.mul(softmax_result_log, label)), -1) + loss = self.mul2(F.scalar_to_array(-1.0), loss) + loss = self.reduce_mean(loss, -1) + return loss + + +class MatmulNet(Cell): + def __init__(self, matmul_stra=None, loss_stra_list=None): + super(MatmulNet, self).__init__() + if loss_stra_list is None: + loss_stra_list = [] + self.matmul = P.MatMul(transpose_b=True).set_strategy(strategy=matmul_stra) + self.loss = SoftmaxCrossEntropyExpand(sparse=True, stra_list=loss_stra_list) + self.weight = Parameter(Tensor(np.ones(MatmulParamShape), dtype=ms.float32), name="weight") + + def construct(self, x, label): + loss_input = self.matmul(x, self.weight) + out = self.loss(loss_input, label) + return out + + +class LossFactory(): + def __init__(self): + data_gen = DataGenerator() + self.input_full, self.input_part = data_gen.input_data((batch_size, embed)) + self.label_full, self.label_part = data_gen.label_data((batch_size,), embed) + + def single_matmul_trains(self): + single_callback = ModelCallback() + net = MatmulNet() + optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) + model = Model(net, optimizer=optimizer) + epoch_size = 6 + dataset = Dataset(self.input_full, self.label_full) + model.train(epoch_size, dataset, callbacks=single_callback, dataset_sink_mode=False) + loss_value = np.array(single_callback.loss_list) + return loss_value + + def data_parallel_matmul_trains(self): + parallel_callback = ModelCallback() + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net = MatmulNet() + optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) + model = Model(net, optimizer=optimizer) + epoch_size = 6 + dataset = Dataset(self.input_part, self.label_part) + model.train(epoch_size, dataset, callbacks=parallel_callback, dataset_sink_mode=False) + loss_value = np.array(parallel_callback.loss_list) + return loss_value + + def model_parallel_matmul_trains(self): + parallel_callback = ModelCallback() + matmul_stra = ((1, 1), (device_num, 1)) + reduce_max_stra = ((1, device_num),) + sub_stra = ((1, device_num), (1, 1)) + exp_stra = ((1, device_num),) + reduce_sum_stra = ((1, device_num),) + div_stra = ((1, device_num), (1, 1)) + log_stra = ((1, device_num),) + mul_stra = ((1, device_num), (1, device_num)) + sum_cross_entropy_stra = ((1, device_num),) + mul2_stra = ((), (device_num,)) + reduce_mean_stra = ((device_num,),) + onehot_stra = ((1, device_num), (), ()) + loss_stra_list = [exp_stra, reduce_sum_stra, onehot_stra, 
div_stra, log_stra, + sum_cross_entropy_stra, mul_stra, mul2_stra, reduce_mean_stra, reduce_max_stra, sub_stra] + context.set_auto_parallel_context(parallel_mode="auto_parallel") + net = MatmulNet(matmul_stra=matmul_stra, loss_stra_list=loss_stra_list) + optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) + model = Model(net, optimizer=optimizer) + epoch_size = 6 + dataset = Dataset(self.input_part, self.label_part) + model.train(epoch_size, dataset, callbacks=parallel_callback, dataset_sink_mode=False) + loss_value = np.array(parallel_callback.loss_list) + return loss_value + + def mix_parallel_matmul_trains(self): + parallel_callback = ModelCallback() + matmul_stra = ((device_num, 1), (1, 1)) + reduce_max_stra = ((1, device_num),) + sub_stra = ((device_num, 1), (device_num, 1)) + exp_stra = ((1, device_num),) + reduce_sum_stra = ((1, device_num),) + div_stra = ((1, device_num), (1, 1)) + log_stra = ((1, device_num),) + mul_stra = ((1, device_num), (1, device_num)) + sum_cross_entropy_stra = ((1, device_num),) + mul2_stra = ((), (device_num,)) + reduce_mean_stra = ((device_num,),) + onehot_stra = ((1, device_num), (), ()) + loss_stra_list = [exp_stra, reduce_sum_stra, onehot_stra, div_stra, log_stra, + sum_cross_entropy_stra, mul_stra, mul2_stra, reduce_mean_stra, reduce_max_stra, sub_stra] + context.set_auto_parallel_context(parallel_mode="auto_parallel") + net = MatmulNet(matmul_stra=matmul_stra, loss_stra_list=loss_stra_list) + optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) + model = Model(net, optimizer=optimizer) + epoch_size = 6 + dataset = Dataset(self.input_part, self.label_part) + model.train(epoch_size, dataset, callbacks=parallel_callback, dataset_sink_mode=False) + loss_value = np.array(parallel_callback.loss_list) + return loss_value + + +def test_all_trains(): + loss_factory = LossFactory() + context.reset_auto_parallel_context() + single_loss = loss_factory.single_matmul_trains() + model_parallel_loss = loss_factory.model_parallel_matmul_trains() + mix_parallel_loss = loss_factory.mix_parallel_matmul_trains() + assert allclose(single_loss, model_parallel_loss) + assert allclose(single_loss, mix_parallel_loss) diff --git a/tests/st/auto_parallel/test_expand_loss.py b/tests/st/auto_parallel/test_expand_loss.py index ed309ee2b8..d5148e35e2 100644 --- a/tests/st/auto_parallel/test_expand_loss.py +++ b/tests/st/auto_parallel/test_expand_loss.py @@ -1,26 +1,26 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -import os -import pytest - - -@pytest.mark.level0 -@pytest.mark.platform_x86_ascend_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.env_single -def test_expand_loss(): - sh_path = os.path.split(os.path.realpath(__file__))[0] - ret = os.system(f"sh {sh_path}/run_auto_parallel_loss_expand.sh") - assert (ret == 0) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import os +import pytest + + +@pytest.mark.level0 +@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_arm_ascend_training +@pytest.mark.env_single +def test_expand_loss(): + sh_path = os.path.split(os.path.realpath(__file__))[0] + ret = os.system(f"sh {sh_path}/run_auto_parallel_loss_expand.sh") + assert ret == 0 diff --git a/tests/st/auto_parallel/test_model_parallel_onehot.py b/tests/st/auto_parallel/test_model_parallel_onehot.py index f0ad888ecc..8f5d2dbe6f 100644 --- a/tests/st/auto_parallel/test_model_parallel_onehot.py +++ b/tests/st/auto_parallel/test_model_parallel_onehot.py @@ -1,22 +1,21 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -import os -import pytest - - -def test_expand_loss(): - ret = os.system("sh run_onehot_model_parallel.sh") - assert (ret == 0) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import os + + +def test_expand_loss(): + ret = os.system("sh run_onehot_model_parallel.sh") + assert ret == 0 diff --git a/tests/st/auto_parallel/test_resnet50_expand_loss_2p.py b/tests/st/auto_parallel/test_resnet50_expand_loss_2p.py index cb03a5b032..e316f6ae06 100644 --- a/tests/st/auto_parallel/test_resnet50_expand_loss_2p.py +++ b/tests/st/auto_parallel/test_resnet50_expand_loss_2p.py @@ -13,8 +13,8 @@ # limitations under the License. # ============================================================================ -import numpy as np import os +import numpy as np import pytest import mindspore.common.dtype as mstype @@ -37,31 +37,29 @@ init() context.set_auto_parallel_context(mirror_mean=True, parallel_mode=ParallelMode.AUTO_PARALLEL) -def weight_variable(shape, factor=0.1): +def weight_variable(): return One() def _conv3x3(in_channels, out_channels, stride=1, padding=0, pad_mode='same'): - init_value = weight_variable((out_channels, in_channels, 3, 3)) + init_value = weight_variable() return nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=padding, pad_mode=pad_mode, weight_init=init_value) def _conv1x1(in_channels, out_channels, stride=1, padding=0, pad_mode='same'): - init_value = weight_variable((out_channels, in_channels, 1, 1)) + init_value = weight_variable() return nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, padding=padding, pad_mode=pad_mode, weight_init=init_value) def _conv7x7(in_channels, out_channels, stride=1, padding=0, pad_mode='same'): - init_value = weight_variable((out_channels, in_channels, 7, 7)) + init_value = weight_variable() return nn.Conv2d(in_channels, out_channels, kernel_size=7, stride=stride, padding=padding, pad_mode=pad_mode, weight_init=init_value) def _fused_bn(channels, momentum=0.9): - init_weight = weight_variable((channels,)) - init_bias = weight_variable((channels,)) return nn.BatchNorm2d(channels, momentum=momentum) @@ -210,8 +208,8 @@ class ResNet(nn.Cell): self.mean = P.ReduceMean(keep_dims=True) self.end_point = nn.Dense(2048, num_classes, has_bias=True, - weight_init=weight_variable((num_classes, 2048)), - bias_init=weight_variable((num_classes,))) + weight_init=weight_variable(), + bias_init=weight_variable()) self.squeeze = P.Squeeze() self.cast = P.Cast() @@ -345,9 +343,8 @@ class Dataset(): raise StopIteration self.index += 1 if self.input_num == 2: - return self.predict, self.label - else: - return self.predict, + return (self.predict, self.label) + return (self.predict,) def reset(self): self.index = 0 @@ -364,7 +361,7 @@ class ModelCallback(Callback): super(ModelCallback, self).__init__() self.loss_list = [] - def epoch_end(self, run_context, *args): + def epoch_end(self, run_context): cb_params = run_context.original_args() result = cb_params.net_outputs self.loss_list.append(result.asnumpy().mean()) @@ -376,9 +373,9 @@ class ModelCallback(Callback): def test_train_feed(num_classes=8192): set_algo_parameters(elementwise_op_strategy_follow=True) parallel_callback = ModelCallback() - dataGen = DataGenerator() - input_full, input_part = dataGen.input_data((32 * 2, 3, 224, 224)) - label_full, label_part = dataGen.label_data((32 * 2,)) + data_gen = DataGenerator() + _, input_part = data_gen.input_data((32 * 2, 3, 224, 224)) + _, label_part = data_gen.label_data((32 * 2,)) dataset = Dataset(input_part, label_part) net = resnet50(num_classes) loss = SoftmaxCrossEntropyExpand(sparse=True) @@ -396,9 +393,9 @@ def 
test_train_feed(num_classes=8192): def test_train_feed2(num_classes=1001): set_algo_parameters(elementwise_op_strategy_follow=True) parallel_callback = ModelCallback() - dataGen = DataGenerator() - input_full, input_part = dataGen.input_data((32 * 2, 3, 224, 224)) - label_full, label_part = dataGen.label_data((32 * 2,)) + data_gen = DataGenerator() + _, input_part = data_gen.input_data((32 * 2, 3, 224, 224)) + _, label_part = data_gen.label_data((32 * 2,)) dataset = Dataset(input_part, label_part) net = resnet50(num_classes) loss = SoftmaxCrossEntropyExpand(sparse=True) diff --git a/tests/ut/python/communication/__init__.py b/tests/ut/python/communication/__init__.py index 6cb7088820..83f9a36dcc 100644 --- a/tests/ut/python/communication/__init__.py +++ b/tests/ut/python/communication/__init__.py @@ -1,17 +1,17 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys - -sys.path.append("../../..") +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys + +sys.path.append("../../..") diff --git a/tests/ut/python/communication/test_comm.py b/tests/ut/python/communication/test_comm.py index c9d20674f3..7688adb41a 100644 --- a/tests/ut/python/communication/test_comm.py +++ b/tests/ut/python/communication/test_comm.py @@ -25,7 +25,6 @@ from mindspore.nn import Dense from mindspore.nn import Momentum from mindspore.nn import ReLU from mindspore.nn import TrainOneStepCell, WithLossCell -from mindspore.ops.operations import Split from mindspore.ops.operations.comm_ops import AllReduce, AllGather, _AlltoAll, ReduceOp, ReduceScatter from mindspore.ops.operations.comm_ops import Broadcast diff --git a/tests/ut/python/communication/test_data_parallel_lenet.py b/tests/ut/python/communication/test_data_parallel_lenet.py index a9840cd734..7a5062b941 100755 --- a/tests/ut/python/communication/test_data_parallel_lenet.py +++ b/tests/ut/python/communication/test_data_parallel_lenet.py @@ -16,8 +16,8 @@ @File : test_data_parallel_lenet.py @Desc : test data parallel lenet """ -import numpy as np import os +import numpy as np import mindspore.context as context import mindspore.nn as nn @@ -80,7 +80,6 @@ def test_lenet5_train_step_training_pynative(): context.reset_auto_parallel_context() context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, device_num=8, mirror_mean=True) - size = 3 predict = Tensor(np.ones([1, 1, 32, 32]).astype(np.float32) * 0.01) label = Tensor(np.zeros([1, 10]).astype(np.float32)) DatasetLenet(predict, label, 2) diff --git a/tests/ut/python/parallel/__init__.py b/tests/ut/python/parallel/__init__.py index 653fde4f3f..edd469899e 100644 --- a/tests/ut/python/parallel/__init__.py +++ b/tests/ut/python/parallel/__init__.py @@ -19,7 +19,7 @@ from mindspore.parallel._utils import _reset_op_id from mindspore.parallel.algo_parameter_config import reset_algo_parameters -def setup_module(module): +def setup_module(): auto_parallel_context().set_enable_all_reduce_fusion(enable_all_reduce_fusion=True) context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False) reset_cost_model_context() diff --git a/tests/ut/python/parallel/parallel_end_to_end/add_relu/_test_add_relu_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/add_relu/_test_add_relu_parallel_4p.py index 32e19be33c..b1a4423b32 100644 --- a/tests/ut/python/parallel/parallel_end_to_end/add_relu/_test_add_relu_parallel_4p.py +++ b/tests/ut/python/parallel/parallel_end_to_end/add_relu/_test_add_relu_parallel_4p.py @@ -1,178 +1,178 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import numpy as np -import os -import pytest - -import mindspore as ms -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class AddRelu(Cell): - def __init__(self, strategy0=None, strategy1=None): - super(AddRelu, self).__init__() - self.add = P.TensorAdd(strategy=strategy0) - self.relu = P.ReLU(strategy=strategy1) - - def construct(self, x, z): - out = self.add(x, z) - return self.relu(out) - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, x, y, output_grad): - return grad_all_with_sens(self.network)(x, y, output_grad) - - -class AddReluFactory: - def __init__(self, input_shape, strategy0, strategy1): - prefix = "" - size = 1 - for s in input_shape: - prefix = prefix + str(s) - size = size * s - self.prefix = prefix - number_range = min(1000, size) - self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( - np.float32) - self.input_np2 = 1.0 - self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1, - input_shape).astype(np.float32) - self.strategy0 = strategy0 - self.strategy1 = strategy1 - need_dev_num = 1 - need_dev_num_ = 1 - for s in strategy0[1]: - need_dev_num = need_dev_num * s - for s in strategy1[1]: - need_dev_num_ = need_dev_num_ * s - self.x_id = device_id % need_dev_num - self.y_id = device_id % need_dev_num - self.out_id = device_id % need_dev_num_ - - def forward_mindspore_impl(self): - net = AddRelu() - x = Tensor(self.input_np1) - y = Tensor(self.input_np2, ms.float32) - out = net(x, y) - return out.asnumpy() - - def forward_mindspore_parallel_impl(self): - net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net.set_auto_parallel() - x = Tensor(self.input_np1) - y = Tensor(self.input_np2, ms.float32) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(self.input_np2, ms.float32) - out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) - return out.asnumpy() - - def grad_mindspore_impl(self): - output_grad = Tensor(self.output_grad_np) - x = Tensor(self.input_np1) - y = Tensor(self.input_np2, ms.float32) - net = AddRelu() - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, output_grad) - return input_grad - - def grad_mindspore_parallel_impl(self): - output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy1[1]) - output_grad = Tensor(output_grads[self.out_id]) - x = Tensor(self.input_np1) - y = Tensor(self.input_np2, ms.float32) - net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1) - grad_net = Grad(net) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - 
grad_net.set_auto_parallel() - grad_net.set_train() - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(self.input_np2, ms.float32) - input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad], - parallel_inputs_run=[x1, y1, output_grad]) - return input_grad - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def forward_cmp(self): - out_mindspore = self.forward_mindspore_impl() - out_mindspore_parallel = self.forward_mindspore_parallel_impl() - out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy1[1]) - assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) - - def grad_cmp(self): - input_grad_mindspore = self.grad_mindspore_impl() - input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() - input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() - input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() - input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() - input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() - assert np.allclose(input_grad_mindspore1, input_grad_mindspore_parallel1, 0.0001, 0.0001) - - -@pytest.mark.reid_forward -def test_reid_add_relu_input_256_64(): - stra0 = (0, (2, 2), ()) - stra1 = (0, (2, 2)) - fact = AddReluFactory(input_shape=(256, 64), strategy0=stra0, strategy1=stra1) - fact.forward_cmp() - - -@pytest.mark.reid_grad -def test_reid_grad_add_relu_input_256_64(): - stra0 = (0, (2, 2), ()) - stra1 = (0, (2, 2)) - fact = AddReluFactory(input_shape=(256, 64), strategy0=stra0, strategy1=stra1) - fact.grad_cmp() +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import numpy as np +import pytest + +import mindspore as ms +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.ops import operations as P +from mindspore.ops.composite import grad_all_with_sens + +device_num = 4 +device_id = int(os.environ["RANK_ID"]) +path = "./output/" + + +def setup_module(): + print("~~~~~~~~~~~set up~~~~~~~~~~~~~") + context.set_context(mode=context.GRAPH_MODE) + context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) + distributedTool.init() + distributedTool.create_group("0-3", [0, 1, 2, 3]) + print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") + + +def teardown_module(): + print("~~~~~~~~~~~~tear down~~~~~~~~~~") + + +class AddRelu(Cell): + def __init__(self, strategy0=None, strategy1=None): + super(AddRelu, self).__init__() + self.add = P.TensorAdd(strategy=strategy0) + self.relu = P.ReLU(strategy=strategy1) + + def construct(self, x, z): + out = self.add(x, z) + return self.relu(out) + + +class Grad(Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.network = network + + def construct(self, x, y, output_grad): + return grad_all_with_sens(self.network)(x, y, output_grad) + + +class AddReluFactory: + def __init__(self, input_shape, strategy0, strategy1): + prefix = "" + size = 1 + for s in input_shape: + prefix = prefix + str(s) + size = size * s + self.prefix = prefix + number_range = min(1000, size) + self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( + np.float32) + self.input_np2 = 1.0 + self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1, + input_shape).astype(np.float32) + self.strategy0 = strategy0 + self.strategy1 = strategy1 + need_dev_num = 1 + need_dev_num_ = 1 + for s in strategy0[1]: + need_dev_num = need_dev_num * s + for s in strategy1[1]: + need_dev_num_ = need_dev_num_ * s + self.x_id = device_id % need_dev_num + self.y_id = device_id % need_dev_num + self.out_id = device_id % need_dev_num_ + + def forward_mindspore_impl(self): + net = AddRelu() + x = Tensor(self.input_np1) + y = Tensor(self.input_np2, ms.float32) + out = net(x, y) + return out.asnumpy() + + def forward_mindspore_parallel_impl(self): + net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net.set_auto_parallel() + x = Tensor(self.input_np1) + y = Tensor(self.input_np2, ms.float32) + inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + x1 = Tensor(inputs_x[self.x_id]) + y1 = Tensor(self.input_np2, ms.float32) + out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) + return out.asnumpy() + + def grad_mindspore_impl(self): + output_grad = Tensor(self.output_grad_np) + x = Tensor(self.input_np1) + y = Tensor(self.input_np2, ms.float32) + net = AddRelu() + grad_net = Grad(net) + grad_net.set_train() + input_grad = grad_net(x, y, output_grad) + return input_grad + + def grad_mindspore_parallel_impl(self): + output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy1[1]) + output_grad = Tensor(output_grads[self.out_id]) + x = Tensor(self.input_np1) + y = Tensor(self.input_np2, ms.float32) + net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1) + grad_net = Grad(net) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + 
grad_net.set_auto_parallel() + grad_net.set_train() + inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + x1 = Tensor(inputs_x[self.x_id]) + y1 = Tensor(self.input_np2, ms.float32) + input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad], + parallel_inputs_run=[x1, y1, output_grad]) + return input_grad + + def get_parallel_blocks(self, input_, strategy): + blocks = [input_] + i = 0 + for stra in strategy: + temp = [] + while len(blocks) > 0: + block = blocks.pop(0) + temp.extend(np.split(block, stra, axis=i)) + blocks.extend(temp) + i += 1 + return blocks + + def forward_cmp(self): + out_mindspore = self.forward_mindspore_impl() + out_mindspore_parallel = self.forward_mindspore_parallel_impl() + out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy1[1]) + assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) + + def grad_cmp(self): + input_grad_mindspore = self.grad_mindspore_impl() + input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() + _ = input_grad_mindspore[0].asnumpy() + input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() + _ = input_grad_mindspore_parallel[0].asnumpy() + input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() + assert np.allclose(input_grad_mindspore1, input_grad_mindspore_parallel1, 0.0001, 0.0001) + + +@pytest.mark.reid_forward +def test_reid_add_relu_input_256_64(): + stra0 = (0, (2, 2), ()) + stra1 = (0, (2, 2)) + fact = AddReluFactory(input_shape=(256, 64), strategy0=stra0, strategy1=stra1) + fact.forward_cmp() + + +@pytest.mark.reid_grad +def test_reid_grad_add_relu_input_256_64(): + stra0 = (0, (2, 2), ()) + stra1 = (0, (2, 2)) + fact = AddReluFactory(input_shape=(256, 64), strategy0=stra0, strategy1=stra1) + fact.grad_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/batch_parallel/_test_conv2d_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/batch_parallel/_test_conv2d_parallel_4p.py index 0a839f45e8..b492312da6 100644 --- a/tests/ut/python/parallel/parallel_end_to_end/batch_parallel/_test_conv2d_parallel_4p.py +++ b/tests/ut/python/parallel/parallel_end_to_end/batch_parallel/_test_conv2d_parallel_4p.py @@ -1,356 +1,356 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import numpy as np -import os -from numpy import allclose - -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore._checkparam import check_bool, twice -from mindspore.common.initializer import initializer -from mindspore.common.parameter import Parameter -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class _Conv(Cell): - r"""Applies a N-D convolution over an input signal composed of several input - planes. - """ - - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride, - pad_mode, - padding, - dilation, - group, - has_bias, - weight_init, - bias_init): - super(_Conv, self).__init__() - self.in_channels = in_channels - self.out_channels = out_channels - self.kernel_size = kernel_size - self.stride = stride - self.pad_mode = pad_mode - self.padding = padding - self.dilation = dilation - self.group = group - self.has_bias = has_bias - if not (isinstance(in_channels, int) and in_channels > 0): - raise ValueError('Attr \'in_channels\' of \'Conv2D\' Op passed ' - + str(in_channels) + ', should be a int and greater than 0.') - if (not isinstance(kernel_size, tuple)) or len(kernel_size) != 2 or \ - (not isinstance(kernel_size[0], int)) or (not isinstance(kernel_size[1], int)) or \ - kernel_size[0] < 1 or kernel_size[1] < 1: - raise ValueError('Attr \'kernel_size\' of \'Conv2D\' Op passed ' - + str(self.kernel_size) + ', should be a int or tuple and equal to or greater than 1.') - if in_channels % group != 0: - raise ValueError('Attr \'in_channels\' of \'Conv2D\' Op must be divisible by ' - 'attr \'group\' of \'Conv2D\' Op.') - if out_channels % group != 0: - raise ValueError('Attr \'out_channels\' of \'Conv2D\' Op must be divisible by ' - 'attr \'group\' of \'Conv2D\' Op.') - - self.weight = Parameter(initializer( - weight_init, [out_channels, in_channels // group, *kernel_size]), name='weight') - - if check_bool(has_bias): - self.bias = Parameter(initializer( - bias_init, [out_channels]), name='bias') - else: - if bias_init != 'zeros': - print("Value of 'has_bias' is False, value of 'bias_init' will be ignored.") - self.bias = None - - def construct(self, *inputs): - raise NotImplementedError - - -class Conv2d(_Conv): - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - pad_mode='same', - padding=0, - dilation=1, - group=1, - has_bias=False, - weight_init='normal', - bias_init='zeros', - strategy=None): - kernel_size = twice(kernel_size) - super(Conv2d, self).__init__( - in_channels, - out_channels, - kernel_size, - stride, - pad_mode, - padding, - dilation, - group, - has_bias, - weight_init, - bias_init) - self.add = P.TensorAdd(strategy) - self.conv2d = P.Conv2D(out_channel=self.out_channels, - kernel_size=self.kernel_size, - mode=1, - pad_mode=self.pad_mode, - pad=self.padding, - stride=self.stride, - dilation=self.dilation, - group=self.group, - strategy=None) - self.bias_add = P.BiasAdd() - - 
def construct(self, input1, input2): - x = self.add(input1, input2) - if self.has_bias: - return self.bias_add(self.conv2d(x, self.weight), - self.bias) - return self.conv2d(x, self.weight) - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, input1, input2, output_grad): - return grad_all_with_sens(self.network)(input1, input2, output_grad) - - -class Conv2dFactory: - def __init__(self, input_shape, filter_shape, stride, pad_mode, padding, dilation, group, has_bias): - self.in_n, self.in_c, self.in_h, self.in_w = input_shape - self.out_c, self.kernel_c, self.kernel_h, self.kernel_w = filter_shape - self.stride = stride - self.pad_mode = pad_mode - self.padding = padding - self.dilation = dilation - self.group = group - self.strategy0 = (0, (4, 1, 1, 1), (1, 1, 1, 1)) - prefix = "" - input_size = 1 - filter_size = 1 - for s in input_shape: - prefix = prefix + str(s) + "_" - input_size = input_size * s - self.prefix = prefix - for s in filter_shape: - filter_size = filter_size * s - number_range1 = min(10, input_size) - number_range2 = min(10, filter_size) - self.input_np1 = np.reshape(np.arange(0, input_size) % number_range1 - number_range1 / 2, input_shape).astype( - np.float16) - self.input_np2 = np.reshape(np.arange(0, input_size) % number_range1 - number_range1 / 4, input_shape).astype( - np.float16) - self.weight_np = np.reshape(np.arange(0, filter_size) % number_range2 - number_range2 / 2, filter_shape).astype( - np.float16) - self.has_bias = has_bias - if self.has_bias is True: - self.bias_np = np.arange(0, self.out_c).astype(np.float16) - - self.out_shape = (128, 64, 56, 56) - out_size = 1 - for s in self.out_shape: - out_size = out_size * s - number_range3 = min(10, out_size) - self.output_grad_np = np.reshape(np.arange(0, out_size) % number_range3 - number_range3 / 2, - self.out_shape).astype(np.float16) - self.x_id = device_id % 4 - self.y_id = device_id % 4 - self.out_strategy = self.strategy0[1] - self.out_id = device_id % 4 - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def forward_conv2d_mindspore_impl(self): - input1 = Tensor(self.input_np1) - input2 = Tensor(self.input_np2) - weight = Tensor(self.weight_np) - if self.has_bias: - bias = Tensor(self.bias_np) - net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, - kernel_size=(self.kernel_h, self.kernel_w), - stride=self.stride, pad_mode=self.pad_mode, - padding=self.padding, dilation=self.dilation, - group=self.group, has_bias=True, weight_init=weight, - bias_init=bias) - else: - net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, - kernel_size=(self.kernel_h, self.kernel_w), - stride=self.stride, pad_mode=self.pad_mode, - padding=self.padding, dilation=self.dilation, - group=self.group, has_bias=False, weight_init=weight) - out = net(input1, input2) - return out.asnumpy() - - def forward_conv2d_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - weight = Tensor(self.weight_np) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - if self.has_bias: - bias = Tensor(self.bias_np) - net = Conv2d(in_channels=self.in_c, 
out_channels=self.out_c, - kernel_size=(self.kernel_h, self.kernel_w), - stride=self.stride, pad_mode=self.pad_mode, - padding=self.padding, dilation=self.dilation, - group=self.group, has_bias=True, weight_init=weight, - bias_init=bias, strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) - else: - net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, - kernel_size=(self.kernel_h, self.kernel_w), - stride=self.stride, pad_mode=self.pad_mode, - padding=self.padding, dilation=self.dilation, - group=self.group, has_bias=False, weight_init=weight, - strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net.set_auto_parallel() - out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) - return out.asnumpy() - - def grad_conv2d_mindspore_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - weight = Tensor(self.weight_np) - output_grad = Tensor(self.output_grad_np) - if self.has_bias: - bias = Tensor(self.bias_np) - net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, - kernel_size=(self.kernel_h, self.kernel_w), - stride=self.stride, pad_mode=self.pad_mode, - padding=self.padding, dilation=self.dilation, - group=self.group, has_bias=True, weight_init=weight, - bias_init=bias, ) - else: - net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, - kernel_size=(self.kernel_h, self.kernel_w), - stride=self.stride, pad_mode=self.pad_mode, - padding=self.padding, dilation=self.dilation, - group=self.group, has_bias=False, weight_init=weight) - - grad_net = Grad(net) - grad_net.set_train() - out_grad = grad_net(x, y, output_grad) - return out_grad - - def grad_conv2d_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - weight = Tensor(self.weight_np) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - output_grad = Tensor(self.output_grad_np) - output_grads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) - output_grad1 = Tensor(output_grads[self.out_id]) - if self.has_bias: - bias = Tensor(self.bias_np) - net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, - kernel_size=(self.kernel_h, self.kernel_w), - stride=self.stride, pad_mode=self.pad_mode, - padding=self.padding, dilation=self.dilation, - group=self.group, has_bias=True, weight_init=weight, - bias_init=bias, strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) - else: - net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, - kernel_size=(self.kernel_h, self.kernel_w), - stride=self.stride, pad_mode=self.pad_mode, - padding=self.padding, dilation=self.dilation, - group=self.group, has_bias=False, weight_init=weight, - strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) - - grad_net = Grad(net) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - grad_net.set_train() - grad_net.set_auto_parallel() - out_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], - parallel_inputs_run=[x1, y1, output_grad1]) - return out_grad - - def forward_conv2d_cmp(self): - out_mindspore = self.forward_conv2d_mindspore_impl() - out_mindspore_parallel = self.forward_conv2d_mindspore_parallel_impl() - out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) - assert 
allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001) - - def grad_conv2d_cmp(self): - input_grad_mindspore = self.grad_conv2d_mindspore_impl() - input_grad_mindspore_parallel = self.grad_conv2d_mindspore_parallel_impl() - input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() - input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() - input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() - input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() - input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) - input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[1]) - assert allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.001, 0.001) - assert allclose(input_grad_blocks_1[self.x_id], input_grad_mindspore_parallel1, 0.001, 0.001) - - -def test_reid_conv2d_input_128_64_112_112_kernel_64_64_1_1_stride_2_padding_0_bias_true(): - fact = Conv2dFactory(input_shape=(128, 64, 112, 112), - filter_shape=(64, 64, 1, 1), - stride=2, pad_mode='valid', padding=0, - dilation=1, group=1, has_bias=False) - fact.forward_conv2d_cmp() - - -def test_reid_conv2d_grad_input_128_64_112_112_kernel_64_64_1_1_stride_2_padding_0_bias_true(): - fact = Conv2dFactory(input_shape=(128, 64, 112, 112), - filter_shape=(64, 64, 1, 1), - stride=2, pad_mode='valid', padding=0, - dilation=1, group=1, has_bias=False) - fact.grad_conv2d_cmp() +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import numpy as np +from numpy import allclose + +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore._checkparam import check_bool, twice +from mindspore.common.initializer import initializer +from mindspore.common.parameter import Parameter +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.ops import operations as P +from mindspore.ops.composite import grad_all_with_sens + +device_num = 4 +device_id = int(os.environ["RANK_ID"]) +path = "./output/" + + +def setup_module(): + print("~~~~~~~~~~~set up~~~~~~~~~~~~~") + context.set_context(mode=context.GRAPH_MODE) + context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) + distributedTool.init() + distributedTool.create_group("0-3", [0, 1, 2, 3]) + print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") + + +def teardown_module(): + print("~~~~~~~~~~~~tear down~~~~~~~~~~") + + +class _Conv(Cell): + r"""Applies a N-D convolution over an input signal composed of several input + planes. 
+ """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride, + pad_mode, + padding, + dilation, + group, + has_bias, + weight_init, + bias_init): + super(_Conv, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.stride = stride + self.pad_mode = pad_mode + self.padding = padding + self.dilation = dilation + self.group = group + self.has_bias = has_bias + if not (isinstance(in_channels, int) and in_channels > 0): + raise ValueError('Attr \'in_channels\' of \'Conv2D\' Op passed ' + + str(in_channels) + ', should be a int and greater than 0.') + if (not isinstance(kernel_size, tuple)) or len(kernel_size) != 2 or \ + (not isinstance(kernel_size[0], int)) or (not isinstance(kernel_size[1], int)) or \ + kernel_size[0] < 1 or kernel_size[1] < 1: + raise ValueError('Attr \'kernel_size\' of \'Conv2D\' Op passed ' + + str(self.kernel_size) + ', should be a int or tuple and equal to or greater than 1.') + if in_channels % group != 0: + raise ValueError('Attr \'in_channels\' of \'Conv2D\' Op must be divisible by ' + 'attr \'group\' of \'Conv2D\' Op.') + if out_channels % group != 0: + raise ValueError('Attr \'out_channels\' of \'Conv2D\' Op must be divisible by ' + 'attr \'group\' of \'Conv2D\' Op.') + + self.weight = Parameter(initializer( + weight_init, [out_channels, in_channels // group, *kernel_size]), name='weight') + + if check_bool(has_bias): + self.bias = Parameter(initializer( + bias_init, [out_channels]), name='bias') + else: + if bias_init != 'zeros': + print("Value of 'has_bias' is False, value of 'bias_init' will be ignored.") + self.bias = None + + def construct(self, *inputs): + raise NotImplementedError + + +class Conv2d(_Conv): + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + pad_mode='same', + padding=0, + dilation=1, + group=1, + has_bias=False, + weight_init='normal', + bias_init='zeros', + strategy=None): + kernel_size = twice(kernel_size) + super(Conv2d, self).__init__( + in_channels, + out_channels, + kernel_size, + stride, + pad_mode, + padding, + dilation, + group, + has_bias, + weight_init, + bias_init) + self.add = P.TensorAdd(strategy) + self.conv2d = P.Conv2D(out_channel=self.out_channels, + kernel_size=self.kernel_size, + mode=1, + pad_mode=self.pad_mode, + pad=self.padding, + stride=self.stride, + dilation=self.dilation, + group=self.group, + strategy=None) + self.bias_add = P.BiasAdd() + + def construct(self, input1, input2): + x = self.add(input1, input2) + if self.has_bias: + return self.bias_add(self.conv2d(x, self.weight), + self.bias) + return self.conv2d(x, self.weight) + + +class Grad(Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.network = network + + def construct(self, input1, input2, output_grad): + return grad_all_with_sens(self.network)(input1, input2, output_grad) + + +class Conv2dFactory: + def __init__(self, input_shape, filter_shape, stride, pad_mode, padding, dilation, group, has_bias): + self.in_n, self.in_c, self.in_h, self.in_w = input_shape + self.out_c, self.kernel_c, self.kernel_h, self.kernel_w = filter_shape + self.stride = stride + self.pad_mode = pad_mode + self.padding = padding + self.dilation = dilation + self.group = group + self.strategy0 = (0, (4, 1, 1, 1), (1, 1, 1, 1)) + prefix = "" + input_size = 1 + filter_size = 1 + for s in input_shape: + prefix = prefix + str(s) + "_" + input_size = input_size * s + self.prefix = prefix + for s in filter_shape: + filter_size = filter_size 
* s + number_range1 = min(10, input_size) + number_range2 = min(10, filter_size) + self.input_np1 = np.reshape(np.arange(0, input_size) % number_range1 - number_range1 / 2, input_shape).astype( + np.float16) + self.input_np2 = np.reshape(np.arange(0, input_size) % number_range1 - number_range1 / 4, input_shape).astype( + np.float16) + self.weight_np = np.reshape(np.arange(0, filter_size) % number_range2 - number_range2 / 2, filter_shape).astype( + np.float16) + self.has_bias = has_bias + if self.has_bias is True: + self.bias_np = np.arange(0, self.out_c).astype(np.float16) + + self.out_shape = (128, 64, 56, 56) + out_size = 1 + for s in self.out_shape: + out_size = out_size * s + number_range3 = min(10, out_size) + self.output_grad_np = np.reshape(np.arange(0, out_size) % number_range3 - number_range3 / 2, + self.out_shape).astype(np.float16) + self.x_id = device_id % 4 + self.y_id = device_id % 4 + self.out_strategy = self.strategy0[1] + self.out_id = device_id % 4 + + def get_parallel_blocks(self, input_, strategy): + blocks = [input_] + i = 0 + for stra in strategy: + temp = [] + while len(blocks) > 0: + block = blocks.pop(0) + temp.extend(np.split(block, stra, axis=i)) + blocks.extend(temp) + i += 1 + return blocks + + def forward_conv2d_mindspore_impl(self): + input1 = Tensor(self.input_np1) + input2 = Tensor(self.input_np2) + weight = Tensor(self.weight_np) + if self.has_bias: + bias = Tensor(self.bias_np) + net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, + kernel_size=(self.kernel_h, self.kernel_w), + stride=self.stride, pad_mode=self.pad_mode, + padding=self.padding, dilation=self.dilation, + group=self.group, has_bias=True, weight_init=weight, + bias_init=bias) + else: + net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, + kernel_size=(self.kernel_h, self.kernel_w), + stride=self.stride, pad_mode=self.pad_mode, + padding=self.padding, dilation=self.dilation, + group=self.group, has_bias=False, weight_init=weight) + out = net(input1, input2) + return out.asnumpy() + + def forward_conv2d_mindspore_parallel_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + weight = Tensor(self.weight_np) + inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) + x1 = Tensor(inputs_x[self.x_id]) + y1 = Tensor(inputs_y[self.y_id]) + if self.has_bias: + bias = Tensor(self.bias_np) + net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, + kernel_size=(self.kernel_h, self.kernel_w), + stride=self.stride, pad_mode=self.pad_mode, + padding=self.padding, dilation=self.dilation, + group=self.group, has_bias=True, weight_init=weight, + bias_init=bias, strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) + else: + net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, + kernel_size=(self.kernel_h, self.kernel_w), + stride=self.stride, pad_mode=self.pad_mode, + padding=self.padding, dilation=self.dilation, + group=self.group, has_bias=False, weight_init=weight, + strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net.set_auto_parallel() + out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) + return out.asnumpy() + + def grad_conv2d_mindspore_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + weight = Tensor(self.weight_np) + output_grad = Tensor(self.output_grad_np) + if self.has_bias: + bias = Tensor(self.bias_np) + 
net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, + kernel_size=(self.kernel_h, self.kernel_w), + stride=self.stride, pad_mode=self.pad_mode, + padding=self.padding, dilation=self.dilation, + group=self.group, has_bias=True, weight_init=weight, + bias_init=bias,) + else: + net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, + kernel_size=(self.kernel_h, self.kernel_w), + stride=self.stride, pad_mode=self.pad_mode, + padding=self.padding, dilation=self.dilation, + group=self.group, has_bias=False, weight_init=weight) + + grad_net = Grad(net) + grad_net.set_train() + out_grad = grad_net(x, y, output_grad) + return out_grad + + def grad_conv2d_mindspore_parallel_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + weight = Tensor(self.weight_np) + inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) + x1 = Tensor(inputs_x[self.x_id]) + y1 = Tensor(inputs_y[self.y_id]) + output_grad = Tensor(self.output_grad_np) + output_grads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) + output_grad1 = Tensor(output_grads[self.out_id]) + if self.has_bias: + bias = Tensor(self.bias_np) + net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, + kernel_size=(self.kernel_h, self.kernel_w), + stride=self.stride, pad_mode=self.pad_mode, + padding=self.padding, dilation=self.dilation, + group=self.group, has_bias=True, weight_init=weight, + bias_init=bias, strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) + else: + net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, + kernel_size=(self.kernel_h, self.kernel_w), + stride=self.stride, pad_mode=self.pad_mode, + padding=self.padding, dilation=self.dilation, + group=self.group, has_bias=False, weight_init=weight, + strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) + + grad_net = Grad(net) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + grad_net.set_train() + grad_net.set_auto_parallel() + out_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], + parallel_inputs_run=[x1, y1, output_grad1]) + return out_grad + + def forward_conv2d_cmp(self): + out_mindspore = self.forward_conv2d_mindspore_impl() + out_mindspore_parallel = self.forward_conv2d_mindspore_parallel_impl() + out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) + assert allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001) + + def grad_conv2d_cmp(self): + input_grad_mindspore = self.grad_conv2d_mindspore_impl() + input_grad_mindspore_parallel = self.grad_conv2d_mindspore_parallel_impl() + input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() + input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() + input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() + input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() + input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) + input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[1]) + assert allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.001, 0.001) + assert allclose(input_grad_blocks_1[self.x_id], input_grad_mindspore_parallel1, 0.001, 0.001) + + +def test_reid_conv2d_input_128_64_112_112_kernel_64_64_1_1_stride_2_padding_0_bias_true(): + fact = Conv2dFactory(input_shape=(128, 64, 112, 112), + filter_shape=(64, 64, 1, 1), + stride=2, 
pad_mode='valid', padding=0, + dilation=1, group=1, has_bias=False) + fact.forward_conv2d_cmp() + + +def test_reid_conv2d_grad_input_128_64_112_112_kernel_64_64_1_1_stride_2_padding_0_bias_true(): + fact = Conv2dFactory(input_shape=(128, 64, 112, 112), + filter_shape=(64, 64, 1, 1), + stride=2, pad_mode='valid', padding=0, + dilation=1, group=1, has_bias=False) + fact.grad_conv2d_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/dropout/_test_dropout_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/dropout/_test_dropout_parallel_4p.py index c735b28c43..41991aac74 100644 --- a/tests/ut/python/parallel/parallel_end_to_end/dropout/_test_dropout_parallel_4p.py +++ b/tests/ut/python/parallel/parallel_end_to_end/dropout/_test_dropout_parallel_4p.py @@ -1,120 +1,120 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import os - -import mindspore as ms -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.nn import Dropout - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class Net(Cell): - def __init__(self, keep_prob, seed0, seed1, strategy=None): - super(Net, self).__init__() - self.drop = Dropout(keep_prob, seed0, seed1, dtype=ms.float32, strategy=strategy) - - def construct(self, input): - x = self.drop(input) - return x - - -# pylint: disable=comparison-with-itself -class DropoutFactory: - def __init__(self, input_shape, keep_prob, seed0, seed1, strategy0=None): - size = 1 - prefix = "" - for s in input_shape: - prefix = prefix + str(s) - size = size * s - self.prefix = prefix - number_range = min(10, size) - self.input_np = np.reshape(np.arange(0, size) % number_range, input_shape).astype(np.float32) - self.keep_prob = keep_prob - self.seed0 = seed0 - self.seed1 = seed1 - self.strategy0 = strategy0 - need_dev_num = 1 - for s in strategy0[1]: - need_dev_num = need_dev_num * s - self.x_id = device_id % need_dev_num - self.out_id = device_id % need_dev_num - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def d4_tensor_compare(self, input, out_me): - [a, b, c, d] = input.shape - for i in range(a): - for j in range(b): - for k in range(c): - for e in range(d): - if out_me[i, j, k, e] == 0: - assert True == True - else: - assert 
np.allclose(out_me[i, j, k, e], input[i, j, k, e] * (1 / 0.4), 0.0001, 0.0001) - - def forward_mindspore_parallel_impl(self): - x = Tensor(self.input_np) - inputs_x = self.get_parallel_blocks(self.input_np, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - net = Net(0.4, 0, 0, strategy=self.strategy0) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net.set_auto_parallel() - out = net(x, parallel_inputs_compile=[x], parallel_inputs_run=[x1]) - return out.asnumpy() - - def forward_cmp(self): - out_mindspore_parallel = self.forward_mindspore_parallel_impl() - input_blocks = self.get_parallel_blocks(self.input_np, self.strategy0[1]) - self.d4_tensor_compare(input_blocks[self.out_id], out_mindspore_parallel) - - -def test_reid_dropout_forward_seed_F32_64_512_8_8(): - fact = DropoutFactory(input_shape=(64, 512, 8, 8), keep_prob=0.4, seed0=0, seed1=0, strategy0=(0, (4, 1, 1, 1))) - fact.forward_cmp() - - -def test_reid_dropout_forward_seed_F32_64_512_8_8_repeat(): - fact = DropoutFactory(input_shape=(64, 512, 8, 8), keep_prob=0.4, seed0=0, seed1=0, strategy0=(0, (2, 1, 1, 1))) - fact.forward_cmp() +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import numpy as np + +import mindspore as ms +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.nn import Dropout + +device_num = 4 +device_id = int(os.environ["RANK_ID"]) +path = "./output/" + + +def setup_module(): + print("~~~~~~~~~~~set up~~~~~~~~~~~~~") + context.set_context(mode=context.GRAPH_MODE) + context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) + distributedTool.init() + distributedTool.create_group("0-3", [0, 1, 2, 3]) + print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") + + +def teardown_module(): + print("~~~~~~~~~~~~tear down~~~~~~~~~~") + + +class Net(Cell): + def __init__(self, keep_prob, seed0, seed1, strategy=None): + super(Net, self).__init__() + self.drop = Dropout(keep_prob, seed0, seed1, dtype=ms.float32, strategy=strategy) + + def construct(self, input_): + x = self.drop(input_) + return x + + +# pylint: disable=comparison-with-itself +class DropoutFactory: + def __init__(self, input_shape, keep_prob, seed0, seed1, strategy0=None): + size = 1 + prefix = "" + for s in input_shape: + prefix = prefix + str(s) + size = size * s + self.prefix = prefix + number_range = min(10, size) + self.input_np = np.reshape(np.arange(0, size) % number_range, input_shape).astype(np.float32) + self.keep_prob = keep_prob + self.seed0 = seed0 + self.seed1 = seed1 + self.strategy0 = strategy0 + need_dev_num = 1 + for s in strategy0[1]: + need_dev_num = need_dev_num * s + self.x_id = device_id % need_dev_num + self.out_id = device_id % need_dev_num + + def get_parallel_blocks(self, input_, strategy): + blocks = [input_] + i = 0 + for stra in strategy: + temp = [] + while len(blocks) > 0: + block = blocks.pop(0) 
+ temp.extend(np.split(block, stra, axis=i)) + blocks.extend(temp) + i += 1 + return blocks + + def d4_tensor_compare(self, input_, out_me): + [a, b, c, d] = input_.shape + for i in range(a): + for j in range(b): + for k in range(c): + for e in range(d): + if out_me[i, j, k, e] == 0: + assert True + else: + assert np.allclose(out_me[i, j, k, e], input_[i, j, k, e] * (1 / 0.4), 0.0001, 0.0001) + + def forward_mindspore_parallel_impl(self): + x = Tensor(self.input_np) + inputs_x = self.get_parallel_blocks(self.input_np, self.strategy0[1]) + x1 = Tensor(inputs_x[self.x_id]) + net = Net(0.4, 0, 0, strategy=self.strategy0) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net.set_auto_parallel() + out = net(x, parallel_inputs_compile=[x], parallel_inputs_run=[x1]) + return out.asnumpy() + + def forward_cmp(self): + out_mindspore_parallel = self.forward_mindspore_parallel_impl() + input_blocks = self.get_parallel_blocks(self.input_np, self.strategy0[1]) + self.d4_tensor_compare(input_blocks[self.out_id], out_mindspore_parallel) + + +def test_reid_dropout_forward_seed_F32_64_512_8_8(): + fact = DropoutFactory(input_shape=(64, 512, 8, 8), keep_prob=0.4, seed0=0, seed1=0, strategy0=(0, (4, 1, 1, 1))) + fact.forward_cmp() + + +def test_reid_dropout_forward_seed_F32_64_512_8_8_repeat(): + fact = DropoutFactory(input_shape=(64, 512, 8, 8), keep_prob=0.4, seed0=0, seed1=0, strategy0=(0, (2, 1, 1, 1))) + fact.forward_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allgather_4p.py b/tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allgather_4p.py index 006cc6925d..893ee1cd38 100644 --- a/tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allgather_4p.py +++ b/tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allgather_4p.py @@ -1,154 +1,154 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import numpy as np -import os - -import mindspore as ms -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class MatmulSingle(Cell): - def __init__(self, transpose_a=False, transpose_b=False): - super(MatmulSingle, self).__init__() - self.matmul = P.MatMul(transpose_a, transpose_b) - self.pow = P.Pow() - self.reduce_sum = P.ReduceSum() - - def construct(self, x, y): - out = self.matmul(x, y) - out = self.pow(out, 2.0) - out = self.reduce_sum(out, None) - return out - - -class MatmulAllgather(Cell): - def __init__(self, group, transpose_a=False, transpose_b=False): - super(MatmulAllgather, self).__init__() - self.allgather = P.AllGather(group=group) - self.matmul = P.MatMul(transpose_a, transpose_b) - self.pow = P.Pow() - self.reduce_sum = P.ReduceSum() - self.allreduce = P.AllReduce(group=group) - - def construct(self, x, y): - x = self.allgather(x) - out = self.matmul(x, y) - out = self.pow(out, 2.0) - out = self.reduce_sum(out, None) - out = self.allreduce(out) - return out - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, x, y, sens): - return grad_all_with_sens(self.network)(x, y, sens) - - -class MatmulAllgatherFactory: - def __init__(self, inputx_shape, inputy_shape, x_stra, y_stra): - self.inputx = self.GenValue(inputx_shape, 10) - self.inputy = self.GenValue(inputy_shape, 20) - self.x_stra = x_stra - self.y_stra = y_stra - stra_size = 1 - for s in x_stra: - stra_size = stra_size * s - self.stra_size = stra_size - - def GenValue(self, input_shape, delta): - size = 1 - for s in input_shape: - size = size * s - number_range = min(100, size) - input_np = np.reshape(np.arange(0, size) % number_range - delta, input_shape).astype(np.float32) - return input_np - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def grad_mindspore_impl_single(self): - x = Tensor(self.inputx) - y = Tensor(self.inputy) - sens = Tensor(1.0, dtype=ms.float32) - net = MatmulSingle() - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, sens) - return input_grad - - def grad_mindspore_impl_reduce(self): - inputxs = self.get_parallel_blocks(self.inputx, self.x_stra) - inputys = self.get_parallel_blocks(self.inputy, self.y_stra) - x = Tensor(inputxs[device_id % self.stra_size]) - y = Tensor(inputys[device_id % self.stra_size]) - repeat_num = device_num / self.stra_size - v = self.stra_size * repeat_num * repeat_num * repeat_num - sens = Tensor(1.0 / v, dtype=ms.float32) - net = MatmulAllgather("hccl_world_group") - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, sens) - return input_grad - - def grad_cmp(self): - single_results = 
self.grad_mindspore_impl_single() - reduce_results = self.grad_mindspore_impl_reduce() - single_result0 = self.get_parallel_blocks(single_results[0].asnumpy(), self.x_stra)[device_id % self.stra_size] - reduce_result0 = reduce_results[0].asnumpy() - single_result1 = self.get_parallel_blocks(single_results[1].asnumpy(), self.y_stra)[device_id % self.stra_size] - reduce_result1 = reduce_results[1].asnumpy() - assert np.allclose(single_result0, reduce_result0, 0.0001, 0.0001) - assert np.allclose(single_result1, reduce_result1, 0.0001, 0.0001) - - -def test_reduce_grad(): - inputx_shape = (64, 32) - inputy_shape = (32, 64) - fact = MatmulAllgatherFactory(inputx_shape, inputy_shape, (4, 1), (1, 4)) - fact.grad_cmp() +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import numpy as np + +import mindspore as ms +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.ops import operations as P +from mindspore.ops.composite import grad_all_with_sens + +device_num = 4 +device_id = int(os.environ["RANK_ID"]) +path = "./output/" + + +def setup_module(): + print("~~~~~~~~~~~set up~~~~~~~~~~~~~") + context.set_context(mode=context.GRAPH_MODE) + context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) + distributedTool.init() + print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") + + +def teardown_module(): + print("~~~~~~~~~~~~tear down~~~~~~~~~~") + + +class MatmulSingle(Cell): + def __init__(self, transpose_a=False, transpose_b=False): + super(MatmulSingle, self).__init__() + self.matmul = P.MatMul(transpose_a, transpose_b) + self.pow = P.Pow() + self.reduce_sum = P.ReduceSum() + + def construct(self, x, y): + out = self.matmul(x, y) + out = self.pow(out, 2.0) + out = self.reduce_sum(out, None) + return out + + +class MatmulAllgather(Cell): + def __init__(self, group, transpose_a=False, transpose_b=False): + super(MatmulAllgather, self).__init__() + self.allgather = P.AllGather(group=group) + self.matmul = P.MatMul(transpose_a, transpose_b) + self.pow = P.Pow() + self.reduce_sum = P.ReduceSum() + self.allreduce = P.AllReduce(group=group) + + def construct(self, x, y): + x = self.allgather(x) + out = self.matmul(x, y) + out = self.pow(out, 2.0) + out = self.reduce_sum(out, None) + out = self.allreduce(out) + return out + + +class Grad(Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.network = network + + def construct(self, x, y, sens): + return grad_all_with_sens(self.network)(x, y, sens) + + +class MatmulAllgatherFactory: + def __init__(self, inputx_shape, inputy_shape, x_stra, y_stra): + self.inputx = self.gen_value(inputx_shape, 10) + self.inputy = self.gen_value(inputy_shape, 20) + self.x_stra = x_stra + self.y_stra = y_stra + stra_size = 1 + for s in x_stra: + stra_size = stra_size * s + self.stra_size = stra_size + + def gen_value(self, input_shape, delta): + size = 1 + 
for s in input_shape: + size = size * s + number_range = min(100, size) + input_np = np.reshape(np.arange(0, size) % number_range - delta, input_shape).astype(np.float32) + return input_np + + def get_parallel_blocks(self, input_, strategy): + blocks = [input_] + i = 0 + for stra in strategy: + temp = [] + while len(blocks) > 0: + block = blocks.pop(0) + temp.extend(np.split(block, stra, axis=i)) + blocks.extend(temp) + i += 1 + return blocks + + def grad_mindspore_impl_single(self): + x = Tensor(self.inputx) + y = Tensor(self.inputy) + sens = Tensor(1.0, dtype=ms.float32) + net = MatmulSingle() + grad_net = Grad(net) + grad_net.set_train() + input_grad = grad_net(x, y, sens) + return input_grad + + def grad_mindspore_impl_reduce(self): + inputxs = self.get_parallel_blocks(self.inputx, self.x_stra) + inputys = self.get_parallel_blocks(self.inputy, self.y_stra) + x = Tensor(inputxs[device_id % self.stra_size]) + y = Tensor(inputys[device_id % self.stra_size]) + repeat_num = device_num / self.stra_size + v = self.stra_size * repeat_num * repeat_num * repeat_num + sens = Tensor(1.0 / v, dtype=ms.float32) + net = MatmulAllgather("hccl_world_group") + grad_net = Grad(net) + grad_net.set_train() + input_grad = grad_net(x, y, sens) + return input_grad + + def grad_cmp(self): + single_results = self.grad_mindspore_impl_single() + reduce_results = self.grad_mindspore_impl_reduce() + single_result0 = self.get_parallel_blocks(single_results[0].asnumpy(), self.x_stra)[device_id % self.stra_size] + reduce_result0 = reduce_results[0].asnumpy() + single_result1 = self.get_parallel_blocks(single_results[1].asnumpy(), self.y_stra)[device_id % self.stra_size] + reduce_result1 = reduce_results[1].asnumpy() + assert np.allclose(single_result0, reduce_result0, 0.0001, 0.0001) + assert np.allclose(single_result1, reduce_result1, 0.0001, 0.0001) + + +def test_reduce_grad(): + inputx_shape = (64, 32) + inputy_shape = (32, 64) + fact = MatmulAllgatherFactory(inputx_shape, inputy_shape, (4, 1), (1, 4)) + fact.grad_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allreduce_4p.py b/tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allreduce_4p.py index b206746930..6ea30fac2d 100644 --- a/tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allreduce_4p.py +++ b/tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allreduce_4p.py @@ -1,175 +1,175 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import numpy as np -import os - -import mindspore as ms -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class MatmulSingle(Cell): - def __init__(self, transpose_a=False, transpose_b=False): - super(MatmulSingle, self).__init__() - self.matmul1 = P.MatMul(transpose_a, transpose_b) - self.matmul2 = P.MatMul(transpose_a, transpose_b) - self.pow = P.Pow() - self.reduce_sum = P.ReduceSum() - - def construct(self, x, y, z): - out = self.matmul1(x, y) - out = self.matmul2(out, z) - out = self.pow(out, 2.0) - out = self.reduce_sum(out, None) - return out - - -class MatmulReduce(Cell): - def __init__(self, group, transpose_a=False, transpose_b=False): - super(MatmulReduce, self).__init__() - self.matmul1 = P.MatMul(transpose_a, transpose_b) - self.allreduce1 = P.AllReduce(group=group) - self.matmul2 = P.MatMul(transpose_a, transpose_b) - self.pow = P.Pow() - self.reduce_sum = P.ReduceSum() - self.allreduce2 = P.AllReduce(group=group) - - def construct(self, x, y, z): - out = self.matmul1(x, y) - out = self.allreduce1(out) - out = self.matmul2(out, z) - out = self.pow(out, 2.0) - out = self.reduce_sum(out, None) - out = self.allreduce2(out) - return out - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, x, y, z, sens): - return grad_all_with_sens(self.network)(x, y, z, sens) - - -class MatmulReduceFactory: - def __init__(self, inputx_shape, inputy_shape, inputz_shape, x_stra, y_stra, z_stra): - self.inputx = self.GenValue(inputx_shape, 10) - self.inputy = self.GenValue(inputy_shape, 20) - self.inputz = self.GenValue(inputz_shape, 30) - self.x_stra = x_stra - self.y_stra = y_stra - self.z_stra = z_stra - stra_size = 1 - for s in x_stra: - stra_size = stra_size * s - self.stra_size = stra_size - - def GenValue(self, input_shape, delta): - size = 1 - for s in input_shape: - size = size * s - number_range = min(100, size) - input_np = np.reshape(np.arange(0, size) % number_range - delta, input_shape).astype(np.float32) - return input_np - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def grad_mindspore_impl_single(self): - x = Tensor(self.inputx) - y = Tensor(self.inputy) - z = Tensor(self.inputz) - sens = Tensor(1.0, dtype=ms.float32) - net = MatmulSingle() - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, z, sens) - return input_grad - - def grad_mindspore_impl_reduce(self): - inputxs = self.get_parallel_blocks(self.inputx, self.x_stra) - inputys = self.get_parallel_blocks(self.inputy, self.y_stra) - inputzs = self.get_parallel_blocks(self.inputz, self.z_stra) - x = Tensor(inputxs[device_id % self.stra_size]) - y = Tensor(inputys[device_id % 
self.stra_size]) - z = Tensor(inputzs[device_id % self.stra_size]) - repeat_num = device_num / self.stra_size - v = self.stra_size * repeat_num * repeat_num * repeat_num - sens = Tensor(1.0 / v, dtype=ms.float32) - net = MatmulReduce("hccl_world_group") - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, z, sens) - return input_grad - - def grad_cmp(self): - single_results = self.grad_mindspore_impl_single() - reduce_results = self.grad_mindspore_impl_reduce() - single_result0 = self.get_parallel_blocks(single_results[0].asnumpy(), self.x_stra)[device_id % self.stra_size] - reduce_result0 = reduce_results[0].asnumpy() - single_result1 = self.get_parallel_blocks(single_results[1].asnumpy(), self.y_stra)[device_id % self.stra_size] - reduce_result1 = reduce_results[1].asnumpy() - single_result2 = self.get_parallel_blocks(single_results[2].asnumpy(), self.z_stra)[device_id % self.stra_size] - reduce_result2 = reduce_results[2].asnumpy() - assert np.allclose(single_result0, reduce_result0, 0.0001, 0.0001) - assert np.allclose(single_result1, reduce_result1, 0.0001, 0.0001) - assert np.allclose(single_result2, reduce_result2, 0.0001, 0.0001) - - -def test_reduce_grad(): - inputx_shape = (32, 64) - inputy_shape = (64, 64) - inputz_shape = (64, 32) - fact = MatmulReduceFactory(inputx_shape, inputy_shape, inputz_shape, (1, 4), (4, 1), (1, 4)) - fact.grad_cmp() - - -def test_reduce_grad_repeat(): - inputx_shape = (32, 64) - inputy_shape = (64, 64) - inputz_shape = (64, 32) - fact = MatmulReduceFactory(inputx_shape, inputy_shape, inputz_shape, (1, 2), (2, 1), (1, 2)) - fact.grad_cmp() +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import numpy as np + +import mindspore as ms +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.ops import operations as P +from mindspore.ops.composite import grad_all_with_sens + +device_num = 4 +device_id = int(os.environ["RANK_ID"]) +path = "./output/" + + +def setup_module(): + print("~~~~~~~~~~~set up~~~~~~~~~~~~~") + context.set_context(mode=context.GRAPH_MODE) + context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) + distributedTool.init() + print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") + + +def teardown_module(): + print("~~~~~~~~~~~~tear down~~~~~~~~~~") + + +class MatmulSingle(Cell): + def __init__(self, transpose_a=False, transpose_b=False): + super(MatmulSingle, self).__init__() + self.matmul1 = P.MatMul(transpose_a, transpose_b) + self.matmul2 = P.MatMul(transpose_a, transpose_b) + self.pow = P.Pow() + self.reduce_sum = P.ReduceSum() + + def construct(self, x, y, z): + out = self.matmul1(x, y) + out = self.matmul2(out, z) + out = self.pow(out, 2.0) + out = self.reduce_sum(out, None) + return out + + +class MatmulReduce(Cell): + def __init__(self, group, transpose_a=False, transpose_b=False): + super(MatmulReduce, self).__init__() + self.matmul1 = P.MatMul(transpose_a, transpose_b) + self.allreduce1 = P.AllReduce(group=group) + self.matmul2 = P.MatMul(transpose_a, transpose_b) + self.pow = P.Pow() + self.reduce_sum = P.ReduceSum() + self.allreduce2 = P.AllReduce(group=group) + + def construct(self, x, y, z): + out = self.matmul1(x, y) + out = self.allreduce1(out) + out = self.matmul2(out, z) + out = self.pow(out, 2.0) + out = self.reduce_sum(out, None) + out = self.allreduce2(out) + return out + + +class Grad(Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.network = network + + def construct(self, x, y, z, sens): + return grad_all_with_sens(self.network)(x, y, z, sens) + + +class MatmulReduceFactory: + def __init__(self, inputx_shape, inputy_shape, inputz_shape, x_stra, y_stra, z_stra): + self.inputx = self.gen_value(inputx_shape, 10) + self.inputy = self.gen_value(inputy_shape, 20) + self.inputz = self.gen_value(inputz_shape, 30) + self.x_stra = x_stra + self.y_stra = y_stra + self.z_stra = z_stra + stra_size = 1 + for s in x_stra: + stra_size = stra_size * s + self.stra_size = stra_size + + def gen_value(self, input_shape, delta): + size = 1 + for s in input_shape: + size = size * s + number_range = min(100, size) + input_np = np.reshape(np.arange(0, size) % number_range - delta, input_shape).astype(np.float32) + return input_np + + def get_parallel_blocks(self, input_, strategy): + blocks = [input_] + i = 0 + for stra in strategy: + temp = [] + while len(blocks) > 0: + block = blocks.pop(0) + temp.extend(np.split(block, stra, axis=i)) + blocks.extend(temp) + i += 1 + return blocks + + def grad_mindspore_impl_single(self): + x = Tensor(self.inputx) + y = Tensor(self.inputy) + z = Tensor(self.inputz) + sens = Tensor(1.0, dtype=ms.float32) + net = MatmulSingle() + grad_net = Grad(net) + grad_net.set_train() + input_grad = grad_net(x, y, z, sens) + return input_grad + + def grad_mindspore_impl_reduce(self): + inputxs = self.get_parallel_blocks(self.inputx, self.x_stra) + inputys = self.get_parallel_blocks(self.inputy, self.y_stra) + inputzs = self.get_parallel_blocks(self.inputz, self.z_stra) + x = Tensor(inputxs[device_id % self.stra_size]) + y = Tensor(inputys[device_id % 
self.stra_size]) + z = Tensor(inputzs[device_id % self.stra_size]) + repeat_num = device_num / self.stra_size + v = self.stra_size * repeat_num * repeat_num * repeat_num + sens = Tensor(1.0 / v, dtype=ms.float32) + net = MatmulReduce("hccl_world_group") + grad_net = Grad(net) + grad_net.set_train() + input_grad = grad_net(x, y, z, sens) + return input_grad + + def grad_cmp(self): + single_results = self.grad_mindspore_impl_single() + reduce_results = self.grad_mindspore_impl_reduce() + single_result0 = self.get_parallel_blocks(single_results[0].asnumpy(), self.x_stra)[device_id % self.stra_size] + reduce_result0 = reduce_results[0].asnumpy() + single_result1 = self.get_parallel_blocks(single_results[1].asnumpy(), self.y_stra)[device_id % self.stra_size] + reduce_result1 = reduce_results[1].asnumpy() + single_result2 = self.get_parallel_blocks(single_results[2].asnumpy(), self.z_stra)[device_id % self.stra_size] + reduce_result2 = reduce_results[2].asnumpy() + assert np.allclose(single_result0, reduce_result0, 0.0001, 0.0001) + assert np.allclose(single_result1, reduce_result1, 0.0001, 0.0001) + assert np.allclose(single_result2, reduce_result2, 0.0001, 0.0001) + + +def test_reduce_grad(): + inputx_shape = (32, 64) + inputy_shape = (64, 64) + inputz_shape = (64, 32) + fact = MatmulReduceFactory(inputx_shape, inputy_shape, inputz_shape, (1, 4), (4, 1), (1, 4)) + fact.grad_cmp() + + +def test_reduce_grad_repeat(): + inputx_shape = (32, 64) + inputy_shape = (64, 64) + inputz_shape = (64, 32) + fact = MatmulReduceFactory(inputx_shape, inputy_shape, inputz_shape, (1, 2), (2, 1), (1, 2)) + fact.grad_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/l2normalize/_test_l2normalize_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/l2normalize/_test_l2normalize_parallel_4p.py index a9cd30cccf..866efdeba7 100644 --- a/tests/ut/python/parallel/parallel_end_to_end/l2normalize/_test_l2normalize_parallel_4p.py +++ b/tests/ut/python/parallel/parallel_end_to_end/l2normalize/_test_l2normalize_parallel_4p.py @@ -1,207 +1,206 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import numpy as np -import os -import pytest - -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class L2normalize(Cell): - def __init__(self, axis=0, epsilon=1e-4, strategy0=None, strategy1=None): - super(L2normalize, self).__init__() - self.add = P.TensorAdd(strategy=strategy0) - self.l2norm = P.L2Normalize(axis, epsilon, strategy1) - - def construct(self, x, y): - out = self.add(x, y) - out = self.l2norm(out) - return out - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, x, y, output_grad): - return grad_all_with_sens(self.network)(x, y, output_grad) - - -class L2normalizeFactory: - def __init__(self, input_shape, axis, strategy0, strategy1): - prefix = "" - size = 1 - for s in input_shape: - prefix = prefix + str(s) - size = size * s - self.prefix = prefix - number_range = min(1000, size) - self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( - np.float32) - self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( - np.float32) - target_shape = input_shape - self.target_shape = target_shape - target_size = 1 - for s in target_shape: - target_size = target_size * s - number_range = min(1000, target_size) - self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2, - target_shape).astype(np.float32) - self.axis = axis - self.epsilon = 1e-4 - self.strategy0 = strategy0 - self.strategy1 = strategy1 - out_strategy = strategy1[1] - self.out_strategy = out_strategy - need_dev_num0 = 1 - need_dev_num1 = 1 - for s in strategy0[1]: - need_dev_num0 = need_dev_num0 * s - for s in out_strategy: - need_dev_num1 = need_dev_num1 * s - self.x_id = device_id % need_dev_num0 - self.y_id = device_id % need_dev_num0 - self.out_id = device_id % need_dev_num1 - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def forward_mindspore_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - net = L2normalize(self.axis, self.epsilon) - out = net(x, y) - return out.asnumpy() - - def forward_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net.set_auto_parallel() - out = net(x, 
y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) - return out.asnumpy() - - def grad_mindspore_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - output_grad = Tensor(self.output_grad_np) - net = L2normalize(self.axis, self.epsilon) - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, output_grad) - return input_grad - - def grad_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - output_grad = Tensor(self.output_grad_np) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - output_grad1 = Tensor(outgrads[self.out_id]) - net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1) - grad_net = Grad(net) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - grad_net.set_auto_parallel() - grad_net.set_train() - input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], - parallel_inputs_run=[x1, y1, output_grad1]) - return input_grad - - def forward_cmp(self): - out_mindspore = self.forward_mindspore_impl() - out_mindspore_parallel = self.forward_mindspore_parallel_impl() - out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) - assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001) - - def grad_cmp(self): - input_grad_mindspore = self.grad_mindspore_impl() - input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() - input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() - input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() - input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() - input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() - input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) - input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) - assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) - assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) - - -def test_reid_l2normalize_input_128_512(): - input_shape = (128, 512) - axis = 0 - fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4))) - fact.forward_cmp() - - -def test_reid_l2normalize_grad_input_128_512(): - input_shape = (128, 512) - axis = 0 - fact = L2normalizeFactory(input_shape, axis, (0, (4, 1), (4, 1)), strategy1=(0, (1, 4))) - fact.grad_cmp() - - -def test_reid_l2normalize_input_128_512_repeat(): - input_shape = (128, 512) - axis = 0 - fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (1, 2), (1, 2)), strategy1=(0, (1, 2))) - fact.forward_cmp() - - -def test_reid_l2normalize_grad_input_128_512_repeat(): - input_shape = (128, 512) - axis = 0 - fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (1, 2), (1, 2)), strategy1=(0, (1, 2))) - fact.grad_cmp() +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import numpy as np + +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.ops import operations as P +from mindspore.ops.composite import grad_all_with_sens + +device_num = 4 +device_id = int(os.environ["RANK_ID"]) +path = "./output/" + + +def setup_module(): + print("~~~~~~~~~~~set up~~~~~~~~~~~~~") + context.set_context(mode=context.GRAPH_MODE) + context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) + distributedTool.init() + distributedTool.create_group("0-3", [0, 1, 2, 3]) + print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") + + +def teardown_module(): + print("~~~~~~~~~~~~tear down~~~~~~~~~~") + + +class L2normalize(Cell): + def __init__(self, axis=0, epsilon=1e-4, strategy0=None, strategy1=None): + super(L2normalize, self).__init__() + self.add = P.TensorAdd(strategy=strategy0) + self.l2norm = P.L2Normalize(axis, epsilon, strategy1) + + def construct(self, x, y): + out = self.add(x, y) + out = self.l2norm(out) + return out + + +class Grad(Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.network = network + + def construct(self, x, y, output_grad): + return grad_all_with_sens(self.network)(x, y, output_grad) + + +class L2normalizeFactory: + def __init__(self, input_shape, axis, strategy0, strategy1): + prefix = "" + size = 1 + for s in input_shape: + prefix = prefix + str(s) + size = size * s + self.prefix = prefix + number_range = min(1000, size) + self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( + np.float32) + self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( + np.float32) + target_shape = input_shape + self.target_shape = target_shape + target_size = 1 + for s in target_shape: + target_size = target_size * s + number_range = min(1000, target_size) + self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2, + target_shape).astype(np.float32) + self.axis = axis + self.epsilon = 1e-4 + self.strategy0 = strategy0 + self.strategy1 = strategy1 + out_strategy = strategy1[1] + self.out_strategy = out_strategy + need_dev_num0 = 1 + need_dev_num1 = 1 + for s in strategy0[1]: + need_dev_num0 = need_dev_num0 * s + for s in out_strategy: + need_dev_num1 = need_dev_num1 * s + self.x_id = device_id % need_dev_num0 + self.y_id = device_id % need_dev_num0 + self.out_id = device_id % need_dev_num1 + + def get_parallel_blocks(self, input_, strategy): + blocks = [input_] + i = 0 + for stra in strategy: + temp = [] + while len(blocks) > 0: + block = blocks.pop(0) + temp.extend(np.split(block, stra, axis=i)) + blocks.extend(temp) + i += 1 + return blocks + + def forward_mindspore_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + net = L2normalize(self.axis, self.epsilon) + out = net(x, y) + return out.asnumpy() + + def forward_mindspore_parallel_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + inputs_x = 
self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) + x1 = Tensor(inputs_x[self.x_id]) + y1 = Tensor(inputs_y[self.y_id]) + net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net.set_auto_parallel() + out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) + return out.asnumpy() + + def grad_mindspore_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + output_grad = Tensor(self.output_grad_np) + net = L2normalize(self.axis, self.epsilon) + grad_net = Grad(net) + grad_net.set_train() + input_grad = grad_net(x, y, output_grad) + return input_grad + + def grad_mindspore_parallel_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + output_grad = Tensor(self.output_grad_np) + inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) + outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) + x1 = Tensor(inputs_x[self.x_id]) + y1 = Tensor(inputs_y[self.y_id]) + output_grad1 = Tensor(outgrads[self.out_id]) + net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1) + grad_net = Grad(net) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + grad_net.set_auto_parallel() + grad_net.set_train() + input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], + parallel_inputs_run=[x1, y1, output_grad1]) + return input_grad + + def forward_cmp(self): + out_mindspore = self.forward_mindspore_impl() + out_mindspore_parallel = self.forward_mindspore_parallel_impl() + out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) + assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001) + + def grad_cmp(self): + input_grad_mindspore = self.grad_mindspore_impl() + input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() + input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() + input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() + input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() + input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() + input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) + input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) + assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) + assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) + + +def test_reid_l2normalize_input_128_512(): + input_shape = (128, 512) + axis = 0 + fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4))) + fact.forward_cmp() + + +def test_reid_l2normalize_grad_input_128_512(): + input_shape = (128, 512) + axis = 0 + fact = L2normalizeFactory(input_shape, axis, (0, (4, 1), (4, 1)), strategy1=(0, (1, 4))) + fact.grad_cmp() + + +def test_reid_l2normalize_input_128_512_repeat(): + input_shape = (128, 512) + axis = 0 + fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (1, 2), (1, 2)), strategy1=(0, (1, 2))) + fact.forward_cmp() + + +def test_reid_l2normalize_grad_input_128_512_repeat(): + input_shape = (128, 512) + axis = 0 + fact = L2normalizeFactory(input_shape, axis, 
strategy0=(0, (1, 2), (1, 2)), strategy1=(0, (1, 2))) + fact.grad_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/loss/_test_loss_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/loss/_test_loss_parallel_4p.py index b26b718068..6b8288e4bd 100644 --- a/tests/ut/python/parallel/parallel_end_to_end/loss/_test_loss_parallel_4p.py +++ b/tests/ut/python/parallel/parallel_end_to_end/loss/_test_loss_parallel_4p.py @@ -1,196 +1,195 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import os -import pytest - -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class AddRelu(Cell): - def __init__(self, strategy0=None, strategy1=None): - super(AddRelu, self).__init__() - self.add = P.TensorAdd(strategy=strategy0) - self.relu = P.ReLU(strategy=strategy1) - - def construct(self, x, y): - out = self.add(x, y) - out = self.relu(out) - return out - - -class NetWithLoss(Cell): - def __init__(self, network, strategy2=None): - super(NetWithLoss, self).__init__() - self.loss = P.SoftmaxCrossEntropyWithLogits(strategy=strategy2) - self.network = network - - def construct(self, x, y, b): - predict = self.network(x, y) - return self.loss(predict, b)[0] - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, x, y, b): - return grad_all(self.network)(x, y, b) - - -class AddReluFactory: - def __init__(self, input_shape, strategy0, strategy1, strategy2): - prefix = "" - size = 1 - for s in input_shape: - prefix = prefix + str(s) - size = size * s - self.prefix = prefix - number_range = min(1000, size) - self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( - np.float32) - self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( - np.float32) - target_shape = input_shape - self.target_shape = target_shape - target_size = 1 - for s in target_shape: - target_size = target_size * s - number_range = min(10, target_size) - self.output_grad_np = np.reshape((np.arange(0, target_size) % number_range) * 0.1, target_shape).astype( - np.float32) - self.strategy0 = strategy0 - self.strategy1 = strategy1 - self.strategy2 = strategy2 - out_strategy = strategy1[1] - self.out_strategy = out_strategy - 
need_dev_num0 = 1 - need_dev_num1 = 1 - for s in strategy0[1]: - need_dev_num0 = need_dev_num0 * s - for s in out_strategy: - need_dev_num1 = need_dev_num1 * s - self.x_id = device_id % need_dev_num0 - self.y_id = device_id % need_dev_num0 - self.out_id = device_id % need_dev_num1 - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def grad_mindspore_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - output_grad = Tensor(self.output_grad_np) - net = AddRelu() - net_with_loss = NetWithLoss(net) - grad_net = Grad(net_with_loss) - grad_net.set_train() - input_grads = [] - for i in range(0, 3): - input_grad = grad_net(x, y, output_grad) - input_grads.append(input_grad) - return input_grads - - def grad_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - output_grad = Tensor(self.output_grad_np) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - output_grad1 = Tensor(outgrads[self.out_id]) - net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1) - net_with_loss = NetWithLoss(net, strategy2=self.strategy2) - grad_net = Grad(net_with_loss) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - grad_net.set_auto_parallel() - grad_net.set_train() - input_grads = [] - for i in range(0, 3): - input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad], - parallel_inputs_run=[x1, y1, output_grad1]) - input_grads.append(input_grad) - return input_grads - - def grad_cmp(self): - input_grad_mindspores = self.grad_mindspore_impl() - input_grad_mindspore_parallels = self.grad_mindspore_parallel_impl() - for i in range(0, len(input_grad_mindspores)): - input_grad_mindspore = input_grad_mindspores[i] - input_grad_mindspore_parallel = input_grad_mindspore_parallels[i] - input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() - input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() - input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() - input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() - input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) - input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) - np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_single0.npy", - input_grad_blocks_0[self.x_id]) - np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_single1.npy", - input_grad_blocks_1[self.y_id]) - np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_parallel0.npy", - input_grad_mindspore_parallel0) - np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_parallel1.npy", - input_grad_mindspore_parallel1) - assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) - assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) - - -def test_reid_l2normalize_grad_input_128_512(): - input_shape = (128, 512) - fact = AddReluFactory(input_shape, strategy0=(0, (4, 1), (4, 1)), 
strategy1=(0, (4, 1)), - strategy2=(0, (4, 1), (4, 1))) - fact.grad_cmp() - - -def test_reid_l2normalize_grad_input_128_512_stridesplit(): - input_shape = (128, 512) - fact = AddReluFactory(input_shape, strategy0=(0, (1, 1), (1, 1)), strategy1=(0, (4, 1)), - strategy2=(0, (4, 1), (4, 1))) - fact.grad_cmp() +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import numpy as np + +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.ops import operations as P +from mindspore.ops.composite import grad_all + +device_num = 4 +device_id = int(os.environ["RANK_ID"]) +path = "./output/" + + +def setup_module(): + print("~~~~~~~~~~~set up~~~~~~~~~~~~~") + context.set_context(mode=context.GRAPH_MODE) + context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) + distributedTool.init() + distributedTool.create_group("0-3", [0, 1, 2, 3]) + print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") + + +def teardown_module(): + print("~~~~~~~~~~~~tear down~~~~~~~~~~") + + +class AddRelu(Cell): + def __init__(self, strategy0=None, strategy1=None): + super(AddRelu, self).__init__() + self.add = P.TensorAdd(strategy=strategy0) + self.relu = P.ReLU(strategy=strategy1) + + def construct(self, x, y): + out = self.add(x, y) + out = self.relu(out) + return out + + +class NetWithLoss(Cell): + def __init__(self, network, strategy2=None): + super(NetWithLoss, self).__init__() + self.loss = P.SoftmaxCrossEntropyWithLogits(strategy=strategy2) + self.network = network + + def construct(self, x, y, b): + predict = self.network(x, y) + return self.loss(predict, b)[0] + + +class Grad(Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.network = network + + def construct(self, x, y, b): + return grad_all(self.network)(x, y, b) + + +class AddReluFactory: + def __init__(self, input_shape, strategy0, strategy1, strategy2): + prefix = "" + size = 1 + for s in input_shape: + prefix = prefix + str(s) + size = size * s + self.prefix = prefix + number_range = min(1000, size) + self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( + np.float32) + self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( + np.float32) + target_shape = input_shape + self.target_shape = target_shape + target_size = 1 + for s in target_shape: + target_size = target_size * s + number_range = min(10, target_size) + self.output_grad_np = np.reshape((np.arange(0, target_size) % number_range) * 0.1, target_shape).astype( + np.float32) + self.strategy0 = strategy0 + self.strategy1 = strategy1 + self.strategy2 = strategy2 + out_strategy = strategy1[1] + self.out_strategy = out_strategy + need_dev_num0 = 1 + need_dev_num1 = 1 + for s in strategy0[1]: + need_dev_num0 = need_dev_num0 * s + for s in out_strategy: + need_dev_num1 = need_dev_num1 * s + 
self.x_id = device_id % need_dev_num0 + self.y_id = device_id % need_dev_num0 + self.out_id = device_id % need_dev_num1 + + def get_parallel_blocks(self, input_, strategy): + blocks = [input_] + i = 0 + for stra in strategy: + temp = [] + while len(blocks) > 0: + block = blocks.pop(0) + temp.extend(np.split(block, stra, axis=i)) + blocks.extend(temp) + i += 1 + return blocks + + def grad_mindspore_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + output_grad = Tensor(self.output_grad_np) + net = AddRelu() + net_with_loss = NetWithLoss(net) + grad_net = Grad(net_with_loss) + grad_net.set_train() + input_grads = [] + for i in range(0, 3): + input_grad = grad_net(x, y, output_grad) + input_grads.append(input_grad) + return input_grads + + def grad_mindspore_parallel_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + output_grad = Tensor(self.output_grad_np) + inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) + outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) + x1 = Tensor(inputs_x[self.x_id]) + y1 = Tensor(inputs_y[self.y_id]) + output_grad1 = Tensor(outgrads[self.out_id]) + net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1) + net_with_loss = NetWithLoss(net, strategy2=self.strategy2) + grad_net = Grad(net_with_loss) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + grad_net.set_auto_parallel() + grad_net.set_train() + input_grads = [] + for i in range(0, 3): + input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad], + parallel_inputs_run=[x1, y1, output_grad1]) + input_grads.append(input_grad) + return input_grads + + def grad_cmp(self): + input_grad_mindspores = self.grad_mindspore_impl() + input_grad_mindspore_parallels = self.grad_mindspore_parallel_impl() + for i in range(0, len(input_grad_mindspores)): + input_grad_mindspore = input_grad_mindspores[i] + input_grad_mindspore_parallel = input_grad_mindspore_parallels[i] + input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() + input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() + input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() + input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() + input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) + input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) + np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_single0.npy", + input_grad_blocks_0[self.x_id]) + np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_single1.npy", + input_grad_blocks_1[self.y_id]) + np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_parallel0.npy", + input_grad_mindspore_parallel0) + np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_parallel1.npy", + input_grad_mindspore_parallel1) + assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) + assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) + + +def test_reid_l2normalize_grad_input_128_512(): + input_shape = (128, 512) + fact = AddReluFactory(input_shape, strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (4, 1)), + strategy2=(0, (4, 1), (4, 1))) + fact.grad_cmp() + + +def test_reid_l2normalize_grad_input_128_512_stridesplit(): + input_shape = (128, 
512) + fact = AddReluFactory(input_shape, strategy0=(0, (1, 1), (1, 1)), strategy1=(0, (4, 1)), + strategy2=(0, (4, 1), (4, 1))) + fact.grad_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/matmul/_test_matmul_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/matmul/_test_matmul_parallel_4p.py index 8ce6860495..37ae0f72b0 100644 --- a/tests/ut/python/parallel/parallel_end_to_end/matmul/_test_matmul_parallel_4p.py +++ b/tests/ut/python/parallel/parallel_end_to_end/matmul/_test_matmul_parallel_4p.py @@ -1,329 +1,329 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import os -from numpy import allclose - -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class Matmul(Cell): - def __init__(self, transpose_a=False, transpose_b=False, strategy0=None, strategy1=None): - super(Matmul, self).__init__() - self.add = P.TensorAdd(strategy=strategy1) - self.matmul = P.MatMul(transpose_a, transpose_b, strategy=strategy0) - - def construct(self, x, w, z): - out = self.add(x, z) - return self.matmul(out, w) - - -class BatchMatMul(Cell): - def __init__(self, transpose_a=False, transpose_b=False, strategy0=None, strategy1=None): - super(BatchMatMul, self).__init__() - self.add = P.TensorAdd(strategy=strategy1) - self.batchmatmul = P.BatchMatMul(transpose_a, transpose_b, strategy=strategy0) - - def construct(self, x, w, z): - out = self.add(x, z) - return self.batchmatmul(out, w) - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, inputa, inputb, inputz, output_grad): - gout = grad_all_with_sens(self.network)(inputa, inputb, inputz, output_grad) - return gout - - -class BatchmatmulFactory: - def __init__(self, inputa_shape, inputb_shape, transpose_a, transpose_b, strategy, strategy_): - self.strategy = strategy - self.strategy_ = strategy_ - inputa_size = 1 - inputb_size = 1 - prefix = "" - for s in inputa_shape: - prefix = prefix + str(s) + "_" - inputa_size = inputa_size * s - prefix = prefix + "and" - for s in inputb_shape: - prefix = prefix + str(s) + "_" - inputb_size = inputb_size * s - number_rangea = min(1000, inputa_size) - number_rangeb = min(1000, inputb_size) - self.inputa = np.reshape(np.arange(0, inputa_size) % number_rangea - number_rangea 
/ 2, inputa_shape).astype( - np.float32) - self.inputb = np.reshape(np.arange(0, inputb_size) % number_rangeb - number_rangeb / 2, inputb_shape).astype( - np.float32) - self.inputz = np.zeros(self.inputa.shape).astype(np.float32) - self.transpose_a = transpose_a - self.transpose_b = transpose_b - - out_shape = [] - device_matrix = [] - out_strategy = [] - if transpose_a: - temp = inputa_shape[-1] - inputa_shape[-1] = inputa_shape[-2] - inputa_shape[-2] = temp - if transpose_b: - temp = inputb_shape[-1] - inputb_shape[-1] = inputb_shape[-2] - inputb_shape[-2] = temp - - if (len(inputa_shape) >= len(inputb_shape)): - out_shape = list(inputa_shape) - out_shape[-1] = inputb_shape[-1] - else: - out_shape = list(inputb_shape) - out_shape[-2] = inputa_shape[-2] - - strategy1 = list(self.strategy[1]) - strategy2 = list(self.strategy[2]) - if transpose_a: - temp = strategy1[-1] - strategy1[-1] = strategy1[-2] - strategy1[-2] = temp - if transpose_b: - temp = strategy2[-1] - strategy2[-1] = strategy2[-2] - strategy2[-2] = temp - - if (len(strategy1) >= len(strategy2)): - out_strategy = strategy1.copy() - out_strategy[-1] = strategy2[-1] - else: - out_strategy = strategy2.copy() - out_strategy[-2] = strategy1[-2] - device_matrix = out_strategy.copy() - device_matrix.insert(-1, strategy1[-1]) - self.out_strategy = out_strategy - - need_dev_num = 1 - for s in device_matrix: - need_dev_num = need_dev_num * s - self.need_dev_num = need_dev_num - self.device_matrix = device_matrix - - out_size = 1 - for s in out_shape: - out_size = out_size * s - number_range = min(1000, out_size) - self.output_grad_np = np.reshape(np.arange(0, out_size) % number_range - number_range / 2, out_shape).astype( - np.float32) - - device_index = self.id_to_list(device_id % need_dev_num, self.device_matrix) - x_index = device_index[:-1].copy() - if transpose_a: - temp = x_index[-1] - x_index[-1] = x_index[-2] - x_index[-2] = temp - y_index = device_index[:-3].copy() - y_index.append(device_index[-2]) - y_index.append(device_index[-1]) - if transpose_b: - temp = y_index[-1] - y_index[-1] = y_index[-2] - y_index[-2] = temp - - out_index = device_index[:-2].copy() - out_index.append(device_index[-1]) - - print(device_matrix) - print(device_index) - - need_dev_num_ = 1 - for s in strategy_[1]: - need_dev_num_ = need_dev_num_ * s - self.x_id = device_id % need_dev_num_ - self.y_id = self.list_to_id(y_index, self.strategy[2]) - self.out_id = self.list_to_id(out_index, self.out_strategy) - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - """ - shape:每一维的上限,如(2,4,8) - """ - - def id_to_list(self, id, shape): - result = [] - r = id - for i in range(0, len(shape)): - v = 1 - for j in range(i + 1, len(shape)): - v = v * shape[j] - result.append(r // v) - r = r % v - return result - - def list_to_id(self, id_list, shape): - result = 0 - for i in range(0, len(id_list)): - v = 1 - for j in range(i + 1, len(id_list)): - v = v * shape[j] - result = result + id_list[i] * v - return result - - def forward_mindspore_impl(self): - if len(self.inputa.shape) > 2: - matmul = BatchMatMul(self.transpose_a, self.transpose_b) - else: - matmul = Matmul(self.transpose_a, self.transpose_b) - matmul.set_train() - out_me = matmul(Tensor(self.inputa), Tensor(self.inputb), Tensor(self.inputz)) - return out_me.asnumpy() - - def 
forward_mindspore_parallel_impl(self): - if len(self.inputa.shape) > 2: - matmul = BatchMatMul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_) - else: - matmul = Matmul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - x = Tensor(self.inputa) - y = Tensor(self.inputb) - z = Tensor(self.inputz) - xs = self.get_parallel_blocks(self.inputa, self.strategy_[1]) - ys = self.get_parallel_blocks(self.inputb, self.strategy[2]) - zs = self.get_parallel_blocks(self.inputz, self.strategy_[1]) - x1 = Tensor(xs[self.x_id]) # - y1 = Tensor(ys[self.y_id]) # 需要从设备矩阵推导 - z1 = Tensor(zs[self.x_id]) - matmul.set_train() - matmul.set_auto_parallel() - out_me = matmul(x, y, z, parallel_inputs_compile=[x, y, z], parallel_inputs_run=[x1, y1, z1]) - return out_me.asnumpy() - - def grad_mindspore_impl(self): - x = Tensor(self.inputa) - y = Tensor(self.inputb) - z = Tensor(self.inputz) - if len(self.inputa.shape) > 2: - matmul = BatchMatMul(self.transpose_a, self.transpose_b) - else: - matmul = Matmul(self.transpose_a, self.transpose_b) - net_me = Grad(matmul) - net_me.set_train() - out_grad_me = Tensor(self.output_grad_np) - out_grad = net_me(x, y, z, out_grad_me) - return out_grad - - def grad_mindspore_parallel_impl(self): - if len(self.inputa.shape) > 2: - matmul = BatchMatMul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_) - else: - matmul = Matmul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_) - x = Tensor(self.inputa) - y = Tensor(self.inputb) - z = Tensor(self.inputz) - out_grad_me = Tensor(self.output_grad_np) - - xs = self.get_parallel_blocks(self.inputa, self.strategy_[1]) - ys = self.get_parallel_blocks(self.inputb, self.strategy[2]) - zs = self.get_parallel_blocks(self.inputz, self.strategy_[1]) - out_grads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) - - x1 = Tensor(xs[self.x_id]) # 需要从设备矩阵推导 - y1 = Tensor(ys[self.y_id]) # - z1 = Tensor(zs[self.x_id]) - out_grad1 = Tensor(out_grads[self.out_id]) - net_me = Grad(matmul) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net_me.set_auto_parallel() - net_me.set_train() - - out_grad = net_me(x, y, z, out_grad_me, parallel_inputs_compile=[x, y, z, out_grad1], - parallel_inputs_run=[x1, y1, z1, out_grad1]) - return out_grad - - def forward_cmp(self): - out_mindspore = self.forward_mindspore_impl() - out_mindspores = self.get_parallel_blocks(out_mindspore, self.out_strategy) - out_mindspore_parallel = self.forward_mindspore_parallel_impl() - assert allclose(out_mindspores[self.out_id], out_mindspore_parallel, 0.0001, 0.0001) - - def grad_cmp(self): - input_grad_mindspore = self.grad_mindspore_impl() - input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() - input_grad_mindspores0 = self.get_parallel_blocks(input_grad_mindspore[0].asnumpy(), self.strategy_[1]) - input_grad_mindspores1 = self.get_parallel_blocks(input_grad_mindspore[1].asnumpy(), self.strategy[2]) - input_grad_mindspores2 = self.get_parallel_blocks(input_grad_mindspore[2].asnumpy(), self.strategy_[1]) - assert allclose(input_grad_mindspores0[self.x_id], input_grad_mindspore_parallel[0].asnumpy(), 0.0001, 0.0001) - assert allclose(input_grad_mindspores1[self.y_id], input_grad_mindspore_parallel[1].asnumpy(), 0.0001, 0.0001) - assert allclose(input_grad_mindspores2[self.x_id], 
input_grad_mindspore_parallel[2].asnumpy(), 0.0001, 0.0001) - - -def test_reid_batchmatmul_inputa_128_512_inputb_2000_512(): - inputa = [128, 512] - inputb = [2000, 512] - fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (2, 2), (1, 2)), (0, (2, 2), (2, 2))) - fact.forward_cmp() - - -def test_reid_batchmatmul_grad_inputa_128_512_inputb_2000_512(): - inputa = [128, 512] - inputb = [2000, 512] - fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (2, 2), (1, 2)), (0, (2, 2), (2, 2))) - fact.grad_cmp() - - -def test_reid_batchmatmul_inputa_128_512_inputb_2000_512_redistribution(): - inputa = [128, 512] - inputb = [2000, 512] - fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (1, 2), (1, 2)), (0, (2, 2), (2, 2))) - fact.forward_cmp() - - -def test_reid_batchmatmul_grad_inputa_128_512_inputb_2000_512_redistribution(): - inputa = [128, 512] - inputb = [2000, 512] - fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (1, 2), (1, 2)), (0, (2, 2), (2, 2))) - fact.grad_cmp() +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import numpy as np +from numpy import allclose + +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.ops import operations as P +from mindspore.ops.composite import grad_all_with_sens + +device_num = 4 +device_id = int(os.environ["RANK_ID"]) +path = "./output/" + + +def setup_module(): + print("~~~~~~~~~~~set up~~~~~~~~~~~~~") + context.set_context(mode=context.GRAPH_MODE) + context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) + distributedTool.init() + distributedTool.create_group("0-3", [0, 1, 2, 3]) + print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") + + +def teardown_module(): + print("~~~~~~~~~~~~tear down~~~~~~~~~~") + + +class Matmul(Cell): + def __init__(self, transpose_a=False, transpose_b=False, strategy0=None, strategy1=None): + super(Matmul, self).__init__() + self.add = P.TensorAdd(strategy=strategy1) + self.matmul = P.MatMul(transpose_a, transpose_b, strategy=strategy0) + + def construct(self, x, w, z): + out = self.add(x, z) + return self.matmul(out, w) + + +class BatchMatMul(Cell): + def __init__(self, transpose_a=False, transpose_b=False, strategy0=None, strategy1=None): + super(BatchMatMul, self).__init__() + self.add = P.TensorAdd(strategy=strategy1) + self.batchmatmul = P.BatchMatMul(transpose_a, transpose_b, strategy=strategy0) + + def construct(self, x, w, z): + out = self.add(x, z) + return self.batchmatmul(out, w) + + +class Grad(Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.network = network + + def construct(self, inputa, inputb, inputz, output_grad): + gout = grad_all_with_sens(self.network)(inputa, inputb, inputz, output_grad) + return gout + + +class BatchmatmulFactory: + def __init__(self, inputa_shape, inputb_shape, transpose_a, transpose_b, strategy, strategy_): + 
self.strategy = strategy
+        self.strategy_ = strategy_
+        inputa_size = 1
+        inputb_size = 1
+        prefix = ""
+        for s in inputa_shape:
+            prefix = prefix + str(s) + "_"
+            inputa_size = inputa_size * s
+        prefix = prefix + "and"
+        for s in inputb_shape:
+            prefix = prefix + str(s) + "_"
+            inputb_size = inputb_size * s
+        number_rangea = min(1000, inputa_size)
+        number_rangeb = min(1000, inputb_size)
+        self.inputa = np.reshape(np.arange(0, inputa_size) % number_rangea - number_rangea / 2, inputa_shape).astype(
+            np.float32)
+        self.inputb = np.reshape(np.arange(0, inputb_size) % number_rangeb - number_rangeb / 2, inputb_shape).astype(
+            np.float32)
+        self.inputz = np.zeros(self.inputa.shape).astype(np.float32)
+        self.transpose_a = transpose_a
+        self.transpose_b = transpose_b
+
+        out_shape = []
+        device_matrix = []
+        out_strategy = []
+        if transpose_a:
+            temp = inputa_shape[-1]
+            inputa_shape[-1] = inputa_shape[-2]
+            inputa_shape[-2] = temp
+        if transpose_b:
+            temp = inputb_shape[-1]
+            inputb_shape[-1] = inputb_shape[-2]
+            inputb_shape[-2] = temp
+
+        if len(inputa_shape) >= len(inputb_shape):
+            out_shape = list(inputa_shape)
+            out_shape[-1] = inputb_shape[-1]
+        else:
+            out_shape = list(inputb_shape)
+            out_shape[-2] = inputa_shape[-2]
+
+        strategy1 = list(self.strategy[1])
+        strategy2 = list(self.strategy[2])
+        if transpose_a:
+            temp = strategy1[-1]
+            strategy1[-1] = strategy1[-2]
+            strategy1[-2] = temp
+        if transpose_b:
+            temp = strategy2[-1]
+            strategy2[-1] = strategy2[-2]
+            strategy2[-2] = temp
+
+        if len(strategy1) >= len(strategy2):
+            out_strategy = strategy1.copy()
+            out_strategy[-1] = strategy2[-1]
+        else:
+            out_strategy = strategy2.copy()
+            out_strategy[-2] = strategy1[-2]
+        device_matrix = out_strategy.copy()
+        device_matrix.insert(-1, strategy1[-1])
+        self.out_strategy = out_strategy
+
+        need_dev_num = 1
+        for s in device_matrix:
+            need_dev_num = need_dev_num * s
+        self.need_dev_num = need_dev_num
+        self.device_matrix = device_matrix
+
+        out_size = 1
+        for s in out_shape:
+            out_size = out_size * s
+        number_range = min(1000, out_size)
+        self.output_grad_np = np.reshape(np.arange(0, out_size) % number_range - number_range / 2, out_shape).astype(
+            np.float32)
+
+        device_index = self.id_to_list(device_id % need_dev_num, self.device_matrix)
+        x_index = device_index[:-1].copy()
+        if transpose_a:
+            temp = x_index[-1]
+            x_index[-1] = x_index[-2]
+            x_index[-2] = temp
+        y_index = device_index[:-3].copy()
+        y_index.append(device_index[-2])
+        y_index.append(device_index[-1])
+        if transpose_b:
+            temp = y_index[-1]
+            y_index[-1] = y_index[-2]
+            y_index[-2] = temp
+
+        out_index = device_index[:-2].copy()
+        out_index.append(device_index[-1])
+
+        print(device_matrix)
+        print(device_index)
+
+        need_dev_num_ = 1
+        for s in strategy_[1]:
+            need_dev_num_ = need_dev_num_ * s
+        self.x_id = device_id % need_dev_num_
+        self.y_id = self.list_to_id(y_index, self.strategy[2])
+        self.out_id = self.list_to_id(out_index, self.out_strategy)
+
+    def get_parallel_blocks(self, input_, strategy):
+        blocks = [input_]
+        i = 0
+        for stra in strategy:
+            temp = []
+            while len(blocks) > 0:
+                block = blocks.pop(0)
+                temp.extend(np.split(block, stra, axis=i))
+            blocks.extend(temp)
+            i += 1
+        return blocks
+
+
+    def id_to_list(self, id_, shape):
+        """
+        shape: the upper bound of each dimension, e.g. (2, 4, 8)
+        """
+        result = []
+        r = id_
+        for i in range(0, len(shape)):
+            v = 1
+            for j in range(i + 1, len(shape)):
+                v = v * shape[j]
+            result.append(r // v)
+            r = r % v
+        return result
+
+    def list_to_id(self, id_list, shape):
+        result = 0
+        for i in range(0, 
len(id_list)):
+            v = 1
+            for j in range(i + 1, len(id_list)):
+                v = v * shape[j]
+            result = result + id_list[i] * v
+        return result
+
+    def forward_mindspore_impl(self):
+        if len(self.inputa.shape) > 2:
+            matmul = BatchMatMul(self.transpose_a, self.transpose_b)
+        else:
+            matmul = Matmul(self.transpose_a, self.transpose_b)
+        matmul.set_train()
+        out_me = matmul(Tensor(self.inputa), Tensor(self.inputb), Tensor(self.inputz))
+        return out_me.asnumpy()
+
+    def forward_mindspore_parallel_impl(self):
+        if len(self.inputa.shape) > 2:
+            matmul = BatchMatMul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_)
+        else:
+            matmul = Matmul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_)
+        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+        x = Tensor(self.inputa)
+        y = Tensor(self.inputb)
+        z = Tensor(self.inputz)
+        xs = self.get_parallel_blocks(self.inputa, self.strategy_[1])
+        ys = self.get_parallel_blocks(self.inputb, self.strategy[2])
+        zs = self.get_parallel_blocks(self.inputz, self.strategy_[1])
+        x1 = Tensor(xs[self.x_id])  #
+        y1 = Tensor(ys[self.y_id])  # needs to be derived from the device matrix
+        z1 = Tensor(zs[self.x_id])
+        matmul.set_train()
+        matmul.set_auto_parallel()
+        out_me = matmul(x, y, z, parallel_inputs_compile=[x, y, z], parallel_inputs_run=[x1, y1, z1])
+        return out_me.asnumpy()
+
+    def grad_mindspore_impl(self):
+        x = Tensor(self.inputa)
+        y = Tensor(self.inputb)
+        z = Tensor(self.inputz)
+        if len(self.inputa.shape) > 2:
+            matmul = BatchMatMul(self.transpose_a, self.transpose_b)
+        else:
+            matmul = Matmul(self.transpose_a, self.transpose_b)
+        net_me = Grad(matmul)
+        net_me.set_train()
+        out_grad_me = Tensor(self.output_grad_np)
+        out_grad = net_me(x, y, z, out_grad_me)
+        return out_grad
+
+    def grad_mindspore_parallel_impl(self):
+        if len(self.inputa.shape) > 2:
+            matmul = BatchMatMul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_)
+        else:
+            matmul = Matmul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_)
+        x = Tensor(self.inputa)
+        y = Tensor(self.inputb)
+        z = Tensor(self.inputz)
+        out_grad_me = Tensor(self.output_grad_np)
+
+        xs = self.get_parallel_blocks(self.inputa, self.strategy_[1])
+        ys = self.get_parallel_blocks(self.inputb, self.strategy[2])
+        zs = self.get_parallel_blocks(self.inputz, self.strategy_[1])
+        out_grads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
+
+        x1 = Tensor(xs[self.x_id])  # needs to be derived from the device matrix
+        y1 = Tensor(ys[self.y_id])  #
+        z1 = Tensor(zs[self.x_id])
+        out_grad1 = Tensor(out_grads[self.out_id])
+        net_me = Grad(matmul)
+        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+        net_me.set_auto_parallel()
+        net_me.set_train()
+
+        out_grad = net_me(x, y, z, out_grad_me, parallel_inputs_compile=[x, y, z, out_grad1],
+                          parallel_inputs_run=[x1, y1, z1, out_grad1])
+        return out_grad
+
+    def forward_cmp(self):
+        out_mindspore = self.forward_mindspore_impl()
+        out_mindspores = self.get_parallel_blocks(out_mindspore, self.out_strategy)
+        out_mindspore_parallel = self.forward_mindspore_parallel_impl()
+        assert allclose(out_mindspores[self.out_id], out_mindspore_parallel, 0.0001, 0.0001)
+
+    def grad_cmp(self):
+        input_grad_mindspore = self.grad_mindspore_impl()
+        input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
+        input_grad_mindspores0 = self.get_parallel_blocks(input_grad_mindspore[0].asnumpy(), self.strategy_[1])
+        input_grad_mindspores1 = 
self.get_parallel_blocks(input_grad_mindspore[1].asnumpy(), self.strategy[2]) + input_grad_mindspores2 = self.get_parallel_blocks(input_grad_mindspore[2].asnumpy(), self.strategy_[1]) + assert allclose(input_grad_mindspores0[self.x_id], input_grad_mindspore_parallel[0].asnumpy(), 0.0001, 0.0001) + assert allclose(input_grad_mindspores1[self.y_id], input_grad_mindspore_parallel[1].asnumpy(), 0.0001, 0.0001) + assert allclose(input_grad_mindspores2[self.x_id], input_grad_mindspore_parallel[2].asnumpy(), 0.0001, 0.0001) + + +def test_reid_batchmatmul_inputa_128_512_inputb_2000_512(): + inputa = [128, 512] + inputb = [2000, 512] + fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (2, 2), (1, 2)), (0, (2, 2), (2, 2))) + fact.forward_cmp() + + +def test_reid_batchmatmul_grad_inputa_128_512_inputb_2000_512(): + inputa = [128, 512] + inputb = [2000, 512] + fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (2, 2), (1, 2)), (0, (2, 2), (2, 2))) + fact.grad_cmp() + + +def test_reid_batchmatmul_inputa_128_512_inputb_2000_512_redistribution(): + inputa = [128, 512] + inputb = [2000, 512] + fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (1, 2), (1, 2)), (0, (2, 2), (2, 2))) + fact.forward_cmp() + + +def test_reid_batchmatmul_grad_inputa_128_512_inputb_2000_512_redistribution(): + inputa = [128, 512] + inputb = [2000, 512] + fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (1, 2), (1, 2)), (0, (2, 2), (2, 2))) + fact.grad_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/max/_test_max_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/max/_test_max_parallel_4p.py index 017ee7ebc3..d4247f7319 100644 --- a/tests/ut/python/parallel/parallel_end_to_end/max/_test_max_parallel_4p.py +++ b/tests/ut/python/parallel/parallel_end_to_end/max/_test_max_parallel_4p.py @@ -1,214 +1,213 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import numpy as np -import os -import pytest - -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, input1, input2, output_grad): - return grad_all_with_sens(self.network)(input1, input2, output_grad) - - -class Max(Cell): - def __init__(self, axis, keep_dims, strategy0=None, strategy1=None): - super(Max, self).__init__() - self.add = P.TensorAdd(strategy=strategy0) - self.reduce_max = P.ReduceMax(keep_dims=keep_dims).set_strategy(strategy=strategy1) - self.axis = axis - - def construct(self, input1, input2): - out = self.add(input1, input2) - return self.reduce_max(out, self.axis) - - -class MaxFactory: - def __init__(self, input_shape, axis, keep_dims, strategy0, strategy1): - self.strategy0 = strategy0 - self.strategy1 = strategy1 - self.axis = axis - self.keep_dims = keep_dims - input_size = 1 - prefix = "" - for s in input_shape: - prefix = prefix + str(s) + "_" - input_size = input_size * s - number_range = min(1000, input_size) - self.input_np1 = np.reshape(np.arange(0, input_size) % number_range - number_range / 2, input_shape).astype( - np.float32) - self.input_np2 = self.input_np1.copy() - self.out_grad_np = None - out_shape = list(input_shape) - out_shape.pop(axis) - out_size = input_size / input_shape[axis] - number_range_ = min(1000, out_size) - self.out_grad_np = np.reshape(np.arange(0, out_size) % number_range_ - number_range_ / 2, out_shape).astype( - np.float32) - out_strategy = list(strategy1[1]) - out_strategy.pop(axis) - self.out_strategy = out_strategy - need_dev_num = 1 - need_dev_num_ = 1 - for s in strategy0[1]: - need_dev_num = need_dev_num * s - for s in out_strategy: - need_dev_num_ = need_dev_num_ * s - self.x_id = device_id % need_dev_num - self.y_id = device_id % need_dev_num - self.out_id = device_id % need_dev_num_ - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def forward_mindspore_impl(self): - input1 = Tensor(self.input_np1) - input2 = Tensor(self.input_np2) - net = Max(axis=self.axis, keep_dims=self.keep_dims) - out = net(input1, input2) - return out.asnumpy() - - def forward_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - xs = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - ys = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - x1 = Tensor(xs[self.x_id]) - y1 = Tensor(ys[self.y_id]) - net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - 
net.set_auto_parallel() - out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) - return out.asnumpy() - - def grad_mindspore_impl(self): - input1 = Tensor(self.input_np1) - input2 = Tensor(self.input_np2) - out_grad = Tensor(self.out_grad_np) - net = Max(axis=self.axis, keep_dims=self.keep_dims) - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(input1, input2, out_grad) - return input_grad - - def grad_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - output_grads = self.get_parallel_blocks(self.out_grad_np, self.out_strategy) - out_grad = Tensor(output_grads[self.out_id]) - xs = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - ys = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - x1 = Tensor(xs[self.x_id]) - y1 = Tensor(ys[self.y_id]) - net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1) - grad_net = Grad(net) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - grad_net.set_auto_parallel() - grad_net.set_train() - input_grad = grad_net(x, y, out_grad, parallel_inputs_compile=[x, y, out_grad], - parallel_inputs_run=[x1, y1, out_grad]) - return input_grad - - def forward_cmp(self): - out_mindspore = self.forward_mindspore_impl() - out_mindspore_parallel = self.forward_mindspore_parallel_impl() - print(out_mindspore) - print(out_mindspore_parallel) - out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) - assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001) - - def grad_cmp(self): - input_grad_mindspore = self.grad_mindspore_impl() - input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() - input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() - input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() - input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() - input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() - input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) - input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) - assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) - assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) - - -def test_reid_max_forward_input_256_64(): - fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (4, 1), (4, 1)), - strategy1=(0, (4, 1))) - fact.forward_cmp() - - -def test_reid_max_grad_input_256_64(): - fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (4, 1), (4, 1)), - strategy1=(0, (4, 1))) - fact.grad_cmp() - - -def test_reid_max_forward_input_128_64_32_32(): - fact = MaxFactory(input_shape=(128, 64, 32, 32), axis=3, keep_dims=False, strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), - strategy1=(0, (2, 1, 2, 1))) - fact.forward_cmp() - - -def test_reid_max_grad_input_128_64_32_32(): - fact = MaxFactory(input_shape=(128, 64, 32, 32), axis=3, keep_dims=False, strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), - strategy1=(0, (2, 1, 2, 1))) - fact.grad_cmp() - - -def test_reid_max_forward_input_256_64_repeat(): - fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (2, 1), (2, 1)), - strategy1=(0, (2, 1))) - fact.forward_cmp() - - -def test_reid_max_grad_input_256_64_repeat(): - fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, 
strategy0=(0, (2, 1), (2, 1)), - strategy1=(0, (2, 1))) - fact.grad_cmp() +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import numpy as np + +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.ops import operations as P +from mindspore.ops.composite import grad_all_with_sens + +device_num = 4 +device_id = int(os.environ["RANK_ID"]) +path = "./output/" + + +def setup_module(): + print("~~~~~~~~~~~set up~~~~~~~~~~~~~") + context.set_context(mode=context.GRAPH_MODE) + context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) + distributedTool.init() + distributedTool.create_group("0-3", [0, 1, 2, 3]) + print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") + + +def teardown_module(): + print("~~~~~~~~~~~~tear down~~~~~~~~~~") + + +class Grad(Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.network = network + + def construct(self, input1, input2, output_grad): + return grad_all_with_sens(self.network)(input1, input2, output_grad) + + +class Max(Cell): + def __init__(self, axis, keep_dims, strategy0=None, strategy1=None): + super(Max, self).__init__() + self.add = P.TensorAdd(strategy=strategy0) + self.reduce_max = P.ReduceMax(keep_dims=keep_dims).set_strategy(strategy=strategy1) + self.axis = axis + + def construct(self, input1, input2): + out = self.add(input1, input2) + return self.reduce_max(out, self.axis) + + +class MaxFactory: + def __init__(self, input_shape, axis, keep_dims, strategy0, strategy1): + self.strategy0 = strategy0 + self.strategy1 = strategy1 + self.axis = axis + self.keep_dims = keep_dims + input_size = 1 + prefix = "" + for s in input_shape: + prefix = prefix + str(s) + "_" + input_size = input_size * s + number_range = min(1000, input_size) + self.input_np1 = np.reshape(np.arange(0, input_size) % number_range - number_range / 2, input_shape).astype( + np.float32) + self.input_np2 = self.input_np1.copy() + self.out_grad_np = None + out_shape = list(input_shape) + out_shape.pop(axis) + out_size = input_size / input_shape[axis] + number_range_ = min(1000, out_size) + self.out_grad_np = np.reshape(np.arange(0, out_size) % number_range_ - number_range_ / 2, out_shape).astype( + np.float32) + out_strategy = list(strategy1[1]) + out_strategy.pop(axis) + self.out_strategy = out_strategy + need_dev_num = 1 + need_dev_num_ = 1 + for s in strategy0[1]: + need_dev_num = need_dev_num * s + for s in out_strategy: + need_dev_num_ = need_dev_num_ * s + self.x_id = device_id % need_dev_num + self.y_id = device_id % need_dev_num + self.out_id = device_id % need_dev_num_ + + def get_parallel_blocks(self, input_, strategy): + blocks = [input_] + i = 0 + for stra in strategy: + temp = [] + while len(blocks) > 0: + block = blocks.pop(0) + temp.extend(np.split(block, stra, axis=i)) + blocks.extend(temp) + i += 1 + return blocks + + def 
forward_mindspore_impl(self): + input1 = Tensor(self.input_np1) + input2 = Tensor(self.input_np2) + net = Max(axis=self.axis, keep_dims=self.keep_dims) + out = net(input1, input2) + return out.asnumpy() + + def forward_mindspore_parallel_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + xs = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + ys = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) + x1 = Tensor(xs[self.x_id]) + y1 = Tensor(ys[self.y_id]) + net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net.set_auto_parallel() + out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) + return out.asnumpy() + + def grad_mindspore_impl(self): + input1 = Tensor(self.input_np1) + input2 = Tensor(self.input_np2) + out_grad = Tensor(self.out_grad_np) + net = Max(axis=self.axis, keep_dims=self.keep_dims) + grad_net = Grad(net) + grad_net.set_train() + input_grad = grad_net(input1, input2, out_grad) + return input_grad + + def grad_mindspore_parallel_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + output_grads = self.get_parallel_blocks(self.out_grad_np, self.out_strategy) + out_grad = Tensor(output_grads[self.out_id]) + xs = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + ys = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) + x1 = Tensor(xs[self.x_id]) + y1 = Tensor(ys[self.y_id]) + net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1) + grad_net = Grad(net) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + grad_net.set_auto_parallel() + grad_net.set_train() + input_grad = grad_net(x, y, out_grad, parallel_inputs_compile=[x, y, out_grad], + parallel_inputs_run=[x1, y1, out_grad]) + return input_grad + + def forward_cmp(self): + out_mindspore = self.forward_mindspore_impl() + out_mindspore_parallel = self.forward_mindspore_parallel_impl() + print(out_mindspore) + print(out_mindspore_parallel) + out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) + assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001) + + def grad_cmp(self): + input_grad_mindspore = self.grad_mindspore_impl() + input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() + input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() + input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() + input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() + input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() + input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) + input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) + assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) + assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) + + +def test_reid_max_forward_input_256_64(): + fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (4, 1), (4, 1)), + strategy1=(0, (4, 1))) + fact.forward_cmp() + + +def test_reid_max_grad_input_256_64(): + fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (4, 1), (4, 1)), + strategy1=(0, (4, 1))) + fact.grad_cmp() + + +def test_reid_max_forward_input_128_64_32_32(): + fact = 
MaxFactory(input_shape=(128, 64, 32, 32), axis=3, keep_dims=False, strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), + strategy1=(0, (2, 1, 2, 1))) + fact.forward_cmp() + + +def test_reid_max_grad_input_128_64_32_32(): + fact = MaxFactory(input_shape=(128, 64, 32, 32), axis=3, keep_dims=False, strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), + strategy1=(0, (2, 1, 2, 1))) + fact.grad_cmp() + + +def test_reid_max_forward_input_256_64_repeat(): + fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (2, 1), (2, 1)), + strategy1=(0, (2, 1))) + fact.forward_cmp() + + +def test_reid_max_grad_input_256_64_repeat(): + fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (2, 1), (2, 1)), + strategy1=(0, (2, 1))) + fact.grad_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/mul_softmax/need_fix_test_mul_softmax_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/mul_softmax/need_fix_test_mul_softmax_parallel_4p.py index f37e6176d6..19bf73f38a 100644 --- a/tests/ut/python/parallel/parallel_end_to_end/mul_softmax/need_fix_test_mul_softmax_parallel_4p.py +++ b/tests/ut/python/parallel/parallel_end_to_end/mul_softmax/need_fix_test_mul_softmax_parallel_4p.py @@ -1,201 +1,200 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import numpy as np -import os -import pytest -from numpy import allclose - -import mindspore as ms -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class MulSoftmax(Cell): - def __init__(self, strategy0=None, strategy1=None, axis=0): - super(MulSoftmax, self).__init__() - self.mul = P.Mul(strategy=strategy0) - self.softmax = P.Softmax(axis=axis, strategy=strategy1) - - def construct(self, x, z): - out = self.mul(x, z) - return self.softmax(out) - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, x, y, output_grad): - return grad_all_with_sens(self.network)(x, y, output_grad) - - -class MulSoftmaxFactory: - def __init__(self, input_shape, strategy0, strategy1): - prefix = "" - size = 1 - for s in input_shape: - prefix = prefix + str(s) - size = size * s - self.prefix = prefix - number_range = min(1000, size) - self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( - np.float32) - self.input_np2 = 1.0 - self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1, - input_shape).astype(np.float32) - self.strategy0 = strategy0 - self.strategy1 = strategy1 - need_dev_num = 1 - need_dev_num_ = 1 - for s in strategy0[1]: - need_dev_num = need_dev_num * s - for s in strategy1[1]: - need_dev_num_ = need_dev_num_ * s - self.x_id = device_id % need_dev_num - self.y_id = device_id % need_dev_num - self.out_id = device_id % need_dev_num_ - - def forward_mindspore_impl(self): - net = MulSoftmax() - x = Tensor(self.input_np1) - y = Tensor(self.input_np2, ms.float32) - out = net(x, y) - return out.asnumpy() - - def forward_mindspore_parallel_impl(self): - net = MulSoftmax(strategy0=self.strategy0, strategy1=self.strategy1) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net.set_auto_parallel() - x = Tensor(self.input_np1) - y = Tensor(self.input_np2, ms.float32) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(self.input_np2, ms.float32) - out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) - return out.asnumpy() - - def grad_mindspore_impl(self): - output_grad = Tensor(self.output_grad_np) - x = Tensor(self.input_np1) - y = Tensor(self.input_np2, ms.float32) - net = MulSoftmax() - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, output_grad) - return input_grad - - def grad_mindspore_parallel_impl(self): - output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy1[1]) - output_grad = Tensor(output_grads[self.out_id]) - x = Tensor(self.input_np1) - y = Tensor(self.input_np2, ms.float32) - net = MulSoftmax(strategy0=self.strategy0, strategy1=self.strategy1) - grad_net = Grad(net) - 
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
-        grad_net.set_train()
-        grad_net.set_auto_parallel()
-        inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
-        x1 = Tensor(inputs_x[self.x_id])
-        y1 = Tensor(self.input_np2, ms.float32)
-        input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad],
-                              parallel_inputs_run=[x1, y1, output_grad])
-        return input_grad
-
-    def get_parallel_blocks(self, input_, strategy):
-        blocks = [input_]
-        i = 0
-        for stra in strategy:
-            temp = []
-            while len(blocks) > 0:
-                block = blocks.pop(0)
-                temp.extend(np.split(block, stra, axis=i))
-            blocks.extend(temp)
-            i += 1
-        return blocks
-
-    def forward_cmp(self):
-        out_mindspore = self.forward_mindspore_impl()
-        out_mindspore_parallel = self.forward_mindspore_parallel_impl()
-        np.save(path + str(device_id) + "_" + self.prefix + "_forward_parallel.npy", out_mindspore_parallel)
-        out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy1[1])
-        assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001)
-
-    def grad_cmp(self):
-        input_grad_mindspore = self.grad_mindspore_impl()
-        input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
-        input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
-        input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
-        input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
-        input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
-        np.save(path + str(device_id) + "_" + self.prefix + "_grad_parallel0.npy", input_grad_mindspore_parallel0)
-        np.save(path + str(device_id) + "_" + self.prefix + "_grad_parallel1.npy", input_grad_mindspore_parallel1)
-        input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0,
-                                                       self.strategy0[1])  # here X1 of TensorMul's two inputs is not broadcast, while X2 is broadcast
-        assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
-        assert np.allclose(input_grad_mindspore1, input_grad_mindspore_parallel1, 0.0001, 0.0001)
-
-
-@pytest.mark.reid_forward
-def test_reid_mul_softmax_input_128x64():
-    stra0 = (0, (1, 4), ())
-    stra1 = (0, (1, 4))
-    fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
-    fact.forward_cmp()
-
-
-@pytest.mark.reid_grad
-def test_reid_grad_mul_softmax_input_128x64():
-    stra0 = (0, (1, 4), ())
-    stra1 = (0, (1, 4))
-    fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
-    fact.grad_cmp()
-
-
-@pytest.mark.reid_forward
-def test_reid_mul_softmax_input_128x64_all_to_all():
-    stra0 = (0, (4, 1), ())
-    stra1 = (0, (1, 4))
-    fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
-    fact.forward_cmp()
-
-
-@pytest.mark.reid_grad
-def test_reid_grad_mul_softmax_input_128x64_all_to_all():
-    stra0 = (0, (4, 1), ())
-    stra1 = (0, (1, 4))
-    fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
-    fact.grad_cmp()
+# Copyright 2019 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import numpy as np +import pytest + +import mindspore as ms +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.ops import operations as P +from mindspore.ops.composite import grad_all_with_sens + +device_num = 4 +device_id = int(os.environ["RANK_ID"]) +path = "./output/" + + +def setup_module(): + print("~~~~~~~~~~~set up~~~~~~~~~~~~~") + context.set_context(mode=context.GRAPH_MODE) + context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) + distributedTool.init() + distributedTool.create_group("0-3", [0, 1, 2, 3]) + print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") + + +def teardown_module(): + print("~~~~~~~~~~~~tear down~~~~~~~~~~") + + +class MulSoftmax(Cell): + def __init__(self, strategy0=None, strategy1=None, axis=0): + super(MulSoftmax, self).__init__() + self.mul = P.Mul(strategy=strategy0) + self.softmax = P.Softmax(axis=axis, strategy=strategy1) + + def construct(self, x, z): + out = self.mul(x, z) + return self.softmax(out) + + +class Grad(Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.network = network + + def construct(self, x, y, output_grad): + return grad_all_with_sens(self.network)(x, y, output_grad) + + +class MulSoftmaxFactory: + def __init__(self, input_shape, strategy0, strategy1): + prefix = "" + size = 1 + for s in input_shape: + prefix = prefix + str(s) + size = size * s + self.prefix = prefix + number_range = min(1000, size) + self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( + np.float32) + self.input_np2 = 1.0 + self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1, + input_shape).astype(np.float32) + self.strategy0 = strategy0 + self.strategy1 = strategy1 + need_dev_num = 1 + need_dev_num_ = 1 + for s in strategy0[1]: + need_dev_num = need_dev_num * s + for s in strategy1[1]: + need_dev_num_ = need_dev_num_ * s + self.x_id = device_id % need_dev_num + self.y_id = device_id % need_dev_num + self.out_id = device_id % need_dev_num_ + + def forward_mindspore_impl(self): + net = MulSoftmax() + x = Tensor(self.input_np1) + y = Tensor(self.input_np2, ms.float32) + out = net(x, y) + return out.asnumpy() + + def forward_mindspore_parallel_impl(self): + net = MulSoftmax(strategy0=self.strategy0, strategy1=self.strategy1) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net.set_auto_parallel() + x = Tensor(self.input_np1) + y = Tensor(self.input_np2, ms.float32) + inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + x1 = Tensor(inputs_x[self.x_id]) + y1 = Tensor(self.input_np2, ms.float32) + out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) + return out.asnumpy() + + def grad_mindspore_impl(self): + output_grad = Tensor(self.output_grad_np) + x = Tensor(self.input_np1) + y = Tensor(self.input_np2, ms.float32) + net = MulSoftmax() + grad_net = Grad(net) + grad_net.set_train() + input_grad = grad_net(x, y, output_grad) + return input_grad + + def grad_mindspore_parallel_impl(self): + output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy1[1]) + output_grad = Tensor(output_grads[self.out_id]) + x = Tensor(self.input_np1) + y = Tensor(self.input_np2, ms.float32) + net = 
MulSoftmax(strategy0=self.strategy0, strategy1=self.strategy1)
+        grad_net = Grad(net)
+        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+        grad_net.set_train()
+        grad_net.set_auto_parallel()
+        inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
+        x1 = Tensor(inputs_x[self.x_id])
+        y1 = Tensor(self.input_np2, ms.float32)
+        input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad],
+                              parallel_inputs_run=[x1, y1, output_grad])
+        return input_grad
+
+    def get_parallel_blocks(self, input_, strategy):
+        blocks = [input_]
+        i = 0
+        for stra in strategy:
+            temp = []
+            while len(blocks) > 0:
+                block = blocks.pop(0)
+                temp.extend(np.split(block, stra, axis=i))
+            blocks.extend(temp)
+            i += 1
+        return blocks
+
+    def forward_cmp(self):
+        out_mindspore = self.forward_mindspore_impl()
+        out_mindspore_parallel = self.forward_mindspore_parallel_impl()
+        np.save(path + str(device_id) + "_" + self.prefix + "_forward_parallel.npy", out_mindspore_parallel)
+        out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy1[1])
+        assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001)
+
+    def grad_cmp(self):
+        input_grad_mindspore = self.grad_mindspore_impl()
+        input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
+        input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
+        input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
+        input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
+        input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
+        np.save(path + str(device_id) + "_" + self.prefix + "_grad_parallel0.npy", input_grad_mindspore_parallel0)
+        np.save(path + str(device_id) + "_" + self.prefix + "_grad_parallel1.npy", input_grad_mindspore_parallel1)
+        input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0,
+                                                       self.strategy0[1])  # here X1 of TensorMul's two inputs is not broadcast, while X2 is broadcast
+        assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
+        assert np.allclose(input_grad_mindspore1, input_grad_mindspore_parallel1, 0.0001, 0.0001)
+
+
+@pytest.mark.reid_forward
+def test_reid_mul_softmax_input_128x64():
+    stra0 = (0, (1, 4), ())
+    stra1 = (0, (1, 4))
+    fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
+    fact.forward_cmp()
+
+
+@pytest.mark.reid_grad
+def test_reid_grad_mul_softmax_input_128x64():
+    stra0 = (0, (1, 4), ())
+    stra1 = (0, (1, 4))
+    fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
+    fact.grad_cmp()
+
+
+@pytest.mark.reid_forward
+def test_reid_mul_softmax_input_128x64_all_to_all():
+    stra0 = (0, (4, 1), ())
+    stra1 = (0, (1, 4))
+    fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
+    fact.forward_cmp()
+
+
+@pytest.mark.reid_grad
+def test_reid_grad_mul_softmax_input_128x64_all_to_all():
+    stra0 = (0, (4, 1), ())
+    stra1 = (0, (1, 4))
+    fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
+    fact.grad_cmp()
diff --git a/tests/ut/python/parallel/parallel_end_to_end/onehot/_test_onehot_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/onehot/_test_onehot_parallel_4p.py
index 9c79a0b05f..0648d769ab 100644
--- a/tests/ut/python/parallel/parallel_end_to_end/onehot/_test_onehot_parallel_4p.py
+++ b/tests/ut/python/parallel/parallel_end_to_end/onehot/_test_onehot_parallel_4p.py
@@ -1,149 +1,147 @@
-# Copyright 2019 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License,
Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import os -import pytest -from numpy import allclose - -import mindspore as ms -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class Onehot(Cell): - def __init__(self, axis=-1, depth=1, on_value=1.0, off_value=0.0, strategy=None): - super(Onehot, self).__init__() - self.onehot = P.OneHot(axis, strategy=strategy) - self.depth = depth - self.on_value = Tensor(on_value, ms.float32) - self.off_value = Tensor(off_value, ms.float32) - - def construct(self, indices): - return self.onehot(indices, self.depth, self.on_value, self.off_value) - - -class OneHotFactory: - def __init__(self, input_shape, depth, on_value=1.0, off_value=0.0, axis=None, dtype=None, strategy0=None): - size = 1 - prefix = "" - for s in input_shape: - prefix = prefix + str(s) - size = size * s - self.prefix = prefix - number_range = min(10, size) - self.input_np = np.reshape(np.arange(0, size) % number_range, input_shape).astype(np.int32) - self.depth = depth - self.on_value = on_value - self.off_value = off_value - self.axis = axis - self.dtype = dtype - self.strategy0 = strategy0 - need_dev_num = 1 - for s in strategy0[1]: - need_dev_num = need_dev_num * s - self.x_id = device_id % need_dev_num - self.out_id = device_id % need_dev_num - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def grad_mindspore_impl(self): - output_grad = Tensor(self.output_grad_np) - x = Tensor(self.input_np1) - y = Tensor(self.input_np2, ms.float32) - net = AddRelu() - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, output_grad) - return input_grad - - def forward_mindspore_impl(self): - indices = Tensor(self.input_np) - net = Onehot(axis=self.axis, - depth=self.depth, - on_value=self.on_value, - off_value=self.off_value) - out = net(indices) - return out.asnumpy() - - def forward_mindspore_parallel_impl(self): - x = Tensor(self.input_np) - inputs_x = self.get_parallel_blocks(self.input_np, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - net = Onehot(axis=self.axis, - depth=self.depth, - on_value=self.on_value, - off_value=self.off_value, strategy=self.strategy0) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - 
net.set_auto_parallel() - out = net(x, parallel_inputs_compile=[x], parallel_inputs_run=[x1]) - return out.asnumpy() - - def forward_cmp(self): - out_mindspore = self.forward_mindspore_impl() - out_mindspore_parallel = self.forward_mindspore_parallel_impl() - out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy0[1]) - assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.0001) - - -def test_reid_onehot_forward_int32_128_depth13000(): - fact = OneHotFactory(input_shape=(128,), - depth=131072, - on_value=1.000000, - off_value=0.000000, - axis=-1, - dtype="float32", - strategy0=(0, (2,))) - fact.forward_cmp() - - -def test_reid_onehot_forward_int32_131072_depth127(): - fact = OneHotFactory(input_shape=(131072,), - depth=127, - on_value=1.000000, - off_value=0.000000, - axis=-1, - dtype="float32", - strategy0=(0, (4,))) - fact.forward_cmp() +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import numpy as np + +import mindspore as ms +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.ops import operations as P + +device_num = 4 +device_id = int(os.environ["RANK_ID"]) +path = "./output/" + + +def setup_module(): + print("~~~~~~~~~~~set up~~~~~~~~~~~~~") + context.set_context(mode=context.GRAPH_MODE) + context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) + distributedTool.init() + distributedTool.create_group("0-3", [0, 1, 2, 3]) + print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") + + +def teardown_module(): + print("~~~~~~~~~~~~tear down~~~~~~~~~~") + + +class Onehot(Cell): + def __init__(self, axis=-1, depth=1, on_value=1.0, off_value=0.0, strategy=None): + super(Onehot, self).__init__() + self.onehot = P.OneHot(axis, strategy=strategy) + self.depth = depth + self.on_value = Tensor(on_value, ms.float32) + self.off_value = Tensor(off_value, ms.float32) + + def construct(self, indices): + return self.onehot(indices, self.depth, self.on_value, self.off_value) + + +class OneHotFactory: + def __init__(self, input_shape, depth, on_value=1.0, off_value=0.0, axis=None, dtype=None, strategy0=None): + size = 1 + prefix = "" + for s in input_shape: + prefix = prefix + str(s) + size = size * s + self.prefix = prefix + number_range = min(10, size) + self.input_np = np.reshape(np.arange(0, size) % number_range, input_shape).astype(np.int32) + self.depth = depth + self.on_value = on_value + self.off_value = off_value + self.axis = axis + self.dtype = dtype + self.strategy0 = strategy0 + need_dev_num = 1 + for s in strategy0[1]: + need_dev_num = need_dev_num * s + self.x_id = device_id % need_dev_num + self.out_id = device_id % need_dev_num + + def get_parallel_blocks(self, input_, strategy): + blocks = [input_] + i = 0 + for stra in strategy: + temp = [] + while len(blocks) > 0: + block = blocks.pop(0) + temp.extend(np.split(block, stra, axis=i)) + 
blocks.extend(temp) + i += 1 + return blocks + + def grad_mindspore_impl(self): + output_grad = Tensor(self.output_grad_np) + x = Tensor(self.input_np1) + y = Tensor(self.input_np2, ms.float32) + net = AddRelu() + grad_net = Grad(net) + grad_net.set_train() + input_grad = grad_net(x, y, output_grad) + return input_grad + + def forward_mindspore_impl(self): + indices = Tensor(self.input_np) + net = Onehot(axis=self.axis, + depth=self.depth, + on_value=self.on_value, + off_value=self.off_value) + out = net(indices) + return out.asnumpy() + + def forward_mindspore_parallel_impl(self): + x = Tensor(self.input_np) + inputs_x = self.get_parallel_blocks(self.input_np, self.strategy0[1]) + x1 = Tensor(inputs_x[self.x_id]) + net = Onehot(axis=self.axis, + depth=self.depth, + on_value=self.on_value, + off_value=self.off_value, strategy=self.strategy0) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net.set_auto_parallel() + out = net(x, parallel_inputs_compile=[x], parallel_inputs_run=[x1]) + return out.asnumpy() + + def forward_cmp(self): + out_mindspore = self.forward_mindspore_impl() + out_mindspore_parallel = self.forward_mindspore_parallel_impl() + out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy0[1]) + assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.0001) + + +def test_reid_onehot_forward_int32_128_depth13000(): + fact = OneHotFactory(input_shape=(128,), + depth=131072, + on_value=1.000000, + off_value=0.000000, + axis=-1, + dtype="float32", + strategy0=(0, (2,))) + fact.forward_cmp() + + +def test_reid_onehot_forward_int32_131072_depth127(): + fact = OneHotFactory(input_shape=(131072,), + depth=127, + on_value=1.000000, + off_value=0.000000, + axis=-1, + dtype="float32", + strategy0=(0, (4,))) + fact.forward_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/prelu/_test_prelu_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/prelu/_test_prelu_parallel_4p.py index 161b25430d..f0a45111bc 100644 --- a/tests/ut/python/parallel/parallel_end_to_end/prelu/_test_prelu_parallel_4p.py +++ b/tests/ut/python/parallel/parallel_end_to_end/prelu/_test_prelu_parallel_4p.py @@ -1,206 +1,206 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import numpy as np -import os -import pytest -from numpy import allclose - -import mindspore as ms -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class PReLU(Cell): - def __init__(self, channel=1, w=0.25, strategy_=None, strategy1_=None): - super(PReLU, self).__init__() - self.add = P.TensorAdd(strategy=strategy1_) - self.prelu = P.PReLU(strategy=strategy_) - - def construct(self, x, z, w): - out = self.add(x, z) - return self.prelu(out, w) - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, input, z, w, output_grad): - return grad_all_with_sens(self.network)(input, z, w, output_grad) - - -class PReLUFactory: - def __init__(self, input_shape, strategy): - n, c = input_shape[:2] - prefix = "" - size = 1 - for s in input_shape: - prefix = prefix + str(s) - size = size * s - self.prefix = prefix - number_range = min(1000, size) - self.input_np = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(np.float32) - self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1, - input_shape).astype(np.float32) - self.channel = c - self.weight = np.array([np.float32(0.25)] * c) - self.strategy = strategy - - def forward_mindspore_impl(self): - net = PReLU(channel=self.channel, w=self.weight) - x = Tensor(self.input_np) - z = Tensor(np.zeros(self.input_np.shape), ms.float32) - w = Tensor(self.weight) - out = net(x, z, w) - return out.asnumpy() - - def forward_mindspore_parallel_impl(self): - net = PReLU(channel=self.channel, w=self.weight, strategy_=self.strategy, - strategy1_=(self.strategy[0], self.strategy[1], self.strategy[1])) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net.set_auto_parallel() - x = Tensor(self.input_np) - z = Tensor(np.zeros(self.input_np.shape), ms.float32) - w = Tensor(self.weight) - - inputs = self.get_parallel_blocks(self.input_np, self.strategy[1]) - block_id = device_id % len(inputs) - x1 = Tensor(inputs[block_id]) - z1 = Tensor(np.zeros(inputs[block_id].shape), ms.float32) - w1 = Tensor(self.weight) - - out = net(x, z, w, parallel_inputs_compile=[x, z, w], parallel_inputs_run=[x1, z1, w1]) - return out.asnumpy() - - def grad_mindspore_impl(self): - output_grad = Tensor(self.output_grad_np) - x = Tensor(self.input_np) - z = Tensor(np.zeros(self.input_np.shape), ms.float32) - w = Tensor(self.weight) - - net = PReLU(channel=self.channel, w=self.weight) - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, z, w, output_grad) - return input_grad - - def grad_mindspore_parallel_impl(self): - output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy[1]) - block_id = device_id % len(output_grads) - output_grad = Tensor(output_grads[block_id]) - x = Tensor(self.input_np) - z 
= Tensor(np.zeros(self.input_np.shape), ms.float32) - w = Tensor(self.weight) - - net = PReLU(channel=self.channel, w=self.weight, strategy_=self.strategy, - strategy1_=(self.strategy[0], self.strategy[1], self.strategy[1])) - grad_net = Grad(net) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - grad_net.set_auto_parallel() - - grad_net.set_train() - inputs = self.get_parallel_blocks(self.input_np, self.strategy[1]) - x1 = Tensor(inputs[block_id]) - z1 = Tensor(np.zeros(inputs[block_id].shape), ms.float32) - w1 = Tensor(self.weight) - - input_grad = grad_net(x, z, w, output_grad, parallel_inputs_compile=[x, z, w, output_grad], - parallel_inputs_run=[x1, z1, w1, output_grad]) - return input_grad - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def forward_cmp(self): - out_mindspore = self.forward_mindspore_impl() - out_mindspore_parallel = self.forward_mindspore_parallel_impl() - out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy[1]) - block_id = device_id % len(out_blocks) - assert np.allclose(out_blocks[block_id], out_mindspore_parallel, 0.0001, 0.001) - - def grad_cmp(self): - input_grad_mindspore = self.grad_mindspore_impl() - input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() - input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() - input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() - input_grad_mindspore2 = input_grad_mindspore[2].asnumpy() - input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() - input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() - input_grad_mindspore_parallel2 = input_grad_mindspore_parallel[2].asnumpy() - input_grad_blocks = self.get_parallel_blocks(input_grad_mindspore0, self.strategy[1]) - input1_grad_blocks = self.get_parallel_blocks(input_grad_mindspore1, self.strategy[1]) - block_id = device_id % len(input_grad_blocks) - assert np.allclose(input_grad_blocks[block_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) - assert np.allclose(input_grad_mindspore2, input_grad_mindspore_parallel2, 0.0001, 0.0001) - assert np.allclose(input1_grad_blocks[block_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) - - -@pytest.mark.reid_grad -def test_reid_prelu_input_128x64x112x112_repeat(): - stra = (0, (1, 1, 2, 1), (1)) - fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) - fact.forward_cmp() - - -@pytest.mark.reid_grad -def test_reid_grad_prelu_input_128x64x112x112_repeat(): - stra = (0, (1, 1, 2, 1), (1)) - fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) - fact.grad_cmp() - - -@pytest.mark.reid_grad -def test_reid_prelu_input_128x64x112x112_mix(): - stra = (0, (2, 1, 1, 2), (1)) - fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) - fact.forward_cmp() - - -@pytest.mark.reid_grad -def test_reid_grad_prelu_input_128x64x112x112_mix(): - stra = (0, (2, 1, 1, 2), (1)) - fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) - fact.grad_cmp() +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import numpy as np +import pytest + +import mindspore as ms +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.ops import operations as P +from mindspore.ops.composite import grad_all_with_sens + +device_num = 4 +device_id = int(os.environ["RANK_ID"]) +path = "./output/" + + +def setup_module(): + print("~~~~~~~~~~~set up~~~~~~~~~~~~~") + context.set_context(mode=context.GRAPH_MODE) + context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) + distributedTool.init() + distributedTool.create_group("0-3", [0, 1, 2, 3]) + print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") + + +def teardown_module(): + print("~~~~~~~~~~~~tear down~~~~~~~~~~") + + +class PReLU(Cell): + def __init__(self, channel=1, w=0.25, strategy_=None, strategy1_=None): + super(PReLU, self).__init__() + self.add = P.TensorAdd(strategy=strategy1_) + self.prelu = P.PReLU(strategy=strategy_) + self.channel = channel + + def construct(self, x, z, w): + out = self.add(x, z) + return self.prelu(out, w) + + +class Grad(Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.network = network + + def construct(self, input_, z, w, output_grad): + return grad_all_with_sens(self.network)(input_, z, w, output_grad) + + +class PReLUFactory: + def __init__(self, input_shape, strategy): + n, c = input_shape[:2] + prefix = "" + size = 1 + for s in input_shape: + prefix = prefix + str(s) + size = size * s + self.prefix = prefix + number_range = min(1000, size) + self.input_np = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(np.float32) + self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1, + input_shape).astype(np.float32) + self.channel = c + self.weight = np.array([np.float32(0.25)] * c) + self.strategy = strategy + + def forward_mindspore_impl(self): + net = PReLU(channel=self.channel, w=self.weight) + x = Tensor(self.input_np) + z = Tensor(np.zeros(self.input_np.shape), ms.float32) + w = Tensor(self.weight) + out = net(x, z, w) + return out.asnumpy() + + def forward_mindspore_parallel_impl(self): + net = PReLU(channel=self.channel, w=self.weight, strategy_=self.strategy, + strategy1_=(self.strategy[0], self.strategy[1], self.strategy[1])) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net.set_auto_parallel() + x = Tensor(self.input_np) + z = Tensor(np.zeros(self.input_np.shape), ms.float32) + w = Tensor(self.weight) + + inputs = self.get_parallel_blocks(self.input_np, self.strategy[1]) + block_id = device_id % len(inputs) + x1 = Tensor(inputs[block_id]) + z1 = Tensor(np.zeros(inputs[block_id].shape), ms.float32) + w1 = Tensor(self.weight) + + out = net(x, z, w, parallel_inputs_compile=[x, z, w], parallel_inputs_run=[x1, z1, w1]) + return out.asnumpy() + + def grad_mindspore_impl(self): + output_grad = Tensor(self.output_grad_np) + x = Tensor(self.input_np) + z = Tensor(np.zeros(self.input_np.shape), ms.float32) + w = 
Tensor(self.weight) + + net = PReLU(channel=self.channel, w=self.weight) + grad_net = Grad(net) + grad_net.set_train() + input_grad = grad_net(x, z, w, output_grad) + return input_grad + + def grad_mindspore_parallel_impl(self): + output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy[1]) + block_id = device_id % len(output_grads) + output_grad = Tensor(output_grads[block_id]) + x = Tensor(self.input_np) + z = Tensor(np.zeros(self.input_np.shape), ms.float32) + w = Tensor(self.weight) + + net = PReLU(channel=self.channel, w=self.weight, strategy_=self.strategy, + strategy1_=(self.strategy[0], self.strategy[1], self.strategy[1])) + grad_net = Grad(net) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + grad_net.set_auto_parallel() + + grad_net.set_train() + inputs = self.get_parallel_blocks(self.input_np, self.strategy[1]) + x1 = Tensor(inputs[block_id]) + z1 = Tensor(np.zeros(inputs[block_id].shape), ms.float32) + w1 = Tensor(self.weight) + + input_grad = grad_net(x, z, w, output_grad, parallel_inputs_compile=[x, z, w, output_grad], + parallel_inputs_run=[x1, z1, w1, output_grad]) + return input_grad + + def get_parallel_blocks(self, input_, strategy): + blocks = [input_] + i = 0 + for stra in strategy: + temp = [] + while len(blocks) > 0: + block = blocks.pop(0) + temp.extend(np.split(block, stra, axis=i)) + blocks.extend(temp) + i += 1 + return blocks + + def forward_cmp(self): + out_mindspore = self.forward_mindspore_impl() + out_mindspore_parallel = self.forward_mindspore_parallel_impl() + out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy[1]) + block_id = device_id % len(out_blocks) + assert np.allclose(out_blocks[block_id], out_mindspore_parallel, 0.0001, 0.001) + + def grad_cmp(self): + input_grad_mindspore = self.grad_mindspore_impl() + input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() + input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() + input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() + input_grad_mindspore2 = input_grad_mindspore[2].asnumpy() + input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() + input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() + input_grad_mindspore_parallel2 = input_grad_mindspore_parallel[2].asnumpy() + input_grad_blocks = self.get_parallel_blocks(input_grad_mindspore0, self.strategy[1]) + input1_grad_blocks = self.get_parallel_blocks(input_grad_mindspore1, self.strategy[1]) + block_id = device_id % len(input_grad_blocks) + assert np.allclose(input_grad_blocks[block_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) + assert np.allclose(input_grad_mindspore2, input_grad_mindspore_parallel2, 0.0001, 0.0001) + assert np.allclose(input1_grad_blocks[block_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) + + +@pytest.mark.reid_grad +def test_reid_prelu_input_128x64x112x112_repeat(): + stra = (0, (1, 1, 2, 1), (1)) + fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) + fact.forward_cmp() + + +@pytest.mark.reid_grad +def test_reid_grad_prelu_input_128x64x112x112_repeat(): + stra = (0, (1, 1, 2, 1), (1)) + fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) + fact.grad_cmp() + + +@pytest.mark.reid_grad +def test_reid_prelu_input_128x64x112x112_mix(): + stra = (0, (2, 1, 1, 2), (1)) + fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) + fact.forward_cmp() + + +@pytest.mark.reid_grad +def test_reid_grad_prelu_input_128x64x112x112_mix(): + stra = (0, (2, 1, 1, 2), (1)) + 
fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) + fact.grad_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/reducemean/_test_reducemean_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/reducemean/_test_reducemean_parallel_4p.py index 82ab74d38d..24a3227da7 100644 --- a/tests/ut/python/parallel/parallel_end_to_end/reducemean/_test_reducemean_parallel_4p.py +++ b/tests/ut/python/parallel/parallel_end_to_end/reducemean/_test_reducemean_parallel_4p.py @@ -1,253 +1,252 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import os -import pytest -from numpy import allclose as allclose_nparray - -import mindspore as ms -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, x, y, output_grad): - return grad_all_with_sens(self.network)(x, y, output_grad) - - -class GradScalar(Cell): - def __init__(self, network): - super(GradScalar, self).__init__() - self.network = network - self.sens = Tensor([1.0], dtype=ms.float32) - - def construct(self, x, y): - return grad_all_with_sens(self.network)(x, y, self.sens) - - -class ReduceMean(Cell): - def __init__(self, keep_dims, axis, strategy0=None, strategy1=None): - super(ReduceMean, self).__init__() - self.add = P.TensorAdd(strategy=strategy0) - self.reduce_mean = P.ReduceMean(keep_dims=keep_dims).set_strategy(strategy=strategy1) - self.axis = axis - - def construct(self, x, y): - out = self.add(x, y) - return self.reduce_mean(out, self.axis) - - -class ReduceMeanFactory: - def __init__(self, input_shape, keep_dims, axis, strategy0=None, strategy1=None): - prefix = "" - size = 1 - for s in input_shape: - prefix = prefix + str(s) - size = size * s - self.prefix = prefix - number_range = min(1000, size) - self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( - np.float32) - self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( - np.float32) - self.keep_dims = keep_dims - self.axis = axis - target_shape = self.input_np1.mean(axis=axis, keepdims=keep_dims).shape - target_size = 1 - for s in target_shape: - target_size = target_size * s - 
number_range = min(1000, target_size) - self.output_grad_np = np.array([1.0], dtype=np.float32) - if len(target_shape) > 0: - self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range, target_shape).astype( - np.float32) + 1.0 - self.shape = target_shape - self.strategy0 = strategy0 - self.strategy1 = strategy1 - out_strategy = [] - axis_ = list(axis) - if axis_[0] == -1: - axis_[0] = len(input_shape) - 1 - for i in range(0, len(input_shape)): - if i in axis_: - if keep_dims: - out_strategy.append(1) - else: - out_strategy.append(strategy1[1][i]) - self.out_strategy = out_strategy - need_dev_num0 = 1 - need_dev_num1 = 1 - for s in strategy0[1]: - need_dev_num0 = need_dev_num0 * s - for s in out_strategy: - need_dev_num1 = need_dev_num1 * s - self.x_id = device_id % need_dev_num0 - self.y_id = device_id % need_dev_num0 - block_id = device_id % need_dev_num0 - device_index = self.id_to_list(block_id, self.strategy1[1]) - print(device_index) - for i in axis: - device_index[i] = 0 - print(device_index) - self.out_id = self.list_to_id(device_index, self.out_strategy) - print(self.out_id) - - def id_to_list(self, id, shape): - result = [] - r = id - for i in range(0, len(shape)): - v = 1 - for j in range(i + 1, len(shape)): - v = v * shape[j] - result.append(r // v) - r = r % v - return result - - def list_to_id(self, id_list, shape): - result = 0 - for i in range(0, len(id_list)): - v = 1 - for j in range(i + 1, len(id_list)): - v = v * shape[j] - result = result + id_list[i] * v - return result - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def forward_mindspore_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis) - out = net(x, y) - return out.asnumpy() - - def forward_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis, strategy0=self.strategy0, strategy1=self.strategy1) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net.set_auto_parallel() - out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) - return out.asnumpy() - - def grad_mindspore_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - out_grad = Tensor(self.output_grad_np) - net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis) - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, out_grad) - return input_grad - - def grad_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - output_grad = Tensor(self.output_grad_np) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - output_grad1 = Tensor(outgrads[self.out_id]) - net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis, strategy0=self.strategy0, strategy1=self.strategy1) - grad_net = Grad(net) - 
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - grad_net.set_auto_parallel() - grad_net.set_train() - input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], - parallel_inputs_run=[x1, y1, output_grad1]) - return input_grad - - def forward_cmp(self): - out_mindspore = self.forward_mindspore_impl() - out_mindspore_parallel = self.forward_mindspore_parallel_impl() - out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) - assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) - - def grad_cmp(self): - input_grad_mindspore = self.grad_mindspore_impl() - input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() - input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() - input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() - input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() - input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() - input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) - input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) - assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) - assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) - - -def test_reid_reducemean_input_64x16(): - fact = ReduceMeanFactory(input_shape=(64 * 16,), keep_dims=False, axis=(-1,), strategy0=(0, (4,), (4,)), - strategy1=(0, (4,))) - fact.forward_cmp() - - -def test_grad_reid_reducemean_input_64x16(): - fact = ReduceMeanFactory(input_shape=(64 * 16,), keep_dims=False, axis=(-1,), strategy0=(0, (4,), (4,)), - strategy1=(0, (4,))) - fact.grad_cmp() - - -def test_reid_reducemean_input_64x128x28x28(): - fact = ReduceMeanFactory(input_shape=(64, 128, 32, 32), keep_dims=True, axis=(2, 3), - strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), strategy1=(0, (2, 1, 2, 1))) - fact.forward_cmp() - - -def test_grad_reid_reducemean_input_64x128x28x28(): - fact = ReduceMeanFactory(input_shape=(64, 128, 32, 32), keep_dims=True, axis=(2, 3), - strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), strategy1=(0, (2, 1, 2, 1))) - fact.grad_cmp() +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import numpy as np +from numpy import allclose as allclose_nparray + +import mindspore as ms +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.ops import operations as P +from mindspore.ops.composite import grad_all_with_sens + +device_num = 4 +device_id = int(os.environ["RANK_ID"]) +path = "./output/" + + +def setup_module(): + print("~~~~~~~~~~~set up~~~~~~~~~~~~~") + context.set_context(mode=context.GRAPH_MODE) + context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) + distributedTool.init() + distributedTool.create_group("0-3", [0, 1, 2, 3]) + print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") + + +def teardown_module(): + print("~~~~~~~~~~~~tear down~~~~~~~~~~") + + +class Grad(Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.network = network + + def construct(self, x, y, output_grad): + return grad_all_with_sens(self.network)(x, y, output_grad) + + +class GradScalar(Cell): + def __init__(self, network): + super(GradScalar, self).__init__() + self.network = network + self.sens = Tensor([1.0], dtype=ms.float32) + + def construct(self, x, y): + return grad_all_with_sens(self.network)(x, y, self.sens) + + +class ReduceMean(Cell): + def __init__(self, keep_dims, axis, strategy0=None, strategy1=None): + super(ReduceMean, self).__init__() + self.add = P.TensorAdd(strategy=strategy0) + self.reduce_mean = P.ReduceMean(keep_dims=keep_dims).set_strategy(strategy=strategy1) + self.axis = axis + + def construct(self, x, y): + out = self.add(x, y) + return self.reduce_mean(out, self.axis) + + +class ReduceMeanFactory: + def __init__(self, input_shape, keep_dims, axis, strategy0=None, strategy1=None): + prefix = "" + size = 1 + for s in input_shape: + prefix = prefix + str(s) + size = size * s + self.prefix = prefix + number_range = min(1000, size) + self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( + np.float32) + self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( + np.float32) + self.keep_dims = keep_dims + self.axis = axis + target_shape = self.input_np1.mean(axis=axis, keepdims=keep_dims).shape + target_size = 1 + for s in target_shape: + target_size = target_size * s + number_range = min(1000, target_size) + self.output_grad_np = np.array([1.0], dtype=np.float32) + if len(target_shape) > 0: + self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range, target_shape).astype( + np.float32) + 1.0 + self.shape = target_shape + self.strategy0 = strategy0 + self.strategy1 = strategy1 + out_strategy = [] + axis_ = list(axis) + if axis_[0] == -1: + axis_[0] = len(input_shape) - 1 + for i in range(0, len(input_shape)): + if i in axis_: + if keep_dims: + out_strategy.append(1) + else: + out_strategy.append(strategy1[1][i]) + self.out_strategy = out_strategy + need_dev_num0 = 1 + need_dev_num1 = 1 + for s in strategy0[1]: + need_dev_num0 = need_dev_num0 * s + for s in out_strategy: + need_dev_num1 = need_dev_num1 * s + self.x_id = device_id % need_dev_num0 + self.y_id = device_id % need_dev_num0 + block_id = device_id % need_dev_num0 + device_index = self.id_to_list(block_id, self.strategy1[1]) + print(device_index) + for i in axis: + device_index[i] = 0 + print(device_index) + self.out_id = self.list_to_id(device_index, self.out_strategy) + print(self.out_id) + + def id_to_list(self, id_, 
shape): + result = [] + r = id_ + for i in range(0, len(shape)): + v = 1 + for j in range(i + 1, len(shape)): + v = v * shape[j] + result.append(r // v) + r = r % v + return result + + def list_to_id(self, id_list, shape): + result = 0 + for i in range(0, len(id_list)): + v = 1 + for j in range(i + 1, len(id_list)): + v = v * shape[j] + result = result + id_list[i] * v + return result + + def get_parallel_blocks(self, input_, strategy): + blocks = [input_] + i = 0 + for stra in strategy: + temp = [] + while len(blocks) > 0: + block = blocks.pop(0) + temp.extend(np.split(block, stra, axis=i)) + blocks.extend(temp) + i += 1 + return blocks + + def forward_mindspore_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis) + out = net(x, y) + return out.asnumpy() + + def forward_mindspore_parallel_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) + x1 = Tensor(inputs_x[self.x_id]) + y1 = Tensor(inputs_y[self.y_id]) + net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis, strategy0=self.strategy0, strategy1=self.strategy1) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net.set_auto_parallel() + out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) + return out.asnumpy() + + def grad_mindspore_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + out_grad = Tensor(self.output_grad_np) + net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis) + grad_net = Grad(net) + grad_net.set_train() + input_grad = grad_net(x, y, out_grad) + return input_grad + + def grad_mindspore_parallel_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + output_grad = Tensor(self.output_grad_np) + inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) + outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) + x1 = Tensor(inputs_x[self.x_id]) + y1 = Tensor(inputs_y[self.y_id]) + output_grad1 = Tensor(outgrads[self.out_id]) + net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis, strategy0=self.strategy0, strategy1=self.strategy1) + grad_net = Grad(net) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + grad_net.set_auto_parallel() + grad_net.set_train() + input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], + parallel_inputs_run=[x1, y1, output_grad1]) + return input_grad + + def forward_cmp(self): + out_mindspore = self.forward_mindspore_impl() + out_mindspore_parallel = self.forward_mindspore_parallel_impl() + out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) + assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) + + def grad_cmp(self): + input_grad_mindspore = self.grad_mindspore_impl() + input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() + input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() + input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() + input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() + input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() + input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) + input_grad_blocks_1 = 
self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) + assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) + assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) + + +def test_reid_reducemean_input_64x16(): + fact = ReduceMeanFactory(input_shape=(64 * 16,), keep_dims=False, axis=(-1,), strategy0=(0, (4,), (4,)), + strategy1=(0, (4,))) + fact.forward_cmp() + + +def test_grad_reid_reducemean_input_64x16(): + fact = ReduceMeanFactory(input_shape=(64 * 16,), keep_dims=False, axis=(-1,), strategy0=(0, (4,), (4,)), + strategy1=(0, (4,))) + fact.grad_cmp() + + +def test_reid_reducemean_input_64x128x28x28(): + fact = ReduceMeanFactory(input_shape=(64, 128, 32, 32), keep_dims=True, axis=(2, 3), + strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), strategy1=(0, (2, 1, 2, 1))) + fact.forward_cmp() + + +def test_grad_reid_reducemean_input_64x128x28x28(): + fact = ReduceMeanFactory(input_shape=(64, 128, 32, 32), keep_dims=True, axis=(2, 3), + strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), strategy1=(0, (2, 1, 2, 1))) + fact.grad_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/reshape/_test_reshape_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/reshape/_test_reshape_parallel_4p.py index 70532e5c81..cbfdd511d7 100644 --- a/tests/ut/python/parallel/parallel_end_to_end/reshape/_test_reshape_parallel_4p.py +++ b/tests/ut/python/parallel/parallel_end_to_end/reshape/_test_reshape_parallel_4p.py @@ -1,206 +1,206 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import numpy as np -import os -import pytest -from numpy import allclose as allclose_nparray - -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, x, y, output_grad): - return grad_all_with_sens(self.network)(x, y, output_grad) - - -class Reshape(Cell): - def __init__(self, target_shape, strategy0=None, strategy1=None): - super(Reshape, self).__init__() - self.add = P.TensorAdd(strategy=strategy0) - self.reshape = P.Reshape(strategy=strategy1) - self.shape = tuple(target_shape) - - def construct(self, input1, input2): - x = self.add(input1, input2) - return self.reshape(x, self.shape) - - -class ReshapeFactory: - def __init__(self, input_shape, target_shape, strategy0, strategy1): - prefix = "" - size = 1 - for s in input_shape: - prefix = prefix + str(s) - size = size * s - self.prefix = prefix - number_range = min(1000, size) - self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( - np.float32) - self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( - np.float32) - target_size = 1 - for s in target_shape: - target_size = target_size * s - number_range = min(1000, target_size) - self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2, - target_shape).astype(np.float32) - self.target_shape = target_shape - self.strategy0 = strategy0 - self.strategy1 = strategy1 - out_strategy = [1] * len(target_shape) - out_strategy[0] = strategy1[1][0] - self.out_strategy = out_strategy - - need_dev_num0 = 1 - need_dev_num1 = 1 - for s in strategy0[1]: - need_dev_num0 = need_dev_num0 * s - for s in out_strategy: - need_dev_num1 = need_dev_num1 * s - self.x_id = device_id % need_dev_num0 - self.y_id = device_id % need_dev_num0 - self.out_id = device_id % need_dev_num1 - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def forward_reshape_mindspore_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - net = Reshape(self.target_shape) - out = net(x, y) - return out.asnumpy() - - def forward_reshape_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - net = Reshape(self.target_shape, strategy0=self.strategy0, strategy1=self.strategy1) - 
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net.set_auto_parallel() - out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) - return out.asnumpy() - - def grad_reshape_mindspore_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - output_grad = Tensor(self.output_grad_np) - net = Reshape(self.target_shape) - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, output_grad) - return input_grad - - def grad_reshape_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - output_grad = Tensor(self.output_grad_np) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - output_grad1 = Tensor(outgrads[self.out_id]) - net = Reshape(self.target_shape, strategy0=self.strategy0, strategy1=self.strategy1) - grad_net = Grad(net) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - grad_net.set_auto_parallel() - grad_net.set_train() - input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], - parallel_inputs_run=[x1, y1, output_grad1]) - return input_grad - - def forward_reshape_cmp(self): - out_mindspore = self.forward_reshape_mindspore_impl() - out_mindspore_parallel = self.forward_reshape_mindspore_parallel_impl() - out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) - assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) - - def grad_reshape_cmp(self): - input_grad_mindspore = self.grad_reshape_mindspore_impl() - input_grad_mindspore_parallel = self.grad_reshape_mindspore_parallel_impl() - input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() - input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() - input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() - input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() - input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) - input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) - assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) - assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) - - -@pytest.mark.reid_forward -def test_reid_reshape_input_128x512x7x7_target_128x25088(): - fact = ReshapeFactory(input_shape=(128, 512, 7, 7), target_shape=(128, 25088), - strategy0=(0, (4, 1, 1, 1), (4, 1, 1, 1)), strategy1=(0, (4, 1, 1, 1))) - fact.forward_reshape_cmp() - - -def test_reid_reshape_grad_input_128x512x7x7_target_128x25088(): - fact = ReshapeFactory(input_shape=(128, 512, 7, 7), target_shape=(128, 25088), - strategy0=(0, (4, 1, 1, 1), (4, 1, 1, 1)), strategy1=(0, (4, 1, 1, 1))) - fact.grad_reshape_cmp() - - -@pytest.mark.reid_forward -def test_reid_reshape_input_128x64_target_128x64x1x1(): - fact = ReshapeFactory(input_shape=(128, 64), target_shape=(128, 64, 1, 1), strategy0=(0, (2, 1), (2, 1)), - strategy1=(0, (2, 1))) - fact.forward_reshape_cmp() - - -@pytest.mark.reid_grad -def test_reid_reshape_grad_input_128x64_target_128x64x1x1(): - fact = ReshapeFactory(input_shape=(128, 64), target_shape=(128, 64, 1, 1), strategy0=(0, (2, 1), (2, 1)), - strategy1=(0, (2, 1))) - 
fact.grad_reshape_cmp() +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import numpy as np +import pytest +from numpy import allclose as allclose_nparray + +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.ops import operations as P +from mindspore.ops.composite import grad_all_with_sens + +device_num = 4 +device_id = int(os.environ["RANK_ID"]) +path = "./output/" + + +def setup_module(): + print("~~~~~~~~~~~set up~~~~~~~~~~~~~") + context.set_context(mode=context.GRAPH_MODE) + context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) + distributedTool.init() + distributedTool.create_group("0-3", [0, 1, 2, 3]) + print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") + + +def teardown_module(): + print("~~~~~~~~~~~~tear down~~~~~~~~~~") + + +class Grad(Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.network = network + + def construct(self, x, y, output_grad): + return grad_all_with_sens(self.network)(x, y, output_grad) + + +class Reshape(Cell): + def __init__(self, target_shape, strategy0=None, strategy1=None): + super(Reshape, self).__init__() + self.add = P.TensorAdd(strategy=strategy0) + self.reshape = P.Reshape(strategy=strategy1) + self.shape = tuple(target_shape) + + def construct(self, input1, input2): + x = self.add(input1, input2) + return self.reshape(x, self.shape) + + +class ReshapeFactory: + def __init__(self, input_shape, target_shape, strategy0, strategy1): + prefix = "" + size = 1 + for s in input_shape: + prefix = prefix + str(s) + size = size * s + self.prefix = prefix + number_range = min(1000, size) + self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( + np.float32) + self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( + np.float32) + target_size = 1 + for s in target_shape: + target_size = target_size * s + number_range = min(1000, target_size) + self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2, + target_shape).astype(np.float32) + self.target_shape = target_shape + self.strategy0 = strategy0 + self.strategy1 = strategy1 + out_strategy = [1] * len(target_shape) + out_strategy[0] = strategy1[1][0] + self.out_strategy = out_strategy + + need_dev_num0 = 1 + need_dev_num1 = 1 + for s in strategy0[1]: + need_dev_num0 = need_dev_num0 * s + for s in out_strategy: + need_dev_num1 = need_dev_num1 * s + self.x_id = device_id % need_dev_num0 + self.y_id = device_id % need_dev_num0 + self.out_id = device_id % need_dev_num1 + + def get_parallel_blocks(self, input_, strategy): + blocks = [input_] + i = 0 + for stra in strategy: + temp = [] + while len(blocks) > 0: + block = blocks.pop(0) + temp.extend(np.split(block, stra, axis=i)) + blocks.extend(temp) + i += 1 + return blocks + + def 
forward_reshape_mindspore_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + net = Reshape(self.target_shape) + out = net(x, y) + return out.asnumpy() + + def forward_reshape_mindspore_parallel_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) + x1 = Tensor(inputs_x[self.x_id]) + y1 = Tensor(inputs_y[self.y_id]) + net = Reshape(self.target_shape, strategy0=self.strategy0, strategy1=self.strategy1) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net.set_auto_parallel() + out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) + return out.asnumpy() + + def grad_reshape_mindspore_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + output_grad = Tensor(self.output_grad_np) + net = Reshape(self.target_shape) + grad_net = Grad(net) + grad_net.set_train() + input_grad = grad_net(x, y, output_grad) + return input_grad + + def grad_reshape_mindspore_parallel_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + output_grad = Tensor(self.output_grad_np) + inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) + outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) + x1 = Tensor(inputs_x[self.x_id]) + y1 = Tensor(inputs_y[self.y_id]) + output_grad1 = Tensor(outgrads[self.out_id]) + net = Reshape(self.target_shape, strategy0=self.strategy0, strategy1=self.strategy1) + grad_net = Grad(net) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + grad_net.set_auto_parallel() + grad_net.set_train() + input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], + parallel_inputs_run=[x1, y1, output_grad1]) + return input_grad + + def forward_reshape_cmp(self): + out_mindspore = self.forward_reshape_mindspore_impl() + out_mindspore_parallel = self.forward_reshape_mindspore_parallel_impl() + out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) + assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) + + def grad_reshape_cmp(self): + input_grad_mindspore = self.grad_reshape_mindspore_impl() + input_grad_mindspore_parallel = self.grad_reshape_mindspore_parallel_impl() + input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() + input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() + input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() + input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() + input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) + input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) + assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) + assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) + + +@pytest.mark.reid_forward +def test_reid_reshape_input_128x512x7x7_target_128x25088(): + fact = ReshapeFactory(input_shape=(128, 512, 7, 7), target_shape=(128, 25088), + strategy0=(0, (4, 1, 1, 1), (4, 1, 1, 1)), strategy1=(0, (4, 1, 1, 1))) + fact.forward_reshape_cmp() + + +def test_reid_reshape_grad_input_128x512x7x7_target_128x25088(): + fact = ReshapeFactory(input_shape=(128, 512, 7, 7), target_shape=(128, 25088), 
+ strategy0=(0, (4, 1, 1, 1), (4, 1, 1, 1)), strategy1=(0, (4, 1, 1, 1))) + fact.grad_reshape_cmp() + + +@pytest.mark.reid_forward +def test_reid_reshape_input_128x64_target_128x64x1x1(): + fact = ReshapeFactory(input_shape=(128, 64), target_shape=(128, 64, 1, 1), strategy0=(0, (2, 1), (2, 1)), + strategy1=(0, (2, 1))) + fact.forward_reshape_cmp() + + +@pytest.mark.reid_grad +def test_reid_reshape_grad_input_128x64_target_128x64x1x1(): + fact = ReshapeFactory(input_shape=(128, 64), target_shape=(128, 64, 1, 1), strategy0=(0, (2, 1), (2, 1)), + strategy1=(0, (2, 1))) + fact.grad_reshape_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/transpose/_test_transpose_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/transpose/_test_transpose_parallel_4p.py index 7fd5462200..6a6fe1a79e 100644 --- a/tests/ut/python/parallel/parallel_end_to_end/transpose/_test_transpose_parallel_4p.py +++ b/tests/ut/python/parallel/parallel_end_to_end/transpose/_test_transpose_parallel_4p.py @@ -1,236 +1,235 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import os -import pytest -from numpy import allclose as allclose_nparray - -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class Net(Cell): - def __init__(self, perm_in, strategy0=None, strategy1=None): - super(Net, self).__init__() - self.add = P.TensorAdd(strategy=strategy0) - self.transpose = P.Transpose(strategy=strategy1) - self.perm_in = perm_in - - def construct(self, x, y): - out = self.add(x, y) - return self.transpose(out, self.perm_in) - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, x, y, output_grad): - return grad_all_with_sens(self.network)(x, y, output_grad) - - -class TransposeFactory: - def __init__(self, input_shape, perm_in, strategy0, strategy1): - prefix = "" - size = 1 - for s in input_shape: - prefix = prefix + str(s) - size = size * s - self.prefix = prefix - number_range = min(1000, size) - self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( - np.float32) - self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( - np.float32) - target_shape = self.input_np1.transpose(perm_in).shape - 
target_size = 1 - for s in target_shape: - target_size = target_size * s - number_range = min(1000, target_size) - self.target_shape = target_shape - self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2, - target_shape).astype(np.float32) - self.perm_in = perm_in - self.strategy0 = strategy0 - self.strategy1 = strategy1 - out_strategy = [] - for i in perm_in: - out_strategy.append(strategy1[1][i]) - self.out_strategy = out_strategy - need_dev_num0 = 1 - need_dev_num1 = 1 - for s in strategy0[1]: - need_dev_num0 = need_dev_num0 * s - for s in out_strategy: - need_dev_num1 = need_dev_num1 * s - self.x_id = device_id % need_dev_num0 - self.y_id = device_id % need_dev_num0 - device_index = self.id_to_list(device_id % need_dev_num1, - self.strategy1[1]) # encoding to get the index before transpose - device_index_transpose = [] - for i in perm_in: - device_index_transpose.append(device_index[i]) - self.out_id = self.list_to_id(device_index_transpose, self.out_strategy) - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def id_to_list(self, id, shape): - result = [] - r = id - for i in range(0, len(shape)): - v = 1 - for j in range(i + 1, len(shape)): - v = v * shape[j] - result.append(r // v) - r = r % v - return result - - def list_to_id(self, id_list, shape): - result = 0 - for i in range(0, len(id_list)): - v = 1 - for j in range(i + 1, len(id_list)): - v = v * shape[j] - result = result + id_list[i] * v - return result - - def forward_mindspore_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - net = Net(self.perm_in) - out = net(x, y) - return out.asnumpy() - - def forward_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - net = Net(self.perm_in, strategy0=self.strategy0, strategy1=self.strategy1) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net.set_auto_parallel() - out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) - return out.asnumpy() - - def grad_mindspore_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - output_grad = Tensor(self.output_grad_np) - net = Net(self.perm_in) - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, output_grad) - return input_grad - - def grad_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - output_grad = Tensor(self.output_grad_np) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - output_grad1 = Tensor(outgrads[self.out_id]) - net = Net(self.perm_in, strategy0=self.strategy0, strategy1=self.strategy1) - grad_net = Grad(net) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - grad_net.set_auto_parallel() - grad_net.set_train() - input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], - 
parallel_inputs_run=[x1, y1, output_grad1]) - return input_grad - - def forward_transpose_cmp(self): - out_mindspore = self.forward_mindspore_impl() - out_mindspore_parallel = self.forward_mindspore_parallel_impl() - out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) - assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) - - def grad_transpose_cmp(self): - input_grad_mindspore = self.grad_mindspore_impl() - input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() - input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() - input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() - input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() - input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() - input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) - input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) - assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) - assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) - - -def test_reid_transpose_input_256x512_output_512x256_perm_1x0(): - fact = TransposeFactory((256, 512), (1, 0), strategy0=(0, (2, 2), (2, 2)), strategy1=(0, (2, 2))) - fact.forward_transpose_cmp() - - -def test_reid_grad_transpose_input_256x512_output_512x256_perm_1x0(): - fact = TransposeFactory((256, 512), (1, 0), strategy0=(0, (2, 2), (2, 2)), strategy1=(0, (2, 2))) - fact.grad_transpose_cmp() - - -def test_reid_transpose_input_512x256_output_256x512_perm_1x0(): - fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4))) - fact.forward_transpose_cmp() - - -def test_reid_grad_transpose_input_512x256_output_256x512_perm_1x0(): - fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4))) - fact.grad_transpose_cmp() - - -def test_reid_transpose_input_512x256_output_256x512_perm_1x0_repeat(): - fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (2, 1), (2, 1)), strategy1=(0, (2, 1))) - fact.forward_transpose_cmp() - - -def test_reid_grad_transpose_input_512x256_output_256x512_perm_1x0_repeat(): - fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (2, 1), (2, 1)), strategy1=(0, (2, 1))) - fact.grad_transpose_cmp() +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
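+
+# For the Transpose cases, id_to_list() decodes this device's rank into one index
+# per sliced dimension (for example, rank 3 under a (2, 2) strategy gives [1, 1]),
+# the indices are permuted with perm_in, and list_to_id() re-encodes them to select
+# the block of the expected output that this device should produce.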
+ +import os +import numpy as np +from numpy import allclose as allclose_nparray + +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.ops import operations as P +from mindspore.ops.composite import grad_all_with_sens + +device_num = 4 +device_id = int(os.environ["RANK_ID"]) +path = "./output/" + + +def setup_module(): + print("~~~~~~~~~~~set up~~~~~~~~~~~~~") + context.set_context(mode=context.GRAPH_MODE) + context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) + distributedTool.init() + distributedTool.create_group("0-3", [0, 1, 2, 3]) + print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") + + +def teardown_module(): + print("~~~~~~~~~~~~tear down~~~~~~~~~~") + + +class Net(Cell): + def __init__(self, perm_in, strategy0=None, strategy1=None): + super(Net, self).__init__() + self.add = P.TensorAdd(strategy=strategy0) + self.transpose = P.Transpose(strategy=strategy1) + self.perm_in = perm_in + + def construct(self, x, y): + out = self.add(x, y) + return self.transpose(out, self.perm_in) + + +class Grad(Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.network = network + + def construct(self, x, y, output_grad): + return grad_all_with_sens(self.network)(x, y, output_grad) + + +class TransposeFactory: + def __init__(self, input_shape, perm_in, strategy0, strategy1): + prefix = "" + size = 1 + for s in input_shape: + prefix = prefix + str(s) + size = size * s + self.prefix = prefix + number_range = min(1000, size) + self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( + np.float32) + self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( + np.float32) + target_shape = self.input_np1.transpose(perm_in).shape + target_size = 1 + for s in target_shape: + target_size = target_size * s + number_range = min(1000, target_size) + self.target_shape = target_shape + self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2, + target_shape).astype(np.float32) + self.perm_in = perm_in + self.strategy0 = strategy0 + self.strategy1 = strategy1 + out_strategy = [] + for i in perm_in: + out_strategy.append(strategy1[1][i]) + self.out_strategy = out_strategy + need_dev_num0 = 1 + need_dev_num1 = 1 + for s in strategy0[1]: + need_dev_num0 = need_dev_num0 * s + for s in out_strategy: + need_dev_num1 = need_dev_num1 * s + self.x_id = device_id % need_dev_num0 + self.y_id = device_id % need_dev_num0 + device_index = self.id_to_list(device_id % need_dev_num1, + self.strategy1[1]) # encoding to get the index before transpose + device_index_transpose = [] + for i in perm_in: + device_index_transpose.append(device_index[i]) + self.out_id = self.list_to_id(device_index_transpose, self.out_strategy) + + def get_parallel_blocks(self, input_, strategy): + blocks = [input_] + i = 0 + for stra in strategy: + temp = [] + while len(blocks) > 0: + block = blocks.pop(0) + temp.extend(np.split(block, stra, axis=i)) + blocks.extend(temp) + i += 1 + return blocks + + def id_to_list(self, id_, shape): + result = [] + r = id_ + for i in range(0, len(shape)): + v = 1 + for j in range(i + 1, len(shape)): + v = v * shape[j] + result.append(r // v) + r = r % v + return result + + def list_to_id(self, id_list, shape): + result = 0 + for i in range(0, len(id_list)): + v = 1 + for j in range(i + 1, len(id_list)): + v = v * 
shape[j] + result = result + id_list[i] * v + return result + + def forward_mindspore_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + net = Net(self.perm_in) + out = net(x, y) + return out.asnumpy() + + def forward_mindspore_parallel_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) + x1 = Tensor(inputs_x[self.x_id]) + y1 = Tensor(inputs_y[self.y_id]) + net = Net(self.perm_in, strategy0=self.strategy0, strategy1=self.strategy1) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net.set_auto_parallel() + out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) + return out.asnumpy() + + def grad_mindspore_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + output_grad = Tensor(self.output_grad_np) + net = Net(self.perm_in) + grad_net = Grad(net) + grad_net.set_train() + input_grad = grad_net(x, y, output_grad) + return input_grad + + def grad_mindspore_parallel_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + output_grad = Tensor(self.output_grad_np) + inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) + outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) + x1 = Tensor(inputs_x[self.x_id]) + y1 = Tensor(inputs_y[self.y_id]) + output_grad1 = Tensor(outgrads[self.out_id]) + net = Net(self.perm_in, strategy0=self.strategy0, strategy1=self.strategy1) + grad_net = Grad(net) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + grad_net.set_auto_parallel() + grad_net.set_train() + input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], + parallel_inputs_run=[x1, y1, output_grad1]) + return input_grad + + def forward_transpose_cmp(self): + out_mindspore = self.forward_mindspore_impl() + out_mindspore_parallel = self.forward_mindspore_parallel_impl() + out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) + assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) + + def grad_transpose_cmp(self): + input_grad_mindspore = self.grad_mindspore_impl() + input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() + input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() + input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() + input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() + input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() + input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) + input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) + assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) + assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) + + +def test_reid_transpose_input_256x512_output_512x256_perm_1x0(): + fact = TransposeFactory((256, 512), (1, 0), strategy0=(0, (2, 2), (2, 2)), strategy1=(0, (2, 2))) + fact.forward_transpose_cmp() + + +def test_reid_grad_transpose_input_256x512_output_512x256_perm_1x0(): + fact = TransposeFactory((256, 512), (1, 0), strategy0=(0, (2, 2), (2, 2)), strategy1=(0, (2, 2))) + fact.grad_transpose_cmp() + + +def 
test_reid_transpose_input_512x256_output_256x512_perm_1x0(): + fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4))) + fact.forward_transpose_cmp() + + +def test_reid_grad_transpose_input_512x256_output_256x512_perm_1x0(): + fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4))) + fact.grad_transpose_cmp() + + +def test_reid_transpose_input_512x256_output_256x512_perm_1x0_repeat(): + fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (2, 1), (2, 1)), strategy1=(0, (2, 1))) + fact.forward_transpose_cmp() + + +def test_reid_grad_transpose_input_512x256_output_256x512_perm_1x0_repeat(): + fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (2, 1), (2, 1)), strategy1=(0, (2, 1))) + fact.grad_transpose_cmp() diff --git a/tests/ut/python/parallel/test_add_relu_redistribution.py b/tests/ut/python/parallel/test_add_relu_redistribution.py index 7a9934fe39..08ef18699a 100644 --- a/tests/ut/python/parallel/test_add_relu_redistribution.py +++ b/tests/ut/python/parallel/test_add_relu_redistribution.py @@ -54,7 +54,7 @@ class Grad(nn.Cell): return C.grad_all(self.network)(x, y) -def compile(net, x, y): +def compile_net(net, x, y): net.set_auto_parallel() _executor.compile(net, x, y) @@ -69,7 +69,7 @@ def test_add_relu_stride_slice(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([128, 32]), dtype=ms.float32) - compile(net, x, y) + compile_net(net, x, y) def test_add_relu_all_gather(): @@ -82,4 +82,4 @@ def test_add_relu_all_gather(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([128, 32]), dtype=ms.float32) - compile(net, x, y) + compile_net(net, x, y) diff --git a/tests/ut/python/parallel/test_allreduce_fusion.py b/tests/ut/python/parallel/test_allreduce_fusion.py index c593869430..607213f806 100644 --- a/tests/ut/python/parallel/test_allreduce_fusion.py +++ b/tests/ut/python/parallel/test_allreduce_fusion.py @@ -17,7 +17,6 @@ import numpy as np import mindspore as ms import mindspore.nn as nn from mindspore import Tensor, context -from mindspore import context from mindspore.common.api import _executor from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits from mindspore.nn.optim.momentum import Momentum @@ -131,56 +130,56 @@ def test_allreduce_fusion_parameters(): cost_model_context.reset_cost_model_context() cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_algorithm=2) algorithm = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_algorithm') - assert (algorithm == 2) + assert algorithm == 2 cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_algorithm=1) algorithm = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_algorithm') - assert (algorithm == 1) + assert algorithm == 1 cost_model_context.reset_cost_model_context() algorithm = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_algorithm') - assert (algorithm == 0) + assert algorithm == 0 cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_times=2) fusion_times = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_times') - assert (fusion_times == 2) + assert fusion_times == 2 cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_tail_percent=0.2) tail_percent = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_tail_percent') - assert (tail_percent == 0.2) + assert tail_percent == 0.2 cost_model_context.reset_cost_model_context() 
tail_percent = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_tail_percent') - assert (tail_percent == 0.1) + assert tail_percent == 0.1 cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_tail_time=0.2) tail_time = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_tail_time') - assert (tail_time == 0.2) + assert tail_time == 0.2 cost_model_context.reset_cost_model_context() tail_time = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_tail_time') - assert (tail_time == 0.1) + assert tail_time == 0.1 cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_allreduce_inherent_time=0.2) allreduce_inherent_time = cost_model_context.get_cost_model_context( 'costmodel_allreduce_fusion_allreduce_inherent_time') - assert (allreduce_inherent_time == 0.2) + assert allreduce_inherent_time == 0.2 cost_model_context.reset_cost_model_context() allreduce_inherent_time = cost_model_context.get_cost_model_context( 'costmodel_allreduce_fusion_allreduce_inherent_time') - assert (allreduce_inherent_time == 0.1) + assert allreduce_inherent_time == 0.1 cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_allreduce_bandwidth=0.2) allreduce_bandwidth = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_allreduce_bandwidth') - assert (allreduce_bandwidth == 0.2) + assert allreduce_bandwidth == 0.2 cost_model_context.reset_cost_model_context() allreduce_bandwidth = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_allreduce_bandwidth') - assert (allreduce_bandwidth == 0.1) + assert allreduce_bandwidth == 0.1 cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_computation_time_parameter=0.2) computation_time_parameter = cost_model_context.get_cost_model_context( 'costmodel_allreduce_fusion_computation_time_parameter') - assert (computation_time_parameter == 0.2) + assert computation_time_parameter == 0.2 cost_model_context.reset_cost_model_context() computation_time_parameter = cost_model_context.get_cost_model_context( 'costmodel_allreduce_fusion_computation_time_parameter') - assert (computation_time_parameter == 0.1) + assert computation_time_parameter == 0.1 def test_allreduce_fusion1(): @@ -201,7 +200,7 @@ def test_allreduce_fusion1(): 'backbone2.fc2.weight': 1, 'backbone2.fc1.weight': 1, 'backbone1.fc1.weight': 1} - assert (allreduce_fusion_dict == expect_dict) + assert allreduce_fusion_dict == expect_dict cost_model_context.reset_cost_model_context() @@ -214,7 +213,7 @@ def test_allreduce_fusion2(): net = SimpleDMLNet(DenseNet1(has_bias=False, activation=None), DenseNet2(has_bias=False, activation=None)) allreduce_fusion_dict = train_common(net) expect_dict = {} - assert (allreduce_fusion_dict == expect_dict) + assert allreduce_fusion_dict == expect_dict cost_model_context.reset_cost_model_context() @@ -240,7 +239,7 @@ def test_allreduce_fusion3(): 'backbone1.fc2.weight': 2, 'backbone1.fc1.bias': 2, 'backbone1.fc1.weight': 2} - assert (allreduce_fusion_dict == expect_dict) + assert allreduce_fusion_dict == expect_dict cost_model_context.reset_cost_model_context() @@ -267,7 +266,7 @@ def test_allreduce_fusion4(): 'backbone1.fc2.weight': 1, 'backbone1.fc1.weight': 1} - assert (allreduce_fusion_dict == expect_dict) + assert allreduce_fusion_dict == expect_dict cost_model_context.reset_cost_model_context() @@ -295,7 +294,7 @@ def test_allreduce_fusion5(): 'backbone1.fc4.weight': 2, 'backbone1.fc3.weight': 2, 'backbone1.fc2.weight': 1, - 
'backbone1.fc1.weight': 1, } + 'backbone1.fc1.weight': 1,} - assert (allreduce_fusion_dict == expect_dict) + assert allreduce_fusion_dict == expect_dict cost_model_context.reset_cost_model_context() diff --git a/tests/ut/python/parallel/test_alltoall.py b/tests/ut/python/parallel/test_alltoall.py index 7f9fcbfa76..a03a83b0ba 100644 --- a/tests/ut/python/parallel/test_alltoall.py +++ b/tests/ut/python/parallel/test_alltoall.py @@ -67,7 +67,6 @@ def all_to_all_net(strategy1): def all_to_all_common(strategy1): - batch_size = 32 learning_rate = 0.1 momentum = 0.9 epoch_size = 2 @@ -104,7 +103,7 @@ def test_all_to_all(): [8, 1]], 'Default/network-_VirtualDatasetCell/_backbone-WithLossCell/_backbone-AllToAllNet/MatMul-op0': [ [1, 1], [1, 8]]} - assert (strategys == expect_dict) + assert strategys == expect_dict context.set_context(save_graphs=False) diff --git a/tests/ut/python/parallel/test_arithmetic.py b/tests/ut/python/parallel/test_arithmetic.py index d4a926f0f4..6bb2aac5d2 100644 --- a/tests/ut/python/parallel/test_arithmetic.py +++ b/tests/ut/python/parallel/test_arithmetic.py @@ -43,7 +43,7 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y, b) -def compile(net, x, y, b): +def compile_net(net, x, y, b): net.set_auto_parallel() _executor.compile(net, x, y, b) @@ -69,7 +69,7 @@ def test_matmul_sub(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_add(): @@ -93,7 +93,7 @@ def test_matmul_add(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_mul(): @@ -117,7 +117,7 @@ def test_matmul_mul(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_div(): @@ -141,7 +141,7 @@ def test_matmul_div(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_greater(): @@ -165,7 +165,7 @@ def test_matmul_greater(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_add_broadcast(): @@ -189,7 +189,7 @@ def test_matmul_add_broadcast(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_add_broadcast2(): @@ -213,7 +213,7 @@ def test_matmul_add_broadcast2(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 1]), dtype=ms.float32) b = Tensor(np.ones([1, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_sub_broadcast(): @@ -237,7 +237,7 @@ def test_matmul_sub_broadcast(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_sub_broadcast2(): @@ -261,7 +261,7 @@ def test_matmul_sub_broadcast2(): x = Tensor(np.ones([64, 32]), 
dtype=ms.float32) y = Tensor(np.ones([32, 1]), dtype=ms.float32) b = Tensor(np.ones([1, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_mul_broadcast(): @@ -285,7 +285,7 @@ def test_matmul_mul_broadcast(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_mul_broadcast2(): @@ -309,7 +309,7 @@ def test_matmul_mul_broadcast2(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 1]), dtype=ms.float32) b = Tensor(np.ones([1, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_div_broadcast(): @@ -333,7 +333,7 @@ def test_matmul_div_broadcast(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_div_broadcast2(): @@ -357,7 +357,7 @@ def test_matmul_div_broadcast2(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 1]), dtype=ms.float32) b = Tensor(np.ones([1, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_greater_broadcast(): @@ -381,7 +381,7 @@ def test_matmul_greater_broadcast(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_greater_broadcast2(): @@ -405,7 +405,7 @@ def test_matmul_greater_broadcast2(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 1]), dtype=ms.float32) b = Tensor(np.ones([1, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_floordiv(): @@ -429,7 +429,7 @@ def test_matmul_floordiv(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_floordiv_broadcast(): @@ -453,7 +453,7 @@ def test_matmul_floordiv_broadcast(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_floordiv_broadcast2(): @@ -477,7 +477,7 @@ def test_matmul_floordiv_broadcast2(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 1]), dtype=ms.float32) b = Tensor(np.ones([1, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_assign_sub(): @@ -504,4 +504,4 @@ def test_assign_sub(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([128, 32]), dtype=ms.float32) z = Tensor(np.ones([128, 32]), dtype=ms.float32) - compile(net, x, y, z) + compile_net(net, x, y, z) diff --git a/tests/ut/python/parallel/test_auto_parallel_BN_PReLU.py b/tests/ut/python/parallel/test_auto_parallel_BN_PReLU.py index 02f0b9c77d..087065a9a3 100644 --- a/tests/ut/python/parallel/test_auto_parallel_BN_PReLU.py +++ b/tests/ut/python/parallel/test_auto_parallel_BN_PReLU.py @@ -20,7 +20,6 @@ from mindspore import Tensor from mindspore import context from mindspore.common.api import _executor from mindspore.ops import composite as C -from mindspore.ops import operations as P from tests.ut.python.ops.test_math_ops import VirtualLoss diff --git 
a/tests/ut/python/parallel/test_auto_parallel_arithmetic.py b/tests/ut/python/parallel/test_auto_parallel_arithmetic.py index 7eb8ddccd3..aa6177b996 100644 --- a/tests/ut/python/parallel/test_auto_parallel_arithmetic.py +++ b/tests/ut/python/parallel/test_auto_parallel_arithmetic.py @@ -18,7 +18,6 @@ import mindspore as ms import mindspore.nn as nn from mindspore import Tensor from mindspore import context -from mindspore import context from mindspore.common.api import _executor from mindspore.ops import composite as C from mindspore.ops import operations as P @@ -48,7 +47,7 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y, b) -def compile(net, x, y, b, phase): +def compile_net(net, x, y, b, phase): net.set_auto_parallel() _executor.compile(net, x, y, b, phase=phase) @@ -73,7 +72,7 @@ def test_auto_parallel_arithmetic(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 128]), dtype=ms.float32) b = Tensor(np.ones([64, 128]), dtype=ms.float32) - compile(net, x, y, b, phase='train') + compile_net(net, x, y, b, phase='train') strategies = _executor._get_strategy(net) expected_strategies = {'Default/network-Net/FloorDiv-op0': [[2, 4], [2, 4]], 'Default/network-Net/MatMul-op1': [[2, 1], [1, 4]]} @@ -100,7 +99,7 @@ def test_auto_parallel_arithmetic_broadcast_both(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 1]), dtype=ms.float32) b = Tensor(np.ones([1, 64]), dtype=ms.float32) - compile(net, x, y, b, phase='train') + compile_net(net, x, y, b, phase='train') strategies = _executor._get_strategy(net) expected_strategies = {'Default/network-Net/FloorDiv-op0': [[8, 1], [1, 1]], 'Default/network-Net/MatMul-op1': [[8, 1], [1, 1]]} @@ -127,7 +126,7 @@ def test_auto_parallel_arithmetic_broadcast_right(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 32]), dtype=ms.float32) b = Tensor(np.ones([32]), dtype=ms.float32) - compile(net, x, y, b, phase='train') + compile_net(net, x, y, b, phase='train') strategies = _executor._get_strategy(net) expected_strategies = {'Default/network-Net/FloorDiv-op0': [[4, 2], [2]], 'Default/network-Net/MatMul-op1': [[4, 1], [1, 2]]} @@ -154,7 +153,7 @@ def test_auto_parallel_arithmetic_broadcast_left(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 32]), dtype=ms.float32) b = Tensor(np.ones([128, 64, 32]), dtype=ms.float32) - compile(net, x, y, b, phase="train") + compile_net(net, x, y, b, phase="train") strategies = _executor._get_strategy(net) expected_strategies = {'Default/network-Net/FloorDiv-op0': [[4, 2], [1, 4, 2]], 'Default/network-Net/MatMul-op1': [[4, 1], [1, 2]]} diff --git a/tests/ut/python/parallel/test_auto_parallel_assign_sub_with_ref_key.py b/tests/ut/python/parallel/test_auto_parallel_assign_sub_with_ref_key.py index d95473d993..75056dee73 100644 --- a/tests/ut/python/parallel/test_auto_parallel_assign_sub_with_ref_key.py +++ b/tests/ut/python/parallel/test_auto_parallel_assign_sub_with_ref_key.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import numpy as np import re +import numpy as np import mindspore as ms import mindspore.nn as nn @@ -21,7 +21,6 @@ from mindspore import Tensor from mindspore import context from mindspore.common.api import _executor from mindspore.ops import composite as C -from mindspore.ops import operations as P from mindspore.parallel._utils import _reset_op_id as reset_op_id from tests.ut.python.ops.test_math_ops import VirtualLoss diff --git a/tests/ut/python/parallel/test_auto_parallel_double_subgraphs.py b/tests/ut/python/parallel/test_auto_parallel_double_subgraphs.py index 0c1b7dc81b..e121cecf8e 100644 --- a/tests/ut/python/parallel/test_auto_parallel_double_subgraphs.py +++ b/tests/ut/python/parallel/test_auto_parallel_double_subgraphs.py @@ -10,7 +10,6 @@ from mindspore.ops import composite as C from mindspore.ops import functional as F from mindspore.ops import operations as P from mindspore.parallel import _cost_model_context as cost_model_context -from mindspore.parallel import set_algo_parameters, get_algo_parameters, reset_algo_parameters from mindspore.parallel._utils import _reset_op_id as reset_op_id diff --git a/tests/ut/python/parallel/test_auto_parallel_four_matmul.py b/tests/ut/python/parallel/test_auto_parallel_four_matmul.py index 6cb9b6f7a2..7852165ee7 100644 --- a/tests/ut/python/parallel/test_auto_parallel_four_matmul.py +++ b/tests/ut/python/parallel/test_auto_parallel_four_matmul.py @@ -44,7 +44,7 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y, z, w, b) -def compile(net, x, y, z, w, b): +def compile_net(net, x, y, z, w, b): net.set_auto_parallel() _executor.compile(net, x, y, z, w, b) @@ -77,7 +77,7 @@ def test_four_matmul_linear(): net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") - compile(net, x, y, z, w, b) + compile_net(net, x, y, z, w, b) def test_four_matmul1(): @@ -103,7 +103,7 @@ def test_four_matmul1(): net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") - compile(net, x, y, z, w, b) + compile_net(net, x, y, z, w, b) def test_four_matmul2(): @@ -130,4 +130,4 @@ def test_four_matmul2(): net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") - compile(net, x, y, z, w, b) + compile_net(net, x, y, z, w, b) diff --git a/tests/ut/python/parallel/test_auto_parallel_inference.py b/tests/ut/python/parallel/test_auto_parallel_inference.py index ac8d4fc473..5181be717d 100644 --- a/tests/ut/python/parallel/test_auto_parallel_inference.py +++ b/tests/ut/python/parallel/test_auto_parallel_inference.py @@ -36,4 +36,4 @@ def test_inference_phase(): train_network.set_train() train_network.set_auto_parallel() - output = train_network(predict, label) + _ = train_network(predict, label) diff --git a/tests/ut/python/parallel/test_auto_parallel_matmul_prelu.py b/tests/ut/python/parallel/test_auto_parallel_matmul_prelu.py index 0c5caa38b8..c2bf469c6a 100644 --- a/tests/ut/python/parallel/test_auto_parallel_matmul_prelu.py +++ b/tests/ut/python/parallel/test_auto_parallel_matmul_prelu.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import numpy as np import re +import numpy as np import mindspore as ms import mindspore.nn as nn diff --git a/tests/ut/python/parallel/test_auto_parallel_parameter_cast.py b/tests/ut/python/parallel/test_auto_parallel_parameter_cast.py index 3f4b093d63..6d75f2a9e5 100644 --- a/tests/ut/python/parallel/test_auto_parallel_parameter_cast.py +++ b/tests/ut/python/parallel/test_auto_parallel_parameter_cast.py @@ -16,7 +16,6 @@ import numpy as np import mindspore as ms import mindspore.nn as nn -from mindspore import Tensor from mindspore import Tensor, Parameter from mindspore import context from mindspore.common import dtype as mstype diff --git a/tests/ut/python/parallel/test_auto_parallel_reduce_method.py b/tests/ut/python/parallel/test_auto_parallel_reduce_method.py index 9af55db3be..2e66490498 100644 --- a/tests/ut/python/parallel/test_auto_parallel_reduce_method.py +++ b/tests/ut/python/parallel/test_auto_parallel_reduce_method.py @@ -44,7 +44,7 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y, b) -def compile(net, x, y, b): +def compile_net(net, x, y, b): net.set_auto_parallel() _executor.compile(net, x, y, b) @@ -71,7 +71,7 @@ def test_sum_mul(): x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([32, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_sum_mul2(): @@ -95,7 +95,7 @@ def test_sum_mul2(): x = Tensor(np.ones([128, 128, 64, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 128, 64, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_sum_mul3(): @@ -119,4 +119,4 @@ def test_sum_mul3(): x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 32]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) diff --git a/tests/ut/python/parallel/test_auto_parallel_reshape.py b/tests/ut/python/parallel/test_auto_parallel_reshape.py index ce308cf485..3adbb43717 100644 --- a/tests/ut/python/parallel/test_auto_parallel_reshape.py +++ b/tests/ut/python/parallel/test_auto_parallel_reshape.py @@ -215,7 +215,7 @@ def test_reshape_auto_5(): size = 8 context.set_auto_parallel_context(device_num=size, global_rank=0) x = Tensor(np.ones([4, 1024 * size, 1]), dtype=ms.float32) - y = Tensor(np.ones([4, 1024 * size, ]), dtype=ms.float32) + y = Tensor(np.ones([4, 1024 * size,]), dtype=ms.float32) net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") @@ -263,7 +263,7 @@ def test_reshape_auto_6(): size = 8 context.set_auto_parallel_context(device_num=size, global_rank=0) x = Tensor(np.ones([4, 1024, 1]), dtype=ms.float32) - y = Tensor(np.ones([4, 1024, ]), dtype=ms.float32) + y = Tensor(np.ones([4, 1024,]), dtype=ms.float32) net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") diff --git a/tests/ut/python/parallel/test_auto_parallel_rhombus.py b/tests/ut/python/parallel/test_auto_parallel_rhombus.py index fd0d2cba43..b778e9ed38 100644 --- a/tests/ut/python/parallel/test_auto_parallel_rhombus.py +++ b/tests/ut/python/parallel/test_auto_parallel_rhombus.py @@ -44,7 +44,7 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y, b) -def compile(net, x, y, b): +def compile_net(net, x, y, b): net.set_auto_parallel() _executor.compile(net, x, y, b) @@ -72,7 +72,7 @@ def test_rhombus1(): net = 
GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") - compile(net, x, y, b) + compile_net(net, x, y, b) def test_rhombus2(): @@ -103,7 +103,7 @@ def test_rhombus2(): net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") - compile(net, x, y, b) + compile_net(net, x, y, b) def test_rhombus3(): @@ -134,4 +134,4 @@ def test_rhombus3(): net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") - compile(net, x, y, z) + compile_net(net, x, y, z) diff --git a/tests/ut/python/parallel/test_auto_parallel_softmax_loss.py b/tests/ut/python/parallel/test_auto_parallel_softmax_loss.py index c63a0d378d..1bcd49b8d8 100644 --- a/tests/ut/python/parallel/test_auto_parallel_softmax_loss.py +++ b/tests/ut/python/parallel/test_auto_parallel_softmax_loss.py @@ -21,7 +21,6 @@ from mindspore import context from mindspore.common.api import _executor from mindspore.ops import composite as C from mindspore.ops import operations as P -from tests.ut.python.ops.test_math_ops import VirtualLoss class NetWithLoss(nn.Cell): diff --git a/tests/ut/python/parallel/test_auto_parallel_transformer.py b/tests/ut/python/parallel/test_auto_parallel_transformer.py index dd4734c5a3..c208e7852c 100644 --- a/tests/ut/python/parallel/test_auto_parallel_transformer.py +++ b/tests/ut/python/parallel/test_auto_parallel_transformer.py @@ -105,8 +105,8 @@ def test_dmnet_train_step(): size = 8 context.set_auto_parallel_context(device_num=size, global_rank=0) - input = Tensor(np.ones([4096, 4096]).astype(np.float32) * 0.01) + input_ = Tensor(np.ones([4096, 4096]).astype(np.float32) * 0.01) net = GradWrap(NetWithLoss(MultiTransformer())) context.set_auto_parallel_context(parallel_mode="auto_parallel") net.set_auto_parallel() - _executor.compile(net, input) + _executor.compile(net, input_) diff --git a/tests/ut/python/parallel/test_auto_parallel_two_bn.py b/tests/ut/python/parallel/test_auto_parallel_two_bn.py index c6c01be5c1..3c73290b1e 100644 --- a/tests/ut/python/parallel/test_auto_parallel_two_bn.py +++ b/tests/ut/python/parallel/test_auto_parallel_two_bn.py @@ -1,5 +1,19 @@ -import numpy as np +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import re +import numpy as np import mindspore as ms import mindspore.nn as nn @@ -33,7 +47,7 @@ class Blockcell(nn.Cell): return out -def getBlock(): +def get_block(): return Blockcell() @@ -41,8 +55,8 @@ def test_two_bn(): class Net(nn.Cell): def __init__(self): super().__init__() - self.block1 = getBlock() - self.block2 = getBlock() + self.block1 = get_block() + self.block2 = get_block() self.relu = P.ReLU() self.add = P.TensorAdd() self.bias = Tensor(np.ones([64, 64]), dtype=ms.float32) diff --git a/tests/ut/python/parallel/test_auto_parallel_two_matmul.py b/tests/ut/python/parallel/test_auto_parallel_two_matmul.py index c5352fc1c7..1792687c57 100644 --- a/tests/ut/python/parallel/test_auto_parallel_two_matmul.py +++ b/tests/ut/python/parallel/test_auto_parallel_two_matmul.py @@ -104,23 +104,23 @@ def test_two_matmul(): set_algo_parameters(tensor_slice_align_enable=False, tensor_slice_align_size=32, fully_use_devices=False, elementwise_op_strategy_follow=False) para_slice_align_enable = get_algo_parameters("tensor_slice_align_enable") - assert para_slice_align_enable == False + assert not para_slice_align_enable para_slice_align_size = get_algo_parameters("tensor_slice_align_size") assert para_slice_align_size == 32 fully_use_devices = get_algo_parameters("fully_use_devices") - assert fully_use_devices == False + assert not fully_use_devices elementwise_op_strategy_follow = get_algo_parameters("elementwise_op_strategy_follow") - assert elementwise_op_strategy_follow == False + assert not elementwise_op_strategy_follow reset_algo_parameters() para_slice_align_enable = get_algo_parameters("tensor_slice_align_enable") - assert para_slice_align_enable == False + assert not para_slice_align_enable para_slice_align_size = get_algo_parameters("tensor_slice_align_size") assert para_slice_align_size == 16 fully_use_devices = get_algo_parameters("fully_use_devices") - assert fully_use_devices == True + assert fully_use_devices elementwise_op_strategy_follow = get_algo_parameters("elementwise_op_strategy_follow") - assert elementwise_op_strategy_follow == False + assert not elementwise_op_strategy_follow x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) diff --git a/tests/ut/python/parallel/test_auto_star_elimination.py b/tests/ut/python/parallel/test_auto_star_elimination.py index b8919c2b94..9c4678f91c 100644 --- a/tests/ut/python/parallel/test_auto_star_elimination.py +++ b/tests/ut/python/parallel/test_auto_star_elimination.py @@ -11,9 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import math + import numpy as np -import os import mindspore as ms import mindspore.nn as nn @@ -21,10 +20,8 @@ from mindspore import Tensor, Parameter from mindspore import context from mindspore.common import dtype as mstype from mindspore.common.api import _executor -from mindspore.common.initializer import initializer from mindspore.nn.loss.loss import _Loss from mindspore.ops import composite as C -from mindspore.ops import functional as F from mindspore.ops import operations as P from tests.ut.python.ops.test_math_ops import VirtualLoss diff --git a/tests/ut/python/parallel/test_batch_matmul.py b/tests/ut/python/parallel/test_batch_matmul.py index 7ed4029b51..f49a9c322e 100644 --- a/tests/ut/python/parallel/test_batch_matmul.py +++ b/tests/ut/python/parallel/test_batch_matmul.py @@ -41,7 +41,7 @@ _w2 = Tensor(np.ones([128, 32, 32]), dtype=ms.float32) _b = Tensor(np.ones([128, 64, 16]), dtype=ms.float32) -def compile(net): +def compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() @@ -54,7 +54,7 @@ def test_batch_matmul_data_parallel(): strategy1 = ((16, 1, 1), (16, 1, 1)) strategy2 = ((16, 1, 1), (16, 1, 1)) net = Net(_w1, _w2, False, strategy1, strategy2) - compile(net) + compile_net(net) def test_batch_matmul_model_parallel(): @@ -62,7 +62,7 @@ def test_batch_matmul_model_parallel(): strategy1 = ((1, 1, 1), (1, 1, 1)) strategy2 = ((1, 1, 1), (1, 1, 16)) net = Net(_w1, _w2, False, strategy1, strategy2) - compile(net) + compile_net(net) def test_batch_matmul_hybrid_parallel(): @@ -70,13 +70,13 @@ def test_batch_matmul_hybrid_parallel(): strategy1 = ((2, 2, 2), (2, 2, 2)) strategy2 = ((2, 2, 2), (2, 2, 2)) net = Net(_w1, _w2, False, strategy1, strategy2) - compile(net) + compile_net(net) def test_batch_matmul_auto_parallel(): context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0) net = Net(_w1, _w2, False) - compile(net) + compile_net(net) def test_batch_matmul_repeat_calc(): @@ -84,7 +84,7 @@ def test_batch_matmul_repeat_calc(): strategy1 = ((2, 2, 4), (2, 2, 4)) strategy2 = ((1, 2, 2), (1, 2, 2)) net = Net(_w1, _w2, False, strategy1, strategy2) - compile(net) + compile_net(net) def test_batch_matmul_transpose_b(): @@ -92,4 +92,4 @@ def test_batch_matmul_transpose_b(): strategy1 = ((2, 2, 4), (2, 2, 4)) strategy2 = ((1, 2, 2), (1, 2, 2)) net = Net(_w1, _w2, True, strategy1, strategy2) - compile(net) + compile_net(net) diff --git a/tests/ut/python/parallel/test_batchnorm_batch_parallel.py b/tests/ut/python/parallel/test_batchnorm_batch_parallel.py index d395a56a06..5935c44441 100644 --- a/tests/ut/python/parallel/test_batchnorm_batch_parallel.py +++ b/tests/ut/python/parallel/test_batchnorm_batch_parallel.py @@ -30,7 +30,6 @@ from mindspore.train import Model, ParallelMode from tests.dataset_mock import MindData dev_num = 8 -strategy_no_weight = ((dev_num, 1, 1, 1),) strategy_weight = ((dev_num, 1, 1, 1), (1, 1, 1, 1)) strategy_bn = ((dev_num, 1, 1, 1), (1,), (1,)) strategy_fc_weight_bias = ((dev_num, 1), (1, 1), (1,)) @@ -62,7 +61,7 @@ def conv7x7(in_channels, out_channels, stride=1, padding=0): weight_shape = (out_channels, in_channels, 7, 7) weight = Tensor(np.ones(weight_shape).astype(np.float32)) conv = Conv2d(in_channels, out_channels, - kernel_size=7, stride=stride, padding=0, weight_init=weight, has_bias=False, + kernel_size=7, stride=stride, padding=padding, weight_init=weight, has_bias=False, pad_mode="same") 
conv.conv2d.set_strategy(strategy_weight) return conv @@ -95,7 +94,7 @@ class ResNet(Cell): def __init__(self, num_classes=100): super(ResNet, self).__init__() strategy_no_weight = ((dev_num, 1, 1, 1),) - self.conv1 = conv7x7(3, 64, stride=2, padding=3) + self.conv1 = conv7x7(3, 64, stride=2, padding=0) self.bn1 = bn_with_initialize(64) self.relu = ReLU() self.relu.relu.set_strategy(strategy_no_weight) @@ -124,7 +123,6 @@ def test_batchnorm_batch_parallel(): learning_rate = 0.1 momentum = 0.9 epoch_size = 2 - rank_size = 0 predict = Tensor(np.ones([batch_size, 3, 224, 224]), dtype=ms.float32) label = Tensor(np.ones([batch_size]), dtype=ms.int32) diff --git a/tests/ut/python/parallel/test_bn_prelu_cell.py b/tests/ut/python/parallel/test_bn_prelu_cell.py index e5c94bd241..526ab3203f 100644 --- a/tests/ut/python/parallel/test_bn_prelu_cell.py +++ b/tests/ut/python/parallel/test_bn_prelu_cell.py @@ -171,7 +171,7 @@ class PReLU(nn.Cell): if not isinstance(w, Tensor): w = Tensor(w) - self.w = Parameter(initializer(w, [channel, ]), name='a') + self.w = Parameter(initializer(w, [channel,]), name='a') self.prelu = P.PReLU() self.relu = P.ReLU().set_strategy(((1))) @@ -181,7 +181,7 @@ class PReLU(nn.Cell): class BNNet(nn.Cell): - def __init__(self, strategy0, strategy1, strategy2): + def __init__(self): super(BNNet, self).__init__() self.bn = FusedBatchNorm(512) self.prelu = PReLU(512) @@ -192,13 +192,12 @@ class BNNet(nn.Cell): return x -def bn_net(strategy0, strategy1, strategy2): - return BNNet(strategy0=strategy0, strategy1=strategy1, strategy2=strategy2) +def bn_net(): + return BNNet() -def bn_common(parallel_mode, train_flag, strategy0=None, strategy1=None, strategy2=None, strategy_loss=None): +def bn_common(parallel_mode, train_flag, strategy_loss=None): context.set_context(mode=context.GRAPH_MODE) - batch_size = 32 learning_rate = 0.1 momentum = 0.9 epoch_size = 2 @@ -207,7 +206,7 @@ def bn_common(parallel_mode, train_flag, strategy0=None, strategy1=None, strateg predict = Tensor(np.ones([32, 512]), dtype=ms.float32) label = Tensor(np.ones([32]), dtype=ms.int32) dataset = Dataset(predict, label, 2) - net = bn_net(strategy0, strategy1, strategy2) + net = bn_net() loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) loss.softmax_cross_entropy.set_strategy(strategy_loss) diff --git a/tests/ut/python/parallel/test_bool_grad.py b/tests/ut/python/parallel/test_bool_grad.py index 6ce4e9be94..735f66bb6a 100644 --- a/tests/ut/python/parallel/test_bool_grad.py +++ b/tests/ut/python/parallel/test_bool_grad.py @@ -21,7 +21,7 @@ from mindspore import context from mindspore.common.parameter import Parameter from mindspore.nn.optim import Momentum from mindspore.ops import operations as P -from mindspore.train import Model, ParallelMode +from mindspore.train import Model from tests.dataset_mock import MindData context.set_context(mode=context.GRAPH_MODE) diff --git a/tests/ut/python/parallel/test_broadcast_dict.py b/tests/ut/python/parallel/test_broadcast_dict.py index ccc299a93f..ff02d045ca 100644 --- a/tests/ut/python/parallel/test_broadcast_dict.py +++ b/tests/ut/python/parallel/test_broadcast_dict.py @@ -54,7 +54,7 @@ def test_param_broadcast(): network.set_train() predict = Tensor(np.ones([64, 512]).astype(np.float32) * 0.01) - out = network(predict) + _ = network(predict) context.reset_auto_parallel_context() @@ -67,5 +67,5 @@ def test_param_not_broadcast(): network.set_train() predict = Tensor(np.ones([64, 512]).astype(np.float32) * 0.01) - out = network(predict) + _ = network(predict) 
context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_comparison_function_info.py b/tests/ut/python/parallel/test_comparison_function_info.py index 4ac4e4cbcb..adb5a5f395 100644 --- a/tests/ut/python/parallel/test_comparison_function_info.py +++ b/tests/ut/python/parallel/test_comparison_function_info.py @@ -44,7 +44,7 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y, b) -def compile(net, x, y, b): +def compile_net(net, x, y, b): net.set_auto_parallel() _executor.compile(net, x, y, b) @@ -69,7 +69,7 @@ def test_matmul_equal(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_not_equal(): @@ -92,7 +92,7 @@ def test_matmul_not_equal(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_not_equal_repeated_calculation(): @@ -115,7 +115,7 @@ def test_matmul_not_equal_repeated_calculation(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_maximum(): @@ -138,7 +138,7 @@ def test_matmul_maximum(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_maximum_broadcast(): @@ -161,7 +161,7 @@ def test_matmul_maximum_broadcast(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_maximum_broadcast2(): @@ -184,7 +184,7 @@ def test_matmul_maximum_broadcast2(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 1]), dtype=ms.float32) b = Tensor(np.ones([1, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_minimum(): @@ -207,7 +207,7 @@ def test_matmul_minimum(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_minimum_broadcast(): @@ -230,7 +230,7 @@ def test_matmul_minimum_broadcast(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_minimum_broadcast2(): @@ -253,7 +253,7 @@ def test_matmul_minimum_broadcast2(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 1]), dtype=ms.float32) b = Tensor(np.ones([1, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_minimum_auto_parallel(): @@ -274,4 +274,4 @@ def test_matmul_minimum_auto_parallel(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 1]), dtype=ms.float32) b = Tensor(np.ones([1, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) diff --git a/tests/ut/python/parallel/test_dataset_util.py b/tests/ut/python/parallel/test_dataset_util.py index 9f81e62b02..f3c861dd68 100644 --- a/tests/ut/python/parallel/test_dataset_util.py +++ 
b/tests/ut/python/parallel/test_dataset_util.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np - import mindspore as ms from mindspore import Tensor from mindspore.train._utils import _to_full_shapes, _to_full_tensor @@ -35,7 +33,7 @@ def test_to_full_tensor_1(): expect = ([[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [1, 2, 3], [4, 5, 6], [0, 0, 0], [0, 0, 0]]) expect_tensor = Tensor(expect, dtype=ms.float32) - assert (full_tensor[0] == expect_tensor) + assert full_tensor[0] == expect_tensor def test_to_full_tensor_2(): @@ -52,7 +50,7 @@ def test_to_full_tensor_2(): expect_tensor1 = Tensor(expect1, dtype=ms.int32) expect_tensors = (expect_tensor0, expect_tensor1) - assert (full_tensor == expect_tensors) + assert full_tensor == expect_tensors def test_to_full_tensor_sens_2(): @@ -70,4 +68,4 @@ def test_to_full_tensor_sens_2(): expect_tensor_sens = Tensor(0.1, dtype=ms.float32) expect_tensors = (expect_tensor0, expect_tensor1, expect_tensor_sens) - assert (full_tensor == expect_tensors) + assert full_tensor == expect_tensors diff --git a/tests/ut/python/parallel/test_dense_matmul.py b/tests/ut/python/parallel/test_dense_matmul.py index 48ee5c8d52..e408c65f84 100644 --- a/tests/ut/python/parallel/test_dense_matmul.py +++ b/tests/ut/python/parallel/test_dense_matmul.py @@ -47,8 +47,8 @@ class DenseMutMulNet(nn.Cell): def test_dmnet_train_step(): context.reset_auto_parallel_context() - input = Tensor(np.ones([32, 128]).astype(np.float32) * 0.01) + input_ = Tensor(np.ones([32, 128]).astype(np.float32) * 0.01) label = Tensor(np.zeros([32, 768]).astype(np.float32)) net = DenseMutMulNet() net = train_step_with_loss_warp(DenseMutMulNet()) - _executor.compile(net, input, label) + _executor.compile(net, input_, label) diff --git a/tests/ut/python/parallel/test_different_type_for_div_op.py b/tests/ut/python/parallel/test_different_type_for_div_op.py index 31af23e100..4bb09bbdc2 100644 --- a/tests/ut/python/parallel/test_different_type_for_div_op.py +++ b/tests/ut/python/parallel/test_different_type_for_div_op.py @@ -32,7 +32,7 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y, bias) -def compile(net, x, y, bias): +def compile_net(net, x, y, bias): net.set_auto_parallel() _executor.compile(net, x, y, bias) @@ -58,7 +58,7 @@ def test_sum_as_loss_float16(): x = Tensor(np.ones([64, 32]), dtype=ms.float16) y = Tensor(np.ones([64, 32]), dtype=ms.float16) bias = Tensor(np.ones([64]), dtype=ms.float16) - compile(net, x, y, bias) + compile_net(net, x, y, bias) def test_sum_as_loss_float32(): @@ -82,7 +82,7 @@ def test_sum_as_loss_float32(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 32]), dtype=ms.float32) bias = Tensor(np.ones([64]), dtype=ms.float32) - compile(net, x, y, bias) + compile_net(net, x, y, bias) def test_sum_as_loss_int32(): @@ -106,4 +106,4 @@ def test_sum_as_loss_int32(): x = Tensor(np.ones([64, 32]), dtype=ms.int32) y = Tensor(np.ones([64, 32]), dtype=ms.int32) bias = Tensor(np.ones([64]), dtype=ms.int32) - compile(net, x, y, bias) + compile_net(net, x, y, bias) diff --git a/tests/ut/python/parallel/test_dropout_do_mask.py b/tests/ut/python/parallel/test_dropout_do_mask.py index 03bcf3c1b6..f3d8f6ef8e 100644 --- a/tests/ut/python/parallel/test_dropout_do_mask.py +++ b/tests/ut/python/parallel/test_dropout_do_mask.py @@ -50,7 +50,7 @@ _w1 = Tensor(np.ones([128, 64]), dtype=ms.float32) _b = Tensor(np.ones([128, 64]), dtype=ms.float32) -def compile(net): +def 
compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() @@ -63,7 +63,7 @@ def test_dropout_do_mask_data_parallel(): strategy1 = ((16, 1), (16, 1)) strategy2 = ((16, 1),) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) def test_dropout_do_mask_model_parallel(): @@ -71,7 +71,7 @@ def test_dropout_do_mask_model_parallel(): strategy1 = ((1, 16), (1, 16)) strategy2 = ((1, 16),) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) def test_dropout_do_mask_hybrid_parallel(): @@ -79,13 +79,13 @@ def test_dropout_do_mask_hybrid_parallel(): strategy1 = ((4, 4), (4, 4)) strategy2 = ((4, 4),) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) def test_dropout_do_mask_auto_parallel(): context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0) net = Net(_w1) - compile(net) + compile_net(net) def test_dropout_do_mask_repeat_calc(): @@ -93,4 +93,4 @@ def test_dropout_do_mask_repeat_calc(): strategy1 = ((4, 4), (4, 4)) strategy2 = ((2, 4),) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) diff --git a/tests/ut/python/parallel/test_element_wise_function.py b/tests/ut/python/parallel/test_element_wise_function.py index 89a6893347..fd6e5cfd8d 100644 --- a/tests/ut/python/parallel/test_element_wise_function.py +++ b/tests/ut/python/parallel/test_element_wise_function.py @@ -44,7 +44,7 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y, b) -def compile(net, x, y, b): +def compile_net(net, x, y, b): net.set_auto_parallel() _executor.compile(net, x, y, b) @@ -72,7 +72,7 @@ def test_matmul_pow(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_exp(): @@ -98,7 +98,7 @@ def test_matmul_exp(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_log(): @@ -124,7 +124,7 @@ def test_matmul_log(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_logical_not(): @@ -151,7 +151,7 @@ def test_matmul_logical_not(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_cast(): @@ -178,7 +178,7 @@ def test_matmul_cast(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.int32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_cast_before_mirror(): @@ -202,7 +202,7 @@ def test_cast_before_mirror(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float16) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_cast_before_mirror1(): @@ -226,7 +226,7 @@ def test_cast_before_mirror1(): x = Tensor(np.ones([128, 32]), dtype=ms.float16) y = Tensor(np.ones([32, 64]), dtype=ms.float16) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + 
compile_net(net, x, y, b) def test_cast_before_mirror2(): @@ -250,7 +250,7 @@ def test_cast_before_mirror2(): x = Tensor(np.ones([128, 32]), dtype=ms.float16) y = Tensor(np.ones([32, 64]), dtype=ms.float16) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_cast_before_mirror3(): @@ -274,7 +274,7 @@ def test_cast_before_mirror3(): x = Tensor(np.ones([128, 32]), dtype=ms.float16) y = Tensor(np.ones([32, 64]), dtype=ms.float16) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_mul_two_cast(): @@ -303,4 +303,4 @@ def test_mul_two_cast(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([128, 32]), dtype=ms.float32) b = Tensor(np.ones([128, 32]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) diff --git a/tests/ut/python/parallel/test_expand_dims.py b/tests/ut/python/parallel/test_expand_dims.py index 649adcdbe9..9d144ed50d 100644 --- a/tests/ut/python/parallel/test_expand_dims.py +++ b/tests/ut/python/parallel/test_expand_dims.py @@ -54,7 +54,7 @@ _w1 = Tensor(np.ones([128, 64, 32]), dtype=ms.float32) _b = Tensor(np.ones([128, 64, 32, 1]), dtype=ms.float32) -def compile(net): +def compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() @@ -68,7 +68,7 @@ def test_expand_dims_data_parallel(): strategy2 = ((16, 1, 1),) strategy3 = ((16, 1, 1, 1), (16, 1, 1, 1)) net = Net(_w1, strategy1, strategy2, strategy3) - compile(net) + compile_net(net) def test_expand_dims_model_parallel(): @@ -77,7 +77,7 @@ def test_expand_dims_model_parallel(): strategy2 = ((1, 1, 16),) strategy3 = ((1, 1, 16, 1), (1, 1, 16, 1)) net = Net(_w1, strategy1, strategy2, strategy3) - compile(net) + compile_net(net) def test_expand_dims_hybrid_parallel(): @@ -86,13 +86,13 @@ def test_expand_dims_hybrid_parallel(): strategy2 = ((2, 2, 4),) strategy3 = ((2, 2, 4, 1), (2, 2, 4, 1)) net = Net(_w1, strategy1, strategy2, strategy3) - compile(net) + compile_net(net) def test_expand_dims_auto_parallel(): context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0) net = Net(_w1) - compile(net) + compile_net(net) def test_expand_dims_repeat_calc(): @@ -101,7 +101,7 @@ def test_expand_dims_repeat_calc(): strategy2 = ((1, 2, 2),) strategy3 = ((2, 2, 4, 1), (2, 2, 4, 1)) net = Net(_w1, strategy1, strategy2, strategy3) - compile(net) + compile_net(net) def test_expand_dims_parameter(): @@ -109,4 +109,4 @@ def test_expand_dims_parameter(): strategy1 = ((1, 2, 2),) strategy2 = ((2, 2, 4, 1), (2, 2, 4, 1)) net = Net2(_w1, strategy1, strategy2) - compile(net) + compile_net(net) diff --git a/tests/ut/python/parallel/test_forward_graph.py b/tests/ut/python/parallel/test_forward_graph.py index f84dfe05d6..c8561210c5 100644 --- a/tests/ut/python/parallel/test_forward_graph.py +++ b/tests/ut/python/parallel/test_forward_graph.py @@ -39,7 +39,7 @@ _w1 = Tensor(np.ones([128, 64, 32]), dtype=ms.float32) _b = Tensor(np.ones([128, 64, 32]), dtype=ms.float32) -def compile(net): +def compile_net(net): net.set_auto_parallel() _executor.compile(net, _x, _b) context.reset_auto_parallel_context() @@ -50,7 +50,7 @@ def test_forward_graph_data_parallel(): strategy1 = ((16, 1, 1), (16, 1, 1)) strategy2 = ((16, 1, 1),) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) def test_forward_graph_model_parallel(): @@ -58,7 +58,7 @@ def 
test_forward_graph_model_parallel(): strategy1 = ((1, 1, 16), (1, 1, 16)) strategy2 = ((1, 1, 16),) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) def test_forward_graph_hybrid_parallel(): @@ -66,13 +66,13 @@ def test_forward_graph_hybrid_parallel(): strategy1 = ((2, 2, 4), (2, 2, 4)) strategy2 = ((2, 2, 4),) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) def test_forward_graph_auto_parallel(): context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0) net = Net(_w1) - compile(net) + compile_net(net) def test_forward_graph_repeat_calc(): @@ -80,4 +80,4 @@ def test_forward_graph_repeat_calc(): strategy1 = ((2, 2, 4), (2, 2, 4)) strategy2 = ((1, 2, 2),) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) diff --git a/tests/ut/python/parallel/test_gather_v2.py b/tests/ut/python/parallel/test_gather_v2.py index 6d943be511..26e0964b03 100644 --- a/tests/ut/python/parallel/test_gather_v2.py +++ b/tests/ut/python/parallel/test_gather_v2.py @@ -18,7 +18,6 @@ import mindspore as ms import mindspore.nn as nn from mindspore import Tensor from mindspore import context -from mindspore.common import dtype as mstype from mindspore.common.api import _executor from mindspore.ops import composite as C from mindspore.ops import operations as P diff --git a/tests/ut/python/parallel/test_gather_v2_primitive.py b/tests/ut/python/parallel/test_gather_v2_primitive.py index a416923759..99559784a4 100644 --- a/tests/ut/python/parallel/test_gather_v2_primitive.py +++ b/tests/ut/python/parallel/test_gather_v2_primitive.py @@ -120,7 +120,7 @@ class TrainOneStepCell(Cell): return F.depend(loss, self.optimizer(grads)) -def net_trains(gather_v2_strategy, criterion, rank): +def net_trains(criterion, rank): init() lr = 0.1 momentum = 0.9 @@ -151,42 +151,42 @@ def test_auto_batch_parallel(): gather_v2_strategy = None criterion = GatherV2(1, strategy=gather_v2_strategy, index_size=batch_size_per_device * device_number) rank = 2 - net_trains(gather_v2_strategy, criterion, rank) + net_trains(criterion, rank) def test_2d_index_auto_batch_parallel(): gather_v2_strategy = None criterion = GatherV2(2, strategy=gather_v2_strategy, index_size=batch_size_per_device * device_number) rank = 2 - net_trains(gather_v2_strategy, criterion, rank) + net_trains(criterion, rank) def test_batch_parallel(): gather_v2_strategy = ((device_number, 1),) criterion = GatherV2(1, strategy=gather_v2_strategy, index_size=batch_size_per_device * device_number) rank = 2 - net_trains(gather_v2_strategy, criterion, rank) + net_trains(criterion, rank) def test_strategy1(): gather_v2_strategy = ((16, 2),) rank = 2 criterion = GatherV2(1, strategy=gather_v2_strategy, index_size=batch_size_per_device * device_number) - net_trains(gather_v2_strategy, criterion, rank) + net_trains(criterion, rank) def test_strategy2(): gather_v2_strategy = ((1, device_number),) rank = 2 criterion = GatherV2(1, strategy=gather_v2_strategy, index_size=batch_size_per_device * device_number) - net_trains(gather_v2_strategy, criterion, rank) + net_trains(criterion, rank) def test_strategy3(): gather_v2_strategy = ((8, 1),) rank = 2 criterion = GatherV2(1, strategy=gather_v2_strategy, index_size=batch_size_per_device * device_number) - net_trains(gather_v2_strategy, criterion, rank) + net_trains(criterion, rank) class GatherV2Axis1(_Loss): @@ -217,18 +217,18 @@ def test_axis1_auto_batch_parallel(): gather_v2_strategy = None criterion = GatherV2Axis1(1, strategy=gather_v2_strategy, 
index_size=512) rank = 2 - net_trains(gather_v2_strategy, criterion, rank) + net_trains(criterion, rank) def test_axis1_batch_parallel(): gather_v2_strategy = ((device_number, 1),) criterion = GatherV2Axis1(1, strategy=gather_v2_strategy, index_size=512) rank = 2 - net_trains(gather_v2_strategy, criterion, rank) + net_trains(criterion, rank) def test_axis1_strategy1(): gather_v2_strategy = ((16, 2),) rank = 17 criterion = GatherV2Axis1(1, strategy=gather_v2_strategy, index_size=512) - net_trains(gather_v2_strategy, criterion, rank) + net_trains(criterion, rank) diff --git a/tests/ut/python/parallel/test_get_next.py b/tests/ut/python/parallel/test_get_next.py index 0ab5b5aa71..7bd8482027 100644 --- a/tests/ut/python/parallel/test_get_next.py +++ b/tests/ut/python/parallel/test_get_next.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np - import mindspore as ms import mindspore.nn as nn from mindspore import Tensor @@ -23,8 +21,6 @@ from mindspore.common.initializer import initializer from mindspore.common.parameter import Parameter, ParameterTuple from mindspore.ops import composite as C from mindspore.ops import operations as P -from mindspore.ops.operations.comm_ops import _VirtualDataset -from tests.ut.python.ops.test_math_ops import VirtualLoss context.set_context(mode=context.GRAPH_MODE) @@ -56,7 +52,7 @@ class GradWrap(nn.Cell): return C.grad_by_list(self.network, self.weights)() -def compile(net): +def compile_net(net): net.set_auto_parallel() _executor.compile(net) @@ -67,7 +63,7 @@ def test_get_next_single(): super().__init__() self.norm = P.L2Normalize(axis=1) self.prelu = P.PReLU() - self.w = Parameter(initializer(w, [channel, ]), name='w') + self.w = Parameter(initializer(w, [channel,]), name='w') def construct(self, data): x = self.norm(data) @@ -84,7 +80,7 @@ def test_get_next_semi_auto_parallel(): super().__init__() self.norm = P.L2Normalize().set_strategy(strategy1) self.prelu = P.PReLU().set_strategy(strategy2) - self.w = Parameter(initializer(w, [channel, ]), name='w') + self.w = Parameter(initializer(w, [channel,]), name='w') def construct(self, data): x = self.norm(data) @@ -99,7 +95,7 @@ def test_get_next_semi_auto_parallel(): strategy4=strategy4) net = GradWrap(net_with_loss) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - compile(net) + compile_net(net) def test_get_next_semi_auto_parallel1(): @@ -108,7 +104,7 @@ def test_get_next_semi_auto_parallel1(): super().__init__() self.norm = P.L2Normalize().set_strategy(strategy1) self.prelu = P.PReLU().set_strategy(strategy2) - self.w = Parameter(initializer(w, [channel, ]), name='w') + self.w = Parameter(initializer(w, [channel,]), name='w') def construct(self, data): x = self.norm(data) @@ -123,7 +119,7 @@ def test_get_next_semi_auto_parallel1(): strategy4=strategy4) net = GradWrap(net_with_loss) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - compile(net) + compile_net(net) def test_get_next_auto_parallel(): @@ -132,7 +128,7 @@ def test_get_next_auto_parallel(): super().__init__() self.norm = P.L2Normalize().set_strategy(strategy1) self.prelu = P.PReLU().set_strategy(strategy2) - self.w = Parameter(initializer(w, [channel, ]), name='w') + self.w = Parameter(initializer(w, [channel,]), name='w') def construct(self, data): x = self.norm(data) @@ -144,7 +140,7 @@ def test_get_next_auto_parallel(): net_with_loss = NetWithLoss(network, [ms.float32, ms.int32], [[32, 64], [32]], 2) net = 
GradWrap(net_with_loss) context.set_auto_parallel_context(parallel_mode="auto_parallel") - compile(net) + compile_net(net) def test_only_one_get_next(): @@ -159,4 +155,4 @@ def test_only_one_get_next(): context.set_auto_parallel_context(device_num=4, global_rank=0) net = Net() context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - compile(net) + compile_net(net) diff --git a/tests/ut/python/parallel/test_get_parameter_layout.py b/tests/ut/python/parallel/test_get_parameter_layout.py index 3d27ddec94..a34ee94840 100644 --- a/tests/ut/python/parallel/test_get_parameter_layout.py +++ b/tests/ut/python/parallel/test_get_parameter_layout.py @@ -52,8 +52,8 @@ def test_get_parameter_layout(): x_layout = [[2, 4], [1, -1], [16, 32]] # device_arrangement = [2, 4], tensor_map = [1, -1] weight_layout = [[2, 4], [0, -1], [16, 32]] # device_arrangement = [2, 4], tensor_map = [0, -1] expect_dict = {'x': x_layout, 'w1': weight_layout} - # to be resovled: static local variable count_p is used in step_parallel.cc, it needs to be reset between each ut - assert (net.parameter_layout_dict == expect_dict) + # to be resovled: static local variable count_p is used in step_parallel.cc, it needs to be reset between each ut + assert net.parameter_layout_dict == expect_dict if __name__ == '__main__': diff --git a/tests/ut/python/parallel/test_hybird_parallel_activation.py b/tests/ut/python/parallel/test_hybird_parallel_activation.py index dded194bf1..8931fda12a 100644 --- a/tests/ut/python/parallel/test_hybird_parallel_activation.py +++ b/tests/ut/python/parallel/test_hybird_parallel_activation.py @@ -44,7 +44,7 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y, b) -def compile(net, x, y, b): +def compile_net(net, x, y, b): net.set_auto_parallel() _executor.compile(net, x, y, b) @@ -72,7 +72,7 @@ def test_matmul_tanh(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_activation(): @@ -98,7 +98,7 @@ def test_matmul_activation(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_softmax(): @@ -124,7 +124,7 @@ def test_matmul_softmax(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_logsoftmax(): @@ -150,7 +150,7 @@ def test_matmul_logsoftmax(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_activations(): @@ -179,7 +179,7 @@ def test_activations(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_activations_repeated_calculation(): @@ -211,7 +211,7 @@ def test_activations_repeated_calculation(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_activations_axis_tuple(): @@ -243,4 +243,4 @@ def test_activations_axis_tuple(): x = 
Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) diff --git a/tests/ut/python/parallel/test_layer_norm.py b/tests/ut/python/parallel/test_layer_norm.py index ce834a58f0..08fe687a73 100644 --- a/tests/ut/python/parallel/test_layer_norm.py +++ b/tests/ut/python/parallel/test_layer_norm.py @@ -48,7 +48,7 @@ _w = Tensor(np.ones([128, 64, 32, 16]), dtype=ms.float32) _b = Tensor(np.ones([128, 64, 32, 16]), dtype=ms.float32) -def compile(net): +def compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() @@ -62,7 +62,7 @@ def test_layer_norm_data_parallel(): strategy2 = ((16, 1, 1, 1), (1, 1, 1), (1, 1, 1)) strategy3 = ((16, 1, 1, 1), (16, 1, 1, 1)) net = Net(_w, strategy1, strategy2, strategy3) - compile(net) + compile_net(net) def test_layer_norm_model_parallel(): @@ -71,7 +71,7 @@ def test_layer_norm_model_parallel(): strategy2 = ((1, 16, 1, 1), (16, 1, 1), (16, 1, 1)) strategy3 = ((1, 16, 1, 1), (1, 16, 1, 1)) net = Net(_w, strategy1, strategy2, strategy3) - compile(net) + compile_net(net) def test_layer_norm_hybrid_parallel(): @@ -80,13 +80,13 @@ def test_layer_norm_hybrid_parallel(): strategy2 = ((2, 8, 1, 1), (8, 1, 1), (8, 1, 1)) strategy3 = ((2, 8, 1, 1), (2, 8, 1, 1)) net = Net(_w, strategy1, strategy2, strategy3) - compile(net) + compile_net(net) def test_layer_norm_auto_parallel(): context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0) net = Net(_w) - compile(net) + compile_net(net) def test_layer_norm_repeat_calc(): @@ -95,7 +95,7 @@ def test_layer_norm_repeat_calc(): strategy2 = ((2, 2, 1, 1), (2, 1, 1), (2, 1, 1)) strategy3 = ((2, 2, 4, 1), (2, 2, 4, 1)) net = Net(_w, strategy1, strategy2, strategy3) - compile(net) + compile_net(net) def test_layer_norm_wrong_strategy(): @@ -105,4 +105,4 @@ def test_layer_norm_wrong_strategy(): strategy3 = ((2, 2, 4, 1), (2, 2, 4, 1)) net = Net(_w, strategy1, strategy2, strategy3) with pytest.raises(RuntimeError): - compile(net) + compile_net(net) diff --git a/tests/ut/python/parallel/test_linear.py b/tests/ut/python/parallel/test_linear.py index efa6f58c61..795a0f604f 100644 --- a/tests/ut/python/parallel/test_linear.py +++ b/tests/ut/python/parallel/test_linear.py @@ -21,7 +21,6 @@ from mindspore import context from mindspore.common.api import _executor from mindspore.ops import composite as C from mindspore.ops import operations as P -from tests.ut.python.ops.test_math_ops import VirtualLoss class NetWithLoss(nn.Cell): diff --git a/tests/ut/python/parallel/test_loss_and_optimizer.py b/tests/ut/python/parallel/test_loss_and_optimizer.py index 6a2e9bc65d..b4cf62c29e 100644 --- a/tests/ut/python/parallel/test_loss_and_optimizer.py +++ b/tests/ut/python/parallel/test_loss_and_optimizer.py @@ -19,9 +19,8 @@ import mindspore.nn as nn from mindspore import Tensor, Parameter from mindspore import context from mindspore.common.api import _executor -from mindspore.nn import TrainOneStepCell, WithLossCell +from mindspore.nn import TrainOneStepCell from mindspore.nn.optim import Momentum, LARS -from mindspore.ops import composite as C from mindspore.ops import operations as P @@ -36,7 +35,7 @@ class NetWithLoss(nn.Cell): return self.loss(predict, b)[0] -def compile(net, x, b): +def compile_net(net, x, b): net.set_auto_parallel() _executor.compile(net, x, b) @@ 
-72,7 +71,7 @@ def test_momentum(): train_net = TrainOneStepCell(net_with_loss, optimizer) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - compile(train_net, x, b) + compile_net(train_net, x, b) def test_momentum_with_loss_scale(): @@ -106,7 +105,7 @@ def test_momentum_with_loss_scale(): train_net = TrainOneStepCell(net_with_loss, optimizer) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - compile(train_net, x, b) + compile_net(train_net, x, b) def test_momentum_with_dynamic_lr(): @@ -141,7 +140,7 @@ def test_momentum_with_dynamic_lr(): train_net = TrainOneStepCell(net_with_loss, optimizer) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - compile(train_net, x, b) + compile_net(train_net, x, b) def test_momentum_with_loss_scale_and_dynamic_lr(): @@ -177,7 +176,7 @@ def test_momentum_with_loss_scale_and_dynamic_lr(): train_net = TrainOneStepCell(net_with_loss, optimizer) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - compile(train_net, x, b) + compile_net(train_net, x, b) def test_lars(): @@ -205,11 +204,11 @@ def test_lars(): net = Net(strategy1, strategy2, weight) lr = Tensor(np.ones([6]), dtype=ms.float32) - SGD = Momentum(net.trainable_params(), lr, 0.9) - optimizer = LARS(SGD, epsilon=1e-08, hyperpara=0.02, decay_filter=lambda x: 'bn' not in x.name, + sgd = Momentum(net.trainable_params(), lr, 0.9) + optimizer = LARS(sgd, epsilon=1e-08, hyperpara=0.02, decay_filter=lambda x: 'bn' not in x.name, lars_filter=lambda x: 'bn' not in x.name) net_with_loss = NetWithLoss(net, strategy3) train_net = TrainOneStepCell(net_with_loss, optimizer) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - compile(train_net, x, b) + compile_net(train_net, x, b) diff --git a/tests/ut/python/parallel/test_matmul_tensor.py b/tests/ut/python/parallel/test_matmul_tensor.py index 4af9fabbf4..329d8653a0 100644 --- a/tests/ut/python/parallel/test_matmul_tensor.py +++ b/tests/ut/python/parallel/test_matmul_tensor.py @@ -46,7 +46,7 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y) -def compile(net, x, y): +def compile_net(net, x, y): net.set_auto_parallel() _executor.compile(net, x, y) @@ -79,7 +79,7 @@ def test_two_matmul(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 128]), dtype=ms.float32) - compile(net, x, y) + compile_net(net, x, y) def test_matmul_mul_broadcast2(): @@ -103,7 +103,7 @@ def test_matmul_mul_broadcast2(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 1]), dtype=ms.float32) - compile(net, x, y) + compile_net(net, x, y) def test_two_matmul1(): @@ -133,7 +133,7 @@ def test_two_matmul1(): x = Tensor(np.ones([128, 128]), dtype=ms.float32) y = Tensor(np.ones([128, 128]), dtype=ms.float32) - compile(net, x, y) + compile_net(net, x, y) def test_matmul_add_tensor(): @@ -158,4 +158,4 @@ def test_matmul_add_tensor(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) - compile(net, x, y) + compile_net(net, x, y) diff --git a/tests/ut/python/parallel/test_neg.py b/tests/ut/python/parallel/test_neg.py index a44b851a74..34819373d6 100644 --- a/tests/ut/python/parallel/test_neg.py +++ b/tests/ut/python/parallel/test_neg.py @@ -39,7 +39,7 @@ _w1 = Tensor(np.ones([128, 64, 32]), dtype=ms.float32) _b = Tensor(np.ones([128, 64, 32]), dtype=ms.float32) -def compile(net): +def compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = 
TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() @@ -52,7 +52,7 @@ def test_neg_data_parallel(): strategy1 = ((16, 1, 1), (16, 1, 1)) strategy2 = ((16, 1, 1),) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) def test_neg_model_parallel(): @@ -60,7 +60,7 @@ def test_neg_model_parallel(): strategy1 = ((1, 1, 16), (1, 1, 16)) strategy2 = ((1, 1, 16),) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) def test_neg_hybrid_parallel(): @@ -68,13 +68,13 @@ def test_neg_hybrid_parallel(): strategy1 = ((2, 2, 4), (2, 2, 4)) strategy2 = ((2, 2, 4),) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) def test_neg_auto_parallel(): context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0) net = Net(_w1) - compile(net) + compile_net(net) def test_neg_repeat_calc(): @@ -82,4 +82,4 @@ def test_neg_repeat_calc(): strategy1 = ((2, 2, 4), (2, 2, 4)) strategy2 = ((1, 2, 2),) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) diff --git a/tests/ut/python/parallel/test_one_dev.py b/tests/ut/python/parallel/test_one_dev.py index 86626aabf1..056f4a15c7 100644 --- a/tests/ut/python/parallel/test_one_dev.py +++ b/tests/ut/python/parallel/test_one_dev.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import re +import numpy as np import mindspore as ms import mindspore.nn as nn diff --git a/tests/ut/python/parallel/test_one_hot_net.py b/tests/ut/python/parallel/test_one_hot_net.py index ba067781a6..db152ff4b8 100644 --- a/tests/ut/python/parallel/test_one_hot_net.py +++ b/tests/ut/python/parallel/test_one_hot_net.py @@ -159,8 +159,8 @@ class SemiAutoOneHotNet(Cell): weight_np = np.zeros(weight_shape, np.float32) self.weight = Parameter(Tensor(weight_np), name='model_parallel_weight') - def construct(self, input, label): - input_n = self.normalize(input) + def construct(self, input_, label): + input_n = self.normalize(input_) w = self.normalize2(self.weight) fc_o = self.fc(input_n, w) fc_o_shape = F.shape(fc_o) @@ -209,9 +209,8 @@ class Dataset(MindData): raise StopIteration self.index += 1 if self.input_num == 2: - return self.predict, self.label - else: - return self.predict, + return (self.predict, self.label) + return (self.predict,) def reset(self): self.index = 0 @@ -268,20 +267,20 @@ def test_bn_reshape_dense_bn_train_loss(): batch_size = 16 device_num = 16 context.set_auto_parallel_context(device_num=device_num, global_rank=0) - input = Tensor(np.ones([batch_size, 2, 32, 32]).astype(np.float32) * 0.01) + input_ = Tensor(np.ones([batch_size, 2, 32, 32]).astype(np.float32) * 0.01) label = Tensor(np.ones([batch_size]), dtype=ms.int32) net = GradWrap(NetWithLoss(BNReshapeDenseBNNet())) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") net.set_auto_parallel() - _executor.compile(net, input, label) + _executor.compile(net, input_, label) def test_semi_one_hot_net_batch(): batch_size = 16 context.set_auto_parallel_context(device_num=device_num, global_rank=0) - input = Tensor(np.ones([batch_size * 1, 512]).astype(np.float32) * 0.01) + input_ = Tensor(np.ones([batch_size * 1, 512]).astype(np.float32) * 0.01) label = Tensor(np.ones([batch_size]), dtype=ms.int32) net = SemiAutoOneHotNet(args=Args(), strategy=StrategyBatch()) @@ -289,7 +288,7 @@ def test_semi_one_hot_net_batch(): context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") net.set_auto_parallel() - 
_executor.compile(net, input, label) + _executor.compile(net, input_, label) def test_semi_one_hot_net_model(): diff --git a/tests/ut/python/parallel/test_one_weight_parameter.py b/tests/ut/python/parallel/test_one_weight_parameter.py index 7ba812f24a..ac98b9320f 100644 --- a/tests/ut/python/parallel/test_one_weight_parameter.py +++ b/tests/ut/python/parallel/test_one_weight_parameter.py @@ -20,7 +20,6 @@ from mindspore import Tensor, Parameter, ParameterTuple from mindspore import context from mindspore.common.api import _executor from mindspore.ops import composite as C -from mindspore.ops import functional as F from mindspore.ops import operations as P diff --git a/tests/ut/python/parallel/test_onehot.py b/tests/ut/python/parallel/test_onehot.py index 68983c3dc9..81152d4849 100644 --- a/tests/ut/python/parallel/test_onehot.py +++ b/tests/ut/python/parallel/test_onehot.py @@ -126,15 +126,6 @@ def test_onehot_auto(): compile_graph(strategy1, strategy2, strategy3, strategy4, auto=True) -def test_onehot_model_parallel(): - context.set_auto_parallel_context(device_num=16, global_rank=0) - strategy1 = ((2, 4), (4, 2)) - strategy2 = ((2, 8),) - strategy3 = ((1, 16), (), ()) - strategy4 = ((16, 1), (16, 1)) - compile_graph(strategy1, strategy2, strategy3, strategy4) - - def test_onehot_batch_parallel_axis0(): context.set_auto_parallel_context(device_num=16, global_rank=0) strategy1 = ((2, 4), (4, 2)) diff --git a/tests/ut/python/parallel/test_operator_model_parallel.py b/tests/ut/python/parallel/test_operator_model_parallel.py index cb7d013c51..26f804537b 100644 --- a/tests/ut/python/parallel/test_operator_model_parallel.py +++ b/tests/ut/python/parallel/test_operator_model_parallel.py @@ -21,8 +21,6 @@ from mindspore.common.initializer import initializer from mindspore.common.parameter import Parameter from mindspore.common.tensor import Tensor from mindspore.nn.cell import Cell -from mindspore.nn.layer.activation import ReLU -from mindspore.nn.layer.basic import Dense from mindspore.nn.layer.basic import Flatten from mindspore.nn.layer.conv import Conv2d from mindspore.nn.layer.normalization import BatchNorm2d @@ -61,8 +59,7 @@ class DenseWrap(Cell): self.has_bias = has_bias self.weight = Parameter(initializer( - weight_init, [output_channels, input_channels]), - name="weight") + weight_init, [output_channels, input_channels]), name="weight") if self.has_bias: self.bias = Parameter(initializer( @@ -103,7 +100,7 @@ class DatasetLenet(MindData): self.index = 0 -def conv3x3(in_channels, out_channels, stride=1, padding=1): +def conv3x3(in_channels, out_channels, stride=1): """3x3 convolution """ weight_shape = (out_channels, in_channels, 3, 3) weight = Tensor(np.ones(weight_shape).astype(np.float32)) @@ -114,7 +111,7 @@ def conv3x3(in_channels, out_channels, stride=1, padding=1): return conv -def conv1x1(in_channels, out_channels, stride=1, padding=0): +def conv1x1(in_channels, out_channels, stride=1): """1x1 convolution""" weight_shape = (out_channels, in_channels, 1, 1) weight = Tensor(np.ones(weight_shape).astype(np.float32)) @@ -125,7 +122,7 @@ def conv1x1(in_channels, out_channels, stride=1, padding=0): return conv -def conv7x7(in_channels, out_channels, stride=1, padding=0): +def conv7x7(in_channels, out_channels, stride=1): """1x1 convolution""" weight_shape = (out_channels, in_channels, 7, 7) weight = Tensor(np.ones(weight_shape).astype(np.float32)) @@ -186,18 +183,17 @@ class ResidualBlock(Cell): def __init__(self, in_channels, out_channels, - stride=1, - down_sample=False): + stride=1): 
super(ResidualBlock, self).__init__() out_chls = out_channels // self.expansion - self.conv1 = conv1x1(in_channels, out_chls, stride=1, padding=0) + self.conv1 = conv1x1(in_channels, out_chls, stride=1) self.bn1 = bn_with_initialize(out_chls) - self.conv2 = conv3x3(out_chls, out_chls, stride=stride, padding=0) + self.conv2 = conv3x3(out_chls, out_chls, stride=stride) self.bn2 = bn_with_initialize(out_chls) - self.conv3 = conv1x1(out_chls, out_channels, stride=1, padding=0) + self.conv3 = conv1x1(out_chls, out_channels, stride=1) self.bn3 = bn_with_initialize_last(out_channels) self.relu1 = P.ReLU().set_strategy(strategy_no_weight) @@ -236,21 +232,21 @@ class ResidualBlockWithDown(Cell): super(ResidualBlockWithDown, self).__init__() out_chls = out_channels // self.expansion - self.conv1 = conv1x1(in_channels, out_chls, stride=1, padding=0) + self.conv1 = conv1x1(in_channels, out_chls, stride=1) self.bn1 = bn_with_initialize(out_chls) - self.conv2 = conv3x3(out_chls, out_chls, stride=stride, padding=0) + self.conv2 = conv3x3(out_chls, out_chls, stride=stride) self.bn2 = bn_with_initialize(out_chls) - self.conv3 = conv1x1(out_chls, out_channels, stride=1, padding=0) + self.conv3 = conv1x1(out_chls, out_channels, stride=1) self.bn3 = bn_with_initialize_last(out_channels) self.relu1 = P.ReLU().set_strategy(strategy_no_weight) self.relu2 = P.ReLU().set_strategy(strategy_no_weight) self.relu3 = P.ReLU().set_strategy(strategy_no_weight) - self.downSample = down_sample + self.down_sample = down_sample - self.conv_down_sample = conv1x1(in_channels, out_channels, stride=stride, padding=0) + self.conv_down_sample = conv1x1(in_channels, out_channels, stride=stride) self.bn_down_sample = bn_with_initialize(out_channels) self.add = TensorAdd().set_strategy(strategy_add) @@ -279,7 +275,7 @@ class ResidualBlockWithDown(Cell): class MakeLayer0(Cell): - def __init__(self, block, layer_num, in_channels, out_channels, stride): + def __init__(self, block, in_channels, out_channels, stride): super(MakeLayer0, self).__init__() self.a = ResidualBlockWithDown(in_channels, out_channels, stride=1, down_sample=True) self.b = block(out_channels, out_channels, stride=stride) @@ -295,14 +291,14 @@ class MakeLayer0(Cell): class ResNet(Cell): - def __init__(self, block, layer_num, num_classes=100): + def __init__(self, block, num_classes=100): super(ResNet, self).__init__() - self.conv1 = conv7x7(3, 64, stride=2, padding=3) + self.conv1 = conv7x7(3, 64, stride=2) self.bn1 = bn_with_initialize(64) self.relu = P.ReLU().set_strategy(strategy_no_weight) self.maxpool = MaxPool2d(kernel_size=3, stride=2, pad_mode="same") self.layer1 = MakeLayer0( - block, layer_num[0], in_channels=64, out_channels=256, stride=1) + block, in_channels=64, out_channels=256, stride=1) self.pool = M.ReduceMean(keep_dims=True).set_strategy(strategy_no_weight) self.fc = fc_with_initialize(64 * block.expansion, num_classes) self.flatten = Flatten() @@ -320,12 +316,12 @@ class ResNet(Cell): class ResNetModelParallel(Cell): - def __init__(self, block, layer_num, num_classes=100): + def __init__(self, block, num_classes=100): super(ResNetModelParallel, self).__init__() self.relu = P.ReLU().set_strategy(((1, dev_num, 1, 1),)) self.maxpool = MaxPool2d(kernel_size=3, stride=2, pad_mode="same") self.layer1 = MakeLayer0( - block, layer_num[0], in_channels=64, out_channels=256, stride=1) + block, in_channels=64, out_channels=256, stride=1) self.pool = M.ReduceMean(keep_dims=True).set_strategy(strategy_no_weight) self.fc = fc_with_initialize(64 * block.expansion, 
num_classes) self.flatten = Flatten() @@ -341,11 +337,11 @@ class ResNetModelParallel(Cell): def resnet_operator_net(num_classes): - return ResNet(ResidualBlock, [3, 4, 6, 3], num_classes) + return ResNet(ResidualBlock, num_classes) def resnet_model_parallel_net(num_classes): - return ResNetModelParallel(ResidualBlock, [3, 4, 6, 3], num_classes) + return ResNetModelParallel(ResidualBlock, num_classes) def test_resnet_operator_batch_parallel(): @@ -354,7 +350,6 @@ def test_resnet_operator_batch_parallel(): learning_rate = 0.1 momentum = 0.9 epoch_size = 2 - rank_size = dev_num context.reset_auto_parallel_context() context.set_auto_parallel_context(device_num=dev_num, global_rank=0) @@ -381,7 +376,6 @@ def test_resnet_model_parallel(): learning_rate = 0.1 momentum = 0.9 epoch_size = 2 - rank_size = dev_num context.reset_auto_parallel_context() context.set_auto_parallel_context(device_num=dev_num, global_rank=0) diff --git a/tests/ut/python/parallel/test_optimizer_clone_weight.py b/tests/ut/python/parallel/test_optimizer_clone_weight.py index 8570aede2f..baf5e74861 100644 --- a/tests/ut/python/parallel/test_optimizer_clone_weight.py +++ b/tests/ut/python/parallel/test_optimizer_clone_weight.py @@ -35,7 +35,7 @@ class NetWithLoss(nn.Cell): return self.loss(predict, b)[0] -def compile(net, x, b): +def compile_net(net, x, b): net.set_auto_parallel() _Executor().compile(net, x, b) @@ -72,7 +72,7 @@ def test_optimizer_clone_weight(): train_net = TrainOneStepCell(net_with_loss, optimizer) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - compile(train_net, x, b) + compile_net(train_net, x, b) def test_optimizer_clone_weight2(): @@ -107,4 +107,4 @@ def test_optimizer_clone_weight2(): train_net = TrainOneStepCell(net_with_loss, optimizer) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - compile(train_net, x, b) + compile_net(train_net, x, b) diff --git a/tests/ut/python/parallel/test_parameter_init.py b/tests/ut/python/parallel/test_parameter_init.py index 1daf3be7a5..bd36876f0c 100644 --- a/tests/ut/python/parallel/test_parameter_init.py +++ b/tests/ut/python/parallel/test_parameter_init.py @@ -52,7 +52,7 @@ def test_parameter_init(): weight = Tensor(np.ones([64, 32]), dtype=ms.float32) net = Net(strategy1, weight) - net(x, ) + net(x,) if __name__ == '__main__': diff --git a/tests/ut/python/parallel/test_prelu.py b/tests/ut/python/parallel/test_prelu.py index ac84c1364c..6f0200c14b 100644 --- a/tests/ut/python/parallel/test_prelu.py +++ b/tests/ut/python/parallel/test_prelu.py @@ -44,7 +44,7 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y) -def compile(net, x, y): +def compile_net(net, x, y): net.set_auto_parallel() _executor.compile(net, x, y) @@ -63,7 +63,7 @@ def test_prelu_single_success1(): net = GradWrap(NetWithLoss(Net())) x = Tensor(np.random.rand(1, 33, 4, 4), ms.float32) w = Tensor(np.random.rand(33), ms.float32) - compile(net, x, w) + compile_net(net, x, w) def test_prelu_single_success2(): @@ -80,7 +80,7 @@ def test_prelu_single_success2(): net = GradWrap(NetWithLoss(Net())) x = Tensor(np.random.rand(1, 33, 4, 4), ms.float32) w = Tensor([0.1], ms.float32) - compile(net, x, w) + compile_net(net, x, w) def test_prelu_parallel_success1(): @@ -100,7 +100,7 @@ def test_prelu_parallel_success1(): x = Tensor(np.random.rand(4, 4, 32, 64), dtype=ms.float32) w = Tensor(np.random.rand(4), dtype=ms.float32) net = GradWrap(NetWithLoss(Net(strategy))) - compile(net, x, w) + compile_net(net, x, w) def test_prelu_parallel_success2(): @@ 
-120,7 +120,7 @@ def test_prelu_parallel_success2(): x = Tensor(np.random.rand(4, 4, 32, 64), dtype=ms.float32) w = Tensor(np.random.rand(4), dtype=ms.float32) net = GradWrap(NetWithLoss(Net(strategy))) - compile(net, x, w) + compile_net(net, x, w) def test_prelu_parallel_success3(): @@ -183,7 +183,7 @@ def test_prelu_parallel_success4(): x = Tensor(np.random.rand(4, 16, 32, 64), dtype=ms.float32) w = Tensor(np.random.rand(16), dtype=ms.float32) net = GradWrap(NetWithLoss(Net(strategy))) - compile(net, x, w) + compile_net(net, x, w) def test_prelu_parallel_success5(): @@ -203,4 +203,4 @@ def test_prelu_parallel_success5(): x = Tensor(np.random.rand(4, 16, 32, 64), dtype=ms.float32) w = Tensor(np.random.rand(1), dtype=ms.float32) net = GradWrap(NetWithLoss(Net(strategy))) - compile(net, x, w) + compile_net(net, x, w) diff --git a/tests/ut/python/parallel/test_prelu_cell.py b/tests/ut/python/parallel/test_prelu_cell.py index 40f8ab2701..dca467ef8d 100644 --- a/tests/ut/python/parallel/test_prelu_cell.py +++ b/tests/ut/python/parallel/test_prelu_cell.py @@ -47,9 +47,8 @@ class Dataset(MindData): raise StopIteration self.index += 1 if self.input_num == 2: - return self.predict, self.label - else: - return self.predict, + return (self.predict, self.label) + return (self.predict,) def reset(self): self.index = 0 @@ -68,7 +67,7 @@ class PReLU(nn.Cell): if not isinstance(w, Tensor): raise TypeError("w only support np.float32, float or Tensor type.") - self.w = Parameter(initializer(w, [channel, ]), name='a') + self.w = Parameter(initializer(w, [channel,]), name='a') self.prelu = P.PReLU() self.relu = P.ReLU().set_strategy(((1,),)) self.sub = P.Sub().set_strategy(((1,), (1,))) @@ -97,7 +96,6 @@ def prelu_net(): def reshape_common(parallel_mode): - batch_size = 32 learning_rate = 0.1 momentum = 0.9 epoch_size = 2 diff --git a/tests/ut/python/parallel/test_reduce_method_info.py b/tests/ut/python/parallel/test_reduce_method_info.py index 530454fc1a..ca2bcb68fc 100644 --- a/tests/ut/python/parallel/test_reduce_method_info.py +++ b/tests/ut/python/parallel/test_reduce_method_info.py @@ -44,7 +44,7 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y, b) -def compile(net, x, y, b): +def compile_net(net, x, y, b): net.set_auto_parallel() _executor.compile(net, x, y, b) @@ -74,7 +74,7 @@ def test_sum_mul(): x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_sum_mul2(): @@ -101,7 +101,7 @@ def test_sum_mul2(): x = Tensor(np.ones([128, 128, 64, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 128, 64, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_sum_mul3(): @@ -128,7 +128,7 @@ def test_sum_mul3(): x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 32]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_sum_mul4(): @@ -155,7 +155,7 @@ def test_sum_mul4(): x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 32, 1]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_sum_mul5(): @@ -179,7 +179,7 @@ def test_sum_mul5(): x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = 
Tensor(np.ones([1, 32, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_sum_mul6(): @@ -203,7 +203,7 @@ def test_sum_mul6(): x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 1, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_sum_mul7(): @@ -227,7 +227,7 @@ def test_sum_mul7(): x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([1, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_max_mul(): @@ -254,7 +254,7 @@ def test_max_mul(): x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 32]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_min_mul(): @@ -281,7 +281,7 @@ def test_min_mul(): x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([32, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_reduce_mean_mul_float32(): @@ -309,7 +309,7 @@ def test_reduce_mean_mul_float32(): y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([32, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) class ArgMaxWithValueNet(nn.Cell): @@ -321,7 +321,7 @@ class ArgMaxWithValueNet(nn.Cell): def construct(self, x, y, b): out = self.mul1(x, y) - index, out = self.arg_max_with_value(out) + _, out = self.arg_max_with_value(out) out = self.mul2(out, b) return out @@ -335,16 +335,16 @@ class ArgMinWithValueNet(nn.Cell): def construct(self, x, y, b): out = self.mul1(x, y) - index, out = self.arg_min_with_value(out) + _, out = self.arg_min_with_value(out) out = self.mul2(out, b) return out -def gen_inputs_and_compile(net): +def gen_inputs_and_compile_net(net): x = Tensor(np.ones([128, 64, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 64, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def tobefixed_test_arg_max_with_value_mul_semi_axis_parallel(): @@ -354,7 +354,7 @@ def tobefixed_test_arg_max_with_value_mul_semi_axis_parallel(): strategy3 = ((2, 4), (2, 4)) net = GradWrap(NetWithLoss(ArgMaxWithValueNet(strategy1, strategy2, strategy3))) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - gen_inputs_and_compile(net) + gen_inputs_and_compile_net(net) def test_arg_max_with_value_mul_semi(): @@ -364,7 +364,7 @@ def test_arg_max_with_value_mul_semi(): strategy3 = ((2, 4), (2, 4)) net = GradWrap(NetWithLoss(ArgMaxWithValueNet(strategy1, strategy2, strategy3))) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - gen_inputs_and_compile(net) + gen_inputs_and_compile_net(net) def test_arg_max_with_value_mul_auto(): @@ -374,7 +374,7 @@ def test_arg_max_with_value_mul_auto(): strategy3 = None net = GradWrap(NetWithLoss(ArgMaxWithValueNet(strategy1, strategy2, strategy3))) context.set_auto_parallel_context(parallel_mode="auto_parallel") - gen_inputs_and_compile(net) + gen_inputs_and_compile_net(net) def test_arg_min_with_value_mul_semi_axis_parallel(): @@ -384,7 +384,7 @@ def test_arg_min_with_value_mul_semi_axis_parallel(): strategy3 = ((2, 4), (2, 4)) net = GradWrap(NetWithLoss(ArgMinWithValueNet(strategy1, strategy2, strategy3))) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") 
- gen_inputs_and_compile(net) + gen_inputs_and_compile_net(net) def test_arg_min_with_value_mul_semi(): @@ -394,7 +394,7 @@ def test_arg_min_with_value_mul_semi(): strategy3 = ((2, 4), (2, 4)) net = GradWrap(NetWithLoss(ArgMinWithValueNet(strategy1, strategy2, strategy3))) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - gen_inputs_and_compile(net) + gen_inputs_and_compile_net(net) def test_arg_min_with_value_mul_auto(): @@ -404,7 +404,7 @@ def test_arg_min_with_value_mul_auto(): strategy3 = None net = GradWrap(NetWithLoss(ArgMinWithValueNet(strategy1, strategy2, strategy3))) context.set_auto_parallel_context(parallel_mode="auto_parallel") - gen_inputs_and_compile(net) + gen_inputs_and_compile_net(net) class ArgMinWithValueNet2(nn.Cell): @@ -416,7 +416,7 @@ class ArgMinWithValueNet2(nn.Cell): def construct(self, x, y, b): out = self.mul1(x, y) - index, out = self.arg_min_with_value(out) + _, out = self.arg_min_with_value(out) out = self.relu(out) return out @@ -428,7 +428,7 @@ def tobefixed_test_arg_min_with_value_mul_semi_axis_parallel2(): strategy3 = ((2, 4, 1),) net = GradWrap(NetWithLoss(ArgMinWithValueNet2(strategy1, strategy2, strategy3))) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - gen_inputs_and_compile(net) + gen_inputs_and_compile_net(net) def test_arg_min_with_value_mul_semi2(): @@ -438,7 +438,7 @@ def test_arg_min_with_value_mul_semi2(): strategy3 = ((2, 4, 1),) net = GradWrap(NetWithLoss(ArgMinWithValueNet2(strategy1, strategy2, strategy3))) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - gen_inputs_and_compile(net) + gen_inputs_and_compile_net(net) def test_arg_min_with_value_mul_auto2(): @@ -448,7 +448,7 @@ def test_arg_min_with_value_mul_auto2(): strategy3 = None net = GradWrap(NetWithLoss(ArgMinWithValueNet2(strategy1, strategy2, strategy3))) context.set_auto_parallel_context(parallel_mode="auto_parallel") - gen_inputs_and_compile(net) + gen_inputs_and_compile_net(net) def test_cross_batch(): @@ -475,7 +475,7 @@ def test_cross_batch(): x = Tensor(np.ones([32, 64]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([32, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_cross_batch2(): @@ -502,7 +502,7 @@ def test_cross_batch2(): x = Tensor(np.ones([32, 64]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([32, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_cross_batch_auto(): @@ -526,7 +526,7 @@ def test_cross_batch_auto(): x = Tensor(np.ones([32, 64]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([32, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_max_empty_tuple(): @@ -554,4 +554,4 @@ def test_max_empty_tuple(): y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 32]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) diff --git a/tests/ut/python/parallel/test_reshape.py b/tests/ut/python/parallel/test_reshape.py index f3e4160b01..7d671b381b 100644 --- a/tests/ut/python/parallel/test_reshape.py +++ b/tests/ut/python/parallel/test_reshape.py @@ -18,7 +18,6 @@ import mindspore as ms import mindspore.nn as nn from mindspore import Tensor from mindspore import context -from mindspore.common import dtype as mstype from mindspore.common.api import _executor from mindspore.common.parameter import Parameter from mindspore.common.parameter 
import ParameterTuple @@ -54,9 +53,8 @@ class Dataset(MindData): raise StopIteration self.index += 1 if self.input_num == 2: - return self.predict, self.label - else: - return self.predict, + return (self.predict, self.label) + return (self.predict,) def reset(self): self.index = 0 @@ -82,7 +80,6 @@ def reshape_net(strategy0, strategy1, strategy2): def reshape_common(parallel_mode, strategy0, strategy1, strategy2, strategy_loss): - batch_size = 32 learning_rate = 0.1 momentum = 0.9 epoch_size = 2 @@ -306,21 +303,21 @@ class ReshapeNet6(nn.Cell): return matmul2_o -def compile(net, input): +def compile_net(net, input_): net.set_auto_parallel() - _executor.compile(net, input) + _executor.compile(net, input_) def reshape_net2(backbone): batch_size = 16 device_num = 16 context.set_auto_parallel_context(device_num=device_num, global_rank=0) - input = Tensor(np.ones([batch_size * device_num, 512, 7, 7]).astype(np.float32) * 0.01) + input_ = Tensor(np.ones([batch_size * device_num, 512, 7, 7]).astype(np.float32) * 0.01) net = GradWrap(NetWithLoss(backbone)) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - compile(net, input) + compile_net(net, input_) def test_reshape_net1_1(): @@ -480,11 +477,11 @@ def test_batchnorm_reshape_train(): device_num = 16 context.set_auto_parallel_context(device_num=device_num, global_rank=0) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - input = Tensor(np.ones([batch_size * device_num, 512]).astype(np.float32) * 0.01) + input_ = Tensor(np.ones([batch_size * device_num, 512]).astype(np.float32) * 0.01) net = GradWrap(NetWithLoss(BatchNormReshapeNet())) - compile(net, input) + compile_net(net, input_) def bn_with_initialize(out_channels): @@ -517,12 +514,12 @@ def test_bn_reshape_dense_bn_train(): batch_size = 16 device_num = 16 context.set_auto_parallel_context(device_num=device_num, global_rank=0) - input = Tensor(np.ones([batch_size, 2, 32, 32]).astype(np.float32) * 0.01) + input_ = Tensor(np.ones([batch_size, 2, 32, 32]).astype(np.float32) * 0.01) net = GradWrap(NetWithLoss(BNReshapeDenseBNNet())) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - compile(net, input) + compile_net(net, input_) class ParallelReduceMeanNet(nn.Cell): diff --git a/tests/ut/python/parallel/test_reshape_parameter.py b/tests/ut/python/parallel/test_reshape_parameter.py index 214b6cb50d..4eee4a2def 100644 --- a/tests/ut/python/parallel/test_reshape_parameter.py +++ b/tests/ut/python/parallel/test_reshape_parameter.py @@ -58,7 +58,7 @@ class Net(nn.Cell): return out -def compile(net, x, y): +def compile_net(net, x, y): net.set_auto_parallel() _executor.compile(net, x, y) @@ -69,7 +69,7 @@ def test_reshape_parameter_data_parallel(): net = GradWrap(NetWithLoss(Net(strategy))) x = Tensor(np.ones([10000, 36]), dtype=ms.float32) y = Tensor(np.ones([10000, 36, 1]), dtype=ms.float32) - compile(net, x, y) + compile_net(net, x, y) def test_reshape_parameter_model_parallel(): @@ -78,4 +78,4 @@ def test_reshape_parameter_model_parallel(): net = GradWrap(NetWithLoss(Net(strategy))) x = Tensor(np.ones([10000, 36]), dtype=ms.float32) y = Tensor(np.ones([10000, 36, 1]), dtype=ms.float32) - compile(net, x, y) + compile_net(net, x, y) diff --git a/tests/ut/python/parallel/test_scalar_loss.py b/tests/ut/python/parallel/test_scalar_loss.py index 90fd966422..90291caed4 100644 --- a/tests/ut/python/parallel/test_scalar_loss.py +++ b/tests/ut/python/parallel/test_scalar_loss.py @@ -22,7 +22,6 @@ from mindspore.common.api import _executor from 
mindspore.ops import composite as C from mindspore.ops import functional as F from mindspore.ops import operations as P -from tests.ut.python.ops.test_math_ops import VirtualLoss class GradWrap(nn.Cell): diff --git a/tests/ut/python/parallel/test_set_auto_parallel_context.py b/tests/ut/python/parallel/test_set_auto_parallel_context.py index 9df9833b16..4343f34d78 100644 --- a/tests/ut/python/parallel/test_set_auto_parallel_context.py +++ b/tests/ut/python/parallel/test_set_auto_parallel_context.py @@ -30,10 +30,10 @@ def test_set_auto_parallel_context(): parameter_broadcast = context.get_auto_parallel_context("parameter_broadcast") assert device_num == 4 assert global_rank == 3 - assert mirror_mean == True - assert cast_before_mirror == False + assert mirror_mean + assert not cast_before_mirror assert parallel_mode == "auto_parallel" - assert parameter_broadcast == False + assert not parameter_broadcast auto_parallel_context().set_communication_backend("hccl") backend = auto_parallel_context().get_communication_backend() @@ -43,7 +43,7 @@ def test_set_auto_parallel_context(): device_num = auto_parallel_context().get_device_num() device_num_is_set = auto_parallel_context().get_device_num_is_set() assert device_num == 4 - assert device_num_is_set == True + assert device_num_is_set auto_parallel_context().set_global_rank(4) global_rank = auto_parallel_context().get_global_rank() @@ -51,14 +51,14 @@ def test_set_auto_parallel_context(): auto_parallel_context().set_mirror_mean(True) mirror_mean = auto_parallel_context().get_mirror_mean() - assert mirror_mean == True + assert mirror_mean auto_parallel_context().set_cast_before_mirror(False) cast_before_mirror = auto_parallel_context().get_cast_before_mirror() - assert cast_before_mirror == False + assert not cast_before_mirror parameter_broadcast_is_set = auto_parallel_context().get_parameter_broadcast_is_set() - assert parameter_broadcast_is_set == True + assert parameter_broadcast_is_set with pytest.raises(ValueError): context.set_auto_parallel_context(device_num=0) @@ -94,9 +94,9 @@ def test_reset_auto_parallel_context(): parameter_broadcast_is_set = auto_parallel_context().get_parameter_broadcast_is_set() assert device_num == 1 assert global_rank == 0 - assert mirror_mean == False - assert cast_before_mirror == True + assert not mirror_mean + assert cast_before_mirror assert parallel_mode == "stand_alone" - assert parameter_broadcast == False - assert device_num_is_set == False - assert parameter_broadcast_is_set == False + assert not parameter_broadcast + assert not device_num_is_set + assert not parameter_broadcast_is_set diff --git a/tests/ut/python/parallel/test_sigmoid_cross_entropy_with_logits.py b/tests/ut/python/parallel/test_sigmoid_cross_entropy_with_logits.py index c905950df2..b4b956a9e7 100644 --- a/tests/ut/python/parallel/test_sigmoid_cross_entropy_with_logits.py +++ b/tests/ut/python/parallel/test_sigmoid_cross_entropy_with_logits.py @@ -39,7 +39,7 @@ _w1 = Tensor(np.ones([128, 64]), dtype=ms.float32) _b = Tensor(np.ones([128, 64]), dtype=ms.float32) -def compile(net): +def compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() @@ -52,7 +52,7 @@ def test_sigmoid_cross_entropy_with_logits_data_parallel(): strategy1 = ((16, 1), (16, 1)) strategy2 = ((16, 1), (16, 1)) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) def test_sigmoid_cross_entropy_with_logits_model_parallel(): @@ -60,7 +60,7 @@ 
def test_sigmoid_cross_entropy_with_logits_model_parallel(): strategy1 = ((1, 16), (1, 16)) strategy2 = ((1, 16), (1, 16)) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) def test_sigmoid_cross_entropy_with_logits_hybrid_parallel(): @@ -68,13 +68,13 @@ def test_sigmoid_cross_entropy_with_logits_hybrid_parallel(): strategy1 = ((2, 8), (2, 8)) strategy2 = ((2, 8), (2, 8)) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) def test_sigmoid_cross_entropy_with_logits_auto_parallel(): context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0) net = Net(_w1) - compile(net) + compile_net(net) def test_sigmoid_cross_entropy_with_logits_repeat_calc(): @@ -82,4 +82,4 @@ def test_sigmoid_cross_entropy_with_logits_repeat_calc(): strategy1 = ((2, 8), (2, 8)) strategy2 = ((2, 2), (2, 2)) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) diff --git a/tests/ut/python/parallel/test_softmax_cross_entropy_loss.py b/tests/ut/python/parallel/test_softmax_cross_entropy_loss.py index d6c2072918..062d29a136 100644 --- a/tests/ut/python/parallel/test_softmax_cross_entropy_loss.py +++ b/tests/ut/python/parallel/test_softmax_cross_entropy_loss.py @@ -21,7 +21,6 @@ from mindspore import context from mindspore.common.api import _executor from mindspore.ops import composite as C from mindspore.ops import operations as P -from tests.ut.python.ops.test_math_ops import VirtualLoss class NetWithLoss(nn.Cell): @@ -44,7 +43,7 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y, b) -def compile(net, x, y, b): +def compile_net(net, x, y, b): net.set_auto_parallel() _executor.compile(net, x, y, b) @@ -71,7 +70,7 @@ def test_softmax_cross_entropy_loss(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 32]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_softmax_cross_entropy_loss_repeated_calculation(): @@ -96,7 +95,7 @@ def test_softmax_cross_entropy_loss_repeated_calculation(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 32]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_softmax_cross_entropy_loss_auto_batch_parallel(): @@ -118,4 +117,4 @@ def test_softmax_cross_entropy_loss_auto_batch_parallel(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 32]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) diff --git a/tests/ut/python/parallel/test_split_grad_sens.py b/tests/ut/python/parallel/test_split_grad_sens.py index 47e95dc454..6da1856cba 100644 --- a/tests/ut/python/parallel/test_split_grad_sens.py +++ b/tests/ut/python/parallel/test_split_grad_sens.py @@ -22,7 +22,6 @@ from mindspore import context from mindspore.common.api import _executor from mindspore.ops import composite as C from mindspore.ops import operations as P -from tests.ut.python.ops.test_math_ops import VirtualLoss class GradWrap(nn.Cell): @@ -54,7 +53,7 @@ class GradWrap3(nn.Cell): return C.grad_all(self.network)(x, y, bias) -def compile(net, x, y, b): +def compile_net(net, x, y, b): net.set_auto_parallel() _executor.compile(net, x, y, b) @@ -81,7 +80,7 @@ def test_no_grad(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + 
compile_net(net, x, y, b) def test_grad_sens_parameter_type(): @@ -135,7 +134,7 @@ def test_grad_sens_tensor_type(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_grad_sens_scalar_broadcast(): @@ -159,4 +158,4 @@ def test_grad_sens_scalar_broadcast(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 32]), dtype=ms.float32) bias = Tensor(np.ones([64]), dtype=ms.float32) - compile(net, x, y, bias) + compile_net(net, x, y, bias) diff --git a/tests/ut/python/parallel/test_squeeze_info.py b/tests/ut/python/parallel/test_squeeze_info.py index eba77db3e6..0b3144346e 100644 --- a/tests/ut/python/parallel/test_squeeze_info.py +++ b/tests/ut/python/parallel/test_squeeze_info.py @@ -15,9 +15,9 @@ import numpy as np import mindspore as ms -from mindspore import context, Tensor, Parameter +from mindspore import context, Tensor from mindspore.common.api import _executor -from mindspore.nn import Cell, TrainOneStepCell, Momentum +from mindspore.nn import Cell from mindspore.ops import operations as P @@ -37,7 +37,7 @@ _x = Tensor(np.ones([64, 1, 32, 1]), dtype=ms.float32) _b = Tensor(np.ones([64, 32]), dtype=ms.float32) -def compile(net): +def compile_net(net): net.set_auto_parallel() _executor.compile(net, _x, _b) context.reset_auto_parallel_context() @@ -48,7 +48,7 @@ def test_squeeze_data_parallel(): strategy1 = ((16, 1, 1, 1),) strategy2 = ((16, 1), (16, 1)) net = Net(strategy1, strategy2) - compile(net) + compile_net(net) def test_squeeze_model_parallel(): @@ -56,7 +56,7 @@ def test_squeeze_model_parallel(): strategy1 = ((1, 1, 16, 1),) strategy2 = ((1, 16), (1, 16)) net = Net(strategy1, strategy2) - compile(net) + compile_net(net) def test_squeeze_specified_axis(): @@ -64,13 +64,13 @@ def test_squeeze_specified_axis(): strategy1 = ((4, 1, 4, 1),) strategy2 = ((8, 2), (8, 2)) net = Net(strategy1, strategy2, (1, 3)) - compile(net) + compile_net(net) def test_squeeze_auto_parallel(): context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0) net = Net() - compile(net) + compile_net(net) def test_squeeze_repeat_calc(): @@ -78,4 +78,4 @@ def test_squeeze_repeat_calc(): strategy1 = ((1, 1, 8, 1),) strategy2 = ((2, 8), (2, 8)) net = Net(strategy1, strategy2) - compile(net) + compile_net(net) diff --git a/tests/ut/python/parallel/test_sum_as_loss.py b/tests/ut/python/parallel/test_sum_as_loss.py index 336783f576..fc66df9d01 100644 --- a/tests/ut/python/parallel/test_sum_as_loss.py +++ b/tests/ut/python/parallel/test_sum_as_loss.py @@ -21,7 +21,6 @@ from mindspore import context from mindspore.common.api import _executor from mindspore.ops import composite as C from mindspore.ops import operations as P -from tests.ut.python.ops.test_math_ops import VirtualLoss class GradWrap(nn.Cell): @@ -33,7 +32,7 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y, bias) -def compile(net, x, y, bias): +def compile_net(net, x, y, bias): net.set_auto_parallel() _executor.compile(net, x, y, bias) @@ -59,7 +58,7 @@ def test_sum_as_loss(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 32]), dtype=ms.float32) bias = Tensor(np.ones([64]), dtype=ms.float32) - compile(net, x, y, bias) + compile_net(net, x, y, bias) def test_sum_as_loss2(): @@ -83,4 +82,4 @@ def test_sum_as_loss2(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 32]), dtype=ms.float32) 
bias = Tensor(np.ones([64]), dtype=ms.float32) - compile(net, x, y, bias) + compile_net(net, x, y, bias) diff --git a/tests/ut/python/parallel/test_transpose.py b/tests/ut/python/parallel/test_transpose.py index f712416323..b0b917bf19 100644 --- a/tests/ut/python/parallel/test_transpose.py +++ b/tests/ut/python/parallel/test_transpose.py @@ -17,7 +17,6 @@ import numpy as np import mindspore as ms import mindspore.nn as nn from mindspore import Tensor, context -from mindspore import context from mindspore.common.parameter import Parameter from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits from mindspore.nn.optim.momentum import Momentum @@ -67,7 +66,6 @@ def transpose_net(strategy1, strategy2): def transpose_common(strategy1, strategy2): - batch_size = 32 learning_rate = 0.1 momentum = 0.9 epoch_size = 2 diff --git a/tests/ut/python/parallel/test_two_matmul.py b/tests/ut/python/parallel/test_two_matmul.py index 6e8b2085b6..daee920a91 100644 --- a/tests/ut/python/parallel/test_two_matmul.py +++ b/tests/ut/python/parallel/test_two_matmul.py @@ -44,7 +44,7 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y, b) -def compile(net, x, y, b): +def compile_net(net, x, y, b): net.set_auto_parallel() _executor.compile(net, x, y, b) @@ -72,7 +72,7 @@ def test_two_matmul(): y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_two_matmul_repeated_calculation1(): @@ -96,7 +96,7 @@ def test_two_matmul_repeated_calculation1(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_two_matmul_repeated_calculation2(): @@ -120,4 +120,4 @@ def test_two_matmul_repeated_calculation2(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) diff --git a/tests/ut/python/parallel/test_two_weights_parameter.py b/tests/ut/python/parallel/test_two_weights_parameter.py index b010049b14..1684cf96dc 100644 --- a/tests/ut/python/parallel/test_two_weights_parameter.py +++ b/tests/ut/python/parallel/test_two_weights_parameter.py @@ -20,7 +20,6 @@ from mindspore import Tensor, Parameter, ParameterTuple from mindspore import context from mindspore.common.api import _executor from mindspore.ops import composite as C -from mindspore.ops import functional as F from mindspore.ops import operations as P diff --git a/tests/ut/python/parallel/test_virtual_dataset_3_input.py b/tests/ut/python/parallel/test_virtual_dataset_3_input.py index 95aec05d32..e7ea717dc9 100644 --- a/tests/ut/python/parallel/test_virtual_dataset_3_input.py +++ b/tests/ut/python/parallel/test_virtual_dataset_3_input.py @@ -78,7 +78,7 @@ def test_virtual_dataset_3_input(): def test_virtualdataset_cell_3_inputs(): class Net(nn.Cell): - def __init__(self, strategy0, strategy1, strategy2, strategy3): + def __init__(self, strategy1, strategy2, strategy3): super().__init__() self.matmul1 = P.MatMul().set_strategy(strategy1) self.matmul2 = P.MatMul().set_strategy(strategy2) @@ -89,7 +89,7 @@ def test_virtualdataset_cell_3_inputs(): out = self.matmul2(out, b) return out - net = GradWrap(VirtualDatasetCellTriple(NetWithLoss(Net(None, None, None, None)))) + net = GradWrap(VirtualDatasetCellTriple(NetWithLoss(Net(None, None, None)))) 
context.set_context(save_graphs=True) context.set_auto_parallel_context(parallel_mode="auto_parallel") context.set_auto_parallel_context(device_num=8, global_rank=0)
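
The hunks above apply the same few pylint fixes over and over: the test helper `compile` is renamed to `compile_net` because `compile` shadows a Python builtin (W0622), asserts check truthiness instead of comparing with `== True` / `== False` (C0121, singleton-comparison), unused tuple elements are bound to `_` (W0612), `else` branches after `return` are dropped (R1705), unused imports, locals, and parameters such as `padding`, `layer_num`, `down_sample`, and `strategy0` are removed (W0611/W0612/W0613), and camelCase attributes like `downSample` become snake_case (C0103). The standalone Python sketch below is illustrative only and is not part of the patch; every name in it is a hypothetical stand-in for the test helpers touched above.

# pylint_fix_patterns.py -- illustrative sketch, not part of the patch; all names are hypothetical.
import numpy as np


def compile_net(net, *inputs):
    # Was `compile`: renamed so the helper no longer shadows the `compile` builtin (W0622).
    return net(*inputs)


class Dataset:
    """Hypothetical dataset: returns directly instead of using `else` after `return` (R1705)."""

    def __init__(self, predict, label, input_num=2):
        self.predict = predict
        self.label = label
        self.input_num = input_num
        self.down_sample = False  # snake_case attribute name instead of `downSample` (C0103)

    def __getitem__(self, _index):
        if self.input_num == 2:
            return (self.predict, self.label)
        return (self.predict,)


def arg_max_value(x):
    # The unused index from an argmax-style pair is bound to `_` (W0612).
    _, value = int(np.argmax(x)), float(np.max(x))
    return value


def check_context_flags(mirror_mean, cast_before_mirror):
    # Truthiness asserts instead of `== True` / `== False` comparisons (C0121).
    assert mirror_mean
    assert not cast_before_mirror


def conv3x3(in_channels, out_channels, stride=1):
    # A never-used `padding` parameter is dropped from the signature (W0613).
    return ("conv3x3", in_channels, out_channels, stride)

In the tests themselves `compile_net` wraps `_executor.compile(...)`; the rename matters because redefining `compile` hides the builtin for the rest of the module and trips pylint's redefined-builtin check. The remaining changes are behavior-preserving: the only signature changes are removals of parameters that the function bodies never used.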