From 14fe72f38379fe769c4dc920028f327de47d302d Mon Sep 17 00:00:00 2001 From: Yi Huaijie Date: Fri, 22 May 2020 09:13:09 +0800 Subject: [PATCH] fix pylint warnings --- .../st/auto_parallel/onehot_model_parallel.py | 308 ++++---- .../soft_entropy_loss_expand_parallel.py | 551 +++++++------- tests/st/auto_parallel/test_expand_loss.py | 52 +- .../test_model_parallel_onehot.py | 43 +- .../test_resnet50_expand_loss_2p.py | 35 +- tests/ut/python/communication/__init__.py | 34 +- tests/ut/python/communication/test_comm.py | 1 - .../communication/test_data_parallel_lenet.py | 3 +- tests/ut/python/parallel/__init__.py | 2 +- .../add_relu/_test_add_relu_parallel_4p.py | 356 ++++----- .../_test_conv2d_parallel_4p.py | 712 +++++++++--------- .../dropout/_test_dropout_parallel_4p.py | 240 +++--- .../hcom/_test_allgather_4p.py | 308 ++++---- .../hcom/_test_allreduce_4p.py | 350 ++++----- .../_test_l2normalize_parallel_4p.py | 413 +++++----- .../loss/_test_loss_parallel_4p.py | 391 +++++----- .../matmul/_test_matmul_parallel_4p.py | 658 ++++++++-------- .../max/_test_max_parallel_4p.py | 427 ++++++----- .../need_fix_test_mul_softmax_parallel_4p.py | 401 +++++----- .../onehot/_test_onehot_parallel_4p.py | 296 ++++---- .../prelu/_test_prelu_parallel_4p.py | 412 +++++----- .../_test_reducemean_parallel_4p.py | 505 +++++++------ .../reshape/_test_reshape_parallel_4p.py | 412 +++++----- .../transpose/_test_transpose_parallel_4p.py | 471 ++++++------ .../parallel/test_add_relu_redistribution.py | 6 +- .../python/parallel/test_allreduce_fusion.py | 41 +- tests/ut/python/parallel/test_alltoall.py | 3 +- tests/ut/python/parallel/test_arithmetic.py | 40 +- .../parallel/test_auto_parallel_BN_PReLU.py | 1 - .../parallel/test_auto_parallel_arithmetic.py | 11 +- ...t_auto_parallel_assign_sub_with_ref_key.py | 3 +- .../test_auto_parallel_double_subgraphs.py | 1 - .../test_auto_parallel_four_matmul.py | 8 +- .../parallel/test_auto_parallel_inference.py | 2 +- .../test_auto_parallel_matmul_prelu.py | 2 +- .../test_auto_parallel_parameter_cast.py | 1 - .../test_auto_parallel_reduce_method.py | 8 +- .../parallel/test_auto_parallel_reshape.py | 4 +- .../parallel/test_auto_parallel_rhombus.py | 8 +- .../test_auto_parallel_softmax_loss.py | 1 - .../test_auto_parallel_transformer.py | 4 +- .../parallel/test_auto_parallel_two_bn.py | 22 +- .../parallel/test_auto_parallel_two_matmul.py | 12 +- .../parallel/test_auto_star_elimination.py | 5 +- tests/ut/python/parallel/test_batch_matmul.py | 14 +- .../parallel/test_batchnorm_batch_parallel.py | 6 +- .../ut/python/parallel/test_bn_prelu_cell.py | 13 +- tests/ut/python/parallel/test_bool_grad.py | 2 +- .../ut/python/parallel/test_broadcast_dict.py | 4 +- .../parallel/test_comparison_function_info.py | 22 +- tests/ut/python/parallel/test_dataset_util.py | 8 +- tests/ut/python/parallel/test_dense_matmul.py | 4 +- .../test_different_type_for_div_op.py | 8 +- .../python/parallel/test_dropout_do_mask.py | 12 +- .../parallel/test_element_wise_function.py | 22 +- tests/ut/python/parallel/test_expand_dims.py | 14 +- .../ut/python/parallel/test_forward_graph.py | 12 +- tests/ut/python/parallel/test_gather_v2.py | 1 - .../parallel/test_gather_v2_primitive.py | 20 +- tests/ut/python/parallel/test_get_next.py | 22 +- .../parallel/test_get_parameter_layout.py | 4 +- .../test_hybird_parallel_activation.py | 16 +- tests/ut/python/parallel/test_layer_norm.py | 14 +- tests/ut/python/parallel/test_linear.py | 1 - .../parallel/test_loss_and_optimizer.py | 19 +- 
.../ut/python/parallel/test_matmul_tensor.py | 10 +- tests/ut/python/parallel/test_neg.py | 12 +- tests/ut/python/parallel/test_one_dev.py | 2 +- tests/ut/python/parallel/test_one_hot_net.py | 17 +- .../parallel/test_one_weight_parameter.py | 1 - tests/ut/python/parallel/test_onehot.py | 9 - .../parallel/test_operator_model_parallel.py | 48 +- .../parallel/test_optimizer_clone_weight.py | 6 +- .../ut/python/parallel/test_parameter_init.py | 2 +- tests/ut/python/parallel/test_prelu.py | 14 +- tests/ut/python/parallel/test_prelu_cell.py | 8 +- .../parallel/test_reduce_method_info.py | 58 +- tests/ut/python/parallel/test_reshape.py | 23 +- .../python/parallel/test_reshape_parameter.py | 6 +- tests/ut/python/parallel/test_scalar_loss.py | 1 - .../test_set_auto_parallel_context.py | 24 +- .../test_sigmoid_cross_entropy_with_logits.py | 12 +- .../test_softmax_cross_entropy_loss.py | 9 +- .../python/parallel/test_split_grad_sens.py | 9 +- tests/ut/python/parallel/test_squeeze_info.py | 16 +- tests/ut/python/parallel/test_sum_as_loss.py | 7 +- tests/ut/python/parallel/test_transpose.py | 2 - tests/ut/python/parallel/test_two_matmul.py | 8 +- .../parallel/test_two_weights_parameter.py | 1 - .../parallel/test_virtual_dataset_3_input.py | 4 +- 90 files changed, 4030 insertions(+), 4081 deletions(-) diff --git a/tests/st/auto_parallel/onehot_model_parallel.py b/tests/st/auto_parallel/onehot_model_parallel.py index d553bb15ee..b104e7d8f8 100644 --- a/tests/st/auto_parallel/onehot_model_parallel.py +++ b/tests/st/auto_parallel/onehot_model_parallel.py @@ -1,154 +1,154 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -import numpy as np -import os -import pytest - -import mindspore as ms -import mindspore.communication.management as distributedTool -import mindspore.context as context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P - -device_num = 2 -device_id = int(os.getenv('DEVICE_ID')) -rank_id = 0 - - -def setup_module(): - global device_num - global rank_id - np.random.seed(0) - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") - context.set_context(device_id=device_id) - distributedTool.init() - device_num = distributedTool.get_group_size() - rank_id = distributedTool.get_rank() - context.set_auto_parallel_context(device_num=device_num, - global_rank=rank_id) - - -def teardown_module(): - distributedTool.release() - - -class Onehot(Cell): - def __init__(self, axis=-1, depth=1, on_value=1.0, off_value=0.0, strategy=None): - super(Onehot, self).__init__() - trans_stra = None - if strategy: - trans_stra = (strategy[0],) - self.onehot = P.OneHot().set_strategy(strategy=strategy) - self.depth = depth - self.on_value = Tensor(on_value, ms.float32) - self.off_value = Tensor(off_value, ms.float32) - self.transpose = P.Transpose().set_strategy(strategy=trans_stra) - self.sub = P.Sub().set_strategy(strategy=((1, 1), (1, 1))) - - def construct(self, input, indices): - x = self.onehot(indices, self.depth, self.on_value, self.off_value) - x = self.transpose(x, (1, 0)) - x = self.sub(input, x) - return x - - -class DataGenerator(): - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def generate_data(self, shape): - data = np.random.rand(*shape) - return data - - def input_data(self, shape): - data = (self.generate_data(shape) * 2).astype(np.float32) - stra = [1] * len(shape) - stra[0] = device_num - datas = self.get_parallel_blocks(data, stra) - return Tensor(data), Tensor(datas[rank_id]) - - def label_data(self, shape, classes): - data = (self.generate_data(shape) * (classes - 1)).astype(np.int32) - stra = [1] * len(shape) - stra[0] = device_num - datas = self.get_parallel_blocks(data, stra) - return Tensor(data), Tensor(datas[rank_id]) - - -class OneHotFactory: - def __init__(self, batch_size, classes, on_value=1.0, off_value=0.0, axis=None, strategy=None): - dataGen = DataGenerator() - self.input_full, self.input_part = dataGen.input_data((classes, batch_size)) - self.label_full, self.label_part = dataGen.label_data((batch_size,), classes) - self.depth = classes - self.on_value = on_value - self.off_value = off_value - self.axis = axis - self.strategy = strategy - - def forward_mindspore_single_impl(self): - net = Onehot(axis=self.axis, - depth=self.depth, - on_value=self.on_value, - off_value=self.off_value) - out = net(self.input_full, self.label_full) - return out - - def forward_mindspore_parallel_impl(self): - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net = Onehot(axis=self.axis, - depth=self.depth, - on_value=self.on_value, - off_value=self.off_value, strategy=self.strategy) - out = net.compile_and_run(self.input_full, self.label_full) - return out - - def forward_cmp(self): - out_mindspore_single = self.forward_mindspore_single_impl().asnumpy() - context.reset_auto_parallel_context() - 
out_mindspore_parallel = self.forward_mindspore_parallel_impl().asnumpy() - context.reset_auto_parallel_context() - assert np.allclose(out_mindspore_single, out_mindspore_parallel, 0.0001, 0.0001) - - -def test_reid_onehot_forward_int32_128_depth1024_model_parallel(): - fact = OneHotFactory(batch_size=128, - classes=1024, - on_value=1.000000, - off_value=0.000000, - axis=-1, - strategy=((1, device_num), (), ())) - fact.forward_cmp() - - -def test_reid_onehot_forward_int32_1024_depth128_model_parallel(): - fact = OneHotFactory(batch_size=1024, - classes=128, - on_value=1.000000, - off_value=0.000000, - axis=-1, - strategy=((1, device_num), (), ())) - fact.forward_cmp() +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import os +import numpy as np + +import mindspore as ms +import mindspore.communication.management as distributedTool +import mindspore.context as context +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.ops import operations as P + +device_num = 2 +device_id = int(os.getenv('DEVICE_ID')) +rank_id = 0 + + +def setup_module(): + global device_num + global rank_id + np.random.seed(0) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(device_id=device_id) + distributedTool.init() + device_num = distributedTool.get_group_size() + rank_id = distributedTool.get_rank() + context.set_auto_parallel_context(device_num=device_num, + global_rank=rank_id) + + +def teardown_module(): + distributedTool.release() + + +class Onehot(Cell): + def __init__(self, axis=-1, depth=1, on_value=1.0, off_value=0.0, strategy=None): + super(Onehot, self).__init__() + trans_stra = None + if strategy: + trans_stra = (strategy[0],) + self.onehot = P.OneHot().set_strategy(strategy=strategy) + self.depth = depth + self.on_value = Tensor(on_value, ms.float32) + self.off_value = Tensor(off_value, ms.float32) + self.transpose = P.Transpose().set_strategy(strategy=trans_stra) + self.sub = P.Sub().set_strategy(strategy=((1, 1), (1, 1))) + self.axis = axis + + def construct(self, input_, indices): + x = self.onehot(indices, self.depth, self.on_value, self.off_value) + x = self.transpose(x, (1, 0)) + x = self.sub(input_, x) + return x + + +class DataGenerator(): + def get_parallel_blocks(self, input_, strategy): + blocks = [input_] + i = 0 + for stra in strategy: + temp = [] + while len(blocks) > 0: + block = blocks.pop(0) + temp.extend(np.split(block, stra, axis=i)) + blocks.extend(temp) + i += 1 + return blocks + + def generate_data(self, shape): + data = np.random.rand(*shape) + return data + + def input_data(self, shape): + data = (self.generate_data(shape) * 2).astype(np.float32) + stra = [1] * len(shape) + stra[0] = device_num + datas = self.get_parallel_blocks(data, stra) + return Tensor(data), Tensor(datas[rank_id]) + + def label_data(self, shape, classes): + data = (self.generate_data(shape) * (classes - 
1)).astype(np.int32) + stra = [1] * len(shape) + stra[0] = device_num + datas = self.get_parallel_blocks(data, stra) + return Tensor(data), Tensor(datas[rank_id]) + + +class OneHotFactory: + def __init__(self, batch_size, classes, on_value=1.0, off_value=0.0, axis=None, strategy=None): + data_gen = DataGenerator() + self.input_full, self.input_part = data_gen.input_data((classes, batch_size)) + self.label_full, self.label_part = data_gen.label_data((batch_size,), classes) + self.depth = classes + self.on_value = on_value + self.off_value = off_value + self.axis = axis + self.strategy = strategy + + def forward_mindspore_single_impl(self): + net = Onehot(axis=self.axis, + depth=self.depth, + on_value=self.on_value, + off_value=self.off_value) + out = net(self.input_full, self.label_full) + return out + + def forward_mindspore_parallel_impl(self): + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net = Onehot(axis=self.axis, + depth=self.depth, + on_value=self.on_value, + off_value=self.off_value, strategy=self.strategy) + out = net.compile_and_run(self.input_full, self.label_full) + return out + + def forward_cmp(self): + out_mindspore_single = self.forward_mindspore_single_impl().asnumpy() + context.reset_auto_parallel_context() + out_mindspore_parallel = self.forward_mindspore_parallel_impl().asnumpy() + context.reset_auto_parallel_context() + assert np.allclose(out_mindspore_single, out_mindspore_parallel, 0.0001, 0.0001) + + +def test_reid_onehot_forward_int32_128_depth1024_model_parallel(): + fact = OneHotFactory(batch_size=128, + classes=1024, + on_value=1.000000, + off_value=0.000000, + axis=-1, + strategy=((1, device_num), (), ())) + fact.forward_cmp() + + +def test_reid_onehot_forward_int32_1024_depth128_model_parallel(): + fact = OneHotFactory(batch_size=1024, + classes=128, + on_value=1.000000, + off_value=0.000000, + axis=-1, + strategy=((1, device_num), (), ())) + fact.forward_cmp() diff --git a/tests/st/auto_parallel/soft_entropy_loss_expand_parallel.py b/tests/st/auto_parallel/soft_entropy_loss_expand_parallel.py index b58e713a1d..f0f3d2b23a 100644 --- a/tests/st/auto_parallel/soft_entropy_loss_expand_parallel.py +++ b/tests/st/auto_parallel/soft_entropy_loss_expand_parallel.py @@ -1,275 +1,276 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -import numpy as np -import os -import pytest -from numpy import allclose - -import mindspore as ms -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common import dtype as mstype -from mindspore.common.parameter import ParameterTuple, Parameter -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.nn.optim.momentum import Momentum -from mindspore.ops import composite as C -from mindspore.ops import functional as F -from mindspore.ops import operations as P -from mindspore.train import Model, ParallelMode -from mindspore.train.callback import Callback - -np.set_printoptions(threshold=np.inf) -device_num = 2 -device_id = int(os.getenv('DEVICE_ID')) -rank_id = 0 -embed = 128 -classes = 32 -batch_size = 32 * 2 -MatmulParamShape = (classes, embed) - - -def setup_module(): - global device_num - global rank_id - np.random.seed(0) - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") - context.set_context(device_id=device_id) - distributedTool.init() - rank_id = distributedTool.get_rank() - device_num = distributedTool.get_group_size() - context.set_auto_parallel_context(device_num=device_num, - global_rank=device_id) - - -def teardown_module(): - distributedTool.release() - - -class DataGenerator(): - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def generate_data(self, shape): - size = np.cumprod(shape)[-1] - num_range = min(size, 1000) - data = (np.arange(0, size) % num_range) / num_range - data = np.reshape(data, shape) - return data - - def input_data(self, shape): - data = (self.generate_data(shape) * 0.1).astype(np.float32) - stra = [1] * len(shape) - stra[0] = device_num - datas = self.get_parallel_blocks(data, stra) - return Tensor(data), Tensor(datas[rank_id]) - - def label_data(self, shape, embed): - data = (self.generate_data(shape) * (embed - 1)).astype(np.int32) - stra = [1] * len(shape) - stra[0] = device_num - datas = self.get_parallel_blocks(data, stra) - return Tensor(data), Tensor(datas[rank_id]) - - -class Dataset(): - def __init__(self, predict, label, length=1, input_num=2): - self.predict = predict - self.label = label - self.index = 0 - self.length = length - self.input_num = input_num - - def __iter__(self): - return self - - def __next__(self): - if self.index >= self.length: - raise StopIteration - self.index += 1 - if self.input_num == 2: - return self.predict, self.label - else: - return self.predict, - - def reset(self): - self.index = 0 - - def get_dataset_size(self): - return self.length - - def get_repeat_count(self): - return self.length - - -class ModelCallback(Callback): - def __init__(self): - super(ModelCallback, self).__init__() - self.loss_list = [] - - def epoch_end(self, run_context, *args): - cb_params = run_context.original_args() - result = cb_params.net_outputs - self.loss_list.append(result.asnumpy().mean()) - - -class SoftmaxCrossEntropyExpand(Cell): - def __init__(self, sparse=False, stra_list=[]): - super(SoftmaxCrossEntropyExpand, self).__init__() - if len(stra_list) < 11: - stra_list = [None] * 11 - self.exp = P.Exp() - self.reduce_sum = P.ReduceSum(keep_dims=True).set_strategy(strategy=stra_list[1]) - self.onehot = 
P.OneHot().set_strategy(strategy=stra_list[2]) - self.on_value = Tensor(1.0, mstype.float32) - self.off_value = Tensor(0.0, mstype.float32) - self.div = P.Div().set_strategy(strategy=stra_list[3]) - self.log = P.Log().set_strategy(strategy=stra_list[4]) - self.sum_cross_entropy = P.ReduceSum(keep_dims=False).set_strategy(strategy=stra_list[5]) - self.mul = P.Mul().set_strategy(strategy=stra_list[6]) - self.mul2 = P.Mul().set_strategy(strategy=stra_list[7]) - self.cast = P.Cast() - self.reduce_mean = P.ReduceMean(keep_dims=False).set_strategy(strategy=stra_list[8]) - self.sparse = sparse - self.reduce_max = P.ReduceMax(keep_dims=True).set_strategy(strategy=stra_list[9]) - self.sub = P.Sub().set_strategy(strategy=stra_list[10]) - - def construct(self, logit, label): - logit_max = self.reduce_max(logit, -1) - exp = self.exp(self.sub(logit, logit_max)) - exp_sum = self.reduce_sum(exp, -1) - softmax_result = self.div(exp, exp_sum) - if self.sparse: - label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value) - softmax_result_log = self.log(softmax_result) - loss = self.sum_cross_entropy((self.mul(softmax_result_log, label)), -1) - loss = self.mul2(F.scalar_to_array(-1.0), loss) - loss = self.reduce_mean(loss, -1) - return loss - - -class MatmulNet(Cell): - def __init__(self, matmul_stra=None, loss_stra_list=[]): - super(MatmulNet, self).__init__() - self.matmul = P.MatMul(transpose_b=True).set_strategy(strategy=matmul_stra) - self.loss = SoftmaxCrossEntropyExpand(sparse=True, stra_list=loss_stra_list) - self.weight = Parameter(Tensor(np.ones(MatmulParamShape), dtype=ms.float32), name="weight") - - def construct(self, x, label): - loss_input = self.matmul(x, self.weight) - out = self.loss(loss_input, label) - return out - - -class LossFactory(): - def __init__(self): - dataGen = DataGenerator() - self.input_full, self.input_part = dataGen.input_data((batch_size, embed)) - self.label_full, self.label_part = dataGen.label_data((batch_size,), embed) - - def single_matmul_trains(self): - single_callback = ModelCallback() - net = MatmulNet() - optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) - model = Model(net, optimizer=optimizer) - epoch_size = 6 - dataset = Dataset(self.input_full, self.label_full) - model.train(epoch_size, dataset, callbacks=single_callback, dataset_sink_mode=False) - loss_value = np.array(single_callback.loss_list) - return loss_value - - def data_parallel_matmul_trains(self): - parallel_callback = ModelCallback() - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net = MatmulNet() - optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) - model = Model(net, optimizer=optimizer) - epoch_size = 6 - dataset = Dataset(self.input_part, self.label_part) - model.train(epoch_size, dataset, callbacks=parallel_callback, dataset_sink_mode=False) - loss_value = np.array(parallel_callback.loss_list) - return loss_value - - def model_parallel_matmul_trains(self): - parallel_callback = ModelCallback() - matmul_stra = ((1, 1), (device_num, 1)) - reduce_max_stra = ((1, device_num),) - sub_stra = ((1, device_num), (1, 1)) - exp_stra = ((1, device_num),) - reduce_sum_stra = ((1, device_num),) - div_stra = ((1, device_num), (1, 1)) - log_stra = ((1, device_num),) - mul_stra = ((1, device_num), (1, device_num)) - sum_cross_entropy_stra = ((1, device_num),) - mul2_stra = ((), (device_num,)) - reduce_mean_stra = ((device_num,),) - onehot_stra = ((1, device_num), (), ()) - loss_stra_list = [exp_stra, 
reduce_sum_stra, onehot_stra, div_stra, log_stra, - sum_cross_entropy_stra, mul_stra, mul2_stra, reduce_mean_stra, reduce_max_stra, sub_stra] - context.set_auto_parallel_context(parallel_mode="auto_parallel") - net = MatmulNet(matmul_stra=matmul_stra, loss_stra_list=loss_stra_list) - optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) - model = Model(net, optimizer=optimizer) - epoch_size = 6 - dataset = Dataset(self.input_part, self.label_part) - model.train(epoch_size, dataset, callbacks=parallel_callback, dataset_sink_mode=False) - loss_value = np.array(parallel_callback.loss_list) - return loss_value - - def mix_parallel_matmul_trains(self): - parallel_callback = ModelCallback() - matmul_stra = ((device_num, 1), (1, 1)) - reduce_max_stra = ((1, device_num),) - sub_stra = ((device_num, 1), (device_num, 1)) - exp_stra = ((1, device_num),) - reduce_sum_stra = ((1, device_num),) - div_stra = ((1, device_num), (1, 1)) - log_stra = ((1, device_num),) - mul_stra = ((1, device_num), (1, device_num)) - sum_cross_entropy_stra = ((1, device_num),) - mul2_stra = ((), (device_num,)) - reduce_mean_stra = ((device_num,),) - onehot_stra = ((1, device_num), (), ()) - loss_stra_list = [exp_stra, reduce_sum_stra, onehot_stra, div_stra, log_stra, - sum_cross_entropy_stra, mul_stra, mul2_stra, reduce_mean_stra, reduce_max_stra, sub_stra] - context.set_auto_parallel_context(parallel_mode="auto_parallel") - net = MatmulNet(matmul_stra=matmul_stra, loss_stra_list=loss_stra_list) - optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) - model = Model(net, optimizer=optimizer) - epoch_size = 6 - dataset = Dataset(self.input_part, self.label_part) - model.train(epoch_size, dataset, callbacks=parallel_callback, dataset_sink_mode=False) - loss_value = np.array(parallel_callback.loss_list) - return loss_value - - -def test_all_trains(): - loss_factory = LossFactory() - context.reset_auto_parallel_context() - single_loss = loss_factory.single_matmul_trains() - model_parallel_loss = loss_factory.model_parallel_matmul_trains() - mix_parallel_loss = loss_factory.mix_parallel_matmul_trains() - assert allclose(single_loss, model_parallel_loss) - assert allclose(single_loss, mix_parallel_loss) +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import os +import numpy as np +from numpy import allclose + +import mindspore as ms +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore.common import dtype as mstype +from mindspore.common.parameter import Parameter +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.nn.optim.momentum import Momentum +from mindspore.ops import functional as F +from mindspore.ops import operations as P +from mindspore.train import Model +from mindspore.train.callback import Callback + +np.set_printoptions(threshold=np.inf) +device_num = 2 +device_id = int(os.getenv('DEVICE_ID')) +rank_id = 0 +embed = 128 +classes = 32 +batch_size = 32 * 2 +MatmulParamShape = (classes, embed) + + +def setup_module(): + global device_num + global rank_id + np.random.seed(0) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(device_id=device_id) + distributedTool.init() + rank_id = distributedTool.get_rank() + device_num = distributedTool.get_group_size() + context.set_auto_parallel_context(device_num=device_num, + global_rank=device_id) + + +def teardown_module(): + distributedTool.release() + + +class DataGenerator(): + def get_parallel_blocks(self, input_, strategy): + blocks = [input_] + i = 0 + for stra in strategy: + temp = [] + while len(blocks) > 0: + block = blocks.pop(0) + temp.extend(np.split(block, stra, axis=i)) + blocks.extend(temp) + i += 1 + return blocks + + def generate_data(self, shape): + size = np.cumprod(shape)[-1] + num_range = min(size, 1000) + data = (np.arange(0, size) % num_range) / num_range + data = np.reshape(data, shape) + return data + + def input_data(self, shape): + data = (self.generate_data(shape) * 0.1).astype(np.float32) + stra = [1] * len(shape) + stra[0] = device_num + datas = self.get_parallel_blocks(data, stra) + return Tensor(data), Tensor(datas[rank_id]) + + def label_data(self, shape, embed_): + data = (self.generate_data(shape) * (embed_ - 1)).astype(np.int32) + stra = [1] * len(shape) + stra[0] = device_num + datas = self.get_parallel_blocks(data, stra) + return Tensor(data), Tensor(datas[rank_id]) + + +class Dataset(): + def __init__(self, predict, label, length=1, input_num=2): + self.predict = predict + self.label = label + self.index = 0 + self.length = length + self.input_num = input_num + + def __iter__(self): + return self + + def __next__(self): + if self.index >= self.length: + raise StopIteration + self.index += 1 + if self.input_num == 2: + return (self.predict, self.label) + return (self.predict,) + + def reset(self): + self.index = 0 + + def get_dataset_size(self): + return self.length + + def get_repeat_count(self): + return self.length + + +class ModelCallback(Callback): + def __init__(self): + super(ModelCallback, self).__init__() + self.loss_list = [] + + def epoch_end(self, run_context): + cb_params = run_context.original_args() + result = cb_params.net_outputs + self.loss_list.append(result.asnumpy().mean()) + + +class SoftmaxCrossEntropyExpand(Cell): + def __init__(self, sparse=False, stra_list=None): + super(SoftmaxCrossEntropyExpand, self).__init__() + if stra_list is None: + stra_list = [] + if len(stra_list) < 11: + stra_list = [None] * 11 + self.exp = P.Exp() + self.reduce_sum = P.ReduceSum(keep_dims=True).set_strategy(strategy=stra_list[1]) + self.onehot = P.OneHot().set_strategy(strategy=stra_list[2]) + self.on_value = Tensor(1.0, 
mstype.float32) + self.off_value = Tensor(0.0, mstype.float32) + self.div = P.Div().set_strategy(strategy=stra_list[3]) + self.log = P.Log().set_strategy(strategy=stra_list[4]) + self.sum_cross_entropy = P.ReduceSum(keep_dims=False).set_strategy(strategy=stra_list[5]) + self.mul = P.Mul().set_strategy(strategy=stra_list[6]) + self.mul2 = P.Mul().set_strategy(strategy=stra_list[7]) + self.cast = P.Cast() + self.reduce_mean = P.ReduceMean(keep_dims=False).set_strategy(strategy=stra_list[8]) + self.sparse = sparse + self.reduce_max = P.ReduceMax(keep_dims=True).set_strategy(strategy=stra_list[9]) + self.sub = P.Sub().set_strategy(strategy=stra_list[10]) + + def construct(self, logit, label): + logit_max = self.reduce_max(logit, -1) + exp = self.exp(self.sub(logit, logit_max)) + exp_sum = self.reduce_sum(exp, -1) + softmax_result = self.div(exp, exp_sum) + if self.sparse: + label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value) + softmax_result_log = self.log(softmax_result) + loss = self.sum_cross_entropy((self.mul(softmax_result_log, label)), -1) + loss = self.mul2(F.scalar_to_array(-1.0), loss) + loss = self.reduce_mean(loss, -1) + return loss + + +class MatmulNet(Cell): + def __init__(self, matmul_stra=None, loss_stra_list=None): + super(MatmulNet, self).__init__() + if loss_stra_list is None: + loss_stra_list = [] + self.matmul = P.MatMul(transpose_b=True).set_strategy(strategy=matmul_stra) + self.loss = SoftmaxCrossEntropyExpand(sparse=True, stra_list=loss_stra_list) + self.weight = Parameter(Tensor(np.ones(MatmulParamShape), dtype=ms.float32), name="weight") + + def construct(self, x, label): + loss_input = self.matmul(x, self.weight) + out = self.loss(loss_input, label) + return out + + +class LossFactory(): + def __init__(self): + data_gen = DataGenerator() + self.input_full, self.input_part = data_gen.input_data((batch_size, embed)) + self.label_full, self.label_part = data_gen.label_data((batch_size,), embed) + + def single_matmul_trains(self): + single_callback = ModelCallback() + net = MatmulNet() + optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) + model = Model(net, optimizer=optimizer) + epoch_size = 6 + dataset = Dataset(self.input_full, self.label_full) + model.train(epoch_size, dataset, callbacks=single_callback, dataset_sink_mode=False) + loss_value = np.array(single_callback.loss_list) + return loss_value + + def data_parallel_matmul_trains(self): + parallel_callback = ModelCallback() + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net = MatmulNet() + optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) + model = Model(net, optimizer=optimizer) + epoch_size = 6 + dataset = Dataset(self.input_part, self.label_part) + model.train(epoch_size, dataset, callbacks=parallel_callback, dataset_sink_mode=False) + loss_value = np.array(parallel_callback.loss_list) + return loss_value + + def model_parallel_matmul_trains(self): + parallel_callback = ModelCallback() + matmul_stra = ((1, 1), (device_num, 1)) + reduce_max_stra = ((1, device_num),) + sub_stra = ((1, device_num), (1, 1)) + exp_stra = ((1, device_num),) + reduce_sum_stra = ((1, device_num),) + div_stra = ((1, device_num), (1, 1)) + log_stra = ((1, device_num),) + mul_stra = ((1, device_num), (1, device_num)) + sum_cross_entropy_stra = ((1, device_num),) + mul2_stra = ((), (device_num,)) + reduce_mean_stra = ((device_num,),) + onehot_stra = ((1, device_num), (), ()) + loss_stra_list = [exp_stra, reduce_sum_stra, onehot_stra, 
div_stra, log_stra, + sum_cross_entropy_stra, mul_stra, mul2_stra, reduce_mean_stra, reduce_max_stra, sub_stra] + context.set_auto_parallel_context(parallel_mode="auto_parallel") + net = MatmulNet(matmul_stra=matmul_stra, loss_stra_list=loss_stra_list) + optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) + model = Model(net, optimizer=optimizer) + epoch_size = 6 + dataset = Dataset(self.input_part, self.label_part) + model.train(epoch_size, dataset, callbacks=parallel_callback, dataset_sink_mode=False) + loss_value = np.array(parallel_callback.loss_list) + return loss_value + + def mix_parallel_matmul_trains(self): + parallel_callback = ModelCallback() + matmul_stra = ((device_num, 1), (1, 1)) + reduce_max_stra = ((1, device_num),) + sub_stra = ((device_num, 1), (device_num, 1)) + exp_stra = ((1, device_num),) + reduce_sum_stra = ((1, device_num),) + div_stra = ((1, device_num), (1, 1)) + log_stra = ((1, device_num),) + mul_stra = ((1, device_num), (1, device_num)) + sum_cross_entropy_stra = ((1, device_num),) + mul2_stra = ((), (device_num,)) + reduce_mean_stra = ((device_num,),) + onehot_stra = ((1, device_num), (), ()) + loss_stra_list = [exp_stra, reduce_sum_stra, onehot_stra, div_stra, log_stra, + sum_cross_entropy_stra, mul_stra, mul2_stra, reduce_mean_stra, reduce_max_stra, sub_stra] + context.set_auto_parallel_context(parallel_mode="auto_parallel") + net = MatmulNet(matmul_stra=matmul_stra, loss_stra_list=loss_stra_list) + optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) + model = Model(net, optimizer=optimizer) + epoch_size = 6 + dataset = Dataset(self.input_part, self.label_part) + model.train(epoch_size, dataset, callbacks=parallel_callback, dataset_sink_mode=False) + loss_value = np.array(parallel_callback.loss_list) + return loss_value + + +def test_all_trains(): + loss_factory = LossFactory() + context.reset_auto_parallel_context() + single_loss = loss_factory.single_matmul_trains() + model_parallel_loss = loss_factory.model_parallel_matmul_trains() + mix_parallel_loss = loss_factory.mix_parallel_matmul_trains() + assert allclose(single_loss, model_parallel_loss) + assert allclose(single_loss, mix_parallel_loss) diff --git a/tests/st/auto_parallel/test_expand_loss.py b/tests/st/auto_parallel/test_expand_loss.py index ed309ee2b8..d5148e35e2 100644 --- a/tests/st/auto_parallel/test_expand_loss.py +++ b/tests/st/auto_parallel/test_expand_loss.py @@ -1,26 +1,26 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -import os -import pytest - - -@pytest.mark.level0 -@pytest.mark.platform_x86_ascend_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.env_single -def test_expand_loss(): - sh_path = os.path.split(os.path.realpath(__file__))[0] - ret = os.system(f"sh {sh_path}/run_auto_parallel_loss_expand.sh") - assert (ret == 0) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import os +import pytest + + +@pytest.mark.level0 +@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_arm_ascend_training +@pytest.mark.env_single +def test_expand_loss(): + sh_path = os.path.split(os.path.realpath(__file__))[0] + ret = os.system(f"sh {sh_path}/run_auto_parallel_loss_expand.sh") + assert ret == 0 diff --git a/tests/st/auto_parallel/test_model_parallel_onehot.py b/tests/st/auto_parallel/test_model_parallel_onehot.py index f0ad888ecc..8f5d2dbe6f 100644 --- a/tests/st/auto_parallel/test_model_parallel_onehot.py +++ b/tests/st/auto_parallel/test_model_parallel_onehot.py @@ -1,22 +1,21 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -import os -import pytest - - -def test_expand_loss(): - ret = os.system("sh run_onehot_model_parallel.sh") - assert (ret == 0) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import os + + +def test_expand_loss(): + ret = os.system("sh run_onehot_model_parallel.sh") + assert ret == 0 diff --git a/tests/st/auto_parallel/test_resnet50_expand_loss_2p.py b/tests/st/auto_parallel/test_resnet50_expand_loss_2p.py index cb03a5b032..e316f6ae06 100644 --- a/tests/st/auto_parallel/test_resnet50_expand_loss_2p.py +++ b/tests/st/auto_parallel/test_resnet50_expand_loss_2p.py @@ -13,8 +13,8 @@ # limitations under the License. # ============================================================================ -import numpy as np import os +import numpy as np import pytest import mindspore.common.dtype as mstype @@ -37,31 +37,29 @@ init() context.set_auto_parallel_context(mirror_mean=True, parallel_mode=ParallelMode.AUTO_PARALLEL) -def weight_variable(shape, factor=0.1): +def weight_variable(): return One() def _conv3x3(in_channels, out_channels, stride=1, padding=0, pad_mode='same'): - init_value = weight_variable((out_channels, in_channels, 3, 3)) + init_value = weight_variable() return nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=padding, pad_mode=pad_mode, weight_init=init_value) def _conv1x1(in_channels, out_channels, stride=1, padding=0, pad_mode='same'): - init_value = weight_variable((out_channels, in_channels, 1, 1)) + init_value = weight_variable() return nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, padding=padding, pad_mode=pad_mode, weight_init=init_value) def _conv7x7(in_channels, out_channels, stride=1, padding=0, pad_mode='same'): - init_value = weight_variable((out_channels, in_channels, 7, 7)) + init_value = weight_variable() return nn.Conv2d(in_channels, out_channels, kernel_size=7, stride=stride, padding=padding, pad_mode=pad_mode, weight_init=init_value) def _fused_bn(channels, momentum=0.9): - init_weight = weight_variable((channels,)) - init_bias = weight_variable((channels,)) return nn.BatchNorm2d(channels, momentum=momentum) @@ -210,8 +208,8 @@ class ResNet(nn.Cell): self.mean = P.ReduceMean(keep_dims=True) self.end_point = nn.Dense(2048, num_classes, has_bias=True, - weight_init=weight_variable((num_classes, 2048)), - bias_init=weight_variable((num_classes,))) + weight_init=weight_variable(), + bias_init=weight_variable()) self.squeeze = P.Squeeze() self.cast = P.Cast() @@ -345,9 +343,8 @@ class Dataset(): raise StopIteration self.index += 1 if self.input_num == 2: - return self.predict, self.label - else: - return self.predict, + return (self.predict, self.label) + return (self.predict,) def reset(self): self.index = 0 @@ -364,7 +361,7 @@ class ModelCallback(Callback): super(ModelCallback, self).__init__() self.loss_list = [] - def epoch_end(self, run_context, *args): + def epoch_end(self, run_context): cb_params = run_context.original_args() result = cb_params.net_outputs self.loss_list.append(result.asnumpy().mean()) @@ -376,9 +373,9 @@ class ModelCallback(Callback): def test_train_feed(num_classes=8192): set_algo_parameters(elementwise_op_strategy_follow=True) parallel_callback = ModelCallback() - dataGen = DataGenerator() - input_full, input_part = dataGen.input_data((32 * 2, 3, 224, 224)) - label_full, label_part = dataGen.label_data((32 * 2,)) + data_gen = DataGenerator() + _, input_part = data_gen.input_data((32 * 2, 3, 224, 224)) + _, label_part = data_gen.label_data((32 * 2,)) dataset = Dataset(input_part, label_part) net = resnet50(num_classes) loss = SoftmaxCrossEntropyExpand(sparse=True) @@ -396,9 +393,9 @@ def 
test_train_feed(num_classes=8192): def test_train_feed2(num_classes=1001): set_algo_parameters(elementwise_op_strategy_follow=True) parallel_callback = ModelCallback() - dataGen = DataGenerator() - input_full, input_part = dataGen.input_data((32 * 2, 3, 224, 224)) - label_full, label_part = dataGen.label_data((32 * 2,)) + data_gen = DataGenerator() + _, input_part = data_gen.input_data((32 * 2, 3, 224, 224)) + _, label_part = data_gen.label_data((32 * 2,)) dataset = Dataset(input_part, label_part) net = resnet50(num_classes) loss = SoftmaxCrossEntropyExpand(sparse=True) diff --git a/tests/ut/python/communication/__init__.py b/tests/ut/python/communication/__init__.py index 6cb7088820..83f9a36dcc 100644 --- a/tests/ut/python/communication/__init__.py +++ b/tests/ut/python/communication/__init__.py @@ -1,17 +1,17 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys - -sys.path.append("../../..") +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys + +sys.path.append("../../..") diff --git a/tests/ut/python/communication/test_comm.py b/tests/ut/python/communication/test_comm.py index c9d20674f3..7688adb41a 100644 --- a/tests/ut/python/communication/test_comm.py +++ b/tests/ut/python/communication/test_comm.py @@ -25,7 +25,6 @@ from mindspore.nn import Dense from mindspore.nn import Momentum from mindspore.nn import ReLU from mindspore.nn import TrainOneStepCell, WithLossCell -from mindspore.ops.operations import Split from mindspore.ops.operations.comm_ops import AllReduce, AllGather, _AlltoAll, ReduceOp, ReduceScatter from mindspore.ops.operations.comm_ops import Broadcast diff --git a/tests/ut/python/communication/test_data_parallel_lenet.py b/tests/ut/python/communication/test_data_parallel_lenet.py index a9840cd734..7a5062b941 100755 --- a/tests/ut/python/communication/test_data_parallel_lenet.py +++ b/tests/ut/python/communication/test_data_parallel_lenet.py @@ -16,8 +16,8 @@ @File : test_data_parallel_lenet.py @Desc : test data parallel lenet """ -import numpy as np import os +import numpy as np import mindspore.context as context import mindspore.nn as nn @@ -80,7 +80,6 @@ def test_lenet5_train_step_training_pynative(): context.reset_auto_parallel_context() context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, device_num=8, mirror_mean=True) - size = 3 predict = Tensor(np.ones([1, 1, 32, 32]).astype(np.float32) * 0.01) label = Tensor(np.zeros([1, 10]).astype(np.float32)) DatasetLenet(predict, label, 2) diff --git a/tests/ut/python/parallel/__init__.py b/tests/ut/python/parallel/__init__.py index 653fde4f3f..edd469899e 100644 --- a/tests/ut/python/parallel/__init__.py +++ b/tests/ut/python/parallel/__init__.py @@ -19,7 +19,7 @@ from mindspore.parallel._utils import _reset_op_id from mindspore.parallel.algo_parameter_config import reset_algo_parameters -def setup_module(module): +def setup_module(): auto_parallel_context().set_enable_all_reduce_fusion(enable_all_reduce_fusion=True) context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False) reset_cost_model_context() diff --git a/tests/ut/python/parallel/parallel_end_to_end/add_relu/_test_add_relu_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/add_relu/_test_add_relu_parallel_4p.py index 32e19be33c..b1a4423b32 100644 --- a/tests/ut/python/parallel/parallel_end_to_end/add_relu/_test_add_relu_parallel_4p.py +++ b/tests/ut/python/parallel/parallel_end_to_end/add_relu/_test_add_relu_parallel_4p.py @@ -1,178 +1,178 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import numpy as np -import os -import pytest - -import mindspore as ms -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class AddRelu(Cell): - def __init__(self, strategy0=None, strategy1=None): - super(AddRelu, self).__init__() - self.add = P.TensorAdd(strategy=strategy0) - self.relu = P.ReLU(strategy=strategy1) - - def construct(self, x, z): - out = self.add(x, z) - return self.relu(out) - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, x, y, output_grad): - return grad_all_with_sens(self.network)(x, y, output_grad) - - -class AddReluFactory: - def __init__(self, input_shape, strategy0, strategy1): - prefix = "" - size = 1 - for s in input_shape: - prefix = prefix + str(s) - size = size * s - self.prefix = prefix - number_range = min(1000, size) - self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( - np.float32) - self.input_np2 = 1.0 - self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1, - input_shape).astype(np.float32) - self.strategy0 = strategy0 - self.strategy1 = strategy1 - need_dev_num = 1 - need_dev_num_ = 1 - for s in strategy0[1]: - need_dev_num = need_dev_num * s - for s in strategy1[1]: - need_dev_num_ = need_dev_num_ * s - self.x_id = device_id % need_dev_num - self.y_id = device_id % need_dev_num - self.out_id = device_id % need_dev_num_ - - def forward_mindspore_impl(self): - net = AddRelu() - x = Tensor(self.input_np1) - y = Tensor(self.input_np2, ms.float32) - out = net(x, y) - return out.asnumpy() - - def forward_mindspore_parallel_impl(self): - net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net.set_auto_parallel() - x = Tensor(self.input_np1) - y = Tensor(self.input_np2, ms.float32) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(self.input_np2, ms.float32) - out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) - return out.asnumpy() - - def grad_mindspore_impl(self): - output_grad = Tensor(self.output_grad_np) - x = Tensor(self.input_np1) - y = Tensor(self.input_np2, ms.float32) - net = AddRelu() - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, output_grad) - return input_grad - - def grad_mindspore_parallel_impl(self): - output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy1[1]) - output_grad = Tensor(output_grads[self.out_id]) - x = Tensor(self.input_np1) - y = Tensor(self.input_np2, ms.float32) - net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1) - grad_net = Grad(net) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - 
grad_net.set_auto_parallel() - grad_net.set_train() - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(self.input_np2, ms.float32) - input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad], - parallel_inputs_run=[x1, y1, output_grad]) - return input_grad - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def forward_cmp(self): - out_mindspore = self.forward_mindspore_impl() - out_mindspore_parallel = self.forward_mindspore_parallel_impl() - out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy1[1]) - assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) - - def grad_cmp(self): - input_grad_mindspore = self.grad_mindspore_impl() - input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() - input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() - input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() - input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() - input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() - assert np.allclose(input_grad_mindspore1, input_grad_mindspore_parallel1, 0.0001, 0.0001) - - -@pytest.mark.reid_forward -def test_reid_add_relu_input_256_64(): - stra0 = (0, (2, 2), ()) - stra1 = (0, (2, 2)) - fact = AddReluFactory(input_shape=(256, 64), strategy0=stra0, strategy1=stra1) - fact.forward_cmp() - - -@pytest.mark.reid_grad -def test_reid_grad_add_relu_input_256_64(): - stra0 = (0, (2, 2), ()) - stra1 = (0, (2, 2)) - fact = AddReluFactory(input_shape=(256, 64), strategy0=stra0, strategy1=stra1) - fact.grad_cmp() +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import numpy as np +import pytest + +import mindspore as ms +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.ops import operations as P +from mindspore.ops.composite import grad_all_with_sens + +device_num = 4 +device_id = int(os.environ["RANK_ID"]) +path = "./output/" + + +def setup_module(): + print("~~~~~~~~~~~set up~~~~~~~~~~~~~") + context.set_context(mode=context.GRAPH_MODE) + context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) + distributedTool.init() + distributedTool.create_group("0-3", [0, 1, 2, 3]) + print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") + + +def teardown_module(): + print("~~~~~~~~~~~~tear down~~~~~~~~~~") + + +class AddRelu(Cell): + def __init__(self, strategy0=None, strategy1=None): + super(AddRelu, self).__init__() + self.add = P.TensorAdd(strategy=strategy0) + self.relu = P.ReLU(strategy=strategy1) + + def construct(self, x, z): + out = self.add(x, z) + return self.relu(out) + + +class Grad(Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.network = network + + def construct(self, x, y, output_grad): + return grad_all_with_sens(self.network)(x, y, output_grad) + + +class AddReluFactory: + def __init__(self, input_shape, strategy0, strategy1): + prefix = "" + size = 1 + for s in input_shape: + prefix = prefix + str(s) + size = size * s + self.prefix = prefix + number_range = min(1000, size) + self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( + np.float32) + self.input_np2 = 1.0 + self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1, + input_shape).astype(np.float32) + self.strategy0 = strategy0 + self.strategy1 = strategy1 + need_dev_num = 1 + need_dev_num_ = 1 + for s in strategy0[1]: + need_dev_num = need_dev_num * s + for s in strategy1[1]: + need_dev_num_ = need_dev_num_ * s + self.x_id = device_id % need_dev_num + self.y_id = device_id % need_dev_num + self.out_id = device_id % need_dev_num_ + + def forward_mindspore_impl(self): + net = AddRelu() + x = Tensor(self.input_np1) + y = Tensor(self.input_np2, ms.float32) + out = net(x, y) + return out.asnumpy() + + def forward_mindspore_parallel_impl(self): + net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net.set_auto_parallel() + x = Tensor(self.input_np1) + y = Tensor(self.input_np2, ms.float32) + inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + x1 = Tensor(inputs_x[self.x_id]) + y1 = Tensor(self.input_np2, ms.float32) + out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) + return out.asnumpy() + + def grad_mindspore_impl(self): + output_grad = Tensor(self.output_grad_np) + x = Tensor(self.input_np1) + y = Tensor(self.input_np2, ms.float32) + net = AddRelu() + grad_net = Grad(net) + grad_net.set_train() + input_grad = grad_net(x, y, output_grad) + return input_grad + + def grad_mindspore_parallel_impl(self): + output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy1[1]) + output_grad = Tensor(output_grads[self.out_id]) + x = Tensor(self.input_np1) + y = Tensor(self.input_np2, ms.float32) + net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1) + grad_net = Grad(net) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + 
grad_net.set_auto_parallel() + grad_net.set_train() + inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + x1 = Tensor(inputs_x[self.x_id]) + y1 = Tensor(self.input_np2, ms.float32) + input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad], + parallel_inputs_run=[x1, y1, output_grad]) + return input_grad + + def get_parallel_blocks(self, input_, strategy): + blocks = [input_] + i = 0 + for stra in strategy: + temp = [] + while len(blocks) > 0: + block = blocks.pop(0) + temp.extend(np.split(block, stra, axis=i)) + blocks.extend(temp) + i += 1 + return blocks + + def forward_cmp(self): + out_mindspore = self.forward_mindspore_impl() + out_mindspore_parallel = self.forward_mindspore_parallel_impl() + out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy1[1]) + assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) + + def grad_cmp(self): + input_grad_mindspore = self.grad_mindspore_impl() + input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() + _ = input_grad_mindspore[0].asnumpy() + input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() + _ = input_grad_mindspore_parallel[0].asnumpy() + input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() + assert np.allclose(input_grad_mindspore1, input_grad_mindspore_parallel1, 0.0001, 0.0001) + + +@pytest.mark.reid_forward +def test_reid_add_relu_input_256_64(): + stra0 = (0, (2, 2), ()) + stra1 = (0, (2, 2)) + fact = AddReluFactory(input_shape=(256, 64), strategy0=stra0, strategy1=stra1) + fact.forward_cmp() + + +@pytest.mark.reid_grad +def test_reid_grad_add_relu_input_256_64(): + stra0 = (0, (2, 2), ()) + stra1 = (0, (2, 2)) + fact = AddReluFactory(input_shape=(256, 64), strategy0=stra0, strategy1=stra1) + fact.grad_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/batch_parallel/_test_conv2d_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/batch_parallel/_test_conv2d_parallel_4p.py index 0a839f45e8..b492312da6 100644 --- a/tests/ut/python/parallel/parallel_end_to_end/batch_parallel/_test_conv2d_parallel_4p.py +++ b/tests/ut/python/parallel/parallel_end_to_end/batch_parallel/_test_conv2d_parallel_4p.py @@ -1,356 +1,356 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import numpy as np -import os -from numpy import allclose - -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore._checkparam import check_bool, twice -from mindspore.common.initializer import initializer -from mindspore.common.parameter import Parameter -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class _Conv(Cell): - r"""Applies a N-D convolution over an input signal composed of several input - planes. - """ - - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride, - pad_mode, - padding, - dilation, - group, - has_bias, - weight_init, - bias_init): - super(_Conv, self).__init__() - self.in_channels = in_channels - self.out_channels = out_channels - self.kernel_size = kernel_size - self.stride = stride - self.pad_mode = pad_mode - self.padding = padding - self.dilation = dilation - self.group = group - self.has_bias = has_bias - if not (isinstance(in_channels, int) and in_channels > 0): - raise ValueError('Attr \'in_channels\' of \'Conv2D\' Op passed ' - + str(in_channels) + ', should be a int and greater than 0.') - if (not isinstance(kernel_size, tuple)) or len(kernel_size) != 2 or \ - (not isinstance(kernel_size[0], int)) or (not isinstance(kernel_size[1], int)) or \ - kernel_size[0] < 1 or kernel_size[1] < 1: - raise ValueError('Attr \'kernel_size\' of \'Conv2D\' Op passed ' - + str(self.kernel_size) + ', should be a int or tuple and equal to or greater than 1.') - if in_channels % group != 0: - raise ValueError('Attr \'in_channels\' of \'Conv2D\' Op must be divisible by ' - 'attr \'group\' of \'Conv2D\' Op.') - if out_channels % group != 0: - raise ValueError('Attr \'out_channels\' of \'Conv2D\' Op must be divisible by ' - 'attr \'group\' of \'Conv2D\' Op.') - - self.weight = Parameter(initializer( - weight_init, [out_channels, in_channels // group, *kernel_size]), name='weight') - - if check_bool(has_bias): - self.bias = Parameter(initializer( - bias_init, [out_channels]), name='bias') - else: - if bias_init != 'zeros': - print("Value of 'has_bias' is False, value of 'bias_init' will be ignored.") - self.bias = None - - def construct(self, *inputs): - raise NotImplementedError - - -class Conv2d(_Conv): - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - pad_mode='same', - padding=0, - dilation=1, - group=1, - has_bias=False, - weight_init='normal', - bias_init='zeros', - strategy=None): - kernel_size = twice(kernel_size) - super(Conv2d, self).__init__( - in_channels, - out_channels, - kernel_size, - stride, - pad_mode, - padding, - dilation, - group, - has_bias, - weight_init, - bias_init) - self.add = P.TensorAdd(strategy) - self.conv2d = P.Conv2D(out_channel=self.out_channels, - kernel_size=self.kernel_size, - mode=1, - pad_mode=self.pad_mode, - pad=self.padding, - stride=self.stride, - dilation=self.dilation, - group=self.group, - strategy=None) - self.bias_add = P.BiasAdd() - - 
def construct(self, input1, input2): - x = self.add(input1, input2) - if self.has_bias: - return self.bias_add(self.conv2d(x, self.weight), - self.bias) - return self.conv2d(x, self.weight) - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, input1, input2, output_grad): - return grad_all_with_sens(self.network)(input1, input2, output_grad) - - -class Conv2dFactory: - def __init__(self, input_shape, filter_shape, stride, pad_mode, padding, dilation, group, has_bias): - self.in_n, self.in_c, self.in_h, self.in_w = input_shape - self.out_c, self.kernel_c, self.kernel_h, self.kernel_w = filter_shape - self.stride = stride - self.pad_mode = pad_mode - self.padding = padding - self.dilation = dilation - self.group = group - self.strategy0 = (0, (4, 1, 1, 1), (1, 1, 1, 1)) - prefix = "" - input_size = 1 - filter_size = 1 - for s in input_shape: - prefix = prefix + str(s) + "_" - input_size = input_size * s - self.prefix = prefix - for s in filter_shape: - filter_size = filter_size * s - number_range1 = min(10, input_size) - number_range2 = min(10, filter_size) - self.input_np1 = np.reshape(np.arange(0, input_size) % number_range1 - number_range1 / 2, input_shape).astype( - np.float16) - self.input_np2 = np.reshape(np.arange(0, input_size) % number_range1 - number_range1 / 4, input_shape).astype( - np.float16) - self.weight_np = np.reshape(np.arange(0, filter_size) % number_range2 - number_range2 / 2, filter_shape).astype( - np.float16) - self.has_bias = has_bias - if self.has_bias is True: - self.bias_np = np.arange(0, self.out_c).astype(np.float16) - - self.out_shape = (128, 64, 56, 56) - out_size = 1 - for s in self.out_shape: - out_size = out_size * s - number_range3 = min(10, out_size) - self.output_grad_np = np.reshape(np.arange(0, out_size) % number_range3 - number_range3 / 2, - self.out_shape).astype(np.float16) - self.x_id = device_id % 4 - self.y_id = device_id % 4 - self.out_strategy = self.strategy0[1] - self.out_id = device_id % 4 - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def forward_conv2d_mindspore_impl(self): - input1 = Tensor(self.input_np1) - input2 = Tensor(self.input_np2) - weight = Tensor(self.weight_np) - if self.has_bias: - bias = Tensor(self.bias_np) - net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, - kernel_size=(self.kernel_h, self.kernel_w), - stride=self.stride, pad_mode=self.pad_mode, - padding=self.padding, dilation=self.dilation, - group=self.group, has_bias=True, weight_init=weight, - bias_init=bias) - else: - net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, - kernel_size=(self.kernel_h, self.kernel_w), - stride=self.stride, pad_mode=self.pad_mode, - padding=self.padding, dilation=self.dilation, - group=self.group, has_bias=False, weight_init=weight) - out = net(input1, input2) - return out.asnumpy() - - def forward_conv2d_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - weight = Tensor(self.weight_np) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - if self.has_bias: - bias = Tensor(self.bias_np) - net = Conv2d(in_channels=self.in_c, 
out_channels=self.out_c, - kernel_size=(self.kernel_h, self.kernel_w), - stride=self.stride, pad_mode=self.pad_mode, - padding=self.padding, dilation=self.dilation, - group=self.group, has_bias=True, weight_init=weight, - bias_init=bias, strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) - else: - net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, - kernel_size=(self.kernel_h, self.kernel_w), - stride=self.stride, pad_mode=self.pad_mode, - padding=self.padding, dilation=self.dilation, - group=self.group, has_bias=False, weight_init=weight, - strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net.set_auto_parallel() - out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) - return out.asnumpy() - - def grad_conv2d_mindspore_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - weight = Tensor(self.weight_np) - output_grad = Tensor(self.output_grad_np) - if self.has_bias: - bias = Tensor(self.bias_np) - net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, - kernel_size=(self.kernel_h, self.kernel_w), - stride=self.stride, pad_mode=self.pad_mode, - padding=self.padding, dilation=self.dilation, - group=self.group, has_bias=True, weight_init=weight, - bias_init=bias, ) - else: - net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, - kernel_size=(self.kernel_h, self.kernel_w), - stride=self.stride, pad_mode=self.pad_mode, - padding=self.padding, dilation=self.dilation, - group=self.group, has_bias=False, weight_init=weight) - - grad_net = Grad(net) - grad_net.set_train() - out_grad = grad_net(x, y, output_grad) - return out_grad - - def grad_conv2d_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - weight = Tensor(self.weight_np) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - output_grad = Tensor(self.output_grad_np) - output_grads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) - output_grad1 = Tensor(output_grads[self.out_id]) - if self.has_bias: - bias = Tensor(self.bias_np) - net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, - kernel_size=(self.kernel_h, self.kernel_w), - stride=self.stride, pad_mode=self.pad_mode, - padding=self.padding, dilation=self.dilation, - group=self.group, has_bias=True, weight_init=weight, - bias_init=bias, strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) - else: - net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, - kernel_size=(self.kernel_h, self.kernel_w), - stride=self.stride, pad_mode=self.pad_mode, - padding=self.padding, dilation=self.dilation, - group=self.group, has_bias=False, weight_init=weight, - strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) - - grad_net = Grad(net) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - grad_net.set_train() - grad_net.set_auto_parallel() - out_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], - parallel_inputs_run=[x1, y1, output_grad1]) - return out_grad - - def forward_conv2d_cmp(self): - out_mindspore = self.forward_conv2d_mindspore_impl() - out_mindspore_parallel = self.forward_conv2d_mindspore_parallel_impl() - out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) - assert 
allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001) - - def grad_conv2d_cmp(self): - input_grad_mindspore = self.grad_conv2d_mindspore_impl() - input_grad_mindspore_parallel = self.grad_conv2d_mindspore_parallel_impl() - input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() - input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() - input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() - input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() - input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) - input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[1]) - assert allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.001, 0.001) - assert allclose(input_grad_blocks_1[self.x_id], input_grad_mindspore_parallel1, 0.001, 0.001) - - -def test_reid_conv2d_input_128_64_112_112_kernel_64_64_1_1_stride_2_padding_0_bias_true(): - fact = Conv2dFactory(input_shape=(128, 64, 112, 112), - filter_shape=(64, 64, 1, 1), - stride=2, pad_mode='valid', padding=0, - dilation=1, group=1, has_bias=False) - fact.forward_conv2d_cmp() - - -def test_reid_conv2d_grad_input_128_64_112_112_kernel_64_64_1_1_stride_2_padding_0_bias_true(): - fact = Conv2dFactory(input_shape=(128, 64, 112, 112), - filter_shape=(64, 64, 1, 1), - stride=2, pad_mode='valid', padding=0, - dilation=1, group=1, has_bias=False) - fact.grad_conv2d_cmp() +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import numpy as np +from numpy import allclose + +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore._checkparam import check_bool, twice +from mindspore.common.initializer import initializer +from mindspore.common.parameter import Parameter +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.ops import operations as P +from mindspore.ops.composite import grad_all_with_sens + +device_num = 4 +device_id = int(os.environ["RANK_ID"]) +path = "./output/" + + +def setup_module(): + print("~~~~~~~~~~~set up~~~~~~~~~~~~~") + context.set_context(mode=context.GRAPH_MODE) + context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) + distributedTool.init() + distributedTool.create_group("0-3", [0, 1, 2, 3]) + print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") + + +def teardown_module(): + print("~~~~~~~~~~~~tear down~~~~~~~~~~") + + +class _Conv(Cell): + r"""Applies a N-D convolution over an input signal composed of several input + planes. 
+ """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride, + pad_mode, + padding, + dilation, + group, + has_bias, + weight_init, + bias_init): + super(_Conv, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.stride = stride + self.pad_mode = pad_mode + self.padding = padding + self.dilation = dilation + self.group = group + self.has_bias = has_bias + if not (isinstance(in_channels, int) and in_channels > 0): + raise ValueError('Attr \'in_channels\' of \'Conv2D\' Op passed ' + + str(in_channels) + ', should be a int and greater than 0.') + if (not isinstance(kernel_size, tuple)) or len(kernel_size) != 2 or \ + (not isinstance(kernel_size[0], int)) or (not isinstance(kernel_size[1], int)) or \ + kernel_size[0] < 1 or kernel_size[1] < 1: + raise ValueError('Attr \'kernel_size\' of \'Conv2D\' Op passed ' + + str(self.kernel_size) + ', should be a int or tuple and equal to or greater than 1.') + if in_channels % group != 0: + raise ValueError('Attr \'in_channels\' of \'Conv2D\' Op must be divisible by ' + 'attr \'group\' of \'Conv2D\' Op.') + if out_channels % group != 0: + raise ValueError('Attr \'out_channels\' of \'Conv2D\' Op must be divisible by ' + 'attr \'group\' of \'Conv2D\' Op.') + + self.weight = Parameter(initializer( + weight_init, [out_channels, in_channels // group, *kernel_size]), name='weight') + + if check_bool(has_bias): + self.bias = Parameter(initializer( + bias_init, [out_channels]), name='bias') + else: + if bias_init != 'zeros': + print("Value of 'has_bias' is False, value of 'bias_init' will be ignored.") + self.bias = None + + def construct(self, *inputs): + raise NotImplementedError + + +class Conv2d(_Conv): + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + pad_mode='same', + padding=0, + dilation=1, + group=1, + has_bias=False, + weight_init='normal', + bias_init='zeros', + strategy=None): + kernel_size = twice(kernel_size) + super(Conv2d, self).__init__( + in_channels, + out_channels, + kernel_size, + stride, + pad_mode, + padding, + dilation, + group, + has_bias, + weight_init, + bias_init) + self.add = P.TensorAdd(strategy) + self.conv2d = P.Conv2D(out_channel=self.out_channels, + kernel_size=self.kernel_size, + mode=1, + pad_mode=self.pad_mode, + pad=self.padding, + stride=self.stride, + dilation=self.dilation, + group=self.group, + strategy=None) + self.bias_add = P.BiasAdd() + + def construct(self, input1, input2): + x = self.add(input1, input2) + if self.has_bias: + return self.bias_add(self.conv2d(x, self.weight), + self.bias) + return self.conv2d(x, self.weight) + + +class Grad(Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.network = network + + def construct(self, input1, input2, output_grad): + return grad_all_with_sens(self.network)(input1, input2, output_grad) + + +class Conv2dFactory: + def __init__(self, input_shape, filter_shape, stride, pad_mode, padding, dilation, group, has_bias): + self.in_n, self.in_c, self.in_h, self.in_w = input_shape + self.out_c, self.kernel_c, self.kernel_h, self.kernel_w = filter_shape + self.stride = stride + self.pad_mode = pad_mode + self.padding = padding + self.dilation = dilation + self.group = group + self.strategy0 = (0, (4, 1, 1, 1), (1, 1, 1, 1)) + prefix = "" + input_size = 1 + filter_size = 1 + for s in input_shape: + prefix = prefix + str(s) + "_" + input_size = input_size * s + self.prefix = prefix + for s in filter_shape: + filter_size = filter_size 
* s + number_range1 = min(10, input_size) + number_range2 = min(10, filter_size) + self.input_np1 = np.reshape(np.arange(0, input_size) % number_range1 - number_range1 / 2, input_shape).astype( + np.float16) + self.input_np2 = np.reshape(np.arange(0, input_size) % number_range1 - number_range1 / 4, input_shape).astype( + np.float16) + self.weight_np = np.reshape(np.arange(0, filter_size) % number_range2 - number_range2 / 2, filter_shape).astype( + np.float16) + self.has_bias = has_bias + if self.has_bias is True: + self.bias_np = np.arange(0, self.out_c).astype(np.float16) + + self.out_shape = (128, 64, 56, 56) + out_size = 1 + for s in self.out_shape: + out_size = out_size * s + number_range3 = min(10, out_size) + self.output_grad_np = np.reshape(np.arange(0, out_size) % number_range3 - number_range3 / 2, + self.out_shape).astype(np.float16) + self.x_id = device_id % 4 + self.y_id = device_id % 4 + self.out_strategy = self.strategy0[1] + self.out_id = device_id % 4 + + def get_parallel_blocks(self, input_, strategy): + blocks = [input_] + i = 0 + for stra in strategy: + temp = [] + while len(blocks) > 0: + block = blocks.pop(0) + temp.extend(np.split(block, stra, axis=i)) + blocks.extend(temp) + i += 1 + return blocks + + def forward_conv2d_mindspore_impl(self): + input1 = Tensor(self.input_np1) + input2 = Tensor(self.input_np2) + weight = Tensor(self.weight_np) + if self.has_bias: + bias = Tensor(self.bias_np) + net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, + kernel_size=(self.kernel_h, self.kernel_w), + stride=self.stride, pad_mode=self.pad_mode, + padding=self.padding, dilation=self.dilation, + group=self.group, has_bias=True, weight_init=weight, + bias_init=bias) + else: + net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, + kernel_size=(self.kernel_h, self.kernel_w), + stride=self.stride, pad_mode=self.pad_mode, + padding=self.padding, dilation=self.dilation, + group=self.group, has_bias=False, weight_init=weight) + out = net(input1, input2) + return out.asnumpy() + + def forward_conv2d_mindspore_parallel_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + weight = Tensor(self.weight_np) + inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) + x1 = Tensor(inputs_x[self.x_id]) + y1 = Tensor(inputs_y[self.y_id]) + if self.has_bias: + bias = Tensor(self.bias_np) + net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, + kernel_size=(self.kernel_h, self.kernel_w), + stride=self.stride, pad_mode=self.pad_mode, + padding=self.padding, dilation=self.dilation, + group=self.group, has_bias=True, weight_init=weight, + bias_init=bias, strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) + else: + net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, + kernel_size=(self.kernel_h, self.kernel_w), + stride=self.stride, pad_mode=self.pad_mode, + padding=self.padding, dilation=self.dilation, + group=self.group, has_bias=False, weight_init=weight, + strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net.set_auto_parallel() + out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) + return out.asnumpy() + + def grad_conv2d_mindspore_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + weight = Tensor(self.weight_np) + output_grad = Tensor(self.output_grad_np) + if self.has_bias: + bias = Tensor(self.bias_np) + 
net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, + kernel_size=(self.kernel_h, self.kernel_w), + stride=self.stride, pad_mode=self.pad_mode, + padding=self.padding, dilation=self.dilation, + group=self.group, has_bias=True, weight_init=weight, + bias_init=bias,) + else: + net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, + kernel_size=(self.kernel_h, self.kernel_w), + stride=self.stride, pad_mode=self.pad_mode, + padding=self.padding, dilation=self.dilation, + group=self.group, has_bias=False, weight_init=weight) + + grad_net = Grad(net) + grad_net.set_train() + out_grad = grad_net(x, y, output_grad) + return out_grad + + def grad_conv2d_mindspore_parallel_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + weight = Tensor(self.weight_np) + inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) + x1 = Tensor(inputs_x[self.x_id]) + y1 = Tensor(inputs_y[self.y_id]) + output_grad = Tensor(self.output_grad_np) + output_grads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) + output_grad1 = Tensor(output_grads[self.out_id]) + if self.has_bias: + bias = Tensor(self.bias_np) + net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, + kernel_size=(self.kernel_h, self.kernel_w), + stride=self.stride, pad_mode=self.pad_mode, + padding=self.padding, dilation=self.dilation, + group=self.group, has_bias=True, weight_init=weight, + bias_init=bias, strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) + else: + net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, + kernel_size=(self.kernel_h, self.kernel_w), + stride=self.stride, pad_mode=self.pad_mode, + padding=self.padding, dilation=self.dilation, + group=self.group, has_bias=False, weight_init=weight, + strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) + + grad_net = Grad(net) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + grad_net.set_train() + grad_net.set_auto_parallel() + out_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], + parallel_inputs_run=[x1, y1, output_grad1]) + return out_grad + + def forward_conv2d_cmp(self): + out_mindspore = self.forward_conv2d_mindspore_impl() + out_mindspore_parallel = self.forward_conv2d_mindspore_parallel_impl() + out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) + assert allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001) + + def grad_conv2d_cmp(self): + input_grad_mindspore = self.grad_conv2d_mindspore_impl() + input_grad_mindspore_parallel = self.grad_conv2d_mindspore_parallel_impl() + input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() + input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() + input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() + input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() + input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) + input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[1]) + assert allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.001, 0.001) + assert allclose(input_grad_blocks_1[self.x_id], input_grad_mindspore_parallel1, 0.001, 0.001) + + +def test_reid_conv2d_input_128_64_112_112_kernel_64_64_1_1_stride_2_padding_0_bias_true(): + fact = Conv2dFactory(input_shape=(128, 64, 112, 112), + filter_shape=(64, 64, 1, 1), + stride=2, 
pad_mode='valid', padding=0, + dilation=1, group=1, has_bias=False) + fact.forward_conv2d_cmp() + + +def test_reid_conv2d_grad_input_128_64_112_112_kernel_64_64_1_1_stride_2_padding_0_bias_true(): + fact = Conv2dFactory(input_shape=(128, 64, 112, 112), + filter_shape=(64, 64, 1, 1), + stride=2, pad_mode='valid', padding=0, + dilation=1, group=1, has_bias=False) + fact.grad_conv2d_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/dropout/_test_dropout_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/dropout/_test_dropout_parallel_4p.py index c735b28c43..41991aac74 100644 --- a/tests/ut/python/parallel/parallel_end_to_end/dropout/_test_dropout_parallel_4p.py +++ b/tests/ut/python/parallel/parallel_end_to_end/dropout/_test_dropout_parallel_4p.py @@ -1,120 +1,120 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import os - -import mindspore as ms -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.nn import Dropout - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class Net(Cell): - def __init__(self, keep_prob, seed0, seed1, strategy=None): - super(Net, self).__init__() - self.drop = Dropout(keep_prob, seed0, seed1, dtype=ms.float32, strategy=strategy) - - def construct(self, input): - x = self.drop(input) - return x - - -# pylint: disable=comparison-with-itself -class DropoutFactory: - def __init__(self, input_shape, keep_prob, seed0, seed1, strategy0=None): - size = 1 - prefix = "" - for s in input_shape: - prefix = prefix + str(s) - size = size * s - self.prefix = prefix - number_range = min(10, size) - self.input_np = np.reshape(np.arange(0, size) % number_range, input_shape).astype(np.float32) - self.keep_prob = keep_prob - self.seed0 = seed0 - self.seed1 = seed1 - self.strategy0 = strategy0 - need_dev_num = 1 - for s in strategy0[1]: - need_dev_num = need_dev_num * s - self.x_id = device_id % need_dev_num - self.out_id = device_id % need_dev_num - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def d4_tensor_compare(self, input, out_me): - [a, b, c, d] = input.shape - for i in range(a): - for j in range(b): - for k in range(c): - for e in range(d): - if out_me[i, j, k, e] == 0: - assert True == True - else: - assert 
np.allclose(out_me[i, j, k, e], input[i, j, k, e] * (1 / 0.4), 0.0001, 0.0001) - - def forward_mindspore_parallel_impl(self): - x = Tensor(self.input_np) - inputs_x = self.get_parallel_blocks(self.input_np, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - net = Net(0.4, 0, 0, strategy=self.strategy0) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net.set_auto_parallel() - out = net(x, parallel_inputs_compile=[x], parallel_inputs_run=[x1]) - return out.asnumpy() - - def forward_cmp(self): - out_mindspore_parallel = self.forward_mindspore_parallel_impl() - input_blocks = self.get_parallel_blocks(self.input_np, self.strategy0[1]) - self.d4_tensor_compare(input_blocks[self.out_id], out_mindspore_parallel) - - -def test_reid_dropout_forward_seed_F32_64_512_8_8(): - fact = DropoutFactory(input_shape=(64, 512, 8, 8), keep_prob=0.4, seed0=0, seed1=0, strategy0=(0, (4, 1, 1, 1))) - fact.forward_cmp() - - -def test_reid_dropout_forward_seed_F32_64_512_8_8_repeat(): - fact = DropoutFactory(input_shape=(64, 512, 8, 8), keep_prob=0.4, seed0=0, seed1=0, strategy0=(0, (2, 1, 1, 1))) - fact.forward_cmp() +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import numpy as np + +import mindspore as ms +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.nn import Dropout + +device_num = 4 +device_id = int(os.environ["RANK_ID"]) +path = "./output/" + + +def setup_module(): + print("~~~~~~~~~~~set up~~~~~~~~~~~~~") + context.set_context(mode=context.GRAPH_MODE) + context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) + distributedTool.init() + distributedTool.create_group("0-3", [0, 1, 2, 3]) + print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") + + +def teardown_module(): + print("~~~~~~~~~~~~tear down~~~~~~~~~~") + + +class Net(Cell): + def __init__(self, keep_prob, seed0, seed1, strategy=None): + super(Net, self).__init__() + self.drop = Dropout(keep_prob, seed0, seed1, dtype=ms.float32, strategy=strategy) + + def construct(self, input_): + x = self.drop(input_) + return x + + +# pylint: disable=comparison-with-itself +class DropoutFactory: + def __init__(self, input_shape, keep_prob, seed0, seed1, strategy0=None): + size = 1 + prefix = "" + for s in input_shape: + prefix = prefix + str(s) + size = size * s + self.prefix = prefix + number_range = min(10, size) + self.input_np = np.reshape(np.arange(0, size) % number_range, input_shape).astype(np.float32) + self.keep_prob = keep_prob + self.seed0 = seed0 + self.seed1 = seed1 + self.strategy0 = strategy0 + need_dev_num = 1 + for s in strategy0[1]: + need_dev_num = need_dev_num * s + self.x_id = device_id % need_dev_num + self.out_id = device_id % need_dev_num + + def get_parallel_blocks(self, input_, strategy): + blocks = [input_] + i = 0 + for stra in strategy: + temp = [] + while len(blocks) > 0: + block = blocks.pop(0) 
+ temp.extend(np.split(block, stra, axis=i)) + blocks.extend(temp) + i += 1 + return blocks + + def d4_tensor_compare(self, input_, out_me): + [a, b, c, d] = input_.shape + for i in range(a): + for j in range(b): + for k in range(c): + for e in range(d): + if out_me[i, j, k, e] == 0: + assert True + else: + assert np.allclose(out_me[i, j, k, e], input_[i, j, k, e] * (1 / 0.4), 0.0001, 0.0001) + + def forward_mindspore_parallel_impl(self): + x = Tensor(self.input_np) + inputs_x = self.get_parallel_blocks(self.input_np, self.strategy0[1]) + x1 = Tensor(inputs_x[self.x_id]) + net = Net(0.4, 0, 0, strategy=self.strategy0) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net.set_auto_parallel() + out = net(x, parallel_inputs_compile=[x], parallel_inputs_run=[x1]) + return out.asnumpy() + + def forward_cmp(self): + out_mindspore_parallel = self.forward_mindspore_parallel_impl() + input_blocks = self.get_parallel_blocks(self.input_np, self.strategy0[1]) + self.d4_tensor_compare(input_blocks[self.out_id], out_mindspore_parallel) + + +def test_reid_dropout_forward_seed_F32_64_512_8_8(): + fact = DropoutFactory(input_shape=(64, 512, 8, 8), keep_prob=0.4, seed0=0, seed1=0, strategy0=(0, (4, 1, 1, 1))) + fact.forward_cmp() + + +def test_reid_dropout_forward_seed_F32_64_512_8_8_repeat(): + fact = DropoutFactory(input_shape=(64, 512, 8, 8), keep_prob=0.4, seed0=0, seed1=0, strategy0=(0, (2, 1, 1, 1))) + fact.forward_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allgather_4p.py b/tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allgather_4p.py index 006cc6925d..893ee1cd38 100644 --- a/tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allgather_4p.py +++ b/tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allgather_4p.py @@ -1,154 +1,154 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import numpy as np -import os - -import mindspore as ms -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class MatmulSingle(Cell): - def __init__(self, transpose_a=False, transpose_b=False): - super(MatmulSingle, self).__init__() - self.matmul = P.MatMul(transpose_a, transpose_b) - self.pow = P.Pow() - self.reduce_sum = P.ReduceSum() - - def construct(self, x, y): - out = self.matmul(x, y) - out = self.pow(out, 2.0) - out = self.reduce_sum(out, None) - return out - - -class MatmulAllgather(Cell): - def __init__(self, group, transpose_a=False, transpose_b=False): - super(MatmulAllgather, self).__init__() - self.allgather = P.AllGather(group=group) - self.matmul = P.MatMul(transpose_a, transpose_b) - self.pow = P.Pow() - self.reduce_sum = P.ReduceSum() - self.allreduce = P.AllReduce(group=group) - - def construct(self, x, y): - x = self.allgather(x) - out = self.matmul(x, y) - out = self.pow(out, 2.0) - out = self.reduce_sum(out, None) - out = self.allreduce(out) - return out - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, x, y, sens): - return grad_all_with_sens(self.network)(x, y, sens) - - -class MatmulAllgatherFactory: - def __init__(self, inputx_shape, inputy_shape, x_stra, y_stra): - self.inputx = self.GenValue(inputx_shape, 10) - self.inputy = self.GenValue(inputy_shape, 20) - self.x_stra = x_stra - self.y_stra = y_stra - stra_size = 1 - for s in x_stra: - stra_size = stra_size * s - self.stra_size = stra_size - - def GenValue(self, input_shape, delta): - size = 1 - for s in input_shape: - size = size * s - number_range = min(100, size) - input_np = np.reshape(np.arange(0, size) % number_range - delta, input_shape).astype(np.float32) - return input_np - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def grad_mindspore_impl_single(self): - x = Tensor(self.inputx) - y = Tensor(self.inputy) - sens = Tensor(1.0, dtype=ms.float32) - net = MatmulSingle() - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, sens) - return input_grad - - def grad_mindspore_impl_reduce(self): - inputxs = self.get_parallel_blocks(self.inputx, self.x_stra) - inputys = self.get_parallel_blocks(self.inputy, self.y_stra) - x = Tensor(inputxs[device_id % self.stra_size]) - y = Tensor(inputys[device_id % self.stra_size]) - repeat_num = device_num / self.stra_size - v = self.stra_size * repeat_num * repeat_num * repeat_num - sens = Tensor(1.0 / v, dtype=ms.float32) - net = MatmulAllgather("hccl_world_group") - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, sens) - return input_grad - - def grad_cmp(self): - single_results = 
self.grad_mindspore_impl_single() - reduce_results = self.grad_mindspore_impl_reduce() - single_result0 = self.get_parallel_blocks(single_results[0].asnumpy(), self.x_stra)[device_id % self.stra_size] - reduce_result0 = reduce_results[0].asnumpy() - single_result1 = self.get_parallel_blocks(single_results[1].asnumpy(), self.y_stra)[device_id % self.stra_size] - reduce_result1 = reduce_results[1].asnumpy() - assert np.allclose(single_result0, reduce_result0, 0.0001, 0.0001) - assert np.allclose(single_result1, reduce_result1, 0.0001, 0.0001) - - -def test_reduce_grad(): - inputx_shape = (64, 32) - inputy_shape = (32, 64) - fact = MatmulAllgatherFactory(inputx_shape, inputy_shape, (4, 1), (1, 4)) - fact.grad_cmp() +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import numpy as np + +import mindspore as ms +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.ops import operations as P +from mindspore.ops.composite import grad_all_with_sens + +device_num = 4 +device_id = int(os.environ["RANK_ID"]) +path = "./output/" + + +def setup_module(): + print("~~~~~~~~~~~set up~~~~~~~~~~~~~") + context.set_context(mode=context.GRAPH_MODE) + context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) + distributedTool.init() + print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") + + +def teardown_module(): + print("~~~~~~~~~~~~tear down~~~~~~~~~~") + + +class MatmulSingle(Cell): + def __init__(self, transpose_a=False, transpose_b=False): + super(MatmulSingle, self).__init__() + self.matmul = P.MatMul(transpose_a, transpose_b) + self.pow = P.Pow() + self.reduce_sum = P.ReduceSum() + + def construct(self, x, y): + out = self.matmul(x, y) + out = self.pow(out, 2.0) + out = self.reduce_sum(out, None) + return out + + +class MatmulAllgather(Cell): + def __init__(self, group, transpose_a=False, transpose_b=False): + super(MatmulAllgather, self).__init__() + self.allgather = P.AllGather(group=group) + self.matmul = P.MatMul(transpose_a, transpose_b) + self.pow = P.Pow() + self.reduce_sum = P.ReduceSum() + self.allreduce = P.AllReduce(group=group) + + def construct(self, x, y): + x = self.allgather(x) + out = self.matmul(x, y) + out = self.pow(out, 2.0) + out = self.reduce_sum(out, None) + out = self.allreduce(out) + return out + + +class Grad(Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.network = network + + def construct(self, x, y, sens): + return grad_all_with_sens(self.network)(x, y, sens) + + +class MatmulAllgatherFactory: + def __init__(self, inputx_shape, inputy_shape, x_stra, y_stra): + self.inputx = self.gen_value(inputx_shape, 10) + self.inputy = self.gen_value(inputy_shape, 20) + self.x_stra = x_stra + self.y_stra = y_stra + stra_size = 1 + for s in x_stra: + stra_size = stra_size * s + self.stra_size = stra_size + + def gen_value(self, input_shape, delta): + size = 1 + 
for s in input_shape: + size = size * s + number_range = min(100, size) + input_np = np.reshape(np.arange(0, size) % number_range - delta, input_shape).astype(np.float32) + return input_np + + def get_parallel_blocks(self, input_, strategy): + blocks = [input_] + i = 0 + for stra in strategy: + temp = [] + while len(blocks) > 0: + block = blocks.pop(0) + temp.extend(np.split(block, stra, axis=i)) + blocks.extend(temp) + i += 1 + return blocks + + def grad_mindspore_impl_single(self): + x = Tensor(self.inputx) + y = Tensor(self.inputy) + sens = Tensor(1.0, dtype=ms.float32) + net = MatmulSingle() + grad_net = Grad(net) + grad_net.set_train() + input_grad = grad_net(x, y, sens) + return input_grad + + def grad_mindspore_impl_reduce(self): + inputxs = self.get_parallel_blocks(self.inputx, self.x_stra) + inputys = self.get_parallel_blocks(self.inputy, self.y_stra) + x = Tensor(inputxs[device_id % self.stra_size]) + y = Tensor(inputys[device_id % self.stra_size]) + repeat_num = device_num / self.stra_size + v = self.stra_size * repeat_num * repeat_num * repeat_num + sens = Tensor(1.0 / v, dtype=ms.float32) + net = MatmulAllgather("hccl_world_group") + grad_net = Grad(net) + grad_net.set_train() + input_grad = grad_net(x, y, sens) + return input_grad + + def grad_cmp(self): + single_results = self.grad_mindspore_impl_single() + reduce_results = self.grad_mindspore_impl_reduce() + single_result0 = self.get_parallel_blocks(single_results[0].asnumpy(), self.x_stra)[device_id % self.stra_size] + reduce_result0 = reduce_results[0].asnumpy() + single_result1 = self.get_parallel_blocks(single_results[1].asnumpy(), self.y_stra)[device_id % self.stra_size] + reduce_result1 = reduce_results[1].asnumpy() + assert np.allclose(single_result0, reduce_result0, 0.0001, 0.0001) + assert np.allclose(single_result1, reduce_result1, 0.0001, 0.0001) + + +def test_reduce_grad(): + inputx_shape = (64, 32) + inputy_shape = (32, 64) + fact = MatmulAllgatherFactory(inputx_shape, inputy_shape, (4, 1), (1, 4)) + fact.grad_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allreduce_4p.py b/tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allreduce_4p.py index b206746930..6ea30fac2d 100644 --- a/tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allreduce_4p.py +++ b/tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allreduce_4p.py @@ -1,175 +1,175 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import numpy as np -import os - -import mindspore as ms -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class MatmulSingle(Cell): - def __init__(self, transpose_a=False, transpose_b=False): - super(MatmulSingle, self).__init__() - self.matmul1 = P.MatMul(transpose_a, transpose_b) - self.matmul2 = P.MatMul(transpose_a, transpose_b) - self.pow = P.Pow() - self.reduce_sum = P.ReduceSum() - - def construct(self, x, y, z): - out = self.matmul1(x, y) - out = self.matmul2(out, z) - out = self.pow(out, 2.0) - out = self.reduce_sum(out, None) - return out - - -class MatmulReduce(Cell): - def __init__(self, group, transpose_a=False, transpose_b=False): - super(MatmulReduce, self).__init__() - self.matmul1 = P.MatMul(transpose_a, transpose_b) - self.allreduce1 = P.AllReduce(group=group) - self.matmul2 = P.MatMul(transpose_a, transpose_b) - self.pow = P.Pow() - self.reduce_sum = P.ReduceSum() - self.allreduce2 = P.AllReduce(group=group) - - def construct(self, x, y, z): - out = self.matmul1(x, y) - out = self.allreduce1(out) - out = self.matmul2(out, z) - out = self.pow(out, 2.0) - out = self.reduce_sum(out, None) - out = self.allreduce2(out) - return out - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, x, y, z, sens): - return grad_all_with_sens(self.network)(x, y, z, sens) - - -class MatmulReduceFactory: - def __init__(self, inputx_shape, inputy_shape, inputz_shape, x_stra, y_stra, z_stra): - self.inputx = self.GenValue(inputx_shape, 10) - self.inputy = self.GenValue(inputy_shape, 20) - self.inputz = self.GenValue(inputz_shape, 30) - self.x_stra = x_stra - self.y_stra = y_stra - self.z_stra = z_stra - stra_size = 1 - for s in x_stra: - stra_size = stra_size * s - self.stra_size = stra_size - - def GenValue(self, input_shape, delta): - size = 1 - for s in input_shape: - size = size * s - number_range = min(100, size) - input_np = np.reshape(np.arange(0, size) % number_range - delta, input_shape).astype(np.float32) - return input_np - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def grad_mindspore_impl_single(self): - x = Tensor(self.inputx) - y = Tensor(self.inputy) - z = Tensor(self.inputz) - sens = Tensor(1.0, dtype=ms.float32) - net = MatmulSingle() - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, z, sens) - return input_grad - - def grad_mindspore_impl_reduce(self): - inputxs = self.get_parallel_blocks(self.inputx, self.x_stra) - inputys = self.get_parallel_blocks(self.inputy, self.y_stra) - inputzs = self.get_parallel_blocks(self.inputz, self.z_stra) - x = Tensor(inputxs[device_id % self.stra_size]) - y = Tensor(inputys[device_id % 
self.stra_size]) - z = Tensor(inputzs[device_id % self.stra_size]) - repeat_num = device_num / self.stra_size - v = self.stra_size * repeat_num * repeat_num * repeat_num - sens = Tensor(1.0 / v, dtype=ms.float32) - net = MatmulReduce("hccl_world_group") - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, z, sens) - return input_grad - - def grad_cmp(self): - single_results = self.grad_mindspore_impl_single() - reduce_results = self.grad_mindspore_impl_reduce() - single_result0 = self.get_parallel_blocks(single_results[0].asnumpy(), self.x_stra)[device_id % self.stra_size] - reduce_result0 = reduce_results[0].asnumpy() - single_result1 = self.get_parallel_blocks(single_results[1].asnumpy(), self.y_stra)[device_id % self.stra_size] - reduce_result1 = reduce_results[1].asnumpy() - single_result2 = self.get_parallel_blocks(single_results[2].asnumpy(), self.z_stra)[device_id % self.stra_size] - reduce_result2 = reduce_results[2].asnumpy() - assert np.allclose(single_result0, reduce_result0, 0.0001, 0.0001) - assert np.allclose(single_result1, reduce_result1, 0.0001, 0.0001) - assert np.allclose(single_result2, reduce_result2, 0.0001, 0.0001) - - -def test_reduce_grad(): - inputx_shape = (32, 64) - inputy_shape = (64, 64) - inputz_shape = (64, 32) - fact = MatmulReduceFactory(inputx_shape, inputy_shape, inputz_shape, (1, 4), (4, 1), (1, 4)) - fact.grad_cmp() - - -def test_reduce_grad_repeat(): - inputx_shape = (32, 64) - inputy_shape = (64, 64) - inputz_shape = (64, 32) - fact = MatmulReduceFactory(inputx_shape, inputy_shape, inputz_shape, (1, 2), (2, 1), (1, 2)) - fact.grad_cmp() +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import numpy as np + +import mindspore as ms +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.ops import operations as P +from mindspore.ops.composite import grad_all_with_sens + +device_num = 4 +device_id = int(os.environ["RANK_ID"]) +path = "./output/" + + +def setup_module(): + print("~~~~~~~~~~~set up~~~~~~~~~~~~~") + context.set_context(mode=context.GRAPH_MODE) + context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) + distributedTool.init() + print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") + + +def teardown_module(): + print("~~~~~~~~~~~~tear down~~~~~~~~~~") + + +class MatmulSingle(Cell): + def __init__(self, transpose_a=False, transpose_b=False): + super(MatmulSingle, self).__init__() + self.matmul1 = P.MatMul(transpose_a, transpose_b) + self.matmul2 = P.MatMul(transpose_a, transpose_b) + self.pow = P.Pow() + self.reduce_sum = P.ReduceSum() + + def construct(self, x, y, z): + out = self.matmul1(x, y) + out = self.matmul2(out, z) + out = self.pow(out, 2.0) + out = self.reduce_sum(out, None) + return out + + +class MatmulReduce(Cell): + def __init__(self, group, transpose_a=False, transpose_b=False): + super(MatmulReduce, self).__init__() + self.matmul1 = P.MatMul(transpose_a, transpose_b) + self.allreduce1 = P.AllReduce(group=group) + self.matmul2 = P.MatMul(transpose_a, transpose_b) + self.pow = P.Pow() + self.reduce_sum = P.ReduceSum() + self.allreduce2 = P.AllReduce(group=group) + + def construct(self, x, y, z): + out = self.matmul1(x, y) + out = self.allreduce1(out) + out = self.matmul2(out, z) + out = self.pow(out, 2.0) + out = self.reduce_sum(out, None) + out = self.allreduce2(out) + return out + + +class Grad(Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.network = network + + def construct(self, x, y, z, sens): + return grad_all_with_sens(self.network)(x, y, z, sens) + + +class MatmulReduceFactory: + def __init__(self, inputx_shape, inputy_shape, inputz_shape, x_stra, y_stra, z_stra): + self.inputx = self.gen_value(inputx_shape, 10) + self.inputy = self.gen_value(inputy_shape, 20) + self.inputz = self.gen_value(inputz_shape, 30) + self.x_stra = x_stra + self.y_stra = y_stra + self.z_stra = z_stra + stra_size = 1 + for s in x_stra: + stra_size = stra_size * s + self.stra_size = stra_size + + def gen_value(self, input_shape, delta): + size = 1 + for s in input_shape: + size = size * s + number_range = min(100, size) + input_np = np.reshape(np.arange(0, size) % number_range - delta, input_shape).astype(np.float32) + return input_np + + def get_parallel_blocks(self, input_, strategy): + blocks = [input_] + i = 0 + for stra in strategy: + temp = [] + while len(blocks) > 0: + block = blocks.pop(0) + temp.extend(np.split(block, stra, axis=i)) + blocks.extend(temp) + i += 1 + return blocks + + def grad_mindspore_impl_single(self): + x = Tensor(self.inputx) + y = Tensor(self.inputy) + z = Tensor(self.inputz) + sens = Tensor(1.0, dtype=ms.float32) + net = MatmulSingle() + grad_net = Grad(net) + grad_net.set_train() + input_grad = grad_net(x, y, z, sens) + return input_grad + + def grad_mindspore_impl_reduce(self): + inputxs = self.get_parallel_blocks(self.inputx, self.x_stra) + inputys = self.get_parallel_blocks(self.inputy, self.y_stra) + inputzs = self.get_parallel_blocks(self.inputz, self.z_stra) + x = Tensor(inputxs[device_id % self.stra_size]) + y = Tensor(inputys[device_id % 
self.stra_size]) + z = Tensor(inputzs[device_id % self.stra_size]) + repeat_num = device_num / self.stra_size + v = self.stra_size * repeat_num * repeat_num * repeat_num + sens = Tensor(1.0 / v, dtype=ms.float32) + net = MatmulReduce("hccl_world_group") + grad_net = Grad(net) + grad_net.set_train() + input_grad = grad_net(x, y, z, sens) + return input_grad + + def grad_cmp(self): + single_results = self.grad_mindspore_impl_single() + reduce_results = self.grad_mindspore_impl_reduce() + single_result0 = self.get_parallel_blocks(single_results[0].asnumpy(), self.x_stra)[device_id % self.stra_size] + reduce_result0 = reduce_results[0].asnumpy() + single_result1 = self.get_parallel_blocks(single_results[1].asnumpy(), self.y_stra)[device_id % self.stra_size] + reduce_result1 = reduce_results[1].asnumpy() + single_result2 = self.get_parallel_blocks(single_results[2].asnumpy(), self.z_stra)[device_id % self.stra_size] + reduce_result2 = reduce_results[2].asnumpy() + assert np.allclose(single_result0, reduce_result0, 0.0001, 0.0001) + assert np.allclose(single_result1, reduce_result1, 0.0001, 0.0001) + assert np.allclose(single_result2, reduce_result2, 0.0001, 0.0001) + + +def test_reduce_grad(): + inputx_shape = (32, 64) + inputy_shape = (64, 64) + inputz_shape = (64, 32) + fact = MatmulReduceFactory(inputx_shape, inputy_shape, inputz_shape, (1, 4), (4, 1), (1, 4)) + fact.grad_cmp() + + +def test_reduce_grad_repeat(): + inputx_shape = (32, 64) + inputy_shape = (64, 64) + inputz_shape = (64, 32) + fact = MatmulReduceFactory(inputx_shape, inputy_shape, inputz_shape, (1, 2), (2, 1), (1, 2)) + fact.grad_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/l2normalize/_test_l2normalize_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/l2normalize/_test_l2normalize_parallel_4p.py index a9cd30cccf..866efdeba7 100644 --- a/tests/ut/python/parallel/parallel_end_to_end/l2normalize/_test_l2normalize_parallel_4p.py +++ b/tests/ut/python/parallel/parallel_end_to_end/l2normalize/_test_l2normalize_parallel_4p.py @@ -1,207 +1,206 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import numpy as np -import os -import pytest - -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class L2normalize(Cell): - def __init__(self, axis=0, epsilon=1e-4, strategy0=None, strategy1=None): - super(L2normalize, self).__init__() - self.add = P.TensorAdd(strategy=strategy0) - self.l2norm = P.L2Normalize(axis, epsilon, strategy1) - - def construct(self, x, y): - out = self.add(x, y) - out = self.l2norm(out) - return out - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, x, y, output_grad): - return grad_all_with_sens(self.network)(x, y, output_grad) - - -class L2normalizeFactory: - def __init__(self, input_shape, axis, strategy0, strategy1): - prefix = "" - size = 1 - for s in input_shape: - prefix = prefix + str(s) - size = size * s - self.prefix = prefix - number_range = min(1000, size) - self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( - np.float32) - self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( - np.float32) - target_shape = input_shape - self.target_shape = target_shape - target_size = 1 - for s in target_shape: - target_size = target_size * s - number_range = min(1000, target_size) - self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2, - target_shape).astype(np.float32) - self.axis = axis - self.epsilon = 1e-4 - self.strategy0 = strategy0 - self.strategy1 = strategy1 - out_strategy = strategy1[1] - self.out_strategy = out_strategy - need_dev_num0 = 1 - need_dev_num1 = 1 - for s in strategy0[1]: - need_dev_num0 = need_dev_num0 * s - for s in out_strategy: - need_dev_num1 = need_dev_num1 * s - self.x_id = device_id % need_dev_num0 - self.y_id = device_id % need_dev_num0 - self.out_id = device_id % need_dev_num1 - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def forward_mindspore_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - net = L2normalize(self.axis, self.epsilon) - out = net(x, y) - return out.asnumpy() - - def forward_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net.set_auto_parallel() - out = net(x, 
y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) - return out.asnumpy() - - def grad_mindspore_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - output_grad = Tensor(self.output_grad_np) - net = L2normalize(self.axis, self.epsilon) - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, output_grad) - return input_grad - - def grad_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - output_grad = Tensor(self.output_grad_np) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - output_grad1 = Tensor(outgrads[self.out_id]) - net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1) - grad_net = Grad(net) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - grad_net.set_auto_parallel() - grad_net.set_train() - input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], - parallel_inputs_run=[x1, y1, output_grad1]) - return input_grad - - def forward_cmp(self): - out_mindspore = self.forward_mindspore_impl() - out_mindspore_parallel = self.forward_mindspore_parallel_impl() - out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) - assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001) - - def grad_cmp(self): - input_grad_mindspore = self.grad_mindspore_impl() - input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() - input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() - input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() - input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() - input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() - input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) - input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) - assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) - assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) - - -def test_reid_l2normalize_input_128_512(): - input_shape = (128, 512) - axis = 0 - fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4))) - fact.forward_cmp() - - -def test_reid_l2normalize_grad_input_128_512(): - input_shape = (128, 512) - axis = 0 - fact = L2normalizeFactory(input_shape, axis, (0, (4, 1), (4, 1)), strategy1=(0, (1, 4))) - fact.grad_cmp() - - -def test_reid_l2normalize_input_128_512_repeat(): - input_shape = (128, 512) - axis = 0 - fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (1, 2), (1, 2)), strategy1=(0, (1, 2))) - fact.forward_cmp() - - -def test_reid_l2normalize_grad_input_128_512_repeat(): - input_shape = (128, 512) - axis = 0 - fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (1, 2), (1, 2)), strategy1=(0, (1, 2))) - fact.grad_cmp() +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import numpy as np + +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.ops import operations as P +from mindspore.ops.composite import grad_all_with_sens + +device_num = 4 +device_id = int(os.environ["RANK_ID"]) +path = "./output/" + + +def setup_module(): + print("~~~~~~~~~~~set up~~~~~~~~~~~~~") + context.set_context(mode=context.GRAPH_MODE) + context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) + distributedTool.init() + distributedTool.create_group("0-3", [0, 1, 2, 3]) + print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") + + +def teardown_module(): + print("~~~~~~~~~~~~tear down~~~~~~~~~~") + + +class L2normalize(Cell): + def __init__(self, axis=0, epsilon=1e-4, strategy0=None, strategy1=None): + super(L2normalize, self).__init__() + self.add = P.TensorAdd(strategy=strategy0) + self.l2norm = P.L2Normalize(axis, epsilon, strategy1) + + def construct(self, x, y): + out = self.add(x, y) + out = self.l2norm(out) + return out + + +class Grad(Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.network = network + + def construct(self, x, y, output_grad): + return grad_all_with_sens(self.network)(x, y, output_grad) + + +class L2normalizeFactory: + def __init__(self, input_shape, axis, strategy0, strategy1): + prefix = "" + size = 1 + for s in input_shape: + prefix = prefix + str(s) + size = size * s + self.prefix = prefix + number_range = min(1000, size) + self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( + np.float32) + self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( + np.float32) + target_shape = input_shape + self.target_shape = target_shape + target_size = 1 + for s in target_shape: + target_size = target_size * s + number_range = min(1000, target_size) + self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2, + target_shape).astype(np.float32) + self.axis = axis + self.epsilon = 1e-4 + self.strategy0 = strategy0 + self.strategy1 = strategy1 + out_strategy = strategy1[1] + self.out_strategy = out_strategy + need_dev_num0 = 1 + need_dev_num1 = 1 + for s in strategy0[1]: + need_dev_num0 = need_dev_num0 * s + for s in out_strategy: + need_dev_num1 = need_dev_num1 * s + self.x_id = device_id % need_dev_num0 + self.y_id = device_id % need_dev_num0 + self.out_id = device_id % need_dev_num1 + + def get_parallel_blocks(self, input_, strategy): + blocks = [input_] + i = 0 + for stra in strategy: + temp = [] + while len(blocks) > 0: + block = blocks.pop(0) + temp.extend(np.split(block, stra, axis=i)) + blocks.extend(temp) + i += 1 + return blocks + + def forward_mindspore_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + net = L2normalize(self.axis, self.epsilon) + out = net(x, y) + return out.asnumpy() + + def forward_mindspore_parallel_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + inputs_x = 
self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) + x1 = Tensor(inputs_x[self.x_id]) + y1 = Tensor(inputs_y[self.y_id]) + net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net.set_auto_parallel() + out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) + return out.asnumpy() + + def grad_mindspore_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + output_grad = Tensor(self.output_grad_np) + net = L2normalize(self.axis, self.epsilon) + grad_net = Grad(net) + grad_net.set_train() + input_grad = grad_net(x, y, output_grad) + return input_grad + + def grad_mindspore_parallel_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + output_grad = Tensor(self.output_grad_np) + inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) + outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) + x1 = Tensor(inputs_x[self.x_id]) + y1 = Tensor(inputs_y[self.y_id]) + output_grad1 = Tensor(outgrads[self.out_id]) + net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1) + grad_net = Grad(net) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + grad_net.set_auto_parallel() + grad_net.set_train() + input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], + parallel_inputs_run=[x1, y1, output_grad1]) + return input_grad + + def forward_cmp(self): + out_mindspore = self.forward_mindspore_impl() + out_mindspore_parallel = self.forward_mindspore_parallel_impl() + out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) + assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001) + + def grad_cmp(self): + input_grad_mindspore = self.grad_mindspore_impl() + input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() + input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() + input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() + input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() + input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() + input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) + input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) + assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) + assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) + + +def test_reid_l2normalize_input_128_512(): + input_shape = (128, 512) + axis = 0 + fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4))) + fact.forward_cmp() + + +def test_reid_l2normalize_grad_input_128_512(): + input_shape = (128, 512) + axis = 0 + fact = L2normalizeFactory(input_shape, axis, (0, (4, 1), (4, 1)), strategy1=(0, (1, 4))) + fact.grad_cmp() + + +def test_reid_l2normalize_input_128_512_repeat(): + input_shape = (128, 512) + axis = 0 + fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (1, 2), (1, 2)), strategy1=(0, (1, 2))) + fact.forward_cmp() + + +def test_reid_l2normalize_grad_input_128_512_repeat(): + input_shape = (128, 512) + axis = 0 + fact = L2normalizeFactory(input_shape, axis, 
strategy0=(0, (1, 2), (1, 2)), strategy1=(0, (1, 2))) + fact.grad_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/loss/_test_loss_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/loss/_test_loss_parallel_4p.py index b26b718068..6b8288e4bd 100644 --- a/tests/ut/python/parallel/parallel_end_to_end/loss/_test_loss_parallel_4p.py +++ b/tests/ut/python/parallel/parallel_end_to_end/loss/_test_loss_parallel_4p.py @@ -1,196 +1,195 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import os -import pytest - -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class AddRelu(Cell): - def __init__(self, strategy0=None, strategy1=None): - super(AddRelu, self).__init__() - self.add = P.TensorAdd(strategy=strategy0) - self.relu = P.ReLU(strategy=strategy1) - - def construct(self, x, y): - out = self.add(x, y) - out = self.relu(out) - return out - - -class NetWithLoss(Cell): - def __init__(self, network, strategy2=None): - super(NetWithLoss, self).__init__() - self.loss = P.SoftmaxCrossEntropyWithLogits(strategy=strategy2) - self.network = network - - def construct(self, x, y, b): - predict = self.network(x, y) - return self.loss(predict, b)[0] - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, x, y, b): - return grad_all(self.network)(x, y, b) - - -class AddReluFactory: - def __init__(self, input_shape, strategy0, strategy1, strategy2): - prefix = "" - size = 1 - for s in input_shape: - prefix = prefix + str(s) - size = size * s - self.prefix = prefix - number_range = min(1000, size) - self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( - np.float32) - self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( - np.float32) - target_shape = input_shape - self.target_shape = target_shape - target_size = 1 - for s in target_shape: - target_size = target_size * s - number_range = min(10, target_size) - self.output_grad_np = np.reshape((np.arange(0, target_size) % number_range) * 0.1, target_shape).astype( - np.float32) - self.strategy0 = strategy0 - self.strategy1 = strategy1 - self.strategy2 = strategy2 - out_strategy = strategy1[1] - self.out_strategy = out_strategy - 
need_dev_num0 = 1 - need_dev_num1 = 1 - for s in strategy0[1]: - need_dev_num0 = need_dev_num0 * s - for s in out_strategy: - need_dev_num1 = need_dev_num1 * s - self.x_id = device_id % need_dev_num0 - self.y_id = device_id % need_dev_num0 - self.out_id = device_id % need_dev_num1 - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def grad_mindspore_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - output_grad = Tensor(self.output_grad_np) - net = AddRelu() - net_with_loss = NetWithLoss(net) - grad_net = Grad(net_with_loss) - grad_net.set_train() - input_grads = [] - for i in range(0, 3): - input_grad = grad_net(x, y, output_grad) - input_grads.append(input_grad) - return input_grads - - def grad_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - output_grad = Tensor(self.output_grad_np) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - output_grad1 = Tensor(outgrads[self.out_id]) - net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1) - net_with_loss = NetWithLoss(net, strategy2=self.strategy2) - grad_net = Grad(net_with_loss) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - grad_net.set_auto_parallel() - grad_net.set_train() - input_grads = [] - for i in range(0, 3): - input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad], - parallel_inputs_run=[x1, y1, output_grad1]) - input_grads.append(input_grad) - return input_grads - - def grad_cmp(self): - input_grad_mindspores = self.grad_mindspore_impl() - input_grad_mindspore_parallels = self.grad_mindspore_parallel_impl() - for i in range(0, len(input_grad_mindspores)): - input_grad_mindspore = input_grad_mindspores[i] - input_grad_mindspore_parallel = input_grad_mindspore_parallels[i] - input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() - input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() - input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() - input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() - input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) - input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) - np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_single0.npy", - input_grad_blocks_0[self.x_id]) - np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_single1.npy", - input_grad_blocks_1[self.y_id]) - np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_parallel0.npy", - input_grad_mindspore_parallel0) - np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_parallel1.npy", - input_grad_mindspore_parallel1) - assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) - assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) - - -def test_reid_l2normalize_grad_input_128_512(): - input_shape = (128, 512) - fact = AddReluFactory(input_shape, strategy0=(0, (4, 1), (4, 1)), 
strategy1=(0, (4, 1)), - strategy2=(0, (4, 1), (4, 1))) - fact.grad_cmp() - - -def test_reid_l2normalize_grad_input_128_512_stridesplit(): - input_shape = (128, 512) - fact = AddReluFactory(input_shape, strategy0=(0, (1, 1), (1, 1)), strategy1=(0, (4, 1)), - strategy2=(0, (4, 1), (4, 1))) - fact.grad_cmp() +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import numpy as np + +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.ops import operations as P +from mindspore.ops.composite import grad_all + +device_num = 4 +device_id = int(os.environ["RANK_ID"]) +path = "./output/" + + +def setup_module(): + print("~~~~~~~~~~~set up~~~~~~~~~~~~~") + context.set_context(mode=context.GRAPH_MODE) + context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) + distributedTool.init() + distributedTool.create_group("0-3", [0, 1, 2, 3]) + print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") + + +def teardown_module(): + print("~~~~~~~~~~~~tear down~~~~~~~~~~") + + +class AddRelu(Cell): + def __init__(self, strategy0=None, strategy1=None): + super(AddRelu, self).__init__() + self.add = P.TensorAdd(strategy=strategy0) + self.relu = P.ReLU(strategy=strategy1) + + def construct(self, x, y): + out = self.add(x, y) + out = self.relu(out) + return out + + +class NetWithLoss(Cell): + def __init__(self, network, strategy2=None): + super(NetWithLoss, self).__init__() + self.loss = P.SoftmaxCrossEntropyWithLogits(strategy=strategy2) + self.network = network + + def construct(self, x, y, b): + predict = self.network(x, y) + return self.loss(predict, b)[0] + + +class Grad(Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.network = network + + def construct(self, x, y, b): + return grad_all(self.network)(x, y, b) + + +class AddReluFactory: + def __init__(self, input_shape, strategy0, strategy1, strategy2): + prefix = "" + size = 1 + for s in input_shape: + prefix = prefix + str(s) + size = size * s + self.prefix = prefix + number_range = min(1000, size) + self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( + np.float32) + self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( + np.float32) + target_shape = input_shape + self.target_shape = target_shape + target_size = 1 + for s in target_shape: + target_size = target_size * s + number_range = min(10, target_size) + self.output_grad_np = np.reshape((np.arange(0, target_size) % number_range) * 0.1, target_shape).astype( + np.float32) + self.strategy0 = strategy0 + self.strategy1 = strategy1 + self.strategy2 = strategy2 + out_strategy = strategy1[1] + self.out_strategy = out_strategy + need_dev_num0 = 1 + need_dev_num1 = 1 + for s in strategy0[1]: + need_dev_num0 = need_dev_num0 * s + for s in out_strategy: + need_dev_num1 = need_dev_num1 * s + 
self.x_id = device_id % need_dev_num0 + self.y_id = device_id % need_dev_num0 + self.out_id = device_id % need_dev_num1 + + def get_parallel_blocks(self, input_, strategy): + blocks = [input_] + i = 0 + for stra in strategy: + temp = [] + while len(blocks) > 0: + block = blocks.pop(0) + temp.extend(np.split(block, stra, axis=i)) + blocks.extend(temp) + i += 1 + return blocks + + def grad_mindspore_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + output_grad = Tensor(self.output_grad_np) + net = AddRelu() + net_with_loss = NetWithLoss(net) + grad_net = Grad(net_with_loss) + grad_net.set_train() + input_grads = [] + for i in range(0, 3): + input_grad = grad_net(x, y, output_grad) + input_grads.append(input_grad) + return input_grads + + def grad_mindspore_parallel_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + output_grad = Tensor(self.output_grad_np) + inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) + outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) + x1 = Tensor(inputs_x[self.x_id]) + y1 = Tensor(inputs_y[self.y_id]) + output_grad1 = Tensor(outgrads[self.out_id]) + net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1) + net_with_loss = NetWithLoss(net, strategy2=self.strategy2) + grad_net = Grad(net_with_loss) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + grad_net.set_auto_parallel() + grad_net.set_train() + input_grads = [] + for i in range(0, 3): + input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad], + parallel_inputs_run=[x1, y1, output_grad1]) + input_grads.append(input_grad) + return input_grads + + def grad_cmp(self): + input_grad_mindspores = self.grad_mindspore_impl() + input_grad_mindspore_parallels = self.grad_mindspore_parallel_impl() + for i in range(0, len(input_grad_mindspores)): + input_grad_mindspore = input_grad_mindspores[i] + input_grad_mindspore_parallel = input_grad_mindspore_parallels[i] + input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() + input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() + input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() + input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() + input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) + input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) + np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_single0.npy", + input_grad_blocks_0[self.x_id]) + np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_single1.npy", + input_grad_blocks_1[self.y_id]) + np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_parallel0.npy", + input_grad_mindspore_parallel0) + np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_parallel1.npy", + input_grad_mindspore_parallel1) + assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) + assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) + + +def test_reid_l2normalize_grad_input_128_512(): + input_shape = (128, 512) + fact = AddReluFactory(input_shape, strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (4, 1)), + strategy2=(0, (4, 1), (4, 1))) + fact.grad_cmp() + + +def test_reid_l2normalize_grad_input_128_512_stridesplit(): + input_shape = (128, 
512) + fact = AddReluFactory(input_shape, strategy0=(0, (1, 1), (1, 1)), strategy1=(0, (4, 1)), + strategy2=(0, (4, 1), (4, 1))) + fact.grad_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/matmul/_test_matmul_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/matmul/_test_matmul_parallel_4p.py index 8ce6860495..37ae0f72b0 100644 --- a/tests/ut/python/parallel/parallel_end_to_end/matmul/_test_matmul_parallel_4p.py +++ b/tests/ut/python/parallel/parallel_end_to_end/matmul/_test_matmul_parallel_4p.py @@ -1,329 +1,329 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import os -from numpy import allclose - -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class Matmul(Cell): - def __init__(self, transpose_a=False, transpose_b=False, strategy0=None, strategy1=None): - super(Matmul, self).__init__() - self.add = P.TensorAdd(strategy=strategy1) - self.matmul = P.MatMul(transpose_a, transpose_b, strategy=strategy0) - - def construct(self, x, w, z): - out = self.add(x, z) - return self.matmul(out, w) - - -class BatchMatMul(Cell): - def __init__(self, transpose_a=False, transpose_b=False, strategy0=None, strategy1=None): - super(BatchMatMul, self).__init__() - self.add = P.TensorAdd(strategy=strategy1) - self.batchmatmul = P.BatchMatMul(transpose_a, transpose_b, strategy=strategy0) - - def construct(self, x, w, z): - out = self.add(x, z) - return self.batchmatmul(out, w) - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, inputa, inputb, inputz, output_grad): - gout = grad_all_with_sens(self.network)(inputa, inputb, inputz, output_grad) - return gout - - -class BatchmatmulFactory: - def __init__(self, inputa_shape, inputb_shape, transpose_a, transpose_b, strategy, strategy_): - self.strategy = strategy - self.strategy_ = strategy_ - inputa_size = 1 - inputb_size = 1 - prefix = "" - for s in inputa_shape: - prefix = prefix + str(s) + "_" - inputa_size = inputa_size * s - prefix = prefix + "and" - for s in inputb_shape: - prefix = prefix + str(s) + "_" - inputb_size = inputb_size * s - number_rangea = min(1000, inputa_size) - number_rangeb = min(1000, inputb_size) - self.inputa = np.reshape(np.arange(0, inputa_size) % number_rangea - number_rangea 
/ 2, inputa_shape).astype( - np.float32) - self.inputb = np.reshape(np.arange(0, inputb_size) % number_rangeb - number_rangeb / 2, inputb_shape).astype( - np.float32) - self.inputz = np.zeros(self.inputa.shape).astype(np.float32) - self.transpose_a = transpose_a - self.transpose_b = transpose_b - - out_shape = [] - device_matrix = [] - out_strategy = [] - if transpose_a: - temp = inputa_shape[-1] - inputa_shape[-1] = inputa_shape[-2] - inputa_shape[-2] = temp - if transpose_b: - temp = inputb_shape[-1] - inputb_shape[-1] = inputb_shape[-2] - inputb_shape[-2] = temp - - if (len(inputa_shape) >= len(inputb_shape)): - out_shape = list(inputa_shape) - out_shape[-1] = inputb_shape[-1] - else: - out_shape = list(inputb_shape) - out_shape[-2] = inputa_shape[-2] - - strategy1 = list(self.strategy[1]) - strategy2 = list(self.strategy[2]) - if transpose_a: - temp = strategy1[-1] - strategy1[-1] = strategy1[-2] - strategy1[-2] = temp - if transpose_b: - temp = strategy2[-1] - strategy2[-1] = strategy2[-2] - strategy2[-2] = temp - - if (len(strategy1) >= len(strategy2)): - out_strategy = strategy1.copy() - out_strategy[-1] = strategy2[-1] - else: - out_strategy = strategy2.copy() - out_strategy[-2] = strategy1[-2] - device_matrix = out_strategy.copy() - device_matrix.insert(-1, strategy1[-1]) - self.out_strategy = out_strategy - - need_dev_num = 1 - for s in device_matrix: - need_dev_num = need_dev_num * s - self.need_dev_num = need_dev_num - self.device_matrix = device_matrix - - out_size = 1 - for s in out_shape: - out_size = out_size * s - number_range = min(1000, out_size) - self.output_grad_np = np.reshape(np.arange(0, out_size) % number_range - number_range / 2, out_shape).astype( - np.float32) - - device_index = self.id_to_list(device_id % need_dev_num, self.device_matrix) - x_index = device_index[:-1].copy() - if transpose_a: - temp = x_index[-1] - x_index[-1] = x_index[-2] - x_index[-2] = temp - y_index = device_index[:-3].copy() - y_index.append(device_index[-2]) - y_index.append(device_index[-1]) - if transpose_b: - temp = y_index[-1] - y_index[-1] = y_index[-2] - y_index[-2] = temp - - out_index = device_index[:-2].copy() - out_index.append(device_index[-1]) - - print(device_matrix) - print(device_index) - - need_dev_num_ = 1 - for s in strategy_[1]: - need_dev_num_ = need_dev_num_ * s - self.x_id = device_id % need_dev_num_ - self.y_id = self.list_to_id(y_index, self.strategy[2]) - self.out_id = self.list_to_id(out_index, self.out_strategy) - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - """ - shape:每一维的上限,如(2,4,8) - """ - - def id_to_list(self, id, shape): - result = [] - r = id - for i in range(0, len(shape)): - v = 1 - for j in range(i + 1, len(shape)): - v = v * shape[j] - result.append(r // v) - r = r % v - return result - - def list_to_id(self, id_list, shape): - result = 0 - for i in range(0, len(id_list)): - v = 1 - for j in range(i + 1, len(id_list)): - v = v * shape[j] - result = result + id_list[i] * v - return result - - def forward_mindspore_impl(self): - if len(self.inputa.shape) > 2: - matmul = BatchMatMul(self.transpose_a, self.transpose_b) - else: - matmul = Matmul(self.transpose_a, self.transpose_b) - matmul.set_train() - out_me = matmul(Tensor(self.inputa), Tensor(self.inputb), Tensor(self.inputz)) - return out_me.asnumpy() - - def 
forward_mindspore_parallel_impl(self): - if len(self.inputa.shape) > 2: - matmul = BatchMatMul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_) - else: - matmul = Matmul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - x = Tensor(self.inputa) - y = Tensor(self.inputb) - z = Tensor(self.inputz) - xs = self.get_parallel_blocks(self.inputa, self.strategy_[1]) - ys = self.get_parallel_blocks(self.inputb, self.strategy[2]) - zs = self.get_parallel_blocks(self.inputz, self.strategy_[1]) - x1 = Tensor(xs[self.x_id]) # - y1 = Tensor(ys[self.y_id]) # 需要从设备矩阵推导 - z1 = Tensor(zs[self.x_id]) - matmul.set_train() - matmul.set_auto_parallel() - out_me = matmul(x, y, z, parallel_inputs_compile=[x, y, z], parallel_inputs_run=[x1, y1, z1]) - return out_me.asnumpy() - - def grad_mindspore_impl(self): - x = Tensor(self.inputa) - y = Tensor(self.inputb) - z = Tensor(self.inputz) - if len(self.inputa.shape) > 2: - matmul = BatchMatMul(self.transpose_a, self.transpose_b) - else: - matmul = Matmul(self.transpose_a, self.transpose_b) - net_me = Grad(matmul) - net_me.set_train() - out_grad_me = Tensor(self.output_grad_np) - out_grad = net_me(x, y, z, out_grad_me) - return out_grad - - def grad_mindspore_parallel_impl(self): - if len(self.inputa.shape) > 2: - matmul = BatchMatMul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_) - else: - matmul = Matmul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_) - x = Tensor(self.inputa) - y = Tensor(self.inputb) - z = Tensor(self.inputz) - out_grad_me = Tensor(self.output_grad_np) - - xs = self.get_parallel_blocks(self.inputa, self.strategy_[1]) - ys = self.get_parallel_blocks(self.inputb, self.strategy[2]) - zs = self.get_parallel_blocks(self.inputz, self.strategy_[1]) - out_grads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) - - x1 = Tensor(xs[self.x_id]) # 需要从设备矩阵推导 - y1 = Tensor(ys[self.y_id]) # - z1 = Tensor(zs[self.x_id]) - out_grad1 = Tensor(out_grads[self.out_id]) - net_me = Grad(matmul) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net_me.set_auto_parallel() - net_me.set_train() - - out_grad = net_me(x, y, z, out_grad_me, parallel_inputs_compile=[x, y, z, out_grad1], - parallel_inputs_run=[x1, y1, z1, out_grad1]) - return out_grad - - def forward_cmp(self): - out_mindspore = self.forward_mindspore_impl() - out_mindspores = self.get_parallel_blocks(out_mindspore, self.out_strategy) - out_mindspore_parallel = self.forward_mindspore_parallel_impl() - assert allclose(out_mindspores[self.out_id], out_mindspore_parallel, 0.0001, 0.0001) - - def grad_cmp(self): - input_grad_mindspore = self.grad_mindspore_impl() - input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() - input_grad_mindspores0 = self.get_parallel_blocks(input_grad_mindspore[0].asnumpy(), self.strategy_[1]) - input_grad_mindspores1 = self.get_parallel_blocks(input_grad_mindspore[1].asnumpy(), self.strategy[2]) - input_grad_mindspores2 = self.get_parallel_blocks(input_grad_mindspore[2].asnumpy(), self.strategy_[1]) - assert allclose(input_grad_mindspores0[self.x_id], input_grad_mindspore_parallel[0].asnumpy(), 0.0001, 0.0001) - assert allclose(input_grad_mindspores1[self.y_id], input_grad_mindspore_parallel[1].asnumpy(), 0.0001, 0.0001) - assert allclose(input_grad_mindspores2[self.x_id], 
input_grad_mindspore_parallel[2].asnumpy(), 0.0001, 0.0001) - - -def test_reid_batchmatmul_inputa_128_512_inputb_2000_512(): - inputa = [128, 512] - inputb = [2000, 512] - fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (2, 2), (1, 2)), (0, (2, 2), (2, 2))) - fact.forward_cmp() - - -def test_reid_batchmatmul_grad_inputa_128_512_inputb_2000_512(): - inputa = [128, 512] - inputb = [2000, 512] - fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (2, 2), (1, 2)), (0, (2, 2), (2, 2))) - fact.grad_cmp() - - -def test_reid_batchmatmul_inputa_128_512_inputb_2000_512_redistribution(): - inputa = [128, 512] - inputb = [2000, 512] - fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (1, 2), (1, 2)), (0, (2, 2), (2, 2))) - fact.forward_cmp() - - -def test_reid_batchmatmul_grad_inputa_128_512_inputb_2000_512_redistribution(): - inputa = [128, 512] - inputb = [2000, 512] - fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (1, 2), (1, 2)), (0, (2, 2), (2, 2))) - fact.grad_cmp() +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import numpy as np +from numpy import allclose + +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.ops import operations as P +from mindspore.ops.composite import grad_all_with_sens + +device_num = 4 +device_id = int(os.environ["RANK_ID"]) +path = "./output/" + + +def setup_module(): + print("~~~~~~~~~~~set up~~~~~~~~~~~~~") + context.set_context(mode=context.GRAPH_MODE) + context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) + distributedTool.init() + distributedTool.create_group("0-3", [0, 1, 2, 3]) + print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") + + +def teardown_module(): + print("~~~~~~~~~~~~tear down~~~~~~~~~~") + + +class Matmul(Cell): + def __init__(self, transpose_a=False, transpose_b=False, strategy0=None, strategy1=None): + super(Matmul, self).__init__() + self.add = P.TensorAdd(strategy=strategy1) + self.matmul = P.MatMul(transpose_a, transpose_b, strategy=strategy0) + + def construct(self, x, w, z): + out = self.add(x, z) + return self.matmul(out, w) + + +class BatchMatMul(Cell): + def __init__(self, transpose_a=False, transpose_b=False, strategy0=None, strategy1=None): + super(BatchMatMul, self).__init__() + self.add = P.TensorAdd(strategy=strategy1) + self.batchmatmul = P.BatchMatMul(transpose_a, transpose_b, strategy=strategy0) + + def construct(self, x, w, z): + out = self.add(x, z) + return self.batchmatmul(out, w) + + +class Grad(Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.network = network + + def construct(self, inputa, inputb, inputz, output_grad): + gout = grad_all_with_sens(self.network)(inputa, inputb, inputz, output_grad) + return gout + + +class BatchmatmulFactory: + def __init__(self, inputa_shape, inputb_shape, transpose_a, transpose_b, strategy, strategy_): + 
self.strategy = strategy
+        self.strategy_ = strategy_
+        inputa_size = 1
+        inputb_size = 1
+        prefix = ""
+        for s in inputa_shape:
+            prefix = prefix + str(s) + "_"
+            inputa_size = inputa_size * s
+        prefix = prefix + "and"
+        for s in inputb_shape:
+            prefix = prefix + str(s) + "_"
+            inputb_size = inputb_size * s
+        number_rangea = min(1000, inputa_size)
+        number_rangeb = min(1000, inputb_size)
+        self.inputa = np.reshape(np.arange(0, inputa_size) % number_rangea - number_rangea / 2, inputa_shape).astype(
+            np.float32)
+        self.inputb = np.reshape(np.arange(0, inputb_size) % number_rangeb - number_rangeb / 2, inputb_shape).astype(
+            np.float32)
+        self.inputz = np.zeros(self.inputa.shape).astype(np.float32)
+        self.transpose_a = transpose_a
+        self.transpose_b = transpose_b
+
+        out_shape = []
+        device_matrix = []
+        out_strategy = []
+        if transpose_a:
+            temp = inputa_shape[-1]
+            inputa_shape[-1] = inputa_shape[-2]
+            inputa_shape[-2] = temp
+        if transpose_b:
+            temp = inputb_shape[-1]
+            inputb_shape[-1] = inputb_shape[-2]
+            inputb_shape[-2] = temp
+
+        if len(inputa_shape) >= len(inputb_shape):
+            out_shape = list(inputa_shape)
+            out_shape[-1] = inputb_shape[-1]
+        else:
+            out_shape = list(inputb_shape)
+            out_shape[-2] = inputa_shape[-2]
+
+        strategy1 = list(self.strategy[1])
+        strategy2 = list(self.strategy[2])
+        if transpose_a:
+            temp = strategy1[-1]
+            strategy1[-1] = strategy1[-2]
+            strategy1[-2] = temp
+        if transpose_b:
+            temp = strategy2[-1]
+            strategy2[-1] = strategy2[-2]
+            strategy2[-2] = temp
+
+        if len(strategy1) >= len(strategy2):
+            out_strategy = strategy1.copy()
+            out_strategy[-1] = strategy2[-1]
+        else:
+            out_strategy = strategy2.copy()
+            out_strategy[-2] = strategy1[-2]
+        device_matrix = out_strategy.copy()
+        device_matrix.insert(-1, strategy1[-1])
+        self.out_strategy = out_strategy
+
+        need_dev_num = 1
+        for s in device_matrix:
+            need_dev_num = need_dev_num * s
+        self.need_dev_num = need_dev_num
+        self.device_matrix = device_matrix
+
+        out_size = 1
+        for s in out_shape:
+            out_size = out_size * s
+        number_range = min(1000, out_size)
+        self.output_grad_np = np.reshape(np.arange(0, out_size) % number_range - number_range / 2, out_shape).astype(
+            np.float32)
+
+        device_index = self.id_to_list(device_id % need_dev_num, self.device_matrix)
+        x_index = device_index[:-1].copy()
+        if transpose_a:
+            temp = x_index[-1]
+            x_index[-1] = x_index[-2]
+            x_index[-2] = temp
+        y_index = device_index[:-3].copy()
+        y_index.append(device_index[-2])
+        y_index.append(device_index[-1])
+        if transpose_b:
+            temp = y_index[-1]
+            y_index[-1] = y_index[-2]
+            y_index[-2] = temp
+
+        out_index = device_index[:-2].copy()
+        out_index.append(device_index[-1])
+
+        print(device_matrix)
+        print(device_index)
+
+        need_dev_num_ = 1
+        for s in strategy_[1]:
+            need_dev_num_ = need_dev_num_ * s
+        self.x_id = device_id % need_dev_num_
+        self.y_id = self.list_to_id(y_index, self.strategy[2])
+        self.out_id = self.list_to_id(out_index, self.out_strategy)
+
+    def get_parallel_blocks(self, input_, strategy):
+        blocks = [input_]
+        i = 0
+        for stra in strategy:
+            temp = []
+            while len(blocks) > 0:
+                block = blocks.pop(0)
+                temp.extend(np.split(block, stra, axis=i))
+            blocks.extend(temp)
+            i += 1
+        return blocks
+
+
+    def id_to_list(self, id_, shape):
+        """
+        shape: the upper bound of each dimension, e.g. (2, 4, 8)
+        """
+        result = []
+        r = id_
+        for i in range(0, len(shape)):
+            v = 1
+            for j in range(i + 1, len(shape)):
+                v = v * shape[j]
+            result.append(r // v)
+            r = r % v
+        return result
+
+    def list_to_id(self, id_list, shape):
+        result = 0
+        for i in range(0, 
len(id_list)):
+            v = 1
+            for j in range(i + 1, len(id_list)):
+                v = v * shape[j]
+            result = result + id_list[i] * v
+        return result
+
+    def forward_mindspore_impl(self):
+        if len(self.inputa.shape) > 2:
+            matmul = BatchMatMul(self.transpose_a, self.transpose_b)
+        else:
+            matmul = Matmul(self.transpose_a, self.transpose_b)
+        matmul.set_train()
+        out_me = matmul(Tensor(self.inputa), Tensor(self.inputb), Tensor(self.inputz))
+        return out_me.asnumpy()
+
+    def forward_mindspore_parallel_impl(self):
+        if len(self.inputa.shape) > 2:
+            matmul = BatchMatMul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_)
+        else:
+            matmul = Matmul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_)
+        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+        x = Tensor(self.inputa)
+        y = Tensor(self.inputb)
+        z = Tensor(self.inputz)
+        xs = self.get_parallel_blocks(self.inputa, self.strategy_[1])
+        ys = self.get_parallel_blocks(self.inputb, self.strategy[2])
+        zs = self.get_parallel_blocks(self.inputz, self.strategy_[1])
+        x1 = Tensor(xs[self.x_id])  #
+        y1 = Tensor(ys[self.y_id])  # needs to be derived from the device matrix
+        z1 = Tensor(zs[self.x_id])
+        matmul.set_train()
+        matmul.set_auto_parallel()
+        out_me = matmul(x, y, z, parallel_inputs_compile=[x, y, z], parallel_inputs_run=[x1, y1, z1])
+        return out_me.asnumpy()
+
+    def grad_mindspore_impl(self):
+        x = Tensor(self.inputa)
+        y = Tensor(self.inputb)
+        z = Tensor(self.inputz)
+        if len(self.inputa.shape) > 2:
+            matmul = BatchMatMul(self.transpose_a, self.transpose_b)
+        else:
+            matmul = Matmul(self.transpose_a, self.transpose_b)
+        net_me = Grad(matmul)
+        net_me.set_train()
+        out_grad_me = Tensor(self.output_grad_np)
+        out_grad = net_me(x, y, z, out_grad_me)
+        return out_grad
+
+    def grad_mindspore_parallel_impl(self):
+        if len(self.inputa.shape) > 2:
+            matmul = BatchMatMul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_)
+        else:
+            matmul = Matmul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_)
+        x = Tensor(self.inputa)
+        y = Tensor(self.inputb)
+        z = Tensor(self.inputz)
+        out_grad_me = Tensor(self.output_grad_np)
+
+        xs = self.get_parallel_blocks(self.inputa, self.strategy_[1])
+        ys = self.get_parallel_blocks(self.inputb, self.strategy[2])
+        zs = self.get_parallel_blocks(self.inputz, self.strategy_[1])
+        out_grads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
+
+        x1 = Tensor(xs[self.x_id])  # needs to be derived from the device matrix
+        y1 = Tensor(ys[self.y_id])  #
+        z1 = Tensor(zs[self.x_id])
+        out_grad1 = Tensor(out_grads[self.out_id])
+        net_me = Grad(matmul)
+        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+        net_me.set_auto_parallel()
+        net_me.set_train()
+
+        out_grad = net_me(x, y, z, out_grad_me, parallel_inputs_compile=[x, y, z, out_grad1],
+                          parallel_inputs_run=[x1, y1, z1, out_grad1])
+        return out_grad
+
+    def forward_cmp(self):
+        out_mindspore = self.forward_mindspore_impl()
+        out_mindspores = self.get_parallel_blocks(out_mindspore, self.out_strategy)
+        out_mindspore_parallel = self.forward_mindspore_parallel_impl()
+        assert allclose(out_mindspores[self.out_id], out_mindspore_parallel, 0.0001, 0.0001)
+
+    def grad_cmp(self):
+        input_grad_mindspore = self.grad_mindspore_impl()
+        input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
+        input_grad_mindspores0 = self.get_parallel_blocks(input_grad_mindspore[0].asnumpy(), self.strategy_[1])
+        input_grad_mindspores1 = 
self.get_parallel_blocks(input_grad_mindspore[1].asnumpy(), self.strategy[2]) + input_grad_mindspores2 = self.get_parallel_blocks(input_grad_mindspore[2].asnumpy(), self.strategy_[1]) + assert allclose(input_grad_mindspores0[self.x_id], input_grad_mindspore_parallel[0].asnumpy(), 0.0001, 0.0001) + assert allclose(input_grad_mindspores1[self.y_id], input_grad_mindspore_parallel[1].asnumpy(), 0.0001, 0.0001) + assert allclose(input_grad_mindspores2[self.x_id], input_grad_mindspore_parallel[2].asnumpy(), 0.0001, 0.0001) + + +def test_reid_batchmatmul_inputa_128_512_inputb_2000_512(): + inputa = [128, 512] + inputb = [2000, 512] + fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (2, 2), (1, 2)), (0, (2, 2), (2, 2))) + fact.forward_cmp() + + +def test_reid_batchmatmul_grad_inputa_128_512_inputb_2000_512(): + inputa = [128, 512] + inputb = [2000, 512] + fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (2, 2), (1, 2)), (0, (2, 2), (2, 2))) + fact.grad_cmp() + + +def test_reid_batchmatmul_inputa_128_512_inputb_2000_512_redistribution(): + inputa = [128, 512] + inputb = [2000, 512] + fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (1, 2), (1, 2)), (0, (2, 2), (2, 2))) + fact.forward_cmp() + + +def test_reid_batchmatmul_grad_inputa_128_512_inputb_2000_512_redistribution(): + inputa = [128, 512] + inputb = [2000, 512] + fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (1, 2), (1, 2)), (0, (2, 2), (2, 2))) + fact.grad_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/max/_test_max_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/max/_test_max_parallel_4p.py index 017ee7ebc3..d4247f7319 100644 --- a/tests/ut/python/parallel/parallel_end_to_end/max/_test_max_parallel_4p.py +++ b/tests/ut/python/parallel/parallel_end_to_end/max/_test_max_parallel_4p.py @@ -1,214 +1,213 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import numpy as np -import os -import pytest - -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, input1, input2, output_grad): - return grad_all_with_sens(self.network)(input1, input2, output_grad) - - -class Max(Cell): - def __init__(self, axis, keep_dims, strategy0=None, strategy1=None): - super(Max, self).__init__() - self.add = P.TensorAdd(strategy=strategy0) - self.reduce_max = P.ReduceMax(keep_dims=keep_dims).set_strategy(strategy=strategy1) - self.axis = axis - - def construct(self, input1, input2): - out = self.add(input1, input2) - return self.reduce_max(out, self.axis) - - -class MaxFactory: - def __init__(self, input_shape, axis, keep_dims, strategy0, strategy1): - self.strategy0 = strategy0 - self.strategy1 = strategy1 - self.axis = axis - self.keep_dims = keep_dims - input_size = 1 - prefix = "" - for s in input_shape: - prefix = prefix + str(s) + "_" - input_size = input_size * s - number_range = min(1000, input_size) - self.input_np1 = np.reshape(np.arange(0, input_size) % number_range - number_range / 2, input_shape).astype( - np.float32) - self.input_np2 = self.input_np1.copy() - self.out_grad_np = None - out_shape = list(input_shape) - out_shape.pop(axis) - out_size = input_size / input_shape[axis] - number_range_ = min(1000, out_size) - self.out_grad_np = np.reshape(np.arange(0, out_size) % number_range_ - number_range_ / 2, out_shape).astype( - np.float32) - out_strategy = list(strategy1[1]) - out_strategy.pop(axis) - self.out_strategy = out_strategy - need_dev_num = 1 - need_dev_num_ = 1 - for s in strategy0[1]: - need_dev_num = need_dev_num * s - for s in out_strategy: - need_dev_num_ = need_dev_num_ * s - self.x_id = device_id % need_dev_num - self.y_id = device_id % need_dev_num - self.out_id = device_id % need_dev_num_ - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def forward_mindspore_impl(self): - input1 = Tensor(self.input_np1) - input2 = Tensor(self.input_np2) - net = Max(axis=self.axis, keep_dims=self.keep_dims) - out = net(input1, input2) - return out.asnumpy() - - def forward_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - xs = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - ys = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - x1 = Tensor(xs[self.x_id]) - y1 = Tensor(ys[self.y_id]) - net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - 
net.set_auto_parallel() - out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) - return out.asnumpy() - - def grad_mindspore_impl(self): - input1 = Tensor(self.input_np1) - input2 = Tensor(self.input_np2) - out_grad = Tensor(self.out_grad_np) - net = Max(axis=self.axis, keep_dims=self.keep_dims) - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(input1, input2, out_grad) - return input_grad - - def grad_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - output_grads = self.get_parallel_blocks(self.out_grad_np, self.out_strategy) - out_grad = Tensor(output_grads[self.out_id]) - xs = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - ys = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - x1 = Tensor(xs[self.x_id]) - y1 = Tensor(ys[self.y_id]) - net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1) - grad_net = Grad(net) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - grad_net.set_auto_parallel() - grad_net.set_train() - input_grad = grad_net(x, y, out_grad, parallel_inputs_compile=[x, y, out_grad], - parallel_inputs_run=[x1, y1, out_grad]) - return input_grad - - def forward_cmp(self): - out_mindspore = self.forward_mindspore_impl() - out_mindspore_parallel = self.forward_mindspore_parallel_impl() - print(out_mindspore) - print(out_mindspore_parallel) - out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) - assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001) - - def grad_cmp(self): - input_grad_mindspore = self.grad_mindspore_impl() - input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() - input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() - input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() - input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() - input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() - input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) - input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) - assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) - assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) - - -def test_reid_max_forward_input_256_64(): - fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (4, 1), (4, 1)), - strategy1=(0, (4, 1))) - fact.forward_cmp() - - -def test_reid_max_grad_input_256_64(): - fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (4, 1), (4, 1)), - strategy1=(0, (4, 1))) - fact.grad_cmp() - - -def test_reid_max_forward_input_128_64_32_32(): - fact = MaxFactory(input_shape=(128, 64, 32, 32), axis=3, keep_dims=False, strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), - strategy1=(0, (2, 1, 2, 1))) - fact.forward_cmp() - - -def test_reid_max_grad_input_128_64_32_32(): - fact = MaxFactory(input_shape=(128, 64, 32, 32), axis=3, keep_dims=False, strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), - strategy1=(0, (2, 1, 2, 1))) - fact.grad_cmp() - - -def test_reid_max_forward_input_256_64_repeat(): - fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (2, 1), (2, 1)), - strategy1=(0, (2, 1))) - fact.forward_cmp() - - -def test_reid_max_grad_input_256_64_repeat(): - fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, 
strategy0=(0, (2, 1), (2, 1)), - strategy1=(0, (2, 1))) - fact.grad_cmp() +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import numpy as np + +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.ops import operations as P +from mindspore.ops.composite import grad_all_with_sens + +device_num = 4 +device_id = int(os.environ["RANK_ID"]) +path = "./output/" + + +def setup_module(): + print("~~~~~~~~~~~set up~~~~~~~~~~~~~") + context.set_context(mode=context.GRAPH_MODE) + context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) + distributedTool.init() + distributedTool.create_group("0-3", [0, 1, 2, 3]) + print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") + + +def teardown_module(): + print("~~~~~~~~~~~~tear down~~~~~~~~~~") + + +class Grad(Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.network = network + + def construct(self, input1, input2, output_grad): + return grad_all_with_sens(self.network)(input1, input2, output_grad) + + +class Max(Cell): + def __init__(self, axis, keep_dims, strategy0=None, strategy1=None): + super(Max, self).__init__() + self.add = P.TensorAdd(strategy=strategy0) + self.reduce_max = P.ReduceMax(keep_dims=keep_dims).set_strategy(strategy=strategy1) + self.axis = axis + + def construct(self, input1, input2): + out = self.add(input1, input2) + return self.reduce_max(out, self.axis) + + +class MaxFactory: + def __init__(self, input_shape, axis, keep_dims, strategy0, strategy1): + self.strategy0 = strategy0 + self.strategy1 = strategy1 + self.axis = axis + self.keep_dims = keep_dims + input_size = 1 + prefix = "" + for s in input_shape: + prefix = prefix + str(s) + "_" + input_size = input_size * s + number_range = min(1000, input_size) + self.input_np1 = np.reshape(np.arange(0, input_size) % number_range - number_range / 2, input_shape).astype( + np.float32) + self.input_np2 = self.input_np1.copy() + self.out_grad_np = None + out_shape = list(input_shape) + out_shape.pop(axis) + out_size = input_size / input_shape[axis] + number_range_ = min(1000, out_size) + self.out_grad_np = np.reshape(np.arange(0, out_size) % number_range_ - number_range_ / 2, out_shape).astype( + np.float32) + out_strategy = list(strategy1[1]) + out_strategy.pop(axis) + self.out_strategy = out_strategy + need_dev_num = 1 + need_dev_num_ = 1 + for s in strategy0[1]: + need_dev_num = need_dev_num * s + for s in out_strategy: + need_dev_num_ = need_dev_num_ * s + self.x_id = device_id % need_dev_num + self.y_id = device_id % need_dev_num + self.out_id = device_id % need_dev_num_ + + def get_parallel_blocks(self, input_, strategy): + blocks = [input_] + i = 0 + for stra in strategy: + temp = [] + while len(blocks) > 0: + block = blocks.pop(0) + temp.extend(np.split(block, stra, axis=i)) + blocks.extend(temp) + i += 1 + return blocks + + def 
forward_mindspore_impl(self): + input1 = Tensor(self.input_np1) + input2 = Tensor(self.input_np2) + net = Max(axis=self.axis, keep_dims=self.keep_dims) + out = net(input1, input2) + return out.asnumpy() + + def forward_mindspore_parallel_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + xs = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + ys = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) + x1 = Tensor(xs[self.x_id]) + y1 = Tensor(ys[self.y_id]) + net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net.set_auto_parallel() + out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) + return out.asnumpy() + + def grad_mindspore_impl(self): + input1 = Tensor(self.input_np1) + input2 = Tensor(self.input_np2) + out_grad = Tensor(self.out_grad_np) + net = Max(axis=self.axis, keep_dims=self.keep_dims) + grad_net = Grad(net) + grad_net.set_train() + input_grad = grad_net(input1, input2, out_grad) + return input_grad + + def grad_mindspore_parallel_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + output_grads = self.get_parallel_blocks(self.out_grad_np, self.out_strategy) + out_grad = Tensor(output_grads[self.out_id]) + xs = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + ys = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) + x1 = Tensor(xs[self.x_id]) + y1 = Tensor(ys[self.y_id]) + net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1) + grad_net = Grad(net) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + grad_net.set_auto_parallel() + grad_net.set_train() + input_grad = grad_net(x, y, out_grad, parallel_inputs_compile=[x, y, out_grad], + parallel_inputs_run=[x1, y1, out_grad]) + return input_grad + + def forward_cmp(self): + out_mindspore = self.forward_mindspore_impl() + out_mindspore_parallel = self.forward_mindspore_parallel_impl() + print(out_mindspore) + print(out_mindspore_parallel) + out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) + assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001) + + def grad_cmp(self): + input_grad_mindspore = self.grad_mindspore_impl() + input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() + input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() + input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() + input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() + input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() + input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) + input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) + assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) + assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) + + +def test_reid_max_forward_input_256_64(): + fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (4, 1), (4, 1)), + strategy1=(0, (4, 1))) + fact.forward_cmp() + + +def test_reid_max_grad_input_256_64(): + fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (4, 1), (4, 1)), + strategy1=(0, (4, 1))) + fact.grad_cmp() + + +def test_reid_max_forward_input_128_64_32_32(): + fact = 
MaxFactory(input_shape=(128, 64, 32, 32), axis=3, keep_dims=False, strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), + strategy1=(0, (2, 1, 2, 1))) + fact.forward_cmp() + + +def test_reid_max_grad_input_128_64_32_32(): + fact = MaxFactory(input_shape=(128, 64, 32, 32), axis=3, keep_dims=False, strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), + strategy1=(0, (2, 1, 2, 1))) + fact.grad_cmp() + + +def test_reid_max_forward_input_256_64_repeat(): + fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (2, 1), (2, 1)), + strategy1=(0, (2, 1))) + fact.forward_cmp() + + +def test_reid_max_grad_input_256_64_repeat(): + fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (2, 1), (2, 1)), + strategy1=(0, (2, 1))) + fact.grad_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/mul_softmax/need_fix_test_mul_softmax_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/mul_softmax/need_fix_test_mul_softmax_parallel_4p.py index f37e6176d6..19bf73f38a 100644 --- a/tests/ut/python/parallel/parallel_end_to_end/mul_softmax/need_fix_test_mul_softmax_parallel_4p.py +++ b/tests/ut/python/parallel/parallel_end_to_end/mul_softmax/need_fix_test_mul_softmax_parallel_4p.py @@ -1,201 +1,200 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import numpy as np -import os -import pytest -from numpy import allclose - -import mindspore as ms -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class MulSoftmax(Cell): - def __init__(self, strategy0=None, strategy1=None, axis=0): - super(MulSoftmax, self).__init__() - self.mul = P.Mul(strategy=strategy0) - self.softmax = P.Softmax(axis=axis, strategy=strategy1) - - def construct(self, x, z): - out = self.mul(x, z) - return self.softmax(out) - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, x, y, output_grad): - return grad_all_with_sens(self.network)(x, y, output_grad) - - -class MulSoftmaxFactory: - def __init__(self, input_shape, strategy0, strategy1): - prefix = "" - size = 1 - for s in input_shape: - prefix = prefix + str(s) - size = size * s - self.prefix = prefix - number_range = min(1000, size) - self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( - np.float32) - self.input_np2 = 1.0 - self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1, - input_shape).astype(np.float32) - self.strategy0 = strategy0 - self.strategy1 = strategy1 - need_dev_num = 1 - need_dev_num_ = 1 - for s in strategy0[1]: - need_dev_num = need_dev_num * s - for s in strategy1[1]: - need_dev_num_ = need_dev_num_ * s - self.x_id = device_id % need_dev_num - self.y_id = device_id % need_dev_num - self.out_id = device_id % need_dev_num_ - - def forward_mindspore_impl(self): - net = MulSoftmax() - x = Tensor(self.input_np1) - y = Tensor(self.input_np2, ms.float32) - out = net(x, y) - return out.asnumpy() - - def forward_mindspore_parallel_impl(self): - net = MulSoftmax(strategy0=self.strategy0, strategy1=self.strategy1) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net.set_auto_parallel() - x = Tensor(self.input_np1) - y = Tensor(self.input_np2, ms.float32) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(self.input_np2, ms.float32) - out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) - return out.asnumpy() - - def grad_mindspore_impl(self): - output_grad = Tensor(self.output_grad_np) - x = Tensor(self.input_np1) - y = Tensor(self.input_np2, ms.float32) - net = MulSoftmax() - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, output_grad) - return input_grad - - def grad_mindspore_parallel_impl(self): - output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy1[1]) - output_grad = Tensor(output_grads[self.out_id]) - x = Tensor(self.input_np1) - y = Tensor(self.input_np2, ms.float32) - net = MulSoftmax(strategy0=self.strategy0, strategy1=self.strategy1) - grad_net = Grad(net) - 
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
-        grad_net.set_train()
-        grad_net.set_auto_parallel()
-        inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
-        x1 = Tensor(inputs_x[self.x_id])
-        y1 = Tensor(self.input_np2, ms.float32)
-        input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad],
-                              parallel_inputs_run=[x1, y1, output_grad])
-        return input_grad
-
-    def get_parallel_blocks(self, input_, strategy):
-        blocks = [input_]
-        i = 0
-        for stra in strategy:
-            temp = []
-            while len(blocks) > 0:
-                block = blocks.pop(0)
-                temp.extend(np.split(block, stra, axis=i))
-            blocks.extend(temp)
-            i += 1
-        return blocks
-
-    def forward_cmp(self):
-        out_mindspore = self.forward_mindspore_impl()
-        out_mindspore_parallel = self.forward_mindspore_parallel_impl()
-        np.save(path + str(device_id) + "_" + self.prefix + "_forward_parallel.npy", out_mindspore_parallel)
-        out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy1[1])
-        assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001)
-
-    def grad_cmp(self):
-        input_grad_mindspore = self.grad_mindspore_impl()
-        input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
-        input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
-        input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
-        input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
-        input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
-        np.save(path + str(device_id) + "_" + self.prefix + "_grad_parallel0.npy", input_grad_mindspore_parallel0)
-        np.save(path + str(device_id) + "_" + self.prefix + "_grad_parallel1.npy", input_grad_mindspore_parallel1)
-        input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0,
-                                                       self.strategy0[1])  # here X1 of TensorMul's two inputs is not broadcast, while X2 is broadcast
-        assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
-        assert np.allclose(input_grad_mindspore1, input_grad_mindspore_parallel1, 0.0001, 0.0001)
-
-
-@pytest.mark.reid_forward
-def test_reid_mul_softmax_input_128x64():
-    stra0 = (0, (1, 4), ())
-    stra1 = (0, (1, 4))
-    fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
-    fact.forward_cmp()
-
-
-@pytest.mark.reid_grad
-def test_reid_grad_mul_softmax_input_128x64():
-    stra0 = (0, (1, 4), ())
-    stra1 = (0, (1, 4))
-    fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
-    fact.grad_cmp()
-
-
-@pytest.mark.reid_forward
-def test_reid_mul_softmax_input_128x64_all_to_all():
-    stra0 = (0, (4, 1), ())
-    stra1 = (0, (1, 4))
-    fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
-    fact.forward_cmp()
-
-
-@pytest.mark.reid_grad
-def test_reid_grad_mul_softmax_input_128x64_all_to_all():
-    stra0 = (0, (4, 1), ())
-    stra1 = (0, (1, 4))
-    fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
-    fact.grad_cmp()
+# Copyright 2019 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import numpy as np +import pytest + +import mindspore as ms +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.ops import operations as P +from mindspore.ops.composite import grad_all_with_sens + +device_num = 4 +device_id = int(os.environ["RANK_ID"]) +path = "./output/" + + +def setup_module(): + print("~~~~~~~~~~~set up~~~~~~~~~~~~~") + context.set_context(mode=context.GRAPH_MODE) + context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) + distributedTool.init() + distributedTool.create_group("0-3", [0, 1, 2, 3]) + print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") + + +def teardown_module(): + print("~~~~~~~~~~~~tear down~~~~~~~~~~") + + +class MulSoftmax(Cell): + def __init__(self, strategy0=None, strategy1=None, axis=0): + super(MulSoftmax, self).__init__() + self.mul = P.Mul(strategy=strategy0) + self.softmax = P.Softmax(axis=axis, strategy=strategy1) + + def construct(self, x, z): + out = self.mul(x, z) + return self.softmax(out) + + +class Grad(Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.network = network + + def construct(self, x, y, output_grad): + return grad_all_with_sens(self.network)(x, y, output_grad) + + +class MulSoftmaxFactory: + def __init__(self, input_shape, strategy0, strategy1): + prefix = "" + size = 1 + for s in input_shape: + prefix = prefix + str(s) + size = size * s + self.prefix = prefix + number_range = min(1000, size) + self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( + np.float32) + self.input_np2 = 1.0 + self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1, + input_shape).astype(np.float32) + self.strategy0 = strategy0 + self.strategy1 = strategy1 + need_dev_num = 1 + need_dev_num_ = 1 + for s in strategy0[1]: + need_dev_num = need_dev_num * s + for s in strategy1[1]: + need_dev_num_ = need_dev_num_ * s + self.x_id = device_id % need_dev_num + self.y_id = device_id % need_dev_num + self.out_id = device_id % need_dev_num_ + + def forward_mindspore_impl(self): + net = MulSoftmax() + x = Tensor(self.input_np1) + y = Tensor(self.input_np2, ms.float32) + out = net(x, y) + return out.asnumpy() + + def forward_mindspore_parallel_impl(self): + net = MulSoftmax(strategy0=self.strategy0, strategy1=self.strategy1) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net.set_auto_parallel() + x = Tensor(self.input_np1) + y = Tensor(self.input_np2, ms.float32) + inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + x1 = Tensor(inputs_x[self.x_id]) + y1 = Tensor(self.input_np2, ms.float32) + out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) + return out.asnumpy() + + def grad_mindspore_impl(self): + output_grad = Tensor(self.output_grad_np) + x = Tensor(self.input_np1) + y = Tensor(self.input_np2, ms.float32) + net = MulSoftmax() + grad_net = Grad(net) + grad_net.set_train() + input_grad = grad_net(x, y, output_grad) + return input_grad + + def grad_mindspore_parallel_impl(self): + output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy1[1]) + output_grad = Tensor(output_grads[self.out_id]) + x = Tensor(self.input_np1) + y = Tensor(self.input_np2, ms.float32) + net = 
MulSoftmax(strategy0=self.strategy0, strategy1=self.strategy1)
+        grad_net = Grad(net)
+        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+        grad_net.set_train()
+        grad_net.set_auto_parallel()
+        inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
+        x1 = Tensor(inputs_x[self.x_id])
+        y1 = Tensor(self.input_np2, ms.float32)
+        input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad],
+                              parallel_inputs_run=[x1, y1, output_grad])
+        return input_grad
+
+    def get_parallel_blocks(self, input_, strategy):
+        blocks = [input_]
+        i = 0
+        for stra in strategy:
+            temp = []
+            while len(blocks) > 0:
+                block = blocks.pop(0)
+                temp.extend(np.split(block, stra, axis=i))
+            blocks.extend(temp)
+            i += 1
+        return blocks
+
+    def forward_cmp(self):
+        out_mindspore = self.forward_mindspore_impl()
+        out_mindspore_parallel = self.forward_mindspore_parallel_impl()
+        np.save(path + str(device_id) + "_" + self.prefix + "_forward_parallel.npy", out_mindspore_parallel)
+        out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy1[1])
+        assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001)
+
+    def grad_cmp(self):
+        input_grad_mindspore = self.grad_mindspore_impl()
+        input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
+        input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
+        input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
+        input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
+        input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
+        np.save(path + str(device_id) + "_" + self.prefix + "_grad_parallel0.npy", input_grad_mindspore_parallel0)
+        np.save(path + str(device_id) + "_" + self.prefix + "_grad_parallel1.npy", input_grad_mindspore_parallel1)
+        input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0,
+                                                       self.strategy0[1])  # here X1 of TensorMul's two inputs is not broadcast, while X2 is broadcast
+        assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
+        assert np.allclose(input_grad_mindspore1, input_grad_mindspore_parallel1, 0.0001, 0.0001)
+
+
+@pytest.mark.reid_forward
+def test_reid_mul_softmax_input_128x64():
+    stra0 = (0, (1, 4), ())
+    stra1 = (0, (1, 4))
+    fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
+    fact.forward_cmp()
+
+
+@pytest.mark.reid_grad
+def test_reid_grad_mul_softmax_input_128x64():
+    stra0 = (0, (1, 4), ())
+    stra1 = (0, (1, 4))
+    fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
+    fact.grad_cmp()
+
+
+@pytest.mark.reid_forward
+def test_reid_mul_softmax_input_128x64_all_to_all():
+    stra0 = (0, (4, 1), ())
+    stra1 = (0, (1, 4))
+    fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
+    fact.forward_cmp()
+
+
+@pytest.mark.reid_grad
+def test_reid_grad_mul_softmax_input_128x64_all_to_all():
+    stra0 = (0, (4, 1), ())
+    stra1 = (0, (1, 4))
+    fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
+    fact.grad_cmp()
diff --git a/tests/ut/python/parallel/parallel_end_to_end/onehot/_test_onehot_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/onehot/_test_onehot_parallel_4p.py
index 9c79a0b05f..0648d769ab 100644
--- a/tests/ut/python/parallel/parallel_end_to_end/onehot/_test_onehot_parallel_4p.py
+++ b/tests/ut/python/parallel/parallel_end_to_end/onehot/_test_onehot_parallel_4p.py
@@ -1,149 +1,147 @@
-# Copyright 2019 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License,
Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import os -import pytest -from numpy import allclose - -import mindspore as ms -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class Onehot(Cell): - def __init__(self, axis=-1, depth=1, on_value=1.0, off_value=0.0, strategy=None): - super(Onehot, self).__init__() - self.onehot = P.OneHot(axis, strategy=strategy) - self.depth = depth - self.on_value = Tensor(on_value, ms.float32) - self.off_value = Tensor(off_value, ms.float32) - - def construct(self, indices): - return self.onehot(indices, self.depth, self.on_value, self.off_value) - - -class OneHotFactory: - def __init__(self, input_shape, depth, on_value=1.0, off_value=0.0, axis=None, dtype=None, strategy0=None): - size = 1 - prefix = "" - for s in input_shape: - prefix = prefix + str(s) - size = size * s - self.prefix = prefix - number_range = min(10, size) - self.input_np = np.reshape(np.arange(0, size) % number_range, input_shape).astype(np.int32) - self.depth = depth - self.on_value = on_value - self.off_value = off_value - self.axis = axis - self.dtype = dtype - self.strategy0 = strategy0 - need_dev_num = 1 - for s in strategy0[1]: - need_dev_num = need_dev_num * s - self.x_id = device_id % need_dev_num - self.out_id = device_id % need_dev_num - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def grad_mindspore_impl(self): - output_grad = Tensor(self.output_grad_np) - x = Tensor(self.input_np1) - y = Tensor(self.input_np2, ms.float32) - net = AddRelu() - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, output_grad) - return input_grad - - def forward_mindspore_impl(self): - indices = Tensor(self.input_np) - net = Onehot(axis=self.axis, - depth=self.depth, - on_value=self.on_value, - off_value=self.off_value) - out = net(indices) - return out.asnumpy() - - def forward_mindspore_parallel_impl(self): - x = Tensor(self.input_np) - inputs_x = self.get_parallel_blocks(self.input_np, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - net = Onehot(axis=self.axis, - depth=self.depth, - on_value=self.on_value, - off_value=self.off_value, strategy=self.strategy0) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - 
net.set_auto_parallel() - out = net(x, parallel_inputs_compile=[x], parallel_inputs_run=[x1]) - return out.asnumpy() - - def forward_cmp(self): - out_mindspore = self.forward_mindspore_impl() - out_mindspore_parallel = self.forward_mindspore_parallel_impl() - out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy0[1]) - assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.0001) - - -def test_reid_onehot_forward_int32_128_depth13000(): - fact = OneHotFactory(input_shape=(128,), - depth=131072, - on_value=1.000000, - off_value=0.000000, - axis=-1, - dtype="float32", - strategy0=(0, (2,))) - fact.forward_cmp() - - -def test_reid_onehot_forward_int32_131072_depth127(): - fact = OneHotFactory(input_shape=(131072,), - depth=127, - on_value=1.000000, - off_value=0.000000, - axis=-1, - dtype="float32", - strategy0=(0, (4,))) - fact.forward_cmp() +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import numpy as np + +import mindspore as ms +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.ops import operations as P + +device_num = 4 +device_id = int(os.environ["RANK_ID"]) +path = "./output/" + + +def setup_module(): + print("~~~~~~~~~~~set up~~~~~~~~~~~~~") + context.set_context(mode=context.GRAPH_MODE) + context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) + distributedTool.init() + distributedTool.create_group("0-3", [0, 1, 2, 3]) + print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") + + +def teardown_module(): + print("~~~~~~~~~~~~tear down~~~~~~~~~~") + + +class Onehot(Cell): + def __init__(self, axis=-1, depth=1, on_value=1.0, off_value=0.0, strategy=None): + super(Onehot, self).__init__() + self.onehot = P.OneHot(axis, strategy=strategy) + self.depth = depth + self.on_value = Tensor(on_value, ms.float32) + self.off_value = Tensor(off_value, ms.float32) + + def construct(self, indices): + return self.onehot(indices, self.depth, self.on_value, self.off_value) + + +class OneHotFactory: + def __init__(self, input_shape, depth, on_value=1.0, off_value=0.0, axis=None, dtype=None, strategy0=None): + size = 1 + prefix = "" + for s in input_shape: + prefix = prefix + str(s) + size = size * s + self.prefix = prefix + number_range = min(10, size) + self.input_np = np.reshape(np.arange(0, size) % number_range, input_shape).astype(np.int32) + self.depth = depth + self.on_value = on_value + self.off_value = off_value + self.axis = axis + self.dtype = dtype + self.strategy0 = strategy0 + need_dev_num = 1 + for s in strategy0[1]: + need_dev_num = need_dev_num * s + self.x_id = device_id % need_dev_num + self.out_id = device_id % need_dev_num + + def get_parallel_blocks(self, input_, strategy): + blocks = [input_] + i = 0 + for stra in strategy: + temp = [] + while len(blocks) > 0: + block = blocks.pop(0) + temp.extend(np.split(block, stra, axis=i)) + 
blocks.extend(temp) + i += 1 + return blocks + + def grad_mindspore_impl(self): + output_grad = Tensor(self.output_grad_np) + x = Tensor(self.input_np1) + y = Tensor(self.input_np2, ms.float32) + net = AddRelu() + grad_net = Grad(net) + grad_net.set_train() + input_grad = grad_net(x, y, output_grad) + return input_grad + + def forward_mindspore_impl(self): + indices = Tensor(self.input_np) + net = Onehot(axis=self.axis, + depth=self.depth, + on_value=self.on_value, + off_value=self.off_value) + out = net(indices) + return out.asnumpy() + + def forward_mindspore_parallel_impl(self): + x = Tensor(self.input_np) + inputs_x = self.get_parallel_blocks(self.input_np, self.strategy0[1]) + x1 = Tensor(inputs_x[self.x_id]) + net = Onehot(axis=self.axis, + depth=self.depth, + on_value=self.on_value, + off_value=self.off_value, strategy=self.strategy0) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net.set_auto_parallel() + out = net(x, parallel_inputs_compile=[x], parallel_inputs_run=[x1]) + return out.asnumpy() + + def forward_cmp(self): + out_mindspore = self.forward_mindspore_impl() + out_mindspore_parallel = self.forward_mindspore_parallel_impl() + out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy0[1]) + assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.0001) + + +def test_reid_onehot_forward_int32_128_depth13000(): + fact = OneHotFactory(input_shape=(128,), + depth=131072, + on_value=1.000000, + off_value=0.000000, + axis=-1, + dtype="float32", + strategy0=(0, (2,))) + fact.forward_cmp() + + +def test_reid_onehot_forward_int32_131072_depth127(): + fact = OneHotFactory(input_shape=(131072,), + depth=127, + on_value=1.000000, + off_value=0.000000, + axis=-1, + dtype="float32", + strategy0=(0, (4,))) + fact.forward_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/prelu/_test_prelu_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/prelu/_test_prelu_parallel_4p.py index 161b25430d..f0a45111bc 100644 --- a/tests/ut/python/parallel/parallel_end_to_end/prelu/_test_prelu_parallel_4p.py +++ b/tests/ut/python/parallel/parallel_end_to_end/prelu/_test_prelu_parallel_4p.py @@ -1,206 +1,206 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import numpy as np -import os -import pytest -from numpy import allclose - -import mindspore as ms -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class PReLU(Cell): - def __init__(self, channel=1, w=0.25, strategy_=None, strategy1_=None): - super(PReLU, self).__init__() - self.add = P.TensorAdd(strategy=strategy1_) - self.prelu = P.PReLU(strategy=strategy_) - - def construct(self, x, z, w): - out = self.add(x, z) - return self.prelu(out, w) - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, input, z, w, output_grad): - return grad_all_with_sens(self.network)(input, z, w, output_grad) - - -class PReLUFactory: - def __init__(self, input_shape, strategy): - n, c = input_shape[:2] - prefix = "" - size = 1 - for s in input_shape: - prefix = prefix + str(s) - size = size * s - self.prefix = prefix - number_range = min(1000, size) - self.input_np = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(np.float32) - self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1, - input_shape).astype(np.float32) - self.channel = c - self.weight = np.array([np.float32(0.25)] * c) - self.strategy = strategy - - def forward_mindspore_impl(self): - net = PReLU(channel=self.channel, w=self.weight) - x = Tensor(self.input_np) - z = Tensor(np.zeros(self.input_np.shape), ms.float32) - w = Tensor(self.weight) - out = net(x, z, w) - return out.asnumpy() - - def forward_mindspore_parallel_impl(self): - net = PReLU(channel=self.channel, w=self.weight, strategy_=self.strategy, - strategy1_=(self.strategy[0], self.strategy[1], self.strategy[1])) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net.set_auto_parallel() - x = Tensor(self.input_np) - z = Tensor(np.zeros(self.input_np.shape), ms.float32) - w = Tensor(self.weight) - - inputs = self.get_parallel_blocks(self.input_np, self.strategy[1]) - block_id = device_id % len(inputs) - x1 = Tensor(inputs[block_id]) - z1 = Tensor(np.zeros(inputs[block_id].shape), ms.float32) - w1 = Tensor(self.weight) - - out = net(x, z, w, parallel_inputs_compile=[x, z, w], parallel_inputs_run=[x1, z1, w1]) - return out.asnumpy() - - def grad_mindspore_impl(self): - output_grad = Tensor(self.output_grad_np) - x = Tensor(self.input_np) - z = Tensor(np.zeros(self.input_np.shape), ms.float32) - w = Tensor(self.weight) - - net = PReLU(channel=self.channel, w=self.weight) - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, z, w, output_grad) - return input_grad - - def grad_mindspore_parallel_impl(self): - output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy[1]) - block_id = device_id % len(output_grads) - output_grad = Tensor(output_grads[block_id]) - x = Tensor(self.input_np) - z 
= Tensor(np.zeros(self.input_np.shape), ms.float32) - w = Tensor(self.weight) - - net = PReLU(channel=self.channel, w=self.weight, strategy_=self.strategy, - strategy1_=(self.strategy[0], self.strategy[1], self.strategy[1])) - grad_net = Grad(net) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - grad_net.set_auto_parallel() - - grad_net.set_train() - inputs = self.get_parallel_blocks(self.input_np, self.strategy[1]) - x1 = Tensor(inputs[block_id]) - z1 = Tensor(np.zeros(inputs[block_id].shape), ms.float32) - w1 = Tensor(self.weight) - - input_grad = grad_net(x, z, w, output_grad, parallel_inputs_compile=[x, z, w, output_grad], - parallel_inputs_run=[x1, z1, w1, output_grad]) - return input_grad - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def forward_cmp(self): - out_mindspore = self.forward_mindspore_impl() - out_mindspore_parallel = self.forward_mindspore_parallel_impl() - out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy[1]) - block_id = device_id % len(out_blocks) - assert np.allclose(out_blocks[block_id], out_mindspore_parallel, 0.0001, 0.001) - - def grad_cmp(self): - input_grad_mindspore = self.grad_mindspore_impl() - input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() - input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() - input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() - input_grad_mindspore2 = input_grad_mindspore[2].asnumpy() - input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() - input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() - input_grad_mindspore_parallel2 = input_grad_mindspore_parallel[2].asnumpy() - input_grad_blocks = self.get_parallel_blocks(input_grad_mindspore0, self.strategy[1]) - input1_grad_blocks = self.get_parallel_blocks(input_grad_mindspore1, self.strategy[1]) - block_id = device_id % len(input_grad_blocks) - assert np.allclose(input_grad_blocks[block_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) - assert np.allclose(input_grad_mindspore2, input_grad_mindspore_parallel2, 0.0001, 0.0001) - assert np.allclose(input1_grad_blocks[block_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) - - -@pytest.mark.reid_grad -def test_reid_prelu_input_128x64x112x112_repeat(): - stra = (0, (1, 1, 2, 1), (1)) - fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) - fact.forward_cmp() - - -@pytest.mark.reid_grad -def test_reid_grad_prelu_input_128x64x112x112_repeat(): - stra = (0, (1, 1, 2, 1), (1)) - fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) - fact.grad_cmp() - - -@pytest.mark.reid_grad -def test_reid_prelu_input_128x64x112x112_mix(): - stra = (0, (2, 1, 1, 2), (1)) - fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) - fact.forward_cmp() - - -@pytest.mark.reid_grad -def test_reid_grad_prelu_input_128x64x112x112_mix(): - stra = (0, (2, 1, 1, 2), (1)) - fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) - fact.grad_cmp() +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import numpy as np +import pytest + +import mindspore as ms +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.ops import operations as P +from mindspore.ops.composite import grad_all_with_sens + +device_num = 4 +device_id = int(os.environ["RANK_ID"]) +path = "./output/" + + +def setup_module(): + print("~~~~~~~~~~~set up~~~~~~~~~~~~~") + context.set_context(mode=context.GRAPH_MODE) + context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) + distributedTool.init() + distributedTool.create_group("0-3", [0, 1, 2, 3]) + print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") + + +def teardown_module(): + print("~~~~~~~~~~~~tear down~~~~~~~~~~") + + +class PReLU(Cell): + def __init__(self, channel=1, w=0.25, strategy_=None, strategy1_=None): + super(PReLU, self).__init__() + self.add = P.TensorAdd(strategy=strategy1_) + self.prelu = P.PReLU(strategy=strategy_) + self.channel = channel + + def construct(self, x, z, w): + out = self.add(x, z) + return self.prelu(out, w) + + +class Grad(Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.network = network + + def construct(self, input_, z, w, output_grad): + return grad_all_with_sens(self.network)(input_, z, w, output_grad) + + +class PReLUFactory: + def __init__(self, input_shape, strategy): + n, c = input_shape[:2] + prefix = "" + size = 1 + for s in input_shape: + prefix = prefix + str(s) + size = size * s + self.prefix = prefix + number_range = min(1000, size) + self.input_np = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(np.float32) + self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1, + input_shape).astype(np.float32) + self.channel = c + self.weight = np.array([np.float32(0.25)] * c) + self.strategy = strategy + + def forward_mindspore_impl(self): + net = PReLU(channel=self.channel, w=self.weight) + x = Tensor(self.input_np) + z = Tensor(np.zeros(self.input_np.shape), ms.float32) + w = Tensor(self.weight) + out = net(x, z, w) + return out.asnumpy() + + def forward_mindspore_parallel_impl(self): + net = PReLU(channel=self.channel, w=self.weight, strategy_=self.strategy, + strategy1_=(self.strategy[0], self.strategy[1], self.strategy[1])) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net.set_auto_parallel() + x = Tensor(self.input_np) + z = Tensor(np.zeros(self.input_np.shape), ms.float32) + w = Tensor(self.weight) + + inputs = self.get_parallel_blocks(self.input_np, self.strategy[1]) + block_id = device_id % len(inputs) + x1 = Tensor(inputs[block_id]) + z1 = Tensor(np.zeros(inputs[block_id].shape), ms.float32) + w1 = Tensor(self.weight) + + out = net(x, z, w, parallel_inputs_compile=[x, z, w], parallel_inputs_run=[x1, z1, w1]) + return out.asnumpy() + + def grad_mindspore_impl(self): + output_grad = Tensor(self.output_grad_np) + x = Tensor(self.input_np) + z = Tensor(np.zeros(self.input_np.shape), ms.float32) + w = 
Tensor(self.weight) + + net = PReLU(channel=self.channel, w=self.weight) + grad_net = Grad(net) + grad_net.set_train() + input_grad = grad_net(x, z, w, output_grad) + return input_grad + + def grad_mindspore_parallel_impl(self): + output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy[1]) + block_id = device_id % len(output_grads) + output_grad = Tensor(output_grads[block_id]) + x = Tensor(self.input_np) + z = Tensor(np.zeros(self.input_np.shape), ms.float32) + w = Tensor(self.weight) + + net = PReLU(channel=self.channel, w=self.weight, strategy_=self.strategy, + strategy1_=(self.strategy[0], self.strategy[1], self.strategy[1])) + grad_net = Grad(net) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + grad_net.set_auto_parallel() + + grad_net.set_train() + inputs = self.get_parallel_blocks(self.input_np, self.strategy[1]) + x1 = Tensor(inputs[block_id]) + z1 = Tensor(np.zeros(inputs[block_id].shape), ms.float32) + w1 = Tensor(self.weight) + + input_grad = grad_net(x, z, w, output_grad, parallel_inputs_compile=[x, z, w, output_grad], + parallel_inputs_run=[x1, z1, w1, output_grad]) + return input_grad + + def get_parallel_blocks(self, input_, strategy): + blocks = [input_] + i = 0 + for stra in strategy: + temp = [] + while len(blocks) > 0: + block = blocks.pop(0) + temp.extend(np.split(block, stra, axis=i)) + blocks.extend(temp) + i += 1 + return blocks + + def forward_cmp(self): + out_mindspore = self.forward_mindspore_impl() + out_mindspore_parallel = self.forward_mindspore_parallel_impl() + out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy[1]) + block_id = device_id % len(out_blocks) + assert np.allclose(out_blocks[block_id], out_mindspore_parallel, 0.0001, 0.001) + + def grad_cmp(self): + input_grad_mindspore = self.grad_mindspore_impl() + input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() + input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() + input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() + input_grad_mindspore2 = input_grad_mindspore[2].asnumpy() + input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() + input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() + input_grad_mindspore_parallel2 = input_grad_mindspore_parallel[2].asnumpy() + input_grad_blocks = self.get_parallel_blocks(input_grad_mindspore0, self.strategy[1]) + input1_grad_blocks = self.get_parallel_blocks(input_grad_mindspore1, self.strategy[1]) + block_id = device_id % len(input_grad_blocks) + assert np.allclose(input_grad_blocks[block_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) + assert np.allclose(input_grad_mindspore2, input_grad_mindspore_parallel2, 0.0001, 0.0001) + assert np.allclose(input1_grad_blocks[block_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) + + +@pytest.mark.reid_grad +def test_reid_prelu_input_128x64x112x112_repeat(): + stra = (0, (1, 1, 2, 1), (1)) + fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) + fact.forward_cmp() + + +@pytest.mark.reid_grad +def test_reid_grad_prelu_input_128x64x112x112_repeat(): + stra = (0, (1, 1, 2, 1), (1)) + fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) + fact.grad_cmp() + + +@pytest.mark.reid_grad +def test_reid_prelu_input_128x64x112x112_mix(): + stra = (0, (2, 1, 1, 2), (1)) + fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) + fact.forward_cmp() + + +@pytest.mark.reid_grad +def test_reid_grad_prelu_input_128x64x112x112_mix(): + stra = (0, (2, 1, 1, 2), (1)) + 
fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) + fact.grad_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/reducemean/_test_reducemean_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/reducemean/_test_reducemean_parallel_4p.py index 82ab74d38d..24a3227da7 100644 --- a/tests/ut/python/parallel/parallel_end_to_end/reducemean/_test_reducemean_parallel_4p.py +++ b/tests/ut/python/parallel/parallel_end_to_end/reducemean/_test_reducemean_parallel_4p.py @@ -1,253 +1,252 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import os -import pytest -from numpy import allclose as allclose_nparray - -import mindspore as ms -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, x, y, output_grad): - return grad_all_with_sens(self.network)(x, y, output_grad) - - -class GradScalar(Cell): - def __init__(self, network): - super(GradScalar, self).__init__() - self.network = network - self.sens = Tensor([1.0], dtype=ms.float32) - - def construct(self, x, y): - return grad_all_with_sens(self.network)(x, y, self.sens) - - -class ReduceMean(Cell): - def __init__(self, keep_dims, axis, strategy0=None, strategy1=None): - super(ReduceMean, self).__init__() - self.add = P.TensorAdd(strategy=strategy0) - self.reduce_mean = P.ReduceMean(keep_dims=keep_dims).set_strategy(strategy=strategy1) - self.axis = axis - - def construct(self, x, y): - out = self.add(x, y) - return self.reduce_mean(out, self.axis) - - -class ReduceMeanFactory: - def __init__(self, input_shape, keep_dims, axis, strategy0=None, strategy1=None): - prefix = "" - size = 1 - for s in input_shape: - prefix = prefix + str(s) - size = size * s - self.prefix = prefix - number_range = min(1000, size) - self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( - np.float32) - self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( - np.float32) - self.keep_dims = keep_dims - self.axis = axis - target_shape = self.input_np1.mean(axis=axis, keepdims=keep_dims).shape - target_size = 1 - for s in target_shape: - target_size = target_size * s - 
number_range = min(1000, target_size) - self.output_grad_np = np.array([1.0], dtype=np.float32) - if len(target_shape) > 0: - self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range, target_shape).astype( - np.float32) + 1.0 - self.shape = target_shape - self.strategy0 = strategy0 - self.strategy1 = strategy1 - out_strategy = [] - axis_ = list(axis) - if axis_[0] == -1: - axis_[0] = len(input_shape) - 1 - for i in range(0, len(input_shape)): - if i in axis_: - if keep_dims: - out_strategy.append(1) - else: - out_strategy.append(strategy1[1][i]) - self.out_strategy = out_strategy - need_dev_num0 = 1 - need_dev_num1 = 1 - for s in strategy0[1]: - need_dev_num0 = need_dev_num0 * s - for s in out_strategy: - need_dev_num1 = need_dev_num1 * s - self.x_id = device_id % need_dev_num0 - self.y_id = device_id % need_dev_num0 - block_id = device_id % need_dev_num0 - device_index = self.id_to_list(block_id, self.strategy1[1]) - print(device_index) - for i in axis: - device_index[i] = 0 - print(device_index) - self.out_id = self.list_to_id(device_index, self.out_strategy) - print(self.out_id) - - def id_to_list(self, id, shape): - result = [] - r = id - for i in range(0, len(shape)): - v = 1 - for j in range(i + 1, len(shape)): - v = v * shape[j] - result.append(r // v) - r = r % v - return result - - def list_to_id(self, id_list, shape): - result = 0 - for i in range(0, len(id_list)): - v = 1 - for j in range(i + 1, len(id_list)): - v = v * shape[j] - result = result + id_list[i] * v - return result - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def forward_mindspore_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis) - out = net(x, y) - return out.asnumpy() - - def forward_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis, strategy0=self.strategy0, strategy1=self.strategy1) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net.set_auto_parallel() - out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) - return out.asnumpy() - - def grad_mindspore_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - out_grad = Tensor(self.output_grad_np) - net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis) - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, out_grad) - return input_grad - - def grad_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - output_grad = Tensor(self.output_grad_np) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - output_grad1 = Tensor(outgrads[self.out_id]) - net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis, strategy0=self.strategy0, strategy1=self.strategy1) - grad_net = Grad(net) - 
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - grad_net.set_auto_parallel() - grad_net.set_train() - input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], - parallel_inputs_run=[x1, y1, output_grad1]) - return input_grad - - def forward_cmp(self): - out_mindspore = self.forward_mindspore_impl() - out_mindspore_parallel = self.forward_mindspore_parallel_impl() - out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) - assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) - - def grad_cmp(self): - input_grad_mindspore = self.grad_mindspore_impl() - input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() - input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() - input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() - input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() - input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() - input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) - input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) - assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) - assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) - - -def test_reid_reducemean_input_64x16(): - fact = ReduceMeanFactory(input_shape=(64 * 16,), keep_dims=False, axis=(-1,), strategy0=(0, (4,), (4,)), - strategy1=(0, (4,))) - fact.forward_cmp() - - -def test_grad_reid_reducemean_input_64x16(): - fact = ReduceMeanFactory(input_shape=(64 * 16,), keep_dims=False, axis=(-1,), strategy0=(0, (4,), (4,)), - strategy1=(0, (4,))) - fact.grad_cmp() - - -def test_reid_reducemean_input_64x128x28x28(): - fact = ReduceMeanFactory(input_shape=(64, 128, 32, 32), keep_dims=True, axis=(2, 3), - strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), strategy1=(0, (2, 1, 2, 1))) - fact.forward_cmp() - - -def test_grad_reid_reducemean_input_64x128x28x28(): - fact = ReduceMeanFactory(input_shape=(64, 128, 32, 32), keep_dims=True, axis=(2, 3), - strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), strategy1=(0, (2, 1, 2, 1))) - fact.grad_cmp() +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import numpy as np +from numpy import allclose as allclose_nparray + +import mindspore as ms +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.ops import operations as P +from mindspore.ops.composite import grad_all_with_sens + +device_num = 4 +device_id = int(os.environ["RANK_ID"]) +path = "./output/" + + +def setup_module(): + print("~~~~~~~~~~~set up~~~~~~~~~~~~~") + context.set_context(mode=context.GRAPH_MODE) + context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) + distributedTool.init() + distributedTool.create_group("0-3", [0, 1, 2, 3]) + print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") + + +def teardown_module(): + print("~~~~~~~~~~~~tear down~~~~~~~~~~") + + +class Grad(Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.network = network + + def construct(self, x, y, output_grad): + return grad_all_with_sens(self.network)(x, y, output_grad) + + +class GradScalar(Cell): + def __init__(self, network): + super(GradScalar, self).__init__() + self.network = network + self.sens = Tensor([1.0], dtype=ms.float32) + + def construct(self, x, y): + return grad_all_with_sens(self.network)(x, y, self.sens) + + +class ReduceMean(Cell): + def __init__(self, keep_dims, axis, strategy0=None, strategy1=None): + super(ReduceMean, self).__init__() + self.add = P.TensorAdd(strategy=strategy0) + self.reduce_mean = P.ReduceMean(keep_dims=keep_dims).set_strategy(strategy=strategy1) + self.axis = axis + + def construct(self, x, y): + out = self.add(x, y) + return self.reduce_mean(out, self.axis) + + +class ReduceMeanFactory: + def __init__(self, input_shape, keep_dims, axis, strategy0=None, strategy1=None): + prefix = "" + size = 1 + for s in input_shape: + prefix = prefix + str(s) + size = size * s + self.prefix = prefix + number_range = min(1000, size) + self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( + np.float32) + self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( + np.float32) + self.keep_dims = keep_dims + self.axis = axis + target_shape = self.input_np1.mean(axis=axis, keepdims=keep_dims).shape + target_size = 1 + for s in target_shape: + target_size = target_size * s + number_range = min(1000, target_size) + self.output_grad_np = np.array([1.0], dtype=np.float32) + if len(target_shape) > 0: + self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range, target_shape).astype( + np.float32) + 1.0 + self.shape = target_shape + self.strategy0 = strategy0 + self.strategy1 = strategy1 + out_strategy = [] + axis_ = list(axis) + if axis_[0] == -1: + axis_[0] = len(input_shape) - 1 + for i in range(0, len(input_shape)): + if i in axis_: + if keep_dims: + out_strategy.append(1) + else: + out_strategy.append(strategy1[1][i]) + self.out_strategy = out_strategy + need_dev_num0 = 1 + need_dev_num1 = 1 + for s in strategy0[1]: + need_dev_num0 = need_dev_num0 * s + for s in out_strategy: + need_dev_num1 = need_dev_num1 * s + self.x_id = device_id % need_dev_num0 + self.y_id = device_id % need_dev_num0 + block_id = device_id % need_dev_num0 + device_index = self.id_to_list(block_id, self.strategy1[1]) + print(device_index) + for i in axis: + device_index[i] = 0 + print(device_index) + self.out_id = self.list_to_id(device_index, self.out_strategy) + print(self.out_id) + + def id_to_list(self, id_, 
shape): + result = [] + r = id_ + for i in range(0, len(shape)): + v = 1 + for j in range(i + 1, len(shape)): + v = v * shape[j] + result.append(r // v) + r = r % v + return result + + def list_to_id(self, id_list, shape): + result = 0 + for i in range(0, len(id_list)): + v = 1 + for j in range(i + 1, len(id_list)): + v = v * shape[j] + result = result + id_list[i] * v + return result + + def get_parallel_blocks(self, input_, strategy): + blocks = [input_] + i = 0 + for stra in strategy: + temp = [] + while len(blocks) > 0: + block = blocks.pop(0) + temp.extend(np.split(block, stra, axis=i)) + blocks.extend(temp) + i += 1 + return blocks + + def forward_mindspore_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis) + out = net(x, y) + return out.asnumpy() + + def forward_mindspore_parallel_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) + x1 = Tensor(inputs_x[self.x_id]) + y1 = Tensor(inputs_y[self.y_id]) + net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis, strategy0=self.strategy0, strategy1=self.strategy1) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net.set_auto_parallel() + out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) + return out.asnumpy() + + def grad_mindspore_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + out_grad = Tensor(self.output_grad_np) + net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis) + grad_net = Grad(net) + grad_net.set_train() + input_grad = grad_net(x, y, out_grad) + return input_grad + + def grad_mindspore_parallel_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + output_grad = Tensor(self.output_grad_np) + inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) + outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) + x1 = Tensor(inputs_x[self.x_id]) + y1 = Tensor(inputs_y[self.y_id]) + output_grad1 = Tensor(outgrads[self.out_id]) + net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis, strategy0=self.strategy0, strategy1=self.strategy1) + grad_net = Grad(net) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + grad_net.set_auto_parallel() + grad_net.set_train() + input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], + parallel_inputs_run=[x1, y1, output_grad1]) + return input_grad + + def forward_cmp(self): + out_mindspore = self.forward_mindspore_impl() + out_mindspore_parallel = self.forward_mindspore_parallel_impl() + out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) + assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) + + def grad_cmp(self): + input_grad_mindspore = self.grad_mindspore_impl() + input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() + input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() + input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() + input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() + input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() + input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) + input_grad_blocks_1 = 
self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) + assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) + assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) + + +def test_reid_reducemean_input_64x16(): + fact = ReduceMeanFactory(input_shape=(64 * 16,), keep_dims=False, axis=(-1,), strategy0=(0, (4,), (4,)), + strategy1=(0, (4,))) + fact.forward_cmp() + + +def test_grad_reid_reducemean_input_64x16(): + fact = ReduceMeanFactory(input_shape=(64 * 16,), keep_dims=False, axis=(-1,), strategy0=(0, (4,), (4,)), + strategy1=(0, (4,))) + fact.grad_cmp() + + +def test_reid_reducemean_input_64x128x28x28(): + fact = ReduceMeanFactory(input_shape=(64, 128, 32, 32), keep_dims=True, axis=(2, 3), + strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), strategy1=(0, (2, 1, 2, 1))) + fact.forward_cmp() + + +def test_grad_reid_reducemean_input_64x128x28x28(): + fact = ReduceMeanFactory(input_shape=(64, 128, 32, 32), keep_dims=True, axis=(2, 3), + strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), strategy1=(0, (2, 1, 2, 1))) + fact.grad_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/reshape/_test_reshape_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/reshape/_test_reshape_parallel_4p.py index 70532e5c81..cbfdd511d7 100644 --- a/tests/ut/python/parallel/parallel_end_to_end/reshape/_test_reshape_parallel_4p.py +++ b/tests/ut/python/parallel/parallel_end_to_end/reshape/_test_reshape_parallel_4p.py @@ -1,206 +1,206 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import numpy as np -import os -import pytest -from numpy import allclose as allclose_nparray - -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, x, y, output_grad): - return grad_all_with_sens(self.network)(x, y, output_grad) - - -class Reshape(Cell): - def __init__(self, target_shape, strategy0=None, strategy1=None): - super(Reshape, self).__init__() - self.add = P.TensorAdd(strategy=strategy0) - self.reshape = P.Reshape(strategy=strategy1) - self.shape = tuple(target_shape) - - def construct(self, input1, input2): - x = self.add(input1, input2) - return self.reshape(x, self.shape) - - -class ReshapeFactory: - def __init__(self, input_shape, target_shape, strategy0, strategy1): - prefix = "" - size = 1 - for s in input_shape: - prefix = prefix + str(s) - size = size * s - self.prefix = prefix - number_range = min(1000, size) - self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( - np.float32) - self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( - np.float32) - target_size = 1 - for s in target_shape: - target_size = target_size * s - number_range = min(1000, target_size) - self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2, - target_shape).astype(np.float32) - self.target_shape = target_shape - self.strategy0 = strategy0 - self.strategy1 = strategy1 - out_strategy = [1] * len(target_shape) - out_strategy[0] = strategy1[1][0] - self.out_strategy = out_strategy - - need_dev_num0 = 1 - need_dev_num1 = 1 - for s in strategy0[1]: - need_dev_num0 = need_dev_num0 * s - for s in out_strategy: - need_dev_num1 = need_dev_num1 * s - self.x_id = device_id % need_dev_num0 - self.y_id = device_id % need_dev_num0 - self.out_id = device_id % need_dev_num1 - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def forward_reshape_mindspore_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - net = Reshape(self.target_shape) - out = net(x, y) - return out.asnumpy() - - def forward_reshape_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - net = Reshape(self.target_shape, strategy0=self.strategy0, strategy1=self.strategy1) - 
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net.set_auto_parallel() - out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) - return out.asnumpy() - - def grad_reshape_mindspore_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - output_grad = Tensor(self.output_grad_np) - net = Reshape(self.target_shape) - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, output_grad) - return input_grad - - def grad_reshape_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - output_grad = Tensor(self.output_grad_np) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - output_grad1 = Tensor(outgrads[self.out_id]) - net = Reshape(self.target_shape, strategy0=self.strategy0, strategy1=self.strategy1) - grad_net = Grad(net) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - grad_net.set_auto_parallel() - grad_net.set_train() - input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], - parallel_inputs_run=[x1, y1, output_grad1]) - return input_grad - - def forward_reshape_cmp(self): - out_mindspore = self.forward_reshape_mindspore_impl() - out_mindspore_parallel = self.forward_reshape_mindspore_parallel_impl() - out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) - assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) - - def grad_reshape_cmp(self): - input_grad_mindspore = self.grad_reshape_mindspore_impl() - input_grad_mindspore_parallel = self.grad_reshape_mindspore_parallel_impl() - input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() - input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() - input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() - input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() - input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) - input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) - assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) - assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) - - -@pytest.mark.reid_forward -def test_reid_reshape_input_128x512x7x7_target_128x25088(): - fact = ReshapeFactory(input_shape=(128, 512, 7, 7), target_shape=(128, 25088), - strategy0=(0, (4, 1, 1, 1), (4, 1, 1, 1)), strategy1=(0, (4, 1, 1, 1))) - fact.forward_reshape_cmp() - - -def test_reid_reshape_grad_input_128x512x7x7_target_128x25088(): - fact = ReshapeFactory(input_shape=(128, 512, 7, 7), target_shape=(128, 25088), - strategy0=(0, (4, 1, 1, 1), (4, 1, 1, 1)), strategy1=(0, (4, 1, 1, 1))) - fact.grad_reshape_cmp() - - -@pytest.mark.reid_forward -def test_reid_reshape_input_128x64_target_128x64x1x1(): - fact = ReshapeFactory(input_shape=(128, 64), target_shape=(128, 64, 1, 1), strategy0=(0, (2, 1), (2, 1)), - strategy1=(0, (2, 1))) - fact.forward_reshape_cmp() - - -@pytest.mark.reid_grad -def test_reid_reshape_grad_input_128x64_target_128x64x1x1(): - fact = ReshapeFactory(input_shape=(128, 64), target_shape=(128, 64, 1, 1), strategy0=(0, (2, 1), (2, 1)), - strategy1=(0, (2, 1))) - 
fact.grad_reshape_cmp() +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import numpy as np +import pytest +from numpy import allclose as allclose_nparray + +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.ops import operations as P +from mindspore.ops.composite import grad_all_with_sens + +device_num = 4 +device_id = int(os.environ["RANK_ID"]) +path = "./output/" + + +def setup_module(): + print("~~~~~~~~~~~set up~~~~~~~~~~~~~") + context.set_context(mode=context.GRAPH_MODE) + context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) + distributedTool.init() + distributedTool.create_group("0-3", [0, 1, 2, 3]) + print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") + + +def teardown_module(): + print("~~~~~~~~~~~~tear down~~~~~~~~~~") + + +class Grad(Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.network = network + + def construct(self, x, y, output_grad): + return grad_all_with_sens(self.network)(x, y, output_grad) + + +class Reshape(Cell): + def __init__(self, target_shape, strategy0=None, strategy1=None): + super(Reshape, self).__init__() + self.add = P.TensorAdd(strategy=strategy0) + self.reshape = P.Reshape(strategy=strategy1) + self.shape = tuple(target_shape) + + def construct(self, input1, input2): + x = self.add(input1, input2) + return self.reshape(x, self.shape) + + +class ReshapeFactory: + def __init__(self, input_shape, target_shape, strategy0, strategy1): + prefix = "" + size = 1 + for s in input_shape: + prefix = prefix + str(s) + size = size * s + self.prefix = prefix + number_range = min(1000, size) + self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( + np.float32) + self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( + np.float32) + target_size = 1 + for s in target_shape: + target_size = target_size * s + number_range = min(1000, target_size) + self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2, + target_shape).astype(np.float32) + self.target_shape = target_shape + self.strategy0 = strategy0 + self.strategy1 = strategy1 + out_strategy = [1] * len(target_shape) + out_strategy[0] = strategy1[1][0] + self.out_strategy = out_strategy + + need_dev_num0 = 1 + need_dev_num1 = 1 + for s in strategy0[1]: + need_dev_num0 = need_dev_num0 * s + for s in out_strategy: + need_dev_num1 = need_dev_num1 * s + self.x_id = device_id % need_dev_num0 + self.y_id = device_id % need_dev_num0 + self.out_id = device_id % need_dev_num1 + + def get_parallel_blocks(self, input_, strategy): + blocks = [input_] + i = 0 + for stra in strategy: + temp = [] + while len(blocks) > 0: + block = blocks.pop(0) + temp.extend(np.split(block, stra, axis=i)) + blocks.extend(temp) + i += 1 + return blocks + + def 
forward_reshape_mindspore_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + net = Reshape(self.target_shape) + out = net(x, y) + return out.asnumpy() + + def forward_reshape_mindspore_parallel_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) + x1 = Tensor(inputs_x[self.x_id]) + y1 = Tensor(inputs_y[self.y_id]) + net = Reshape(self.target_shape, strategy0=self.strategy0, strategy1=self.strategy1) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net.set_auto_parallel() + out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) + return out.asnumpy() + + def grad_reshape_mindspore_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + output_grad = Tensor(self.output_grad_np) + net = Reshape(self.target_shape) + grad_net = Grad(net) + grad_net.set_train() + input_grad = grad_net(x, y, output_grad) + return input_grad + + def grad_reshape_mindspore_parallel_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + output_grad = Tensor(self.output_grad_np) + inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) + outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) + x1 = Tensor(inputs_x[self.x_id]) + y1 = Tensor(inputs_y[self.y_id]) + output_grad1 = Tensor(outgrads[self.out_id]) + net = Reshape(self.target_shape, strategy0=self.strategy0, strategy1=self.strategy1) + grad_net = Grad(net) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + grad_net.set_auto_parallel() + grad_net.set_train() + input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], + parallel_inputs_run=[x1, y1, output_grad1]) + return input_grad + + def forward_reshape_cmp(self): + out_mindspore = self.forward_reshape_mindspore_impl() + out_mindspore_parallel = self.forward_reshape_mindspore_parallel_impl() + out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) + assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) + + def grad_reshape_cmp(self): + input_grad_mindspore = self.grad_reshape_mindspore_impl() + input_grad_mindspore_parallel = self.grad_reshape_mindspore_parallel_impl() + input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() + input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() + input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() + input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() + input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) + input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) + assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) + assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) + + +@pytest.mark.reid_forward +def test_reid_reshape_input_128x512x7x7_target_128x25088(): + fact = ReshapeFactory(input_shape=(128, 512, 7, 7), target_shape=(128, 25088), + strategy0=(0, (4, 1, 1, 1), (4, 1, 1, 1)), strategy1=(0, (4, 1, 1, 1))) + fact.forward_reshape_cmp() + + +def test_reid_reshape_grad_input_128x512x7x7_target_128x25088(): + fact = ReshapeFactory(input_shape=(128, 512, 7, 7), target_shape=(128, 25088), 
+ strategy0=(0, (4, 1, 1, 1), (4, 1, 1, 1)), strategy1=(0, (4, 1, 1, 1))) + fact.grad_reshape_cmp() + + +@pytest.mark.reid_forward +def test_reid_reshape_input_128x64_target_128x64x1x1(): + fact = ReshapeFactory(input_shape=(128, 64), target_shape=(128, 64, 1, 1), strategy0=(0, (2, 1), (2, 1)), + strategy1=(0, (2, 1))) + fact.forward_reshape_cmp() + + +@pytest.mark.reid_grad +def test_reid_reshape_grad_input_128x64_target_128x64x1x1(): + fact = ReshapeFactory(input_shape=(128, 64), target_shape=(128, 64, 1, 1), strategy0=(0, (2, 1), (2, 1)), + strategy1=(0, (2, 1))) + fact.grad_reshape_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/transpose/_test_transpose_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/transpose/_test_transpose_parallel_4p.py index 7fd5462200..6a6fe1a79e 100644 --- a/tests/ut/python/parallel/parallel_end_to_end/transpose/_test_transpose_parallel_4p.py +++ b/tests/ut/python/parallel/parallel_end_to_end/transpose/_test_transpose_parallel_4p.py @@ -1,236 +1,235 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import os -import pytest -from numpy import allclose as allclose_nparray - -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class Net(Cell): - def __init__(self, perm_in, strategy0=None, strategy1=None): - super(Net, self).__init__() - self.add = P.TensorAdd(strategy=strategy0) - self.transpose = P.Transpose(strategy=strategy1) - self.perm_in = perm_in - - def construct(self, x, y): - out = self.add(x, y) - return self.transpose(out, self.perm_in) - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, x, y, output_grad): - return grad_all_with_sens(self.network)(x, y, output_grad) - - -class TransposeFactory: - def __init__(self, input_shape, perm_in, strategy0, strategy1): - prefix = "" - size = 1 - for s in input_shape: - prefix = prefix + str(s) - size = size * s - self.prefix = prefix - number_range = min(1000, size) - self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( - np.float32) - self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( - np.float32) - target_shape = self.input_np1.transpose(perm_in).shape - 
target_size = 1 - for s in target_shape: - target_size = target_size * s - number_range = min(1000, target_size) - self.target_shape = target_shape - self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2, - target_shape).astype(np.float32) - self.perm_in = perm_in - self.strategy0 = strategy0 - self.strategy1 = strategy1 - out_strategy = [] - for i in perm_in: - out_strategy.append(strategy1[1][i]) - self.out_strategy = out_strategy - need_dev_num0 = 1 - need_dev_num1 = 1 - for s in strategy0[1]: - need_dev_num0 = need_dev_num0 * s - for s in out_strategy: - need_dev_num1 = need_dev_num1 * s - self.x_id = device_id % need_dev_num0 - self.y_id = device_id % need_dev_num0 - device_index = self.id_to_list(device_id % need_dev_num1, - self.strategy1[1]) # encoding to get the index before transpose - device_index_transpose = [] - for i in perm_in: - device_index_transpose.append(device_index[i]) - self.out_id = self.list_to_id(device_index_transpose, self.out_strategy) - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def id_to_list(self, id, shape): - result = [] - r = id - for i in range(0, len(shape)): - v = 1 - for j in range(i + 1, len(shape)): - v = v * shape[j] - result.append(r // v) - r = r % v - return result - - def list_to_id(self, id_list, shape): - result = 0 - for i in range(0, len(id_list)): - v = 1 - for j in range(i + 1, len(id_list)): - v = v * shape[j] - result = result + id_list[i] * v - return result - - def forward_mindspore_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - net = Net(self.perm_in) - out = net(x, y) - return out.asnumpy() - - def forward_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - net = Net(self.perm_in, strategy0=self.strategy0, strategy1=self.strategy1) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net.set_auto_parallel() - out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) - return out.asnumpy() - - def grad_mindspore_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - output_grad = Tensor(self.output_grad_np) - net = Net(self.perm_in) - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, output_grad) - return input_grad - - def grad_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - output_grad = Tensor(self.output_grad_np) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - output_grad1 = Tensor(outgrads[self.out_id]) - net = Net(self.perm_in, strategy0=self.strategy0, strategy1=self.strategy1) - grad_net = Grad(net) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - grad_net.set_auto_parallel() - grad_net.set_train() - input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], - 
parallel_inputs_run=[x1, y1, output_grad1]) - return input_grad - - def forward_transpose_cmp(self): - out_mindspore = self.forward_mindspore_impl() - out_mindspore_parallel = self.forward_mindspore_parallel_impl() - out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) - assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) - - def grad_transpose_cmp(self): - input_grad_mindspore = self.grad_mindspore_impl() - input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() - input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() - input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() - input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() - input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() - input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) - input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) - assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) - assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) - - -def test_reid_transpose_input_256x512_output_512x256_perm_1x0(): - fact = TransposeFactory((256, 512), (1, 0), strategy0=(0, (2, 2), (2, 2)), strategy1=(0, (2, 2))) - fact.forward_transpose_cmp() - - -def test_reid_grad_transpose_input_256x512_output_512x256_perm_1x0(): - fact = TransposeFactory((256, 512), (1, 0), strategy0=(0, (2, 2), (2, 2)), strategy1=(0, (2, 2))) - fact.grad_transpose_cmp() - - -def test_reid_transpose_input_512x256_output_256x512_perm_1x0(): - fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4))) - fact.forward_transpose_cmp() - - -def test_reid_grad_transpose_input_512x256_output_256x512_perm_1x0(): - fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4))) - fact.grad_transpose_cmp() - - -def test_reid_transpose_input_512x256_output_256x512_perm_1x0_repeat(): - fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (2, 1), (2, 1)), strategy1=(0, (2, 1))) - fact.forward_transpose_cmp() - - -def test_reid_grad_transpose_input_512x256_output_256x512_perm_1x0_repeat(): - fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (2, 1), (2, 1)), strategy1=(0, (2, 1))) - fact.grad_transpose_cmp() +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
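+
+# For the Transpose cases, id_to_list() decodes this device's rank into one index
+# per sliced dimension (for example, rank 3 under a (2, 2) strategy gives [1, 1]),
+# the indices are permuted with perm_in, and list_to_id() re-encodes them to select
+# the block of the expected output that this device should produce.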
+ +import os +import numpy as np +from numpy import allclose as allclose_nparray + +import mindspore.communication.management as distributedTool +from mindspore import context +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.ops import operations as P +from mindspore.ops.composite import grad_all_with_sens + +device_num = 4 +device_id = int(os.environ["RANK_ID"]) +path = "./output/" + + +def setup_module(): + print("~~~~~~~~~~~set up~~~~~~~~~~~~~") + context.set_context(mode=context.GRAPH_MODE) + context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) + distributedTool.init() + distributedTool.create_group("0-3", [0, 1, 2, 3]) + print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") + + +def teardown_module(): + print("~~~~~~~~~~~~tear down~~~~~~~~~~") + + +class Net(Cell): + def __init__(self, perm_in, strategy0=None, strategy1=None): + super(Net, self).__init__() + self.add = P.TensorAdd(strategy=strategy0) + self.transpose = P.Transpose(strategy=strategy1) + self.perm_in = perm_in + + def construct(self, x, y): + out = self.add(x, y) + return self.transpose(out, self.perm_in) + + +class Grad(Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.network = network + + def construct(self, x, y, output_grad): + return grad_all_with_sens(self.network)(x, y, output_grad) + + +class TransposeFactory: + def __init__(self, input_shape, perm_in, strategy0, strategy1): + prefix = "" + size = 1 + for s in input_shape: + prefix = prefix + str(s) + size = size * s + self.prefix = prefix + number_range = min(1000, size) + self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( + np.float32) + self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( + np.float32) + target_shape = self.input_np1.transpose(perm_in).shape + target_size = 1 + for s in target_shape: + target_size = target_size * s + number_range = min(1000, target_size) + self.target_shape = target_shape + self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2, + target_shape).astype(np.float32) + self.perm_in = perm_in + self.strategy0 = strategy0 + self.strategy1 = strategy1 + out_strategy = [] + for i in perm_in: + out_strategy.append(strategy1[1][i]) + self.out_strategy = out_strategy + need_dev_num0 = 1 + need_dev_num1 = 1 + for s in strategy0[1]: + need_dev_num0 = need_dev_num0 * s + for s in out_strategy: + need_dev_num1 = need_dev_num1 * s + self.x_id = device_id % need_dev_num0 + self.y_id = device_id % need_dev_num0 + device_index = self.id_to_list(device_id % need_dev_num1, + self.strategy1[1]) # encoding to get the index before transpose + device_index_transpose = [] + for i in perm_in: + device_index_transpose.append(device_index[i]) + self.out_id = self.list_to_id(device_index_transpose, self.out_strategy) + + def get_parallel_blocks(self, input_, strategy): + blocks = [input_] + i = 0 + for stra in strategy: + temp = [] + while len(blocks) > 0: + block = blocks.pop(0) + temp.extend(np.split(block, stra, axis=i)) + blocks.extend(temp) + i += 1 + return blocks + + def id_to_list(self, id_, shape): + result = [] + r = id_ + for i in range(0, len(shape)): + v = 1 + for j in range(i + 1, len(shape)): + v = v * shape[j] + result.append(r // v) + r = r % v + return result + + def list_to_id(self, id_list, shape): + result = 0 + for i in range(0, len(id_list)): + v = 1 + for j in range(i + 1, len(id_list)): + v = v * 
shape[j] + result = result + id_list[i] * v + return result + + def forward_mindspore_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + net = Net(self.perm_in) + out = net(x, y) + return out.asnumpy() + + def forward_mindspore_parallel_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) + x1 = Tensor(inputs_x[self.x_id]) + y1 = Tensor(inputs_y[self.y_id]) + net = Net(self.perm_in, strategy0=self.strategy0, strategy1=self.strategy1) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + net.set_auto_parallel() + out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) + return out.asnumpy() + + def grad_mindspore_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + output_grad = Tensor(self.output_grad_np) + net = Net(self.perm_in) + grad_net = Grad(net) + grad_net.set_train() + input_grad = grad_net(x, y, output_grad) + return input_grad + + def grad_mindspore_parallel_impl(self): + x = Tensor(self.input_np1) + y = Tensor(self.input_np2) + output_grad = Tensor(self.output_grad_np) + inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) + inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) + outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) + x1 = Tensor(inputs_x[self.x_id]) + y1 = Tensor(inputs_y[self.y_id]) + output_grad1 = Tensor(outgrads[self.out_id]) + net = Net(self.perm_in, strategy0=self.strategy0, strategy1=self.strategy1) + grad_net = Grad(net) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + grad_net.set_auto_parallel() + grad_net.set_train() + input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], + parallel_inputs_run=[x1, y1, output_grad1]) + return input_grad + + def forward_transpose_cmp(self): + out_mindspore = self.forward_mindspore_impl() + out_mindspore_parallel = self.forward_mindspore_parallel_impl() + out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) + assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) + + def grad_transpose_cmp(self): + input_grad_mindspore = self.grad_mindspore_impl() + input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() + input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() + input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() + input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() + input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() + input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) + input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) + assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) + assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) + + +def test_reid_transpose_input_256x512_output_512x256_perm_1x0(): + fact = TransposeFactory((256, 512), (1, 0), strategy0=(0, (2, 2), (2, 2)), strategy1=(0, (2, 2))) + fact.forward_transpose_cmp() + + +def test_reid_grad_transpose_input_256x512_output_512x256_perm_1x0(): + fact = TransposeFactory((256, 512), (1, 0), strategy0=(0, (2, 2), (2, 2)), strategy1=(0, (2, 2))) + fact.grad_transpose_cmp() + + +def 
test_reid_transpose_input_512x256_output_256x512_perm_1x0(): + fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4))) + fact.forward_transpose_cmp() + + +def test_reid_grad_transpose_input_512x256_output_256x512_perm_1x0(): + fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4))) + fact.grad_transpose_cmp() + + +def test_reid_transpose_input_512x256_output_256x512_perm_1x0_repeat(): + fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (2, 1), (2, 1)), strategy1=(0, (2, 1))) + fact.forward_transpose_cmp() + + +def test_reid_grad_transpose_input_512x256_output_256x512_perm_1x0_repeat(): + fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (2, 1), (2, 1)), strategy1=(0, (2, 1))) + fact.grad_transpose_cmp() diff --git a/tests/ut/python/parallel/test_add_relu_redistribution.py b/tests/ut/python/parallel/test_add_relu_redistribution.py index 7a9934fe39..08ef18699a 100644 --- a/tests/ut/python/parallel/test_add_relu_redistribution.py +++ b/tests/ut/python/parallel/test_add_relu_redistribution.py @@ -54,7 +54,7 @@ class Grad(nn.Cell): return C.grad_all(self.network)(x, y) -def compile(net, x, y): +def compile_net(net, x, y): net.set_auto_parallel() _executor.compile(net, x, y) @@ -69,7 +69,7 @@ def test_add_relu_stride_slice(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([128, 32]), dtype=ms.float32) - compile(net, x, y) + compile_net(net, x, y) def test_add_relu_all_gather(): @@ -82,4 +82,4 @@ def test_add_relu_all_gather(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([128, 32]), dtype=ms.float32) - compile(net, x, y) + compile_net(net, x, y) diff --git a/tests/ut/python/parallel/test_allreduce_fusion.py b/tests/ut/python/parallel/test_allreduce_fusion.py index c593869430..607213f806 100644 --- a/tests/ut/python/parallel/test_allreduce_fusion.py +++ b/tests/ut/python/parallel/test_allreduce_fusion.py @@ -17,7 +17,6 @@ import numpy as np import mindspore as ms import mindspore.nn as nn from mindspore import Tensor, context -from mindspore import context from mindspore.common.api import _executor from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits from mindspore.nn.optim.momentum import Momentum @@ -131,56 +130,56 @@ def test_allreduce_fusion_parameters(): cost_model_context.reset_cost_model_context() cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_algorithm=2) algorithm = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_algorithm') - assert (algorithm == 2) + assert algorithm == 2 cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_algorithm=1) algorithm = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_algorithm') - assert (algorithm == 1) + assert algorithm == 1 cost_model_context.reset_cost_model_context() algorithm = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_algorithm') - assert (algorithm == 0) + assert algorithm == 0 cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_times=2) fusion_times = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_times') - assert (fusion_times == 2) + assert fusion_times == 2 cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_tail_percent=0.2) tail_percent = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_tail_percent') - assert (tail_percent == 0.2) + assert tail_percent == 0.2 cost_model_context.reset_cost_model_context() 
tail_percent = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_tail_percent') - assert (tail_percent == 0.1) + assert tail_percent == 0.1 cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_tail_time=0.2) tail_time = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_tail_time') - assert (tail_time == 0.2) + assert tail_time == 0.2 cost_model_context.reset_cost_model_context() tail_time = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_tail_time') - assert (tail_time == 0.1) + assert tail_time == 0.1 cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_allreduce_inherent_time=0.2) allreduce_inherent_time = cost_model_context.get_cost_model_context( 'costmodel_allreduce_fusion_allreduce_inherent_time') - assert (allreduce_inherent_time == 0.2) + assert allreduce_inherent_time == 0.2 cost_model_context.reset_cost_model_context() allreduce_inherent_time = cost_model_context.get_cost_model_context( 'costmodel_allreduce_fusion_allreduce_inherent_time') - assert (allreduce_inherent_time == 0.1) + assert allreduce_inherent_time == 0.1 cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_allreduce_bandwidth=0.2) allreduce_bandwidth = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_allreduce_bandwidth') - assert (allreduce_bandwidth == 0.2) + assert allreduce_bandwidth == 0.2 cost_model_context.reset_cost_model_context() allreduce_bandwidth = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_allreduce_bandwidth') - assert (allreduce_bandwidth == 0.1) + assert allreduce_bandwidth == 0.1 cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_computation_time_parameter=0.2) computation_time_parameter = cost_model_context.get_cost_model_context( 'costmodel_allreduce_fusion_computation_time_parameter') - assert (computation_time_parameter == 0.2) + assert computation_time_parameter == 0.2 cost_model_context.reset_cost_model_context() computation_time_parameter = cost_model_context.get_cost_model_context( 'costmodel_allreduce_fusion_computation_time_parameter') - assert (computation_time_parameter == 0.1) + assert computation_time_parameter == 0.1 def test_allreduce_fusion1(): @@ -201,7 +200,7 @@ def test_allreduce_fusion1(): 'backbone2.fc2.weight': 1, 'backbone2.fc1.weight': 1, 'backbone1.fc1.weight': 1} - assert (allreduce_fusion_dict == expect_dict) + assert allreduce_fusion_dict == expect_dict cost_model_context.reset_cost_model_context() @@ -214,7 +213,7 @@ def test_allreduce_fusion2(): net = SimpleDMLNet(DenseNet1(has_bias=False, activation=None), DenseNet2(has_bias=False, activation=None)) allreduce_fusion_dict = train_common(net) expect_dict = {} - assert (allreduce_fusion_dict == expect_dict) + assert allreduce_fusion_dict == expect_dict cost_model_context.reset_cost_model_context() @@ -240,7 +239,7 @@ def test_allreduce_fusion3(): 'backbone1.fc2.weight': 2, 'backbone1.fc1.bias': 2, 'backbone1.fc1.weight': 2} - assert (allreduce_fusion_dict == expect_dict) + assert allreduce_fusion_dict == expect_dict cost_model_context.reset_cost_model_context() @@ -267,7 +266,7 @@ def test_allreduce_fusion4(): 'backbone1.fc2.weight': 1, 'backbone1.fc1.weight': 1} - assert (allreduce_fusion_dict == expect_dict) + assert allreduce_fusion_dict == expect_dict cost_model_context.reset_cost_model_context() @@ -295,7 +294,7 @@ def test_allreduce_fusion5(): 'backbone1.fc4.weight': 2, 'backbone1.fc3.weight': 2, 'backbone1.fc2.weight': 1, - 
'backbone1.fc1.weight': 1, } + 'backbone1.fc1.weight': 1,} - assert (allreduce_fusion_dict == expect_dict) + assert allreduce_fusion_dict == expect_dict cost_model_context.reset_cost_model_context() diff --git a/tests/ut/python/parallel/test_alltoall.py b/tests/ut/python/parallel/test_alltoall.py index 7f9fcbfa76..a03a83b0ba 100644 --- a/tests/ut/python/parallel/test_alltoall.py +++ b/tests/ut/python/parallel/test_alltoall.py @@ -67,7 +67,6 @@ def all_to_all_net(strategy1): def all_to_all_common(strategy1): - batch_size = 32 learning_rate = 0.1 momentum = 0.9 epoch_size = 2 @@ -104,7 +103,7 @@ def test_all_to_all(): [8, 1]], 'Default/network-_VirtualDatasetCell/_backbone-WithLossCell/_backbone-AllToAllNet/MatMul-op0': [ [1, 1], [1, 8]]} - assert (strategys == expect_dict) + assert strategys == expect_dict context.set_context(save_graphs=False) diff --git a/tests/ut/python/parallel/test_arithmetic.py b/tests/ut/python/parallel/test_arithmetic.py index d4a926f0f4..6bb2aac5d2 100644 --- a/tests/ut/python/parallel/test_arithmetic.py +++ b/tests/ut/python/parallel/test_arithmetic.py @@ -43,7 +43,7 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y, b) -def compile(net, x, y, b): +def compile_net(net, x, y, b): net.set_auto_parallel() _executor.compile(net, x, y, b) @@ -69,7 +69,7 @@ def test_matmul_sub(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_add(): @@ -93,7 +93,7 @@ def test_matmul_add(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_mul(): @@ -117,7 +117,7 @@ def test_matmul_mul(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_div(): @@ -141,7 +141,7 @@ def test_matmul_div(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_greater(): @@ -165,7 +165,7 @@ def test_matmul_greater(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_add_broadcast(): @@ -189,7 +189,7 @@ def test_matmul_add_broadcast(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_add_broadcast2(): @@ -213,7 +213,7 @@ def test_matmul_add_broadcast2(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 1]), dtype=ms.float32) b = Tensor(np.ones([1, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_sub_broadcast(): @@ -237,7 +237,7 @@ def test_matmul_sub_broadcast(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_sub_broadcast2(): @@ -261,7 +261,7 @@ def test_matmul_sub_broadcast2(): x = Tensor(np.ones([64, 32]), 
dtype=ms.float32) y = Tensor(np.ones([32, 1]), dtype=ms.float32) b = Tensor(np.ones([1, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_mul_broadcast(): @@ -285,7 +285,7 @@ def test_matmul_mul_broadcast(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_mul_broadcast2(): @@ -309,7 +309,7 @@ def test_matmul_mul_broadcast2(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 1]), dtype=ms.float32) b = Tensor(np.ones([1, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_div_broadcast(): @@ -333,7 +333,7 @@ def test_matmul_div_broadcast(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_div_broadcast2(): @@ -357,7 +357,7 @@ def test_matmul_div_broadcast2(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 1]), dtype=ms.float32) b = Tensor(np.ones([1, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_greater_broadcast(): @@ -381,7 +381,7 @@ def test_matmul_greater_broadcast(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_greater_broadcast2(): @@ -405,7 +405,7 @@ def test_matmul_greater_broadcast2(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 1]), dtype=ms.float32) b = Tensor(np.ones([1, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_floordiv(): @@ -429,7 +429,7 @@ def test_matmul_floordiv(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_floordiv_broadcast(): @@ -453,7 +453,7 @@ def test_matmul_floordiv_broadcast(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_floordiv_broadcast2(): @@ -477,7 +477,7 @@ def test_matmul_floordiv_broadcast2(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 1]), dtype=ms.float32) b = Tensor(np.ones([1, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_assign_sub(): @@ -504,4 +504,4 @@ def test_assign_sub(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([128, 32]), dtype=ms.float32) z = Tensor(np.ones([128, 32]), dtype=ms.float32) - compile(net, x, y, z) + compile_net(net, x, y, z) diff --git a/tests/ut/python/parallel/test_auto_parallel_BN_PReLU.py b/tests/ut/python/parallel/test_auto_parallel_BN_PReLU.py index 02f0b9c77d..087065a9a3 100644 --- a/tests/ut/python/parallel/test_auto_parallel_BN_PReLU.py +++ b/tests/ut/python/parallel/test_auto_parallel_BN_PReLU.py @@ -20,7 +20,6 @@ from mindspore import Tensor from mindspore import context from mindspore.common.api import _executor from mindspore.ops import composite as C -from mindspore.ops import operations as P from tests.ut.python.ops.test_math_ops import VirtualLoss diff --git 
a/tests/ut/python/parallel/test_auto_parallel_arithmetic.py b/tests/ut/python/parallel/test_auto_parallel_arithmetic.py index 7eb8ddccd3..aa6177b996 100644 --- a/tests/ut/python/parallel/test_auto_parallel_arithmetic.py +++ b/tests/ut/python/parallel/test_auto_parallel_arithmetic.py @@ -18,7 +18,6 @@ import mindspore as ms import mindspore.nn as nn from mindspore import Tensor from mindspore import context -from mindspore import context from mindspore.common.api import _executor from mindspore.ops import composite as C from mindspore.ops import operations as P @@ -48,7 +47,7 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y, b) -def compile(net, x, y, b, phase): +def compile_net(net, x, y, b, phase): net.set_auto_parallel() _executor.compile(net, x, y, b, phase=phase) @@ -73,7 +72,7 @@ def test_auto_parallel_arithmetic(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 128]), dtype=ms.float32) b = Tensor(np.ones([64, 128]), dtype=ms.float32) - compile(net, x, y, b, phase='train') + compile_net(net, x, y, b, phase='train') strategies = _executor._get_strategy(net) expected_strategies = {'Default/network-Net/FloorDiv-op0': [[2, 4], [2, 4]], 'Default/network-Net/MatMul-op1': [[2, 1], [1, 4]]} @@ -100,7 +99,7 @@ def test_auto_parallel_arithmetic_broadcast_both(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 1]), dtype=ms.float32) b = Tensor(np.ones([1, 64]), dtype=ms.float32) - compile(net, x, y, b, phase='train') + compile_net(net, x, y, b, phase='train') strategies = _executor._get_strategy(net) expected_strategies = {'Default/network-Net/FloorDiv-op0': [[8, 1], [1, 1]], 'Default/network-Net/MatMul-op1': [[8, 1], [1, 1]]} @@ -127,7 +126,7 @@ def test_auto_parallel_arithmetic_broadcast_right(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 32]), dtype=ms.float32) b = Tensor(np.ones([32]), dtype=ms.float32) - compile(net, x, y, b, phase='train') + compile_net(net, x, y, b, phase='train') strategies = _executor._get_strategy(net) expected_strategies = {'Default/network-Net/FloorDiv-op0': [[4, 2], [2]], 'Default/network-Net/MatMul-op1': [[4, 1], [1, 2]]} @@ -154,7 +153,7 @@ def test_auto_parallel_arithmetic_broadcast_left(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 32]), dtype=ms.float32) b = Tensor(np.ones([128, 64, 32]), dtype=ms.float32) - compile(net, x, y, b, phase="train") + compile_net(net, x, y, b, phase="train") strategies = _executor._get_strategy(net) expected_strategies = {'Default/network-Net/FloorDiv-op0': [[4, 2], [1, 4, 2]], 'Default/network-Net/MatMul-op1': [[4, 1], [1, 2]]} diff --git a/tests/ut/python/parallel/test_auto_parallel_assign_sub_with_ref_key.py b/tests/ut/python/parallel/test_auto_parallel_assign_sub_with_ref_key.py index d95473d993..75056dee73 100644 --- a/tests/ut/python/parallel/test_auto_parallel_assign_sub_with_ref_key.py +++ b/tests/ut/python/parallel/test_auto_parallel_assign_sub_with_ref_key.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import numpy as np import re +import numpy as np import mindspore as ms import mindspore.nn as nn @@ -21,7 +21,6 @@ from mindspore import Tensor from mindspore import context from mindspore.common.api import _executor from mindspore.ops import composite as C -from mindspore.ops import operations as P from mindspore.parallel._utils import _reset_op_id as reset_op_id from tests.ut.python.ops.test_math_ops import VirtualLoss diff --git a/tests/ut/python/parallel/test_auto_parallel_double_subgraphs.py b/tests/ut/python/parallel/test_auto_parallel_double_subgraphs.py index 0c1b7dc81b..e121cecf8e 100644 --- a/tests/ut/python/parallel/test_auto_parallel_double_subgraphs.py +++ b/tests/ut/python/parallel/test_auto_parallel_double_subgraphs.py @@ -10,7 +10,6 @@ from mindspore.ops import composite as C from mindspore.ops import functional as F from mindspore.ops import operations as P from mindspore.parallel import _cost_model_context as cost_model_context -from mindspore.parallel import set_algo_parameters, get_algo_parameters, reset_algo_parameters from mindspore.parallel._utils import _reset_op_id as reset_op_id diff --git a/tests/ut/python/parallel/test_auto_parallel_four_matmul.py b/tests/ut/python/parallel/test_auto_parallel_four_matmul.py index 6cb9b6f7a2..7852165ee7 100644 --- a/tests/ut/python/parallel/test_auto_parallel_four_matmul.py +++ b/tests/ut/python/parallel/test_auto_parallel_four_matmul.py @@ -44,7 +44,7 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y, z, w, b) -def compile(net, x, y, z, w, b): +def compile_net(net, x, y, z, w, b): net.set_auto_parallel() _executor.compile(net, x, y, z, w, b) @@ -77,7 +77,7 @@ def test_four_matmul_linear(): net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") - compile(net, x, y, z, w, b) + compile_net(net, x, y, z, w, b) def test_four_matmul1(): @@ -103,7 +103,7 @@ def test_four_matmul1(): net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") - compile(net, x, y, z, w, b) + compile_net(net, x, y, z, w, b) def test_four_matmul2(): @@ -130,4 +130,4 @@ def test_four_matmul2(): net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") - compile(net, x, y, z, w, b) + compile_net(net, x, y, z, w, b) diff --git a/tests/ut/python/parallel/test_auto_parallel_inference.py b/tests/ut/python/parallel/test_auto_parallel_inference.py index ac8d4fc473..5181be717d 100644 --- a/tests/ut/python/parallel/test_auto_parallel_inference.py +++ b/tests/ut/python/parallel/test_auto_parallel_inference.py @@ -36,4 +36,4 @@ def test_inference_phase(): train_network.set_train() train_network.set_auto_parallel() - output = train_network(predict, label) + _ = train_network(predict, label) diff --git a/tests/ut/python/parallel/test_auto_parallel_matmul_prelu.py b/tests/ut/python/parallel/test_auto_parallel_matmul_prelu.py index 0c5caa38b8..c2bf469c6a 100644 --- a/tests/ut/python/parallel/test_auto_parallel_matmul_prelu.py +++ b/tests/ut/python/parallel/test_auto_parallel_matmul_prelu.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import numpy as np import re +import numpy as np import mindspore as ms import mindspore.nn as nn diff --git a/tests/ut/python/parallel/test_auto_parallel_parameter_cast.py b/tests/ut/python/parallel/test_auto_parallel_parameter_cast.py index 3f4b093d63..6d75f2a9e5 100644 --- a/tests/ut/python/parallel/test_auto_parallel_parameter_cast.py +++ b/tests/ut/python/parallel/test_auto_parallel_parameter_cast.py @@ -16,7 +16,6 @@ import numpy as np import mindspore as ms import mindspore.nn as nn -from mindspore import Tensor from mindspore import Tensor, Parameter from mindspore import context from mindspore.common import dtype as mstype diff --git a/tests/ut/python/parallel/test_auto_parallel_reduce_method.py b/tests/ut/python/parallel/test_auto_parallel_reduce_method.py index 9af55db3be..2e66490498 100644 --- a/tests/ut/python/parallel/test_auto_parallel_reduce_method.py +++ b/tests/ut/python/parallel/test_auto_parallel_reduce_method.py @@ -44,7 +44,7 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y, b) -def compile(net, x, y, b): +def compile_net(net, x, y, b): net.set_auto_parallel() _executor.compile(net, x, y, b) @@ -71,7 +71,7 @@ def test_sum_mul(): x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([32, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_sum_mul2(): @@ -95,7 +95,7 @@ def test_sum_mul2(): x = Tensor(np.ones([128, 128, 64, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 128, 64, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_sum_mul3(): @@ -119,4 +119,4 @@ def test_sum_mul3(): x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 32]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) diff --git a/tests/ut/python/parallel/test_auto_parallel_reshape.py b/tests/ut/python/parallel/test_auto_parallel_reshape.py index ce308cf485..3adbb43717 100644 --- a/tests/ut/python/parallel/test_auto_parallel_reshape.py +++ b/tests/ut/python/parallel/test_auto_parallel_reshape.py @@ -215,7 +215,7 @@ def test_reshape_auto_5(): size = 8 context.set_auto_parallel_context(device_num=size, global_rank=0) x = Tensor(np.ones([4, 1024 * size, 1]), dtype=ms.float32) - y = Tensor(np.ones([4, 1024 * size, ]), dtype=ms.float32) + y = Tensor(np.ones([4, 1024 * size,]), dtype=ms.float32) net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") @@ -263,7 +263,7 @@ def test_reshape_auto_6(): size = 8 context.set_auto_parallel_context(device_num=size, global_rank=0) x = Tensor(np.ones([4, 1024, 1]), dtype=ms.float32) - y = Tensor(np.ones([4, 1024, ]), dtype=ms.float32) + y = Tensor(np.ones([4, 1024,]), dtype=ms.float32) net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") diff --git a/tests/ut/python/parallel/test_auto_parallel_rhombus.py b/tests/ut/python/parallel/test_auto_parallel_rhombus.py index fd0d2cba43..b778e9ed38 100644 --- a/tests/ut/python/parallel/test_auto_parallel_rhombus.py +++ b/tests/ut/python/parallel/test_auto_parallel_rhombus.py @@ -44,7 +44,7 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y, b) -def compile(net, x, y, b): +def compile_net(net, x, y, b): net.set_auto_parallel() _executor.compile(net, x, y, b) @@ -72,7 +72,7 @@ def test_rhombus1(): net = 
GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") - compile(net, x, y, b) + compile_net(net, x, y, b) def test_rhombus2(): @@ -103,7 +103,7 @@ def test_rhombus2(): net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") - compile(net, x, y, b) + compile_net(net, x, y, b) def test_rhombus3(): @@ -134,4 +134,4 @@ def test_rhombus3(): net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") - compile(net, x, y, z) + compile_net(net, x, y, z) diff --git a/tests/ut/python/parallel/test_auto_parallel_softmax_loss.py b/tests/ut/python/parallel/test_auto_parallel_softmax_loss.py index c63a0d378d..1bcd49b8d8 100644 --- a/tests/ut/python/parallel/test_auto_parallel_softmax_loss.py +++ b/tests/ut/python/parallel/test_auto_parallel_softmax_loss.py @@ -21,7 +21,6 @@ from mindspore import context from mindspore.common.api import _executor from mindspore.ops import composite as C from mindspore.ops import operations as P -from tests.ut.python.ops.test_math_ops import VirtualLoss class NetWithLoss(nn.Cell): diff --git a/tests/ut/python/parallel/test_auto_parallel_transformer.py b/tests/ut/python/parallel/test_auto_parallel_transformer.py index dd4734c5a3..c208e7852c 100644 --- a/tests/ut/python/parallel/test_auto_parallel_transformer.py +++ b/tests/ut/python/parallel/test_auto_parallel_transformer.py @@ -105,8 +105,8 @@ def test_dmnet_train_step(): size = 8 context.set_auto_parallel_context(device_num=size, global_rank=0) - input = Tensor(np.ones([4096, 4096]).astype(np.float32) * 0.01) + input_ = Tensor(np.ones([4096, 4096]).astype(np.float32) * 0.01) net = GradWrap(NetWithLoss(MultiTransformer())) context.set_auto_parallel_context(parallel_mode="auto_parallel") net.set_auto_parallel() - _executor.compile(net, input) + _executor.compile(net, input_) diff --git a/tests/ut/python/parallel/test_auto_parallel_two_bn.py b/tests/ut/python/parallel/test_auto_parallel_two_bn.py index c6c01be5c1..3c73290b1e 100644 --- a/tests/ut/python/parallel/test_auto_parallel_two_bn.py +++ b/tests/ut/python/parallel/test_auto_parallel_two_bn.py @@ -1,5 +1,19 @@ -import numpy as np +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import re +import numpy as np import mindspore as ms import mindspore.nn as nn @@ -33,7 +47,7 @@ class Blockcell(nn.Cell): return out -def getBlock(): +def get_block(): return Blockcell() @@ -41,8 +55,8 @@ def test_two_bn(): class Net(nn.Cell): def __init__(self): super().__init__() - self.block1 = getBlock() - self.block2 = getBlock() + self.block1 = get_block() + self.block2 = get_block() self.relu = P.ReLU() self.add = P.TensorAdd() self.bias = Tensor(np.ones([64, 64]), dtype=ms.float32) diff --git a/tests/ut/python/parallel/test_auto_parallel_two_matmul.py b/tests/ut/python/parallel/test_auto_parallel_two_matmul.py index c5352fc1c7..1792687c57 100644 --- a/tests/ut/python/parallel/test_auto_parallel_two_matmul.py +++ b/tests/ut/python/parallel/test_auto_parallel_two_matmul.py @@ -104,23 +104,23 @@ def test_two_matmul(): set_algo_parameters(tensor_slice_align_enable=False, tensor_slice_align_size=32, fully_use_devices=False, elementwise_op_strategy_follow=False) para_slice_align_enable = get_algo_parameters("tensor_slice_align_enable") - assert para_slice_align_enable == False + assert not para_slice_align_enable para_slice_align_size = get_algo_parameters("tensor_slice_align_size") assert para_slice_align_size == 32 fully_use_devices = get_algo_parameters("fully_use_devices") - assert fully_use_devices == False + assert not fully_use_devices elementwise_op_strategy_follow = get_algo_parameters("elementwise_op_strategy_follow") - assert elementwise_op_strategy_follow == False + assert not elementwise_op_strategy_follow reset_algo_parameters() para_slice_align_enable = get_algo_parameters("tensor_slice_align_enable") - assert para_slice_align_enable == False + assert not para_slice_align_enable para_slice_align_size = get_algo_parameters("tensor_slice_align_size") assert para_slice_align_size == 16 fully_use_devices = get_algo_parameters("fully_use_devices") - assert fully_use_devices == True + assert fully_use_devices elementwise_op_strategy_follow = get_algo_parameters("elementwise_op_strategy_follow") - assert elementwise_op_strategy_follow == False + assert not elementwise_op_strategy_follow x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) diff --git a/tests/ut/python/parallel/test_auto_star_elimination.py b/tests/ut/python/parallel/test_auto_star_elimination.py index b8919c2b94..9c4678f91c 100644 --- a/tests/ut/python/parallel/test_auto_star_elimination.py +++ b/tests/ut/python/parallel/test_auto_star_elimination.py @@ -11,9 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import math + import numpy as np -import os import mindspore as ms import mindspore.nn as nn @@ -21,10 +20,8 @@ from mindspore import Tensor, Parameter from mindspore import context from mindspore.common import dtype as mstype from mindspore.common.api import _executor -from mindspore.common.initializer import initializer from mindspore.nn.loss.loss import _Loss from mindspore.ops import composite as C -from mindspore.ops import functional as F from mindspore.ops import operations as P from tests.ut.python.ops.test_math_ops import VirtualLoss diff --git a/tests/ut/python/parallel/test_batch_matmul.py b/tests/ut/python/parallel/test_batch_matmul.py index 7ed4029b51..f49a9c322e 100644 --- a/tests/ut/python/parallel/test_batch_matmul.py +++ b/tests/ut/python/parallel/test_batch_matmul.py @@ -41,7 +41,7 @@ _w2 = Tensor(np.ones([128, 32, 32]), dtype=ms.float32) _b = Tensor(np.ones([128, 64, 16]), dtype=ms.float32) -def compile(net): +def compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() @@ -54,7 +54,7 @@ def test_batch_matmul_data_parallel(): strategy1 = ((16, 1, 1), (16, 1, 1)) strategy2 = ((16, 1, 1), (16, 1, 1)) net = Net(_w1, _w2, False, strategy1, strategy2) - compile(net) + compile_net(net) def test_batch_matmul_model_parallel(): @@ -62,7 +62,7 @@ def test_batch_matmul_model_parallel(): strategy1 = ((1, 1, 1), (1, 1, 1)) strategy2 = ((1, 1, 1), (1, 1, 16)) net = Net(_w1, _w2, False, strategy1, strategy2) - compile(net) + compile_net(net) def test_batch_matmul_hybrid_parallel(): @@ -70,13 +70,13 @@ def test_batch_matmul_hybrid_parallel(): strategy1 = ((2, 2, 2), (2, 2, 2)) strategy2 = ((2, 2, 2), (2, 2, 2)) net = Net(_w1, _w2, False, strategy1, strategy2) - compile(net) + compile_net(net) def test_batch_matmul_auto_parallel(): context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0) net = Net(_w1, _w2, False) - compile(net) + compile_net(net) def test_batch_matmul_repeat_calc(): @@ -84,7 +84,7 @@ def test_batch_matmul_repeat_calc(): strategy1 = ((2, 2, 4), (2, 2, 4)) strategy2 = ((1, 2, 2), (1, 2, 2)) net = Net(_w1, _w2, False, strategy1, strategy2) - compile(net) + compile_net(net) def test_batch_matmul_transpose_b(): @@ -92,4 +92,4 @@ def test_batch_matmul_transpose_b(): strategy1 = ((2, 2, 4), (2, 2, 4)) strategy2 = ((1, 2, 2), (1, 2, 2)) net = Net(_w1, _w2, True, strategy1, strategy2) - compile(net) + compile_net(net) diff --git a/tests/ut/python/parallel/test_batchnorm_batch_parallel.py b/tests/ut/python/parallel/test_batchnorm_batch_parallel.py index d395a56a06..5935c44441 100644 --- a/tests/ut/python/parallel/test_batchnorm_batch_parallel.py +++ b/tests/ut/python/parallel/test_batchnorm_batch_parallel.py @@ -30,7 +30,6 @@ from mindspore.train import Model, ParallelMode from tests.dataset_mock import MindData dev_num = 8 -strategy_no_weight = ((dev_num, 1, 1, 1),) strategy_weight = ((dev_num, 1, 1, 1), (1, 1, 1, 1)) strategy_bn = ((dev_num, 1, 1, 1), (1,), (1,)) strategy_fc_weight_bias = ((dev_num, 1), (1, 1), (1,)) @@ -62,7 +61,7 @@ def conv7x7(in_channels, out_channels, stride=1, padding=0): weight_shape = (out_channels, in_channels, 7, 7) weight = Tensor(np.ones(weight_shape).astype(np.float32)) conv = Conv2d(in_channels, out_channels, - kernel_size=7, stride=stride, padding=0, weight_init=weight, has_bias=False, + kernel_size=7, stride=stride, padding=padding, weight_init=weight, has_bias=False, pad_mode="same") 
conv.conv2d.set_strategy(strategy_weight) return conv @@ -95,7 +94,7 @@ class ResNet(Cell): def __init__(self, num_classes=100): super(ResNet, self).__init__() strategy_no_weight = ((dev_num, 1, 1, 1),) - self.conv1 = conv7x7(3, 64, stride=2, padding=3) + self.conv1 = conv7x7(3, 64, stride=2, padding=0) self.bn1 = bn_with_initialize(64) self.relu = ReLU() self.relu.relu.set_strategy(strategy_no_weight) @@ -124,7 +123,6 @@ def test_batchnorm_batch_parallel(): learning_rate = 0.1 momentum = 0.9 epoch_size = 2 - rank_size = 0 predict = Tensor(np.ones([batch_size, 3, 224, 224]), dtype=ms.float32) label = Tensor(np.ones([batch_size]), dtype=ms.int32) diff --git a/tests/ut/python/parallel/test_bn_prelu_cell.py b/tests/ut/python/parallel/test_bn_prelu_cell.py index e5c94bd241..526ab3203f 100644 --- a/tests/ut/python/parallel/test_bn_prelu_cell.py +++ b/tests/ut/python/parallel/test_bn_prelu_cell.py @@ -171,7 +171,7 @@ class PReLU(nn.Cell): if not isinstance(w, Tensor): w = Tensor(w) - self.w = Parameter(initializer(w, [channel, ]), name='a') + self.w = Parameter(initializer(w, [channel,]), name='a') self.prelu = P.PReLU() self.relu = P.ReLU().set_strategy(((1))) @@ -181,7 +181,7 @@ class PReLU(nn.Cell): class BNNet(nn.Cell): - def __init__(self, strategy0, strategy1, strategy2): + def __init__(self): super(BNNet, self).__init__() self.bn = FusedBatchNorm(512) self.prelu = PReLU(512) @@ -192,13 +192,12 @@ class BNNet(nn.Cell): return x -def bn_net(strategy0, strategy1, strategy2): - return BNNet(strategy0=strategy0, strategy1=strategy1, strategy2=strategy2) +def bn_net(): + return BNNet() -def bn_common(parallel_mode, train_flag, strategy0=None, strategy1=None, strategy2=None, strategy_loss=None): +def bn_common(parallel_mode, train_flag, strategy_loss=None): context.set_context(mode=context.GRAPH_MODE) - batch_size = 32 learning_rate = 0.1 momentum = 0.9 epoch_size = 2 @@ -207,7 +206,7 @@ def bn_common(parallel_mode, train_flag, strategy0=None, strategy1=None, strateg predict = Tensor(np.ones([32, 512]), dtype=ms.float32) label = Tensor(np.ones([32]), dtype=ms.int32) dataset = Dataset(predict, label, 2) - net = bn_net(strategy0, strategy1, strategy2) + net = bn_net() loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) loss.softmax_cross_entropy.set_strategy(strategy_loss) diff --git a/tests/ut/python/parallel/test_bool_grad.py b/tests/ut/python/parallel/test_bool_grad.py index 6ce4e9be94..735f66bb6a 100644 --- a/tests/ut/python/parallel/test_bool_grad.py +++ b/tests/ut/python/parallel/test_bool_grad.py @@ -21,7 +21,7 @@ from mindspore import context from mindspore.common.parameter import Parameter from mindspore.nn.optim import Momentum from mindspore.ops import operations as P -from mindspore.train import Model, ParallelMode +from mindspore.train import Model from tests.dataset_mock import MindData context.set_context(mode=context.GRAPH_MODE) diff --git a/tests/ut/python/parallel/test_broadcast_dict.py b/tests/ut/python/parallel/test_broadcast_dict.py index ccc299a93f..ff02d045ca 100644 --- a/tests/ut/python/parallel/test_broadcast_dict.py +++ b/tests/ut/python/parallel/test_broadcast_dict.py @@ -54,7 +54,7 @@ def test_param_broadcast(): network.set_train() predict = Tensor(np.ones([64, 512]).astype(np.float32) * 0.01) - out = network(predict) + _ = network(predict) context.reset_auto_parallel_context() @@ -67,5 +67,5 @@ def test_param_not_broadcast(): network.set_train() predict = Tensor(np.ones([64, 512]).astype(np.float32) * 0.01) - out = network(predict) + _ = network(predict) 
context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_comparison_function_info.py b/tests/ut/python/parallel/test_comparison_function_info.py index 4ac4e4cbcb..adb5a5f395 100644 --- a/tests/ut/python/parallel/test_comparison_function_info.py +++ b/tests/ut/python/parallel/test_comparison_function_info.py @@ -44,7 +44,7 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y, b) -def compile(net, x, y, b): +def compile_net(net, x, y, b): net.set_auto_parallel() _executor.compile(net, x, y, b) @@ -69,7 +69,7 @@ def test_matmul_equal(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_not_equal(): @@ -92,7 +92,7 @@ def test_matmul_not_equal(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_not_equal_repeated_calculation(): @@ -115,7 +115,7 @@ def test_matmul_not_equal_repeated_calculation(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_maximum(): @@ -138,7 +138,7 @@ def test_matmul_maximum(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_maximum_broadcast(): @@ -161,7 +161,7 @@ def test_matmul_maximum_broadcast(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_maximum_broadcast2(): @@ -184,7 +184,7 @@ def test_matmul_maximum_broadcast2(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 1]), dtype=ms.float32) b = Tensor(np.ones([1, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_minimum(): @@ -207,7 +207,7 @@ def test_matmul_minimum(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_minimum_broadcast(): @@ -230,7 +230,7 @@ def test_matmul_minimum_broadcast(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_minimum_broadcast2(): @@ -253,7 +253,7 @@ def test_matmul_minimum_broadcast2(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 1]), dtype=ms.float32) b = Tensor(np.ones([1, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_minimum_auto_parallel(): @@ -274,4 +274,4 @@ def test_matmul_minimum_auto_parallel(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 1]), dtype=ms.float32) b = Tensor(np.ones([1, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) diff --git a/tests/ut/python/parallel/test_dataset_util.py b/tests/ut/python/parallel/test_dataset_util.py index 9f81e62b02..f3c861dd68 100644 --- a/tests/ut/python/parallel/test_dataset_util.py +++ 
b/tests/ut/python/parallel/test_dataset_util.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np - import mindspore as ms from mindspore import Tensor from mindspore.train._utils import _to_full_shapes, _to_full_tensor @@ -35,7 +33,7 @@ def test_to_full_tensor_1(): expect = ([[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [1, 2, 3], [4, 5, 6], [0, 0, 0], [0, 0, 0]]) expect_tensor = Tensor(expect, dtype=ms.float32) - assert (full_tensor[0] == expect_tensor) + assert full_tensor[0] == expect_tensor def test_to_full_tensor_2(): @@ -52,7 +50,7 @@ def test_to_full_tensor_2(): expect_tensor1 = Tensor(expect1, dtype=ms.int32) expect_tensors = (expect_tensor0, expect_tensor1) - assert (full_tensor == expect_tensors) + assert full_tensor == expect_tensors def test_to_full_tensor_sens_2(): @@ -70,4 +68,4 @@ def test_to_full_tensor_sens_2(): expect_tensor_sens = Tensor(0.1, dtype=ms.float32) expect_tensors = (expect_tensor0, expect_tensor1, expect_tensor_sens) - assert (full_tensor == expect_tensors) + assert full_tensor == expect_tensors diff --git a/tests/ut/python/parallel/test_dense_matmul.py b/tests/ut/python/parallel/test_dense_matmul.py index 48ee5c8d52..e408c65f84 100644 --- a/tests/ut/python/parallel/test_dense_matmul.py +++ b/tests/ut/python/parallel/test_dense_matmul.py @@ -47,8 +47,8 @@ class DenseMutMulNet(nn.Cell): def test_dmnet_train_step(): context.reset_auto_parallel_context() - input = Tensor(np.ones([32, 128]).astype(np.float32) * 0.01) + input_ = Tensor(np.ones([32, 128]).astype(np.float32) * 0.01) label = Tensor(np.zeros([32, 768]).astype(np.float32)) net = DenseMutMulNet() net = train_step_with_loss_warp(DenseMutMulNet()) - _executor.compile(net, input, label) + _executor.compile(net, input_, label) diff --git a/tests/ut/python/parallel/test_different_type_for_div_op.py b/tests/ut/python/parallel/test_different_type_for_div_op.py index 31af23e100..4bb09bbdc2 100644 --- a/tests/ut/python/parallel/test_different_type_for_div_op.py +++ b/tests/ut/python/parallel/test_different_type_for_div_op.py @@ -32,7 +32,7 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y, bias) -def compile(net, x, y, bias): +def compile_net(net, x, y, bias): net.set_auto_parallel() _executor.compile(net, x, y, bias) @@ -58,7 +58,7 @@ def test_sum_as_loss_float16(): x = Tensor(np.ones([64, 32]), dtype=ms.float16) y = Tensor(np.ones([64, 32]), dtype=ms.float16) bias = Tensor(np.ones([64]), dtype=ms.float16) - compile(net, x, y, bias) + compile_net(net, x, y, bias) def test_sum_as_loss_float32(): @@ -82,7 +82,7 @@ def test_sum_as_loss_float32(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 32]), dtype=ms.float32) bias = Tensor(np.ones([64]), dtype=ms.float32) - compile(net, x, y, bias) + compile_net(net, x, y, bias) def test_sum_as_loss_int32(): @@ -106,4 +106,4 @@ def test_sum_as_loss_int32(): x = Tensor(np.ones([64, 32]), dtype=ms.int32) y = Tensor(np.ones([64, 32]), dtype=ms.int32) bias = Tensor(np.ones([64]), dtype=ms.int32) - compile(net, x, y, bias) + compile_net(net, x, y, bias) diff --git a/tests/ut/python/parallel/test_dropout_do_mask.py b/tests/ut/python/parallel/test_dropout_do_mask.py index 03bcf3c1b6..f3d8f6ef8e 100644 --- a/tests/ut/python/parallel/test_dropout_do_mask.py +++ b/tests/ut/python/parallel/test_dropout_do_mask.py @@ -50,7 +50,7 @@ _w1 = Tensor(np.ones([128, 64]), dtype=ms.float32) _b = Tensor(np.ones([128, 64]), dtype=ms.float32) -def compile(net): +def 
compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() @@ -63,7 +63,7 @@ def test_dropout_do_mask_data_parallel(): strategy1 = ((16, 1), (16, 1)) strategy2 = ((16, 1),) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) def test_dropout_do_mask_model_parallel(): @@ -71,7 +71,7 @@ def test_dropout_do_mask_model_parallel(): strategy1 = ((1, 16), (1, 16)) strategy2 = ((1, 16),) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) def test_dropout_do_mask_hybrid_parallel(): @@ -79,13 +79,13 @@ def test_dropout_do_mask_hybrid_parallel(): strategy1 = ((4, 4), (4, 4)) strategy2 = ((4, 4),) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) def test_dropout_do_mask_auto_parallel(): context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0) net = Net(_w1) - compile(net) + compile_net(net) def test_dropout_do_mask_repeat_calc(): @@ -93,4 +93,4 @@ def test_dropout_do_mask_repeat_calc(): strategy1 = ((4, 4), (4, 4)) strategy2 = ((2, 4),) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) diff --git a/tests/ut/python/parallel/test_element_wise_function.py b/tests/ut/python/parallel/test_element_wise_function.py index 89a6893347..fd6e5cfd8d 100644 --- a/tests/ut/python/parallel/test_element_wise_function.py +++ b/tests/ut/python/parallel/test_element_wise_function.py @@ -44,7 +44,7 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y, b) -def compile(net, x, y, b): +def compile_net(net, x, y, b): net.set_auto_parallel() _executor.compile(net, x, y, b) @@ -72,7 +72,7 @@ def test_matmul_pow(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_exp(): @@ -98,7 +98,7 @@ def test_matmul_exp(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_log(): @@ -124,7 +124,7 @@ def test_matmul_log(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_logical_not(): @@ -151,7 +151,7 @@ def test_matmul_logical_not(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_cast(): @@ -178,7 +178,7 @@ def test_matmul_cast(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.int32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_cast_before_mirror(): @@ -202,7 +202,7 @@ def test_cast_before_mirror(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float16) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_cast_before_mirror1(): @@ -226,7 +226,7 @@ def test_cast_before_mirror1(): x = Tensor(np.ones([128, 32]), dtype=ms.float16) y = Tensor(np.ones([32, 64]), dtype=ms.float16) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + 
compile_net(net, x, y, b) def test_cast_before_mirror2(): @@ -250,7 +250,7 @@ def test_cast_before_mirror2(): x = Tensor(np.ones([128, 32]), dtype=ms.float16) y = Tensor(np.ones([32, 64]), dtype=ms.float16) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_cast_before_mirror3(): @@ -274,7 +274,7 @@ def test_cast_before_mirror3(): x = Tensor(np.ones([128, 32]), dtype=ms.float16) y = Tensor(np.ones([32, 64]), dtype=ms.float16) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_mul_two_cast(): @@ -303,4 +303,4 @@ def test_mul_two_cast(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([128, 32]), dtype=ms.float32) b = Tensor(np.ones([128, 32]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) diff --git a/tests/ut/python/parallel/test_expand_dims.py b/tests/ut/python/parallel/test_expand_dims.py index 649adcdbe9..9d144ed50d 100644 --- a/tests/ut/python/parallel/test_expand_dims.py +++ b/tests/ut/python/parallel/test_expand_dims.py @@ -54,7 +54,7 @@ _w1 = Tensor(np.ones([128, 64, 32]), dtype=ms.float32) _b = Tensor(np.ones([128, 64, 32, 1]), dtype=ms.float32) -def compile(net): +def compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() @@ -68,7 +68,7 @@ def test_expand_dims_data_parallel(): strategy2 = ((16, 1, 1),) strategy3 = ((16, 1, 1, 1), (16, 1, 1, 1)) net = Net(_w1, strategy1, strategy2, strategy3) - compile(net) + compile_net(net) def test_expand_dims_model_parallel(): @@ -77,7 +77,7 @@ def test_expand_dims_model_parallel(): strategy2 = ((1, 1, 16),) strategy3 = ((1, 1, 16, 1), (1, 1, 16, 1)) net = Net(_w1, strategy1, strategy2, strategy3) - compile(net) + compile_net(net) def test_expand_dims_hybrid_parallel(): @@ -86,13 +86,13 @@ def test_expand_dims_hybrid_parallel(): strategy2 = ((2, 2, 4),) strategy3 = ((2, 2, 4, 1), (2, 2, 4, 1)) net = Net(_w1, strategy1, strategy2, strategy3) - compile(net) + compile_net(net) def test_expand_dims_auto_parallel(): context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0) net = Net(_w1) - compile(net) + compile_net(net) def test_expand_dims_repeat_calc(): @@ -101,7 +101,7 @@ def test_expand_dims_repeat_calc(): strategy2 = ((1, 2, 2),) strategy3 = ((2, 2, 4, 1), (2, 2, 4, 1)) net = Net(_w1, strategy1, strategy2, strategy3) - compile(net) + compile_net(net) def test_expand_dims_parameter(): @@ -109,4 +109,4 @@ def test_expand_dims_parameter(): strategy1 = ((1, 2, 2),) strategy2 = ((2, 2, 4, 1), (2, 2, 4, 1)) net = Net2(_w1, strategy1, strategy2) - compile(net) + compile_net(net) diff --git a/tests/ut/python/parallel/test_forward_graph.py b/tests/ut/python/parallel/test_forward_graph.py index f84dfe05d6..c8561210c5 100644 --- a/tests/ut/python/parallel/test_forward_graph.py +++ b/tests/ut/python/parallel/test_forward_graph.py @@ -39,7 +39,7 @@ _w1 = Tensor(np.ones([128, 64, 32]), dtype=ms.float32) _b = Tensor(np.ones([128, 64, 32]), dtype=ms.float32) -def compile(net): +def compile_net(net): net.set_auto_parallel() _executor.compile(net, _x, _b) context.reset_auto_parallel_context() @@ -50,7 +50,7 @@ def test_forward_graph_data_parallel(): strategy1 = ((16, 1, 1), (16, 1, 1)) strategy2 = ((16, 1, 1),) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) def test_forward_graph_model_parallel(): @@ -58,7 +58,7 @@ def 
test_forward_graph_model_parallel(): strategy1 = ((1, 1, 16), (1, 1, 16)) strategy2 = ((1, 1, 16),) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) def test_forward_graph_hybrid_parallel(): @@ -66,13 +66,13 @@ def test_forward_graph_hybrid_parallel(): strategy1 = ((2, 2, 4), (2, 2, 4)) strategy2 = ((2, 2, 4),) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) def test_forward_graph_auto_parallel(): context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0) net = Net(_w1) - compile(net) + compile_net(net) def test_forward_graph_repeat_calc(): @@ -80,4 +80,4 @@ def test_forward_graph_repeat_calc(): strategy1 = ((2, 2, 4), (2, 2, 4)) strategy2 = ((1, 2, 2),) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) diff --git a/tests/ut/python/parallel/test_gather_v2.py b/tests/ut/python/parallel/test_gather_v2.py index 6d943be511..26e0964b03 100644 --- a/tests/ut/python/parallel/test_gather_v2.py +++ b/tests/ut/python/parallel/test_gather_v2.py @@ -18,7 +18,6 @@ import mindspore as ms import mindspore.nn as nn from mindspore import Tensor from mindspore import context -from mindspore.common import dtype as mstype from mindspore.common.api import _executor from mindspore.ops import composite as C from mindspore.ops import operations as P diff --git a/tests/ut/python/parallel/test_gather_v2_primitive.py b/tests/ut/python/parallel/test_gather_v2_primitive.py index a416923759..99559784a4 100644 --- a/tests/ut/python/parallel/test_gather_v2_primitive.py +++ b/tests/ut/python/parallel/test_gather_v2_primitive.py @@ -120,7 +120,7 @@ class TrainOneStepCell(Cell): return F.depend(loss, self.optimizer(grads)) -def net_trains(gather_v2_strategy, criterion, rank): +def net_trains(criterion, rank): init() lr = 0.1 momentum = 0.9 @@ -151,42 +151,42 @@ def test_auto_batch_parallel(): gather_v2_strategy = None criterion = GatherV2(1, strategy=gather_v2_strategy, index_size=batch_size_per_device * device_number) rank = 2 - net_trains(gather_v2_strategy, criterion, rank) + net_trains(criterion, rank) def test_2d_index_auto_batch_parallel(): gather_v2_strategy = None criterion = GatherV2(2, strategy=gather_v2_strategy, index_size=batch_size_per_device * device_number) rank = 2 - net_trains(gather_v2_strategy, criterion, rank) + net_trains(criterion, rank) def test_batch_parallel(): gather_v2_strategy = ((device_number, 1),) criterion = GatherV2(1, strategy=gather_v2_strategy, index_size=batch_size_per_device * device_number) rank = 2 - net_trains(gather_v2_strategy, criterion, rank) + net_trains(criterion, rank) def test_strategy1(): gather_v2_strategy = ((16, 2),) rank = 2 criterion = GatherV2(1, strategy=gather_v2_strategy, index_size=batch_size_per_device * device_number) - net_trains(gather_v2_strategy, criterion, rank) + net_trains(criterion, rank) def test_strategy2(): gather_v2_strategy = ((1, device_number),) rank = 2 criterion = GatherV2(1, strategy=gather_v2_strategy, index_size=batch_size_per_device * device_number) - net_trains(gather_v2_strategy, criterion, rank) + net_trains(criterion, rank) def test_strategy3(): gather_v2_strategy = ((8, 1),) rank = 2 criterion = GatherV2(1, strategy=gather_v2_strategy, index_size=batch_size_per_device * device_number) - net_trains(gather_v2_strategy, criterion, rank) + net_trains(criterion, rank) class GatherV2Axis1(_Loss): @@ -217,18 +217,18 @@ def test_axis1_auto_batch_parallel(): gather_v2_strategy = None criterion = GatherV2Axis1(1, strategy=gather_v2_strategy, 
index_size=512) rank = 2 - net_trains(gather_v2_strategy, criterion, rank) + net_trains(criterion, rank) def test_axis1_batch_parallel(): gather_v2_strategy = ((device_number, 1),) criterion = GatherV2Axis1(1, strategy=gather_v2_strategy, index_size=512) rank = 2 - net_trains(gather_v2_strategy, criterion, rank) + net_trains(criterion, rank) def test_axis1_strategy1(): gather_v2_strategy = ((16, 2),) rank = 17 criterion = GatherV2Axis1(1, strategy=gather_v2_strategy, index_size=512) - net_trains(gather_v2_strategy, criterion, rank) + net_trains(criterion, rank) diff --git a/tests/ut/python/parallel/test_get_next.py b/tests/ut/python/parallel/test_get_next.py index 0ab5b5aa71..7bd8482027 100644 --- a/tests/ut/python/parallel/test_get_next.py +++ b/tests/ut/python/parallel/test_get_next.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np - import mindspore as ms import mindspore.nn as nn from mindspore import Tensor @@ -23,8 +21,6 @@ from mindspore.common.initializer import initializer from mindspore.common.parameter import Parameter, ParameterTuple from mindspore.ops import composite as C from mindspore.ops import operations as P -from mindspore.ops.operations.comm_ops import _VirtualDataset -from tests.ut.python.ops.test_math_ops import VirtualLoss context.set_context(mode=context.GRAPH_MODE) @@ -56,7 +52,7 @@ class GradWrap(nn.Cell): return C.grad_by_list(self.network, self.weights)() -def compile(net): +def compile_net(net): net.set_auto_parallel() _executor.compile(net) @@ -67,7 +63,7 @@ def test_get_next_single(): super().__init__() self.norm = P.L2Normalize(axis=1) self.prelu = P.PReLU() - self.w = Parameter(initializer(w, [channel, ]), name='w') + self.w = Parameter(initializer(w, [channel,]), name='w') def construct(self, data): x = self.norm(data) @@ -84,7 +80,7 @@ def test_get_next_semi_auto_parallel(): super().__init__() self.norm = P.L2Normalize().set_strategy(strategy1) self.prelu = P.PReLU().set_strategy(strategy2) - self.w = Parameter(initializer(w, [channel, ]), name='w') + self.w = Parameter(initializer(w, [channel,]), name='w') def construct(self, data): x = self.norm(data) @@ -99,7 +95,7 @@ def test_get_next_semi_auto_parallel(): strategy4=strategy4) net = GradWrap(net_with_loss) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - compile(net) + compile_net(net) def test_get_next_semi_auto_parallel1(): @@ -108,7 +104,7 @@ def test_get_next_semi_auto_parallel1(): super().__init__() self.norm = P.L2Normalize().set_strategy(strategy1) self.prelu = P.PReLU().set_strategy(strategy2) - self.w = Parameter(initializer(w, [channel, ]), name='w') + self.w = Parameter(initializer(w, [channel,]), name='w') def construct(self, data): x = self.norm(data) @@ -123,7 +119,7 @@ def test_get_next_semi_auto_parallel1(): strategy4=strategy4) net = GradWrap(net_with_loss) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - compile(net) + compile_net(net) def test_get_next_auto_parallel(): @@ -132,7 +128,7 @@ def test_get_next_auto_parallel(): super().__init__() self.norm = P.L2Normalize().set_strategy(strategy1) self.prelu = P.PReLU().set_strategy(strategy2) - self.w = Parameter(initializer(w, [channel, ]), name='w') + self.w = Parameter(initializer(w, [channel,]), name='w') def construct(self, data): x = self.norm(data) @@ -144,7 +140,7 @@ def test_get_next_auto_parallel(): net_with_loss = NetWithLoss(network, [ms.float32, ms.int32], [[32, 64], [32]], 2) net = 
GradWrap(net_with_loss) context.set_auto_parallel_context(parallel_mode="auto_parallel") - compile(net) + compile_net(net) def test_only_one_get_next(): @@ -159,4 +155,4 @@ def test_only_one_get_next(): context.set_auto_parallel_context(device_num=4, global_rank=0) net = Net() context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - compile(net) + compile_net(net) diff --git a/tests/ut/python/parallel/test_get_parameter_layout.py b/tests/ut/python/parallel/test_get_parameter_layout.py index 3d27ddec94..a34ee94840 100644 --- a/tests/ut/python/parallel/test_get_parameter_layout.py +++ b/tests/ut/python/parallel/test_get_parameter_layout.py @@ -52,8 +52,8 @@ def test_get_parameter_layout(): x_layout = [[2, 4], [1, -1], [16, 32]] # device_arrangement = [2, 4], tensor_map = [1, -1] weight_layout = [[2, 4], [0, -1], [16, 32]] # device_arrangement = [2, 4], tensor_map = [0, -1] expect_dict = {'x': x_layout, 'w1': weight_layout} - # to be resovled: static local variable count_p is used in step_parallel.cc, it needs to be reset between each ut - assert (net.parameter_layout_dict == expect_dict) + # to be resovled: static local variable count_p is used in step_parallel.cc, it needs to be reset between each ut + assert net.parameter_layout_dict == expect_dict if __name__ == '__main__': diff --git a/tests/ut/python/parallel/test_hybird_parallel_activation.py b/tests/ut/python/parallel/test_hybird_parallel_activation.py index dded194bf1..8931fda12a 100644 --- a/tests/ut/python/parallel/test_hybird_parallel_activation.py +++ b/tests/ut/python/parallel/test_hybird_parallel_activation.py @@ -44,7 +44,7 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y, b) -def compile(net, x, y, b): +def compile_net(net, x, y, b): net.set_auto_parallel() _executor.compile(net, x, y, b) @@ -72,7 +72,7 @@ def test_matmul_tanh(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_activation(): @@ -98,7 +98,7 @@ def test_matmul_activation(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_softmax(): @@ -124,7 +124,7 @@ def test_matmul_softmax(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_matmul_logsoftmax(): @@ -150,7 +150,7 @@ def test_matmul_logsoftmax(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_activations(): @@ -179,7 +179,7 @@ def test_activations(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_activations_repeated_calculation(): @@ -211,7 +211,7 @@ def test_activations_repeated_calculation(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_activations_axis_tuple(): @@ -243,4 +243,4 @@ def test_activations_axis_tuple(): x = 
Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) diff --git a/tests/ut/python/parallel/test_layer_norm.py b/tests/ut/python/parallel/test_layer_norm.py index ce834a58f0..08fe687a73 100644 --- a/tests/ut/python/parallel/test_layer_norm.py +++ b/tests/ut/python/parallel/test_layer_norm.py @@ -48,7 +48,7 @@ _w = Tensor(np.ones([128, 64, 32, 16]), dtype=ms.float32) _b = Tensor(np.ones([128, 64, 32, 16]), dtype=ms.float32) -def compile(net): +def compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() @@ -62,7 +62,7 @@ def test_layer_norm_data_parallel(): strategy2 = ((16, 1, 1, 1), (1, 1, 1), (1, 1, 1)) strategy3 = ((16, 1, 1, 1), (16, 1, 1, 1)) net = Net(_w, strategy1, strategy2, strategy3) - compile(net) + compile_net(net) def test_layer_norm_model_parallel(): @@ -71,7 +71,7 @@ def test_layer_norm_model_parallel(): strategy2 = ((1, 16, 1, 1), (16, 1, 1), (16, 1, 1)) strategy3 = ((1, 16, 1, 1), (1, 16, 1, 1)) net = Net(_w, strategy1, strategy2, strategy3) - compile(net) + compile_net(net) def test_layer_norm_hybrid_parallel(): @@ -80,13 +80,13 @@ def test_layer_norm_hybrid_parallel(): strategy2 = ((2, 8, 1, 1), (8, 1, 1), (8, 1, 1)) strategy3 = ((2, 8, 1, 1), (2, 8, 1, 1)) net = Net(_w, strategy1, strategy2, strategy3) - compile(net) + compile_net(net) def test_layer_norm_auto_parallel(): context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0) net = Net(_w) - compile(net) + compile_net(net) def test_layer_norm_repeat_calc(): @@ -95,7 +95,7 @@ def test_layer_norm_repeat_calc(): strategy2 = ((2, 2, 1, 1), (2, 1, 1), (2, 1, 1)) strategy3 = ((2, 2, 4, 1), (2, 2, 4, 1)) net = Net(_w, strategy1, strategy2, strategy3) - compile(net) + compile_net(net) def test_layer_norm_wrong_strategy(): @@ -105,4 +105,4 @@ def test_layer_norm_wrong_strategy(): strategy3 = ((2, 2, 4, 1), (2, 2, 4, 1)) net = Net(_w, strategy1, strategy2, strategy3) with pytest.raises(RuntimeError): - compile(net) + compile_net(net) diff --git a/tests/ut/python/parallel/test_linear.py b/tests/ut/python/parallel/test_linear.py index efa6f58c61..795a0f604f 100644 --- a/tests/ut/python/parallel/test_linear.py +++ b/tests/ut/python/parallel/test_linear.py @@ -21,7 +21,6 @@ from mindspore import context from mindspore.common.api import _executor from mindspore.ops import composite as C from mindspore.ops import operations as P -from tests.ut.python.ops.test_math_ops import VirtualLoss class NetWithLoss(nn.Cell): diff --git a/tests/ut/python/parallel/test_loss_and_optimizer.py b/tests/ut/python/parallel/test_loss_and_optimizer.py index 6a2e9bc65d..b4cf62c29e 100644 --- a/tests/ut/python/parallel/test_loss_and_optimizer.py +++ b/tests/ut/python/parallel/test_loss_and_optimizer.py @@ -19,9 +19,8 @@ import mindspore.nn as nn from mindspore import Tensor, Parameter from mindspore import context from mindspore.common.api import _executor -from mindspore.nn import TrainOneStepCell, WithLossCell +from mindspore.nn import TrainOneStepCell from mindspore.nn.optim import Momentum, LARS -from mindspore.ops import composite as C from mindspore.ops import operations as P @@ -36,7 +35,7 @@ class NetWithLoss(nn.Cell): return self.loss(predict, b)[0] -def compile(net, x, b): +def compile_net(net, x, b): net.set_auto_parallel() _executor.compile(net, x, b) @@ 
-72,7 +71,7 @@ def test_momentum(): train_net = TrainOneStepCell(net_with_loss, optimizer) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - compile(train_net, x, b) + compile_net(train_net, x, b) def test_momentum_with_loss_scale(): @@ -106,7 +105,7 @@ def test_momentum_with_loss_scale(): train_net = TrainOneStepCell(net_with_loss, optimizer) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - compile(train_net, x, b) + compile_net(train_net, x, b) def test_momentum_with_dynamic_lr(): @@ -141,7 +140,7 @@ def test_momentum_with_dynamic_lr(): train_net = TrainOneStepCell(net_with_loss, optimizer) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - compile(train_net, x, b) + compile_net(train_net, x, b) def test_momentum_with_loss_scale_and_dynamic_lr(): @@ -177,7 +176,7 @@ def test_momentum_with_loss_scale_and_dynamic_lr(): train_net = TrainOneStepCell(net_with_loss, optimizer) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - compile(train_net, x, b) + compile_net(train_net, x, b) def test_lars(): @@ -205,11 +204,11 @@ def test_lars(): net = Net(strategy1, strategy2, weight) lr = Tensor(np.ones([6]), dtype=ms.float32) - SGD = Momentum(net.trainable_params(), lr, 0.9) - optimizer = LARS(SGD, epsilon=1e-08, hyperpara=0.02, decay_filter=lambda x: 'bn' not in x.name, + sgd = Momentum(net.trainable_params(), lr, 0.9) + optimizer = LARS(sgd, epsilon=1e-08, hyperpara=0.02, decay_filter=lambda x: 'bn' not in x.name, lars_filter=lambda x: 'bn' not in x.name) net_with_loss = NetWithLoss(net, strategy3) train_net = TrainOneStepCell(net_with_loss, optimizer) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - compile(train_net, x, b) + compile_net(train_net, x, b) diff --git a/tests/ut/python/parallel/test_matmul_tensor.py b/tests/ut/python/parallel/test_matmul_tensor.py index 4af9fabbf4..329d8653a0 100644 --- a/tests/ut/python/parallel/test_matmul_tensor.py +++ b/tests/ut/python/parallel/test_matmul_tensor.py @@ -46,7 +46,7 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y) -def compile(net, x, y): +def compile_net(net, x, y): net.set_auto_parallel() _executor.compile(net, x, y) @@ -79,7 +79,7 @@ def test_two_matmul(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 128]), dtype=ms.float32) - compile(net, x, y) + compile_net(net, x, y) def test_matmul_mul_broadcast2(): @@ -103,7 +103,7 @@ def test_matmul_mul_broadcast2(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 1]), dtype=ms.float32) - compile(net, x, y) + compile_net(net, x, y) def test_two_matmul1(): @@ -133,7 +133,7 @@ def test_two_matmul1(): x = Tensor(np.ones([128, 128]), dtype=ms.float32) y = Tensor(np.ones([128, 128]), dtype=ms.float32) - compile(net, x, y) + compile_net(net, x, y) def test_matmul_add_tensor(): @@ -158,4 +158,4 @@ def test_matmul_add_tensor(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) - compile(net, x, y) + compile_net(net, x, y) diff --git a/tests/ut/python/parallel/test_neg.py b/tests/ut/python/parallel/test_neg.py index a44b851a74..34819373d6 100644 --- a/tests/ut/python/parallel/test_neg.py +++ b/tests/ut/python/parallel/test_neg.py @@ -39,7 +39,7 @@ _w1 = Tensor(np.ones([128, 64, 32]), dtype=ms.float32) _b = Tensor(np.ones([128, 64, 32]), dtype=ms.float32) -def compile(net): +def compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = 
TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() @@ -52,7 +52,7 @@ def test_neg_data_parallel(): strategy1 = ((16, 1, 1), (16, 1, 1)) strategy2 = ((16, 1, 1),) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) def test_neg_model_parallel(): @@ -60,7 +60,7 @@ def test_neg_model_parallel(): strategy1 = ((1, 1, 16), (1, 1, 16)) strategy2 = ((1, 1, 16),) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) def test_neg_hybrid_parallel(): @@ -68,13 +68,13 @@ def test_neg_hybrid_parallel(): strategy1 = ((2, 2, 4), (2, 2, 4)) strategy2 = ((2, 2, 4),) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) def test_neg_auto_parallel(): context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0) net = Net(_w1) - compile(net) + compile_net(net) def test_neg_repeat_calc(): @@ -82,4 +82,4 @@ def test_neg_repeat_calc(): strategy1 = ((2, 2, 4), (2, 2, 4)) strategy2 = ((1, 2, 2),) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) diff --git a/tests/ut/python/parallel/test_one_dev.py b/tests/ut/python/parallel/test_one_dev.py index 86626aabf1..056f4a15c7 100644 --- a/tests/ut/python/parallel/test_one_dev.py +++ b/tests/ut/python/parallel/test_one_dev.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import re +import numpy as np import mindspore as ms import mindspore.nn as nn diff --git a/tests/ut/python/parallel/test_one_hot_net.py b/tests/ut/python/parallel/test_one_hot_net.py index ba067781a6..db152ff4b8 100644 --- a/tests/ut/python/parallel/test_one_hot_net.py +++ b/tests/ut/python/parallel/test_one_hot_net.py @@ -159,8 +159,8 @@ class SemiAutoOneHotNet(Cell): weight_np = np.zeros(weight_shape, np.float32) self.weight = Parameter(Tensor(weight_np), name='model_parallel_weight') - def construct(self, input, label): - input_n = self.normalize(input) + def construct(self, input_, label): + input_n = self.normalize(input_) w = self.normalize2(self.weight) fc_o = self.fc(input_n, w) fc_o_shape = F.shape(fc_o) @@ -209,9 +209,8 @@ class Dataset(MindData): raise StopIteration self.index += 1 if self.input_num == 2: - return self.predict, self.label - else: - return self.predict, + return (self.predict, self.label) + return (self.predict,) def reset(self): self.index = 0 @@ -268,20 +267,20 @@ def test_bn_reshape_dense_bn_train_loss(): batch_size = 16 device_num = 16 context.set_auto_parallel_context(device_num=device_num, global_rank=0) - input = Tensor(np.ones([batch_size, 2, 32, 32]).astype(np.float32) * 0.01) + input_ = Tensor(np.ones([batch_size, 2, 32, 32]).astype(np.float32) * 0.01) label = Tensor(np.ones([batch_size]), dtype=ms.int32) net = GradWrap(NetWithLoss(BNReshapeDenseBNNet())) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") net.set_auto_parallel() - _executor.compile(net, input, label) + _executor.compile(net, input_, label) def test_semi_one_hot_net_batch(): batch_size = 16 context.set_auto_parallel_context(device_num=device_num, global_rank=0) - input = Tensor(np.ones([batch_size * 1, 512]).astype(np.float32) * 0.01) + input_ = Tensor(np.ones([batch_size * 1, 512]).astype(np.float32) * 0.01) label = Tensor(np.ones([batch_size]), dtype=ms.int32) net = SemiAutoOneHotNet(args=Args(), strategy=StrategyBatch()) @@ -289,7 +288,7 @@ def test_semi_one_hot_net_batch(): context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") net.set_auto_parallel() - 
_executor.compile(net, input, label) + _executor.compile(net, input_, label) def test_semi_one_hot_net_model(): diff --git a/tests/ut/python/parallel/test_one_weight_parameter.py b/tests/ut/python/parallel/test_one_weight_parameter.py index 7ba812f24a..ac98b9320f 100644 --- a/tests/ut/python/parallel/test_one_weight_parameter.py +++ b/tests/ut/python/parallel/test_one_weight_parameter.py @@ -20,7 +20,6 @@ from mindspore import Tensor, Parameter, ParameterTuple from mindspore import context from mindspore.common.api import _executor from mindspore.ops import composite as C -from mindspore.ops import functional as F from mindspore.ops import operations as P diff --git a/tests/ut/python/parallel/test_onehot.py b/tests/ut/python/parallel/test_onehot.py index 68983c3dc9..81152d4849 100644 --- a/tests/ut/python/parallel/test_onehot.py +++ b/tests/ut/python/parallel/test_onehot.py @@ -126,15 +126,6 @@ def test_onehot_auto(): compile_graph(strategy1, strategy2, strategy3, strategy4, auto=True) -def test_onehot_model_parallel(): - context.set_auto_parallel_context(device_num=16, global_rank=0) - strategy1 = ((2, 4), (4, 2)) - strategy2 = ((2, 8),) - strategy3 = ((1, 16), (), ()) - strategy4 = ((16, 1), (16, 1)) - compile_graph(strategy1, strategy2, strategy3, strategy4) - - def test_onehot_batch_parallel_axis0(): context.set_auto_parallel_context(device_num=16, global_rank=0) strategy1 = ((2, 4), (4, 2)) diff --git a/tests/ut/python/parallel/test_operator_model_parallel.py b/tests/ut/python/parallel/test_operator_model_parallel.py index cb7d013c51..26f804537b 100644 --- a/tests/ut/python/parallel/test_operator_model_parallel.py +++ b/tests/ut/python/parallel/test_operator_model_parallel.py @@ -21,8 +21,6 @@ from mindspore.common.initializer import initializer from mindspore.common.parameter import Parameter from mindspore.common.tensor import Tensor from mindspore.nn.cell import Cell -from mindspore.nn.layer.activation import ReLU -from mindspore.nn.layer.basic import Dense from mindspore.nn.layer.basic import Flatten from mindspore.nn.layer.conv import Conv2d from mindspore.nn.layer.normalization import BatchNorm2d @@ -61,8 +59,7 @@ class DenseWrap(Cell): self.has_bias = has_bias self.weight = Parameter(initializer( - weight_init, [output_channels, input_channels]), - name="weight") + weight_init, [output_channels, input_channels]), name="weight") if self.has_bias: self.bias = Parameter(initializer( @@ -103,7 +100,7 @@ class DatasetLenet(MindData): self.index = 0 -def conv3x3(in_channels, out_channels, stride=1, padding=1): +def conv3x3(in_channels, out_channels, stride=1): """3x3 convolution """ weight_shape = (out_channels, in_channels, 3, 3) weight = Tensor(np.ones(weight_shape).astype(np.float32)) @@ -114,7 +111,7 @@ def conv3x3(in_channels, out_channels, stride=1, padding=1): return conv -def conv1x1(in_channels, out_channels, stride=1, padding=0): +def conv1x1(in_channels, out_channels, stride=1): """1x1 convolution""" weight_shape = (out_channels, in_channels, 1, 1) weight = Tensor(np.ones(weight_shape).astype(np.float32)) @@ -125,7 +122,7 @@ def conv1x1(in_channels, out_channels, stride=1, padding=0): return conv -def conv7x7(in_channels, out_channels, stride=1, padding=0): +def conv7x7(in_channels, out_channels, stride=1): """1x1 convolution""" weight_shape = (out_channels, in_channels, 7, 7) weight = Tensor(np.ones(weight_shape).astype(np.float32)) @@ -186,18 +183,17 @@ class ResidualBlock(Cell): def __init__(self, in_channels, out_channels, - stride=1, - down_sample=False): + stride=1): 
super(ResidualBlock, self).__init__() out_chls = out_channels // self.expansion - self.conv1 = conv1x1(in_channels, out_chls, stride=1, padding=0) + self.conv1 = conv1x1(in_channels, out_chls, stride=1) self.bn1 = bn_with_initialize(out_chls) - self.conv2 = conv3x3(out_chls, out_chls, stride=stride, padding=0) + self.conv2 = conv3x3(out_chls, out_chls, stride=stride) self.bn2 = bn_with_initialize(out_chls) - self.conv3 = conv1x1(out_chls, out_channels, stride=1, padding=0) + self.conv3 = conv1x1(out_chls, out_channels, stride=1) self.bn3 = bn_with_initialize_last(out_channels) self.relu1 = P.ReLU().set_strategy(strategy_no_weight) @@ -236,21 +232,21 @@ class ResidualBlockWithDown(Cell): super(ResidualBlockWithDown, self).__init__() out_chls = out_channels // self.expansion - self.conv1 = conv1x1(in_channels, out_chls, stride=1, padding=0) + self.conv1 = conv1x1(in_channels, out_chls, stride=1) self.bn1 = bn_with_initialize(out_chls) - self.conv2 = conv3x3(out_chls, out_chls, stride=stride, padding=0) + self.conv2 = conv3x3(out_chls, out_chls, stride=stride) self.bn2 = bn_with_initialize(out_chls) - self.conv3 = conv1x1(out_chls, out_channels, stride=1, padding=0) + self.conv3 = conv1x1(out_chls, out_channels, stride=1) self.bn3 = bn_with_initialize_last(out_channels) self.relu1 = P.ReLU().set_strategy(strategy_no_weight) self.relu2 = P.ReLU().set_strategy(strategy_no_weight) self.relu3 = P.ReLU().set_strategy(strategy_no_weight) - self.downSample = down_sample + self.down_sample = down_sample - self.conv_down_sample = conv1x1(in_channels, out_channels, stride=stride, padding=0) + self.conv_down_sample = conv1x1(in_channels, out_channels, stride=stride) self.bn_down_sample = bn_with_initialize(out_channels) self.add = TensorAdd().set_strategy(strategy_add) @@ -279,7 +275,7 @@ class ResidualBlockWithDown(Cell): class MakeLayer0(Cell): - def __init__(self, block, layer_num, in_channels, out_channels, stride): + def __init__(self, block, in_channels, out_channels, stride): super(MakeLayer0, self).__init__() self.a = ResidualBlockWithDown(in_channels, out_channels, stride=1, down_sample=True) self.b = block(out_channels, out_channels, stride=stride) @@ -295,14 +291,14 @@ class MakeLayer0(Cell): class ResNet(Cell): - def __init__(self, block, layer_num, num_classes=100): + def __init__(self, block, num_classes=100): super(ResNet, self).__init__() - self.conv1 = conv7x7(3, 64, stride=2, padding=3) + self.conv1 = conv7x7(3, 64, stride=2) self.bn1 = bn_with_initialize(64) self.relu = P.ReLU().set_strategy(strategy_no_weight) self.maxpool = MaxPool2d(kernel_size=3, stride=2, pad_mode="same") self.layer1 = MakeLayer0( - block, layer_num[0], in_channels=64, out_channels=256, stride=1) + block, in_channels=64, out_channels=256, stride=1) self.pool = M.ReduceMean(keep_dims=True).set_strategy(strategy_no_weight) self.fc = fc_with_initialize(64 * block.expansion, num_classes) self.flatten = Flatten() @@ -320,12 +316,12 @@ class ResNet(Cell): class ResNetModelParallel(Cell): - def __init__(self, block, layer_num, num_classes=100): + def __init__(self, block, num_classes=100): super(ResNetModelParallel, self).__init__() self.relu = P.ReLU().set_strategy(((1, dev_num, 1, 1),)) self.maxpool = MaxPool2d(kernel_size=3, stride=2, pad_mode="same") self.layer1 = MakeLayer0( - block, layer_num[0], in_channels=64, out_channels=256, stride=1) + block, in_channels=64, out_channels=256, stride=1) self.pool = M.ReduceMean(keep_dims=True).set_strategy(strategy_no_weight) self.fc = fc_with_initialize(64 * block.expansion, 
num_classes) self.flatten = Flatten() @@ -341,11 +337,11 @@ class ResNetModelParallel(Cell): def resnet_operator_net(num_classes): - return ResNet(ResidualBlock, [3, 4, 6, 3], num_classes) + return ResNet(ResidualBlock, num_classes) def resnet_model_parallel_net(num_classes): - return ResNetModelParallel(ResidualBlock, [3, 4, 6, 3], num_classes) + return ResNetModelParallel(ResidualBlock, num_classes) def test_resnet_operator_batch_parallel(): @@ -354,7 +350,6 @@ def test_resnet_operator_batch_parallel(): learning_rate = 0.1 momentum = 0.9 epoch_size = 2 - rank_size = dev_num context.reset_auto_parallel_context() context.set_auto_parallel_context(device_num=dev_num, global_rank=0) @@ -381,7 +376,6 @@ def test_resnet_model_parallel(): learning_rate = 0.1 momentum = 0.9 epoch_size = 2 - rank_size = dev_num context.reset_auto_parallel_context() context.set_auto_parallel_context(device_num=dev_num, global_rank=0) diff --git a/tests/ut/python/parallel/test_optimizer_clone_weight.py b/tests/ut/python/parallel/test_optimizer_clone_weight.py index 8570aede2f..baf5e74861 100644 --- a/tests/ut/python/parallel/test_optimizer_clone_weight.py +++ b/tests/ut/python/parallel/test_optimizer_clone_weight.py @@ -35,7 +35,7 @@ class NetWithLoss(nn.Cell): return self.loss(predict, b)[0] -def compile(net, x, b): +def compile_net(net, x, b): net.set_auto_parallel() _Executor().compile(net, x, b) @@ -72,7 +72,7 @@ def test_optimizer_clone_weight(): train_net = TrainOneStepCell(net_with_loss, optimizer) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - compile(train_net, x, b) + compile_net(train_net, x, b) def test_optimizer_clone_weight2(): @@ -107,4 +107,4 @@ def test_optimizer_clone_weight2(): train_net = TrainOneStepCell(net_with_loss, optimizer) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - compile(train_net, x, b) + compile_net(train_net, x, b) diff --git a/tests/ut/python/parallel/test_parameter_init.py b/tests/ut/python/parallel/test_parameter_init.py index 1daf3be7a5..bd36876f0c 100644 --- a/tests/ut/python/parallel/test_parameter_init.py +++ b/tests/ut/python/parallel/test_parameter_init.py @@ -52,7 +52,7 @@ def test_parameter_init(): weight = Tensor(np.ones([64, 32]), dtype=ms.float32) net = Net(strategy1, weight) - net(x, ) + net(x,) if __name__ == '__main__': diff --git a/tests/ut/python/parallel/test_prelu.py b/tests/ut/python/parallel/test_prelu.py index ac84c1364c..6f0200c14b 100644 --- a/tests/ut/python/parallel/test_prelu.py +++ b/tests/ut/python/parallel/test_prelu.py @@ -44,7 +44,7 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y) -def compile(net, x, y): +def compile_net(net, x, y): net.set_auto_parallel() _executor.compile(net, x, y) @@ -63,7 +63,7 @@ def test_prelu_single_success1(): net = GradWrap(NetWithLoss(Net())) x = Tensor(np.random.rand(1, 33, 4, 4), ms.float32) w = Tensor(np.random.rand(33), ms.float32) - compile(net, x, w) + compile_net(net, x, w) def test_prelu_single_success2(): @@ -80,7 +80,7 @@ def test_prelu_single_success2(): net = GradWrap(NetWithLoss(Net())) x = Tensor(np.random.rand(1, 33, 4, 4), ms.float32) w = Tensor([0.1], ms.float32) - compile(net, x, w) + compile_net(net, x, w) def test_prelu_parallel_success1(): @@ -100,7 +100,7 @@ def test_prelu_parallel_success1(): x = Tensor(np.random.rand(4, 4, 32, 64), dtype=ms.float32) w = Tensor(np.random.rand(4), dtype=ms.float32) net = GradWrap(NetWithLoss(Net(strategy))) - compile(net, x, w) + compile_net(net, x, w) def test_prelu_parallel_success2(): @@ 
-120,7 +120,7 @@ def test_prelu_parallel_success2(): x = Tensor(np.random.rand(4, 4, 32, 64), dtype=ms.float32) w = Tensor(np.random.rand(4), dtype=ms.float32) net = GradWrap(NetWithLoss(Net(strategy))) - compile(net, x, w) + compile_net(net, x, w) def test_prelu_parallel_success3(): @@ -183,7 +183,7 @@ def test_prelu_parallel_success4(): x = Tensor(np.random.rand(4, 16, 32, 64), dtype=ms.float32) w = Tensor(np.random.rand(16), dtype=ms.float32) net = GradWrap(NetWithLoss(Net(strategy))) - compile(net, x, w) + compile_net(net, x, w) def test_prelu_parallel_success5(): @@ -203,4 +203,4 @@ def test_prelu_parallel_success5(): x = Tensor(np.random.rand(4, 16, 32, 64), dtype=ms.float32) w = Tensor(np.random.rand(1), dtype=ms.float32) net = GradWrap(NetWithLoss(Net(strategy))) - compile(net, x, w) + compile_net(net, x, w) diff --git a/tests/ut/python/parallel/test_prelu_cell.py b/tests/ut/python/parallel/test_prelu_cell.py index 40f8ab2701..dca467ef8d 100644 --- a/tests/ut/python/parallel/test_prelu_cell.py +++ b/tests/ut/python/parallel/test_prelu_cell.py @@ -47,9 +47,8 @@ class Dataset(MindData): raise StopIteration self.index += 1 if self.input_num == 2: - return self.predict, self.label - else: - return self.predict, + return (self.predict, self.label) + return (self.predict,) def reset(self): self.index = 0 @@ -68,7 +67,7 @@ class PReLU(nn.Cell): if not isinstance(w, Tensor): raise TypeError("w only support np.float32, float or Tensor type.") - self.w = Parameter(initializer(w, [channel, ]), name='a') + self.w = Parameter(initializer(w, [channel,]), name='a') self.prelu = P.PReLU() self.relu = P.ReLU().set_strategy(((1,),)) self.sub = P.Sub().set_strategy(((1,), (1,))) @@ -97,7 +96,6 @@ def prelu_net(): def reshape_common(parallel_mode): - batch_size = 32 learning_rate = 0.1 momentum = 0.9 epoch_size = 2 diff --git a/tests/ut/python/parallel/test_reduce_method_info.py b/tests/ut/python/parallel/test_reduce_method_info.py index 530454fc1a..ca2bcb68fc 100644 --- a/tests/ut/python/parallel/test_reduce_method_info.py +++ b/tests/ut/python/parallel/test_reduce_method_info.py @@ -44,7 +44,7 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y, b) -def compile(net, x, y, b): +def compile_net(net, x, y, b): net.set_auto_parallel() _executor.compile(net, x, y, b) @@ -74,7 +74,7 @@ def test_sum_mul(): x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_sum_mul2(): @@ -101,7 +101,7 @@ def test_sum_mul2(): x = Tensor(np.ones([128, 128, 64, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 128, 64, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_sum_mul3(): @@ -128,7 +128,7 @@ def test_sum_mul3(): x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 32]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_sum_mul4(): @@ -155,7 +155,7 @@ def test_sum_mul4(): x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 32, 1]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_sum_mul5(): @@ -179,7 +179,7 @@ def test_sum_mul5(): x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = 
Tensor(np.ones([1, 32, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_sum_mul6(): @@ -203,7 +203,7 @@ def test_sum_mul6(): x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 1, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_sum_mul7(): @@ -227,7 +227,7 @@ def test_sum_mul7(): x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([1, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_max_mul(): @@ -254,7 +254,7 @@ def test_max_mul(): x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 32]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_min_mul(): @@ -281,7 +281,7 @@ def test_min_mul(): x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([32, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_reduce_mean_mul_float32(): @@ -309,7 +309,7 @@ def test_reduce_mean_mul_float32(): y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([32, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) class ArgMaxWithValueNet(nn.Cell): @@ -321,7 +321,7 @@ class ArgMaxWithValueNet(nn.Cell): def construct(self, x, y, b): out = self.mul1(x, y) - index, out = self.arg_max_with_value(out) + _, out = self.arg_max_with_value(out) out = self.mul2(out, b) return out @@ -335,16 +335,16 @@ class ArgMinWithValueNet(nn.Cell): def construct(self, x, y, b): out = self.mul1(x, y) - index, out = self.arg_min_with_value(out) + _, out = self.arg_min_with_value(out) out = self.mul2(out, b) return out -def gen_inputs_and_compile(net): +def gen_inputs_and_compile_net(net): x = Tensor(np.ones([128, 64, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 64, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def tobefixed_test_arg_max_with_value_mul_semi_axis_parallel(): @@ -354,7 +354,7 @@ def tobefixed_test_arg_max_with_value_mul_semi_axis_parallel(): strategy3 = ((2, 4), (2, 4)) net = GradWrap(NetWithLoss(ArgMaxWithValueNet(strategy1, strategy2, strategy3))) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - gen_inputs_and_compile(net) + gen_inputs_and_compile_net(net) def test_arg_max_with_value_mul_semi(): @@ -364,7 +364,7 @@ def test_arg_max_with_value_mul_semi(): strategy3 = ((2, 4), (2, 4)) net = GradWrap(NetWithLoss(ArgMaxWithValueNet(strategy1, strategy2, strategy3))) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - gen_inputs_and_compile(net) + gen_inputs_and_compile_net(net) def test_arg_max_with_value_mul_auto(): @@ -374,7 +374,7 @@ def test_arg_max_with_value_mul_auto(): strategy3 = None net = GradWrap(NetWithLoss(ArgMaxWithValueNet(strategy1, strategy2, strategy3))) context.set_auto_parallel_context(parallel_mode="auto_parallel") - gen_inputs_and_compile(net) + gen_inputs_and_compile_net(net) def test_arg_min_with_value_mul_semi_axis_parallel(): @@ -384,7 +384,7 @@ def test_arg_min_with_value_mul_semi_axis_parallel(): strategy3 = ((2, 4), (2, 4)) net = GradWrap(NetWithLoss(ArgMinWithValueNet(strategy1, strategy2, strategy3))) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") 
- gen_inputs_and_compile(net) + gen_inputs_and_compile_net(net) def test_arg_min_with_value_mul_semi(): @@ -394,7 +394,7 @@ def test_arg_min_with_value_mul_semi(): strategy3 = ((2, 4), (2, 4)) net = GradWrap(NetWithLoss(ArgMinWithValueNet(strategy1, strategy2, strategy3))) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - gen_inputs_and_compile(net) + gen_inputs_and_compile_net(net) def test_arg_min_with_value_mul_auto(): @@ -404,7 +404,7 @@ def test_arg_min_with_value_mul_auto(): strategy3 = None net = GradWrap(NetWithLoss(ArgMinWithValueNet(strategy1, strategy2, strategy3))) context.set_auto_parallel_context(parallel_mode="auto_parallel") - gen_inputs_and_compile(net) + gen_inputs_and_compile_net(net) class ArgMinWithValueNet2(nn.Cell): @@ -416,7 +416,7 @@ class ArgMinWithValueNet2(nn.Cell): def construct(self, x, y, b): out = self.mul1(x, y) - index, out = self.arg_min_with_value(out) + _, out = self.arg_min_with_value(out) out = self.relu(out) return out @@ -428,7 +428,7 @@ def tobefixed_test_arg_min_with_value_mul_semi_axis_parallel2(): strategy3 = ((2, 4, 1),) net = GradWrap(NetWithLoss(ArgMinWithValueNet2(strategy1, strategy2, strategy3))) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - gen_inputs_and_compile(net) + gen_inputs_and_compile_net(net) def test_arg_min_with_value_mul_semi2(): @@ -438,7 +438,7 @@ def test_arg_min_with_value_mul_semi2(): strategy3 = ((2, 4, 1),) net = GradWrap(NetWithLoss(ArgMinWithValueNet2(strategy1, strategy2, strategy3))) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - gen_inputs_and_compile(net) + gen_inputs_and_compile_net(net) def test_arg_min_with_value_mul_auto2(): @@ -448,7 +448,7 @@ def test_arg_min_with_value_mul_auto2(): strategy3 = None net = GradWrap(NetWithLoss(ArgMinWithValueNet2(strategy1, strategy2, strategy3))) context.set_auto_parallel_context(parallel_mode="auto_parallel") - gen_inputs_and_compile(net) + gen_inputs_and_compile_net(net) def test_cross_batch(): @@ -475,7 +475,7 @@ def test_cross_batch(): x = Tensor(np.ones([32, 64]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([32, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_cross_batch2(): @@ -502,7 +502,7 @@ def test_cross_batch2(): x = Tensor(np.ones([32, 64]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([32, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_cross_batch_auto(): @@ -526,7 +526,7 @@ def test_cross_batch_auto(): x = Tensor(np.ones([32, 64]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([32, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_max_empty_tuple(): @@ -554,4 +554,4 @@ def test_max_empty_tuple(): y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 32]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) diff --git a/tests/ut/python/parallel/test_reshape.py b/tests/ut/python/parallel/test_reshape.py index f3e4160b01..7d671b381b 100644 --- a/tests/ut/python/parallel/test_reshape.py +++ b/tests/ut/python/parallel/test_reshape.py @@ -18,7 +18,6 @@ import mindspore as ms import mindspore.nn as nn from mindspore import Tensor from mindspore import context -from mindspore.common import dtype as mstype from mindspore.common.api import _executor from mindspore.common.parameter import Parameter from mindspore.common.parameter 
import ParameterTuple @@ -54,9 +53,8 @@ class Dataset(MindData): raise StopIteration self.index += 1 if self.input_num == 2: - return self.predict, self.label - else: - return self.predict, + return (self.predict, self.label) + return (self.predict,) def reset(self): self.index = 0 @@ -82,7 +80,6 @@ def reshape_net(strategy0, strategy1, strategy2): def reshape_common(parallel_mode, strategy0, strategy1, strategy2, strategy_loss): - batch_size = 32 learning_rate = 0.1 momentum = 0.9 epoch_size = 2 @@ -306,21 +303,21 @@ class ReshapeNet6(nn.Cell): return matmul2_o -def compile(net, input): +def compile_net(net, input_): net.set_auto_parallel() - _executor.compile(net, input) + _executor.compile(net, input_) def reshape_net2(backbone): batch_size = 16 device_num = 16 context.set_auto_parallel_context(device_num=device_num, global_rank=0) - input = Tensor(np.ones([batch_size * device_num, 512, 7, 7]).astype(np.float32) * 0.01) + input_ = Tensor(np.ones([batch_size * device_num, 512, 7, 7]).astype(np.float32) * 0.01) net = GradWrap(NetWithLoss(backbone)) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - compile(net, input) + compile_net(net, input_) def test_reshape_net1_1(): @@ -480,11 +477,11 @@ def test_batchnorm_reshape_train(): device_num = 16 context.set_auto_parallel_context(device_num=device_num, global_rank=0) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - input = Tensor(np.ones([batch_size * device_num, 512]).astype(np.float32) * 0.01) + input_ = Tensor(np.ones([batch_size * device_num, 512]).astype(np.float32) * 0.01) net = GradWrap(NetWithLoss(BatchNormReshapeNet())) - compile(net, input) + compile_net(net, input_) def bn_with_initialize(out_channels): @@ -517,12 +514,12 @@ def test_bn_reshape_dense_bn_train(): batch_size = 16 device_num = 16 context.set_auto_parallel_context(device_num=device_num, global_rank=0) - input = Tensor(np.ones([batch_size, 2, 32, 32]).astype(np.float32) * 0.01) + input_ = Tensor(np.ones([batch_size, 2, 32, 32]).astype(np.float32) * 0.01) net = GradWrap(NetWithLoss(BNReshapeDenseBNNet())) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - compile(net, input) + compile_net(net, input_) class ParallelReduceMeanNet(nn.Cell): diff --git a/tests/ut/python/parallel/test_reshape_parameter.py b/tests/ut/python/parallel/test_reshape_parameter.py index 214b6cb50d..4eee4a2def 100644 --- a/tests/ut/python/parallel/test_reshape_parameter.py +++ b/tests/ut/python/parallel/test_reshape_parameter.py @@ -58,7 +58,7 @@ class Net(nn.Cell): return out -def compile(net, x, y): +def compile_net(net, x, y): net.set_auto_parallel() _executor.compile(net, x, y) @@ -69,7 +69,7 @@ def test_reshape_parameter_data_parallel(): net = GradWrap(NetWithLoss(Net(strategy))) x = Tensor(np.ones([10000, 36]), dtype=ms.float32) y = Tensor(np.ones([10000, 36, 1]), dtype=ms.float32) - compile(net, x, y) + compile_net(net, x, y) def test_reshape_parameter_model_parallel(): @@ -78,4 +78,4 @@ def test_reshape_parameter_model_parallel(): net = GradWrap(NetWithLoss(Net(strategy))) x = Tensor(np.ones([10000, 36]), dtype=ms.float32) y = Tensor(np.ones([10000, 36, 1]), dtype=ms.float32) - compile(net, x, y) + compile_net(net, x, y) diff --git a/tests/ut/python/parallel/test_scalar_loss.py b/tests/ut/python/parallel/test_scalar_loss.py index 90fd966422..90291caed4 100644 --- a/tests/ut/python/parallel/test_scalar_loss.py +++ b/tests/ut/python/parallel/test_scalar_loss.py @@ -22,7 +22,6 @@ from mindspore.common.api import _executor from 
mindspore.ops import composite as C from mindspore.ops import functional as F from mindspore.ops import operations as P -from tests.ut.python.ops.test_math_ops import VirtualLoss class GradWrap(nn.Cell): diff --git a/tests/ut/python/parallel/test_set_auto_parallel_context.py b/tests/ut/python/parallel/test_set_auto_parallel_context.py index 9df9833b16..4343f34d78 100644 --- a/tests/ut/python/parallel/test_set_auto_parallel_context.py +++ b/tests/ut/python/parallel/test_set_auto_parallel_context.py @@ -30,10 +30,10 @@ def test_set_auto_parallel_context(): parameter_broadcast = context.get_auto_parallel_context("parameter_broadcast") assert device_num == 4 assert global_rank == 3 - assert mirror_mean == True - assert cast_before_mirror == False + assert mirror_mean + assert not cast_before_mirror assert parallel_mode == "auto_parallel" - assert parameter_broadcast == False + assert not parameter_broadcast auto_parallel_context().set_communication_backend("hccl") backend = auto_parallel_context().get_communication_backend() @@ -43,7 +43,7 @@ def test_set_auto_parallel_context(): device_num = auto_parallel_context().get_device_num() device_num_is_set = auto_parallel_context().get_device_num_is_set() assert device_num == 4 - assert device_num_is_set == True + assert device_num_is_set auto_parallel_context().set_global_rank(4) global_rank = auto_parallel_context().get_global_rank() @@ -51,14 +51,14 @@ def test_set_auto_parallel_context(): auto_parallel_context().set_mirror_mean(True) mirror_mean = auto_parallel_context().get_mirror_mean() - assert mirror_mean == True + assert mirror_mean auto_parallel_context().set_cast_before_mirror(False) cast_before_mirror = auto_parallel_context().get_cast_before_mirror() - assert cast_before_mirror == False + assert not cast_before_mirror parameter_broadcast_is_set = auto_parallel_context().get_parameter_broadcast_is_set() - assert parameter_broadcast_is_set == True + assert parameter_broadcast_is_set with pytest.raises(ValueError): context.set_auto_parallel_context(device_num=0) @@ -94,9 +94,9 @@ def test_reset_auto_parallel_context(): parameter_broadcast_is_set = auto_parallel_context().get_parameter_broadcast_is_set() assert device_num == 1 assert global_rank == 0 - assert mirror_mean == False - assert cast_before_mirror == True + assert not mirror_mean + assert cast_before_mirror assert parallel_mode == "stand_alone" - assert parameter_broadcast == False - assert device_num_is_set == False - assert parameter_broadcast_is_set == False + assert not parameter_broadcast + assert not device_num_is_set + assert not parameter_broadcast_is_set diff --git a/tests/ut/python/parallel/test_sigmoid_cross_entropy_with_logits.py b/tests/ut/python/parallel/test_sigmoid_cross_entropy_with_logits.py index c905950df2..b4b956a9e7 100644 --- a/tests/ut/python/parallel/test_sigmoid_cross_entropy_with_logits.py +++ b/tests/ut/python/parallel/test_sigmoid_cross_entropy_with_logits.py @@ -39,7 +39,7 @@ _w1 = Tensor(np.ones([128, 64]), dtype=ms.float32) _b = Tensor(np.ones([128, 64]), dtype=ms.float32) -def compile(net): +def compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() @@ -52,7 +52,7 @@ def test_sigmoid_cross_entropy_with_logits_data_parallel(): strategy1 = ((16, 1), (16, 1)) strategy2 = ((16, 1), (16, 1)) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) def test_sigmoid_cross_entropy_with_logits_model_parallel(): @@ -60,7 +60,7 @@ 
def test_sigmoid_cross_entropy_with_logits_model_parallel(): strategy1 = ((1, 16), (1, 16)) strategy2 = ((1, 16), (1, 16)) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) def test_sigmoid_cross_entropy_with_logits_hybrid_parallel(): @@ -68,13 +68,13 @@ def test_sigmoid_cross_entropy_with_logits_hybrid_parallel(): strategy1 = ((2, 8), (2, 8)) strategy2 = ((2, 8), (2, 8)) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) def test_sigmoid_cross_entropy_with_logits_auto_parallel(): context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0) net = Net(_w1) - compile(net) + compile_net(net) def test_sigmoid_cross_entropy_with_logits_repeat_calc(): @@ -82,4 +82,4 @@ def test_sigmoid_cross_entropy_with_logits_repeat_calc(): strategy1 = ((2, 8), (2, 8)) strategy2 = ((2, 2), (2, 2)) net = Net(_w1, strategy1, strategy2) - compile(net) + compile_net(net) diff --git a/tests/ut/python/parallel/test_softmax_cross_entropy_loss.py b/tests/ut/python/parallel/test_softmax_cross_entropy_loss.py index d6c2072918..062d29a136 100644 --- a/tests/ut/python/parallel/test_softmax_cross_entropy_loss.py +++ b/tests/ut/python/parallel/test_softmax_cross_entropy_loss.py @@ -21,7 +21,6 @@ from mindspore import context from mindspore.common.api import _executor from mindspore.ops import composite as C from mindspore.ops import operations as P -from tests.ut.python.ops.test_math_ops import VirtualLoss class NetWithLoss(nn.Cell): @@ -44,7 +43,7 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y, b) -def compile(net, x, y, b): +def compile_net(net, x, y, b): net.set_auto_parallel() _executor.compile(net, x, y, b) @@ -71,7 +70,7 @@ def test_softmax_cross_entropy_loss(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 32]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_softmax_cross_entropy_loss_repeated_calculation(): @@ -96,7 +95,7 @@ def test_softmax_cross_entropy_loss_repeated_calculation(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 32]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_softmax_cross_entropy_loss_auto_batch_parallel(): @@ -118,4 +117,4 @@ def test_softmax_cross_entropy_loss_auto_batch_parallel(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 32]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) diff --git a/tests/ut/python/parallel/test_split_grad_sens.py b/tests/ut/python/parallel/test_split_grad_sens.py index 47e95dc454..6da1856cba 100644 --- a/tests/ut/python/parallel/test_split_grad_sens.py +++ b/tests/ut/python/parallel/test_split_grad_sens.py @@ -22,7 +22,6 @@ from mindspore import context from mindspore.common.api import _executor from mindspore.ops import composite as C from mindspore.ops import operations as P -from tests.ut.python.ops.test_math_ops import VirtualLoss class GradWrap(nn.Cell): @@ -54,7 +53,7 @@ class GradWrap3(nn.Cell): return C.grad_all(self.network)(x, y, bias) -def compile(net, x, y, b): +def compile_net(net, x, y, b): net.set_auto_parallel() _executor.compile(net, x, y, b) @@ -81,7 +80,7 @@ def test_no_grad(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + 
compile_net(net, x, y, b) def test_grad_sens_parameter_type(): @@ -135,7 +134,7 @@ def test_grad_sens_tensor_type(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_grad_sens_scalar_broadcast(): @@ -159,4 +158,4 @@ def test_grad_sens_scalar_broadcast(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 32]), dtype=ms.float32) bias = Tensor(np.ones([64]), dtype=ms.float32) - compile(net, x, y, bias) + compile_net(net, x, y, bias) diff --git a/tests/ut/python/parallel/test_squeeze_info.py b/tests/ut/python/parallel/test_squeeze_info.py index eba77db3e6..0b3144346e 100644 --- a/tests/ut/python/parallel/test_squeeze_info.py +++ b/tests/ut/python/parallel/test_squeeze_info.py @@ -15,9 +15,9 @@ import numpy as np import mindspore as ms -from mindspore import context, Tensor, Parameter +from mindspore import context, Tensor from mindspore.common.api import _executor -from mindspore.nn import Cell, TrainOneStepCell, Momentum +from mindspore.nn import Cell from mindspore.ops import operations as P @@ -37,7 +37,7 @@ _x = Tensor(np.ones([64, 1, 32, 1]), dtype=ms.float32) _b = Tensor(np.ones([64, 32]), dtype=ms.float32) -def compile(net): +def compile_net(net): net.set_auto_parallel() _executor.compile(net, _x, _b) context.reset_auto_parallel_context() @@ -48,7 +48,7 @@ def test_squeeze_data_parallel(): strategy1 = ((16, 1, 1, 1),) strategy2 = ((16, 1), (16, 1)) net = Net(strategy1, strategy2) - compile(net) + compile_net(net) def test_squeeze_model_parallel(): @@ -56,7 +56,7 @@ def test_squeeze_model_parallel(): strategy1 = ((1, 1, 16, 1),) strategy2 = ((1, 16), (1, 16)) net = Net(strategy1, strategy2) - compile(net) + compile_net(net) def test_squeeze_specified_axis(): @@ -64,13 +64,13 @@ def test_squeeze_specified_axis(): strategy1 = ((4, 1, 4, 1),) strategy2 = ((8, 2), (8, 2)) net = Net(strategy1, strategy2, (1, 3)) - compile(net) + compile_net(net) def test_squeeze_auto_parallel(): context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0) net = Net() - compile(net) + compile_net(net) def test_squeeze_repeat_calc(): @@ -78,4 +78,4 @@ def test_squeeze_repeat_calc(): strategy1 = ((1, 1, 8, 1),) strategy2 = ((2, 8), (2, 8)) net = Net(strategy1, strategy2) - compile(net) + compile_net(net) diff --git a/tests/ut/python/parallel/test_sum_as_loss.py b/tests/ut/python/parallel/test_sum_as_loss.py index 336783f576..fc66df9d01 100644 --- a/tests/ut/python/parallel/test_sum_as_loss.py +++ b/tests/ut/python/parallel/test_sum_as_loss.py @@ -21,7 +21,6 @@ from mindspore import context from mindspore.common.api import _executor from mindspore.ops import composite as C from mindspore.ops import operations as P -from tests.ut.python.ops.test_math_ops import VirtualLoss class GradWrap(nn.Cell): @@ -33,7 +32,7 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y, bias) -def compile(net, x, y, bias): +def compile_net(net, x, y, bias): net.set_auto_parallel() _executor.compile(net, x, y, bias) @@ -59,7 +58,7 @@ def test_sum_as_loss(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 32]), dtype=ms.float32) bias = Tensor(np.ones([64]), dtype=ms.float32) - compile(net, x, y, bias) + compile_net(net, x, y, bias) def test_sum_as_loss2(): @@ -83,4 +82,4 @@ def test_sum_as_loss2(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 32]), dtype=ms.float32) 
bias = Tensor(np.ones([64]), dtype=ms.float32) - compile(net, x, y, bias) + compile_net(net, x, y, bias) diff --git a/tests/ut/python/parallel/test_transpose.py b/tests/ut/python/parallel/test_transpose.py index f712416323..b0b917bf19 100644 --- a/tests/ut/python/parallel/test_transpose.py +++ b/tests/ut/python/parallel/test_transpose.py @@ -17,7 +17,6 @@ import numpy as np import mindspore as ms import mindspore.nn as nn from mindspore import Tensor, context -from mindspore import context from mindspore.common.parameter import Parameter from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits from mindspore.nn.optim.momentum import Momentum @@ -67,7 +66,6 @@ def transpose_net(strategy1, strategy2): def transpose_common(strategy1, strategy2): - batch_size = 32 learning_rate = 0.1 momentum = 0.9 epoch_size = 2 diff --git a/tests/ut/python/parallel/test_two_matmul.py b/tests/ut/python/parallel/test_two_matmul.py index 6e8b2085b6..daee920a91 100644 --- a/tests/ut/python/parallel/test_two_matmul.py +++ b/tests/ut/python/parallel/test_two_matmul.py @@ -44,7 +44,7 @@ class GradWrap(nn.Cell): return C.grad_all(self.network)(x, y, b) -def compile(net, x, y, b): +def compile_net(net, x, y, b): net.set_auto_parallel() _executor.compile(net, x, y, b) @@ -72,7 +72,7 @@ def test_two_matmul(): y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_two_matmul_repeated_calculation1(): @@ -96,7 +96,7 @@ def test_two_matmul_repeated_calculation1(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) def test_two_matmul_repeated_calculation2(): @@ -120,4 +120,4 @@ def test_two_matmul_repeated_calculation2(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) - compile(net, x, y, b) + compile_net(net, x, y, b) diff --git a/tests/ut/python/parallel/test_two_weights_parameter.py b/tests/ut/python/parallel/test_two_weights_parameter.py index b010049b14..1684cf96dc 100644 --- a/tests/ut/python/parallel/test_two_weights_parameter.py +++ b/tests/ut/python/parallel/test_two_weights_parameter.py @@ -20,7 +20,6 @@ from mindspore import Tensor, Parameter, ParameterTuple from mindspore import context from mindspore.common.api import _executor from mindspore.ops import composite as C -from mindspore.ops import functional as F from mindspore.ops import operations as P diff --git a/tests/ut/python/parallel/test_virtual_dataset_3_input.py b/tests/ut/python/parallel/test_virtual_dataset_3_input.py index 95aec05d32..e7ea717dc9 100644 --- a/tests/ut/python/parallel/test_virtual_dataset_3_input.py +++ b/tests/ut/python/parallel/test_virtual_dataset_3_input.py @@ -78,7 +78,7 @@ def test_virtual_dataset_3_input(): def test_virtualdataset_cell_3_inputs(): class Net(nn.Cell): - def __init__(self, strategy0, strategy1, strategy2, strategy3): + def __init__(self, strategy1, strategy2, strategy3): super().__init__() self.matmul1 = P.MatMul().set_strategy(strategy1) self.matmul2 = P.MatMul().set_strategy(strategy2) @@ -89,7 +89,7 @@ def test_virtualdataset_cell_3_inputs(): out = self.matmul2(out, b) return out - net = GradWrap(VirtualDatasetCellTriple(NetWithLoss(Net(None, None, None, None)))) + net = GradWrap(VirtualDatasetCellTriple(NetWithLoss(Net(None, None, None)))) 
context.set_context(save_graphs=True) context.set_auto_parallel_context(parallel_mode="auto_parallel") context.set_auto_parallel_context(device_num=8, global_rank=0)
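
The hunks above apply the same few pylint fixes over and over: the test helper `compile` is renamed to `compile_net` because `compile` shadows a Python builtin (W0622), asserts check truthiness instead of comparing with `== True` / `== False` (C0121, singleton-comparison), unused tuple elements are bound to `_` (W0612), `else` branches after `return` are dropped (R1705), unused imports, locals, and parameters such as `padding`, `layer_num`, `down_sample`, and `strategy0` are removed (W0611/W0612/W0613), and camelCase attributes like `downSample` become snake_case (C0103). The standalone Python sketch below is illustrative only and is not part of the patch; every name in it is a hypothetical stand-in for the test helpers touched above.

# pylint_fix_patterns.py -- illustrative sketch, not part of the patch; all names are hypothetical.
import numpy as np


def compile_net(net, *inputs):
    # Was `compile`: renamed so the helper no longer shadows the `compile` builtin (W0622).
    return net(*inputs)


class Dataset:
    """Hypothetical dataset: returns directly instead of using `else` after `return` (R1705)."""

    def __init__(self, predict, label, input_num=2):
        self.predict = predict
        self.label = label
        self.input_num = input_num
        self.down_sample = False  # snake_case attribute name instead of `downSample` (C0103)

    def __getitem__(self, _index):
        if self.input_num == 2:
            return (self.predict, self.label)
        return (self.predict,)


def arg_max_value(x):
    # The unused index from an argmax-style pair is bound to `_` (W0612).
    _, value = int(np.argmax(x)), float(np.max(x))
    return value


def check_context_flags(mirror_mean, cast_before_mirror):
    # Truthiness asserts instead of `== True` / `== False` comparisons (C0121).
    assert mirror_mean
    assert not cast_before_mirror


def conv3x3(in_channels, out_channels, stride=1):
    # A never-used `padding` parameter is dropped from the signature (W0613).
    return ("conv3x3", in_channels, out_channels, stride)

In the tests themselves `compile_net` wraps `_executor.compile(...)`; the rename matters because redefining `compile` hides the builtin for the rest of the module and trips pylint's redefined-builtin check. The remaining changes are behavior-preserving: the only signature changes are removals of parameters that the function bodies never used.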