diff --git a/mindspore/ccsrc/frontend/parallel/step_parallel.cc b/mindspore/ccsrc/frontend/parallel/step_parallel.cc index f182e8e405..ff1c666c45 100644 --- a/mindspore/ccsrc/frontend/parallel/step_parallel.cc +++ b/mindspore/ccsrc/frontend/parallel/step_parallel.cc @@ -1512,7 +1512,87 @@ Status ValidStageCheck(const std::vector &stages, int32_t strategy_stag } } -void ExtractInformation(const std::vector &all_nodes) { +// find previous parallel care node. +bool FindPreNodes(const AnfNodePtr &node, vector *unique_ids) { + MS_EXCEPTION_IF_NULL(unique_ids); + // if previous node is a parameter, handle it outside. + if (node->isa()) { + return false; + } + if (!node->isa()) { + return false; + } + CNodePtr cnode = node->cast(); + if (!IsValueNode(cnode->input(0))) { + return false; + } + ValueNodePtr prim_anf_node = cnode->input(0)->cast(); + PrimitivePtr prim = prim_anf_node->value()->cast(); + if (IsParallelCareNode(cnode) && prim->name() != MAKE_TUPLE && prim->name() != MAKE_LIST) { + unique_ids->push_back(cnode->UniqueId()); + return true; + } + bool find = false; + for (size_t index = 0; index < cnode->inputs().size(); ++index) { + if (prim->name() == DEPEND && index != 1) { + continue; + } + if (FindPreNodes(cnode->inputs()[index], unique_ids)) { + find = true; + continue; + } + } + return find; +} + +void FindLastNodesUniqueId(const std::vector &all_nodes, vector *unique_ids) { + MS_EXCEPTION_IF_NULL(unique_ids); + for (auto &node : all_nodes) { + auto cnode = node->cast(); + if ((cnode == nullptr) || !IsValueNode(cnode->input(0))) { + continue; + } + ValueNodePtr prim_anf_node = cnode->input(0)->cast(); + PrimitivePtr prim = GetValueNode(prim_anf_node); + if (prim->name() == RETURN) { + if (!FindPreNodes(cnode, unique_ids)) { + MS_LOG(WARNING) << "cannot find the last parallel care node in eval graph"; + } + } + } +} + +StrategyPtr GenerateBatchParallelStrategy(const OperatorInfoPtr operator_, const PrimitivePtr prim) { + 
MS_EXCEPTION_IF_NULL(operator_); + MS_EXCEPTION_IF_NULL(prim); + StrategyPtr strategyPtr; + std::shared_ptr strategy_v_ptr = operator_->GenerateBatchStrategies(); + MS_EXCEPTION_IF_NULL(strategy_v_ptr); + strategyPtr = NewStrategy(0, *strategy_v_ptr); + std::vector elements; + for (size_t i = 0; i < strategy_v_ptr->size(); i++) { + elements.push_back(MakeValue((*strategy_v_ptr)[i])); + } + ValueTuplePtr strategy = std::make_shared(elements); + // display the strategy generated by batch parallel + auto attrs = prim->attrs(); + attrs[GEN_STRATEGY] = strategy; + (void)prim->SetAttrs(attrs); + MS_LOG(INFO) << "prim " << prim->name() << " batch parallel strategy is " << attrs[GEN_STRATEGY]->ToString(); + return strategyPtr; +} + +void SetLastNodeStrategy(const StrategyPtr strategyPtr) { + auto strategys = strategyPtr->GetInputDim(); + for (size_t i = 0; i < strategys.size(); ++i) { + for (size_t j = 0; j < strategys[i].size(); ++j) { + strategys[i][j] = 1; + } + } + strategyPtr->ResetInputs(strategys); +} + +void ExtractInformation(const std::vector &all_nodes, bool is_training) { // load strategy map from checkpoint StrategyMap stra_map; if (StrategyCheckpoint::GetInstance().LoadCheckPointOn()) { @@ -1520,7 +1600,11 @@ void ExtractInformation(const std::vector &all_nodes) { MS_LOG(EXCEPTION) << "Load strategy checkpoint failed"; } } - + vector last_forward_node_ids; + if (!is_training) { + FindLastNodesUniqueId(all_nodes, &last_forward_node_ids); + MS_LOG(INFO) << "there are " << last_forward_node_ids.size() << " output nodes in eval/predict"; + } // Get global rank after the checkpoint? 
int32_t global_rank = ParallelContext::GetInstance()->global_rank(); std::vector stages = ParallelContext::GetInstance()->stage(); @@ -1572,30 +1656,22 @@ void ExtractInformation(const std::vector &all_nodes) { } bool load_strategy_from_ckpt = StrategyCheckpoint::GetInstance().LoadCheckPointOn() && stra_map.find(strategy_key_name) != stra_map.end(); - if (!StrategyFound(attrs) && !load_strategy_from_ckpt) { + bool is_last_nodes = std::find(last_forward_node_ids.begin(), last_forward_node_ids.end(), cnode->UniqueId()) != + last_forward_node_ids.end(); + bool full_batch = ParallelContext::GetInstance()->full_batch(); + if ((is_last_nodes && !full_batch) || (!StrategyFound(attrs) && !load_strategy_from_ckpt)) { MS_LOG(INFO) << "ExtractInformation: the strategy of node " << node->ToString() << " prim " << prim->name() << " is empty, using batch parallel"; - std::shared_ptr strategy_v_ptr = operator_->GenerateBatchStrategies(); - if (strategy_v_ptr == nullptr) { - MS_LOG(EXCEPTION) << "Failure:Generate batch parallel strategy failed"; - } - std::vector elements; - for (size_t i = 0; i < strategy_v_ptr->size(); i++) { - elements.push_back(MakeValue((*strategy_v_ptr)[i])); - } - ValueTuplePtr strategy = std::make_shared(elements); - // display the strategy generated by batch parallel - attrs[GEN_STRATEGY] = strategy; - (void)prim->SetAttrs(attrs); - MS_LOG(INFO) << "node " << node->ToString() << " prim " << prim->name() << " batch parallel strategy is " - << attrs[GEN_STRATEGY]->ToString(); - strategyPtr = NewStrategy(0, *strategy_v_ptr); + strategyPtr = GenerateBatchParallelStrategy(operator_, prim); } else if (load_strategy_from_ckpt) { strategyPtr = stra_map[strategy_key_name]; } else { strategyPtr = ExtractStrategy(attrs); } if (strategyPtr != nullptr) { + if (is_last_nodes && full_batch) { + SetLastNodeStrategy(strategyPtr); + } (*operator_).set_stage_id(strategyPtr->GetInputStage()); MS_LOG(INFO) << "Extract stage id for op " << prim->name() << " is " << 
(*operator_).stage_id(); if (ValidStageCheck(stages, (*operator_).stage_id()) == FAILED) { @@ -2854,7 +2930,7 @@ bool StepParallel(const FuncGraphPtr &root, const opt::OptimizerPtr &optimizer) } // extract shape and strategy, set operator_info - ExtractInformation(all_nodes); + ExtractInformation(all_nodes, root->has_flag(TRAINING)); ReshapeInit(all_nodes); } diff --git a/mindspore/ccsrc/frontend/parallel/step_parallel.h b/mindspore/ccsrc/frontend/parallel/step_parallel.h index 4ff9eef96e..47fb8e78c2 100644 --- a/mindspore/ccsrc/frontend/parallel/step_parallel.h +++ b/mindspore/ccsrc/frontend/parallel/step_parallel.h @@ -118,7 +118,7 @@ void CoverSliceShape(const FuncGraphPtr &root); void SetVirtualDatasetStrategy(const CNodePtr &node); // Creat parallel operator for primitive node(has strategy) -void ExtractInformation(const std::vector &all_nodes); +void ExtractInformation(const std::vector &all_nodes, bool is_training = true); TensorLayout GetInputLayoutFromCNode(const std::pair &node_pair); diff --git a/tests/ut/python/parallel/test_add_relu_redistribution.py b/tests/ut/python/parallel/test_add_relu_redistribution.py index ac88592399..1efb3acde8 100644 --- a/tests/ut/python/parallel/test_add_relu_redistribution.py +++ b/tests/ut/python/parallel/test_add_relu_redistribution.py @@ -59,6 +59,7 @@ class Grad(nn.Cell): def compile_net(net, x, y): net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y) diff --git a/tests/ut/python/parallel/test_arithmetic.py b/tests/ut/python/parallel/test_arithmetic.py index 36e52a993e..2d475945ba 100644 --- a/tests/ut/python/parallel/test_arithmetic.py +++ b/tests/ut/python/parallel/test_arithmetic.py @@ -48,6 +48,7 @@ class GradWrap(nn.Cell): def compile_net(net, x, y, b): net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y, b) @@ -649,6 +650,7 @@ def test_assign_sub(): def compile_sub_net(net, x): net.set_auto_parallel() + net.set_train() _executor.compile(net, x) 
context.set_auto_parallel_context(device_num=64, global_rank=15) @@ -696,6 +698,7 @@ def test_assign_add(): def compile_sub_net(net, x): net.set_auto_parallel() + net.set_train() _executor.compile(net, x) context.set_auto_parallel_context(device_num=64, global_rank=15) @@ -743,6 +746,7 @@ def test_assign(): def compile_sub_net(net, x): net.set_auto_parallel() + net.set_train() _executor.compile(net, x) context.set_auto_parallel_context(device_num=64, global_rank=15) diff --git a/tests/ut/python/parallel/test_auto_parallel_BN_PReLU.py b/tests/ut/python/parallel/test_auto_parallel_BN_PReLU.py index 4f9cd92c3c..07b843b614 100644 --- a/tests/ut/python/parallel/test_auto_parallel_BN_PReLU.py +++ b/tests/ut/python/parallel/test_auto_parallel_BN_PReLU.py @@ -73,4 +73,5 @@ def test_auto_parallel_bn_with_prelu(): net = GradWrap(NetWithLoss(Net())) net.set_auto_parallel() + net.set_train() _executor.compile(net, x) diff --git a/tests/ut/python/parallel/test_auto_parallel_activation.py b/tests/ut/python/parallel/test_auto_parallel_activation.py index beaa047f2f..2be2ae3554 100644 --- a/tests/ut/python/parallel/test_auto_parallel_activation.py +++ b/tests/ut/python/parallel/test_auto_parallel_activation.py @@ -43,6 +43,7 @@ def compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() + train_net.set_train() _executor.compile(train_net, _x, _b) context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_auto_parallel_arithmetic.py b/tests/ut/python/parallel/test_auto_parallel_arithmetic.py index fff6447c22..51413f98e5 100644 --- a/tests/ut/python/parallel/test_auto_parallel_arithmetic.py +++ b/tests/ut/python/parallel/test_auto_parallel_arithmetic.py @@ -52,6 +52,7 @@ class GradWrap(nn.Cell): def compile_net(net, x, y, b, phase): net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y, b, phase=phase) diff --git 
a/tests/ut/python/parallel/test_auto_parallel_assign_sub_with_ref_key.py b/tests/ut/python/parallel/test_auto_parallel_assign_sub_with_ref_key.py index 530a122cfc..3bd389db7d 100644 --- a/tests/ut/python/parallel/test_auto_parallel_assign_sub_with_ref_key.py +++ b/tests/ut/python/parallel/test_auto_parallel_assign_sub_with_ref_key.py @@ -61,6 +61,7 @@ def test_auto_parallel_assign_sub_with_ref_key(): net.set_auto_parallel() reset_op_id() + net.set_train() _executor.compile(net, x, phase="train") strategies = _executor._get_shard_strategy(net) for (k, v) in strategies.items(): diff --git a/tests/ut/python/parallel/test_auto_parallel_cast.py b/tests/ut/python/parallel/test_auto_parallel_cast.py index 0868f0d871..4dee5c42de 100644 --- a/tests/ut/python/parallel/test_auto_parallel_cast.py +++ b/tests/ut/python/parallel/test_auto_parallel_cast.py @@ -81,6 +81,7 @@ def test_double_star_graph(): net.set_auto_parallel() reset_op_id() + net.set_train() _executor.compile(net, x, y, z, w, phase='train') strategies = _executor._get_shard_strategy(net) expected_strategies = {'Default/network-Net/Cast-op0': [[8, 1]], diff --git a/tests/ut/python/parallel/test_auto_parallel_common_parameter.py b/tests/ut/python/parallel/test_auto_parallel_common_parameter.py index 9ab8b27406..33059a7a48 100644 --- a/tests/ut/python/parallel/test_auto_parallel_common_parameter.py +++ b/tests/ut/python/parallel/test_auto_parallel_common_parameter.py @@ -72,4 +72,5 @@ def test_common_parameter(): net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y, z) diff --git a/tests/ut/python/parallel/test_auto_parallel_double_sources.py b/tests/ut/python/parallel/test_auto_parallel_double_sources.py index 6ad7858505..9e361e7106 100644 --- a/tests/ut/python/parallel/test_auto_parallel_double_sources.py +++ b/tests/ut/python/parallel/test_auto_parallel_double_sources.py @@ -79,6 +79,7 @@ def 
test_double_source_graph(): net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y, z, w, a) @@ -114,4 +115,5 @@ def test_double_source_complex_graph(): net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y, z, w, a) diff --git a/tests/ut/python/parallel/test_auto_parallel_double_star.py b/tests/ut/python/parallel/test_auto_parallel_double_star.py index 5a43159993..b6b43a6d26 100644 --- a/tests/ut/python/parallel/test_auto_parallel_double_star.py +++ b/tests/ut/python/parallel/test_auto_parallel_double_star.py @@ -83,4 +83,5 @@ def test_double_star_graph(): net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y, z, w, a, b, c) diff --git a/tests/ut/python/parallel/test_auto_parallel_double_subgraphs.py b/tests/ut/python/parallel/test_auto_parallel_double_subgraphs.py index 80775e48ac..0431604411 100644 --- a/tests/ut/python/parallel/test_auto_parallel_double_subgraphs.py +++ b/tests/ut/python/parallel/test_auto_parallel_double_subgraphs.py @@ -113,6 +113,7 @@ def test_double_subgraphs(): x = Tensor(np.ones([8, 8, 8, 8]), dtype=ms.float32) reset_op_id() + net.set_train() _executor.compile(net, x, phase='train') strategies = _executor._get_shard_strategy(net) for (k, v) in strategies.items(): diff --git a/tests/ut/python/parallel/test_auto_parallel_fc_nobias.py b/tests/ut/python/parallel/test_auto_parallel_fc_nobias.py index 05e57801c0..b64f8fec5d 100644 --- a/tests/ut/python/parallel/test_auto_parallel_fc_nobias.py +++ b/tests/ut/python/parallel/test_auto_parallel_fc_nobias.py @@ -70,4 +70,5 @@ def test_two_matmul(): net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") 
net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y, b) diff --git a/tests/ut/python/parallel/test_auto_parallel_four_matmul.py b/tests/ut/python/parallel/test_auto_parallel_four_matmul.py index c005fcffde..0590ff062b 100644 --- a/tests/ut/python/parallel/test_auto_parallel_four_matmul.py +++ b/tests/ut/python/parallel/test_auto_parallel_four_matmul.py @@ -49,6 +49,7 @@ class GradWrap(nn.Cell): def compile_net(net, x, y, z, w, b): net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y, z, w, b) # model_parallel test diff --git a/tests/ut/python/parallel/test_auto_parallel_l2normalize.py b/tests/ut/python/parallel/test_auto_parallel_l2normalize.py index 1a1c1502f3..2de76ab7d9 100644 --- a/tests/ut/python/parallel/test_auto_parallel_l2normalize.py +++ b/tests/ut/python/parallel/test_auto_parallel_l2normalize.py @@ -73,4 +73,5 @@ def test_auto_parallel_l2normalize(): x = Tensor(np.ones([128, 64, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 64, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y, b, phase='train') diff --git a/tests/ut/python/parallel/test_auto_parallel_matmul_drop.py b/tests/ut/python/parallel/test_auto_parallel_matmul_drop.py index 738614ab5e..35012641f4 100644 --- a/tests/ut/python/parallel/test_auto_parallel_matmul_drop.py +++ b/tests/ut/python/parallel/test_auto_parallel_matmul_drop.py @@ -70,4 +70,5 @@ def test_two_matmul_dropout(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y, b) diff --git a/tests/ut/python/parallel/test_auto_parallel_matmul_prelu.py b/tests/ut/python/parallel/test_auto_parallel_matmul_prelu.py index f8c64a4baa..b11836a435 100644 --- a/tests/ut/python/parallel/test_auto_parallel_matmul_prelu.py +++ b/tests/ut/python/parallel/test_auto_parallel_matmul_prelu.py @@ -74,6 +74,7 @@ 
def test_matmul_prelu(): net.set_auto_parallel() reset_op_id() + net.set_train() _executor.compile(net, x, y, b, phase='train') strategies = _executor._get_shard_strategy(net) for (k, v) in strategies.items(): diff --git a/tests/ut/python/parallel/test_auto_parallel_multi_graph.py b/tests/ut/python/parallel/test_auto_parallel_multi_graph.py index f510fdedeb..ab71e6cb6f 100644 --- a/tests/ut/python/parallel/test_auto_parallel_multi_graph.py +++ b/tests/ut/python/parallel/test_auto_parallel_multi_graph.py @@ -58,6 +58,7 @@ def compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() + train_net.set_train() _executor.compile(train_net, inputs_, label_) context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_auto_parallel_onehot.py b/tests/ut/python/parallel/test_auto_parallel_onehot.py index 03fb233ae4..59c6ed7271 100644 --- a/tests/ut/python/parallel/test_auto_parallel_onehot.py +++ b/tests/ut/python/parallel/test_auto_parallel_onehot.py @@ -99,6 +99,7 @@ def test_auto_parallel_arithmetic(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64]), dtype=ms.int32) + net.set_train() _executor.compile(net, x, y, b) diff --git a/tests/ut/python/parallel/test_auto_parallel_parameter_cast.py b/tests/ut/python/parallel/test_auto_parallel_parameter_cast.py index ab84db70d0..0890dfd3c0 100644 --- a/tests/ut/python/parallel/test_auto_parallel_parameter_cast.py +++ b/tests/ut/python/parallel/test_auto_parallel_parameter_cast.py @@ -68,6 +68,7 @@ def test_common_parameter(): net.set_auto_parallel() reset_op_id() + net.set_train() _executor.compile(net, x, y, phase='train') strategies = _executor._get_shard_strategy(net) for (k, v) in strategies.items(): diff --git a/tests/ut/python/parallel/test_auto_parallel_partial_strategy.py 
b/tests/ut/python/parallel/test_auto_parallel_partial_strategy.py index 2606a7d302..3011f44b8b 100644 --- a/tests/ut/python/parallel/test_auto_parallel_partial_strategy.py +++ b/tests/ut/python/parallel/test_auto_parallel_partial_strategy.py @@ -77,4 +77,5 @@ def test_four_matmul_linear(): net = GradWrap(NetWithLoss(Net(strategy1))) context.set_auto_parallel_context(parallel_mode="auto_parallel") net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y, z, w, b) diff --git a/tests/ut/python/parallel/test_auto_parallel_reduce_method.py b/tests/ut/python/parallel/test_auto_parallel_reduce_method.py index 415ddf94d0..2161b17d6d 100644 --- a/tests/ut/python/parallel/test_auto_parallel_reduce_method.py +++ b/tests/ut/python/parallel/test_auto_parallel_reduce_method.py @@ -49,6 +49,7 @@ class GradWrap(nn.Cell): def compile_net(net, x, y, b): net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y, b) diff --git a/tests/ut/python/parallel/test_auto_parallel_reshape.py b/tests/ut/python/parallel/test_auto_parallel_reshape.py index 5a384d874a..479c727475 100644 --- a/tests/ut/python/parallel/test_auto_parallel_reshape.py +++ b/tests/ut/python/parallel/test_auto_parallel_reshape.py @@ -68,6 +68,7 @@ def test_reshape_matmul(): net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") net.set_auto_parallel() + net.set_train() _executor.compile(net, x) def test_reshape_reshape(): @@ -90,6 +91,7 @@ def test_reshape_reshape(): net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") net.set_auto_parallel() + net.set_train() _executor.compile(net, x) @@ -115,6 +117,7 @@ def test_reshape_auto_1(): net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") net.set_auto_parallel() + net.set_train() _executor.compile(net, x) @@ -143,6 +146,7 @@ def test_reshape_auto_2(): net = GradWrap(NetWithLoss(Net())) 
context.set_auto_parallel_context(parallel_mode="auto_parallel") net.set_auto_parallel() + net.set_train() _executor.compile(net, x) @@ -168,6 +172,7 @@ def test_reshape_auto_3(): net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") net.set_auto_parallel() + net.set_train() _executor.compile(net, x) @@ -194,6 +199,7 @@ def test_reshape_auto_4(): net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") net.set_auto_parallel() + net.set_train() _executor.compile(net, x) @@ -244,6 +250,7 @@ def test_reshape_auto_5(): net = GradWrap5(NetWithLoss5(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y) def test_reshape_auto_6(): @@ -291,6 +298,7 @@ def test_reshape_auto_6(): net = GradWrap6(NetWithLoss6(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y) def test_reshape_auto_7(): @@ -313,4 +321,5 @@ def test_reshape_auto_7(): net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") net.set_auto_parallel() + net.set_train() _executor.compile(net, x) diff --git a/tests/ut/python/parallel/test_auto_parallel_rhombus.py b/tests/ut/python/parallel/test_auto_parallel_rhombus.py index fb7b6caf6e..2bfc0ee4f9 100644 --- a/tests/ut/python/parallel/test_auto_parallel_rhombus.py +++ b/tests/ut/python/parallel/test_auto_parallel_rhombus.py @@ -49,6 +49,7 @@ class GradWrap(nn.Cell): def compile_net(net, x, y, b): net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y, b) diff --git a/tests/ut/python/parallel/test_auto_parallel_softmax_loss.py b/tests/ut/python/parallel/test_auto_parallel_softmax_loss.py index 448e322c2a..8334fd893d 100644 --- a/tests/ut/python/parallel/test_auto_parallel_softmax_loss.py +++ 
b/tests/ut/python/parallel/test_auto_parallel_softmax_loss.py @@ -66,4 +66,5 @@ def test_softmax_cross_entropy_loss_auto_parallel(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 32]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y, b) diff --git a/tests/ut/python/parallel/test_auto_parallel_star_partial_strategy.py b/tests/ut/python/parallel/test_auto_parallel_star_partial_strategy.py index af3d0ac431..28ec839831 100644 --- a/tests/ut/python/parallel/test_auto_parallel_star_partial_strategy.py +++ b/tests/ut/python/parallel/test_auto_parallel_star_partial_strategy.py @@ -88,6 +88,7 @@ def test_star_strategy_consistency1(): context.set_auto_parallel_context(parallel_mode="auto_parallel") net.set_auto_parallel() reset_op_id() + net.set_train() _executor.compile(net, x, phase='train') @@ -102,6 +103,7 @@ def test_star_strategy_consistency2(): context.set_auto_parallel_context(parallel_mode="auto_parallel") net.set_auto_parallel() reset_op_id() + net.set_train() _executor.compile(net, x, phase='train') @@ -116,6 +118,7 @@ def test_star_strategy_consistency3(): context.set_auto_parallel_context(parallel_mode="auto_parallel") net.set_auto_parallel() reset_op_id() + net.set_train() _executor.compile(net, x, phase='train') @@ -131,4 +134,5 @@ def test_star_strategy_consistency4(): net.set_auto_parallel() reset_op_id() with pytest.raises(RuntimeError): + net.set_train() _executor.compile(net, x, phase='train') diff --git a/tests/ut/python/parallel/test_auto_parallel_transformer.py b/tests/ut/python/parallel/test_auto_parallel_transformer.py index 4a3d8daa44..196da302b2 100644 --- a/tests/ut/python/parallel/test_auto_parallel_transformer.py +++ b/tests/ut/python/parallel/test_auto_parallel_transformer.py @@ -112,4 +112,5 @@ def test_dmnet_train_step(): net = GradWrap(NetWithLoss(MultiTransformer())) context.set_auto_parallel_context(parallel_mode="auto_parallel") 
net.set_auto_parallel() + net.set_train() _executor.compile(net, input_) diff --git a/tests/ut/python/parallel/test_auto_parallel_transpose.py b/tests/ut/python/parallel/test_auto_parallel_transpose.py index c41ae9ce1c..d3d44e8f15 100644 --- a/tests/ut/python/parallel/test_auto_parallel_transpose.py +++ b/tests/ut/python/parallel/test_auto_parallel_transpose.py @@ -76,6 +76,7 @@ def test_two_matmul_transpose(): net.set_auto_parallel() reset_op_id() + net.set_train() _executor.compile(net, x, y, b, phase='train') strategies = _executor._get_shard_strategy(net) expected_strategies = {'Default/network-Net/Transpose-op3': [[1, 16]], diff --git a/tests/ut/python/parallel/test_auto_parallel_triangle_overwrite.py b/tests/ut/python/parallel/test_auto_parallel_triangle_overwrite.py index 1436e1361e..5dd825b175 100644 --- a/tests/ut/python/parallel/test_auto_parallel_triangle_overwrite.py +++ b/tests/ut/python/parallel/test_auto_parallel_triangle_overwrite.py @@ -70,4 +70,5 @@ def test_triangle_strategy_consistency(): net.set_auto_parallel() reset_op_id() + net.set_train() _executor.compile(net, x, phase='train') diff --git a/tests/ut/python/parallel/test_auto_parallel_tuple_depend.py b/tests/ut/python/parallel/test_auto_parallel_tuple_depend.py index 8ed66b958e..78b58a67d6 100644 --- a/tests/ut/python/parallel/test_auto_parallel_tuple_depend.py +++ b/tests/ut/python/parallel/test_auto_parallel_tuple_depend.py @@ -78,4 +78,5 @@ def test_virtual_dataset_3_input(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 2048]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y, b) diff --git a/tests/ut/python/parallel/test_auto_parallel_two_matmul.py b/tests/ut/python/parallel/test_auto_parallel_two_matmul.py index 9ddeb3d8c3..2f9c91625b 100644 --- a/tests/ut/python/parallel/test_auto_parallel_two_matmul.py +++ b/tests/ut/python/parallel/test_auto_parallel_two_matmul.py @@ -134,6 +134,7 @@ def 
test_two_matmul(): net.set_auto_parallel() reset_op_id() + net.set_train() _executor.compile(net, x, y, b, phase='train') strategies = _executor._get_shard_strategy(net) expected_strategies = {'Default/network-Net/MatMul-op0': [[16, 1], [1, 1]], diff --git a/tests/ut/python/parallel/test_auto_parallel_two_partial_matmul.py b/tests/ut/python/parallel/test_auto_parallel_two_partial_matmul.py index aa0bfd126a..951f06e9f8 100644 --- a/tests/ut/python/parallel/test_auto_parallel_two_partial_matmul.py +++ b/tests/ut/python/parallel/test_auto_parallel_two_partial_matmul.py @@ -71,4 +71,5 @@ def test_four_matmul_linear(): net = GradWrap(NetWithLoss(Net(strategy1))) context.set_auto_parallel_context(parallel_mode="auto_parallel") net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y) diff --git a/tests/ut/python/parallel/test_auto_parallel_zig_zag.py b/tests/ut/python/parallel/test_auto_parallel_zig_zag.py index 14affccf50..530e142b13 100644 --- a/tests/ut/python/parallel/test_auto_parallel_zig_zag.py +++ b/tests/ut/python/parallel/test_auto_parallel_zig_zag.py @@ -77,4 +77,5 @@ def test_zig_zag_graph(): net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y, z, w, a) diff --git a/tests/ut/python/parallel/test_auto_star_elimination.py b/tests/ut/python/parallel/test_auto_star_elimination.py index 7b1945304e..8fd2ad2e19 100644 --- a/tests/ut/python/parallel/test_auto_star_elimination.py +++ b/tests/ut/python/parallel/test_auto_star_elimination.py @@ -89,4 +89,5 @@ def test_marin_loss(): net = GradWrap(NetWithLoss(MarginCE())) context.set_auto_parallel_context(parallel_mode="auto_parallel") net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y) diff --git a/tests/ut/python/parallel/test_batch_matmul.py b/tests/ut/python/parallel/test_batch_matmul.py index 87b5116348..c40d4d257a 100644 --- 
a/tests/ut/python/parallel/test_batch_matmul.py +++ b/tests/ut/python/parallel/test_batch_matmul.py @@ -45,6 +45,7 @@ def compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() + train_net.set_train() _executor.compile(train_net, _x, _b) context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_batch_parallel.py b/tests/ut/python/parallel/test_batch_parallel.py index 962e0ca0f6..91f0f4e7b2 100644 --- a/tests/ut/python/parallel/test_batch_parallel.py +++ b/tests/ut/python/parallel/test_batch_parallel.py @@ -108,6 +108,7 @@ def test_batch(): x = Tensor(np.ones([128, 16, 34, 34]), dtype=ms.float32) w1 = Tensor(np.ones([128, 8, 32, 32]), dtype=ms.float32) w2 = Tensor(np.ones([128, 64, 24, 24]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, w1, w2) diff --git a/tests/ut/python/parallel/test_batch_parallel_dropout.py b/tests/ut/python/parallel/test_batch_parallel_dropout.py index ba9c1a6933..3a4ed04759 100644 --- a/tests/ut/python/parallel/test_batch_parallel_dropout.py +++ b/tests/ut/python/parallel/test_batch_parallel_dropout.py @@ -70,4 +70,5 @@ def test_batch_parallel_dropout(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y, b) diff --git a/tests/ut/python/parallel/test_batch_parallel_tensoradd.py b/tests/ut/python/parallel/test_batch_parallel_tensoradd.py index a81079e8ea..a92b9ee2ba 100644 --- a/tests/ut/python/parallel/test_batch_parallel_tensoradd.py +++ b/tests/ut/python/parallel/test_batch_parallel_tensoradd.py @@ -68,4 +68,5 @@ def test_matmul_add(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y, b) diff --git 
a/tests/ut/python/parallel/test_batchnorm_ex_batch_parallel.py b/tests/ut/python/parallel/test_batchnorm_ex_batch_parallel.py index 249fe60350..ee91d56097 100644 --- a/tests/ut/python/parallel/test_batchnorm_ex_batch_parallel.py +++ b/tests/ut/python/parallel/test_batchnorm_ex_batch_parallel.py @@ -73,4 +73,5 @@ def test_two_matmul_batchnorm_ex(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y, b) diff --git a/tests/ut/python/parallel/test_broadcast_to.py b/tests/ut/python/parallel/test_broadcast_to.py index 4159c9710e..450ecdb40d 100644 --- a/tests/ut/python/parallel/test_broadcast_to.py +++ b/tests/ut/python/parallel/test_broadcast_to.py @@ -68,6 +68,7 @@ def compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() + train_net.set_train() _executor.compile(train_net, _x1) context.reset_auto_parallel_context() @@ -77,6 +78,7 @@ def compile_net2(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() + train_net.set_train() _executor.compile(train_net, _x1, _x2) context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_comparison_function_info.py b/tests/ut/python/parallel/test_comparison_function_info.py index fc74d8ae46..62e9a19bc4 100644 --- a/tests/ut/python/parallel/test_comparison_function_info.py +++ b/tests/ut/python/parallel/test_comparison_function_info.py @@ -49,6 +49,7 @@ class GradWrap(nn.Cell): def compile_net(net, x, y, b): net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y, b) diff --git a/tests/ut/python/parallel/test_concat.py b/tests/ut/python/parallel/test_concat.py index cb7875751e..a7ce942c22 100644 --- a/tests/ut/python/parallel/test_concat.py +++ 
b/tests/ut/python/parallel/test_concat.py @@ -84,6 +84,7 @@ def compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() + train_net.set_train() _executor.compile(train_net, _x, _b) context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_dense_matmul.py b/tests/ut/python/parallel/test_dense_matmul.py index e408c65f84..f98d32d381 100644 --- a/tests/ut/python/parallel/test_dense_matmul.py +++ b/tests/ut/python/parallel/test_dense_matmul.py @@ -51,4 +51,5 @@ def test_dmnet_train_step(): label = Tensor(np.zeros([32, 768]).astype(np.float32)) net = DenseMutMulNet() net = train_step_with_loss_warp(DenseMutMulNet()) + net.set_train() _executor.compile(net, input_, label) diff --git a/tests/ut/python/parallel/test_different_type_for_div_op.py b/tests/ut/python/parallel/test_different_type_for_div_op.py index 0a07f08d80..0c894cea9a 100644 --- a/tests/ut/python/parallel/test_different_type_for_div_op.py +++ b/tests/ut/python/parallel/test_different_type_for_div_op.py @@ -37,6 +37,7 @@ class GradWrap(nn.Cell): def compile_net(net, x, y): net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y) diff --git a/tests/ut/python/parallel/test_dropout_do_mask.py b/tests/ut/python/parallel/test_dropout_do_mask.py index c966685b2a..f727105123 100644 --- a/tests/ut/python/parallel/test_dropout_do_mask.py +++ b/tests/ut/python/parallel/test_dropout_do_mask.py @@ -54,6 +54,7 @@ def compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() + train_net.set_train() _executor.compile(train_net, _x, _b) context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_element_wise_function.py b/tests/ut/python/parallel/test_element_wise_function.py index 7d6924fb8e..775d391367 100644 --- 
a/tests/ut/python/parallel/test_element_wise_function.py +++ b/tests/ut/python/parallel/test_element_wise_function.py @@ -49,6 +49,7 @@ class GradWrap(nn.Cell): def compile_net(net, x, y, b): net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y, b) diff --git a/tests/ut/python/parallel/test_embeddinglookup.py b/tests/ut/python/parallel/test_embeddinglookup.py index 01159c0dc1..33c0645126 100644 --- a/tests/ut/python/parallel/test_embeddinglookup.py +++ b/tests/ut/python/parallel/test_embeddinglookup.py @@ -66,6 +66,7 @@ def test_embeddinglookup_reducescatter_false(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([8, 32, 8]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) @@ -77,6 +78,7 @@ def test_embeddinglookup_reducescatter_true(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([8, 32, 8]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) @@ -88,6 +90,7 @@ def test_embeddinglookup_reducescatter_false_grad(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([8, 32, 8]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) @@ -100,6 +103,7 @@ def test_embeddinglookup_reducescatter_true_grad(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([8, 32, 8]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) @@ -114,6 +118,7 @@ def test_embeddinglookup_semi_auto1(): net.set_auto_parallel() x = Tensor(np.ones([64, 64]), dtype=ms.float32) y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) @@ -128,4 +133,5 @@ def test_embeddinglookup_semi_auto2(): net.set_auto_parallel() x = Tensor(np.ones([64, 64]), dtype=ms.float32) y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) diff --git a/tests/ut/python/parallel/test_eval.py b/tests/ut/python/parallel/test_eval.py new file mode 100644 index 0000000000..eb777c4d8c --- /dev/null +++ 
b/tests/ut/python/parallel/test_eval.py @@ -0,0 +1,69 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import numpy as np + +import mindspore as ms +from mindspore import context, Tensor, Parameter +from mindspore.common.api import _executor +from mindspore.nn import Cell +from mindspore.ops import operations as P + + +class Net(Cell): + def __init__(self, mul_weight, strategy1=None, strategy2=None): + super().__init__() + self.mul = P.Mul().shard(strategy1) + self.neg = P.Neg().shard(strategy2) + self.mul_weight = Parameter(mul_weight, "w1") + + def construct(self, x, b): + out = self.mul(x, self.mul_weight) + out = self.neg(out) + return out + + +class EvalNet(Cell): + def __init__(self, network, strategy2=None): + super().__init__() + self.network = network + self.relu = P.ReLU().shard(strategy2) + + def construct(self, x, b): + out = self.network(x, b) + out1 = self.relu(out) + return out, out1 + + +_x = Tensor(np.ones([64, 64]), dtype=ms.float32) +_w1 = Tensor(np.ones([64, 64]), dtype=ms.float32) +_b = Tensor(np.ones([64, 64]), dtype=ms.float32) + + +def test_train_and_eval(): + context.set_context(save_graphs=True, mode=0) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16) + strategy1 = ((4, 4), (4, 4)) + strategy2 = ((4, 4),) + net = Net(_w1, strategy1, strategy2) + eval_net = EvalNet(net, 
strategy2=strategy2) + net.set_auto_parallel() + net.set_train() + _executor.compile(net, _x, _b, phase='train', auto_parallel_mode=True) + + eval_net.set_train(mode=False) + eval_net.set_auto_parallel() + _executor.compile(eval_net, _x, _b, phase='eval', auto_parallel_mode=True) + + context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_expand_dims.py b/tests/ut/python/parallel/test_expand_dims.py index d71a78346b..e93f974f84 100644 --- a/tests/ut/python/parallel/test_expand_dims.py +++ b/tests/ut/python/parallel/test_expand_dims.py @@ -58,6 +58,7 @@ def compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() + train_net.set_train() _executor.compile(train_net, _x, _b) context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_forward_graph.py b/tests/ut/python/parallel/test_forward_graph.py index 8ad7451e6d..4780f0cd07 100644 --- a/tests/ut/python/parallel/test_forward_graph.py +++ b/tests/ut/python/parallel/test_forward_graph.py @@ -41,6 +41,7 @@ _b = Tensor(np.ones([128, 64, 32]), dtype=ms.float32) def compile_net(net): net.set_auto_parallel() + net.set_train() _executor.compile(net, _x, _b) context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_gather_v2.py b/tests/ut/python/parallel/test_gather_v2.py index 9e845f5a58..b950d8b43a 100644 --- a/tests/ut/python/parallel/test_gather_v2.py +++ b/tests/ut/python/parallel/test_gather_v2.py @@ -71,6 +71,7 @@ def test_gatherv2_semi_auto0(): x = Tensor(np.ones([64, 64]), dtype=ms.float32) y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) @@ -83,6 +84,7 @@ def test_gatherv2_semi_auto1(): x = Tensor(np.ones([64, 64]), dtype=ms.float32) y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) @@ -95,6 +97,7 @@ def test_gatherv2_semi_auto2(): x = 
Tensor(np.ones([64, 64]), dtype=ms.float32) y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) @@ -107,6 +110,7 @@ def test_gatherv2_semi_auto3(): x = Tensor(np.ones([64, 64]), dtype=ms.float32) y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) @@ -119,6 +123,7 @@ def test_gatherv2_semi_auto4(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) @@ -131,6 +136,7 @@ def test_gatherv2_semi_auto5(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) @@ -142,6 +148,7 @@ def test_gatherv2_semi_auto6(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 64, 32]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) @@ -153,6 +160,7 @@ def test_gatherv2_semi_auto7(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) @@ -165,6 +173,7 @@ def test_gatherv2_semi_auto8(): x = Tensor(np.ones([64]), dtype=ms.float32) y = Tensor(np.ones([64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) @@ -174,6 +183,7 @@ def test_gatherv2_auto0(): net.set_auto_parallel() x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 64, 32]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) @@ -183,4 +193,5 @@ def test_gatherv2_auto1(): net.set_auto_parallel() x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) diff --git a/tests/ut/python/parallel/test_gpu_dropout.py b/tests/ut/python/parallel/test_gpu_dropout.py index 148f0184f2..2dabc207c6 100644 --- a/tests/ut/python/parallel/test_gpu_dropout.py +++ 
b/tests/ut/python/parallel/test_gpu_dropout.py @@ -65,6 +65,7 @@ def test_dropout_semi_auto(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 128]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) @@ -77,6 +78,7 @@ def test_dropout_semi_auto2(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 128]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) @@ -89,6 +91,7 @@ def test_dropout_semi_auto3(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 128]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) @@ -99,4 +102,5 @@ def test_dropout_auto(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 128]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) diff --git a/tests/ut/python/parallel/test_hybird_parallel_activation.py b/tests/ut/python/parallel/test_hybird_parallel_activation.py index 8ff335e059..87552aed46 100644 --- a/tests/ut/python/parallel/test_hybird_parallel_activation.py +++ b/tests/ut/python/parallel/test_hybird_parallel_activation.py @@ -49,6 +49,7 @@ class GradWrap(nn.Cell): def compile_net(net, x, y, b): net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y, b) diff --git a/tests/ut/python/parallel/test_initializer_weight_slice.py b/tests/ut/python/parallel/test_initializer_weight_slice.py index 85faf9fc21..cd68b9ebf4 100644 --- a/tests/ut/python/parallel/test_initializer_weight_slice.py +++ b/tests/ut/python/parallel/test_initializer_weight_slice.py @@ -53,6 +53,7 @@ def check_initializer_weight_slice(init_name="Uniform"): weight = initializer(init_name, [64, 32], ms.float32) net = Net(strategy1, strategy2, weight) net.set_auto_parallel() + net.set_train() exe.compile(net, x, auto_parallel_mode=True, phase='train') hccl.rank_id = rank_save return net.parameters_dict()['w1'].data.asnumpy() @@ -131,6 +132,7 @@ def test_check_initializer_weight_slice_seed(init_name="Uniform"): weight 
= initializer(init_name, [64, 32], ms.float32) net = Net(strategy1, strategy2, weight) net.set_auto_parallel() + net.set_train() exe.compile(net, x, auto_parallel_mode=True, phase='train') hccl.rank_id = rank_save return net.parameters_dict()['w1'].data.asnumpy() diff --git a/tests/ut/python/parallel/test_l2normalize.py b/tests/ut/python/parallel/test_l2normalize.py index 850e71eb93..ff1d4f8924 100644 --- a/tests/ut/python/parallel/test_l2normalize.py +++ b/tests/ut/python/parallel/test_l2normalize.py @@ -75,4 +75,5 @@ def test_l2normalize_matmul(): x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) b = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y, b) diff --git a/tests/ut/python/parallel/test_layer_norm.py b/tests/ut/python/parallel/test_layer_norm.py index 78a019a80c..50c30002a8 100644 --- a/tests/ut/python/parallel/test_layer_norm.py +++ b/tests/ut/python/parallel/test_layer_norm.py @@ -52,6 +52,7 @@ def compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() + train_net.set_train() _executor.compile(train_net, _x, _b) context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_linear.py b/tests/ut/python/parallel/test_linear.py index 1b3cecad67..d368b5a033 100644 --- a/tests/ut/python/parallel/test_linear.py +++ b/tests/ut/python/parallel/test_linear.py @@ -73,4 +73,5 @@ def test_linear(): y = Tensor(np.ones([64, 32]), dtype=ms.float32) bias = Tensor(np.ones([64]), dtype=ms.float32) label = Tensor(np.ones([64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y, bias, label) diff --git a/tests/ut/python/parallel/test_loop_two_matmul.py b/tests/ut/python/parallel/test_loop_two_matmul.py index 9c36bff0d1..5c162a6d8f 100644 --- a/tests/ut/python/parallel/test_loop_two_matmul.py +++ 
b/tests/ut/python/parallel/test_loop_two_matmul.py @@ -95,5 +95,6 @@ def test_two_matmul(): net = GradWrap(NetWithLoss(Net(strategy1, strategy2))) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y, b) count = count + 1 diff --git a/tests/ut/python/parallel/test_loss_and_optimizer.py b/tests/ut/python/parallel/test_loss_and_optimizer.py index 215c6dd8d2..03c641e59a 100644 --- a/tests/ut/python/parallel/test_loss_and_optimizer.py +++ b/tests/ut/python/parallel/test_loss_and_optimizer.py @@ -37,6 +37,7 @@ class NetWithLoss(nn.Cell): def compile_net(net, x, b): net.set_auto_parallel() + net.set_train() _executor.compile(net, x, b) diff --git a/tests/ut/python/parallel/test_manual_embedding_lookup.py b/tests/ut/python/parallel/test_manual_embedding_lookup.py index 22741f8695..542348946f 100644 --- a/tests/ut/python/parallel/test_manual_embedding_lookup.py +++ b/tests/ut/python/parallel/test_manual_embedding_lookup.py @@ -67,6 +67,7 @@ def compile_net(net): optimizer.sparse_opt.add_prim_attr("primitive_target", "CPU") train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() + train_net.set_train() _executor.compile(train_net, _x, _b, auto_parallel_mode=True) context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_manual_gatherv2.py b/tests/ut/python/parallel/test_manual_gatherv2.py index 1d7ffddc7d..dd563bc52a 100644 --- a/tests/ut/python/parallel/test_manual_gatherv2.py +++ b/tests/ut/python/parallel/test_manual_gatherv2.py @@ -64,6 +64,7 @@ def compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() + train_net.set_train() _executor.compile(train_net, _x, _b, auto_parallel_mode=True) context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_matmul_dropout.py 
b/tests/ut/python/parallel/test_matmul_dropout.py index 70718c7f5c..98f955935e 100644 --- a/tests/ut/python/parallel/test_matmul_dropout.py +++ b/tests/ut/python/parallel/test_matmul_dropout.py @@ -75,4 +75,5 @@ def test_two_matmul_dropout(): x = Tensor(np.ones([128, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y, b) diff --git a/tests/ut/python/parallel/test_matmul_tensor.py b/tests/ut/python/parallel/test_matmul_tensor.py index 64359d7caa..12924a7275 100644 --- a/tests/ut/python/parallel/test_matmul_tensor.py +++ b/tests/ut/python/parallel/test_matmul_tensor.py @@ -51,6 +51,7 @@ class GradWrap(nn.Cell): def compile_net(net, x, y): net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y) diff --git a/tests/ut/python/parallel/test_mix_precision_hybrid_parallel.py b/tests/ut/python/parallel/test_mix_precision_hybrid_parallel.py index 867246e97a..d30d27bf19 100644 --- a/tests/ut/python/parallel/test_mix_precision_hybrid_parallel.py +++ b/tests/ut/python/parallel/test_mix_precision_hybrid_parallel.py @@ -87,4 +87,5 @@ def test_two_matmul(): b = Tensor(np.ones([32, 64]), dtype=ms.float32) z = Tensor(np.ones([64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y, b, z) diff --git a/tests/ut/python/parallel/test_neg.py b/tests/ut/python/parallel/test_neg.py index 28dac24ab5..6afcfe251f 100644 --- a/tests/ut/python/parallel/test_neg.py +++ b/tests/ut/python/parallel/test_neg.py @@ -43,6 +43,7 @@ def compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() + train_net.set_train() _executor.compile(train_net, _x, _b) context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_one_hot_net.py b/tests/ut/python/parallel/test_one_hot_net.py index 9f8eebf915..8ba68e1fe5 100644 --- 
a/tests/ut/python/parallel/test_one_hot_net.py +++ b/tests/ut/python/parallel/test_one_hot_net.py @@ -278,6 +278,7 @@ def test_bn_reshape_dense_bn_train_loss(): net = GradWrap(NetWithLoss(BNReshapeDenseBNNet())) net.set_auto_parallel() + net.set_train() _executor.compile(net, input_, label) @@ -292,6 +293,7 @@ def test_semi_one_hot_net_batch(): net = GradWrap(NetWithLoss(net)) net.set_auto_parallel() + net.set_train() _executor.compile(net, input_, label) diff --git a/tests/ut/python/parallel/test_one_weight_parameter.py b/tests/ut/python/parallel/test_one_weight_parameter.py index 8bd83148b4..6cffd4e7dd 100644 --- a/tests/ut/python/parallel/test_one_weight_parameter.py +++ b/tests/ut/python/parallel/test_one_weight_parameter.py @@ -76,4 +76,5 @@ def test_one_weight_parameter(): context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") train_net.set_auto_parallel() + train_net.set_train() _executor.compile(train_net, x, b) diff --git a/tests/ut/python/parallel/test_onehot.py b/tests/ut/python/parallel/test_onehot.py index 26a77f40ea..d39fe28ff6 100644 --- a/tests/ut/python/parallel/test_onehot.py +++ b/tests/ut/python/parallel/test_onehot.py @@ -78,6 +78,7 @@ def compile_graph(strategy1, strategy2, strategy3, strategy4, auto=False, onthot x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64]), dtype=ms.int32) + net.set_train() _executor.compile(net, x, y, b) diff --git a/tests/ut/python/parallel/test_pack.py b/tests/ut/python/parallel/test_pack.py index ccc8356703..51ee607a65 100644 --- a/tests/ut/python/parallel/test_pack.py +++ b/tests/ut/python/parallel/test_pack.py @@ -84,6 +84,7 @@ def compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() + train_net.set_train() _executor.compile(train_net, _x) context.reset_auto_parallel_context() @@ -93,6 +94,7 @@ def 
compile_net1(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() + train_net.set_train() _executor.compile(train_net, _x1) context.reset_auto_parallel_context() @@ -102,6 +104,7 @@ def compile_net2(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() + train_net.set_train() _executor.compile(train_net, _x2) context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_parallel_optimizer.py b/tests/ut/python/parallel/test_parallel_optimizer.py index e0f214878c..21f39346c3 100644 --- a/tests/ut/python/parallel/test_parallel_optimizer.py +++ b/tests/ut/python/parallel/test_parallel_optimizer.py @@ -76,6 +76,7 @@ def auto_parallel_compile_net(mode, dev_num, strategy1=None, strategy2=None): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_network = TrainOneStepCell(net, optimizer) train_network.set_auto_parallel() + train_network.set_train() _executor.compile(train_network, inputs, label) context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_parameter_multi_users.py b/tests/ut/python/parallel/test_parameter_multi_users.py index 051af762c3..d0f01195c9 100644 --- a/tests/ut/python/parallel/test_parameter_multi_users.py +++ b/tests/ut/python/parallel/test_parameter_multi_users.py @@ -56,6 +56,7 @@ def compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() + train_net.set_train() _executor.compile(train_net, _x, _b) context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_pipeline_parallel.py b/tests/ut/python/parallel/test_pipeline_parallel.py index d20c77c98a..3f8147d2e9 100644 --- a/tests/ut/python/parallel/test_pipeline_parallel.py +++ 
b/tests/ut/python/parallel/test_pipeline_parallel.py @@ -74,6 +74,7 @@ def test_gatherv2_semi_samestage1(): x = Tensor(np.ones([64, 64]), dtype=ms.float32) y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) def test_gatherv2_semi_samestage2(): @@ -86,4 +87,5 @@ def test_gatherv2_semi_samestage2(): x = Tensor(np.ones([64, 64]), dtype=ms.float32) y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) diff --git a/tests/ut/python/parallel/test_prelu.py b/tests/ut/python/parallel/test_prelu.py index 7ac0c3cf7b..4732823781 100644 --- a/tests/ut/python/parallel/test_prelu.py +++ b/tests/ut/python/parallel/test_prelu.py @@ -49,6 +49,7 @@ class GradWrap(nn.Cell): def compile_net(net, x, y): net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y) @@ -166,6 +167,7 @@ def test_prelu_parallel_success3(): w = Tensor(np.random.rand(16), dtype=ms.float32) net = GradWrap3(NetWithLoss3(Net(strategy1, strategy2))) net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y, w) diff --git a/tests/ut/python/parallel/test_reduce_method_info.py b/tests/ut/python/parallel/test_reduce_method_info.py index 7d2c100f65..6e369ff134 100644 --- a/tests/ut/python/parallel/test_reduce_method_info.py +++ b/tests/ut/python/parallel/test_reduce_method_info.py @@ -69,11 +69,13 @@ class GradWrap(nn.Cell): def compile_net_no_bias(net, x, y): net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y) def compile_net(net, x, y, b): net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y, b) diff --git a/tests/ut/python/parallel/test_repeated_calc.py b/tests/ut/python/parallel/test_repeated_calc.py index 5ce133a759..8e1d7f3c48 100644 --- a/tests/ut/python/parallel/test_repeated_calc.py +++ b/tests/ut/python/parallel/test_repeated_calc.py @@ -49,6 +49,7 @@ class GradWrap(nn.Cell): def compile_net(net, x, y, b): net.set_auto_parallel() + net.set_train() 
_executor.compile(net, x, y, b) diff --git a/tests/ut/python/parallel/test_reshape.py b/tests/ut/python/parallel/test_reshape.py index cb2aac7109..00b25c1afc 100644 --- a/tests/ut/python/parallel/test_reshape.py +++ b/tests/ut/python/parallel/test_reshape.py @@ -317,6 +317,7 @@ class ReshapeNet6(nn.Cell): def compile_net(net, input_): net.set_auto_parallel() + net.set_train() _executor.compile(net, input_) diff --git a/tests/ut/python/parallel/test_reshape_optimized.py b/tests/ut/python/parallel/test_reshape_optimized.py index 74b4c0024d..e3b3003bfd 100644 --- a/tests/ut/python/parallel/test_reshape_optimized.py +++ b/tests/ut/python/parallel/test_reshape_optimized.py @@ -44,6 +44,7 @@ def compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() + train_net.set_train() _executor.compile(train_net, _x, _b) context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_reshape_parameter.py b/tests/ut/python/parallel/test_reshape_parameter.py index 3b23c4d13a..9d6740cad2 100644 --- a/tests/ut/python/parallel/test_reshape_parameter.py +++ b/tests/ut/python/parallel/test_reshape_parameter.py @@ -63,6 +63,7 @@ class Net(nn.Cell): def compile_net(net, x, y): net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y) diff --git a/tests/ut/python/parallel/test_reshape_skip_redistribution.py b/tests/ut/python/parallel/test_reshape_skip_redistribution.py index 29c0144301..cb9d0a121c 100644 --- a/tests/ut/python/parallel/test_reshape_skip_redistribution.py +++ b/tests/ut/python/parallel/test_reshape_skip_redistribution.py @@ -47,6 +47,7 @@ def compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() + train_net.set_train() _executor.compile(train_net, _x, _b) context.reset_auto_parallel_context() diff --git 
a/tests/ut/python/parallel/test_reshape_unexpand.py b/tests/ut/python/parallel/test_reshape_unexpand.py index d0144bebdf..137fe66a58 100644 --- a/tests/ut/python/parallel/test_reshape_unexpand.py +++ b/tests/ut/python/parallel/test_reshape_unexpand.py @@ -67,6 +67,7 @@ def test_reshape_unexpand(): net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") net.set_auto_parallel() + net.set_train() _executor.compile(net, x) def test_reshape_unexpand_1(): @@ -89,6 +90,7 @@ def test_reshape_unexpand_1(): net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") net.set_auto_parallel() + net.set_train() _executor.compile(net, x) def test_reshape_unexpand_2(): @@ -111,6 +113,7 @@ def test_reshape_unexpand_2(): net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") net.set_auto_parallel() + net.set_train() _executor.compile(net, x) def test_reshape_unexpand_3(): @@ -134,6 +137,7 @@ def test_reshape_unexpand_3(): net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") net.set_auto_parallel() + net.set_train() _executor.compile(net, x) def test_reshape_unexpand_4(): @@ -157,6 +161,7 @@ def test_reshape_unexpand_4(): net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") net.set_auto_parallel() + net.set_train() _executor.compile(net, x) def test_reshape_unexpand_5(): @@ -180,6 +185,7 @@ def test_reshape_unexpand_5(): net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") net.set_auto_parallel() + net.set_train() _executor.compile(net, x) def test_reshape_unexpand_6(): @@ -203,6 +209,7 @@ def test_reshape_unexpand_6(): net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") net.set_auto_parallel() + net.set_train() _executor.compile(net, x) 
def test_reshape_unexpand_7(): @@ -235,6 +242,7 @@ def test_reshape_unexpand_7(): x = Tensor(np.ones([32, 3, 224, 224]), dtype=ms.float32) net = GradWrap(NetWithLoss(Net())) net.set_auto_parallel() + net.set_train() _executor.compile(net, x) def test_reshape_unexpand_8(): @@ -257,4 +265,5 @@ def test_reshape_unexpand_8(): net = GradWrap(NetWithLoss(Net())) context.set_auto_parallel_context(parallel_mode="auto_parallel") net.set_auto_parallel() + net.set_train() _executor.compile(net, x) diff --git a/tests/ut/python/parallel/test_scalar_loss.py b/tests/ut/python/parallel/test_scalar_loss.py index 1043b2997c..f25e7737ba 100644 --- a/tests/ut/python/parallel/test_scalar_loss.py +++ b/tests/ut/python/parallel/test_scalar_loss.py @@ -60,4 +60,5 @@ def test_sum_as_loss(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 32]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) diff --git a/tests/ut/python/parallel/test_self_attention.py b/tests/ut/python/parallel/test_self_attention.py index a484e1fd63..abba5b84cf 100644 --- a/tests/ut/python/parallel/test_self_attention.py +++ b/tests/ut/python/parallel/test_self_attention.py @@ -52,6 +52,7 @@ class GradWrap(nn.Cell): def compile_net(net, x): net.set_auto_parallel() + net.set_train() _executor.compile(net, x) diff --git a/tests/ut/python/parallel/test_semi_auto_two_subgraphs.py b/tests/ut/python/parallel/test_semi_auto_two_subgraphs.py index 85fc37c497..97aa17b26a 100644 --- a/tests/ut/python/parallel/test_semi_auto_two_subgraphs.py +++ b/tests/ut/python/parallel/test_semi_auto_two_subgraphs.py @@ -107,4 +107,5 @@ def test_two_subgraphs(): net = TrainStepWrap(NetWithLoss(Net())) input_x = Tensor(np.ones([8, 8, 8, 8]), dtype=ms.float32) net.set_auto_parallel() + net.set_train() _executor.compile(net, input_x) diff --git a/tests/ut/python/parallel/test_sigmoid_cross_entropy_with_logits.py b/tests/ut/python/parallel/test_sigmoid_cross_entropy_with_logits.py index 0311f82462..d8e00c32c3 
100644 --- a/tests/ut/python/parallel/test_sigmoid_cross_entropy_with_logits.py +++ b/tests/ut/python/parallel/test_sigmoid_cross_entropy_with_logits.py @@ -43,6 +43,7 @@ def compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() + train_net.set_train() _executor.compile(train_net, _x, _b) context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_softmax_cross_entropy_loss.py b/tests/ut/python/parallel/test_softmax_cross_entropy_loss.py index 69d1f7c47c..c48e0dbbf5 100644 --- a/tests/ut/python/parallel/test_softmax_cross_entropy_loss.py +++ b/tests/ut/python/parallel/test_softmax_cross_entropy_loss.py @@ -48,6 +48,7 @@ class GradWrap(nn.Cell): def compile_net(net, x, y, b): net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y, b) diff --git a/tests/ut/python/parallel/test_sparse_feature_bprop.py b/tests/ut/python/parallel/test_sparse_feature_bprop.py index 78dcd6dacb..1ba968d62b 100644 --- a/tests/ut/python/parallel/test_sparse_feature_bprop.py +++ b/tests/ut/python/parallel/test_sparse_feature_bprop.py @@ -60,6 +60,7 @@ def test_bprop_with_sparse_feature_allreduce(): net = GradWrap(Net()) x = Tensor(np.ones([64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x) @@ -87,6 +88,7 @@ def test_bprop_with_sparse_feature_mirror(): def compile_net(net): optimizer = Adam(net.trainable_params(), learning_rate=0.1, loss_scale=1024.0, weight_decay=0.9) train_net = TrainOneStepCell(net, optimizer) + train_net.set_train() _executor.compile(train_net, _x, _b) net = Net() @@ -119,6 +121,7 @@ def test_bprop_with_sparse_feature_dataparallel(): def compile_net(net): optimizer = Adam(net.trainable_params(), learning_rate=0.1, loss_scale=1024.0, weight_decay=0.9) train_net = TrainOneStepCell(net, optimizer) + train_net.set_train() _executor.compile(train_net, _x, _b) net = Net() diff --git 
a/tests/ut/python/parallel/test_sparse_gather_v2.py b/tests/ut/python/parallel/test_sparse_gather_v2.py index 2250e49320..e80f3c0832 100644 --- a/tests/ut/python/parallel/test_sparse_gather_v2.py +++ b/tests/ut/python/parallel/test_sparse_gather_v2.py @@ -72,6 +72,7 @@ def test_gatherv2_semi_auto0(): x = Tensor(np.ones([64, 64]), dtype=ms.float32) y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) @@ -84,6 +85,7 @@ def test_gatherv2_semi_auto1(): x = Tensor(np.ones([64, 64]), dtype=ms.float32) y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) @@ -96,6 +98,7 @@ def test_gatherv2_semi_auto2(): x = Tensor(np.ones([64, 64]), dtype=ms.float32) y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) @@ -108,6 +111,7 @@ def test_gatherv2_semi_auto3(): x = Tensor(np.ones([64, 64]), dtype=ms.float32) y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) @@ -120,6 +124,7 @@ def test_gatherv2_semi_auto4(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) @@ -132,6 +137,7 @@ def test_gatherv2_semi_auto5(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) @@ -143,6 +149,7 @@ def test_gatherv2_semi_auto6(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 64, 32]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) @@ -154,6 +161,7 @@ def test_gatherv2_semi_auto7(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) @@ -163,6 +171,7 @@ def test_gatherv2_auto0(): net.set_auto_parallel() x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 64, 32]), 
dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) @@ -172,6 +181,7 @@ def test_gatherv2_auto1(): net.set_auto_parallel() x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) @@ -184,6 +194,7 @@ def test_gatherv2_cpu0(): x = Tensor(np.ones([64, 64]), dtype=ms.float32) y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) @@ -196,6 +207,7 @@ def test_gatherv2_cpu1(): x = Tensor(np.ones([64, 64]), dtype=ms.float32) y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) @@ -208,4 +220,5 @@ def test_gatherv2_cpu2(): x = Tensor(np.ones([64, 64]), dtype=ms.float32) y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) + net.set_train() _executor.compile(net, x, y) diff --git a/tests/ut/python/parallel/test_split.py b/tests/ut/python/parallel/test_split.py index 0ebf22fdff..a7210ab77c 100644 --- a/tests/ut/python/parallel/test_split.py +++ b/tests/ut/python/parallel/test_split.py @@ -79,6 +79,7 @@ def compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() + train_net.set_train() _executor.compile(train_net, _x) context.reset_auto_parallel_context() @@ -88,6 +89,7 @@ def compile_net1(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() + train_net.set_train() _executor.compile(train_net, _x1) context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_split_grad_sens.py b/tests/ut/python/parallel/test_split_grad_sens.py index 2d57604a94..077dd9540e 100644 --- a/tests/ut/python/parallel/test_split_grad_sens.py +++ b/tests/ut/python/parallel/test_split_grad_sens.py @@ -66,10 +66,12 @@ class GradWrap4(nn.Cell): def compile_net(net, x, y, b): 
net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y, b) def compile_net_no_bias(net, x, y): net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y) def test_no_grad(): @@ -120,6 +122,7 @@ def test_grad_sens_parameter_type(): sens = Tensor(np.ones([128, 64]), dtype=ms.float32) net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y, b, sens, phase='train', auto_parallel_mode=True) x_layout = ([8, 8], [1, -1], [16, 32], 0, True, '') y_layout = ([8, 8], [-1, 0], [32, 8], 0, True, '') diff --git a/tests/ut/python/parallel/test_square.py b/tests/ut/python/parallel/test_square.py index 823a21ad1f..a354395c7d 100644 --- a/tests/ut/python/parallel/test_square.py +++ b/tests/ut/python/parallel/test_square.py @@ -45,6 +45,7 @@ def compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() + train_net.set_train() _executor.compile(train_net, _x, _b) context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_squeeze_info.py b/tests/ut/python/parallel/test_squeeze_info.py index 1edee94552..76bedeb000 100644 --- a/tests/ut/python/parallel/test_squeeze_info.py +++ b/tests/ut/python/parallel/test_squeeze_info.py @@ -39,6 +39,7 @@ _b = Tensor(np.ones([64, 32]), dtype=ms.float32) def compile_net(net): net.set_auto_parallel() + net.set_train() _executor.compile(net, _x, _b) context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_step_parallel.py b/tests/ut/python/parallel/test_step_parallel.py index db3bab1ab0..5eb9ba7157 100644 --- a/tests/ut/python/parallel/test_step_parallel.py +++ b/tests/ut/python/parallel/test_step_parallel.py @@ -76,4 +76,5 @@ def test_two_matmul(): b = Tensor(np.ones([128, 128]), dtype=ms.float32) a = Tensor(np.ones([128, 128]), dtype=ms.float32) net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y, b, a) diff --git 
a/tests/ut/python/parallel/test_strategy_checkpoint.py b/tests/ut/python/parallel/test_strategy_checkpoint.py index 31e0c20034..9a2db97951 100644 --- a/tests/ut/python/parallel/test_strategy_checkpoint.py +++ b/tests/ut/python/parallel/test_strategy_checkpoint.py @@ -87,6 +87,7 @@ def test_six_matmul_save(): net.set_auto_parallel() x1 = Tensor(np.ones([32, 32]), dtype=ms.float32) x6 = Tensor(np.ones([128, 32]), dtype=ms.float32) + net.set_train() _executor.compile(net, x1, x6) @@ -149,6 +150,7 @@ def test_six_matmul_load(): x1 = Tensor(np.ones([32, 32]), dtype=ms.float32) x6 = Tensor(np.ones([128, 32]), dtype=ms.float32) x7 = Tensor(np.ones([32, 32]), dtype=ms.float32) + net.set_train() _executor.compile(net, x1, x6, x7) @@ -205,6 +207,7 @@ def test_six_matmul_save_auto(): net.set_auto_parallel() x1 = Tensor(np.ones([32, 32]), dtype=ms.float32) x6 = Tensor(np.ones([128, 32]), dtype=ms.float32) + net.set_train() _executor.compile(net, x1, x6) @@ -265,4 +268,5 @@ def test_six_matmul_load_auto(): x1 = Tensor(np.ones([32, 32]), dtype=ms.float32) x6 = Tensor(np.ones([128, 32]), dtype=ms.float32) x7 = Tensor(np.ones([32, 32]), dtype=ms.float32) + net.set_train() _executor.compile(net, x1, x6, x7) diff --git a/tests/ut/python/parallel/test_stridedslice.py b/tests/ut/python/parallel/test_stridedslice.py index 828b7f80ed..acf2344699 100644 --- a/tests/ut/python/parallel/test_stridedslice.py +++ b/tests/ut/python/parallel/test_stridedslice.py @@ -71,6 +71,7 @@ def compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() + train_net.set_train() _executor.compile(train_net, _x, _b) context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_sum_as_loss.py b/tests/ut/python/parallel/test_sum_as_loss.py index 60162cb6e6..35a5197385 100644 --- a/tests/ut/python/parallel/test_sum_as_loss.py +++ b/tests/ut/python/parallel/test_sum_as_loss.py @@ 
-37,6 +37,7 @@ class GradWrap(nn.Cell): def compile_net(net, x, y): net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y) diff --git a/tests/ut/python/parallel/test_tile.py b/tests/ut/python/parallel/test_tile.py index 14cfdfb59c..7cae9f68b5 100644 --- a/tests/ut/python/parallel/test_tile.py +++ b/tests/ut/python/parallel/test_tile.py @@ -64,6 +64,7 @@ def compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() + train_net.set_train() _executor.compile(train_net, _x, _b) context.reset_auto_parallel_context() diff --git a/tests/ut/python/parallel/test_train_and_eval.py b/tests/ut/python/parallel/test_train_and_eval.py index a851e9c318..1cbccbf959 100644 --- a/tests/ut/python/parallel/test_train_and_eval.py +++ b/tests/ut/python/parallel/test_train_and_eval.py @@ -34,7 +34,7 @@ class Net(Cell): return out -class EvalNet(Cell): +class EvalNet(Cell): def __init__(self, network, strategy2=None): super().__init__() self.network = network @@ -46,9 +46,9 @@ class EvalNet(Cell): return out -_x = Tensor(np.ones([8, 8]), dtype=ms.float32) -_w1 = Tensor(np.ones([8, 8]), dtype=ms.float32) -_b = Tensor(np.ones([8, 8]), dtype=ms.float32) +_x = Tensor(np.ones([64, 64]), dtype=ms.float32) +_w1 = Tensor(np.ones([64, 64]), dtype=ms.float32) +_b = Tensor(np.ones([64, 64]), dtype=ms.float32) def test_train_and_eval(): @@ -58,8 +58,8 @@ def test_train_and_eval(): strategy2 = ((4, 4),) net = Net(_w1, strategy1, strategy2) eval_net = EvalNet(net, strategy2=strategy2) - net.set_train() net.set_auto_parallel() + net.set_train() _executor.compile(net, _x, _b, phase='train', auto_parallel_mode=True) eval_net.set_train(mode=False) diff --git a/tests/ut/python/parallel/test_two_matmul.py b/tests/ut/python/parallel/test_two_matmul.py index dbef5e7e71..13460f5828 100644 --- a/tests/ut/python/parallel/test_two_matmul.py +++ 
b/tests/ut/python/parallel/test_two_matmul.py @@ -49,6 +49,7 @@ class GradWrap(nn.Cell): def compile_net(net, x, y, b): net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y, b) diff --git a/tests/ut/python/parallel/test_two_weights_parameter.py b/tests/ut/python/parallel/test_two_weights_parameter.py index 50d4fb17b0..cebee4ce80 100644 --- a/tests/ut/python/parallel/test_two_weights_parameter.py +++ b/tests/ut/python/parallel/test_two_weights_parameter.py @@ -80,4 +80,5 @@ def test_two_weights_parameter(): train_net = OneStepCell(net_with_loss) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") train_net.set_auto_parallel() + train_net.set_train() _executor.compile(train_net, x, b) diff --git a/tests/ut/python/parallel/test_virtual_dataset_3_input.py b/tests/ut/python/parallel/test_virtual_dataset_3_input.py index e9015713ca..e1be9e7868 100644 --- a/tests/ut/python/parallel/test_virtual_dataset_3_input.py +++ b/tests/ut/python/parallel/test_virtual_dataset_3_input.py @@ -76,6 +76,7 @@ def test_virtual_dataset_3_input(): y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 2048]), dtype=ms.float32) net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y, b) @@ -100,6 +101,7 @@ def test_virtualdataset_cell_3_inputs(): y = Tensor(np.ones([32, 64]), dtype=ms.float32) b = Tensor(np.ones([64, 2048]), dtype=ms.float32) net.set_auto_parallel() + net.set_train() _executor.compile(net, x, y, b)