diff --git a/mindspore/ccsrc/pipeline/pynative/pynative_execute.cc b/mindspore/ccsrc/pipeline/pynative/pynative_execute.cc
index 3df381eb1a..50a611a2ba 100644
--- a/mindspore/ccsrc/pipeline/pynative/pynative_execute.cc
+++ b/mindspore/ccsrc/pipeline/pynative/pynative_execute.cc
@@ -738,27 +738,7 @@ AnfNodePtr PynativeExecutor::MakeCNode(const OpExecInfoPtr &op_exec_info, std::v
         inputs.emplace_back(input_node);
       }
     }
-
-    auto const_input_index = prim->get_const_input_indexes();
-    bool have_const_input = !const_input_index.empty();
-    bool is_const_prim = prim->is_const_prim();
-    MS_LOG(DEBUG) << prim->ToString() << " abs is nullptr " << (abs == nullptr) << " is_const_value "
-                  << prim->is_const_prim();
-    bool is_const_input =
-      have_const_input && std::find(const_input_index.begin(), const_input_index.end(), i) != const_input_index.end();
-    if (abs == nullptr || is_const_prim || is_const_input) {
-      MS_LOG(DEBUG) << "MakeCnode get node no in map " << id;
-      ValuePtr input_value = PyAttrValue(obj);
-      abs = input_value->ToAbstract();
-      if (!is_const_prim && !is_const_input) {
-        auto config = abstract::AbstractBase::kBroadenTensorOnly;
-        abs = abs->Broaden(config);
-        MS_LOG(DEBUG) << "Broaden for " << prim->ToString() << " " << config;
-      }
-      node_abs_map_[id] = abs;
-    }
-
-    (*args_spec_list).emplace_back(abs);
+    (*args_spec_list).emplace_back(CheckConstValue(prim, obj, abs, id, i));
   }
 
   CNodePtr cnode = nullptr;
@@ -770,6 +750,34 @@ AnfNodePtr PynativeExecutor::MakeCNode(const OpExecInfoPtr &op_exec_info, std::v
   return cnode;
 }
 
+abstract::AbstractBasePtr PynativeExecutor::CheckConstValue(const PrimitivePyPtr &prim, const py::object &obj,
+                                                            const abstract::AbstractBasePtr &abs, const std::string &id,
+                                                            size_t index) {
+  MS_EXCEPTION_IF_NULL(prim);
+  auto const_input_index = prim->get_const_input_indexes();
+  bool have_const_input = !const_input_index.empty();
+  bool is_const_prim = prim->is_const_prim();
+  auto new_abs = abs;
+  MS_LOG(DEBUG) << prim->ToString() << " abs is nullptr " << (abs == nullptr) << " is_const_value "
+                << prim->is_const_prim();
+  bool is_const_input =
+    have_const_input && std::find(const_input_index.begin(), const_input_index.end(), index) != const_input_index.end();
+  if (abs == nullptr || is_const_prim || is_const_input) {
+    MS_LOG(DEBUG) << "MakeCnode get node no in map " << id;
+    ValuePtr input_value = PyAttrValue(obj);
+    MS_EXCEPTION_IF_NULL(input_value);
+    new_abs = input_value->ToAbstract();
+    if (!is_const_prim && !is_const_input) {
+      auto config = abstract::AbstractBase::kBroadenTensorOnly;
+      MS_EXCEPTION_IF_NULL(new_abs);
+      new_abs = new_abs->Broaden(config);
+      MS_LOG(DEBUG) << "Broaden for " << prim->ToString() << " " << config;
+    }
+    node_abs_map_[id] = new_abs;
+  }
+  return new_abs;
+}
+
 void PynativeExecutor::GetOpOutputAbstract(const OpExecInfoPtr &op_exec_info,
                                            const abstract::AbstractBasePtrList &args_spec_list, bool *is_find) {
   MS_EXCEPTION_IF_NULL(is_find);
@@ -1004,6 +1012,7 @@ AnfNodePtr PynativeExecutor::GetInput(const py::object &obj, bool op_mask) {
       return free_param;
     }
     node = graph_info->node_map.at(obj_id).first;
+    MS_EXCEPTION_IF_NULL(node);
    MS_LOG(DEBUG) << "Get input param node " << node->ToString() << " " << obj_id;
     return node;
   }
@@ -2008,9 +2017,14 @@ void PynativeExecutor::MakeNewTopGraph(const string &cell_id, const py::args &ar
   top_cell_id_ = cell_id;
   in_grad_process_ = true;
   // update forward already run flag with previous top cell
+  std::string input_args_id;
+  for (size_t i = 0; i < args.size(); ++i) {
+    input_args_id = input_args_id + GetId(args[i]) + "_";
+  }
   auto pre_top_cell = GetTopCell(cell_id);
   if (pre_top_cell != nullptr) {
     pre_top_cell->forward_already_run = true;
+    pre_top_cell->input_args_id = input_args_id;
   }
   auto df_builder = std::make_shared<FuncGraph>();
   auto graph_info = std::make_shared<GraphInfo>(cell_id);
@@ -2019,6 +2033,7 @@ void PynativeExecutor::MakeNewTopGraph(const string &cell_id, const py::args &ar
   resource->results()[pipeline::kPynativeGraphId] = graph_id_++;
   auto top_cell_info = std::make_shared<TopCellInfo>(true, resource, df_builder, cell_id);
   top_cell_info->forward_already_run = true;
+  top_cell_info->input_args_id = input_args_id;
   if (!IsTopestGraph(cell_id)) {
     top_cell_info->top_cell_index = cell_graph_list_.size();
     top_cell_index_ = top_cell_info->top_cell_index;
@@ -2862,11 +2877,24 @@ py::object PynativeExecutor::CheckGraph(const py::object &cell, const py::args &
 }
 
 py::object PynativeExecutor::CheckAlreadyRun(const py::object &cell, const py::args &args) {
+  bool forward_run = false;
   const auto &cell_id = GetCellId(cell, args);
+  // Check whether the top cell has already run.
+  std::string input_args_id;
+  for (size_t i = 0; i < args.size(); ++i) {
+    input_args_id = input_args_id + GetId(args[i]) + "_";
+  }
   auto top_cell = GetTopCell(cell_id);
-  bool forward_run = false;
   if (top_cell != nullptr) {
-    forward_run = top_cell->forward_already_run;
+    if (!top_cell->input_args_id.empty() && top_cell->input_args_id != input_args_id && top_cell->forward_already_run &&
+        CheckDynamicCell(cell_id)) {
+      MS_LOG(WARNING) << "The construct of the running cell is dynamic and the input info of this cell has changed, "
+                         "so the forward process will run again";
+      top_cell->forward_already_run = false;
+      top_cell->input_args_id = input_args_id;
+    } else {
+      forward_run = top_cell->forward_already_run;
+    }
     if (forward_run) {
       top_cell_index_ = top_cell->top_cell_index;
     }
diff --git a/mindspore/ccsrc/pipeline/pynative/pynative_execute.h b/mindspore/ccsrc/pipeline/pynative/pynative_execute.h
index 912ee64df7..e80cfdde15 100644
--- a/mindspore/ccsrc/pipeline/pynative/pynative_execute.h
+++ b/mindspore/ccsrc/pipeline/pynative/pynative_execute.h
@@ -107,6 +107,7 @@ class TopCellInfo {
   std::string cell_id;
   std::string sens_id;
   std::string weights_id;
+  std::string input_args_id;
 };
 
 using GraphInfoPtr = std::shared_ptr<GraphInfo>;
@@ -209,6 +210,8 @@ class PynativeExecutor : public std::enable_shared_from_this<PynativeExecutor> {
   AnfNodePtr MakeValueNode(const py::object &obj, const std::string &obj_id);
   AnfNodePtr MakeCNode(const OpExecInfoPtr &op_exec_info, std::vector<bool> *op_masks,
                        abstract::AbstractBasePtrList *args_spec_list);
+  abstract::AbstractBasePtr CheckConstValue(const PrimitivePyPtr &prim, const py::object &obj,
+                                            const abstract::AbstractBasePtr &abs, const std::string &id, size_t index);
   void GetOpOutputAbstract(const OpExecInfoPtr &op_exec_info, const abstract::AbstractBasePtrList &args_spec_list,
                            bool *is_find);
   void SaveOutputNodeMap(const std::string &obj_id, const py::object &out_real, const AnfNodePtr &cnode);
diff --git a/mindspore/nn/cell.py b/mindspore/nn/cell.py
index ba37f5bcdc..ba5896f5bd 100755
--- a/mindspore/nn/cell.py
+++ b/mindspore/nn/cell.py
@@ -307,6 +307,23 @@ class Cell(Cell_):
                 res.append(cast(item, dst_type))
         return tuple(res)
 
+    def do_parameter_broadcast(self):
+        if context.get_auto_parallel_context("parallel_mode") == ParallelMode.DATA_PARALLEL:
+            if not self.parameter_broadcast_done:
+                _pynative_exec.parameter_broadcast(self, self.phase, self._auto_parallel_mode)
+                self.parameter_broadcast_done = True
+
+    def run_construct(self, cast_inputs, kwargs):
+        if self.enable_hook:
+            _pynative_exec.enter_construct(self)
+            output = self._hook_construct(*cast_inputs, **kwargs)
+            _pynative_exec.leave_construct(self)
+        else:
+            _pynative_exec.enter_construct(self)
+            output = self.construct(*cast_inputs, **kwargs)
+            _pynative_exec.leave_construct(self)
+        return output
+
     def __call__(self, *inputs, **kwargs):
         if self.__class__.construct is Cell.construct:
             logger.warning(f"The '{self.__class__}' does not override the method 'construct', "
@@ -324,11 +341,7 @@ class Cell(Cell_):
             out = self.compile_and_run(*inputs)
             return out
 
-        if context.get_auto_parallel_context("parallel_mode") == ParallelMode.DATA_PARALLEL:
-            if not self.parameter_broadcast_done:
-                _pynative_exec.parameter_broadcast(self, self.phase, self._auto_parallel_mode)
-                self.parameter_broadcast_done = True
-
+        self.do_parameter_broadcast()
         for item in inputs:
             if isinstance(item, numpy.ndarray):
                 raise TypeError("cell inputs should not be numpy array.")
@@ -349,14 +362,7 @@ class Cell(Cell_):
                 cast_inputs = self._cast_mixed_precision_inputs(inputs, mstype.float32)
         if not cast_inputs:
             cast_inputs = inputs
-        if self.enable_hook:
-            _pynative_exec.enter_construct(self)
-            output = self._hook_construct(*cast_inputs, **kwargs)
-            _pynative_exec.leave_construct(self)
-        else:
-            _pynative_exec.enter_construct(self)
-            output = self.construct(*cast_inputs, **kwargs)
-            _pynative_exec.leave_construct(self)
+        output = self.run_construct(cast_inputs, kwargs)
         if isinstance(output, Parameter):
             output = output.data
         if self.requires_grad is True:
diff --git a/mindspore/nn/wrap/__init__.py b/mindspore/nn/wrap/__init__.py
index 09ea8fca6d..63db4437de 100644
--- a/mindspore/nn/wrap/__init__.py
+++ b/mindspore/nn/wrap/__init__.py
@@ -17,7 +17,7 @@ Wrap cells for networks.
 
 Use the Wrapper to combine the loss or build the training steps.
 """
-from .cell_wrapper import TrainOneStepCell, WithLossCell, WithGradCell, WithEvalCell, \
+from .cell_wrapper import ForwardValueAndGrad, TrainOneStepCell, WithLossCell, WithGradCell, WithEvalCell, \
     ParameterUpdate, GetNextSingleOp, VirtualDatasetCellTriple
 from .loss_scale import TrainOneStepWithLossScaleCell, DynamicLossScaleUpdateCell, FixedLossScaleUpdateCell
 from .grad_reducer import DistributedGradReducer
@@ -25,6 +25,7 @@ from ..layer.timedistributed import TimeDistributed
 
 __all__ = [
     "TimeDistributed",
+    "ForwardValueAndGrad",
     "TrainOneStepCell",
     "WithLossCell",
     "WithGradCell",
diff --git a/mindspore/nn/wrap/cell_wrapper.py b/mindspore/nn/wrap/cell_wrapper.py
index bd80e7369b..db15a98a3c 100644
--- a/mindspore/nn/wrap/cell_wrapper.py
+++ b/mindspore/nn/wrap/cell_wrapper.py
@@ -13,9 +13,12 @@
 # limitations under the License.
 # ============================================================================
 """Cell_wrapper."""
+from types import FunctionType, MethodType
+
 from mindspore.parallel._utils import (_get_device_num, _get_gradients_mean,
                                        _get_parallel_mode)
 from mindspore.context import ParallelMode
+from ...common.tensor import Tensor
 from ...common import dtype as mstype
 from ...common.parameter import Parameter, ParameterTuple
 from ...ops import composite as C
@@ -174,6 +177,107 @@ class WithGradCell(Cell):
         return grads
 
 
+class ForwardValueAndGrad(Cell):
+    r"""
+    Network training package class.
+
+    It includes the network and a gradient function. The resulting Cell is trained with input '\*inputs'.
+    The backward graph will be created in the gradient function to calculate the gradient.
+
+    Args:
+        network (Cell): The training network. The network only supports single output.
+        weights (ParameterTuple): The parameters of the training network that need to calculate the gradient.
+        get_all (bool): If True, get all the gradients with respect to inputs. Default: False.
+        get_by_list (bool): If True, get all the gradients with respect to Parameter variables.
+            If get_all and get_by_list are both False, get the gradient with respect to the first input.
+            If get_all and get_by_list are both True, get the gradients with respect to inputs and Parameter variables
+            at the same time in the form of ((gradients with respect to inputs),
+            (gradients with respect to parameters)). Default: False.
+        sens_param (bool): Whether to append sensitivity (gradient with respect to output) as input.
+            If sens_param is False, a 'ones_like(outputs)' sensitivity will be attached automatically.
+            Default: False.
+            If sens_param is True, a sensitivity (gradient with respect to output) needs to be transferred through
+            the location parameter or key-value pair parameter. If the value is transferred through the key-value pair
+            parameter, the key must be sens.
+
+    Inputs:
+        - **(\*inputs)** (Tuple(Tensor)) - Tuple of input tensors with shape :math:`(N, \ldots)`.
+        - sens (Number): The scaling number to be filled as the input of backpropagation. Default value is 1.0.
+
+    Outputs:
+        - **forward value** (a scalar Tensor with shape :math:`()`) - The result of network forward running.
+        - **gradients** (tuple(tensor)) - The gradients of network parameters and inputs.
+
+    Supported Platforms:
+        ``Ascend`` ``GPU`` ``CPU``
+
+    Examples:
+        >>> inputs = Tensor(np.ones([32, 1, 32, 32]).astype(np.float32))
+        >>> labels = Tensor(np.ones([32]).astype(np.int32))
+        >>> net = Net()
+        >>> weights = ParameterTuple(filter(lambda x: x.requires_grad, net.get_parameters()))
+        >>> loss_fn = nn.SoftmaxCrossEntropyWithLogits()
+        >>> #1) Using the existing WithLossCell provided by MindSpore
+        >>> loss_net = nn.WithLossCell(net, loss_fn)
+        >>> forward_value_and_grad = nn.ForwardValueAndGrad(loss_net, weights=weights, get_by_list=True, sens_param=True)
+        >>> loss, grads = forward_value_and_grad(inputs, labels, 1.0)
+        >>>
+        >>> #2) Using a user-defined WithLossCell
+        >>> class MyWithLossCell(Cell):
+        ...    def __init__(self, backbone, loss_fn):
+        ...        super(MyWithLossCell, self).__init__(auto_prefix=False)
+        ...        self._backbone = backbone
+        ...        self._loss_fn = loss_fn
+        ...
+        ...    def construct(self, x, y, label):
+        ...        out = self._backbone(x, y)
+        ...        return self._loss_fn(out, label)
+        ...
+        ...    @property
+        ...    def backbone_network(self):
+        ...        return self._backbone
+        ...
+        >>> loss_net = MyWithLossCell(net, loss_fn)
+        >>> forward_value_and_grad = nn.ForwardValueAndGrad(loss_net, weights=weights, get_by_list=True, sens_param=True)
+        >>> loss, grads = forward_value_and_grad(inputs, labels, 1.0)
+    """
+
+    def __init__(self, network, weights=None, get_all=False, get_by_list=False, sens_param=False):
+        super(ForwardValueAndGrad, self).__init__(auto_prefix=False)
+        if not isinstance(network, (Cell, FunctionType, MethodType)):
+            raise TypeError(f"The type of training network should be cell, function type or method type, "
+                            f"but got '{type(network)}'")
+        if get_by_list and not isinstance(weights, ParameterTuple):
+            raise TypeError(f"When get_by_list is set to True, the parameters of the training network should be "
+                            f"ParameterTuple type, but got '{type(weights)}'")
+        if get_by_list is not True and weights is not None:
+            raise TypeError(f"When get_by_list is set to False, the parameters of the training network should be "
+                            f"NoneType, but got '{type(weights)}'")
+        self.network = network
+        self.network.set_grad()
+        self.weights = weights
+        self.get_all = get_all
+        self.get_by_list = get_by_list
+        self.sens_param = sens_param
+        self.grad = C.GradOperation(get_all=self.get_all, get_by_list=self.get_by_list, sens_param=self.sens_param)
+
+    def construct(self, *inputs):
+        weights = self.weights
+        if self.sens_param:
+            sens = inputs[-1]
+            inputs = inputs[:-1]
+        else:
+            sens = None
+        loss = self.network(*inputs)
+        if self.sens_param:
+            if not isinstance(sens, Tensor):
+                sens = P.Fill()(P.DType()(loss), P.Shape()(loss), sens)
+            grads = self.grad(self.network, weights)(*inputs, sens)
+        else:
+            grads = self.grad(self.network, weights)(*inputs)
+        return loss, grads
+
+
 class TrainOneStepCell(Cell):
     r"""
     Network training package class.
diff --git a/tests/st/networks/test_gpu_resnet.py b/tests/st/networks/test_gpu_resnet.py
index 4d59daa04f..4e4d4a8c32 100644
--- a/tests/st/networks/test_gpu_resnet.py
+++ b/tests/st/networks/test_gpu_resnet.py
@@ -22,10 +22,10 @@ import pytest
 
 import mindspore.context as context
 import mindspore.nn as nn
-from mindspore import Tensor
+from mindspore import Tensor, ParameterTuple
 from mindspore import amp
 from mindspore.nn import Dense
-from mindspore.nn import TrainOneStepCell, WithLossCell
+from mindspore.nn import TrainOneStepCell, WithLossCell, ForwardValueAndGrad
 from mindspore.nn.cell import Cell
 from mindspore.nn.layer.basic import Flatten
 from mindspore.nn.layer.conv import Conv2d
@@ -33,6 +33,7 @@ from mindspore.nn.layer.normalization import BatchNorm2d
 from mindspore.nn.layer.pooling import MaxPool2d
 from mindspore.nn.optim import Momentum
 from mindspore.ops import operations as P
+from mindspore.ops import functional as F
 from mindspore.ops.operations import Add
 
 context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
@@ -399,3 +400,53 @@ def test_trainTensor_amp(num_classes=10, epoch=18, batch_size=16):
     assert (losses[-1][0].asnumpy() < 1)
     assert not losses[-1][1].asnumpy()
     assert (losses[-1][2].asnumpy() > 1)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_trainTensor_with_new_interface(num_classes=10, epoch=8, batch_size=1):
+    net = resnet50(num_classes)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
+    net_with_criterion = WithLossCell(net, criterion)
+    net_with_criterion.set_train()
+
+    weights = ParameterTuple(filter(lambda x: x.requires_grad, net.get_parameters()))
+    optimizer = Momentum(weights, 0.1, 0.9)
+
+    train_network = ForwardValueAndGrad(network=net_with_criterion, weights=weights, get_by_list=True, sens_param=True)
+    losses = []
+    for i in range(0, epoch):
+        data = Tensor(np.ones([batch_size, 3, 224, 224]
+                              ).astype(np.float32) * 0.01)
+        label = Tensor(np.ones([batch_size]).astype(np.int32))
+        loss, grads = train_network(data, label, 1.0)
+        grads = F.identity(grads)
+        optimizer(grads)
+        losses.append(loss)
+    assert (losses[-1].asnumpy() < 0.8)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_big_batchSize_with_new_interface(num_classes=10, epoch=8, batch_size=338):
+    net = resnet50(num_classes)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
+    net_with_criterion = WithLossCell(net, criterion)
+    net_with_criterion.set_train()
+
+    weights = ParameterTuple(filter(lambda x: x.requires_grad, net.get_parameters()))
+    optimizer = Momentum(weights, 0.1, 0.9)
+
+    train_network = ForwardValueAndGrad(network=net_with_criterion, weights=weights, get_by_list=True, sens_param=True)
+    losses = []
+    for i in range(0, epoch):
+        data = Tensor(np.ones([batch_size, 3, 224, 224]
+                              ).astype(np.float32) * 0.01)
+        label = Tensor(np.ones([batch_size]).astype(np.int32))
+        loss, grads = train_network(data, label, 1.0)
+        grads = F.identity(grads)
+        optimizer(grads)
+        losses.append(loss)
+    assert (losses[-1].asnumpy() < 0.8)
diff --git a/tests/st/pynative/test_pynative_lenet.py b/tests/st/pynative/test_pynative_lenet.py
index 29a7ddbc40..cc665d697e 100644
--- a/tests/st/pynative/test_pynative_lenet.py
+++ b/tests/st/pynative/test_pynative_lenet.py
@@ -164,3 +164,40 @@ def test_ascend_pynative_lenet():
         print("======epoch: ", epoch, " loss: ", loss_output.asnumpy(), " cost time: ", cost_time)
     assert loss_output.asnumpy() < 0.004
     assert loss_output.asnumpy() > 0.003
+
+
+@pytest.mark.level0
+@pytest.mark.platform_arm_ascend_training
+@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_x86_cpu
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_pynative_lenet_with_new_interface():
+    context.set_context(mode=context.PYNATIVE_MODE)
+
+    epoch_size = 20
+    batch_size = 32
+    inputs = Tensor(np.ones([batch_size, 1, 32, 32]).astype(np.float32))
+    labels = Tensor(np.ones([batch_size]).astype(np.int32))
+
+    net = LeNet()
+    criterion = CrossEntropyLoss()
+    net_with_criterion = WithLossCell(net, criterion)
+    net_with_criterion.set_train()
+
+    weights = ParameterTuple(filter(lambda x: x.requires_grad, net.get_parameters()))
+    optimizer = Momentum(weights, 0.1, 0.9)
+
+    forward_value_and_grad = nn.ForwardValueAndGrad(network=net_with_criterion, weights=weights, get_by_list=True)
+    total_time = 0
+    for epoch in range(0, epoch_size):
+        start_time = time.time()
+        loss_output, grads = forward_value_and_grad(inputs, labels)
+        optimizer(grads)
+        end_time = time.time()
+        cost_time = end_time - start_time
+        total_time = total_time + cost_time
+
+        print("======epoch: ", epoch, " loss: ", loss_output.asnumpy(), " cost time: ", cost_time)
+    assert loss_output.asnumpy() < 0.005
+    assert loss_output.asnumpy() > 0.003
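
A minimal, self-contained usage sketch of the new nn.ForwardValueAndGrad wrapper, distilled from the docstring and tests in this patch; the Dense network, loss, optimizer, and dummy data below are illustrative placeholders, not part of the change, and assume a MindSpore build with this patch applied:

    import numpy as np
    import mindspore.nn as nn
    import mindspore.context as context
    from mindspore import Tensor, ParameterTuple
    from mindspore.nn import WithLossCell
    from mindspore.nn.optim import Momentum

    context.set_context(mode=context.PYNATIVE_MODE)

    net = nn.Dense(20, 10)                      # placeholder network; any Cell works
    loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    loss_net = WithLossCell(net, loss_fn)
    loss_net.set_train()

    weights = ParameterTuple(filter(lambda x: x.requires_grad, net.get_parameters()))
    optimizer = Momentum(weights, 0.1, 0.9)

    # One call returns both the forward value and the gradients of `weights`.
    # With sens_param=True, the trailing 1.0 is the sensitivity scaling of the output.
    fwd_and_grad = nn.ForwardValueAndGrad(loss_net, weights=weights, get_by_list=True, sens_param=True)

    data = Tensor(np.ones([32, 20]).astype(np.float32))
    label = Tensor(np.zeros([32]).astype(np.int32))
    loss, grads = fwd_and_grad(data, label, 1.0)
    optimizer(grads)                            # gradients are applied in a separate step

Unlike TrainOneStepCell, the optimizer update is left to the caller, which is exactly what the new tests above exercise.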