From 908b590d8e7e160471fdbfa057eeabb9698b096f Mon Sep 17 00:00:00 2001 From: yepei6 Date: Mon, 8 Mar 2021 12:30:50 +0800 Subject: [PATCH] update comments --- mindspore/compression/quant/qat.py | 29 ++++++++-------- mindspore/compression/quant/quant_utils.py | 33 ++++++++++--------- mindspore/context.py | 22 ++++++------- mindspore/train/callback/_checkpoint.py | 19 ++++++----- .../train/callback/_summary_collector.py | 24 +++++++------- mindspore/train/summary/summary_record.py | 8 ++--- 6 files changed, 69 insertions(+), 66 deletions(-) diff --git a/mindspore/compression/quant/qat.py b/mindspore/compression/quant/qat.py index c2607fcece..94108d4f14 100644 --- a/mindspore/compression/quant/qat.py +++ b/mindspore/compression/quant/qat.py @@ -46,21 +46,21 @@ def create_quant_config(quant_observer=(nn.FakeQuantWithMinMaxObserver, nn.FakeQ Configs the observer type of weights and data flow with quant params. Args: - quant_observer (Observer, list or tuple): The observer type to do quantization. The first element represent + quant_observer (Union[Observer, list, tuple]): The observer type to do quantization. The first element represent weights and second element represent data flow. Default: (nn.FakeQuantWithMinMaxObserver, nn.FakeQuantWithMinMaxObserver) - quant_delay (int, list or tuple): Number of steps after which weights and activations are quantized during + quant_delay (Union[int, list, tuple]): Number of steps after which weights and activations are quantized during eval. The first element represent weights and second element represent data flow. Default: (0, 0) - quant_dtype (QuantDtype, list or tuple): Datatype to use for quantize weights and activations. The first + quant_dtype (Union[QuantDtype, list, tuple]): Datatype to use for quantize weights and activations. The first element represent weights and second element represent data flow. 
Default: (QuantDtype.INT8, QuantDtype.INT8) - per_channel (bool, list or tuple): Quantization granularity based on layer or on channel. If `True` + per_channel (Union[bool, list, tuple]): Quantization granularity based on layer or on channel. If `True` then base on per channel otherwise base on per layer. The first element represent weights and second element represent data flow. Default: (False, False) - symmetric (bool, list or tuple): Whether the quantization algorithm is symmetric or not. If `True` then base on - symmetric otherwise base on asymmetric. The first element represent weights and second + symmetric (Union[bool, list, tuple]): Whether the quantization algorithm is symmetric or not. If `True` then + base on symmetric otherwise base on asymmetric. The first element represent weights and second element represent data flow. Default: (False, False) - narrow_range (bool, list or tuple): Whether the quantization algorithm uses narrow range or not. + narrow_range (Union[bool, list, tuple]): Whether the quantization algorithm uses narrow range or not. The first element represents weights and the second element represents data flow. Default: (False, False) Returns: @@ -123,20 +123,20 @@ class QuantizationAwareTraining(Quantizer): Args: bn_fold (bool): Flag to used bn fold ops for simulation inference operation. Default: True. freeze_bn (int): Number of steps after which BatchNorm OP parameters used total mean and variance. Default: 1e7. - quant_delay (int, list or tuple): Number of steps after which weights and activations are quantized during + quant_delay (Union[int, list, tuple]): Number of steps after which weights and activations are quantized during eval. The first element represent weights and second element represent data flow. Default: (0, 0) - quant_dtype (QuantDtype, list or tuple): Datatype to use for quantize weights and activations. The first + quant_dtype (Union[QuantDtype, list, tuple]): Datatype to use for quantize weights and activations. 
The first element represent weights and second element represent data flow. Default: (QuantDtype.INT8, QuantDtype.INT8) - per_channel (bool, list or tuple): Quantization granularity based on layer or on channel. If `True` + per_channel (Union[bool, list, tuple]): Quantization granularity based on layer or on channel. If `True` then base on per channel otherwise base on per layer. The first element represent weights and second element represent data flow. Default: (False, False) - symmetric (bool, list or tuple): Whether the quantization algorithm is symmetric or not. If `True` then base on - symmetric otherwise base on asymmetric. The first element represent weights and second + symmetric (Union[bool, list, tuple]): Whether the quantization algorithm is symmetric or not. If `True` then + base on symmetric otherwise base on asymmetric. The first element represent weights and second element represent data flow. Default: (False, False) - narrow_range (bool, list or tuple): Whether the quantization algorithm uses narrow range or not. + narrow_range (Union[bool, list, tuple]): Whether the quantization algorithm uses narrow range or not. The first element represents weights and the second element represents data flow. Default: (False, False) - optimize_option (OptimizeOption, list or tuple): Specifies the quant algorithm and options, currently only + optimize_option (Union[OptimizeOption, list, tuple]): Specifies the quant algorithm and options, currently only support QAT. Default: OptimizeOption.QAT one_conv_fold (bool): Flag to used one conv bn fold ops for simulation inference operation. Default: True. 
@@ -187,6 +187,7 @@ class QuantizationAwareTraining(Quantizer): one_conv_fold=True): """Init for QuantizationAwareTraining quantizer""" super(QuantizationAwareTraining, self).__init__(optimize_option=optimize_option) + def convert2list(name, value): if not isinstance(value, list) and not isinstance(value, tuple): value = [value] diff --git a/mindspore/compression/quant/quant_utils.py b/mindspore/compression/quant/quant_utils.py index 7b4606fb55..890e33b6dd 100644 --- a/mindspore/compression/quant/quant_utils.py +++ b/mindspore/compression/quant/quant_utils.py @@ -133,6 +133,7 @@ def weight2int(data, scale, zero_point, data_type, num_bits=8, narrow_range=Fals weight_int[weight_int < quant_min] = quant_min return weight_int + def scale_zp_max_min_from_fake_quant_cell(cell, data_type): """Get calculate quantization params for scale, zero point, max and min from `FakeQuantWithMinMax`.""" minq = cell.minq.data.asnumpy() @@ -271,31 +272,31 @@ def load_nonquant_param_into_quant_net(quant_model, params_dict, quant_new_param Load fp32 model parameters into quantization model. Args: - quant_model: quantization model. - params_dict: parameter dict that stores fp32 parameters. - quant_new_params: parameters that exist in quantitative network but not in unquantitative network. + quant_model(Cell): quantization model. + params_dict(dict): parameter dict that stores fp32 parameters. + quant_new_params(list): parameters that exist in quantitative network but not in unquantitative network. 
Returns: None """ iterable_dict = { - 'weight': iter([item for item in params_dict.items() if item[0].endswith('weight')]), - 'bias': iter([item for item in params_dict.items() if item[0].endswith('bias')]), - 'gamma': iter([item for item in params_dict.items() if item[0].endswith('gamma')]), - 'beta': iter([item for item in params_dict.items() if item[0].endswith('beta')]), - 'moving_mean': iter([item for item in params_dict.items() if item[0].endswith('moving_mean')]), - 'moving_variance': iter( - [item for item in params_dict.items() if item[0].endswith('moving_variance')]), - 'minq': iter([item for item in params_dict.items() if item[0].endswith('minq')]), - 'maxq': iter([item for item in params_dict.items() if item[0].endswith('maxq')]) + 'weight': iter(list(filter(lambda item: item[0].endswith('weight'), params_dict.items()))), + 'bias': iter(list(filter(lambda item: item[0].endswith('bias'), params_dict.items()))), + 'gamma': iter(list(filter(lambda item: item[0].endswith('gamma'), params_dict.items()))), + 'beta': iter(list(filter(lambda item: item[0].endswith('beta'), params_dict.items()))), + 'moving_mean': iter(list(filter(lambda item: item[0].endswith('moving_mean'), params_dict.items()))), + 'moving_variance': iter(list(filter(lambda item: item[0].endswith('moving_variance'), params_dict.items()))), + 'minq': iter(list(filter(lambda item: item[0].endswith('minq'), params_dict.items()))), + 'maxq': iter(list(filter(lambda item: item[0].endswith('maxq'), params_dict.items()))) } + for name, param in quant_model.parameters_and_names(): key_name = name.split(".")[-1] if key_name not in iterable_dict.keys(): - if quant_new_params is not None and key_name in quant_new_params: - continue - raise ValueError(f"Can't find match parameter in ckpt,param name = {name}") + if key_name not in quant_new_params: + raise ValueError(f"Can't find match parameter in ckpt,param name = {name}") + continue value_param = next(iterable_dict[key_name], None) - if value_param is 
not None: + if value_param: param.set_data(value_param[1].data) print(f'init model param {name} with checkpoint param {value_param[0]}') diff --git a/mindspore/context.py b/mindspore/context.py index c42aa51e9c..1a4f5d70a8 100644 --- a/mindspore/context.py +++ b/mindspore/context.py @@ -645,8 +645,8 @@ def set_context(**kwargs): >>> context.set_context(mode=context.GRAPH_MODE, ... device_target="Ascend",device_id=0, save_graphs=True, ... save_graphs_path="/mindspore") - >>> context.set_context(enable_profiling=True, \ - profiling_options='{"output":"/home/data/output","training_trace":"on"}') + >>> context.set_context(enable_profiling=True, + ... profiling_options='{"output":"/home/data/output","training_trace":"on"}') >>> context.set_context(max_device_memory="3.5GB") >>> context.set_context(print_file_path="print.pb") >>> context.set_context(max_call_depth=80) @@ -734,16 +734,14 @@ def set_ps_context(**kwargs): Some other environment variables should also be set for parameter server training mode. These environment variables are listed below: - .. code-block:: - - MS_SERVER_NUM # Server number - MS_WORKER_NUM # Worker number - MS_SCHED_HOST # Scheduler IP address - MS_SCHED_PORT # Scheduler port - MS_ROLE # The role of this process: - # MS_SCHED represents the scheduler, - # MS_WORKER represents the worker, - # MS_PSERVER represents the Server + MS_SERVER_NUM # Server number + MS_WORKER_NUM # Worker number + MS_SCHED_HOST # Scheduler IP address + MS_SCHED_PORT # Scheduler port + MS_ROLE # The role of this process: + # MS_SCHED represents the scheduler, + # MS_WORKER represents the worker, + # MS_PSERVER represents the Server Args: diff --git a/mindspore/train/callback/_checkpoint.py b/mindspore/train/callback/_checkpoint.py index db50fea057..990fe3ff7f 100644 --- a/mindspore/train/callback/_checkpoint.py +++ b/mindspore/train/callback/_checkpoint.py @@ -81,14 +81,14 @@ class CheckpointConfig: Args: save_checkpoint_steps (int): Steps to save checkpoint. Default: 1. 
- save_checkpoint_seconds (int): Seconds to save checkpoint. Default: 0. - Can't be used with save_checkpoint_steps at the same time. + save_checkpoint_seconds (int): Seconds to save checkpoint. + Can't be used with save_checkpoint_steps at the same time. Default: 0. keep_checkpoint_max (int): Maximum number of checkpoint files can be saved. Default: 5. - keep_checkpoint_per_n_minutes (int): Keep one checkpoint every n minutes. Default: 0. - Can't be used with keep_checkpoint_max at the same time. + keep_checkpoint_per_n_minutes (int): Keep one checkpoint every n minutes. + Can't be used with keep_checkpoint_max at the same time. Default: 0. integrated_save (bool): Whether to perform integrated save function in automatic model parallel scene. - Default: True. Integrated save function is only supported in automatic parallel scene, not supported - in manual parallel. + Integrated save function is only supported in automatic parallel scene, not supported + in manual parallel. Default: True. async_save (bool): Whether asynchronous execution saves the checkpoint to a file. Default: False. saved_network (Cell): Network to be saved in checkpoint file. If the saved_network has no relation with the network in training, the initial value of saved_network will be saved. Default: None. @@ -128,6 +128,7 @@ class CheckpointConfig: >>> ckpoint_cb = ModelCheckpoint(prefix='LeNet5', directory='./checkpoint', config=config) >>> model.train(10, dataset, callbacks=ckpoint_cb) """ + def __init__(self, save_checkpoint_steps=1, save_checkpoint_seconds=0, @@ -231,6 +232,7 @@ class ModelCheckpoint(Callback): ValueError: If the prefix is invalid. TypeError: If the config is not CheckpointConfig type. 
""" + def __init__(self, prefix='CKP', directory=None, config=None): super(ModelCheckpoint, self).__init__() self._latest_ckpt_file_name = "" @@ -310,7 +312,7 @@ class ModelCheckpoint(Callback): """Check whether save checkpoint files or not.""" if self._config.save_checkpoint_steps and self._config.save_checkpoint_steps > 0: if cb_params.cur_step_num >= self._last_triggered_step + self._config.save_checkpoint_steps \ - or force_to_save is True: + or force_to_save is True: return True elif self._config.save_checkpoint_seconds and self._config.save_checkpoint_seconds > 0: self._cur_time = time.time() @@ -333,7 +335,7 @@ class ModelCheckpoint(Callback): if save_ckpt: cur_ckpoint_file = self._prefix + "-" + str(cb_params.cur_epoch_num) + "_" \ - + str(step_num_in_epoch) + ".ckpt" + + str(step_num_in_epoch) + ".ckpt" # update checkpoint file list. self._manager.update_ckpoint_filelist(self._directory, self._prefix) # keep checkpoint files number equal max number. @@ -378,6 +380,7 @@ class ModelCheckpoint(Callback): class CheckpointManager: """Manage checkpoint files according to train_config of checkpoint.""" + def __init__(self): self._ckpoint_filelist = [] diff --git a/mindspore/train/callback/_summary_collector.py b/mindspore/train/callback/_summary_collector.py index a3e5a2245c..987b6af2c2 100644 --- a/mindspore/train/callback/_summary_collector.py +++ b/mindspore/train/callback/_summary_collector.py @@ -79,15 +79,15 @@ class SummaryCollector(Callback): summary_dir (str): The collected data will be persisted to this directory. If the directory does not exist, it will be created automatically. collect_freq (int): Set the frequency of data collection, it should be greater then zero, - and the unit is `step`. Default: 10. If a frequency is set, we will collect data + and the unit is `step`. If a frequency is set, we will collect data when (current steps % freq) equals to 0, and the first step will be collected at any time. 
It is important to note that if the data sink mode is used, the unit will become the `epoch`. - It is not recommended to collect data too frequently, which can affect performance. - collect_specified_data (Union[None, dict]): Perform custom operations on the collected data. Default: None. + It is not recommended to collect data too frequently, which can affect performance. Default: 10. + collect_specified_data (Union[None, dict]): Perform custom operations on the collected data. By default, if set to None, all data is collected as the default behavior. You can customize the collected data with a dictionary. For example, you can set {'collect_metric': False} to control not collecting metrics. - The data that supports control is shown below. + The data that supports control is shown below. Default: None. - collect_metric (bool): Whether to collect training metrics, currently only the loss is collected. The first output will be treated as the loss and it will be averaged. @@ -106,14 +106,13 @@ class SummaryCollector(Callback): Optional: True/False. Default: True. - histogram_regular (Union[str, None]): Collect weight and bias for parameter distribution page and displayed in MindInsight. This field allows regular strings to control which parameters to collect. - Default: None, it means only the first five parameters are collected. It is not recommended to collect too many parameters at once, as it can affect performance. Note that if you collect too many parameters and run out of memory, the training will fail. + Default: None, it means only the first five parameters are collected. keep_default_action (bool): This field affects the collection behavior of the 'collect_specified_data' field. - Optional: True/False, Default: True. True: it means that after specified data is set, non-specified data is collected as the default behavior. False: it means that after specified data is set, only the specified data is collected, - and the others are not collected. 
+ and the others are not collected. Optional: True/False, Default: True. custom_lineage_data (Union[dict, None]): Allows you to customize the data and present it on the MingInsight lineage page. In the custom data, the type of the key supports str, and the type of value supports str, int and float. Default: None, it means there is no custom data. @@ -121,19 +120,20 @@ class SummaryCollector(Callback): Because TensorSummary data is too large to be compared with other summary data, this parameter is used to reduce its collection. By default, The maximum number of steps for collecting TensorSummary data is 20, but it will not exceed the number of steps for collecting other summary data. - Default: None, which means to follow the behavior as described above. For example, given `collect_freq=10`, - when the total steps is 600, TensorSummary will be collected 20 steps, while other summary data 61 steps, + For example, given `collect_freq=10`, when the total steps is 600, TensorSummary will be collected 20 steps, + while other summary data 61 steps, but when the total steps is 20, both TensorSummary and other summary will be collected 3 steps. Also note that when in parallel mode, the total steps will be split evenly, which will affect the number of steps TensorSummary will be collected. + Default: None, which means to follow the behavior as described above. max_file_size (Optional[int]): The maximum size in bytes of each file that can be written to the disk. - Default: None, which means no limit. For example, to write not larger than 4GB, - specify `max_file_size=4 * 1024**3`. + For example, to write not larger than 4GB, specify `max_file_size=4*1024**3`. + Default: None, which means no limit. export_options (Union[None, dict]): Perform custom operations on the export data. - Default: None, it means that the data is not exported. Note that the size of export files is not limited by the max_file_size. You can customize the export data with a dictionary. 
For example, you can set {'tensor_format': 'npy'} to export tensor as npy file. The data that supports control is shown below. + Default: None, it means that the data is not exported. - tensor_format (Union[str, None]): Customize the export tensor format. Supports ["npy", None]. Default: None, it means that the tensor is not exported. diff --git a/mindspore/train/summary/summary_record.py b/mindspore/train/summary/summary_record.py index aed25602ad..c04fac9731 100644 --- a/mindspore/train/summary/summary_record.py +++ b/mindspore/train/summary/summary_record.py @@ -110,15 +110,15 @@ class SummaryRecord: file_prefix (str): The prefix of file. Default: "events". file_suffix (str): The suffix of file. Default: "_MS". network (Cell): Obtain a pipeline through network for saving graph summary. Default: None. - max_file_size (int, optional): The maximum size of each file that can be written to disk (in bytes). \ - Unlimited by default. For example, to write not larger than 4GB, specify `max_file_size=4 * 1024**3`. + max_file_size (int, optional): The maximum size of each file that can be written to disk (in bytes). + Unlimited by default. For example, to write not larger than 4GB, specify `max_file_size=4 * 1024 ** 3`. raise_exception (bool, optional): Sets whether to throw an exception when a RuntimeError or OSError exception occurs in recording data. Default: False, this means that error logs are printed and no exception is thrown. export_options (Union[None, dict]): Perform custom operations on the export data. - Default: None, it means that the data is not exported. Note that the size of export files is not limited by the max_file_size. You can customize the export data with a dictionary. For example, you can set {'tensor_format': 'npy'} - to export tensor as npy file. The data that supports control is shown below. + to export tensor as npy file. The data that supports control is shown below. Default: None, it means that + the data is not exported. 
- tensor_format (Union[str, None]): Customize the export tensor format. Supports ["npy", None]. Default: None, it means that the tensor is not exported.