|
|
|
@ -73,7 +73,7 @@ class SummaryCollector(Callback):
|
|
|
|
|
summary_dir (str): The collected data will be persisted to this directory.
|
|
|
|
|
If the directory does not exist, it will be created automatically.
|
|
|
|
|
collect_freq (int): Set the frequency of data collection, it should be greater then zero,
|
|
|
|
|
and the unit is `step`. Default: 10.
|
|
|
|
|
and the unit is `step`. Default: 10. The first step will be recorded at any time.
|
|
|
|
|
It is important to note that if the data sink mode is used, the unit will become the `epoch`.
|
|
|
|
|
It is not recommended to collect data too frequently, which can affect performance.
|
|
|
|
|
collect_specified_data (Union[None, dict]): Perform custom operations on the collected data. Default: None.
|
|
|
|
@ -142,9 +142,6 @@ class SummaryCollector(Callback):
|
|
|
|
|
'histogram_regular': None
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
# _OPTIMIZER_FAILED means find optimizer failed, so we will not collect data about optimizer.
|
|
|
|
|
_OPTIMIZER_FAILED = 'Failed'
|
|
|
|
|
|
|
|
|
|
def __init__(self, summary_dir, collect_freq=10, collect_specified_data=None,
|
|
|
|
|
keep_default_action=True, custom_lineage_data=None):
|
|
|
|
|
super(SummaryCollector, self).__init__()
|
|
|
|
@ -158,7 +155,8 @@ class SummaryCollector(Callback):
|
|
|
|
|
self._check_action(keep_default_action)
|
|
|
|
|
|
|
|
|
|
self._collect_specified_data = self._process_specified_data(collect_specified_data, keep_default_action)
|
|
|
|
|
logger.info(f"For `collect_specified_data` the value after processing is: {self._collect_specified_data}.")
|
|
|
|
|
msg = f"For 'collect_specified_data' the value after processing is: {self._collect_specified_data}."
|
|
|
|
|
logger.info(msg)
|
|
|
|
|
|
|
|
|
|
self._check_custom_lineage_data(custom_lineage_data)
|
|
|
|
|
self._custom_lineage_data = custom_lineage_data
|
|
|
|
@ -167,6 +165,7 @@ class SummaryCollector(Callback):
|
|
|
|
|
self._has_saved_train_network = False
|
|
|
|
|
self._has_saved_custom_data = False
|
|
|
|
|
self._is_parse_loss_success = True
|
|
|
|
|
self._first_step = True
|
|
|
|
|
|
|
|
|
|
def __enter__(self):
|
|
|
|
|
self._record = SummaryRecord(log_dir=self._summary_dir)
|
|
|
|
@ -228,7 +227,7 @@ class SummaryCollector(Callback):
|
|
|
|
|
if specified_data is None:
|
|
|
|
|
if action:
|
|
|
|
|
return self._DEFAULT_SPECIFIED_DATA
|
|
|
|
|
return None
|
|
|
|
|
return dict()
|
|
|
|
|
|
|
|
|
|
check_value_type('collect_specified_data', specified_data, [dict, type(None)])
|
|
|
|
|
|
|
|
|
@ -282,9 +281,13 @@ class SummaryCollector(Callback):
|
|
|
|
|
cb_params = run_context.original_args()
|
|
|
|
|
|
|
|
|
|
if cb_params.mode == ModeEnum.TRAIN.value:
|
|
|
|
|
if cb_params.cur_step_num % self._collect_freq:
|
|
|
|
|
|
|
|
|
|
# Make sure the first step data is recorded
|
|
|
|
|
if not self._first_step and cb_params.cur_step_num % self._collect_freq:
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
self._first_step = False
|
|
|
|
|
|
|
|
|
|
if not self._has_saved_train_network:
|
|
|
|
|
self._collect_graphs(cb_params)
|
|
|
|
|
|
|
|
|
@ -357,7 +360,8 @@ class SummaryCollector(Callback):
|
|
|
|
|
input_data = input_data[0]
|
|
|
|
|
try:
|
|
|
|
|
self._record.add_value(PluginEnum.IMAGE.value, 'input_data/auto', input_data)
|
|
|
|
|
except ValueError:
|
|
|
|
|
except ValueError as ex:
|
|
|
|
|
logger.warning(str(ex))
|
|
|
|
|
self._collect_specified_data['collect_input_data'] = False
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
@ -395,7 +399,12 @@ class SummaryCollector(Callback):
|
|
|
|
|
loss = self._get_loss(cb_params)
|
|
|
|
|
if loss is None:
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
self._record.add_value(PluginEnum.SCALAR.value, 'loss/auto', loss)
|
|
|
|
|
except ValueError as exc:
|
|
|
|
|
logger.warning(str(exc))
|
|
|
|
|
self._collect_specified_data['collect_metric'] = False
|
|
|
|
|
|
|
|
|
|
def _get_loss(self, cb_params):
|
|
|
|
|
"""
|
|
|
|
@ -446,7 +455,9 @@ class SummaryCollector(Callback):
|
|
|
|
|
Returns:
|
|
|
|
|
Union[Optimizer, None], if parse optimizer success, will return a optimizer, else return None.
|
|
|
|
|
"""
|
|
|
|
|
if self._optimizer == self._OPTIMIZER_FAILED:
|
|
|
|
|
# 'optimizer_failed' means find optimizer failed, so we will not collect data about optimizer.
|
|
|
|
|
optimizer_failed = 'Failed'
|
|
|
|
|
if self._optimizer == optimizer_failed:
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
if self._optimizer is not None:
|
|
|
|
@ -458,9 +469,11 @@ class SummaryCollector(Callback):
|
|
|
|
|
optimizer = self._parse_optimizer_by_network(network)
|
|
|
|
|
|
|
|
|
|
if optimizer is None or not isinstance(optimizer, Optimizer):
|
|
|
|
|
logger.warning("Can not find optimizer in network, or the optimizer does not inherit Mindpore's optimizer, "
|
|
|
|
|
"so we will not collect data about optimizer in SummaryCollector.")
|
|
|
|
|
optimizer = self._OPTIMIZER_FAILED
|
|
|
|
|
logger.warning("Can not find optimizer in network, or the optimizer does not inherit MindSpore's "
|
|
|
|
|
"optimizer, so we will not collect data about optimizer in SummaryCollector.")
|
|
|
|
|
optimizer = None
|
|
|
|
|
|
|
|
|
|
self._optimizer = optimizer if optimizer is not None else optimizer_failed
|
|
|
|
|
|
|
|
|
|
return optimizer
|
|
|
|
|
|
|
|
|
@ -469,6 +482,8 @@ class SummaryCollector(Callback):
|
|
|
|
|
"""Parse optimizer from network, if parse success will return a optimizer, else return None."""
|
|
|
|
|
optimizer = None
|
|
|
|
|
for _, cell in network.cells_and_names():
|
|
|
|
|
if isinstance(cell, Optimizer):
|
|
|
|
|
return cell
|
|
|
|
|
try:
|
|
|
|
|
optimizer = getattr(cell, 'optimizer')
|
|
|
|
|
except AttributeError:
|
|
|
|
@ -489,11 +504,11 @@ class SummaryCollector(Callback):
|
|
|
|
|
if 'histogram_regular' not in self._collect_specified_data:
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
self._optimizer = self._get_optimizer(cb_params)
|
|
|
|
|
if self._optimizer is None:
|
|
|
|
|
optimizer = self._get_optimizer(cb_params)
|
|
|
|
|
if optimizer is None:
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
parameters = self._optimizer.parameters
|
|
|
|
|
parameters = optimizer.parameters
|
|
|
|
|
regular = self._collect_specified_data.get('histogram_regular')
|
|
|
|
|
if regular is not None:
|
|
|
|
|
for parameter in parameters:
|
|
|
|
@ -538,7 +553,7 @@ class SummaryCollector(Callback):
|
|
|
|
|
train_lineage[LineageMetadata.loss] = None
|
|
|
|
|
|
|
|
|
|
optimizer = self._get_optimizer(cb_params)
|
|
|
|
|
learning_rate = self._get_learning_rate(optimizer)
|
|
|
|
|
learning_rate = self._get_learning_rate(optimizer) if optimizer is not None else None
|
|
|
|
|
|
|
|
|
|
if learning_rate is not None:
|
|
|
|
|
train_lineage[LineageMetadata.learning_rate] = list(np.atleast_1d(learning_rate.asnumpy()))[0]
|
|
|
|
|