From aa42728e849df670c1a560f72289e2615c51689c Mon Sep 17 00:00:00 2001 From: zhangyunshu Date: Wed, 20 Jan 2021 17:12:37 +0800 Subject: [PATCH] profiler mem usage: added memory breakdowns for each execution id --- mindspore/ccsrc/CMakeLists.txt | 4 +- .../profiler/common/proto_files/__init__.py | 15 - .../common/proto_files/memory_usage.proto | 50 --- .../common/proto_files/memory_usage_pb2.py | 295 -------------- mindspore/profiler/parser/container.py | 63 +-- .../profiler/parser/memory_usage_parser.py | 371 +++++++++--------- 6 files changed, 213 insertions(+), 585 deletions(-) delete mode 100644 mindspore/profiler/common/proto_files/__init__.py delete mode 100644 mindspore/profiler/common/proto_files/memory_usage.proto delete mode 100644 mindspore/profiler/common/proto_files/memory_usage_pb2.py diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt index 9492990e33..cc062c9c09 100644 --- a/mindspore/ccsrc/CMakeLists.txt +++ b/mindspore/ccsrc/CMakeLists.txt @@ -126,8 +126,8 @@ list(APPEND MINDSPORE_PROTO_LIST ${COMM_PROTO_SRCS}) include_directories("${CMAKE_BINARY_DIR}/profiler/device/common") file(GLOB_RECURSE PROFILER_PROTO_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "profiler/device/common/memory_profiling.proto") -ms_protobuf_generate(PROFILER_MEM_PROTO_SRCS PROFILER_MEM_PROTO_HDRS ${PROFILER_PROTO_LIST}) -list(APPEND MINDSPORE_PROTO_LIST ${PROFILER_MEM_PROTO_SRCS}) +ms_protobuf_generate_py(PROFILER_MEM_PROTO_PY PROFILER_MEM_PROTO_HDRS_PY PROFILER_MEM_PROTO_PYS ${PROFILER_PROTO_LIST}) +list(APPEND MINDSPORE_PROTO_LIST ${PROFILER_MEM_PROTO_PY}) if(ENABLE_DEBUGGER) # debugger: compile proto files diff --git a/mindspore/profiler/common/proto_files/__init__.py b/mindspore/profiler/common/proto_files/__init__.py deleted file mode 100644 index 8e5afe6be7..0000000000 --- a/mindspore/profiler/common/proto_files/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""The proto files for profiler.""" diff --git a/mindspore/profiler/common/proto_files/memory_usage.proto b/mindspore/profiler/common/proto_files/memory_usage.proto deleted file mode 100644 index b053d2a19f..0000000000 --- a/mindspore/profiler/common/proto_files/memory_usage.proto +++ /dev/null @@ -1,50 +0,0 @@ -/** - * Copyright 2021 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -syntax = "proto3"; - -package profiler; - -message MemoryProto { - repeated GraphMemProto graph_mem = 1; // memory usage of multiple graphs - int64 total_mem = 2; // total allocated memory on device -} - -message GraphMemProto { - int64 graph_id = 1; // graph id - int64 static_mem = 2; // size of allocated static memory for current graph - repeated NodeMemProto node_mems = 3; // execution nodes - repeated TensorMemProto tensor_mems = 4; // all tensors - string fp_start = 5; // node name of fp start - string bp_end = 6; // node name of bp end -} - -message NodeMemProto { - string node_name = 1; // node name - int64 node_id = 2; // node id with respect to the execution order - repeated int64 input_tensor_id = 3; // input tensor id - repeated int64 output_tensor_id = 4; // output tensor id - repeated int64 workspace_tensor_id = 5; // workspace tensor id -} - -message TensorMemProto { - int64 tensor_id = 1; // tensor id - int64 size = 2; // aligned tensor size - string type = 3; // tensor type, e.g. Common, OutputOnly - int64 life_start = 4; // node id at which memory allocated - int64 life_end = 5; // node id at which memory deallocated - string life_long = 6; // the type of tensor lifetime, e.g. LifeLongGraphAll -} diff --git a/mindspore/profiler/common/proto_files/memory_usage_pb2.py b/mindspore/profiler/common/proto_files/memory_usage_pb2.py deleted file mode 100644 index 2cad50059c..0000000000 --- a/mindspore/profiler/common/proto_files/memory_usage_pb2.py +++ /dev/null @@ -1,295 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: mindspore/profiler/common/proto_files/memory_usage.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='mindspore/profiler/common/proto_files/memory_usage.proto', - package='profiler', - syntax='proto3', - serialized_options=None, - serialized_pb=_b('\n8mindspore/profiler/common/proto_files/memory_usage.proto\x12\x08profiler\"L\n\x0bMemoryProto\x12*\n\tgraph_mem\x18\x01 \x03(\x0b\x32\x17.profiler.GraphMemProto\x12\x11\n\ttotal_mem\x18\x02 \x01(\x03\"\xb1\x01\n\rGraphMemProto\x12\x10\n\x08graph_id\x18\x01 \x01(\x03\x12\x12\n\nstatic_mem\x18\x02 \x01(\x03\x12)\n\tnode_mems\x18\x03 \x03(\x0b\x32\x16.profiler.NodeMemProto\x12-\n\x0btensor_mems\x18\x04 \x03(\x0b\x32\x18.profiler.TensorMemProto\x12\x10\n\x08\x66p_start\x18\x05 \x01(\t\x12\x0e\n\x06\x62p_end\x18\x06 \x01(\t\"\x82\x01\n\x0cNodeMemProto\x12\x11\n\tnode_name\x18\x01 \x01(\t\x12\x0f\n\x07node_id\x18\x02 \x01(\x03\x12\x17\n\x0finput_tensor_id\x18\x03 \x03(\x03\x12\x18\n\x10output_tensor_id\x18\x04 \x03(\x03\x12\x1b\n\x13workspace_tensor_id\x18\x05 \x03(\x03\"x\n\x0eTensorMemProto\x12\x11\n\ttensor_id\x18\x01 \x01(\x03\x12\x0c\n\x04size\x18\x02 \x01(\x03\x12\x0c\n\x04type\x18\x03 \x01(\t\x12\x12\n\nlife_start\x18\x04 \x01(\x03\x12\x10\n\x08life_end\x18\x05 \x01(\x03\x12\x11\n\tlife_long\x18\x06 \x01(\tb\x06proto3') -) - - - - -_MEMORYPROTO = _descriptor.Descriptor( - name='MemoryProto', - full_name='profiler.MemoryProto', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='graph_mem', full_name='profiler.MemoryProto.graph_mem', index=0, - number=1, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='total_mem', full_name='profiler.MemoryProto.total_mem', index=1, - number=2, type=3, cpp_type=2, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - serialized_options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=70, - serialized_end=146, -) - - -_GRAPHMEMPROTO = _descriptor.Descriptor( - name='GraphMemProto', - full_name='profiler.GraphMemProto', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='graph_id', full_name='profiler.GraphMemProto.graph_id', index=0, - number=1, type=3, cpp_type=2, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='static_mem', full_name='profiler.GraphMemProto.static_mem', index=1, - number=2, type=3, cpp_type=2, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='node_mems', full_name='profiler.GraphMemProto.node_mems', index=2, - number=3, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='tensor_mems', full_name='profiler.GraphMemProto.tensor_mems', index=3, - number=4, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='fp_start', full_name='profiler.GraphMemProto.fp_start', index=4, - number=5, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='bp_end', full_name='profiler.GraphMemProto.bp_end', index=5, - number=6, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - serialized_options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=149, - serialized_end=326, -) - - -_NODEMEMPROTO = _descriptor.Descriptor( - name='NodeMemProto', - full_name='profiler.NodeMemProto', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='node_name', full_name='profiler.NodeMemProto.node_name', index=0, - number=1, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='node_id', full_name='profiler.NodeMemProto.node_id', index=1, - number=2, type=3, cpp_type=2, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='input_tensor_id', full_name='profiler.NodeMemProto.input_tensor_id', index=2, - number=3, type=3, cpp_type=2, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='output_tensor_id', full_name='profiler.NodeMemProto.output_tensor_id', index=3, - number=4, type=3, cpp_type=2, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='workspace_tensor_id', full_name='profiler.NodeMemProto.workspace_tensor_id', index=4, - number=5, type=3, cpp_type=2, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - serialized_options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=329, - serialized_end=459, -) - - -_TENSORMEMPROTO = _descriptor.Descriptor( - name='TensorMemProto', - full_name='profiler.TensorMemProto', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='tensor_id', full_name='profiler.TensorMemProto.tensor_id', index=0, - number=1, type=3, cpp_type=2, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='size', full_name='profiler.TensorMemProto.size', index=1, - number=2, type=3, cpp_type=2, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='type', full_name='profiler.TensorMemProto.type', index=2, - number=3, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='life_start', full_name='profiler.TensorMemProto.life_start', index=3, - number=4, type=3, cpp_type=2, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='life_end', full_name='profiler.TensorMemProto.life_end', index=4, - number=5, type=3, cpp_type=2, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='life_long', full_name='profiler.TensorMemProto.life_long', index=5, - number=6, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - serialized_options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=461, - serialized_end=581, -) - -_MEMORYPROTO.fields_by_name['graph_mem'].message_type = _GRAPHMEMPROTO -_GRAPHMEMPROTO.fields_by_name['node_mems'].message_type = _NODEMEMPROTO -_GRAPHMEMPROTO.fields_by_name['tensor_mems'].message_type = _TENSORMEMPROTO -DESCRIPTOR.message_types_by_name['MemoryProto'] = _MEMORYPROTO -DESCRIPTOR.message_types_by_name['GraphMemProto'] = _GRAPHMEMPROTO -DESCRIPTOR.message_types_by_name['NodeMemProto'] = _NODEMEMPROTO -DESCRIPTOR.message_types_by_name['TensorMemProto'] = _TENSORMEMPROTO -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -MemoryProto = _reflection.GeneratedProtocolMessageType('MemoryProto', (_message.Message,), { - 'DESCRIPTOR' : _MEMORYPROTO, - '__module__' : 'mindspore.profiler.common.proto_files.memory_usage_pb2' - # @@protoc_insertion_point(class_scope:profiler.MemoryProto) - }) -_sym_db.RegisterMessage(MemoryProto) - -GraphMemProto = _reflection.GeneratedProtocolMessageType('GraphMemProto', (_message.Message,), { - 'DESCRIPTOR' : _GRAPHMEMPROTO, - '__module__' : 'mindspore.profiler.common.proto_files.memory_usage_pb2' - # @@protoc_insertion_point(class_scope:profiler.GraphMemProto) - }) -_sym_db.RegisterMessage(GraphMemProto) - -NodeMemProto = _reflection.GeneratedProtocolMessageType('NodeMemProto', (_message.Message,), { - 'DESCRIPTOR' : _NODEMEMPROTO, - '__module__' : 'mindspore.profiler.common.proto_files.memory_usage_pb2' - # @@protoc_insertion_point(class_scope:profiler.NodeMemProto) - }) -_sym_db.RegisterMessage(NodeMemProto) - -TensorMemProto = _reflection.GeneratedProtocolMessageType('TensorMemProto', (_message.Message,), { - 'DESCRIPTOR' : _TENSORMEMPROTO, - '__module__' : 'mindspore.profiler.common.proto_files.memory_usage_pb2' - # @@protoc_insertion_point(class_scope:profiler.TensorMemProto) - }) -_sym_db.RegisterMessage(TensorMemProto) - - -# @@protoc_insertion_point(module_scope) diff --git a/mindspore/profiler/parser/container.py b/mindspore/profiler/parser/container.py index ec5c488d77..476545dd6d 100644 --- a/mindspore/profiler/parser/container.py +++ b/mindspore/profiler/parser/container.py @@ -13,8 +13,6 @@ # limitations under the License. # ============================================================================ """The container of metadata used in profiler parser.""" -import heapq - GIGABYTES = 1024 * 1024 * 1024 @@ -131,6 +129,7 @@ class MemoryGraph: self.bp_end = None self.lines = [] self.nodes = {} + self.breakdowns = [] def to_dict(self): """Convert Graph to dict.""" @@ -140,7 +139,8 @@ class MemoryGraph: 'nodes': self.nodes, 'fp_start': self.fp_start, 'bp_end': self.bp_end, - 'lines': self.lines + 'lines': self.lines, + 'breakdowns': self.breakdowns } return graph @@ -152,17 +152,15 @@ class MemoryNode: Args: node_proto (proto): Node proto. - graph_id (int): Graph id. """ - def __init__(self, node_proto, graph_id): + def __init__(self, node_proto): self._node_proto = node_proto - self.graph_id = graph_id self.node_id = node_proto.node_id self.name = node_proto.node_name self.fullname = "" - self.input_ids = [t_id for t_id in node_proto.input_tensor_id] - self.output_ids = [t_id for t_id in node_proto.output_tensor_id] - self.workspace_ids = [t_id for t_id in node_proto.workspace_tensor_id] + self.input_ids = list(node_proto.input_tensor_id) + self.output_ids = list(node_proto.output_tensor_id) + self.workspace_ids = list(node_proto.workspace_tensor_id) self.inputs = [] self.outputs = [] self.workspaces = [] @@ -181,8 +179,7 @@ class MemoryNode: 'size': self.size, 'allocated': self.mem_change, 'inputs': self.inputs, - 'outputs': self.outputs, - 'workspaces': self.workspaces + 'outputs': self.outputs } return node @@ -194,9 +191,8 @@ class MemoryTensor: Args: tensor_proto (proto): Tensor proto. - graph_id (int): Graph id. """ - def __init__(self, tensor_proto, graph_id): + def __init__(self, tensor_proto): self._tensor_proto = tensor_proto self.tensor_id = tensor_proto.tensor_id self.life_long = tensor_proto.life_long @@ -204,48 +200,25 @@ class MemoryTensor: self.life_end = tensor_proto.life_end self.size = tensor_proto.size / GIGABYTES self.type = tensor_proto.type - self.graph_id = graph_id + self.shape = "" + self.format = "" + self.dtype = "" + self.source_node = "" + self.name = "" def to_dict(self): """Convert Tensor to a dict.""" tensor = { + 'tensor_name': self.name, 'tensor_id': self.tensor_id, 'size': self.size, 'type': self.type, + 'shape': self.shape, + 'format': self.format, + 'data_type': self.dtype, 'life_long': self.life_long, 'life_start': self.life_start, 'life_end': self.life_end } return tensor - - -class MemoryQueue: - """ - A priority queue to keep specified number of active nodes in memory activities. - - Args: - size (int): The upper limit of nodes to be saved. - """ - def __init__(self, size): - self._queue = [] - self._index = 0 - self._size = size - - def push(self, item, priority): - """ - Push a node into MemoryQueue. - - Args: - item (tuple): Node item including id, name, etc. - priority (int): The priority of the item. - """ - if self._index < self._size: - heapq.heappush(self._queue, (-priority, item)) - self._index += 1 - else: - heapq.heappushpop(self._queue, (-priority, item)) - - def get_items(self): - """Get the elements in MemoryQueue.""" - return self._queue diff --git a/mindspore/profiler/parser/memory_usage_parser.py b/mindspore/profiler/parser/memory_usage_parser.py index f2b9755c57..413180542b 100644 --- a/mindspore/profiler/parser/memory_usage_parser.py +++ b/mindspore/profiler/parser/memory_usage_parser.py @@ -13,6 +13,7 @@ # limitations under the License. # ============================================================================ """Memory Usage Parser.""" +from collections import OrderedDict import json import os import stat @@ -22,12 +23,11 @@ from google.protobuf.text_format import ParseError from mindspore import log as logger from mindspore.profiler.common.exceptions.exceptions import ProfilerIOException, \ ProfilerFileNotFoundException, ProfilerRawFileException -from mindspore.profiler.common.proto_files.memory_usage_pb2 import MemoryProto from mindspore.profiler.common.validator.validate_path import validate_and_normalize_path from mindspore.profiler.parser.container import MemoryGraph as Graph from mindspore.profiler.parser.container import MemoryNode as Node -from mindspore.profiler.parser.container import MemoryQueue from mindspore.profiler.parser.container import MemoryTensor as Tensor +from mindspore.train.memory_profiling_pb2 import MemoryProto GIGABYTES = 1024 * 1024 * 1024 @@ -47,11 +47,10 @@ class MemoryUsageParser: 'allocations': 0, 'deallocations': 0, 'peak_mem': 0, - 'static_mem': 0, - 'breakdowns': [] + 'static_mem': 0 } - self._active_nodes = MemoryQueue(size=10) self._framework = {} + self._points = {} def _get_file_path(self): """Get the proto file path.""" @@ -73,7 +72,9 @@ class MemoryUsageParser: logger.info("Start to load memory usage data from pb file") file_path = self._get_file_path() self._framework = self._process_framework_info(aicore_detail_data) + self._points = points + # Open memory protobuf file. try: with open(file_path, 'rb') as f: content = f.read() @@ -81,106 +82,175 @@ class MemoryUsageParser: logger.error('Failed to read memory file: %s', err) raise ProfilerIOException - model_proto = MemoryProto() + # Parse memory raw data from file. + memory_proto = MemoryProto() try: - model_proto.ParseFromString(content) + memory_proto.ParseFromString(content) except ParseError as err: msg = "Fail to parse memory proto file." logger.error("Cannot parse the memory file. Please check the file schema.\n%s", err) raise ProfilerRawFileException(msg) - graphs = model_proto.graph_mem - self._graphs_dict = self._parse_graphs(graphs, points) - self._mem_summary['capacity'] = model_proto.total_mem / GIGABYTES + # Parse memory details based on graphs in the network. + graphs = memory_proto.graph_mem + self._parse_graph_memory(graphs) + # Update memory summary information. + self._mem_summary['capacity'] = memory_proto.total_mem / GIGABYTES self._mem_summary['peak_mem'] = self._peak_mem - self._process_memory_breakdowns() logger.info('Finished processing memory usage data.') - def _parse_graphs(self, graphs, points): - """Parse subgraphs.""" - graphs_dict = {} + def _parse_graph_memory(self, graphs): + """Parse memory usage based on subgraphs.""" for graph_proto in graphs: graph_id = graph_proto.graph_id if graph_id is None: logger.info('Graph id is missing, skipped the graph.') continue - graph = Graph(graph_proto) + graph_parser = GraphMemoryParser(graph_proto, self._points, self._framework) + graph = graph_parser.parse_graph() + if graph: + self._graphs_dict[graph_id] = graph - # process tensors in the graph - tensors_proto = graph_proto.tensor_mems - if not tensors_proto: - logger.info('No tensor in graph %s, skipped.', graph_id) - continue - tensors_dict = self._parse_tensors(tensors_proto, graph_id) + # update global memory usage data + self._peak_mem = max(self._peak_mem, graph_parser.peak_mem) + self._mem_summary['static_mem'] += graph_parser.static_mem + self._mem_summary['allocations'] += graph_parser.allocations + self._mem_summary['deallocations'] += graph_parser.deallocations - # calculate memory usage of the graph by number of nodes and details of tensors - nodes_proto = graph_proto.node_mems - # init memory usage list with static memory - mem_change = [graph.static_mem for _ in range(len(nodes_proto))] - self._calc_mem_change(mem_change, tensors_dict) - graph.lines = mem_change + def _write_memory_files(self, filename, content): + """Write the summary and top breakdowns of memory usage.""" + file_path = os.path.join(self._profiling_dir, filename) + file_path = validate_and_normalize_path(file_path) - # process nodes in graph - graph.nodes = self._parse_nodes( - nodes_proto, mem_change, tensors_dict, graph - ) + try: + with open(file_path, 'w') as json_file: + json.dump(content, json_file) + os.chmod(file_path, stat.S_IREAD | stat.S_IWRITE) + except (IOError, OSError) as err: + logger.error('Fail to write memory file.\n%s', err) + raise ProfilerIOException - # update fp_start and bp_end - point_id = self._locate_fp_bp_id(points, graph.nodes) - graph.fp_start = point_id.get('fp_start') - graph.bp_end = point_id.get('bp_end') + def write_memory_files(self): + """Write memory files.""" + logger.info('Start recording memory data into files...') + # write memory summary to json file + summary_filename = self._summary_filename.format(self._device_id) + self._write_memory_files(summary_filename, self._mem_summary) - graphs_dict.update({graph_id: graph.to_dict()}) + # write memory details to json file + details_filename = self._details_filename.format(self._device_id) + self._write_memory_files(details_filename, self._graphs_dict) + logger.info('Successfully write memory data into files.') - self._mem_summary['static_mem'] += graph.static_mem - self._mem_summary['allocations'] += len(tensors_dict) + 1 - self._mem_summary['deallocations'] += len(tensors_dict) + 1 - self._peak_mem = max(max(mem_change), self._peak_mem) + @staticmethod + def _process_framework_info(aicore_detail_data): + """Process framework info.""" + framework_info_dict = {} + for framework_obj in aicore_detail_data: + op_name = framework_obj[0] + op_full_name = framework_obj[4] + op_info = framework_obj[5] + framework_info_dict[op_name] = { + 'fullname': op_full_name, + 'name': op_name, + 'args': op_info + } + + return framework_info_dict - return graphs_dict - @staticmethod - def _parse_tensors(tensors_proto, graph_id): +class GraphMemoryParser: + """Parse memory usage data for each graph.""" + def __init__(self, graph_proto, points, framework): + self.graph = None + self.nodes = OrderedDict() + self.tensors = OrderedDict() + self._framework = framework + self._points = points + self._graph_proto = graph_proto + self.peak_mem = 0 + self.static_mem = 0 + self.allocations = 0 + self.deallocations = 0 + self._mem_change = [] + self.breakdowns = [] + self._lifetime = [] + + def parse_graph(self): + """Parse memory usage data for subgraphs.""" + graph_dict = {} + self.graph = Graph(self._graph_proto) + # process tensors in the graph + tensors_proto = self._graph_proto.tensor_mems + if not tensors_proto: + logger.info('No tensor in graph %s, skipped.', self.graph.graph_id) + return graph_dict + self._parse_tensors(tensors_proto) + + # calculate memory usage of the graph by number of nodes and details of tensors + nodes_proto = self._graph_proto.node_mems + # init memory usage list with static memory + self._mem_change = [self.graph.static_mem for _ in range(len(nodes_proto))] + self._lifetime = [[] for _ in range(len(nodes_proto))] + self._calc_mem_change() # update self._mem_change and self._lifetime + self.graph.lines = self._mem_change + + # process nodes in graph + self.graph.nodes = self._parse_nodes(nodes_proto) + + self._process_memory_breakdowns() + self.graph.breakdowns = self.breakdowns + + # update fp_start and bp_end + point_id = self._locate_fp_bp_id() + self.graph.fp_start = point_id.get('fp_start') + self.graph.bp_end = point_id.get('bp_end') + + graph_dict = self.graph.to_dict() + + self.static_mem = self.graph.static_mem + self.allocations = len(self.tensors) + self.deallocations = len(self.tensors) + self.peak_mem = max(max(self._mem_change), self.peak_mem) + + return graph_dict + + def _parse_tensors(self, tensors_proto): """Parse tensors.""" - tensors_dict = {} for tensor_proto in tensors_proto: - tensor = Tensor(tensor_proto, graph_id) - tensors_dict.update({tensor.tensor_id: tensor}) + tensor = Tensor(tensor_proto) + self.tensors.update({tensor.tensor_id: tensor}) - return tensors_dict - - def _parse_nodes(self, nodes_proto, mem_change, tensors_dict, graph): + def _parse_nodes(self, nodes_proto): """Parse nodes.""" - nodes_dict = {} + nodes_list = [] for index, node_proto in enumerate(nodes_proto): - node = Node(node_proto, graph.graph_id) - tensors = set(node.output_ids + node.workspace_ids) - node.size = self._calc_node_memory(tensors, tensors_dict) - node.allocations = len(tensors) - node.deallocations = len(tensors) + node = Node(node_proto) + # Calculate memory size allocated for this node + tensor_ids = set(node.output_ids + node.workspace_ids) + node.size = self._calc_node_memory(tensor_ids) + node.allocations = len(tensor_ids) + node.deallocations = len(tensor_ids) # calculate the allocated/deallocated memory size on the node if index == 0: - node.mem_change = mem_change[index] - graph.static_mem + node.mem_change = self._mem_change[index] - self.graph.static_mem else: - node.mem_change = mem_change[index] - mem_change[index-1] - - self._update_nodes(node, tensors_dict) - nodes_dict[node.name] = node.to_dict() + node.mem_change = self._mem_change[index] - self._mem_change[index-1] - # update active nodes - self._active_nodes.push( - item=(node.name, node.node_id, node.size, graph.graph_id), - priority=-node.size # priority is the negative value of node size - ) + self._update_nodes(node) + self._update_tensor_source(node) + self.nodes[node.name] = node + nodes_list.append(node.to_dict()) - return nodes_dict + return nodes_list - def _update_nodes(self, node, tensors_dict): + def _update_nodes(self, node): """Update nodes.""" - skipped = self._find_conflict_tensors(node) + # Remove duplicate tensors + self._remove_duplicate_tensors(node) name = node.name if self._framework and name in self._framework: node_frame = self._framework[name] @@ -192,59 +262,38 @@ class MemoryUsageParser: else: node.outputs.append(value) - node.inputs = self._fill_tensor_dict( - node.inputs, node.input_ids, tensors_dict, 'input' - ) - node.outputs = self._fill_tensor_dict( - node.outputs, node.output_ids, tensors_dict, 'output' - ) - node.workspaces = self._fill_tensor_dict( - node.workspaces, node.workspace_ids, tensors_dict, 'workspace', skipped - ) + def _update_tensor_source(self, node): + """Update source node for tensors.""" + for t_id in node.output_ids: + tensor = self.tensors.get(t_id) + tensor.source_node = node.name @staticmethod - def _find_conflict_tensors(node): + def _remove_duplicate_tensors(node): """Find conflict tensors in node.""" - output_list = [] - if node.output_ids: - output_list = node.output_ids - skipped = [] if node.workspace_ids: - for t_id in node.workspace_ids: - if t_id in output_list: - skipped.append(t_id) - - return skipped - - @staticmethod - def _fill_tensor_dict(node_ios, tensor_ids, tensors_dict, tensor_type, skipped=None): - """Fill tensor dict.""" - full_list = [] - for t_id, io_dict in zip(tensor_ids, node_ios): - if tensor_type == 'workspace' and t_id in skipped: - continue - tensor = tensors_dict.get(t_id) - tensor.type = tensor_type - io_dict.update(tensor.to_dict()) - full_list.append(io_dict) - - return full_list - - @staticmethod - def _calc_node_memory(tensors, tensors_dict): + i = 0 + while i < len(node.workspace_ids): + t_id = node.workspace_ids[i] + if t_id in node.output_ids: + del node.workspace_ids[i] # remove duplicate tensor + continue + i += 1 + + def _calc_node_memory(self, tensor_ids): """Calculate the allocated memory for the node.""" node_mem = 0 - for t_id in tensors: - tensor = tensors_dict[t_id] + for t_id in tensor_ids: + tensor = self.tensors[t_id] size = tensor.size node_mem += size return node_mem - def _calc_mem_change(self, mem_change, tensors_dict): + def _calc_mem_change(self): """Calculate the memory change for the subgraph.""" - node_num = len(mem_change) - for tensor_id, tensor in tensors_dict.items(): + node_num = len(self._mem_change) + for tensor_id, tensor in self.tensors.items(): life_long = tensor.life_long life_start = tensor.life_start life_end = tensor.life_end @@ -255,101 +304,67 @@ class MemoryUsageParser: if life_long == 'LifeLongGraphAll': # lifetime is from graph start to graph end tensor.life_start = 0 tensor.life_end = node_num - self._update_mem_change(mem_change, size, 0, node_num) + self._update_mem_change(size, 0, node_num, tensor_id) elif life_long == 'LifeLongGraphStart': # lifetime is from graph start to tensor end if life_end is not None and life_end >= 0: tensor.life_start = 0 - self._update_mem_change(mem_change, size, 0, life_end+1) + self._update_mem_change(size, 0, life_end+1, tensor_id) else: logger.info('Cannot locate lifetime end for tensor: %s', tensor_id) elif life_long == 'LifeLongGraphEnd': # lifetime is from tensor start to graph end if life_start is not None and life_start <= node_num: tensor.life_end = node_num - self._update_mem_change(mem_change, size, life_start, node_num) + self._update_mem_change(size, life_start, node_num, tensor_id) else: logger.info('Cannot locate lifetime start for tensor: %s', tensor_id) elif life_long == 'LifeLongNone': # lifetime is from tensor start to tensor end if life_start is not None and life_end is not None and life_start <= life_end: - self._update_mem_change(mem_change, size, life_start, life_end+1) + self._update_mem_change(size, life_start, life_end+1, tensor_id) else: logger.info('Cannot locate lifetime start or end for tensor: %s', tensor_id) - @staticmethod - def _update_mem_change(mem_change, size, start, end): + def _update_mem_change(self, size, start, end, tensor_id): """Update memory change for the subgraph.""" for i in range(start, end): - mem_change[i] += size + self._mem_change[i] += size + # Update tensor lifetime list. + self._lifetime[i].append(tensor_id) - @staticmethod - def _locate_fp_bp_id(points, nodes): + def _locate_fp_bp_id(self): """Locate the node id of fp_start and bp_end in graph.""" point_id = { 'fp_start': None, 'bp_end': None } - fp_start = points.get('fp_start') - bp_end = points.get('bp_end') + fp_start = self._points.get('fp_start') + bp_end = self._points.get('bp_end') fp_name = fp_start.split('/')[-1] if fp_start else "" bp_name = bp_end.split('/')[-1] if bp_end else "" - if fp_name in nodes: - point_id['fp_start'] = nodes[fp_name].get('node_id') - if bp_name in nodes: - point_id['bp_end'] = nodes[bp_name].get('node_id') + if fp_name in self.nodes: + point_id['fp_start'] = self.nodes[fp_name].node_id + if bp_name in self.nodes: + point_id['bp_end'] = self.nodes[bp_name].node_id return point_id - def _write_memory_files(self, filename, content): - """Write the summary and top breakdowns of memory usage.""" - file_path = os.path.join(self._profiling_dir, filename) - file_path = validate_and_normalize_path(file_path) - - try: - with open(file_path, 'w') as json_file: - json.dump(content, json_file) - os.chmod(file_path, stat.S_IREAD | stat.S_IWRITE) - except (IOError, OSError) as err: - logger.error('Fail to write memory file.\n%s', err) - raise ProfilerIOException - - def write_memory_files(self): - """Write memory files.""" - logger.info('Start recording memory data into files...') - # write memory summary to json file - summary = self._summary_filename.format(self._device_id) - self._write_memory_files(summary, self._mem_summary) - - # write memory details to json file - details = self._details_filename.format(self._device_id) - self._write_memory_files(details, self._graphs_dict) - logger.info('Successfully write memory data into files.') - def _process_memory_breakdowns(self): - """Process memory breakdowns.""" - breakdowns = [] - active_nodes = self._active_nodes.get_items() - for _, node_meta in active_nodes: - node_name, _, _, graph_id = node_meta - graph = self._graphs_dict[graph_id] - nodes_dict = graph.get('nodes') - node = nodes_dict.get(node_name) - if 'inputs' in node: - node.pop('inputs') - breakdowns.append(node) - - self._mem_summary['breakdowns'] = breakdowns - - @staticmethod - def _process_framework_info(aicore_detail_data): - """Process framework info.""" - framework_info_dict = {} - for framework_obj in aicore_detail_data: - op_name = framework_obj[0] - op_full_name = framework_obj[4] - op_info = framework_obj[5] - framework_info_dict[op_name] = { - 'fullname': op_full_name, - 'name': op_name, - 'args': op_info - } - - return framework_info_dict + """Process memory breakdowns for each node.""" + self.breakdowns = [[] for _ in range(len(self.nodes))] + for index, breakdown in enumerate(self._lifetime): + for t_id in breakdown: + tensor = self.tensors.get(t_id) + source_node = tensor.source_node + if not source_node: + continue + node = self.nodes.get(source_node) + for i, output_id in enumerate(node.output_ids): + if t_id == output_id: + output = node.outputs[i] if i < len(node.outputs) else {} + tensor.name = node.name + ':' + str(i) + tensor.shape = output.get('shape') + tensor.dtype = output.get('data_type') + tensor.format = output.get('format') + tensor.type = 'output' + + tensor_dict = tensor.to_dict() + self.breakdowns[index].append(tensor_dict)