Add memory profiler (#16137)

test=develop
chengduo committed by GitHub
parent 05993c3ff3
commit 0979956619

@@ -3,7 +3,7 @@ cc_library(cpu_allocator SRCS cpu_allocator.cc DEPS allocator)
cc_library(best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator)
cc_library(locked_allocator SRCS locked_allocator.cc DEPS allocator)
cc_library(buffered_allocator SRCS buffered_allocator.cc DEPS allocator)
cc_library(legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_allocator profiler)
cc_test(buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator)
if (WITH_GPU)

@@ -12,8 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <string>
#include <utility>
@@ -24,9 +22,11 @@
#endif
#include "glog/logging.h"
#include "paddle/fluid/memory/allocation/legacy_allocator.h"
#include "paddle/fluid/memory/detail/buddy_allocator.h"
#include "paddle/fluid/memory/detail/system_allocator.h"
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/string/split.h"
@@ -329,18 +329,22 @@ size_t Usage::operator()(const platform::CUDAPinnedPlace &cuda_pinned) const {
}  // namespace legacy

namespace allocation {
LegacyMemMonitor GPUMemMonitor;

Allocation *LegacyAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
  void *ptr = boost::apply_visitor(legacy::AllocVisitor(size), place_);
  auto *tmp_alloc = new Allocation(ptr, size, place_);
  platform::MemEvenRecorder::Instance().PushMemRecord(
      static_cast<void *>(tmp_alloc), place_, size);
  return tmp_alloc;
}

void LegacyAllocator::Free(Allocation *allocation) {
  boost::apply_visitor(
      legacy::FreeVisitor(allocation->ptr(), allocation->size()),
      allocation->place());
  platform::MemEvenRecorder::Instance().PopMemRecord(
      static_cast<void *>(allocation), place_);
  delete allocation;
}
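Editor's note: the change above pairs each allocation with its eventual free by using the Allocation object's address as the lookup key — PushMemRecord stores a record under the pointer when AllocateImpl returns, and PopMemRecord retrieves and finalizes it when Free runs. A minimal Python sketch of that pairing idea (names here are hypothetical, not the Paddle API):

import time

class MemRecorderSketch(object):
    """Pairs push/pop by (place, ptr), as the C++ MemEvenRecorder does."""

    def __init__(self):
        self._live = {}    # (place, ptr) -> (start_ns, nbytes)
        self.events = []   # finished (start_ns, end_ns, nbytes, place) tuples

    def push(self, ptr, place, nbytes):
        self._live[(place, ptr)] = (int(time.time() * 1e9), nbytes)

    def pop(self, ptr, place):
        start_ns, nbytes = self._live.pop((place, ptr))
        self.events.append((start_ns, int(time.time() * 1e9), nbytes, place))

rec = MemRecorderSketch()
rec.push(0x7f01, "CPUPlace", 4096)   # at AllocateImpl time
rec.pop(0x7f01, "CPUPlace")          # at Free time
print(rec.events)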

@@ -11,7 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <deque>
#include <forward_list>
@@ -30,6 +29,8 @@ limitations under the License. */
#include "glog/logging.h"
#include "google/protobuf/text_format.h"
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/platform/device_tracer.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/string/printf.h"

namespace paddle {
@@ -317,6 +318,24 @@ class DeviceTracerImpl : public DeviceTracer {
                           stream_id, correlation_id, bytes});
  }

  void AddMemInfoRecord(uint64_t start_ns, uint64_t end_ns, size_t bytes,
                        const Place &place, const std::string &alloc_in,
                        const std::string &free_in, int64_t thread_id) {
    if (0 == start_ns || 0 == end_ns) {
      VLOG(3) << alloc_in << ", " << free_in << " Cannot be traced.";
      return;
    }
    thread_local std::forward_list<MemInfoRecord> *local_mem_info_record =
        nullptr;
    if (local_mem_info_record == nullptr) {
      std::lock_guard<std::mutex> l(trace_mu_);
      mem_info_record_.emplace_front();
      local_mem_info_record = &mem_info_record_.front();
    }
    local_mem_info_record->emplace_front(MemInfoRecord{
        start_ns, end_ns, bytes, place, thread_id, alloc_in, free_in});
  }
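Editor's note: AddMemInfoRecord takes the global trace_mu_ lock only the first time a given thread records, to splice a fresh per-thread list into mem_info_record_; every later record appends to the thread-local list without locking. A rough Python illustration of that lock-once-per-thread pattern (not the actual tracer API):

import threading

_registry_lock = threading.Lock()
_all_records = []          # one private list per recording thread
_tls = threading.local()

def add_record(record):
    if getattr(_tls, 'buf', None) is None:
        with _registry_lock:          # paid once per thread
            _tls.buf = []
            _all_records.append(_tls.buf)
    _tls.buf.append(record)           # lock-free fast path

threads = [threading.Thread(target=add_record, args=(i, )) for i in range(4)]
for t in threads:
    t.start()
for t in threads:
    t.join()
print(sum(len(buf) for buf in _all_records))  # prints 4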
  void AddActiveKindRecords(const std::string &anno, uint64_t start_ns,
                            uint64_t end_ns, int64_t device_id,
                            int64_t thread_id, uint32_t correlation_id) {
@@ -409,6 +428,7 @@ class DeviceTracerImpl : public DeviceTracer {
    correlations_.clear();
    for (auto &tmp : correlations_pairs) tmp.clear();
    for (auto &tmp : cpu_records_) tmp.clear();
    for (auto &tmp : mem_info_record_) tmp.clear();
    for (auto &tmp : active_kind_records_) tmp.clear();
  }
@@ -440,9 +460,12 @@ class DeviceTracerImpl : public DeviceTracer {
    proto::Profile profile_pb;
    profile_pb.set_start_ns(start_ns_);
    profile_pb.set_end_ns(end_ns_);
    if (correlations_.empty()) {
      for (auto &tmp : correlations_pairs) {
        for (auto &pair : tmp) correlations_[pair.first] = pair.second;
      }
    }
    for (const KernelRecord &r : kernel_records_) {
      auto *event = profile_pb.add_events();
      event->set_type(proto::Event::GPUKernel);
@@ -462,6 +485,7 @@ class DeviceTracerImpl : public DeviceTracer {
      event->set_device_id(r.device_id);
    }
    VLOG(1) << "KernelRecord event miss: " << miss << " find: " << find;

    for (auto &tmp : cpu_records_) {
      for (const CPURecord &r : tmp) {
        auto *event = profile_pb.add_events();
@@ -473,6 +497,7 @@ class DeviceTracerImpl : public DeviceTracer {
        event->set_device_id(r.device_id);
      }
    }

    for (auto &tmp : active_kind_records_) {
      for (const ActiveKindRecord &r : tmp) {
        auto *event = profile_pb.add_events();
@@ -510,6 +535,31 @@ class DeviceTracerImpl : public DeviceTracer {
      event->mutable_memcopy()->set_bytes(r.bytes);
    }
    VLOG(1) << "MemRecord event miss: " << miss << " find: " << find;
    for (auto &tmp : mem_info_record_) {
      for (const auto &r : tmp) {
        auto *event = profile_pb.add_mem_events();
        event->set_device_id(0);
        if (platform::is_cpu_place(r.place)) {
          event->set_place(proto::MemEvent::CPUPlace);
        } else if (platform::is_gpu_place(r.place)) {
          event->set_place(proto::MemEvent::CUDAPlace);
          event->set_device_id(
              boost::get<platform::CUDAPlace>(r.place).GetDeviceId());
        } else if (platform::is_cuda_pinned_place(r.place)) {
          event->set_place(proto::MemEvent::CUDAPinnedPlace);
        } else {
          PADDLE_THROW("The current place is not supported.");
        }
        event->set_alloc_in(r.alloc_in);
        event->set_free_in(r.free_in);
        event->set_start_ns(r.start_ns);
        event->set_end_ns(r.end_ns);
        event->set_bytes(r.bytes);
        event->set_thread_id(r.thread_id);
      }
    }
    std::ofstream profile_f;
    profile_f.open(profile_path,
                   std::ios::out | std::ios::trunc | std::ios::binary);
@@ -553,6 +603,7 @@ class DeviceTracerImpl : public DeviceTracer {
  std::forward_list<KernelRecord> kernel_records_;
  std::forward_list<MemRecord> mem_records_;
  std::forward_list<std::forward_list<CPURecord>> cpu_records_;
  std::forward_list<std::forward_list<MemInfoRecord>> mem_info_record_;
  std::forward_list<std::forward_list<ActiveKindRecord>> active_kind_records_;
  std::forward_list<std::forward_list<std::pair<uint32_t, Event *>>>
      correlations_pairs;
@@ -575,7 +626,7 @@ Event *CurAnnotation() {
  return annotation_stack.back();
}
std::string CurAnnotationName() {
  if (annotation_stack.empty()) return "Unknown";
  return annotation_stack.back()->name();
}

@@ -18,6 +18,7 @@ limitations under the License. */
#include "paddle/fluid/platform/dynload/cupti.h"
#include "paddle/fluid/platform/event.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/platform/profiler.pb.h"
@@ -47,6 +48,7 @@ class DeviceTracer {
    int64_t stream_id;
    uint32_t correlation_id;
  };

  struct CPURecord {
    std::string name;
    uint64_t start_ns;
@@ -54,6 +56,7 @@ class DeviceTracer {
    int64_t device_id;
    int64_t thread_id;
  };

  struct MemRecord {
    std::string name;
    uint64_t start_ns;
@@ -63,6 +66,17 @@ class DeviceTracer {
    uint32_t correlation_id;
    uint64_t bytes;
  };

  struct MemInfoRecord {
    uint64_t start_ns;
    uint64_t end_ns;
    size_t bytes;
    Place place;
    int64_t thread_id;
    std::string alloc_in;
    std::string free_in;
  };

  struct ActiveKindRecord {
    std::string name;
    uint64_t start_ns;
@@ -71,6 +85,7 @@ class DeviceTracer {
    int64_t thread_id;
    uint32_t correlation_id;
  };

  virtual ~DeviceTracer() {}
  // Needs to be called once before use.
  virtual void Enable() = 0;
@@ -97,6 +112,12 @@ class DeviceTracer {
                                    int64_t thread_id,
                                    uint32_t correlation_id) = 0;

  virtual void AddMemInfoRecord(uint64_t start_ns, uint64_t end_ns,
                                size_t bytes, const Place& place,
                                const std::string& alloc_in,
                                const std::string& free_in,
                                int64_t thread_id) = 0;

  // Add a cuda kernel stats. `correlation_id` will be mapped to annotation
  // added before for human readability.
  virtual void AddKernelRecords(std::string name, uint64_t start, uint64_t end,

@@ -13,10 +13,12 @@ See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include <string>
#ifdef PADDLE_WITH_CUDA
#include <cuda_runtime.h>
#endif
#include "paddle/fluid/platform/place.h"

namespace paddle {
namespace platform {
@@ -64,5 +66,36 @@ class Event {
#endif
#endif
};
class MemEvent {
 public:
  MemEvent(EventType type, uint64_t start_ns, uint64_t end_ns, size_t bytes,
           Place place, int64_t thread_id, const std::string& annotation)
      : type_(type),
        start_ns_(start_ns),
        end_ns_(end_ns),
        bytes_(bytes),
        place_(place),
        thread_id_(thread_id),
        annotation_(annotation) {}

  const EventType& type() const { return type_; }
  uint64_t start_ns() const { return start_ns_; }
  uint64_t end_ns() const { return end_ns_; }
  size_t bytes() const { return bytes_; }
  Place place() const { return place_; }
  int64_t thread_id() const { return thread_id_; }
  const std::string& annotation() const { return annotation_; }

 private:
  EventType type_;
  uint64_t start_ns_ = 0;
  uint64_t end_ns_ = 0;
  size_t bytes_;
  Place place_;
  int64_t thread_id_;
  std::string annotation_;
};
}  // namespace platform
}  // namespace paddle

(File diff suppressed because it is too large.)

@@ -15,10 +15,17 @@ limitations under the License. */
#pragma once
#include <forward_list>
#include <list>
#include <map>
#include <memory>
#include <mutex>  // NOLINT
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/event.h"
#include "paddle/fluid/platform/place.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/gpu_info.h"
#endif
@@ -34,8 +41,41 @@ enum ProfilerState {
void Mark(const std::string& name);

void PushMemEvent(uint64_t start_ns, uint64_t end_ns, size_t bytes,
                  const Place& place);
void PopMemEvent(uint64_t start_ns, uint64_t end_ns, size_t bytes,
                 const Place& place);

struct MemEvenRecorder {
 public:
  void PushMemRecord(const void* ptr, const Place& place, size_t size);
  void PopMemRecord(const void* ptr, const Place& place);
  void Flush();
  static MemEvenRecorder& Instance() { return recorder; }

 private:
  struct RecordMemEvent {
    RecordMemEvent(const Place& place, size_t bytes);
    ~RecordMemEvent();
    Place place_;
    size_t bytes_;
    uint64_t start_ns_;
    uint64_t end_ns_;
    std::string alloc_in_;
    std::string free_in_;
  };

  static MemEvenRecorder recorder;
  std::map<Place,
           std::unordered_map<const void*, std::unique_ptr<RecordMemEvent>>>
      address_memevent_;
  std::mutex mtx_;
  MemEvenRecorder() {}
  DISABLE_COPY_AND_ASSIGN(MemEvenRecorder);
};
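Editor's note: the nested RecordMemEvent ties both timestamps to object lifetime — its constructor stamps start_ns_ and its destructor stamps end_ns_ and reports the finished event, so a record is emitted exactly when the tracked allocation dies. A loose Python analogue of that RAII shape using a context manager (names are illustrative only):

import contextlib
import time

@contextlib.contextmanager
def record_mem_event(place, nbytes):
    start_ns = int(time.time() * 1e9)    # "constructor": stamp the start
    try:
        yield
    finally:
        end_ns = int(time.time() * 1e9)  # "destructor": stamp the end, report
        print("mem event:", place, nbytes, "held for", end_ns - start_ns, "ns")

with record_mem_event("CPUPlace", 1024):
    buf = bytearray(1024)                # allocation whose lifetime is traced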
Event* PushEvent(const std::string& name);
void PopEvent(const std::string& name);

struct RecordEvent {
@@ -87,6 +127,41 @@ enum EventSortingKey {
  kGPUTime
};
template <typename T>
struct EventList {
  constexpr static size_t kMB = 1024 * 1024;
  constexpr static size_t kEventBlockSize = 16 * kMB;
  constexpr static size_t kEventSize = sizeof(T);
  constexpr static size_t kEventAlign = alignof(T);
  constexpr static size_t kNumBlock =
      kEventBlockSize /
      ((kEventSize + kEventAlign - 1) / kEventAlign * kEventAlign);

  template <typename... Args>
  T* Record(Args&&... args) {
    if (event_blocks.empty() || event_blocks.front().size() == kNumBlock) {
      event_blocks.emplace_front();
      event_blocks.front().reserve(kNumBlock);
    }
    event_blocks.front().emplace_back(std::forward<Args>(args)...);
    return &event_blocks.front().back();
  }

  std::vector<T> Reduce() {
    std::vector<T> result;
    for (auto& block : event_blocks) {
      result.insert(result.begin(), std::make_move_iterator(block.begin()),
                    std::make_move_iterator(block.end()));
    }
    event_blocks.clear();
    return result;
  }

  void Clear() { event_blocks.clear(); }

  std::forward_list<std::vector<T>> event_blocks;
};
// Enable the profiling function.
void EnableProfiler(ProfilerState state);

@@ -34,8 +34,25 @@ message Event {
  optional string detail_info = 9;
}
message MemEvent {
  enum Place {
    CUDAPlace = 0;
    CPUPlace = 1;
    CUDAPinnedPlace = 2;
  }
  optional uint64 start_ns = 1;
  optional uint64 end_ns = 2;
  optional uint64 bytes = 3;
  optional Place place = 4;
  optional uint64 thread_id = 5;
  optional uint32 device_id = 6;
  optional string alloc_in = 7;
  optional string free_in = 8;
}
message Profile {
  repeated Event events = 1;
  optional uint64 start_ns = 2;
  optional uint64 end_ns = 3;
  repeated MemEvent mem_events = 4;
}
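Editor's note: with the two messages above, a dumped profile can be inspected offline. A hedged sketch of reading the serialized Profile and listing its memory events; it assumes the protobuf-generated Python module for this file is importable as profiler_pb2 (the exact package path depends on the build):

import profiler_pb2  # hypothetical import path for the generated module

profile = profiler_pb2.Profile()
with open('profile_dump', 'rb') as f:
    profile.ParseFromString(f.read())

for e in profile.mem_events:
    held_ns = e.end_ns - e.start_ns
    print(e.device_id, e.bytes, held_ns, e.alloc_in, e.free_in)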

@@ -95,6 +95,22 @@ class _ChromeTraceFormatter(object):
        event['args'] = args
        self._events.append(event)
    def emit_counter(self, category, name, pid, timestamp, counter, value):
        """Emits a record for a single counter.

        Args:
            category: The event category as string
            name: The event name as string
            pid: Identifier of the process generating this event as integer
            timestamp: The timestamp of this event as long integer
            counter: Name of the counter as string
            value: Value of the counter as integer
        """
        event = self._create_event('C', category, name, pid, 0, timestamp)
        event['args'] = {counter: value}
        self._events.append(event)
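Editor's note: emit_counter writes a Chrome trace "counter" record (phase 'C'); chrome://tracing plots the {counter: value} pairs from args as a per-pid series, which is how the memory curve below gets drawn. A standalone sketch of roughly what one such record looks like (field spellings follow the Chrome trace format, not necessarily _create_event's internals):

import json

counter_event = {
    'ph': 'C',                  # counter phase
    'cat': 'Memory',            # category
    'name': 'Memory',
    'pid': 3,                   # process lane in the trace viewer
    'ts': 1234,                 # timestamp
    'args': {'Memory': 2048},   # counter name -> current value
}
print(json.dumps([counter_event]))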
    def format_to_string(self, pretty=False):
        """Formats the chrome trace to a string.
@@ -117,6 +133,7 @@ class Timeline(object):
        self._profile_dict = profile_dict
        self._pid = 0
        self._devices = dict()
        self._mem_devices = dict()
        self._chrome_trace = _ChromeTraceFormatter()

    def _allocate_pid(self):
@@ -143,6 +160,45 @@ class Timeline(object):
                    self._devices[(k, event.device_id, "GPUKernel")] = pid
                    self._chrome_trace.emit_pid("%s:gpu:%d" %
                                                (k, event.device_id), pid)
            for mevent in profile_pb.mem_events:
                if mevent.place == profiler_pb2.MemEvent.CUDAPlace:
                    if (k, mevent.device_id, "GPU") not in self._mem_devices:
                        pid = self._allocate_pid()
                        self._mem_devices[(k, mevent.device_id, "GPU")] = pid
                        self._chrome_trace.emit_pid(
                            "memory usage on %s:gpu:%d" % (k, mevent.device_id),
                            pid)
                elif mevent.place == profiler_pb2.MemEvent.CPUPlace:
                    if (k, mevent.device_id, "CPU") not in self._mem_devices:
                        pid = self._allocate_pid()
                        self._mem_devices[(k, mevent.device_id, "CPU")] = pid
                        self._chrome_trace.emit_pid(
                            "memory usage on %s:cpu:%d" % (k, mevent.device_id),
                            pid)
                elif mevent.place == profiler_pb2.MemEvent.CUDAPinnedPlace:
                    if (k, mevent.device_id, "CUDAPinnedPlace"
                        ) not in self._mem_devices:
                        pid = self._allocate_pid()
                        self._mem_devices[(k, mevent.device_id,
                                           "CUDAPinnedPlace")] = pid
                        self._chrome_trace.emit_pid(
                            "memory usage on %s:cudapinnedplace:%d" %
                            (k, mevent.device_id), pid)
            if (k, 0, "CPU") not in self._mem_devices:
                pid = self._allocate_pid()
                self._mem_devices[(k, 0, "CPU")] = pid
                self._chrome_trace.emit_pid("memory usage on %s:cpu:%d" %
                                            (k, 0), pid)
            if (k, 0, "GPU") not in self._mem_devices:
                pid = self._allocate_pid()
                self._mem_devices[(k, 0, "GPU")] = pid
                self._chrome_trace.emit_pid("memory usage on %s:gpu:%d" %
                                            (k, 0), pid)
            if (k, 0, "CUDAPinnedPlace") not in self._mem_devices:
                pid = self._allocate_pid()
                self._mem_devices[(k, 0, "CUDAPinnedPlace")] = pid
                self._chrome_trace.emit_pid(
                    "memory usage on %s:cudapinnedplace:%d" % (k, 0), pid)
    def _allocate_events(self):
        for k, profile_pb in six.iteritems(self._profile_dict):
@@ -163,9 +219,57 @@ class Timeline(object):
                    event.start_ns, (event.end_ns - event.start_ns) / 1.0, pid,
                    event.sub_device_id, 'Op', event.name, args)
    def _allocate_memory_event(self):
        place_to_str = {
            profiler_pb2.MemEvent.CPUPlace: "CPU",
            profiler_pb2.MemEvent.CUDAPlace: "GPU",
            profiler_pb2.MemEvent.CUDAPinnedPlace: "CUDAPinnedPlace"
        }
        for k, profile_pb in six.iteritems(self._profile_dict):
            mem_list = []
            end_profiler = 0
            for mevent in profile_pb.mem_events:
                crt_info = dict()
                crt_info['time'] = mevent.start_ns
                crt_info['size'] = mevent.bytes
                if mevent.place in place_to_str:
                    place = place_to_str[mevent.place]
                else:
                    place = "UnDefine"
                crt_info['place'] = place
                pid = self._mem_devices[(k, mevent.device_id, place)]
                crt_info['pid'] = pid
                crt_info['thread_id'] = mevent.thread_id
                crt_info['device_id'] = mevent.device_id
                mem_list.append(crt_info)
                crt_info = dict()
                crt_info['place'] = place
                crt_info['pid'] = pid
                crt_info['thread_id'] = mevent.thread_id
                crt_info['device_id'] = mevent.device_id
                crt_info['time'] = mevent.end_ns
                crt_info['size'] = -mevent.bytes
                mem_list.append(crt_info)
                end_profiler = max(end_profiler, crt_info['time'])
            mem_list.sort(key=lambda tmp: (tmp.get('time', 0)))
            i = 0
            total_size = 0
            while i < len(mem_list):
                total_size += mem_list[i]['size']
                while i < len(mem_list) - 1 and mem_list[i]['time'] == mem_list[
                        i + 1]['time']:
                    total_size += mem_list[i + 1]['size']
                    i += 1
                self._chrome_trace.emit_counter(
                    "Memory", "Memory", mem_list[i]['pid'], mem_list[i]['time'],
                    0, total_size)
                i += 1
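Editor's note: the loop above turns each memory event into two deltas — +bytes at start_ns and -bytes at end_ns — sorts all deltas by time, coalesces entries that share a timestamp, and emits the running total as the counter value. The core of that logic in isolation:

# (time_ns, delta_bytes): one +/- pair per traced allocation.
deltas = [(100, 4096), (150, 2048), (200, -4096), (260, -2048)]
deltas.sort(key=lambda d: d[0])

total = 0
for t, delta in deltas:
    total += delta
    print(t, total)   # 100 4096 / 150 6144 / 200 2048 / 260 0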
    def generate_chrome_trace(self):
        self._allocate_pids()
        self._allocate_events()
        self._allocate_memory_event()
        return self._chrome_trace.format_to_string()
