parent
1f6002edc0
commit
6cf2dcbc1f
@ -0,0 +1,70 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#pragma once

#include <cuda_profiler_api.h>

#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <array>
#include <fstream>
#include <string>
#include <vector>
|
||||
|
||||
namespace paddle {
|
||||
namespace platform {
|
||||
|
||||
static std::vector<std::string> kCudaProfileConfiguration = {
|
||||
"gpustarttimestamp",
|
||||
"gpuendtimestamp",
|
||||
"gridsize3d",
|
||||
"threadblocksize",
|
||||
"dynsmemperblock",
|
||||
"stasmemperblock",
|
||||
"regperthread",
|
||||
"memtransfersize",
|
||||
"memtransferdir",
|
||||
"memtransferhostmemtype",
|
||||
"streamid",
|
||||
"cacheconfigrequested",
|
||||
"cacheconfigexecuted",
|
||||
"countermodeaggregate",
|
||||
"enableonstart 0",
|
||||
"active_warps",
|
||||
"active_cycles",
|
||||
};
|
||||
|
||||
void CudaProfilerInit(std::string output_file, std::string output_mode) {
|
||||
std::array<char, 128> buf;
|
||||
std::string tmpl = "/tmp/cuda_profile_config.XXXXXX";
|
||||
PADDLE_ENFORCE_LT(tmpl.size(), buf.size());
|
||||
memcpy(buf.data(), tmpl.data(), tmpl.size());
|
||||
auto result = mktemp(buf.data());
|
||||
PADDLE_ENFORCE(strlen(result) != 0);
|
||||
std::string config = result;
|
||||
|
||||
{
|
||||
std::ofstream ofs(config, std::ios::out | std::ios::trunc);
|
||||
PADDLE_ENFORCE(ofs.is_open(), "ofstream: ", ofs.rdstate());
|
||||
for (const auto& line : kCudaProfileConfiguration) {
|
||||
ofs << line << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
PADDLE_ENFORCE(output_mode == "key_value" || output_mode == "csv");
|
||||
cudaOutputMode_t mode = output_mode == "csv" ? cudaCSV : cudaKeyValuePair;
|
||||
PADDLE_ENFORCE(
|
||||
cudaProfilerInitialize(config.c_str(), output_file.c_str(), mode));
|
||||
}
|
||||
|
||||
// Enables data collection by the active CUDA profiling tool; must be
// preceded by CudaProfilerInit() since the config disables auto-start.
void CudaProfilerStart() {
  PADDLE_ENFORCE(cudaProfilerStart());
}
|
||||
|
||||
// Disables data collection by the active CUDA profiling tool.
// (Dropped the redundant extra parentheses around the call so the
// style matches CudaProfilerStart above.)
void CudaProfilerStop() {
  PADDLE_ENFORCE(cudaProfilerStop());
}
|
||||
}
|
||||
}
|
@ -0,0 +1,59 @@
|
||||
import paddle.v2.fluid.core as core
|
||||
|
||||
|
||||
def nvporf_init(output_file, output_mode=None):
    """
    Initialize the CUDA profiler.
    This method must be called before nvporf_start.

    :param output_file: The output file name.
    :type output_file: string
    :param output_mode: The output mode has Key-Value pair format and
                        Comma separated values format.
                        It should be 'key-value' or 'csv'.
    :type output_mode: string
    :raises ValueError: if output_mode is neither 'key-value' nor 'csv'.
    """
    if output_mode is None:
        output_mode = 'csv'
    # Bug fix: the original used `!= 'key-value' or != 'csv'`, which is
    # always True, so *every* mode (including valid ones) raised.
    if output_mode not in ('key-value', 'csv'):
        raise ValueError("The output mode must be 'key-value' or 'csv'.")
    # NOTE(review): the C++ side enforces "key_value" (underscore) while
    # this accepts 'key-value' (hyphen) — confirm which spelling
    # core.nvprof_init actually expects.
    core.nvprof_init(output_file, output_mode)
|
||||
|
||||
|
||||
def nvporf_start():
    """Turn on data collection in the active CUDA profiling tool."""
    core.nvprof_start()
|
||||
|
||||
|
||||
def nvporf_stop():
    """Turn off data collection in the active CUDA profiling tool."""
    core.nvprof_stop()
|
||||
|
||||
|
||||
class profiler(object):
    """Context manager that wraps a code region with CUDA profiling.

    :param output_file: file the CUDA profiler writes its results to.
    :param output_mode: 'key-value' or 'csv'; defaults to 'csv'
                        (see nvporf_init).
    :param enabled: when False the context manager is a no-op.
    """

    def __init__(self, output_file, output_mode=None, enabled=True):
        self.enabled = enabled
        # Always initialize `entered`, even when disabled — the original
        # returned early first, leaving the attribute unset.
        self.entered = False
        if not self.enabled:
            return
        nvporf_init(output_file, output_mode)

    def __enter__(self):
        if not self.enabled:
            # Still return self so `with profiler(...) as p:` binds an
            # object instead of None when profiling is disabled.
            return self
        if self.entered:
            raise RuntimeError("The profiler traces are not reentrant")
        self.entered = True
        nvporf_start()
        return self

    def __exit__(self, exc_type, exc_value, tb):
        # Stop the profiler even if the body raised; the original
        # re-raised first, leaving collection running on error.
        if self.enabled:
            nvporf_stop()
        # Returning False lets any exception propagate naturally —
        # manually re-raising exc_value is unnecessary.
        return False
|
@ -0,0 +1,17 @@
|
||||
import paddle.v2.fluid.profiler as profiler
|
||||
import paddle.v2.fluid.layers as layers
|
||||
import numpy as np
|
||||
|
||||
# NOTE(review): these names were used but never imported in the original
# script — confirm the Executor/framework import paths against the repo.
import paddle.v2.fluid.core as core
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.executor import Executor

place = core.GPUPlace(0)
exe = Executor(place)

epoc = 8
dshape = [4, 3, 28, 28]
data = layers.data(name='data', shape=dshape, dtype='float32')
conv = layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1])

# Renamed from `input` to avoid shadowing the builtin.
input_tensor = core.LoDTensor()
# `profiler` is the imported module; the context manager is the class
# inside it (the original called the module itself — a TypeError).
with profiler.profiler("cuda_profiler.txt") as nvprof:
    for i in range(epoc):
        input_tensor.set(np.random.random(dshape).astype("float32"), place)
        # Feed the populated LoDTensor, not the `data` layer variable
        # (the original fed the symbolic Variable, ignoring the tensor).
        exe.run(framework.default_main_program(),
                feed={'data': input_tensor})
Loading…
Reference in new issue