|
|
|
@ -19,63 +19,102 @@
|
|
|
|
|
#include <memory>
|
|
|
|
|
#include <string>
|
|
|
|
|
#include <vector>
|
|
|
|
|
#include <set>
|
|
|
|
|
#include <unordered_map>
|
|
|
|
|
#include "session/kernel_graph.h"
|
|
|
|
|
#include "utils/contract.h"
|
|
|
|
|
|
|
|
|
|
namespace mindspore {
|
|
|
|
|
namespace device {
|
|
|
|
|
namespace ascend {
|
|
|
|
|
struct ProfilingTraceInfo {
|
|
|
|
|
// execute order's first execute op(like: Cast or Four2Five ...), except tdt op(GetNext ...)
|
|
|
|
|
std::string profiling_trace_begin;
|
|
|
|
|
std::string trace_begin;
|
|
|
|
|
// get first net_output(apply kernel) from graph outputs: fp ->net_output<- bp
|
|
|
|
|
std::string profiling_trace_bp_end;
|
|
|
|
|
std::string trace_bp_end;
|
|
|
|
|
// execute order's end execute (like: Conv2DBackpropFilter)
|
|
|
|
|
std::string profiling_trace_netoutput;
|
|
|
|
|
std::string trace_netoutput;
|
|
|
|
|
|
|
|
|
|
std::string profiling_allreduce1_start;
|
|
|
|
|
|
|
|
|
|
std::string profiling_allreduce1_end;
|
|
|
|
|
|
|
|
|
|
std::string profiling_allreduce2_start;
|
|
|
|
|
|
|
|
|
|
std::string profiling_allreduce2_end;
|
|
|
|
|
// profiling specific op, such as AllReduce;
|
|
|
|
|
std::set<std::string> trace_custom_node;
|
|
|
|
|
|
|
|
|
|
// 1. insert profiling_trace_begin if profiling_trace_bp_end is not empty.
|
|
|
|
|
// 2. op lanuch get task info with callback func.
|
|
|
|
|
// 3. insert profiling_trace_bp_end.
|
|
|
|
|
// 4. insert profiling_trace_net_output if profiling_trace_bp_end is not empty.
|
|
|
|
|
|
|
|
|
|
bool IsValid() const { return !(profiling_trace_begin.empty() || profiling_trace_bp_end.empty()); }
|
|
|
|
|
bool IsValid() const { return !(trace_begin.empty() || trace_bp_end.empty() || trace_netoutput.empty()); }
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
struct ProfilingContent {
|
|
|
|
|
// true -send data from device to host and finish profiling
|
|
|
|
|
bool notify;
|
|
|
|
|
uint64_t profiler_trace_id;
|
|
|
|
|
uint32_t flags;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
class ProfilingUtils {
|
|
|
|
|
public:
|
|
|
|
|
ProfilingUtils() = default;
|
|
|
|
|
~ProfilingUtils() = default;
|
|
|
|
|
static bool GetProfilingTraceInfo(const std::shared_ptr<session::KernelGraph> &graph_ptr,
|
|
|
|
|
ProfilingTraceInfo *profiling_trace_info);
|
|
|
|
|
static void ProfilingTraceFpStart(const std::shared_ptr<session::KernelGraph> &graph_ptr, const AnfNodePtr &anf_node,
|
|
|
|
|
const ProfilingTraceInfo &profiling_trace_info, std::vector<CNodePtr> *kernel_list);
|
|
|
|
|
static void ProfilingAllReduce(const std::shared_ptr<session::KernelGraph> &graph_ptr, const AnfNodePtr &anf_node,
|
|
|
|
|
int job_id, const std::string &profiling_node_name,
|
|
|
|
|
std::vector<CNodePtr> *kernel_list);
|
|
|
|
|
static void ProfilingTraceEnd(const std::shared_ptr<session::KernelGraph> &graph_ptr, const AnfNodePtr &anf_node,
|
|
|
|
|
const ProfilingTraceInfo &profiling_trace_info, std::vector<CNodePtr> *kernel_list);
|
|
|
|
|
|
|
|
|
|
// Insert job_id profiling node and fp_start profiling node.
|
|
|
|
|
// Job_id is got from envs, which shound be a number greater than 255
|
|
|
|
|
// Fp_start node should been inserted in the start of a network, and the log_id is hard code to 1.
|
|
|
|
|
static void ProfilingTraceFpStart(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
|
|
|
|
|
NotNull<session::KernelGraph *> graph_ptr,
|
|
|
|
|
NotNull<std::vector<CNodePtr> *> kernel_list);
|
|
|
|
|
|
|
|
|
|
// Insert net output profiling node, which tells the device to stop profiling.
|
|
|
|
|
// The notify in struct ProfilingContent should be 'true', which tells the device to send data to host.
|
|
|
|
|
static void ProfilingTraceEnd(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
|
|
|
|
|
NotNull<session::KernelGraph *> graph_ptr,
|
|
|
|
|
NotNull<std::vector<CNodePtr> *> kernel_list);
|
|
|
|
|
|
|
|
|
|
// Insert bp_end profiling node, which should been inserted after the last backpropagation CNode in the network.
|
|
|
|
|
static void ProfilingTraceBpEnd(const mindspore::AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
|
|
|
|
|
NotNull<session::KernelGraph *> graph_ptr,
|
|
|
|
|
NotNull<std::vector<mindspore::CNodePtr> *> kernel_list);
|
|
|
|
|
|
|
|
|
|
// Mapping graph id and the kernels' name in the graph
|
|
|
|
|
static void SetGraphKernelName(uint32_t graph_id, const std::vector<std::string> &kernel_names);
|
|
|
|
|
|
|
|
|
|
// Mapping task_id and kernel name for device to generate the time cost of specific kernel.
|
|
|
|
|
// Device calculate the time cost of the task which is marked by task id.
|
|
|
|
|
// But we need data of (kernel name , time cost)
|
|
|
|
|
static void ReportProfilingData(uint32_t graph_id, const std::vector<uint32_t> &task_ids);
|
|
|
|
|
|
|
|
|
|
static const char kProfiling[];
|
|
|
|
|
static const char kNotify[];
|
|
|
|
|
static const char kProfilerTraceId[];
|
|
|
|
|
static const char kFlags[];
|
|
|
|
|
// Get profiling trace point from envs.
|
|
|
|
|
// export PROFILING_FP_START='full name of the first cnode to execute'
|
|
|
|
|
// export PROFILING_BP_END='full name of the last backpropagation cnode to execute'
|
|
|
|
|
// export PROFILING_ITER_END='full name of last cnode in graph to execute'
|
|
|
|
|
// And other cnode, like AllReduce, export PROFILING_CUSTOM_1='full name of AllReduce cnode'
|
|
|
|
|
// GetNext, export PROFIFLING_CUSTOM_2='full name fo GetNext cnode'
|
|
|
|
|
// The variable i in PROFILING_CUSTOM_i should start from 1 without interruption.
|
|
|
|
|
static ProfilingTraceInfo GetProfilingTraceFromEnv(NotNull<session::KernelGraph *> graph_ptr);
|
|
|
|
|
|
|
|
|
|
// Insert two profiling trace points, one in front and one behind
|
|
|
|
|
static void ProfilingCustomOp(const mindspore::AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
|
|
|
|
|
NotNull<session::KernelGraph *> graph_ptr,
|
|
|
|
|
NotNull<std::vector<mindspore::CNodePtr> *> kernel_list);
|
|
|
|
|
|
|
|
|
|
inline static constexpr char kProfiling[] = "Profiling";
|
|
|
|
|
inline static constexpr char kNotify[] = "notify";
|
|
|
|
|
inline static constexpr char kProfilerTraceId[] = "profiler_trace_id";
|
|
|
|
|
inline static constexpr char kFlags[] = "flags";
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
static bool GetNetOutput(AnfNodePtr anf_node, std::string *profiling_trace_net_output);
|
|
|
|
|
static CNodePtr CreateProfilingCNode(const std::shared_ptr<session::KernelGraph> &graph_ptr, bool notify,
|
|
|
|
|
uint64_t profiler_trace_id, uint32_t flags);
|
|
|
|
|
static NotNull<CNodePtr> CreateProfilingCNode(const ProfilingContent &profiling_content,
|
|
|
|
|
NotNull<session::KernelGraph *> graph_ptr);
|
|
|
|
|
static CNodePtr CreateProfilingCNodeWithStream(const AnfNodePtr &anf_node, const ProfilingContent &profiling_content,
|
|
|
|
|
NotNull<session::KernelGraph *> graph_ptr);
|
|
|
|
|
static std::string GetTraceBegin(const std::vector<CNodePtr> &cnode_exec_order);
|
|
|
|
|
static std::string GetTraceBpEnd();
|
|
|
|
|
static std::string GetTraceNetoutput(const std::vector<CNodePtr> &cnode_exec_order);
|
|
|
|
|
|
|
|
|
|
// graph id --> (kernel name list)
|
|
|
|
|
static std::unordered_map<uint32_t, std::vector<std::string>> graph_kernel_name_;
|
|
|
|
|
static uint32_t custom_node_index_;
|
|
|
|
|
};
|
|
|
|
|
} // namespace ascend
|
|
|
|
|
} // namespace device
|
|
|
|
|