parent
7de625fbca
commit
8d147deb07
File diff suppressed because it is too large
Load Diff
@ -1,11 +1,14 @@
|
||||
if(ENABLE_GPU)
    file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "device/gpu/*.cc")
    set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS
        SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER)
    add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST})
endif()

if(ENABLE_D)
    file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "device/ascend/*.cc" "device/common/*.cc")
    set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS
        SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER)
    add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST})
    # Profiler proto messages are generated by protobuf; build after it.
    add_dependencies(_mindspore_profiler_obj mindspore::protobuf)
endif()
|
||||
|
@ -0,0 +1,97 @@
|
||||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "profiler/device/common/memory_profiling.h"
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
#include "utils/log_adapter.h"
|
||||
#include "utils/ms_context.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace profiler {
|
||||
|
||||
std::shared_ptr<GraphMemory> MemoryProfiling::AddGraphMemoryNode(uint32_t graph_id) {
|
||||
std::shared_ptr<GraphMemory> node = std::make_shared<GraphMemory>(graph_id);
|
||||
graph_memory_[graph_id] = node;
|
||||
return node;
|
||||
}
|
||||
|
||||
std::shared_ptr<GraphMemory> MemoryProfiling::GetGraphMemoryNode(uint32_t graph_id) {
|
||||
auto node = graph_memory_.find(graph_id);
|
||||
if (node != graph_memory_.end()) {
|
||||
return node->second;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void MemoryProfiling::MemoryToPB() {
|
||||
memory_proto_.set_total_mem(device_mem_size_);
|
||||
for (const auto &graph : graph_memory_) {
|
||||
GraphMemProto *graph_proto = memory_proto_.add_graph_mem();
|
||||
graph_proto->set_graph_id(graph.second->GetGraphId());
|
||||
graph_proto->set_static_mem(graph.second->GetStaticMemSize());
|
||||
// node memory to PB
|
||||
for (const auto &node : graph.second->GetNodeMemory()) {
|
||||
NodeMemProto *node_mem = graph_proto->add_node_mems();
|
||||
node_mem->set_node_name(node.GetNodeName());
|
||||
node_mem->set_node_id(node.GetNodeId());
|
||||
for (const auto &id : node.GetInputTensorId()) {
|
||||
node_mem->add_input_tensor_id(id);
|
||||
}
|
||||
for (const auto &id : node.GetOutputTensorId()) {
|
||||
node_mem->add_output_tensor_id(id);
|
||||
}
|
||||
for (const auto &id : node.GetOutputTensorId()) {
|
||||
node_mem->add_workspace_tensor_id(id);
|
||||
}
|
||||
}
|
||||
// tensor memory to PB
|
||||
for (const auto &node : graph.second->GetTensorMemory()) {
|
||||
TensorMemProto *tensor_mem = graph_proto->add_tensor_mems();
|
||||
tensor_mem->set_tensor_id(node.GetTensorId());
|
||||
tensor_mem->set_size(node.GetAlignedSize());
|
||||
std::string type = node.GetType();
|
||||
tensor_mem->set_type(type);
|
||||
tensor_mem->set_life_start(node.GetLifeStart());
|
||||
tensor_mem->set_life_end(node.GetLifeEnd());
|
||||
std::string life_long = node.GetLifeLong();
|
||||
tensor_mem->set_life_long(life_long);
|
||||
}
|
||||
}
|
||||
MS_LOG(INFO) << "Memory profiling data to PB end";
|
||||
return;
|
||||
}
|
||||
|
||||
void MemoryProfiling::SaveMemoryProfiling() {
|
||||
auto context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context);
|
||||
std::string dir_path = context->get_param<std::string>(MS_CTX_PROFILING_DIR_PATH);
|
||||
auto device_id = context->get_param<uint32_t>(MS_CTX_DEVICE_ID);
|
||||
std::string file = dir_path + std::string("/memory_usage_") + std::to_string(device_id) + std::string(".pb");
|
||||
|
||||
MemoryToPB();
|
||||
|
||||
std::fstream handle(file, std::ios::out | std::ios::trunc | std::ios::binary);
|
||||
if (!memory_proto_.SerializeToOstream(&handle)) {
|
||||
MS_LOG(ERROR) << "Save memory profiling data to file failed";
|
||||
}
|
||||
handle.close();
|
||||
MS_LOG(INFO) << "Start save memory profiling data to " << file << " end";
|
||||
return;
|
||||
}
|
||||
} // namespace profiler
|
||||
} // namespace mindspore
|
@ -0,0 +1,124 @@
|
||||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_PROFILER_DEVICE_COMMON_PROFILING_MEMORY_H
|
||||
#define MINDSPORE_PROFILER_DEVICE_COMMON_PROFILING_MEMORY_H
|
||||
|
||||
#include "proto/memory_profiling.pb.h"
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "utils/ms_context.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace profiler {
|
||||
|
||||
// Per-execution-node memory record: the node's identity plus the ids of
// the input / output / workspace tensors it touches.
class NodeMemory {
 public:
  NodeMemory() : node_name_(""), node_id_(0) {}
  ~NodeMemory() = default;

  void SetNodeName(const std::string &name) { node_name_ = name; }
  void SetNodeId(uint64_t node_id) { node_id_ = node_id; }
  void AddInputTensorId(uint64_t tensor_id) { input_tensor_id_.emplace_back(tensor_id); }
  void AddOutputTensorId(uint64_t tensor_id) { output_tensor_id_.emplace_back(tensor_id); }
  void AddWorkSpaceTensorId(uint64_t tensor_id) { workspace_tensor_id_.emplace_back(tensor_id); }
  // Getters return const references to avoid copying the id vectors and
  // the name on every access (MemoryToPB reads them for every node).
  const std::string &GetNodeName() const { return node_name_; }
  uint64_t GetNodeId() const { return node_id_; }
  const std::vector<uint64_t> &GetInputTensorId() const { return input_tensor_id_; }
  const std::vector<uint64_t> &GetOutputTensorId() const { return output_tensor_id_; }
  const std::vector<uint64_t> &GetWorkspaceTensorId() const { return workspace_tensor_id_; }

 private:
  std::string node_name_;
  uint64_t node_id_;
  std::vector<uint64_t> input_tensor_id_;
  std::vector<uint64_t> output_tensor_id_;
  std::vector<uint64_t> workspace_tensor_id_;
};
|
||||
|
||||
// Per-tensor memory record: aligned size, category string, and the
// execution-node interval [life_start, life_end] during which the
// tensor's memory is allocated.
class TensorMemory {
 public:
  TensorMemory() : tensor_id_(0), size_(0), type_(""), life_start_(0), life_end_(0), life_long_("") {}
  ~TensorMemory() = default;

  void SetTensorId(uint64_t tensor_id) { tensor_id_ = tensor_id; }
  void SetAlignedSize(uint64_t size) { size_ = size; }
  void SetType(const std::string &type) { type_ = type; }
  void SetLifeStart(uint64_t start) { life_start_ = start; }
  void SetLifeEnd(uint64_t end) { life_end_ = end; }
  void SetLifeLong(const std::string &life_long) { life_long_ = life_long; }
  uint64_t GetTensorId() const { return tensor_id_; }
  uint64_t GetAlignedSize() const { return size_; }
  // String getters return const references to avoid a copy per access.
  const std::string &GetType() const { return type_; }
  uint64_t GetLifeStart() const { return life_start_; }
  uint64_t GetLifeEnd() const { return life_end_; }
  const std::string &GetLifeLong() const { return life_long_; }

 private:
  uint64_t tensor_id_;
  uint64_t size_;          // aligned tensor size
  std::string type_;       // see TensorType in somas_tensor.h
  uint64_t life_start_;    // the exe node id at which tensor memory allocated
  uint64_t life_end_;      // the exe node id at which tensor memory deallocated
  std::string life_long_;  // see LifeLongType in somas_tensor.h
};
|
||||
|
||||
class GraphMemory {
|
||||
public:
|
||||
explicit GraphMemory(uint32_t graph_id) : graph_id_(graph_id), static_mem_size_(0) {}
|
||||
~GraphMemory() = default;
|
||||
void AddStaticMemorySize(uint32_t size) { static_mem_size_ += size; }
|
||||
void AddNodeMemory(const NodeMemory &node) { node_memory_.emplace_back(node); }
|
||||
void AddTensorMemory(const TensorMemory &node) { tensor_memory_.emplace_back(node); }
|
||||
uint32_t GetGraphId() const { return graph_id_; }
|
||||
uint32_t GetStaticMemSize() const { return static_mem_size_; }
|
||||
std::vector<NodeMemory> GetNodeMemory() const { return node_memory_; }
|
||||
std::vector<TensorMemory> GetTensorMemory() const { return tensor_memory_; }
|
||||
|
||||
private:
|
||||
uint32_t graph_id_;
|
||||
uint32_t static_mem_size_;
|
||||
std::vector<NodeMemory> node_memory_;
|
||||
std::vector<TensorMemory> tensor_memory_;
|
||||
};
|
||||
|
||||
class MemoryProfiling {
|
||||
public:
|
||||
MemoryProfiling() = default;
|
||||
~MemoryProfiling() = default;
|
||||
|
||||
static MemoryProfiling &GetInstance() {
|
||||
static MemoryProfiling instance;
|
||||
return instance;
|
||||
}
|
||||
|
||||
MemoryProto &GetMemProto() { return memory_proto_; }
|
||||
std::shared_ptr<GraphMemory> AddGraphMemoryNode(uint32_t graph_id);
|
||||
std::shared_ptr<GraphMemory> GetGraphMemoryNode(uint32_t graph_id);
|
||||
void SetDeviceMemSize(uint64_t size) { device_mem_size_ = size; }
|
||||
void MemoryToPB();
|
||||
void SaveMemoryProfiling();
|
||||
|
||||
private:
|
||||
MemoryProto memory_proto_;
|
||||
std::map<uint32_t, std::shared_ptr<GraphMemory>> graph_memory_;
|
||||
uint64_t device_mem_size_;
|
||||
};
|
||||
} // namespace profiler
|
||||
} // namespace mindspore
|
||||
#endif
|
@ -0,0 +1,50 @@
|
||||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
syntax = "proto3";

package mindspore.profiler;

// Top-level container: device-wide totals plus one entry per profiled graph.
message MemoryProto {
  repeated GraphMemProto graph_mem = 1;  // memory usage of multiple graphs
  int64 total_mem = 2;                   // total allocated device memory
}

// Memory usage of a single graph.
message GraphMemProto {
  int64 graph_id = 1;                      // graph id
  int64 static_mem = 2;                    // size of allocated static memory for current graph
  repeated NodeMemProto node_mems = 3;     // execution nodes
  repeated TensorMemProto tensor_mems = 4; // all tensors
  string fp_start = 5;                     // node name of fp start
  string bp_end = 6;                       // node name of bp end
}

// Tensor ids touched by one execution node.
message NodeMemProto {
  string node_name = 1;                   // node name
  int64 node_id = 2;                      // node id with respect to the execution order
  repeated int64 input_tensor_id = 3;     // input tensor id
  repeated int64 output_tensor_id = 4;    // output tensor id
  repeated int64 workspace_tensor_id = 5; // workspace tensor id
}

// Size and lifetime of a single tensor.
message TensorMemProto {
  int64 tensor_id = 1;  // tensor id
  int64 size = 2;       // aligned tensor size
  string type = 3;      // tensor type, e.g. Common, OutputOnly
  int64 life_start = 4; // the exe node id at which tensor memory allocated
  int64 life_end = 5;   // the exe node id at which tensor memory deallocated
  string life_long = 6; // see LifeLongType enum
}
|
Loading…
Reference in new issue