@@ -108,6 +108,7 @@ std::mutex DavinciModel::tvm_bin_mutex_;
DavinciModel::DavinciModel(int32_t priority, const std::shared_ptr<ModelListener> &listener)
    : weights_mem_base_(nullptr),
      var_mem_base_(nullptr),
      fixed_mem_base_(0),
      mem_base_(nullptr),
      is_inner_mem_base_(false),
      is_inner_weight_base_(false),
@@ -670,6 +671,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
    data_inputer_ = new (std::nothrow) DataInputer();
    GE_CHK_BOOL_RET_STATUS(data_inputer_ != nullptr, MEMALLOC_FAILED, "data_inputer_ is nullptr.");
  }
  fixed_mem_base_ = reinterpret_cast<uintptr_t>(mem_base_);
  GE_TIMESTAMP_END(InitModelMem, "GraphLoader::InitModelMem");

  for (const ge::NodePtr &node : compute_graph->GetDirectNode()) {
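
Note on the two hunks above: fixed_mem_base_ is zero-initialized in the constructor and recorded in Init(), right after the model memory is set up, as the mem_base_ value in effect at that point. The zero-copy helpers changed below compare this snapshot against the current mem_base_ to decide whether I/O addresses that were recorded against the original base have to be rebased.
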
@@ -2828,7 +2830,32 @@ Status DavinciModel::CreateKnownZeroCopyMap(const vector<void *> &inputs, const
  return SUCCESS;
}

void DavinciModel::SetTotalIOAddrs(const vector<void *> &io_addrs) {
  if (fixed_mem_base_ == reinterpret_cast<uintptr_t>(mem_base_)) {
    total_io_addrs_.insert(total_io_addrs_.end(), io_addrs.begin(), io_addrs.end());
    return;
  }

  for (size_t i = 0; i < io_addrs.size(); ++i) {
    uintptr_t addr = reinterpret_cast<uintptr_t>(io_addrs[i]);
    if ((fixed_mem_base_ <= addr) && (addr < fixed_mem_base_ + runtime_param_.mem_size)) {
      total_io_addrs_.emplace_back(mem_base_ + (addr - fixed_mem_base_));
    } else {
      total_io_addrs_.emplace_back(io_addrs[i]);
    }
  }
}
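
The rebase arithmetic in SetTotalIOAddrs (and in UpdateKnownZeroCopyAddr below) preserves each address's offset inside the model memory block: an address in [fixed_mem_base_, fixed_mem_base_ + runtime_param_.mem_size) is moved to the same offset from the current mem_base_, and anything outside that range is passed through unchanged. The following is a minimal standalone sketch of that translation, assuming the members behave as described above; MemState and Rebase are illustrative stand-ins, not code from this patch.

#include <cstdint>

// Illustrative stand-ins for the DavinciModel members this patch touches.
struct MemState {
  uintptr_t fixed_mem_base;  // base captured once at the end of model memory init
  uint8_t *mem_base;         // current model memory base (may have moved since)
  uint64_t mem_size;         // size of the model memory block
};

// Rebase one address: keep its offset inside the model memory block,
// leave addresses outside the block untouched.
inline void *Rebase(const MemState &s, void *addr) {
  const uintptr_t a = reinterpret_cast<uintptr_t>(addr);
  if (s.fixed_mem_base <= a && a < s.fixed_mem_base + s.mem_size) {
    return s.mem_base + (a - s.fixed_mem_base);
  }
  return addr;
}

With that translation in mind, the two functions differ mainly in where the result goes: SetTotalIOAddrs appends the translated addresses to total_io_addrs_ (with a fast path that skips the per-address work when the base has not moved), while UpdateKnownZeroCopyAddr rewrites its argument vector in place before the known-node address lookup.
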
Status DavinciModel::UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs) {
  if (fixed_mem_base_ != reinterpret_cast<uintptr_t>(mem_base_)) {
    for (size_t i = 0; i < total_io_addrs.size(); ++i) {
      uintptr_t addr = reinterpret_cast<uintptr_t>(total_io_addrs[i]);
      if ((fixed_mem_base_ <= addr) && (addr < fixed_mem_base_ + runtime_param_.mem_size)) {
        total_io_addrs[i] = mem_base_ + (addr - fixed_mem_base_);
      }
    }
  }

  for (size_t i = 0; i < total_io_addrs.size(); ++i) {
    auto it_in = knonw_input_data_info_.find(total_io_addrs[i]);
    if (it_in != knonw_input_data_info_.end()) {