/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "ge_runtime/task/cce_task.h"
#include "ge_runtime/task/task_factory.h"
namespace ge {
namespace model_runner {
// Constructs a CCE task and selects the stream it will be launched on.
//
// All device-side handles (stub function, args, sm_desc, flowtable) start out
// null. When task_info is null only a warning is logged and stream_ stays null.
//
// Stream selection:
//   - exactly one stream in the model context: that stream is used, whatever
//     stream_id() says;
//   - otherwise stream_id() indexes the stream list if it is in range;
//   - otherwise a warning is logged and stream_ stays null.
//
// NOTE(review): is_flowtable_ is initialised to false and nothing in this file
// sets it to true afterwards, so the flowtable branch in Distribute() looks
// unreachable from here - confirm whether it should be taken from task_info.
CceTask::CceTask(const ModelContext &model_context, const std::shared_ptr<CceTaskInfo> &task_info)
    : TaskRepeater<CceTaskInfo>(model_context, task_info),
      task_info_(task_info),
      stream_(nullptr),
      stub_func_(nullptr),
      args_(nullptr),
      sm_desc_(nullptr),
      flowtable_(nullptr),
      is_flowtable_(false) {
  if (task_info_ == nullptr) {
    GELOGW("task_info_ is null!");
    return;
  }

  // Bind by const reference: the list is only read, so there is no need to copy it.
  const auto &stream_list = model_context.stream_list();
  if (stream_list.size() == 1) {
    stream_ = stream_list[0];
  } else if (stream_list.size() > task_info_->stream_id()) {
    stream_ = stream_list[task_info_->stream_id()];
  } else {
    GELOGW("index: %u >= stream_list.size(): %zu.", task_info_->stream_id(), stream_list.size());
  }
}
// Releases every device allocation still owned by this task.
// args_ and flowtable_ go through FreeRtMem; sm_desc_ is released with
// rtMemFreeManaged. A failed release is logged and otherwise ignored.
CceTask::~CceTask() {
  FreeRtMem(&args_);
  FreeRtMem(&flowtable_);

  if (sm_desc_ != nullptr) {
    const rtError_t free_status = rtMemFreeManaged(sm_desc_);
    if (free_status != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", free_status);
    }
  }
  sm_desc_ = nullptr;
}
void CceTask::FreeRtMem(void **ptr) noexcept {
if (ptr == nullptr || *ptr == nullptr) {
return;
}
rtError_t ret = rtFree(*ptr);
if (ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", ret);
}
*ptr = nullptr;
}
bool CceTask::Distribute() {
GELOGI("Distribute CCETask start.");
if (stream_ == nullptr) {
GELOGE(PARAM_INVALID, "stream_ is null!");
return false;
}
// Get stub_func
if (task_info_->stub_func().empty()) {
GELOGE(PARAM_INVALID, "kernel_info->stub_func is empty!");
return false;
}
rtError_t rt_ret = rtGetFunctionByName(const_cast<char *>(task_info_->stub_func().c_str()), &stub_func_);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtGetFunctionByName failed, ret: 0x%X", rt_ret);
stub_func_ = nullptr;
return false;
}
GELOGI("CCETask: stub_func = %s [%p].", task_info_->stub_func().c_str(), stub_func_);
5 years ago
// Flowtable
if (is_flowtable_) {
rt_ret = rtMalloc(&flowtable_, task_info_->flow_table().size(), RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return false;
}
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "task information.", task_info_->flow_table().size())
5 years ago
rt_ret = rtMemcpy(flowtable_, task_info_->flow_table().size(), task_info_->flow_table().data(),
task_info_->flow_table().size(), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return false;
}
// Modify flowtable addr in args
auto args = const_cast<uint8_t *>(task_info_->args().data());
auto task_offset = reinterpret_cast<uint16_t *>(const_cast<uint8_t *>(task_info_->args_offset().data()));
5 years ago
if (task_info_->args().size() < (task_offset[0] + sizeof(uint64_t))) {
GELOGE(FAILED, "(context.args_offset().data()))[0]:%u + sizeof(uint64_t):%zu > kernelDef.args().size():%zu",
5 years ago
static_cast<uint32_t>(task_offset[0]), sizeof(uint64_t), task_info_->args().size());
return false;
}
*(reinterpret_cast<uintptr_t *>(args + task_offset[0])) = reinterpret_cast<uintptr_t>(flowtable_);
}
// Args
rt_ret = rtMalloc(&args_, task_info_->args_size(), RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return false;
}
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "task information.", task_info_->args_size())
5 years ago
rt_ret = rtMemcpy(args_, task_info_->args_size(), task_info_->args().data(), task_info_->args_size(),
RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return false;
}
// L2 sm_desc
if (!task_info_->sm_desc().empty()) {
rt_ret = rtMemAllocManaged(&sm_desc_, task_info_->sm_desc().size(), RT_MEMORY_SPM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return false;
}
rt_ret = rtMemcpy(sm_desc_, task_info_->sm_desc().size(), task_info_->sm_desc().data(),
5 years ago
task_info_->sm_desc().size(), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return false;
}
}
// Kernel launch
rt_ret = rtKernelLaunch(stub_func_, task_info_->block_dim(), args_, task_info_->args_size(),
static_cast<rtSmDesc_t *>(sm_desc_), stream_);
5 years ago
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return false;
}
return true;
}
// Associates TaskInfoType::CCE with CceTask / CceTaskInfo.
// NOTE(review): REGISTER_TASK is presumably provided by task_factory.h - confirm what it expands to.
REGISTER_TASK(TaskInfoType::CCE, CceTask, CceTaskInfo);
} // namespace model_runner
} // namespace ge