!1131 invoke sub kernel with tiling_key in dynamic shape & all scene

From: @HW_KK
Reviewed-by: 
Signed-off-by:
pull/1131/MERGE
Committed by mindspore-ci-bot via Gitee
commit fbf9ece38e
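Summary (from the diff below): this change adds the RT_MODEL_TASK_ALL_KERNEL task type. Instead of registering one stub function per compiled kernel, the whole TBE binary is registered once through rtRegisterAllKernel; at launch time the concrete sub kernel is selected by the name "<original_kernel_key>_<tiling_key>" via rtKernelLaunchWithHandle. Both the dynamic-shape single-op path and the hybrid executor are covered.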

@@ -1199,6 +1199,8 @@ Status HybridModelBuilder::IndexTaskDefs() {
op_index = task_def.kernel_ex().op_index();
} else if (task_type == RT_MODEL_TASK_HCCL) {
op_index = task_def.kernel_hccl().op_index();
} else if (task_type == RT_MODEL_TASK_ALL_KERNEL) {
op_index = task_def.kernel_with_handle().context().op_index();
} else {
GELOGD("Skip task type: %d", static_cast<int>(task_type));
continue;
@@ -1211,7 +1213,7 @@ Status HybridModelBuilder::IndexTaskDefs() {
}
auto &node = iter->second;
- if (task_type == RT_MODEL_TASK_KERNEL) {
+ if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) {
ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(node->GetOpDesc());
}

File diff suppressed because it is too large.

@@ -28,6 +28,32 @@
namespace ge {
namespace hybrid {
class TbeHandleHolder {
public:
TbeHandleHolder(void *bin_handle);
~TbeHandleHolder();
void SetBinHandle(void *bin_handle) { bin_handle_ = bin_handle; }
void *GetBinHandle() { return bin_handle_; }
private:
friend class TbeHandleRegistry;
void *bin_handle_ = nullptr;
};
class TbeHandleRegistry {
public:
static TbeHandleRegistry &GetInstance() {
static TbeHandleRegistry instance;
return instance;
}
bool AddHandle(std::unique_ptr<TbeHandleHolder> &&holder);
private:
std::set<std::unique_ptr<TbeHandleHolder>> registered_handles_;
};
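For context, a minimal usage sketch of the new registry (hedged: "binary" stands for an rtDevBinary_t describing the TBE kernel object, and the handle is assumed to come from rtRegisterAllKernel as wired up in aicore_op_task.cc; the registry keeps the holder alive for later cleanup):
void *bin_handle = nullptr;
// Register the whole binary once; the single handle serves every tiling variant.
GE_CHK_RT_RET(rtRegisterAllKernel(&binary, &bin_handle));
auto holder = std::unique_ptr<TbeHandleHolder>(new TbeHandleHolder(bin_handle));
if (!TbeHandleRegistry::GetInstance().AddHandle(std::move(holder))) {
  GELOGE(INTERNAL_ERROR, "Failed to add registered handle.");
}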
class AiCoreOpTask {
public:
AiCoreOpTask() = default;
@@ -67,6 +93,9 @@ class AiCoreOpTask {
Status InitWithTaskDef(const OpDesc &node, const domi::TaskDef &task_def);
Status InitTilingInfo(const OpDesc &op_desc);
Status RegisterTbeHandle(const OpDesc &op_desc);
Status RegisterKernelHandle(const OpDesc &op_desc);
Status InitWithKernelDef(const OpDesc &op_desc, const domi::TaskDef &task_def);
Status InitWithKernelDefWithHandle(const OpDesc &node, const domi::TaskDef &task_def);
std::string stub_name_;
void *stub_func_ = nullptr;
@@ -76,6 +105,11 @@ class AiCoreOpTask {
bool clear_atomic_ = true;
bool is_single_op_ = false;
std::vector<int> output_indices_to_skip_;
string original_kernel_key_;
string node_info_;
uint32_t tiling_key_ = 0;
void *handle_ = nullptr;
bool is_dynamic_ = false;
};
class AtomicAddrCleanOpTask : public AiCoreOpTask {

@@ -261,7 +261,7 @@ Status SingleOpModel::BuildTaskList(StreamResource *stream_resource, SingleOp &s
if (kernel_type == ccKernelType::TE) {
GELOGD("Building TBE task");
TbeOpTask *tbe_task = nullptr;
- auto ret = BuildKernelTask(task_def.kernel(), &tbe_task);
+ auto ret = BuildKernelTask(task_def, &tbe_task);
if (ret != SUCCESS) {
return ret;
}
@@ -332,9 +332,11 @@ void SingleOpModel::ParseArgTable(OpTask *task, SingleOp &op) {
}
}
- Status SingleOpModel::BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTask **task) {
+ Status SingleOpModel::BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task) {
GE_CHECK_NOTNULL(task);
- const auto &context = kernel_def.context();
+ auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
+ const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() :
+                                                           task_def.kernel_with_handle().context();
auto iter = op_list_.find(context.op_index());
if (iter == op_list_.end()) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "op desc not found. op index = %u", context.op_index());
@@ -347,7 +349,7 @@ Status SingleOpModel::BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTa
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
- auto builder = TbeTaskBuilder(model_name_, iter->second, kernel_def);
+ auto builder = TbeTaskBuilder(model_name_, iter->second, task_def);
auto ret = builder.BuildTask(*tbe_task, model_params_);
if (ret != SUCCESS) {
delete tbe_task;
@@ -418,13 +420,15 @@ Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) {
}
Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingleOp &single_op) {
- const domi::KernelDef &kernel_def = task_def.kernel();
- const auto &context = kernel_def.context();
+ auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
+ const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() :
+                                                           task_def.kernel_with_handle().context();
auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
if (kernel_type == ccKernelType::TE) {
GELOGD("Building TBE task");
TbeOpTask *tbe_task = nullptr;
- GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &tbe_task));
+ GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def, &tbe_task));
tbe_task->SetModelArgs(model_name_, model_id_);
single_op.op_task_.reset(tbe_task);
} else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) {
@@ -453,7 +457,7 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) {
GELOGI("[%s] Task[%d], type = %u, DebugString = %s", model_name_.c_str(), i, task_def.type(),
task_def.DebugString().c_str());
auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
- if (task_type == RT_MODEL_TASK_KERNEL) {
+ if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) {
if (single_op.op_task_ != nullptr) {
GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Do not support dynamic op with multiple tasks.");
return ACL_ERROR_GE_OP_TASK_TYPE_INVALID;

@@ -67,7 +67,7 @@ class SingleOpModel {
Status BuildTaskList(StreamResource *stream_resource, SingleOp &single_op);
Status BuildTaskListForDynamicOp(DynamicSingleOp &dynamic_single_op);
- Status BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTask **task);
+ Status BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task);
Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task,
bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id);
Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id);

@@ -93,6 +93,14 @@ void TbeOpTask::SetKernelArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size
op_desc_ = op_desc;
}
void TbeOpTask::SetKernelWithHandleArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim,
const OpDescPtr &op_desc,
const domi::KernelDefWithHandle &kernel_def_with_handle) {
SetKernelArgs(std::move(args), arg_size, block_dim, op_desc);
original_kernel_key_ = kernel_def_with_handle.original_kernel_key();
node_info_ = kernel_def_with_handle.node_info();
}
void TbeOpTask::SetSmDesc(void *sm_desc) { sm_desc_ = sm_desc; }
void OpTask::SetModelArgs(std::string model_name, uint32_t model_id) {
@@ -165,6 +173,10 @@ const std::string &TbeOpTask::GetStubName() const { return stub_name_; }
uint32_t TbeOpTask::GetTaskType() const { return kTaskTypeAicore; }
void TbeOpTask::SetHandle(void *handle) {
this->handle_ = handle;
}
Status TbeOpTask::LaunchKernel(rtStream_t stream) {
GELOGD("To invoke rtKernelLaunch. task = %s, block_dim = %u", this->stub_name_.c_str(), block_dim_);
auto *sm_desc = reinterpret_cast<rtSmDesc_t *>(sm_desc_);
@@ -204,8 +216,9 @@ Status TbeOpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const ve
}
block_dim_ = run_info.block_dim;
tiling_data_ = run_info.tiling_data.str();
GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu", block_dim_,
tiling_data_.size());
tiling_key_ = run_info.tiling_key;
GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu, tiling_key = %u", block_dim_,
tiling_data_.size(), tiling_key_);
GE_CHK_STATUS_RET(AllocateWorkspaces(run_info.workspaces), "Failed to allocate workspaces");
return SUCCESS;
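For reference, a hedged sketch of an op tiling callback that produces the tiling_key consumed above (OpRunInfo field names are taken from this diff and the unit test below; the values are illustrative):
OpTilingFunc op_tiling_func = [](const TeOpParas &, const OpCompileInfo &, OpRunInfo &run_info) -> bool {
  run_info.block_dim = 32;  // copied into block_dim_
  run_info.tiling_key = 1;  // copied into tiling_key_; selects sub kernel "<original_kernel_key>_1"
  return true;
};
OpTilingRegistryInterf("Add", op_tiling_func);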
@@ -329,8 +342,17 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc,
}
GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str());
- GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, nullptr, stream));
- GELOGD("[%s] Done invoking rtKernelLaunch successfully", node_->GetName().c_str());
+ if (handle_ == nullptr) {
+   GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, nullptr, stream));
+   GELOGD("[%s] Done invoking rtKernelLaunch successfully", node_->GetName().c_str());
+ } else {
+   std::string dev_func = original_kernel_key_ + "_" + std::to_string(tiling_key_);
+   std::string kernel_info = node_info_ + "/" + std::to_string(tiling_key_);
+   GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), arg_size_, nullptr,
+                                          stream, kernel_info.c_str()));
+   GELOGD("[%s] Done invoking rtKernelLaunchWithHandle successfully", node_->GetName().c_str());
+ }
return SUCCESS;
}
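The sub kernel naming convention used above, with hypothetical values for illustration (the real original_kernel_key_ and node_info_ are copied from KernelDefWithHandle in SetKernelWithHandleArgs):
std::string original_kernel_key = "te_add_kernel0";  // hypothetical key from KernelDefWithHandle
std::string node_info = "model/Add";                 // hypothetical node info
uint32_t tiling_key = 2;                             // chosen by UpdateRunInfo for the current shapes
std::string dev_func = original_kernel_key + "_" + std::to_string(tiling_key);  // "te_add_kernel0_2"
std::string kernel_info = node_info + "/" + std::to_string(tiling_key);         // "model/Add/2"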

@@ -78,6 +78,8 @@ class TbeOpTask : public OpTask {
void SetSmDesc(void *sm_desc);
void SetStubFunc(const std::string &name, const void *stub_func);
void SetKernelArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, const OpDescPtr &op_desc);
void SetKernelWithHandleArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim,
const OpDescPtr &op_desc, const domi::KernelDefWithHandle& kernel_def_with_handle);
Status UpdateRunInfo(const vector<GeTensorDesc> &input_desc,
const vector<GeTensorDesc> &output_desc) override;
@@ -87,6 +89,7 @@ class TbeOpTask : public OpTask {
const std::string &GetStubName() const;
void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size);
uint32_t GetTaskType() const override;
void SetHandle(void *handle);
private:
friend class SingleOpModel;
@@ -107,6 +110,11 @@ class TbeOpTask : public OpTask {
std::string tiling_data_;
std::vector<void *> workspaces_;
NodePtr node_;
uint32_t tiling_key_ = 0;
void* handle_ = nullptr;
std::string original_kernel_key_;
std::string node_info_;
};
class AiCpuBaseTask : public OpTask {

File diff suppressed because it is too large.

@@ -42,6 +42,19 @@ class KernelHolder {
std::shared_ptr<ge::OpKernelBin> kernel_bin_;
};
class HandleHolder {
public:
HandleHolder(void *bin_handle);
~HandleHolder();
void SetBinHandle(void *bin_handle) { bin_handle_ = bin_handle; }
void *GetBinHandle() { return bin_handle_; }
private:
friend class HandleRegistry;
void *bin_handle_ = nullptr;
};
class KernelBinRegistry {
public:
static KernelBinRegistry &GetInstance() {
@@ -61,9 +74,22 @@ class KernelBinRegistry {
std::mutex mutex_;
};
class HandleRegistry {
public:
static HandleRegistry &GetInstance() {
static HandleRegistry instance;
return instance;
}
bool AddHandle(std::unique_ptr<HandleHolder> &&holder);
private:
std::set<std::unique_ptr<HandleHolder>> registered_handles_;
};
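HandleRegistry mirrors TbeHandleRegistry from the hybrid executor above; a minimal usage sketch (assuming bin_handle was produced by rtRegisterAllKernel):
auto holder = std::unique_ptr<HandleHolder>(new HandleHolder(bin_handle));
bool added = HandleRegistry::GetInstance().AddHandle(std::move(holder));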
class TbeTaskBuilder {
public:
- TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::KernelDef &kernel_def);
+ TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::TaskDef &task_def);
~TbeTaskBuilder() = default;
Status BuildTask(TbeOpTask &task, const SingleOpModelParam &param);
@@ -71,9 +97,11 @@ class TbeTaskBuilder {
private:
Status InitTilingInfo(TbeOpTask &task);
Status SetKernelArgs(TbeOpTask &task, const SingleOpModelParam &param, const OpDescPtr &op_desc);
Status SetKernelWithHandleArgs(TbeOpTask &task, const SingleOpModelParam &param, const OpDescPtr &op_desc);
Status GetSmDesc(void **sm_desc, const SingleOpModelParam &param) const;
Status RegisterKernel(TbeOpTask &task, const SingleOpModelParam &param);
Status RegisterKernelWithHandle(TbeOpTask &task, const SingleOpModelParam &param);
Status DoRegisterKernel(const OpKernelBin &kernel_bin, const char *bin_file_key, void **bin_handle,
const SingleOpModelParam &param);
Status DoRegisterBinary(const OpKernelBin &kernel_bin, void **bin_handle, const SingleOpModelParam &param) const;
@@ -83,8 +111,11 @@
const NodePtr node_;
const OpDescPtr op_desc_;
const domi::TaskDef &task_def_;
const domi::KernelDef &kernel_def_;
const domi::KernelDefWithHandle &kernel_def_with_handle_;
const std::string stub_name_;
void *handle_ = nullptr;
};
} // namespace ge

@@ -131,8 +131,15 @@ rtError_t rtFunctionRegister(void *bin_handle, const void *stub_func, const char
rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle) { return RT_ERROR_NONE; }
rtError_t rtRegisterAllKernel(const rtDevBinary_t *bin, void **handle) { return RT_ERROR_NONE; }
rtError_t rtKernelConfigTransArg(const void *ptr, uint64_t size, uint32_t flag, void **arg) { return RT_ERROR_NONE; }
rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize,
rtSmDesc_t *smDesc, rtStream_t stream, const void *kernelInfo) {
return RT_ERROR_NONE;
}
rtError_t rtKernelLaunch(const void *stub_func, uint32_t block_dim, void *args, uint32_t args_size, rtSmDesc_t *sm_desc,
rtStream_t stream) {
return RT_ERROR_NONE;

@@ -763,12 +763,17 @@ set(SINGLE_OP_TEST_FILES
#"single_op/single_op_model_unittest.cc"
"single_op/single_op_manager_unittest.cc"
"single_op/stream_resource_unittest.cc"
"single_op/single_op_task_unittest.cc"
)
set(PROFILING_MNG_TEST_FILES
"profiling/ge_profiling_manager_unittest.cc"
)
set(HYBRID_TEST_FILES
"hybrid/ge_hybrid_unittest.cc"
)
set(OTHERS_TEST_FILES
"plugin_manager/ge_util_unittest.cc"
)
@@ -1064,6 +1069,7 @@ add_executable(ut_libge_distinct_load_utest
${DISTINCT_GRAPH_LOAD_SRC_FILES}
${SINGLE_OP_TEST_FILES}
${PROFILING_MNG_TEST_FILES}
${HYBRID_TEST_FILES}
)
target_compile_options(ut_libge_distinct_load_utest PRIVATE

@@ -0,0 +1,101 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <gtest/gtest.h>
#include <vector>
#include "runtime/rt.h"
#define protected public
#define private public
#include "hybrid/model/hybrid_model_builder.h"
#include "hybrid/model/hybrid_model.h"
#include "model/ge_model.h"
#include "model/ge_root_model.h"
#include "hybrid/node_executor/aicore/aicore_op_task.h"
#include "framework/common/taskdown_common.h"
#include "framework/common/debug/log.h"
#include "graph/ge_context.h"
#include "hybrid/executor/hybrid_execution_context.h"
#include "hybrid/node_executor/aicore/aicore_task_builder.h"
#include "graph/load/model_manager/tbe_handle_store.h"
#include "graph/types.h"
#undef private
#undef protected
using namespace std;
using namespace testing;
using namespace ge;
class UtestGeHybrid : public testing::Test {
protected:
void SetUp() {}
void TearDown() {}
};
static ge::OpDescPtr CreateOpDesc(string name = "", string type = "") {
auto op_desc = std::make_shared<ge::OpDesc>(name, type);
op_desc->SetStreamId(0);
op_desc->SetId(0);
op_desc->SetWorkspace({});
op_desc->SetWorkspaceBytes({});
op_desc->SetInputOffset({});
op_desc->SetOutputOffset({});
ge::AttrUtils::SetStr(op_desc, ge::TVM_ATTR_NAME_MAGIC, "RT_DEV_BINARY_MAGIC_ELF_AIVEC");
bool support_dynamic = true;
ge::AttrUtils::SetBool(op_desc, "support_dynamicshape", support_dynamic);
return op_desc;
}
TEST_F(UtestGeHybrid, aicore_op_task_init_success) {
// build aicore task
auto aicore_task = std::unique_ptr<hybrid::AiCoreOpTask>(new(std::nothrow)hybrid::AiCoreOpTask());
domi::TaskDef task_def;
task_def.set_type(RT_MODEL_TASK_ALL_KERNEL);
domi::KernelDefWithHandle *kernel_with_handle = task_def.mutable_kernel_with_handle();
kernel_with_handle->set_original_kernel_key("");
kernel_with_handle->set_node_info("");
kernel_with_handle->set_block_dim(32);
kernel_with_handle->set_args_size(64);
string args(64, '1');
kernel_with_handle->set_args(args.data(), 64);
domi::KernelContext *context = kernel_with_handle->mutable_context();
context->set_op_index(1);
context->set_kernel_type(2); // ccKernelType::TE
uint16_t args_offset[9] = {0};
context->set_args_offset(args_offset, 9 * sizeof(uint16_t));
OpDescPtr op_desc = CreateOpDesc("Add", "Add");
std::vector<char> kernelBin;
TBEKernelPtr tbe_kernel = std::make_shared<ge::OpKernelBin>("name/Add", std::move(kernelBin));
op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel);
std::string kernel_name("kernel/Add");
AttrUtils::SetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name);
ASSERT_EQ(aicore_task->InitWithTaskDef(*op_desc.get(), task_def), SUCCESS);
rtStream_t stream = nullptr;
rtStreamCreate(&stream, 0);
ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS);
char *handle = "";
aicore_task->handle_ = handle;
aicore_task->tiling_key_ = 1;
ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS);
}

@@ -0,0 +1,117 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <gtest/gtest.h>
#include <vector>
#include "graph/load/model_manager/model_utils.h"
#include "graph/utils/graph_utils.h"
#include "runtime/rt.h"
#define protected public
#define private public
#include "single_op/single_op_model.h"
#include "single_op/task/tbe_task_builder.h"
#include "single_op/task/op_task.h"
#include "single_op/task/tbe_task_builder.h"
#include "external/register/op_tiling_registry.h"
#undef private
#undef protected
using namespace std;
using namespace testing;
using namespace ge;
using namespace optiling;
class UtestSingleOpTask : public testing::Test {
protected:
void SetUp() {}
void TearDown() {}
};
TEST_F(UtestSingleOpTask, test_build_kernel_task) {
string model_data_str = "123456789";
SingleOpModel model("model", model_data_str.c_str(), model_data_str.size());
model.input_offset_list_.push_back(0);
model.input_sizes_.push_back(16);
model.output_offset_list_.push_back(0);
model.output_sizes_.push_back(16);
auto graph = make_shared<ComputeGraph>("graph");
auto op_desc = make_shared<OpDesc>("Add", "Add");
std::vector<char> kernelBin;
TBEKernelPtr tbe_kernel = std::make_shared<ge::OpKernelBin>("name/Add", std::move(kernelBin));
op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel);
std::string kernel_name("kernel/Add");
AttrUtils::SetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name);
vector<int64_t> shape{16, 16};
GeShape ge_shape(shape);
GeTensorDesc desc(ge_shape);
op_desc->AddInputDesc(desc);
op_desc->AddOutputDesc(desc);
auto node = graph->AddNode(op_desc);
std::mutex stream_mu_;
rtStream_t stream_ = nullptr;
StreamResource stream_resource(0);
SingleOp single_op(&stream_resource, &stream_mu_, stream_);
domi::TaskDef task_def;
task_def.set_type(RT_MODEL_TASK_ALL_KERNEL);
domi::KernelDefWithHandle *kernel_with_handle = task_def.mutable_kernel_with_handle();
kernel_with_handle->set_original_kernel_key("");
kernel_with_handle->set_node_info("");
kernel_with_handle->set_block_dim(32);
kernel_with_handle->set_args_size(64);
string args(64, '1');
kernel_with_handle->set_args(args.data(), 64);
domi::KernelContext *context = kernel_with_handle->mutable_context();
context->set_op_index(1);
context->set_kernel_type(2); // ccKernelType::TE
uint16_t args_offset[9] = {0};
context->set_args_offset(args_offset, 9 * sizeof(uint16_t));
model.op_list_[1] = node;
TbeOpTask task_tmp;
TbeOpTask *task = &task_tmp;
ASSERT_EQ(model.BuildKernelTask(task_def, &task), SUCCESS);
vector<GeTensorDesc> input_desc;
vector<DataBuffer> input_buffers;
vector<GeTensorDesc> output_desc;
vector<DataBuffer> output_buffers;
task->node_ = node;
OpTilingFunc op_tiling_func = [](const TeOpParas &, const OpCompileInfo &, OpRunInfo &) -> bool {return true;};
OpTilingRegistryInterf("Add", op_tiling_func);
ge::AttrUtils::SetStr(op_desc, "compile_info_key", "op_compile_info_key");
ge::AttrUtils::SetStr(op_desc, "compile_info_json", "op_compile_info_json");
char buffer[64] = {0};  // at least max_tiling_size_ (64) bytes
task->tiling_buffer_ = buffer;
task->max_tiling_size_ = 64;
task->tiling_data_ = "tiling_data";
task->arg_size_ = 64;
task->args_.reset(new uint8_t[task->arg_size_]());  // heap-allocated so the unique_ptr's delete[] is safe
ASSERT_EQ(task->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_), SUCCESS);
char handle_tmp = '0';
char *handle = &handle_tmp;
task->SetHandle(handle);
ASSERT_EQ(task->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_), SUCCESS);
}

@@ -191,6 +191,14 @@ typedef void (*rtCallback_t)(void *fnData);
#define RT_FUSION_KERNEL_DUMPFLAG (0x04)
#define RT_KERNEL_CUSTOM_AICPU (0x08)
/**
* @ingroup rt_kernel
* @brief kernel mode
*/
#define RT_DEFAULT_KERNEL_MODE (0x00)
#define RT_NORMAL_KERNEL_MODE (0x01)
#define RT_ALL_KERNEL_MODE (0x02)
/**
* @ingroup rt_kernel
* @brief kernel L1 Fusion Dump bit flags
@@ -207,6 +215,16 @@ typedef void (*rtCallback_t)(void *fnData);
*/
RTS_API rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle);
/**
* @ingroup rt_kernel
* @brief register device binary with all kernels
* @param [in] bin device binary description
* @param [out] handle device binary handle
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtRegisterAllKernel(const rtDevBinary_t *bin, void **handle);
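A hedged usage sketch (the rtDevBinary_t field values are assumptions for illustration; bin_data/bin_size stand for a loaded TBE kernel binary):
rtDevBinary_t binary = {};
binary.magic = RT_DEV_BINARY_MAGIC_ELF;  // assumption: plain AI Core ELF
binary.version = 0;
binary.data = bin_data;
binary.length = bin_size;
void *handle = nullptr;
rtError_t rt_ret = rtRegisterAllKernel(&binary, &handle);  // one handle for the whole binary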
/**
* @ingroup rt_kernel
* @brief register fast memory device binary
@@ -314,6 +332,23 @@ RTS_API rtError_t rtKernelConfigDump(uint32_t kind, uint32_t dumpSizePerBlock, u
RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void *args, uint32_t argsSize,
rtSmDesc_t *smDesc, rtStream_t stream);
/**
* @ingroup rt_kernel
* @brief launch kernel with handle to device
* @param [in] handle program handle (returned by rtRegisterAllKernel)
* @param [in] devFunc device function description
* @param [in] blockDim block dimensions
* @param [in] args arguments address for kernel function
* @param [in] argsSize arguments size
* @param [in] smDesc shared memory description
* @param [in] stream associated stream
* @param [in] kernelInfo kernel info
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize,
rtSmDesc_t *smDesc, rtStream_t stream, const void *kernelInfo);
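Continuing the sketch above, the matching launch call (args, args_size and stream are assumed to be prepared elsewhere; the name composition mirrors TbeOpTask::LaunchKernel):
std::string dev_func = std::string("te_add_kernel0") + "_" + std::to_string(2);  // "<original_kernel_key>_<tiling_key>"
std::string kernel_info = std::string("model/Add") + "/" + std::to_string(2);    // "<node_info>/<tiling_key>"
rt_ret = rtKernelLaunchWithHandle(handle, dev_func.c_str(), 32, args, args_size,
                                  nullptr,  // no shared memory descriptor
                                  stream, kernel_info.c_str());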
/**
* @ingroup rt_kernel
* @brief launch kernel to device

@@ -50,6 +50,7 @@ typedef enum tagModelTaskType {
RT_MODEL_TASK_STREAM_LABEL_SWITCH_BY_INDEX,
RT_MODEL_TASK_STREAM_LABEL_GOTO,
RT_MODEL_TASK_MODEL_EXIT,
RT_MODEL_TASK_ALL_KERNEL,
} rtModelTaskType_t;
typedef enum tagModelStreamType {
@@ -127,6 +128,17 @@ typedef struct tagKernelTaskInfo {
uint16_t *argsOffset;
} rtKernelTaskInfo_t;
typedef struct tagAllKernelTaskInfo {
uint16_t blockDim;
uint16_t argsCount;
uint16_t argsSize;
uint16_t reserved;
const void *dev_func;
void *handle;
uint8_t *smDesc;
uint8_t *args;
uint16_t *argsOffset;
} rtAllKernelTaskInfo_t;
typedef struct tagKernelTaskInfoEx {
uint32_t flags;
uint32_t argsSize;
@@ -251,6 +263,7 @@ typedef struct tagTaskInfo {
union {
rtKernelTaskInfoEx_t kernelTaskEx;
rtKernelTaskInfo_t kernelTask;
rtAllKernelTaskInfo_t allkernelTask;
rtEventTaskInfo_t eventTask;
rtStreamSwitchTaskInfo_t streamSwitchTask;
rtStreamActiveTaskInfo_t streamActiveTask;
