!6728 [Ascend][DynamicShape] Dynamic shape feature

Merge pull request !6728 from caifubi/dynamic_shape_share_2
pull/6728/MERGE
mindspore-ci-bot 4 years ago committed by Gitee
commit c951d42c2c

@@ -18,6 +18,7 @@ import os
 import sys
 from te.platform.cce_conf import te_set_version
 from te.platform.fusion_util import fusion_op
+import te
 from common import check_kernel_info, get_args, get_build_in_impl_path

 build_in_impl_path = get_build_in_impl_path()
@@ -38,6 +39,16 @@ def _initialize(impl_path):
     sys.path.insert(0, op_module_name)

+def _replace_range(args):
+    for arg in args:
+        if not arg.__contains__('range'):
+            continue
+        shape_range = arg["range"]
+        for range_item in shape_range:
+            for index, value in enumerate(range_item):
+                if value < 0:
+                    range_item[index] = None
+
 def build_op(build_type, json_str):
     """
     call op functions with function name and input args json_str
@@ -71,11 +82,18 @@ def build_op(build_type, json_str):
         outputs_args = get_args(kernel_info['op_info'], 'outputs')
         attrs_args = get_args(kernel_info['op_info'], 'attrs')
         kernel_name = kernel_info['op_info']['kernel_name']
+        is_dynamic_shape = kernel_info['op_info']['is_dynamic_shape']
+        if is_dynamic_shape:
+            _replace_range(inputs_args)
+            _replace_range(outputs_args)

         if custom_flag:
             op_module = __import__(op_name)
         else:
-            op_module = __import__("impl."+op_name, globals(), locals(), [op_name], 0)
+            if is_dynamic_shape:
+                op_module = __import__("impl.dynamic."+op_name, globals(), locals(), [op_name], 0)
+            else:
+                op_module = __import__("impl."+op_name, globals(), locals(), [op_name], 0)
         # get function
         if build_type == op_build:
             if custom_flag:
@@ -92,7 +110,12 @@ def build_op(build_type, json_str):
         if kernel_name[0:19] == "bounding_box_encode":
             return op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name_val=kernel_name)

-        return op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name)
+        if is_dynamic_shape:
+            with te.op.dynamic():
+                op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name)
+                return te.op.get_compile_info()
+        else:
+            return op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name)
     except Exception as e:
         raise RuntimeError(e)
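
The hunk above is the heart of the TBE dynamic-shape compile path: range bounds of -1 (unknown) are rewritten to None, the operator implementation is imported from impl.dynamic instead of impl, and compilation runs inside te.op.dynamic() so the compile info can be fetched afterwards. A minimal standalone sketch of the range normalisation; the sample arg below is illustrative, not taken from the PR:

def _replace_range(args):
    # Rewrite negative (i.e. unknown) range bounds to None, as the hunk above does.
    for arg in args:
        if 'range' not in arg:
            continue
        for range_item in arg["range"]:
            for index, value in enumerate(range_item):
                if value < 0:
                    range_item[index] = None

inputs = [{"shape": [-1, 16], "range": [[1, -1], [16, 16]]}]
_replace_range(inputs)
print(inputs)  # [{'shape': [-1, 16], 'range': [[1, None], [16, 16]]}]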

@@ -78,6 +78,7 @@ def _check_supported(kernel_info):
     """
     try:
         op_name = kernel_info['op_info']['name']
+        is_dynamic_shape = kernel_info['op_info']['is_dynamic_shape']
         impl_path = build_in_impl_path
         custom_flag = False
         if 'impl_path' in kernel_info and kernel_info['impl_path'] is not None:
@@ -92,8 +93,11 @@ def _check_supported(kernel_info):
         if custom_flag:
             op_module = __import__(op_name)
+        elif is_dynamic_shape:
+            op_module = __import__("impl.dynamic." + op_name, globals(), locals(), [op_name], 0)
         else:
             op_module = __import__("impl." + op_name, globals(), locals(), [op_name], 0)
         # get function
         if not hasattr(op_module, "check_supported"):
             return ""

@@ -219,6 +219,7 @@ if (ENABLE_D)
     set(ASCEND_DRIVER_PATH ${ASCEND_PATH}/driver/lib64/common)
     set(ASCEND_DRIVER_BACK_PATH ${ASCEND_PATH}/driver/lib64/driver)
     set(ASCEND_RUNTIME_PATH ${ASCEND_PATH}/fwkacllib/lib64)
+    set(ASCEND_OPP_PATH ${ASCEND_PATH}/opp/op_impl/built-in/ai_core/tbe/op_tiling)
 endif()
 MESSAGE("USE DAV LIB PATH: ${ASCEND_PATH}")
@@ -228,7 +229,8 @@ if (ENABLE_D)
     find_library(TSDCLIENT tsdclient HINTS ${ASCEND_RUNTIME_PATH} ${ASCEND_DRIVER_BACK_PATH})
     find_library(DATATRANSFER datatransfer HINTS ${ASCEND_RUNTIME_PATH} ${ASCEND_DRIVER_BACK_PATH})
     find_library(PROFILING msprofiler ${ASCEND_RUNTIME_PATH})
-    target_link_libraries(mindspore ge_runtime ${CCE_LIB} ${RUNTIME_LIB} ${TSDCLIENT} ${HCCL} ${DATATRANSFER})
+    find_library(OPTILING optiling ${ASCEND_OPP_PATH})
+    target_link_libraries(mindspore ge_runtime ${CCE_LIB} ${RUNTIME_LIB} ${TSDCLIENT} ${HCCL} ${DATATRANSFER} ${OPTILING})
     target_link_libraries(mindspore -Wl,--start-group proto_input ${PROFILING} mindspore::protobuf -Wl,--end-group)
 elseif (CMAKE_SYSTEM_NAME MATCHES "Windows")
     target_link_libraries(mindspore -Wl,--start-group proto_input mindspore::protobuf mindspore::sentencepiece -Wl,--end-group)
@@ -258,6 +260,7 @@ if (ENABLE_D)
     set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64)
     set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/fwkacllib/lib64)
     set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/add-ons)
+    set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/opp/op_impl/built-in/ai_core/tbe/op_tiling)
 elseif (ENABLE_GPU)
     set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/cuda/lib64)
 endif ()
@@ -315,6 +318,8 @@ add_library(inference SHARED
     ${CMAKE_CURRENT_SOURCE_DIR}/backend/session/infer_session.cc
     ${LOAD_ONNX_SRC}
 )
+set_target_properties(inference PROPERTIES INSTALL_RPATH ${MINDSPORE_RPATH})
 target_link_libraries(inference PRIVATE ${PYTHON_LIBRARIES} ${SECUREC_LIBRARY}
     -Wl,--whole-archive mindspore proto_input -Wl,--no-whole-archive mindspore_gvar)

@@ -15,6 +15,7 @@ if (ENABLE_D)
         "akg/akg_kernel_attrs_process.cc"
         "akg/akg_kernel_metadata.cc"
         "tbe/*.cc"
+        "host/*.cc"
         "aicpu/*.cc"
         "rts/*.cc"
         "hccl/*.cc"

@@ -289,51 +289,25 @@ bool CreateNodeDefBytes(const std::shared_ptr<AnfNode> &anf_node,
   return true;
 }

-bool CreateExtInfo(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
-  if (!anf_node->isa<CNode>()) {
-    return true;
-  }
-  if (!AnfAlgo::IsDynamicShape(anf_node)) {
-    return true;
-  }
-  MS_LOG(INFO) << "CreateExtInfo start, " << anf_node->fullname_with_scope();
-  int32_t unknown_shape_type = UnknowShapeOpType::DEPEND_COMPUTE;
-  uint64_t ext_info_head_len = kExtInfoHeadSize;
-  std::string ext_info;
-  size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
-  size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
-  // 1.addr:unknown shape type
-  uint64_t ext_info_len = ext_info.size();
-  ext_info_len += ext_info_head_len + sizeof(int32_t);
-  // 2.addr:input ShapeAndType
-  ext_info_len += ext_info_head_len + input_num * sizeof(ShapeAndType);
-  // 3.addr:output ShapeAndType
-  ext_info_len += ext_info_head_len + output_num * sizeof(ShapeAndType);
-  uint64_t ext_info_offset = ext_info.size();
-  ext_info.resize(ext_info_len, 0);
-  char *ext_info_buf = ext_info.data();
+uint64_t SetExtInfoShapeType(char *ext_info_buf, uint64_t ext_info_offset) {
   // deal1: unknown shape type
   ExtInfo *info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
   info->infoType = FWK_ADPT_EXT_SHAPE_TYPE;
   info->infoLen = sizeof(int32_t);
-  ext_info_offset += ext_info_head_len;
+  ext_info_offset += kExtInfoHeadSize;
   int32_t *shape_type = reinterpret_cast<int32_t *>(ext_info_buf + ext_info_offset);
-  *shape_type = unknown_shape_type;
+  *shape_type = UnknowShapeOpType::DEPEND_COMPUTE;
   ext_info_offset += info->infoLen;
+  return ext_info_offset;
+}
+
+uint64_t SetExtInfoInputShapeType(char *ext_info_buf, uint64_t ext_info_offset,
+                                  const std::shared_ptr<AnfNode> &anf_node, size_t input_num) {
   // deal2:input ShapeAndType
-  info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
+  ExtInfo *info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
   info->infoType = FWK_ADPT_EXT_INPUT_SHAPE;
   info->infoLen = input_num * sizeof(ShapeAndType);
-  ext_info_offset += ext_info_head_len;
+  ext_info_offset += kExtInfoHeadSize;
   ShapeAndType *inputs = reinterpret_cast<ShapeAndType *>(ext_info_buf + ext_info_offset);

   for (size_t input_index = 0; input_index < input_num; input_index++) {
@@ -364,12 +338,16 @@ bool CreateExtInfo(const std::shared_ptr<AnfNode> &anf_node, const std::shared_p
     }
   }
   ext_info_offset += info->infoLen;
+  return ext_info_offset;
+}
+
+uint64_t SetExtInfoOutputShapeType(char *ext_info_buf, uint64_t ext_info_offset,
+                                   const std::shared_ptr<AnfNode> &anf_node, size_t output_num) {
   // deal3:output ShapeAndType
-  info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
+  ExtInfo *info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
   info->infoType = FWK_ADPT_EXT_OUTPUT_SHAPE;
   info->infoLen = output_num * sizeof(ShapeAndType);
-  ext_info_offset += ext_info_head_len;
+  ext_info_offset += kExtInfoHeadSize;
   ShapeAndType *outputs = reinterpret_cast<ShapeAndType *>(ext_info_buf + ext_info_offset);

   for (size_t output_index = 0; output_index < output_num; output_index++) {
@@ -387,6 +365,47 @@ bool CreateExtInfo(const std::shared_ptr<AnfNode> &anf_node, const std::shared_p
     }
   }
+  ext_info_offset += info->infoLen;
+  return ext_info_offset;
+}
+
+bool CreateExtInfo(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
+  MS_EXCEPTION_IF_NULL(anf_node);
+  MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
+  if (!anf_node->isa<CNode>()) {
+    return true;
+  }
+  if (!AnfAlgo::IsDynamicShape(anf_node)) {
+    return true;
+  }
+  MS_LOG(INFO) << "CreateExtInfo start, " << anf_node->fullname_with_scope();
+  uint64_t ext_info_head_len = kExtInfoHeadSize;
+  std::string ext_info;
+  size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
+  size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
+  // 1.addr:unknown shape type
+  uint64_t ext_info_len = ext_info.size();
+  ext_info_len += ext_info_head_len + sizeof(int32_t);
+  // 2.addr:input ShapeAndType
+  ext_info_len += ext_info_head_len + input_num * sizeof(ShapeAndType);
+  // 3.addr:output ShapeAndType
+  ext_info_len += ext_info_head_len + output_num * sizeof(ShapeAndType);
+  uint64_t ext_info_offset = ext_info.size();
+  ext_info.resize(ext_info_len, 0);
+  char *ext_info_buf = ext_info.data();
+  ext_info_offset = SetExtInfoShapeType(ext_info_buf, ext_info_offset);
+  ext_info_offset = SetExtInfoInputShapeType(ext_info_buf, ext_info_offset, anf_node, input_num);
+  ext_info_offset = SetExtInfoOutputShapeType(ext_info_buf, ext_info_offset, anf_node, output_num);
+  MS_LOG(INFO) << "Check ext_info_len:" << ext_info_len << " ext_info_offset:" << ext_info_offset;
   // set ext info
   kernel_mod_ptr->SetExtInfo(ext_info);
   return true;
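
The refactor above splits CreateExtInfo into three helpers but keeps the buffer layout: each record is an 8-byte head (int32 infoType, uint32 infoLen; kExtInfoHeadSize = 8) followed by its payload, with the shape-type record first and then one ShapeAndType array each for inputs and outputs. A small Python sketch of that packing, assuming little-endian layout and assumed numeric values for the enum constants:

import struct

FWK_ADPT_EXT_SHAPE_TYPE = 0      # first value of FWKTaskExtInfoType above
FWK_ADPT_EXT_INPUT_SHAPE = 1     # assumed next enum value
DEPEND_COMPUTE = 3               # assumed value of UnknowShapeOpType::DEPEND_COMPUTE
SHAPE_AND_TYPE_SIZE = 4 + 8 * 8  # int32 type + int64 dims[kMaxShapeDims = 8]

def ext_info_record(info_type, payload):
    # kExtInfoHeadSize bytes of head (infoType, infoLen), then the payload.
    return struct.pack('<iI', info_type, len(payload)) + payload

shape_type_record = ext_info_record(FWK_ADPT_EXT_SHAPE_TYPE, struct.pack('<i', DEPEND_COMPUTE))
print(len(shape_type_record))  # 12 = kExtInfoHeadSize + sizeof(int32_t)

# Input record for 2 inputs: infoLen = input_num * sizeof(ShapeAndType).
input_record = ext_info_record(FWK_ADPT_EXT_INPUT_SHAPE, bytes(2 * SHAPE_AND_TYPE_SIZE))
print(len(input_record))  # 8 + 2 * 68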

@@ -26,8 +26,13 @@
 #include "utils/convert_utils.h"
 #include "backend/kernel_compiler/aicpu/aicpu_util.h"
 #include "utils/ms_context.h"
+#include "runtime/device/ascend/executor/ai_cpu_dynamic_kernel.h"
+#include "runtime/device/kernel_runtime.h"
+#include "runtime/device/ascend/executor/host_dynamic_kernel.h"

 using AicpuTaskInfoPtr = std::shared_ptr<ge::model_runner::AicpuTaskInfo>;
+using AicpuDynamicKernel = mindspore::device::ascend::AiCpuDynamicKernel;
+using HostDynamicKernel = mindspore::device::ascend::HostDynamicKernel;

 namespace mindspore {
 namespace kernel {
@@ -93,7 +98,7 @@ void AicpuOpKernelMod::CreateCpuKernelInfo(const std::vector<AddressPtr> &inputs
   param_len += node_def_len;
   param_len += sizeof(uint32_t);

-  AicpuParamHead aicpu_param_head;
+  AicpuParamHead aicpu_param_head{};
   aicpu_param_head.length = param_len;
   aicpu_param_head.ioAddrNum = io_addrs_num;
@@ -178,5 +183,15 @@ std::vector<TaskInfoPtr> AicpuOpKernelMod::GenTask(const std::vector<AddressPtr>
   MS_LOG(INFO) << "AicpuOpKernelMod GenTask end";
   return {task_info_ptr};
 }
+
+device::DynamicKernelPtr AicpuOpKernelMod::GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) {
+  AddressPtrList kernel_inputs;
+  AddressPtrList kernel_workspaces;
+  AddressPtrList kernel_outputs;
+  device::KernelRuntime::GenLaunchArgs(*this, cnode_ptr, &kernel_inputs, &kernel_workspaces, &kernel_outputs);
+  CreateCpuKernelInfo(kernel_inputs, kernel_outputs);
+  return std::make_shared<AicpuDynamicKernel>(stream_ptr, cnode_ptr, args_, ext_info_, node_so_, node_name_);
+}
 }  // namespace kernel
 }  // namespace mindspore

@@ -31,6 +31,7 @@ class AicpuOpKernelMod : public AscendKernelMod {
   std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                                    const std::vector<AddressPtr> &outputs, uint32_t stream_id) override;
+  device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override;
   void SetInputList(const std::vector<int64_t> &inputList);
   void SetOutputList(const std::vector<int64_t> &outputList);

@@ -20,7 +20,7 @@
 namespace mindspore {
 namespace kernel {
-static std::map<int32_t, int32_t> MS_PROTO_DATA_TYPE_MAP = {
+static const std::map<int32_t, int32_t> kMsProtoDataTypeMap = {
   {mindspore::TypeId::kTypeUnknown, mindspore::DataType::MS_UNKNOWN},
   {mindspore::TypeId::kNumberTypeBool, mindspore::DataType::MS_BOOL},
   {mindspore::TypeId::kNumberTypeInt, mindspore::DataType::MS_INT32},
@@ -39,14 +39,38 @@ static const std::map<int32_t, int32_t> kMsProtoDataTypeMap = {
   {mindspore::TypeId::kNumberTypeFloat64, mindspore::DataType::MS_FLOAT64},
 };

+static const std::map<int32_t, int32_t> kProtoDataTypeToMsDataTypeMap = {
+  {mindspore::DataType::MS_UNKNOWN, mindspore::TypeId::kTypeUnknown},
+  {mindspore::DataType::MS_BOOL, mindspore::TypeId::kNumberTypeBool},
+  {mindspore::DataType::MS_INT32, mindspore::TypeId::kNumberTypeInt32},
+  {mindspore::DataType::MS_INT8, mindspore::TypeId::kNumberTypeInt8},
+  {mindspore::DataType::MS_INT16, mindspore::TypeId::kNumberTypeInt16},
+  {mindspore::DataType::MS_INT64, mindspore::TypeId::kNumberTypeInt64},
+  {mindspore::DataType::MS_UINT8, mindspore::TypeId::kNumberTypeUInt8},
+  {mindspore::DataType::MS_UINT16, mindspore::TypeId::kNumberTypeUInt16},
+  {mindspore::DataType::MS_UINT32, mindspore::TypeId::kNumberTypeUInt32},
+  {mindspore::DataType::MS_UINT64, mindspore::TypeId::kNumberTypeUInt64},
+  {mindspore::DataType::MS_FLOAT16, mindspore::TypeId::kNumberTypeFloat16},
+  {mindspore::DataType::MS_FLOAT32, mindspore::TypeId::kNumberTypeFloat32},
+  {mindspore::DataType::MS_FLOAT64, mindspore::TypeId::kNumberTypeFloat64},
+};
+
 int AicpuOpUtil::MsTypeToProtoType(TypeId ms_type) {
-  auto iter = MS_PROTO_DATA_TYPE_MAP.find(ms_type);
-  if (iter != MS_PROTO_DATA_TYPE_MAP.end()) {
-    return MS_PROTO_DATA_TYPE_MAP[ms_type];
-  } else {
+  auto iter = kMsProtoDataTypeMap.find(ms_type);
+  if (iter == kMsProtoDataTypeMap.end()) {
     MS_LOG(ERROR) << "UnSupported ms_type value" << static_cast<int>(ms_type);
     return -1;
   }
+  return iter->second;
+}
+
+int AicpuOpUtil::ProtoTypeToMsType(int proto_type) {
+  auto iter = kProtoDataTypeToMsDataTypeMap.find(proto_type);
+  if (iter == kProtoDataTypeToMsDataTypeMap.end()) {
+    MS_LOG(ERROR) << "UnSupported proto_type value:" << proto_type;
+    return -1;
+  }
+  return iter->second;
+}
 }  // namespace kernel
 }  // namespace mindspore

@@ -55,13 +55,6 @@ struct AicpuParamHead {
   uint64_t extInfoAddr;  // extInfo address
 } __attribute__((packed));

-const uint32_t kExtInfoHeadSize = 8;
-struct ExtInfo {
-  int32_t infoType;  // extend type
-  uint32_t infoLen;  // length for infoMsg
-  char infoMsg[0];   // extend value
-} __attribute__((packed));
-
 // Extent info ShapeAndType
 const uint32_t kMaxShapeDims = 8;
 struct ShapeAndType {
@@ -69,6 +62,14 @@ struct ShapeAndType {
   int64_t dims[kMaxShapeDims];
 } __attribute__((packed));

+// Extend info structure for extInfoAddr
+const uint32_t kExtInfoHeadSize = 8;
+struct ExtInfo {
+  int32_t infoType;  // extend type
+  uint32_t infoLen;  // length for infoMsg
+  char infoMsg[0];   // extend value
+} __attribute__((packed));
+
 // Extend Info type for task
 enum FWKTaskExtInfoType {
   FWK_ADPT_EXT_SHAPE_TYPE = 0,
@@ -88,6 +89,7 @@ enum UnknowShapeOpType {
 class AicpuOpUtil {
  public:
   static int MsTypeToProtoType(TypeId ms_type);
+  static int ProtoTypeToMsType(int proto_type);

  private:
   // kernel id

@@ -15,15 +15,34 @@
  */

 #include "backend/kernel_compiler/hccl/hccl_kernel.h"
+
+#include <map>
 #include "runtime/device/ascend/tasksink/runtime_utils.h"
 #include "backend/session/anf_runtime_algorithm.h"
 #include "utils/utils.h"
 #include "utils/ms_context.h"
+#include "runtime/device/kernel_runtime.h"
+#include "runtime/device/ascend/executor/hccl_dynamic_kernel.h"

 using HcclTaskInfoPtr = std::shared_ptr<ge::model_runner::HcclTaskInfo>;
 using ge::model_runner::HcclTaskInfo;
 using mindspore::device::ascend::tasksink::RuntimeUtils;

+namespace {
+static std::map<std::string, std::string> kMsOpNameToHcomHcclType = {
+  {mindspore::kAllReduceOpName, mindspore::kHcomOpTypeAllReduce},
+  {mindspore::kAllGatherOpName, mindspore::kHcomOpTypeAllGather},
+  {mindspore::kBroadcastOpName, mindspore::kHcomOpTypeBroadcast},
+  {mindspore::kReduceScatterOpName, mindspore::kHcomOpTypeReduceScatter}};
+
+std::string MsOpNameToHcomOpType(const std::string &ms_op_type) {
+  auto iter = kMsOpNameToHcomHcclType.find(ms_op_type);
+  if (iter == kMsOpNameToHcomHcclType.end()) {
+    MS_LOG(EXCEPTION) << "Invalid MsOpType:" << ms_op_type;
+  }
+  return iter->second;
+}
+}  // namespace
+
 namespace mindspore {
 namespace kernel {
 void HcclKernelFactory::Registe(const std::string &name, HcclKernelCreater &&fun) {
@@ -156,5 +175,30 @@ std::vector<TaskInfoPtr> HcclKernel::GenTask(const std::vector<AddressPtr> &inpu
   MS_EXCEPTION_IF_NULL(task_info_ptr);
   return {task_info_ptr};
 }
+
+device::DynamicKernelPtr HcclKernel::GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) {
+  AddressPtrList inputs;
+  AddressPtrList workspaces;
+  AddressPtrList outputs;
+  device::KernelRuntime::GenLaunchArgs(*this, cnode_ptr, &inputs, &workspaces, &outputs);
+
+  std::string hccl_type = MsOpNameToHcomOpType(AnfAlgo::GetCNodeName(anf_node_));
+
+  if (inputs.empty()) {
+    MS_LOG(EXCEPTION) << "Hccl kernel input is empty";
+  }
+  if (hccl_data_type_list_.empty()) {
+    MS_LOG(EXCEPTION) << "Hccl data type list is empty";
+  }
+  MS_EXCEPTION_IF_NULL(inputs.at(0));
+  auto input_data_addr = inputs.at(0)->addr;
+  MS_EXCEPTION_IF_NULL(outputs.at(0));
+  auto output_data_addr = outputs.at(0)->addr;
+  HcclDataType data_type = hccl_data_type_list_[0];
+
+  auto executor = std::make_shared<device::ascend::HcclDynamicKernel>(
+    hccl_type, input_data_addr, output_data_addr, hccl_count_, data_type, op_type_, root_id_, stream_ptr, cnode_ptr);
+  return executor;
+}
 }  // namespace kernel
 }  // namespace mindspore
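
GenDynamicKernel above resolves the HCCL op type through a fixed name map and fails fast on anything unregistered. The same lookup sketched standalone; the string values are illustrative stand-ins for the kHcomOpType* constants:

# Sketch of the MsOpNameToHcomOpType lookup; values are illustrative.
MS_TO_HCOM = {
    "AllReduce": "HcomAllReduce",
    "AllGather": "HcomAllGather",
    "Broadcast": "HcomBroadcast",
    "ReduceScatter": "HcomReduceScatter",
}

def ms_op_name_to_hcom_op_type(ms_op_type):
    try:
        return MS_TO_HCOM[ms_op_type]
    except KeyError:
        raise RuntimeError(f"Invalid MsOpType:{ms_op_type}")

print(ms_op_name_to_hcom_op_type("AllReduce"))  # HcomAllReduce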

@ -41,6 +41,7 @@ class HcclKernel : public AscendKernelMod {
const std::vector<size_t> &GetWorkspaceSizeList() const override; const std::vector<size_t> &GetWorkspaceSizeList() const override;
std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uint32_t stream_id) override; const std::vector<AddressPtr> &outputs, uint32_t stream_id) override;
device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override;
protected: protected:
std::vector<std::vector<size_t>> hccl_kernel_input_shape_list_; std::vector<std::vector<size_t>> hccl_kernel_input_shape_list_;

@@ -0,0 +1,52 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/host/dynamic_shape_kernel.h"
#include "backend/session/anf_runtime_algorithm.h"
namespace mindspore {
namespace kernel {
void DynamicShapeKernel::Execute() {
MS_LOG(INFO) << "Execute DynamicShapeKernel Start";
auto input_num = AnfAlgo::GetInputTensorNum(cnode_ptr_);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "Invalid Input Num:" << input_num;
}
auto prev_output_shape = AnfAlgo::GetPrevNodeOutputInferShape(cnode_ptr_, 0);
auto output_shape = std::vector<int>{SizeToInt(prev_output_shape.size())};
auto output_type = TypeId::kNumberTypeInt32;
auto output_tensor_for_sync = std::make_shared<tensor::Tensor>(output_type, output_shape);
auto data_ptr = static_cast<int32_t *>(output_tensor_for_sync->data_c());
for (size_t i = 0; i < prev_output_shape.size(); ++i) {
MS_LOG(INFO) << "DEBUG prev_output_shape[" << i << "]:" << prev_output_shape[i];
*(data_ptr + i) = prev_output_shape[i];
}
auto output_addr = AnfAlgo::GetOutputAddr(cnode_ptr_, 0);
MS_EXCEPTION_IF_NULL(output_addr);
output_addr->SyncHostToDevice(output_shape, LongToSize(output_tensor_for_sync->data().nbytes()),
output_tensor_for_sync->data_type(), output_tensor_for_sync->data_c());
MS_LOG(INFO) << "Execute DynamicShapeKernel End";
}
device::DynamicKernelPtr DynamicShapeKernelMod::GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) {
return std::make_shared<DynamicShapeKernel>(stream_ptr, cnode_ptr);
}
} // namespace kernel
} // namespace mindspore
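
DynamicShapeKernel::Execute above materialises the input's runtime shape as a 1-D int32 tensor on the host and syncs it to the device. Its host-side arithmetic, sketched without the device sync:

# Sketch of the DynamicShape op semantics; plain Python lists stand in for tensors.
def dynamic_shape_execute(input_shapes):
    # The op takes exactly one input, like the check in Execute() above.
    if len(input_shapes) != 1:
        raise RuntimeError("Invalid Input Num:%d" % len(input_shapes))
    # Output tensor: one int32 element per dimension of the input.
    return [int(dim) for dim in input_shapes[0]]

print(dynamic_shape_execute([(32, 128)]))  # [32, 128]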

@@ -0,0 +1,43 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_DYNAMIC_SHAPE_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_DYNAMIC_SHAPE_KERNEL_H_
#include <vector>
#include <memory>
#include <string>
#include "runtime/device/ascend/executor/host_dynamic_kernel.h"
#include "backend/kernel_compiler/host/host_kernel_mod.h"
using HostDynamicKernel = mindspore::device::ascend::HostDynamicKernel;
namespace mindspore {
namespace kernel {
class DynamicShapeKernel : public HostDynamicKernel {
public:
DynamicShapeKernel(void *stream, const CNodePtr &cnode_ptr) : HostDynamicKernel(stream, cnode_ptr) {}
~DynamicShapeKernel() override = default;
void Execute() override;
};
class DynamicShapeKernelMod : public HostKernelMod {
public:
DynamicShapeKernelMod() = default;
~DynamicShapeKernelMod() override = default;
device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override;
};
MS_HOST_REG_KERNEL(DynamicShape, DynamicShapeKernelMod);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_DYNAMIC_SHAPE_KERNEL_H_

@@ -0,0 +1,42 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/host/host_kernel_build.h"
#include <string>
#include "runtime/device/kernel_runtime.h"
#include "backend/kernel_compiler/host/host_kernel_mod.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "backend/session/kernel_graph.h"
#include "backend/kernel_compiler/common_utils.h"
namespace mindspore {
namespace kernel {
KernelModPtr HostOpBuild(const std::shared_ptr<AnfNode> &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
std::string opname = AnfAlgo::GetCNodeName(anf_node);
MS_LOG(INFO) << "Host op [" << opname << "]";
auto kerPtr = HostKernelFactory::Get(opname);
if (kerPtr == nullptr) {
MS_LOG(ERROR) << "Host can't find Kernel[" << opname << "]";
return nullptr;
}
if (!kerPtr->Init(anf_node)) {
MS_LOG(ERROR) << "Host Kernel initialize failed!";
return nullptr;
}
return kerPtr;
}
} // namespace kernel
} // namespace mindspore

@@ -0,0 +1,27 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_BUILD_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_BUILD_H_
#include <memory>
#include "backend/kernel_compiler/kernel.h"
namespace mindspore {
namespace kernel {
KernelModPtr HostOpBuild(const std::shared_ptr<AnfNode> &anf_node);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_BUILD_H_

@@ -0,0 +1,59 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/host/host_kernel_metadata.h"
#include <memory>
#include <string>
#include "backend/kernel_compiler/oplib/oplib.h"
#include "backend/kernel_compiler/common_utils.h"
#include "backend/session/anf_runtime_algorithm.h"
namespace mindspore {
namespace kernel {
constexpr auto kDynamicShape = "DynamicShape";
void HostMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<KernelBuildInfo>> *kernel_info_list) {
MS_LOG(INFO) << "HostMetadataInfo.";
MS_EXCEPTION_IF_NULL(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_info_list);
std::string op_name = AnfAlgo::GetCNodeName(kernel_node);
if (op_name != kDynamicShape) {
MS_LOG(DEBUG) << "Host does not have op [" << op_name << "]";
return;
}
std::vector<std::string> inputs_format{};
std::vector<TypeId> inputs_type{};
for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(kernel_node); ++input_index) {
inputs_format.emplace_back(kOpFormat_DEFAULT);
inputs_type.push_back(AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, input_index));
}
std::vector<std::string> outputs_format;
std::vector<TypeId> outputs_type;
for (size_t output_index = 0; output_index < AnfAlgo::GetOutputTensorNum(kernel_node); ++output_index) {
outputs_format.emplace_back(kOpFormat_DEFAULT);
outputs_type.push_back(AnfAlgo::GetOutputInferDataType(kernel_node, output_index));
}
auto builder = KernelBuildInfo::KernelBuildInfoBuilder();
builder.SetInputsFormat(inputs_format);
builder.SetInputsDeviceType(inputs_type);
builder.SetOutputsFormat(outputs_format);
builder.SetOutputsDeviceType(outputs_type);
builder.SetKernelType(HOST_KERNEL);
kernel_info_list->push_back(builder.Build());
}
} // namespace kernel
} // namespace mindspore

@@ -0,0 +1,30 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_META_DATA_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_META_DATA_H_
#include <string>
#include <vector>
#include <memory>
#include "backend/kernel_compiler/kernel_build_info.h"
namespace mindspore {
namespace kernel {
void HostMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<KernelBuildInfo>> *kernel_info_list);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_META_DATA_H_

@@ -0,0 +1,98 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/host/host_kernel_mod.h"
#include <memory>
#include <vector>
#include <string>
#include <utility>
#include "runtime/mem.h"
#include "utils/ms_context.h"
#include "runtime/device/kernel_runtime.h"
#include "runtime/device/ascend/executor/host_dynamic_kernel.h"
namespace mindspore {
namespace kernel {
void HostKernelFactory::Registe(const std::string &name, HostKernelCreater &&fun) {
hostKernelMap_.emplace(name, std::move(fun));
}
std::shared_ptr<HostKernelMod> HostKernelFactory::Get(const std::string &name) {
const auto &map = Get().hostKernelMap_;
auto it = map.find(name);
if (it != map.end() && it->second) {
return (it->second)();
}
return nullptr;
}
HostKernelFactory &HostKernelFactory::Get() {
static HostKernelFactory instance;
return instance;
}
const std::vector<size_t> &HostKernelMod::GetInputSizeList() const { return input_size_list_; }
const std::vector<size_t> &HostKernelMod::GetOutputSizeList() const { return output_size_list_; }
const std::vector<size_t> &HostKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; }
bool HostKernelMod::Init(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
for (size_t i = 0; i < input_num; i++) {
std::vector<size_t> shape_i = AnfAlgo::GetInputDeviceShape(anf_node, i);
TypePtr type_ptr = TypeIdToType(AnfAlgo::GetInputDeviceDataType(anf_node, i));
MS_EXCEPTION_IF_NULL(type_ptr);
int64_t size_i = 1;
for (size_t j = 0; j < shape_i.size(); j++) {
size_i = LongMulWithOverflowCheck(size_i, static_cast<int>(shape_i[j]));
}
size_t type_byte = GetTypeByte(type_ptr);
if (type_byte == 0) {
return false;
}
size_i = LongMulWithOverflowCheck(size_i, SizeToInt(type_byte));
input_size_list_.push_back(LongToSize(size_i));
}
for (size_t i = 0; i < output_num; i++) {
std::vector<size_t> shape_i = AnfAlgo::GetOutputDeviceShape(anf_node, i);
TypePtr type_ptr = TypeIdToType(AnfAlgo::GetOutputDeviceDataType(anf_node, i));
MS_EXCEPTION_IF_NULL(type_ptr);
int64_t size_i = 1;
for (size_t j = 0; j < shape_i.size(); j++) {
size_i = LongMulWithOverflowCheck(size_i, static_cast<int>(shape_i[j]));
}
size_t type_byte = GetTypeByte(type_ptr);
if (type_byte == 0) {
return false;
}
size_i = LongMulWithOverflowCheck(size_i, SizeToInt(type_byte));
output_size_list_.push_back(LongToSize(size_i));
}
return true;
}
bool HostKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
return true;
}
std::vector<TaskInfoPtr> HostKernelMod::GenTask(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &, uint32_t) {
return {};
}
} // namespace kernel
} // namespace mindspore
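
HostKernelMod::Init above derives each input/output byte size as the product of the device shape dims times the element width, guarded by LongMulWithOverflowCheck, and bails out when the type width is unknown. A sketch of that size computation; the byte widths listed here are illustrative:

# Sketch of the tensor byte-size computation in HostKernelMod::Init.
TYPE_BYTES = {"float16": 2, "float32": 4, "int32": 4, "int64": 8}

def tensor_byte_size(shape, dtype):
    size = TYPE_BYTES.get(dtype, 0)
    if size == 0:
        return None  # mirrors Init() returning false on an unknown type
    for dim in shape:
        size *= dim  # Init() guards each multiply with LongMulWithOverflowCheck
    return size

print(tensor_byte_size([32, 128], "float32"))  # 16384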

@@ -0,0 +1,86 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_MOD_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_MOD_H_
#include <vector>
#include <memory>
#include <string>
#include <map>
#include <utility>
#include "backend/kernel_compiler/ascend_kernel_mod.h"
namespace mindspore {
namespace kernel {
class HostKernelMod : public AscendKernelMod {
public:
HostKernelMod() = default;
~HostKernelMod() override = default;
const std::vector<size_t> &GetInputSizeList() const override;
const std::vector<size_t> &GetOutputSizeList() const override;
const std::vector<size_t> &GetWorkspaceSizeList() const override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &, uint32_t) override;
device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override = 0;
bool Init(const AnfNodePtr &anf_node);
protected:
AnfNodePtr anf_node_;
std::string op_name_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
};
using HostKernelModPtr = std::shared_ptr<HostKernelMod>;
using HostKernelModPtrList = std::vector<HostKernelModPtr>;
using HostKernelCreater = std::function<std::shared_ptr<HostKernelMod>()>;
class HostKernelFactory {
HostKernelFactory() = default;
~HostKernelFactory() = default;
public:
static HostKernelFactory &Get();
void Registe(const string &name, HostKernelCreater &&fun);
static std::shared_ptr<HostKernelMod> Get(const string &name);
private:
std::map<string, HostKernelCreater> hostKernelMap_;
};
class _HostKernelRegister {
public:
_HostKernelRegister(const string &name, HostKernelCreater &&fun) {
HostKernelFactory::Get().Registe(name, std::move(fun));
}
~_HostKernelRegister() = default;
};
#define _MS_HOST_REG_KERNEL_REG(KNAME, clazz) \
static_assert(std::is_base_of<HostKernelMod, clazz>::value, " must be base of HostKernelMod"); \
static const _HostKernelRegister g_##KNAME##_##_kernel_reg(#KNAME, []() { \
std::shared_ptr<clazz> ptr = nullptr; \
ptr = std::make_shared<clazz>(); \
MS_EXCEPTION_IF_NULL(ptr); \
return ptr; \
});
#define MS_HOST_REG_KERNEL(KNAME, clazz) _MS_HOST_REG_KERNEL_REG(KNAME, clazz)
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_MOD_H_
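
MS_HOST_REG_KERNEL expands to a static _HostKernelRegister whose constructor runs before main() and records a creator lambda in HostKernelFactory; that is how DynamicShapeKernelMod is registered in dynamic_shape_kernel.h above. The same self-registration pattern, sketched in Python with a decorator standing in for the static object (all names here are illustrative):

# Sketch of the self-registering kernel factory pattern behind MS_HOST_REG_KERNEL.
_host_kernel_map = {}

def register_host_kernel(name):
    def wrapper(cls):
        _host_kernel_map[name] = cls  # runs at import time, like the static registrar
        return cls
    return wrapper

def get_host_kernel(name):
    creator = _host_kernel_map.get(name)
    return creator() if creator else None

@register_host_kernel("DynamicShape")
class DynamicShapeKernelMod:
    pass

print(type(get_host_kernel("DynamicShape")).__name__)  # DynamicShapeKernelMod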

@@ -174,6 +174,9 @@ void KernelPack::ParseKernelJson(const nlohmann::json &js) {
   kernel_json_info_.block_dim = js["blockDim"];
   kernel_json_info_.kernel_name = js["kernelName"];
   kernel_json_info_.magic = js["magic"];
+  if (js.contains("opParaSize")) {
+    kernel_json_info_.op_para_size = js["opParaSize"];
+  }
   if (js.find("parameters") != js.end()) {
     if (!js.at("parameters").is_array()) {
       MS_LOG(DEBUG) << "Format error!,parameters should be array.";

@@ -25,9 +25,18 @@
 #include "ir/tensor.h"
 #include "abstract/dshape.h"
 #include "utils/log_adapter.h"
+#include "runtime/device/executor/dynamic_kernel.h"

 namespace mindspore {
-enum KernelType : int { UNKNOWN_KERNEL_TYPE = 0, AKG_KERNEL, AICPU_KERNEL, RT_KERNEL, HCCL_KERNEL, TBE_KERNEL };
+enum KernelType : int {
+  UNKNOWN_KERNEL_TYPE = 0,
+  AKG_KERNEL,
+  AICPU_KERNEL,
+  RT_KERNEL,
+  HCCL_KERNEL,
+  TBE_KERNEL,
+  HOST_KERNEL
+};
 namespace kernel {
 // Supported fusion type
@@ -69,7 +78,8 @@ struct KernelJsonInfo {
   std::vector<size_t> parameters;
   std::string sha256;
   std::vector<size_t> workspaces;
-  KernelJsonInfo() : block_dim(0) {}
+  uint32_t op_para_size;
+  KernelJsonInfo() : block_dim(0), op_para_size(0) {}
 };

 class KernelPack {
@@ -118,6 +128,7 @@ class KernelMod {
   virtual const std::vector<size_t> &GetWorkspaceSizeList() const = 0;
   virtual bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                       const std::vector<AddressPtr> &outputs, void *stream_ptr) = 0;
+  virtual device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) { return nullptr; }
   virtual std::vector<size_t> GenParameters() { return {}; }
   virtual void ReleaseResource() {}

@@ -83,8 +83,8 @@ std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo>
   while (!build_manger->IsAllTaskFinish()) {
     int task_id = -1;
     std::string task_result;
-    std::string pre_build_result;
-    auto ret = build_manger->WaitOne(&task_id, &task_result, &pre_build_result);
+    std::string build_result;
+    auto ret = build_manger->WaitOne(&task_id, &task_result, &build_result);
     if (!ret) {
       MS_EXCEPTION(ArgumentError) << "Build Failed. wait one ret:" << ret << ", task id:" << task_id;
     }
@@ -94,7 +94,7 @@ std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo>
                   << " change to single op build.";
       build_failed_num++;
     }
-    auto kernel_mod_item = build_manger->TaskFinishProcess(task_id, false);
+    auto kernel_mod_item = build_manger->TaskFinishProcess(task_id, build_result, false);
     if (kernel_mod_item.second != nullptr) {
       (void)kernel_mod_ret.emplace(kernel_mod_item);
     }

@@ -18,6 +18,7 @@
 #include <memory>
 #include <algorithm>
 #include "backend/kernel_compiler/aicpu/aicpu_kernel_metadata.h"
+#include "backend/kernel_compiler/host/host_kernel_metadata.h"
 #include "backend/kernel_compiler/rts/rt_kernel_info.h"
 #include "backend/kernel_compiler/hccl/hccl_kernel_metadata.h"
 #include "backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_select.h"
@@ -86,6 +87,9 @@ void KernelQueryAll(const CNodePtr &kernel_node,
   if (kernel_info_list->empty()) {
     HcclMetadataInfo(kernel_node, kernel_info_list);
   }
+  if (kernel_info_list->empty()) {
+    HostMetadataInfo(kernel_node, kernel_info_list);
+  }
   if (kernel_info_list->empty()) {
     MS_EXCEPTION(NotExistsError)
       << "Failed to obtain operator info, Please check whether the operator info is registered, Op full name:"

@@ -102,6 +102,7 @@ class OpInfo {
     kernel_name_ = opinfo.kernel_name();
     partial_flag_ = opinfo.partial_flag_;
     dynamic_format_ = opinfo.dynamic_format_;
+    dynamic_shape_ = opinfo.dynamic_shape_;
     op_pattern_ = opinfo.op_pattern();
     processor_ = opinfo.processor_;
     for (const auto &attr : opinfo.attrs_ptr()) {
@@ -122,12 +123,14 @@ class OpInfo {
   std::string fusion_type() const { return fusion_type_; }
   std::string kernel_name() const { return kernel_name_; }
   OpPattern op_pattern() const { return op_pattern_; }
+  bool dynamic_shape() const { return dynamic_shape_; }
   std::string processor() const { return processor_; }
   std::vector<std::shared_ptr<OpAttr>> attrs_ptr() const { return attrs_ptr_; }
   std::vector<std::shared_ptr<OpIOInfo>> inputs_ptr() const { return inputs_ptr_; }
   std::vector<std::shared_ptr<OpIOInfo>> outputs_ptr() const { return outputs_ptr_; }
   const std::unordered_map<size_t, size_t> &ref_infos() const { return ref_infos_; }

+  void set_dynamic_shape(bool dynamic_shape) { dynamic_shape_ = dynamic_shape; }
   void set_op_name(const std::string &op_name) { op_name_ = op_name; }
   void set_imply_type(const OpImplyType imply_type) { imply_type_ = imply_type; }
   void set_impl_path(const std::string &impl_path) { impl_path_ = impl_path; }
@@ -149,7 +152,8 @@ class OpInfo {
   void ClearOutputs() { (void)outputs_ptr_.clear(); }
   bool equals_to(const std::shared_ptr<OpInfo> &other_info) const {
     return this->op_name_ == other_info->op_name_ && this->imply_type_ == other_info->imply_type_ &&
-           this->processor_ == other_info->processor_;
+           this->processor_ == other_info->processor_ && this->op_pattern_ == other_info->op_pattern_ &&
+           this->dynamic_shape_ == other_info->dynamic_shape_;
   }

  private:
@@ -163,6 +167,7 @@ class OpInfo {
   std::string kernel_name_;
   bool partial_flag_ = false;
   bool dynamic_format_ = false;
+  bool dynamic_shape_ = false;
   OpPattern op_pattern_ = kCommonPattern;
   std::string processor_;
   std::vector<std::shared_ptr<OpAttr>> attrs_ptr_;

@@ -38,6 +38,7 @@ constexpr auto kDynamicFormat = "dynamicFormat";
 constexpr auto kFormatAgnostic = "formatAgnostic";
 constexpr auto kBroadcast = "broadcast";
 constexpr auto kReduce = "reduce";
+constexpr auto kDynamicShape = "dynamic_shape";
 constexpr auto kDtypeFormat = "dtype_format";
 constexpr auto kAttr = "attr";
 constexpr auto kIputs = "inputs";
@@ -111,6 +112,10 @@ void OpLib::DecodeTBESpecificInfo(const nlohmann::json &obj, const std::shared_p
   op_info->set_kernel_name(obj.at(kKernelName));
   op_info->set_partial_flag(obj.at(kPartialFlag));

+  if (obj.find(kDynamicShape) != obj.end()) {
+    op_info->set_dynamic_shape(obj.at(kDynamicShape));
+  }
+
   if (obj.find(kOpPattern) != obj.end()) {
     std::string op_pattern = obj.at(kOpPattern);
     auto find_iter = kOpPatternMap.find(op_pattern);
@@ -322,7 +327,7 @@ bool OpLib::DecodeInputOutput(const nlohmann::json &obj, const OpImplyType imply
   return ret;
 }

-std::shared_ptr<OpInfo> OpLib::FindOp(const std::string &op_name, OpImplyType imply_type) {
+std::shared_ptr<OpInfo> OpLib::FindOp(const std::string &op_name, OpImplyType imply_type, bool is_dynamic_shape) {
   if (!OpLib::RegOpFromLocalInfo()) {
     MS_LOG(INFO) << "Warning reg local op info failed.";
   }
@@ -338,16 +343,20 @@ std::shared_ptr<OpInfo> OpLib::FindOp(const std::string &op_name, OpImplyType im
   for (auto [iter, end] = op_info_.equal_range(op_name); iter != end; ++iter) {
     auto &op_info = iter->second;
     MS_EXCEPTION_IF_NULL(op_info);
     if (op_info->imply_type() != imply_type) {
       continue;
     }
     if (imply_type == kAKG && op_info->processor() != target_processor) {
       continue;
     }
+    if (is_dynamic_shape && !op_info->dynamic_shape()) {
+      continue;
+    }
     return op_info;
   }
   MS_LOG(INFO) << "FindOp failed: opname: " << op_name << ", imply_type: " << ImplTypeToStr(imply_type)
-               << ", current op num: " << op_info_.size();
+               << ", current op num: " << op_info_.size() << " is_dynamic_shape:" << is_dynamic_shape;
   return nullptr;
 }
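
With the changes above, FindOp filters candidates on a third axis: besides imply type and processor, a dynamic-shape query only matches op infos registered with "dynamic_shape": true in their TBE json. A sketch of that selection logic over illustrative data:

# Sketch of the dynamic-shape-aware FindOp lookup; op entries are illustrative.
op_info_ = {
    "Add": [
        {"imply_type": "TBE", "dynamic_shape": False},
        {"imply_type": "TBE", "dynamic_shape": True},
    ],
}

def find_op(op_name, imply_type, is_dynamic_shape=False):
    for op_info in op_info_.get(op_name, []):
        if op_info["imply_type"] != imply_type:
            continue
        if is_dynamic_shape and not op_info["dynamic_shape"]:
            continue
        return op_info
    return None

print(find_op("Add", "TBE", is_dynamic_shape=True))  # picks the dynamic variant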

Some files were not shown because too many files have changed in this diff.
