!6728 [Ascend][DynamicShape] Dynamic shape feature

Merge pull request !6728 from caifubi/dynamic_shape_share_2
pull/6728/MERGE
mindspore-ci-bot 4 years ago committed by Gitee
commit c951d42c2c

@@ -18,6 +18,7 @@ import os
 import sys
 from te.platform.cce_conf import te_set_version
 from te.platform.fusion_util import fusion_op
+import te
 from common import check_kernel_info, get_args, get_build_in_impl_path

 build_in_impl_path = get_build_in_impl_path()
@@ -38,6 +39,16 @@ def _initialize(impl_path):
     sys.path.insert(0, op_module_name)

+def _replace_range(args):
+    for arg in args:
+        if not arg.__contains__('range'):
+            continue
+        shape_range = arg["range"]
+        for range_item in shape_range:
+            for index, value in enumerate(range_item):
+                if value < 0:
+                    range_item[index] = None
+
 def build_op(build_type, json_str):
     """
     call op functions with function name and input args json_str
@@ -71,11 +82,18 @@ def build_op(build_type, json_str):
         outputs_args = get_args(kernel_info['op_info'], 'outputs')
         attrs_args = get_args(kernel_info['op_info'], 'attrs')
         kernel_name = kernel_info['op_info']['kernel_name']
+        is_dynamic_shape = kernel_info['op_info']['is_dynamic_shape']
+        if is_dynamic_shape:
+            _replace_range(inputs_args)
+            _replace_range(outputs_args)

         if custom_flag:
             op_module = __import__(op_name)
         else:
-            op_module = __import__("impl."+op_name, globals(), locals(), [op_name], 0)
+            if is_dynamic_shape:
+                op_module = __import__("impl.dynamic."+op_name, globals(), locals(), [op_name], 0)
+            else:
+                op_module = __import__("impl."+op_name, globals(), locals(), [op_name], 0)
         # get function
         if build_type == op_build:
             if custom_flag:
@@ -92,7 +110,12 @@ def build_op(build_type, json_str):
         if kernel_name[0:19] == "bounding_box_encode":
             return op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name_val=kernel_name)

-        return op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name)
+        if is_dynamic_shape:
+            with te.op.dynamic():
+                op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name)
+                return te.op.get_compile_info()
+        else:
+            return op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name)
     except Exception as e:
         raise RuntimeError(e)
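
The hunk above is the heart of the TBE dynamic-shape compile path: range bounds of -1 (unknown) are rewritten to None, the operator implementation is imported from impl.dynamic instead of impl, and compilation runs inside te.op.dynamic() so the compile info can be fetched afterwards. A minimal standalone sketch of the range normalisation; the sample arg below is illustrative, not taken from the PR:

def _replace_range(args):
    # Rewrite negative (i.e. unknown) range bounds to None, as the hunk above does.
    for arg in args:
        if 'range' not in arg:
            continue
        for range_item in arg["range"]:
            for index, value in enumerate(range_item):
                if value < 0:
                    range_item[index] = None

inputs = [{"shape": [-1, 16], "range": [[1, -1], [16, 16]]}]
_replace_range(inputs)
print(inputs)  # [{'shape': [-1, 16], 'range': [[1, None], [16, 16]]}]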

@@ -78,6 +78,7 @@ def _check_supported(kernel_info):
     """
     try:
         op_name = kernel_info['op_info']['name']
+        is_dynamic_shape = kernel_info['op_info']['is_dynamic_shape']
         impl_path = build_in_impl_path
         custom_flag = False
         if 'impl_path' in kernel_info and kernel_info['impl_path'] is not None:
@@ -92,8 +93,11 @@ def _check_supported(kernel_info):
         if custom_flag:
             op_module = __import__(op_name)
+        elif is_dynamic_shape:
+            op_module = __import__("impl.dynamic." + op_name, globals(), locals(), [op_name], 0)
         else:
             op_module = __import__("impl." + op_name, globals(), locals(), [op_name], 0)
         # get function
         if not hasattr(op_module, "check_supported"):
             return ""

@@ -219,6 +219,7 @@ if (ENABLE_D)
     set(ASCEND_DRIVER_PATH ${ASCEND_PATH}/driver/lib64/common)
     set(ASCEND_DRIVER_BACK_PATH ${ASCEND_PATH}/driver/lib64/driver)
     set(ASCEND_RUNTIME_PATH ${ASCEND_PATH}/fwkacllib/lib64)
+    set(ASCEND_OPP_PATH ${ASCEND_PATH}/opp/op_impl/built-in/ai_core/tbe/op_tiling)
 endif()
 MESSAGE("USE DAV LIB PATH: ${ASCEND_PATH}")
@@ -228,7 +229,8 @@ if (ENABLE_D)
     find_library(TSDCLIENT tsdclient HINTS ${ASCEND_RUNTIME_PATH} ${ASCEND_DRIVER_BACK_PATH})
     find_library(DATATRANSFER datatransfer HINTS ${ASCEND_RUNTIME_PATH} ${ASCEND_DRIVER_BACK_PATH})
     find_library(PROFILING msprofiler ${ASCEND_RUNTIME_PATH})
-    target_link_libraries(mindspore ge_runtime ${CCE_LIB} ${RUNTIME_LIB} ${TSDCLIENT} ${HCCL} ${DATATRANSFER})
+    find_library(OPTILING optiling ${ASCEND_OPP_PATH})
+    target_link_libraries(mindspore ge_runtime ${CCE_LIB} ${RUNTIME_LIB} ${TSDCLIENT} ${HCCL} ${DATATRANSFER} ${OPTILING})
     target_link_libraries(mindspore -Wl,--start-group proto_input ${PROFILING} mindspore::protobuf -Wl,--end-group)
 elseif (CMAKE_SYSTEM_NAME MATCHES "Windows")
     target_link_libraries(mindspore -Wl,--start-group proto_input mindspore::protobuf mindspore::sentencepiece -Wl,--end-group)
@@ -258,6 +260,7 @@ if (ENABLE_D)
     set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64)
     set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/fwkacllib/lib64)
     set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/add-ons)
+    set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/opp/op_impl/built-in/ai_core/tbe/op_tiling)
 elseif (ENABLE_GPU)
     set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/cuda/lib64)
 endif ()
@@ -315,6 +318,8 @@ add_library(inference SHARED
     ${CMAKE_CURRENT_SOURCE_DIR}/backend/session/infer_session.cc
     ${LOAD_ONNX_SRC}
 )
+set_target_properties(inference PROPERTIES INSTALL_RPATH ${MINDSPORE_RPATH})
 target_link_libraries(inference PRIVATE ${PYTHON_LIBRARIES} ${SECUREC_LIBRARY}
     -Wl,--whole-archive mindspore proto_input -Wl,--no-whole-archive mindspore_gvar)

@@ -15,6 +15,7 @@ if (ENABLE_D)
         "akg/akg_kernel_attrs_process.cc"
         "akg/akg_kernel_metadata.cc"
         "tbe/*.cc"
+        "host/*.cc"
         "aicpu/*.cc"
         "rts/*.cc"
         "hccl/*.cc"

@@ -289,51 +289,25 @@ bool CreateNodeDefBytes(const std::shared_ptr<AnfNode> &anf_node,
   return true;
 }

-bool CreateExtInfo(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
-  if (!anf_node->isa<CNode>()) {
-    return true;
-  }
-  if (!AnfAlgo::IsDynamicShape(anf_node)) {
-    return true;
-  }
-  MS_LOG(INFO) << "CreateExtInfo start, " << anf_node->fullname_with_scope();
-  int32_t unknown_shape_type = UnknowShapeOpType::DEPEND_COMPUTE;
-  uint64_t ext_info_head_len = kExtInfoHeadSize;
-  std::string ext_info;
-  size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
-  size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
-  // 1.addr:unknown shape type
-  uint64_t ext_info_len = ext_info.size();
-  ext_info_len += ext_info_head_len + sizeof(int32_t);
-  // 2.addr:input ShapeAndType
-  ext_info_len += ext_info_head_len + input_num * sizeof(ShapeAndType);
-  // 3.addr:output ShapeAndType
-  ext_info_len += ext_info_head_len + output_num * sizeof(ShapeAndType);
-  uint64_t ext_info_offset = ext_info.size();
-  ext_info.resize(ext_info_len, 0);
-  char *ext_info_buf = ext_info.data();
+uint64_t SetExtInfoShapeType(char *ext_info_buf, uint64_t ext_info_offset) {
   // deal1: unknown shape type
   ExtInfo *info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
   info->infoType = FWK_ADPT_EXT_SHAPE_TYPE;
   info->infoLen = sizeof(int32_t);
-  ext_info_offset += ext_info_head_len;
+  ext_info_offset += kExtInfoHeadSize;
   int32_t *shape_type = reinterpret_cast<int32_t *>(ext_info_buf + ext_info_offset);
-  *shape_type = unknown_shape_type;
+  *shape_type = UnknowShapeOpType::DEPEND_COMPUTE;
   ext_info_offset += info->infoLen;
+  return ext_info_offset;
+}
+
+uint64_t SetExtInfoInputShapeType(char *ext_info_buf, uint64_t ext_info_offset,
+                                  const std::shared_ptr<AnfNode> &anf_node, size_t input_num) {
   // deal2:input ShapeAndType
-  info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
+  ExtInfo *info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
   info->infoType = FWK_ADPT_EXT_INPUT_SHAPE;
   info->infoLen = input_num * sizeof(ShapeAndType);
-  ext_info_offset += ext_info_head_len;
+  ext_info_offset += kExtInfoHeadSize;
   ShapeAndType *inputs = reinterpret_cast<ShapeAndType *>(ext_info_buf + ext_info_offset);

   for (size_t input_index = 0; input_index < input_num; input_index++) {
@@ -364,12 +338,16 @@ bool CreateExtInfo(const std::shared_ptr<AnfNode> &anf_node, const std::shared_p
     }
   }
   ext_info_offset += info->infoLen;
+  return ext_info_offset;
+}
+
+uint64_t SetExtInfoOutputShapeType(char *ext_info_buf, uint64_t ext_info_offset,
+                                   const std::shared_ptr<AnfNode> &anf_node, size_t output_num) {
   // deal3:output ShapeAndType
-  info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
+  ExtInfo *info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
   info->infoType = FWK_ADPT_EXT_OUTPUT_SHAPE;
   info->infoLen = output_num * sizeof(ShapeAndType);
-  ext_info_offset += ext_info_head_len;
+  ext_info_offset += kExtInfoHeadSize;
   ShapeAndType *outputs = reinterpret_cast<ShapeAndType *>(ext_info_buf + ext_info_offset);

   for (size_t output_index = 0; output_index < output_num; output_index++) {
@@ -387,6 +365,47 @@ bool CreateExtInfo(const std::shared_ptr<AnfNode> &anf_node, const std::shared_p
     }
   }
+  ext_info_offset += info->infoLen;
+  return ext_info_offset;
+}
+
+bool CreateExtInfo(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
+  MS_EXCEPTION_IF_NULL(anf_node);
+  MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
+  if (!anf_node->isa<CNode>()) {
+    return true;
+  }
+  if (!AnfAlgo::IsDynamicShape(anf_node)) {
+    return true;
+  }
+  MS_LOG(INFO) << "CreateExtInfo start, " << anf_node->fullname_with_scope();
+  uint64_t ext_info_head_len = kExtInfoHeadSize;
+  std::string ext_info;
+  size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
+  size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
+  // 1.addr:unknown shape type
+  uint64_t ext_info_len = ext_info.size();
+  ext_info_len += ext_info_head_len + sizeof(int32_t);
+  // 2.addr:input ShapeAndType
+  ext_info_len += ext_info_head_len + input_num * sizeof(ShapeAndType);
+  // 3.addr:output ShapeAndType
+  ext_info_len += ext_info_head_len + output_num * sizeof(ShapeAndType);
+  uint64_t ext_info_offset = ext_info.size();
+  ext_info.resize(ext_info_len, 0);
+  char *ext_info_buf = ext_info.data();
+  ext_info_offset = SetExtInfoShapeType(ext_info_buf, ext_info_offset);
+  ext_info_offset = SetExtInfoInputShapeType(ext_info_buf, ext_info_offset, anf_node, input_num);
+  ext_info_offset = SetExtInfoOutputShapeType(ext_info_buf, ext_info_offset, anf_node, output_num);
+  MS_LOG(INFO) << "Check ext_info_len:" << ext_info_len << " ext_info_offset:" << ext_info_offset;
   // set ext info
   kernel_mod_ptr->SetExtInfo(ext_info);
   return true;
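
The refactor above splits CreateExtInfo into three helpers but keeps the buffer layout: each record is an 8-byte head (int32 infoType, uint32 infoLen; kExtInfoHeadSize = 8) followed by its payload, with the shape-type record first and then one ShapeAndType array each for inputs and outputs. A small Python sketch of that packing, assuming little-endian layout and assumed numeric values for the enum constants:

import struct

FWK_ADPT_EXT_SHAPE_TYPE = 0      # first value of FWKTaskExtInfoType above
FWK_ADPT_EXT_INPUT_SHAPE = 1     # assumed next enum value
DEPEND_COMPUTE = 3               # assumed value of UnknowShapeOpType::DEPEND_COMPUTE
SHAPE_AND_TYPE_SIZE = 4 + 8 * 8  # int32 type + int64 dims[kMaxShapeDims = 8]

def ext_info_record(info_type, payload):
    # kExtInfoHeadSize bytes of head (infoType, infoLen), then the payload.
    return struct.pack('<iI', info_type, len(payload)) + payload

shape_type_record = ext_info_record(FWK_ADPT_EXT_SHAPE_TYPE, struct.pack('<i', DEPEND_COMPUTE))
print(len(shape_type_record))  # 12 = kExtInfoHeadSize + sizeof(int32_t)

# Input record for 2 inputs: infoLen = input_num * sizeof(ShapeAndType).
input_record = ext_info_record(FWK_ADPT_EXT_INPUT_SHAPE, bytes(2 * SHAPE_AND_TYPE_SIZE))
print(len(input_record))  # 8 + 2 * 68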

@@ -26,8 +26,13 @@
 #include "utils/convert_utils.h"
 #include "backend/kernel_compiler/aicpu/aicpu_util.h"
 #include "utils/ms_context.h"
+#include "runtime/device/ascend/executor/ai_cpu_dynamic_kernel.h"
+#include "runtime/device/kernel_runtime.h"
+#include "runtime/device/ascend/executor/host_dynamic_kernel.h"

 using AicpuTaskInfoPtr = std::shared_ptr<ge::model_runner::AicpuTaskInfo>;
+using AicpuDynamicKernel = mindspore::device::ascend::AiCpuDynamicKernel;
+using HostDynamicKernel = mindspore::device::ascend::HostDynamicKernel;

 namespace mindspore {
 namespace kernel {
@@ -93,7 +98,7 @@ void AicpuOpKernelMod::CreateCpuKernelInfo(const std::vector<AddressPtr> &inputs
   param_len += node_def_len;
   param_len += sizeof(uint32_t);

-  AicpuParamHead aicpu_param_head;
+  AicpuParamHead aicpu_param_head{};
   aicpu_param_head.length = param_len;
   aicpu_param_head.ioAddrNum = io_addrs_num;
@@ -178,5 +183,15 @@ std::vector<TaskInfoPtr> AicpuOpKernelMod::GenTask(const std::vector<AddressPtr>
   MS_LOG(INFO) << "AicpuOpKernelMod GenTask end";
   return {task_info_ptr};
 }
+
+device::DynamicKernelPtr AicpuOpKernelMod::GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) {
+  AddressPtrList kernel_inputs;
+  AddressPtrList kernel_workspaces;
+  AddressPtrList kernel_outputs;
+  device::KernelRuntime::GenLaunchArgs(*this, cnode_ptr, &kernel_inputs, &kernel_workspaces, &kernel_outputs);
+  CreateCpuKernelInfo(kernel_inputs, kernel_outputs);
+  return std::make_shared<AicpuDynamicKernel>(stream_ptr, cnode_ptr, args_, ext_info_, node_so_, node_name_);
+}
 }  // namespace kernel
 }  // namespace mindspore

@@ -31,6 +31,7 @@ class AicpuOpKernelMod : public AscendKernelMod {
   std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                                    const std::vector<AddressPtr> &outputs, uint32_t stream_id) override;
+  device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override;
   void SetInputList(const std::vector<int64_t> &inputList);
   void SetOutputList(const std::vector<int64_t> &outputList);

@@ -20,7 +20,7 @@
 namespace mindspore {
 namespace kernel {
-static std::map<int32_t, int32_t> MS_PROTO_DATA_TYPE_MAP = {
+static const std::map<int32_t, int32_t> kMsProtoDataTypeMap = {
   {mindspore::TypeId::kTypeUnknown, mindspore::DataType::MS_UNKNOWN},
   {mindspore::TypeId::kNumberTypeBool, mindspore::DataType::MS_BOOL},
   {mindspore::TypeId::kNumberTypeInt, mindspore::DataType::MS_INT32},
@@ -39,14 +39,38 @@ static const std::map<int32_t, int32_t> kMsProtoDataTypeMap = {
   {mindspore::TypeId::kNumberTypeFloat64, mindspore::DataType::MS_FLOAT64},
 };

+static const std::map<int32_t, int32_t> kProtoDataTypeToMsDataTypeMap = {
+  {mindspore::DataType::MS_UNKNOWN, mindspore::TypeId::kTypeUnknown},
+  {mindspore::DataType::MS_BOOL, mindspore::TypeId::kNumberTypeBool},
+  {mindspore::DataType::MS_INT32, mindspore::TypeId::kNumberTypeInt32},
+  {mindspore::DataType::MS_INT8, mindspore::TypeId::kNumberTypeInt8},
+  {mindspore::DataType::MS_INT16, mindspore::TypeId::kNumberTypeInt16},
+  {mindspore::DataType::MS_INT64, mindspore::TypeId::kNumberTypeInt64},
+  {mindspore::DataType::MS_UINT8, mindspore::TypeId::kNumberTypeUInt8},
+  {mindspore::DataType::MS_UINT16, mindspore::TypeId::kNumberTypeUInt16},
+  {mindspore::DataType::MS_UINT32, mindspore::TypeId::kNumberTypeUInt32},
+  {mindspore::DataType::MS_UINT64, mindspore::TypeId::kNumberTypeUInt64},
+  {mindspore::DataType::MS_FLOAT16, mindspore::TypeId::kNumberTypeFloat16},
+  {mindspore::DataType::MS_FLOAT32, mindspore::TypeId::kNumberTypeFloat32},
+  {mindspore::DataType::MS_FLOAT64, mindspore::TypeId::kNumberTypeFloat64},
+};
+
 int AicpuOpUtil::MsTypeToProtoType(TypeId ms_type) {
-  auto iter = MS_PROTO_DATA_TYPE_MAP.find(ms_type);
-  if (iter != MS_PROTO_DATA_TYPE_MAP.end()) {
-    return MS_PROTO_DATA_TYPE_MAP[ms_type];
-  } else {
+  auto iter = kMsProtoDataTypeMap.find(ms_type);
+  if (iter == kMsProtoDataTypeMap.end()) {
     MS_LOG(ERROR) << "UnSupported ms_type value" << static_cast<int>(ms_type);
     return -1;
   }
+  return iter->second;
+}
+
+int AicpuOpUtil::ProtoTypeToMsType(int proto_type) {
+  auto iter = kProtoDataTypeToMsDataTypeMap.find(proto_type);
+  if (iter == kProtoDataTypeToMsDataTypeMap.end()) {
+    MS_LOG(ERROR) << "UnSupported proto_type value:" << proto_type;
+    return -1;
+  }
+  return iter->second;
+}
 }  // namespace kernel
 }  // namespace mindspore

@@ -55,13 +55,6 @@ struct AicpuParamHead {
   uint64_t extInfoAddr;  // extInfo address
 } __attribute__((packed));

-const uint32_t kExtInfoHeadSize = 8;
-struct ExtInfo {
-  int32_t infoType;  // extend type
-  uint32_t infoLen;  // length for infoMsg
-  char infoMsg[0];   // extend value
-} __attribute__((packed));
-
 // Extent info ShapeAndType
 const uint32_t kMaxShapeDims = 8;
 struct ShapeAndType {
@@ -69,6 +62,14 @@ struct ShapeAndType {
   int64_t dims[kMaxShapeDims];
 } __attribute__((packed));

+// Extend info structure for extInfoAddr
+const uint32_t kExtInfoHeadSize = 8;
+struct ExtInfo {
+  int32_t infoType;  // extend type
+  uint32_t infoLen;  // length for infoMsg
+  char infoMsg[0];   // extend value
+} __attribute__((packed));
+
 // Extend Info type for task
 enum FWKTaskExtInfoType {
   FWK_ADPT_EXT_SHAPE_TYPE = 0,
@@ -88,6 +89,7 @@ enum UnknowShapeOpType {
 class AicpuOpUtil {
  public:
   static int MsTypeToProtoType(TypeId ms_type);
+  static int ProtoTypeToMsType(int proto_type);

  private:
   // kernel id

@@ -15,15 +15,34 @@
  */

 #include "backend/kernel_compiler/hccl/hccl_kernel.h"
+
+#include <map>
 #include "runtime/device/ascend/tasksink/runtime_utils.h"
 #include "backend/session/anf_runtime_algorithm.h"
 #include "utils/utils.h"
 #include "utils/ms_context.h"
+#include "runtime/device/kernel_runtime.h"
+#include "runtime/device/ascend/executor/hccl_dynamic_kernel.h"

 using HcclTaskInfoPtr = std::shared_ptr<ge::model_runner::HcclTaskInfo>;
 using ge::model_runner::HcclTaskInfo;
 using mindspore::device::ascend::tasksink::RuntimeUtils;

+namespace {
+static std::map<std::string, std::string> kMsOpNameToHcomHcclType = {
+  {mindspore::kAllReduceOpName, mindspore::kHcomOpTypeAllReduce},
+  {mindspore::kAllGatherOpName, mindspore::kHcomOpTypeAllGather},
+  {mindspore::kBroadcastOpName, mindspore::kHcomOpTypeBroadcast},
+  {mindspore::kReduceScatterOpName, mindspore::kHcomOpTypeReduceScatter}};
+
+std::string MsOpNameToHcomOpType(const std::string &ms_op_type) {
+  auto iter = kMsOpNameToHcomHcclType.find(ms_op_type);
+  if (iter == kMsOpNameToHcomHcclType.end()) {
+    MS_LOG(EXCEPTION) << "Invalid MsOpType:" << ms_op_type;
+  }
+  return iter->second;
+}
+}  // namespace
+
 namespace mindspore {
 namespace kernel {
 void HcclKernelFactory::Registe(const std::string &name, HcclKernelCreater &&fun) {
@@ -156,5 +175,30 @@ std::vector<TaskInfoPtr> HcclKernel::GenTask(const std::vector<AddressPtr> &inpu
   MS_EXCEPTION_IF_NULL(task_info_ptr);
   return {task_info_ptr};
 }
+
+device::DynamicKernelPtr HcclKernel::GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) {
+  AddressPtrList inputs;
+  AddressPtrList workspaces;
+  AddressPtrList outputs;
+  device::KernelRuntime::GenLaunchArgs(*this, cnode_ptr, &inputs, &workspaces, &outputs);
+
+  std::string hccl_type = MsOpNameToHcomOpType(AnfAlgo::GetCNodeName(anf_node_));
+
+  if (inputs.empty()) {
+    MS_LOG(EXCEPTION) << "Hccl kernel input is empty";
+  }
+  if (hccl_data_type_list_.empty()) {
+    MS_LOG(EXCEPTION) << "Hccl data type list is empty";
+  }
+  MS_EXCEPTION_IF_NULL(inputs.at(0));
+  auto input_data_addr = inputs.at(0)->addr;
+  MS_EXCEPTION_IF_NULL(outputs.at(0));
+  auto output_data_addr = outputs.at(0)->addr;
+  HcclDataType data_type = hccl_data_type_list_[0];
+
+  auto executor = std::make_shared<device::ascend::HcclDynamicKernel>(
+    hccl_type, input_data_addr, output_data_addr, hccl_count_, data_type, op_type_, root_id_, stream_ptr, cnode_ptr);
+  return executor;
+}
 }  // namespace kernel
 }  // namespace mindspore
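
GenDynamicKernel above resolves the HCCL op type through a fixed name map and fails fast on anything unregistered. The same lookup sketched standalone; the string values are illustrative stand-ins for the kHcomOpType* constants:

# Sketch of the MsOpNameToHcomOpType lookup; values are illustrative.
MS_TO_HCOM = {
    "AllReduce": "HcomAllReduce",
    "AllGather": "HcomAllGather",
    "Broadcast": "HcomBroadcast",
    "ReduceScatter": "HcomReduceScatter",
}

def ms_op_name_to_hcom_op_type(ms_op_type):
    try:
        return MS_TO_HCOM[ms_op_type]
    except KeyError:
        raise RuntimeError(f"Invalid MsOpType:{ms_op_type}")

print(ms_op_name_to_hcom_op_type("AllReduce"))  # HcomAllReduce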

@ -41,6 +41,7 @@ class HcclKernel : public AscendKernelMod {
const std::vector<size_t> &GetWorkspaceSizeList() const override; const std::vector<size_t> &GetWorkspaceSizeList() const override;
std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uint32_t stream_id) override; const std::vector<AddressPtr> &outputs, uint32_t stream_id) override;
device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override;
protected: protected:
std::vector<std::vector<size_t>> hccl_kernel_input_shape_list_; std::vector<std::vector<size_t>> hccl_kernel_input_shape_list_;

@@ -0,0 +1,52 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/host/dynamic_shape_kernel.h"
#include "backend/session/anf_runtime_algorithm.h"
namespace mindspore {
namespace kernel {
void DynamicShapeKernel::Execute() {
MS_LOG(INFO) << "Execute DynamicShapeKernel Start";
auto input_num = AnfAlgo::GetInputTensorNum(cnode_ptr_);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "Invalid Input Num:" << input_num;
}
auto prev_output_shape = AnfAlgo::GetPrevNodeOutputInferShape(cnode_ptr_, 0);
auto output_shape = std::vector<int>{SizeToInt(prev_output_shape.size())};
auto output_type = TypeId::kNumberTypeInt32;
auto output_tensor_for_sync = std::make_shared<tensor::Tensor>(output_type, output_shape);
auto data_ptr = static_cast<int32_t *>(output_tensor_for_sync->data_c());
for (size_t i = 0; i < prev_output_shape.size(); ++i) {
MS_LOG(INFO) << "DEBUG prev_output_shape[" << i << "]:" << prev_output_shape[i];
*(data_ptr + i) = prev_output_shape[i];
}
auto output_addr = AnfAlgo::GetOutputAddr(cnode_ptr_, 0);
MS_EXCEPTION_IF_NULL(output_addr);
output_addr->SyncHostToDevice(output_shape, LongToSize(output_tensor_for_sync->data().nbytes()),
output_tensor_for_sync->data_type(), output_tensor_for_sync->data_c());
MS_LOG(INFO) << "Execute DynamicShapeKernel End";
}
device::DynamicKernelPtr DynamicShapeKernelMod::GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) {
return std::make_shared<DynamicShapeKernel>(stream_ptr, cnode_ptr);
}
} // namespace kernel
} // namespace mindspore
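
DynamicShapeKernel::Execute above materialises the input's runtime shape as a 1-D int32 tensor on the host and syncs it to the device. Its host-side arithmetic, sketched without the device sync:

# Sketch of the DynamicShape op semantics; plain Python lists stand in for tensors.
def dynamic_shape_execute(input_shapes):
    # The op takes exactly one input, like the check in Execute() above.
    if len(input_shapes) != 1:
        raise RuntimeError("Invalid Input Num:%d" % len(input_shapes))
    # Output tensor: one int32 element per dimension of the input.
    return [int(dim) for dim in input_shapes[0]]

print(dynamic_shape_execute([(32, 128)]))  # [32, 128]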

@@ -0,0 +1,43 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_DYNAMIC_SHAPE_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_DYNAMIC_SHAPE_KERNEL_H_
#include <vector>
#include <memory>
#include <string>
#include "runtime/device/ascend/executor/host_dynamic_kernel.h"
#include "backend/kernel_compiler/host/host_kernel_mod.h"
using HostDynamicKernel = mindspore::device::ascend::HostDynamicKernel;
namespace mindspore {
namespace kernel {
class DynamicShapeKernel : public HostDynamicKernel {
public:
DynamicShapeKernel(void *stream, const CNodePtr &cnode_ptr) : HostDynamicKernel(stream, cnode_ptr) {}
~DynamicShapeKernel() override = default;
void Execute() override;
};
class DynamicShapeKernelMod : public HostKernelMod {
public:
DynamicShapeKernelMod() = default;
~DynamicShapeKernelMod() override = default;
device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override;
};
MS_HOST_REG_KERNEL(DynamicShape, DynamicShapeKernelMod);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_DYNAMIC_SHAPE_KERNEL_H_

@@ -0,0 +1,42 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/host/host_kernel_build.h"
#include <string>
#include "runtime/device/kernel_runtime.h"
#include "backend/kernel_compiler/host/host_kernel_mod.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "backend/session/kernel_graph.h"
#include "backend/kernel_compiler/common_utils.h"
namespace mindspore {
namespace kernel {
KernelModPtr HostOpBuild(const std::shared_ptr<AnfNode> &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
std::string opname = AnfAlgo::GetCNodeName(anf_node);
MS_LOG(INFO) << "Host op [" << opname << "]";
auto kerPtr = HostKernelFactory::Get(opname);
if (kerPtr == nullptr) {
MS_LOG(ERROR) << "Host can't find Kernel[" << opname << "]";
return nullptr;
}
if (!kerPtr->Init(anf_node)) {
MS_LOG(ERROR) << "Host Kernel initialize failed!";
return nullptr;
}
return kerPtr;
}
} // namespace kernel
} // namespace mindspore

@@ -0,0 +1,27 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_BUILD_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_BUILD_H_
#include <memory>
#include "backend/kernel_compiler/kernel.h"
namespace mindspore {
namespace kernel {
KernelModPtr HostOpBuild(const std::shared_ptr<AnfNode> &anf_node);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_BUILD_H_

@@ -0,0 +1,59 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/host/host_kernel_metadata.h"
#include <memory>
#include <string>
#include "backend/kernel_compiler/oplib/oplib.h"
#include "backend/kernel_compiler/common_utils.h"
#include "backend/session/anf_runtime_algorithm.h"
namespace mindspore {
namespace kernel {
constexpr auto kDynamicShape = "DynamicShape";
void HostMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<KernelBuildInfo>> *kernel_info_list) {
MS_LOG(INFO) << "HostMetadataInfo.";
MS_EXCEPTION_IF_NULL(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_info_list);
std::string op_name = AnfAlgo::GetCNodeName(kernel_node);
if (op_name != kDynamicShape) {
MS_LOG(DEBUG) << "Host does not have op [" << op_name << "]";
return;
}
std::vector<std::string> inputs_format{};
std::vector<TypeId> inputs_type{};
for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(kernel_node); ++input_index) {
inputs_format.emplace_back(kOpFormat_DEFAULT);
inputs_type.push_back(AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, input_index));
}
std::vector<std::string> outputs_format;
std::vector<TypeId> outputs_type;
for (size_t output_index = 0; output_index < AnfAlgo::GetOutputTensorNum(kernel_node); ++output_index) {
outputs_format.emplace_back(kOpFormat_DEFAULT);
outputs_type.push_back(AnfAlgo::GetOutputInferDataType(kernel_node, output_index));
}
auto builder = KernelBuildInfo::KernelBuildInfoBuilder();
builder.SetInputsFormat(inputs_format);
builder.SetInputsDeviceType(inputs_type);
builder.SetOutputsFormat(outputs_format);
builder.SetOutputsDeviceType(outputs_type);
builder.SetKernelType(HOST_KERNEL);
kernel_info_list->push_back(builder.Build());
}
} // namespace kernel
} // namespace mindspore

@@ -0,0 +1,30 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_META_DATA_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_META_DATA_H_
#include <string>
#include <vector>
#include <memory>
#include "backend/kernel_compiler/kernel_build_info.h"
namespace mindspore {
namespace kernel {
void HostMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<KernelBuildInfo>> *kernel_info_list);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_META_DATA_H_

@@ -0,0 +1,98 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/host/host_kernel_mod.h"
#include <memory>
#include <vector>
#include <string>
#include <utility>
#include "runtime/mem.h"
#include "utils/ms_context.h"
#include "runtime/device/kernel_runtime.h"
#include "runtime/device/ascend/executor/host_dynamic_kernel.h"
namespace mindspore {
namespace kernel {
void HostKernelFactory::Registe(const std::string &name, HostKernelCreater &&fun) {
hostKernelMap_.emplace(name, std::move(fun));
}
std::shared_ptr<HostKernelMod> HostKernelFactory::Get(const std::string &name) {
const auto &map = Get().hostKernelMap_;
auto it = map.find(name);
if (it != map.end() && it->second) {
return (it->second)();
}
return nullptr;
}
HostKernelFactory &HostKernelFactory::Get() {
static HostKernelFactory instance;
return instance;
}
const std::vector<size_t> &HostKernelMod::GetInputSizeList() const { return input_size_list_; }
const std::vector<size_t> &HostKernelMod::GetOutputSizeList() const { return output_size_list_; }
const std::vector<size_t> &HostKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; }
bool HostKernelMod::Init(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
for (size_t i = 0; i < input_num; i++) {
std::vector<size_t> shape_i = AnfAlgo::GetInputDeviceShape(anf_node, i);
TypePtr type_ptr = TypeIdToType(AnfAlgo::GetInputDeviceDataType(anf_node, i));
MS_EXCEPTION_IF_NULL(type_ptr);
int64_t size_i = 1;
for (size_t j = 0; j < shape_i.size(); j++) {
size_i = LongMulWithOverflowCheck(size_i, static_cast<int>(shape_i[j]));
}
size_t type_byte = GetTypeByte(type_ptr);
if (type_byte == 0) {
return false;
}
size_i = LongMulWithOverflowCheck(size_i, SizeToInt(type_byte));
input_size_list_.push_back(LongToSize(size_i));
}
for (size_t i = 0; i < output_num; i++) {
std::vector<size_t> shape_i = AnfAlgo::GetOutputDeviceShape(anf_node, i);
TypePtr type_ptr = TypeIdToType(AnfAlgo::GetOutputDeviceDataType(anf_node, i));
MS_EXCEPTION_IF_NULL(type_ptr);
int64_t size_i = 1;
for (size_t j = 0; j < shape_i.size(); j++) {
size_i = LongMulWithOverflowCheck(size_i, static_cast<int>(shape_i[j]));
}
size_t type_byte = GetTypeByte(type_ptr);
if (type_byte == 0) {
return false;
}
size_i = LongMulWithOverflowCheck(size_i, SizeToInt(type_byte));
output_size_list_.push_back(LongToSize(size_i));
}
return true;
}
bool HostKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
return true;
}
std::vector<TaskInfoPtr> HostKernelMod::GenTask(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &, uint32_t) {
return {};
}
} // namespace kernel
} // namespace mindspore
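
HostKernelMod::Init above derives each input/output byte size as the product of the device shape dims times the element width, guarded by LongMulWithOverflowCheck, and bails out when the type width is unknown. A sketch of that size computation; the byte widths listed here are illustrative:

# Sketch of the tensor byte-size computation in HostKernelMod::Init.
TYPE_BYTES = {"float16": 2, "float32": 4, "int32": 4, "int64": 8}

def tensor_byte_size(shape, dtype):
    size = TYPE_BYTES.get(dtype, 0)
    if size == 0:
        return None  # mirrors Init() returning false on an unknown type
    for dim in shape:
        size *= dim  # Init() guards each multiply with LongMulWithOverflowCheck
    return size

print(tensor_byte_size([32, 128], "float32"))  # 16384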

@@ -0,0 +1,86 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_MOD_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_MOD_H_
#include <vector>
#include <memory>
#include <string>
#include <map>
#include <utility>
#include "backend/kernel_compiler/ascend_kernel_mod.h"
namespace mindspore {
namespace kernel {
class HostKernelMod : public AscendKernelMod {
public:
HostKernelMod() = default;
~HostKernelMod() override = default;
const std::vector<size_t> &GetInputSizeList() const override;
const std::vector<size_t> &GetOutputSizeList() const override;
const std::vector<size_t> &GetWorkspaceSizeList() const override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &, uint32_t) override;
device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override = 0;
bool Init(const AnfNodePtr &anf_node);
protected:
AnfNodePtr anf_node_;
std::string op_name_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
};
using HostKernelModPtr = std::shared_ptr<HostKernelMod>;
using HostKernelModPtrList = std::vector<HostKernelModPtr>;
using HostKernelCreater = std::function<std::shared_ptr<HostKernelMod>()>;
class HostKernelFactory {
HostKernelFactory() = default;
~HostKernelFactory() = default;
public:
static HostKernelFactory &Get();
void Registe(const string &name, HostKernelCreater &&fun);
static std::shared_ptr<HostKernelMod> Get(const string &name);
private:
std::map<string, HostKernelCreater> hostKernelMap_;
};
class _HostKernelRegister {
public:
_HostKernelRegister(const string &name, HostKernelCreater &&fun) {
HostKernelFactory::Get().Registe(name, std::move(fun));
}
~_HostKernelRegister() = default;
};
#define _MS_HOST_REG_KERNEL_REG(KNAME, clazz) \
static_assert(std::is_base_of<HostKernelMod, clazz>::value, " must be base of HostKernelMod"); \
static const _HostKernelRegister g_##KNAME##_##_kernel_reg(#KNAME, []() { \
std::shared_ptr<clazz> ptr = nullptr; \
ptr = std::make_shared<clazz>(); \
MS_EXCEPTION_IF_NULL(ptr); \
return ptr; \
});
#define MS_HOST_REG_KERNEL(KNAME, clazz) _MS_HOST_REG_KERNEL_REG(KNAME, clazz)
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_MOD_H_
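
MS_HOST_REG_KERNEL expands to a static _HostKernelRegister whose constructor runs before main() and records a creator lambda in HostKernelFactory; that is how DynamicShapeKernelMod is registered in dynamic_shape_kernel.h above. The same self-registration pattern, sketched in Python with a decorator standing in for the static object (all names here are illustrative):

# Sketch of the self-registering kernel factory pattern behind MS_HOST_REG_KERNEL.
_host_kernel_map = {}

def register_host_kernel(name):
    def wrapper(cls):
        _host_kernel_map[name] = cls  # runs at import time, like the static registrar
        return cls
    return wrapper

def get_host_kernel(name):
    creator = _host_kernel_map.get(name)
    return creator() if creator else None

@register_host_kernel("DynamicShape")
class DynamicShapeKernelMod:
    pass

print(type(get_host_kernel("DynamicShape")).__name__)  # DynamicShapeKernelMod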

@@ -174,6 +174,9 @@ void KernelPack::ParseKernelJson(const nlohmann::json &js) {
   kernel_json_info_.block_dim = js["blockDim"];
   kernel_json_info_.kernel_name = js["kernelName"];
   kernel_json_info_.magic = js["magic"];
+  if (js.contains("opParaSize")) {
+    kernel_json_info_.op_para_size = js["opParaSize"];
+  }
   if (js.find("parameters") != js.end()) {
     if (!js.at("parameters").is_array()) {
       MS_LOG(DEBUG) << "Format error!,parameters should be array.";

@@ -25,9 +25,18 @@
 #include "ir/tensor.h"
 #include "abstract/dshape.h"
 #include "utils/log_adapter.h"
+#include "runtime/device/executor/dynamic_kernel.h"

 namespace mindspore {
-enum KernelType : int { UNKNOWN_KERNEL_TYPE = 0, AKG_KERNEL, AICPU_KERNEL, RT_KERNEL, HCCL_KERNEL, TBE_KERNEL };
+enum KernelType : int {
+  UNKNOWN_KERNEL_TYPE = 0,
+  AKG_KERNEL,
+  AICPU_KERNEL,
+  RT_KERNEL,
+  HCCL_KERNEL,
+  TBE_KERNEL,
+  HOST_KERNEL
+};
 namespace kernel {
 // Supported fusion type
@@ -69,7 +78,8 @@ struct KernelJsonInfo {
   std::vector<size_t> parameters;
   std::string sha256;
   std::vector<size_t> workspaces;
-  KernelJsonInfo() : block_dim(0) {}
+  uint32_t op_para_size;
+  KernelJsonInfo() : block_dim(0), op_para_size(0) {}
 };

 class KernelPack {
@@ -118,6 +128,7 @@ class KernelMod {
   virtual const std::vector<size_t> &GetWorkspaceSizeList() const = 0;
   virtual bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                       const std::vector<AddressPtr> &outputs, void *stream_ptr) = 0;
+  virtual device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) { return nullptr; }
   virtual std::vector<size_t> GenParameters() { return {}; }
   virtual void ReleaseResource() {}

@@ -83,8 +83,8 @@ std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo>
   while (!build_manger->IsAllTaskFinish()) {
     int task_id = -1;
     std::string task_result;
-    std::string pre_build_result;
-    auto ret = build_manger->WaitOne(&task_id, &task_result, &pre_build_result);
+    std::string build_result;
+    auto ret = build_manger->WaitOne(&task_id, &task_result, &build_result);
     if (!ret) {
       MS_EXCEPTION(ArgumentError) << "Build Failed. wait one ret:" << ret << ", task id:" << task_id;
     }
@@ -94,7 +94,7 @@ std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo>
                   << " change to single op build.";
       build_failed_num++;
     }
-    auto kernel_mod_item = build_manger->TaskFinishProcess(task_id, false);
+    auto kernel_mod_item = build_manger->TaskFinishProcess(task_id, build_result, false);
     if (kernel_mod_item.second != nullptr) {
       (void)kernel_mod_ret.emplace(kernel_mod_item);
     }

@@ -18,6 +18,7 @@
 #include <memory>
 #include <algorithm>
 #include "backend/kernel_compiler/aicpu/aicpu_kernel_metadata.h"
+#include "backend/kernel_compiler/host/host_kernel_metadata.h"
 #include "backend/kernel_compiler/rts/rt_kernel_info.h"
 #include "backend/kernel_compiler/hccl/hccl_kernel_metadata.h"
 #include "backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_select.h"
@@ -86,6 +87,9 @@ void KernelQueryAll(const CNodePtr &kernel_node,
   if (kernel_info_list->empty()) {
     HcclMetadataInfo(kernel_node, kernel_info_list);
   }
+  if (kernel_info_list->empty()) {
+    HostMetadataInfo(kernel_node, kernel_info_list);
+  }
   if (kernel_info_list->empty()) {
     MS_EXCEPTION(NotExistsError)
       << "Failed to obtain operator info, Please check whether the operator info is registered, Op full name:"

@@ -102,6 +102,7 @@ class OpInfo {
     kernel_name_ = opinfo.kernel_name();
     partial_flag_ = opinfo.partial_flag_;
     dynamic_format_ = opinfo.dynamic_format_;
+    dynamic_shape_ = opinfo.dynamic_shape_;
     op_pattern_ = opinfo.op_pattern();
     processor_ = opinfo.processor_;
     for (const auto &attr : opinfo.attrs_ptr()) {
@@ -122,12 +123,14 @@ class OpInfo {
   std::string fusion_type() const { return fusion_type_; }
   std::string kernel_name() const { return kernel_name_; }
   OpPattern op_pattern() const { return op_pattern_; }
+  bool dynamic_shape() const { return dynamic_shape_; }
   std::string processor() const { return processor_; }
   std::vector<std::shared_ptr<OpAttr>> attrs_ptr() const { return attrs_ptr_; }
   std::vector<std::shared_ptr<OpIOInfo>> inputs_ptr() const { return inputs_ptr_; }
   std::vector<std::shared_ptr<OpIOInfo>> outputs_ptr() const { return outputs_ptr_; }
   const std::unordered_map<size_t, size_t> &ref_infos() const { return ref_infos_; }

+  void set_dynamic_shape(bool dynamic_shape) { dynamic_shape_ = dynamic_shape; }
   void set_op_name(const std::string &op_name) { op_name_ = op_name; }
   void set_imply_type(const OpImplyType imply_type) { imply_type_ = imply_type; }
   void set_impl_path(const std::string &impl_path) { impl_path_ = impl_path; }
@@ -149,7 +152,8 @@ class OpInfo {
   void ClearOutputs() { (void)outputs_ptr_.clear(); }
   bool equals_to(const std::shared_ptr<OpInfo> &other_info) const {
     return this->op_name_ == other_info->op_name_ && this->imply_type_ == other_info->imply_type_ &&
-           this->processor_ == other_info->processor_;
+           this->processor_ == other_info->processor_ && this->op_pattern_ == other_info->op_pattern_ &&
+           this->dynamic_shape_ == other_info->dynamic_shape_;
   }

  private:
@@ -163,6 +167,7 @@ class OpInfo {
   std::string kernel_name_;
   bool partial_flag_ = false;
   bool dynamic_format_ = false;
+  bool dynamic_shape_ = false;
   OpPattern op_pattern_ = kCommonPattern;
   std::string processor_;
   std::vector<std::shared_ptr<OpAttr>> attrs_ptr_;

@@ -38,6 +38,7 @@ constexpr auto kDynamicFormat = "dynamicFormat";
 constexpr auto kFormatAgnostic = "formatAgnostic";
 constexpr auto kBroadcast = "broadcast";
 constexpr auto kReduce = "reduce";
+constexpr auto kDynamicShape = "dynamic_shape";
 constexpr auto kDtypeFormat = "dtype_format";
 constexpr auto kAttr = "attr";
 constexpr auto kIputs = "inputs";
@@ -111,6 +112,10 @@ void OpLib::DecodeTBESpecificInfo(const nlohmann::json &obj, const std::shared_p
   op_info->set_kernel_name(obj.at(kKernelName));
   op_info->set_partial_flag(obj.at(kPartialFlag));

+  if (obj.find(kDynamicShape) != obj.end()) {
+    op_info->set_dynamic_shape(obj.at(kDynamicShape));
+  }
+
   if (obj.find(kOpPattern) != obj.end()) {
     std::string op_pattern = obj.at(kOpPattern);
     auto find_iter = kOpPatternMap.find(op_pattern);
@@ -322,7 +327,7 @@ bool OpLib::DecodeInputOutput(const nlohmann::json &obj, const OpImplyType imply
   return ret;
 }

-std::shared_ptr<OpInfo> OpLib::FindOp(const std::string &op_name, OpImplyType imply_type) {
+std::shared_ptr<OpInfo> OpLib::FindOp(const std::string &op_name, OpImplyType imply_type, bool is_dynamic_shape) {
   if (!OpLib::RegOpFromLocalInfo()) {
     MS_LOG(INFO) << "Warning reg local op info failed.";
   }
@@ -338,16 +343,20 @@ std::shared_ptr<OpInfo> OpLib::FindOp(const std::string &op_name, OpImplyType im
   for (auto [iter, end] = op_info_.equal_range(op_name); iter != end; ++iter) {
     auto &op_info = iter->second;
     MS_EXCEPTION_IF_NULL(op_info);
     if (op_info->imply_type() != imply_type) {
       continue;
     }
     if (imply_type == kAKG && op_info->processor() != target_processor) {
       continue;
     }
+    if (is_dynamic_shape && !op_info->dynamic_shape()) {
+      continue;
+    }
     return op_info;
   }
   MS_LOG(INFO) << "FindOp failed: opname: " << op_name << ", imply_type: " << ImplTypeToStr(imply_type)
-               << ", current op num: " << op_info_.size();
+               << ", current op num: " << op_info_.size() << " is_dynamic_shape:" << is_dynamic_shape;
   return nullptr;
 }
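
With the changes above, FindOp filters candidates on a third axis: besides imply type and processor, a dynamic-shape query only matches op infos registered with "dynamic_shape": true in their TBE json. A sketch of that selection logic over illustrative data:

# Sketch of the dynamic-shape-aware FindOp lookup; op entries are illustrative.
op_info_ = {
    "Add": [
        {"imply_type": "TBE", "dynamic_shape": False},
        {"imply_type": "TBE", "dynamic_shape": True},
    ],
}

def find_op(op_name, imply_type, is_dynamic_shape=False):
    for op_info in op_info_.get(op_name, []):
        if op_info["imply_type"] != imply_type:
            continue
        if is_dynamic_shape and not op_info["dynamic_shape"]:
            continue
        return op_info
    return None

print(find_op("Add", "TBE", is_dynamic_shape=True))  # picks the dynamic variant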

Some files were not shown because too many files have changed in this diff.
