diff --git a/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_build.cc
index d45df4109d..5b16e6d8a7 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_build.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_build.cc
@@ -289,14 +289,14 @@ bool CreateNodeDefBytes(const std::shared_ptr<AnfNode> &anf_node,
   return true;
 }
 
-uint64_t SetExtInfoShapeType(char *ext_info_buf, uint64_t ext_info_offset) {
+uint64_t SetExtInfoShapeType(char *ext_info_buf, uint64_t ext_info_offset, UnknowShapeOpType type) {
   // deal1: unknown shape type
   auto *info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
   info->infoType = FWK_ADPT_EXT_SHAPE_TYPE;
   info->infoLen = sizeof(int32_t);
   ext_info_offset += kExtInfoHeadSize;
   auto *shape_type = reinterpret_cast<int32_t *>(ext_info_buf + ext_info_offset);
-  *shape_type = UnknowShapeOpType::DEPEND_COMPUTE;
+  *shape_type = type;
   ext_info_offset += info->infoLen;
   return ext_info_offset;
 }
@@ -401,7 +401,11 @@ bool CreateExtInfo(const std::shared_ptr<AnfNode> &anf_node, const std::shared_p
   ext_info.resize(ext_info_len, 0);
   char *ext_info_buf = ext_info.data();
 
-  ext_info_offset = SetExtInfoShapeType(ext_info_buf, ext_info_offset);
+  UnknowShapeOpType shape_type = UnknowShapeOpType::DEPEND_IN_SHAPE;
+  if (AnfAlgo::GetCNodeName(anf_node) == "Unique") {
+    shape_type = UnknowShapeOpType::DEPEND_COMPUTE;
+  }
+  ext_info_offset = SetExtInfoShapeType(ext_info_buf, ext_info_offset, shape_type);
   ext_info_offset = SetExtInfoInputShapeType(ext_info_buf, ext_info_offset, anf_node, input_num);
   ext_info_offset = SetExtInfoOutputShapeType(ext_info_buf, ext_info_offset, anf_node, output_num);
 
diff --git a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc
index 89e4c8362d..6ca596a6a3 100644
--- a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc
+++ b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc
@@ -18,6 +18,7 @@
 #include <algorithm>
 #include <map>
 #include <set>
+#include <stack>
 #include "ir/anf.h"
 #include "ir/func_graph.h"
 #include "base/core_ops.h"
@@ -30,6 +31,7 @@
 #include "backend/kernel_compiler/kernel_build_info.h"
 #include "common/trans.h"
 #include "abstract/param_validator.h"
+#include "abstract/primitive_infer_map.h"
 #include "pipeline/jit/static_analysis/static_analysis.h"
 #include "utils/trace_base.h"
 
@@ -820,6 +822,8 @@ DeviceAddressPtr AnfRuntimeAlgorithm::GetMutableWorkspaceAddr(const AnfNodePtr &
 void AnfRuntimeAlgorithm::SetOutputInferTypeAndShape(const std::vector<TypeId> &types,
                                                      const std::vector<std::vector<size_t>> &shapes, AnfNode *node) {
   MS_EXCEPTION_IF_NULL(node);
+  auto node_ptr = node->cast<AnfNodePtr>();
+  MS_EXCEPTION_IF_NULL(node_ptr);
   if (types.size() != shapes.size()) {
     MS_LOG(EXCEPTION) << "Types size " << types.size() << "should be same with shapes size " << shapes.size()
                       << " trace: " << trace::DumpSourceLines(node);
@@ -829,16 +833,23 @@ void AnfRuntimeAlgorithm::SetOutputInferTypeAndShape(const std::vector<TypeId> &
   } else if (shapes.size() == 1) {
     // single output handle
     ShapeVector shape_int;
+    auto max_shape = GetOutputMaxShape(node_ptr, 0);
+    auto min_shape = GetOutputMinShape(node_ptr, 0);
     std::transform(shapes[0].begin(), shapes[0].end(), std::back_inserter(shape_int), SizeToLong);
-    auto abstract = std::make_shared<AbstractTensor>(TypeIdToType(types[0]), shape_int);
+    auto abstract = std::make_shared<AbstractTensor>(
+      TypeIdToType(types[0]), std::make_shared<abstract::Shape>(shape_int, min_shape, max_shape));
     node->set_abstract(abstract);
   } else {
     // multiple output handle
     std::vector<AbstractBasePtr> abstract_list;
     for (size_t i = 0; i < types.size(); ++i) {
       ShapeVector shape_int;
+      auto max_shape = GetOutputMaxShape(node_ptr, i);
+      auto min_shape = GetOutputMinShape(node_ptr, i);
       std::transform(shapes[i].begin(), shapes[i].end(), std::back_inserter(shape_int), SizeToLong);
-      abstract_list.emplace_back(std::make_shared<AbstractTensor>(TypeIdToType(types[i]), shape_int));
+      auto abstract = std::make_shared<AbstractTensor>(
+        TypeIdToType(types[i]), std::make_shared<abstract::Shape>(shape_int, min_shape, max_shape));
+      abstract_list.emplace_back(abstract);
     }
     auto abstract_tuple = std::make_shared<AbstractTuple>(abstract_list);
     node->set_abstract(abstract_tuple);
@@ -1409,7 +1420,7 @@ std::vector<int64_t> AnfRuntimeAlgorithm::GetOutputMinShape(const AnfNodePtr &an
   }
 }
 
-bool AnfRuntimeAlgorithm::IsNodeDynamicShape(const AnfNodePtr &node) {
+bool IsNodeOutputDynamicShape(const CNodePtr &node) {
   MS_EXCEPTION_IF_NULL(node);
   auto base_shape = node->Shape();
   if (base_shape == nullptr) {
@@ -1436,6 +1447,66 @@ bool AnfRuntimeAlgorithm::IsNodeDynamicShape(const AnfNodePtr &node) {
   return false;
 }
 
+// Returns true when any real input of `anf_node_ptr` has a dynamic shape according to
+// IsShapeDynamic. Inputs with no shape, an out-of-range tuple index, or a tuple element that is
+// null or not an abstract::Shape are skipped instead of being treated as errors.
+bool IsNodeInputDynamicShape(const CNodePtr &anf_node_ptr) {
+  MS_EXCEPTION_IF_NULL(anf_node_ptr);
+  auto input_num = AnfAlgo::GetInputTensorNum(anf_node_ptr);
+  for (size_t i = 0; i < input_num; ++i) {
+    auto input_with_index = AnfAlgo::GetPrevNodeOutput(anf_node_ptr, i);
+    auto input = input_with_index.first;
+    auto index = input_with_index.second;
+    MS_EXCEPTION_IF_NULL(input);
+
+    auto base_shape = input->Shape();
+    if (base_shape == nullptr) {
+      MS_LOG(INFO) << "Invalid shape ptr, node:" << input->fullname_with_scope();
+      continue;
+    }
+
+    if (base_shape->isa<abstract::TupleShape>()) {
+      auto tuple_shape = base_shape->cast<abstract::TupleShapePtr>();
+      MS_EXCEPTION_IF_NULL(tuple_shape);
+      if (index >= tuple_shape->size()) {
+        MS_LOG(INFO) << "Node:" << anf_node_ptr->fullname_with_scope() << " Invalid index:" << index
+                     << " and tuple_shape size:" << tuple_shape->size();
+        continue;
+      }
+      // Narrow to the tuple element this input actually consumes.
+      base_shape = (*tuple_shape)[index];
+    }
+
+    // A single check covers both the plain-shape and the tuple-element case; the null test
+    // guards against an empty tuple slot before it is dereferenced.
+    if (base_shape != nullptr && base_shape->isa<abstract::Shape>() &&
+        IsShapeDynamic(base_shape->cast<abstract::ShapePtr>())) {
+      return true;
+    }
+  }
+  return false;
+}
+
+bool AnfRuntimeAlgorithm::IsNodeDynamicShape(const AnfNodePtr &node) {  // true if a CNode has a dynamic in/out shape
+  MS_EXCEPTION_IF_NULL(node);
+  if (!node->isa<CNode>()) {  // anything that is not a CNode is reported as not dynamic
+    MS_LOG(WARNING) << "Node is not a cnode";
+    return false;
+  }
+  auto cnode = node->cast<CNodePtr>();
+  auto in_dynamic = IsNodeInputDynamicShape(cnode);
+  auto out_dynamic = IsNodeOutputDynamicShape(cnode);
+  if (in_dynamic && !AnfAlgo::HasNodeAttr(kAttrInputIsDynamicShape, cnode)) {  // side effect: tag the node
+    AnfAlgo::SetNodeAttr(kAttrInputIsDynamicShape, MakeValue(true), cnode);
+    MS_LOG(INFO) << "Set Input Dynamic Shape Attr to Node:" << cnode->fullname_with_scope();
+  }
+  if (out_dynamic && !AnfAlgo::HasNodeAttr(kAttrOutputIsDynamicShape, cnode)) {  // side effect: tag the node
+    AnfAlgo::SetNodeAttr(kAttrOutputIsDynamicShape, MakeValue(true), cnode);
+    MS_LOG(INFO) << "Set Output Dynamic Shape Attr to Node:" << cnode->fullname_with_scope();
+  }
+  return in_dynamic || out_dynamic;  // an attr that is already present is never cleared here
+}
+
 std::vector<size_t> AnfRuntimeAlgorithm::GetInputRealDeviceShapeIfExist(const AnfNodePtr &anf_node, size_t index) {
   auto device_shape = GetInputDeviceShape(anf_node, index);
   // Initialize GPUKernel with max shape to fit 'InitDynamicOutputKernelRef()' for memory reuse.
@@ -1500,5 +1571,50 @@ void AnfRuntimeAlgorithm::GetAllFatherRealNode(const AnfNodePtr &anf_node, std::
     GetAllFatherRealNode(cnode->input(kDependAttachNodeIndex), result, visited);
   }
 }
+
+// Recomputes and stores `node`'s abstract by running its primitive's registered infer function.
+void AnfRuntimeAlgorithm::InferShape(const CNodePtr &node) {
+  MS_EXCEPTION_IF_NULL(node);
+  MS_LOG(INFO) << "InferShape start, node:" << node->DebugString();
+  const auto &inputs = node->inputs();
+  if (inputs.empty()) {
+    MS_LOG(EXCEPTION) << "Invalid inputs";
+  }
+  auto primitive = GetValueNode<PrimitivePtr>(inputs[0]);
+  MS_EXCEPTION_IF_NULL(primitive);  // input 0 need not hold a primitive; it is dereferenced below
+  AbstractBasePtrList args_spec_list;
+  auto input_size = AnfAlgo::GetInputTensorNum(node);
+  for (size_t i = 0; i < input_size; ++i) {
+    auto real_input = AnfAlgo::GetPrevNodeOutput(node, i).first;
+    MS_EXCEPTION_IF_NULL(real_input);
+    auto cnode_input = node->input(i + 1);
+    MS_EXCEPTION_IF_NULL(cnode_input);
+    if (AnfAlgo::CheckPrimitiveType(cnode_input, prim::kPrimTupleGetItem)) {
+      auto base_shape = real_input->Shape();
+      MS_EXCEPTION_IF_NULL(base_shape);  // a producer without a shape cannot be inspected
+      if (!base_shape->isa<abstract::TupleShape>()) {
+        MS_LOG(EXCEPTION) << "Node:" << node->DebugString()
+                          << " input is a tuple_get_item but real input node shape is not a TupleShape";
+      }
+      auto tuple_ptr = base_shape->cast<abstract::TupleShapePtr>();
+      MS_EXCEPTION_IF_NULL(tuple_ptr);
+      auto real_shape = tuple_ptr->shape().at(AnfAlgo::GetTupleGetItemOutIndex(cnode_input->cast<CNodePtr>()));
+      MS_EXCEPTION_IF_NULL(cnode_input->abstract());  // the element type is read from this abstract
+      auto abstract_tensor = cnode_input->abstract()->cast<abstract::AbstractTensorPtr>();
+      MS_EXCEPTION_IF_NULL(abstract_tensor);
+      args_spec_list.emplace_back(std::make_shared<abstract::AbstractTensor>(abstract_tensor->element(), real_shape));
+    } else {
+      bool is_reshape = cnode_input->isa<CNode>() && AnfAlgo::GetCNodeName(cnode_input) == prim::kPrimReshape->name();
+      args_spec_list.emplace_back(is_reshape ? cnode_input->abstract() : real_input->abstract());
+    }
+  }
+  auto &prim_eval_implement_map = abstract::GetPrimitiveToEvalImplMap();
+  auto ret = prim_eval_implement_map.find(primitive);
+  if (ret == prim_eval_implement_map.end()) {
+    MS_LOG(EXCEPTION) << "Get infer shape function failed, primitive name:" << primitive->name()
+                      << " primitive type:" << primitive->type_name();
+  }
+  node->set_abstract(ret->second.impl_(nullptr, primitive, args_spec_list));
+}
 }  // namespace session
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.h b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.h
index 56c039ba2b..4e24af27cc 100644
--- a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.h
+++ b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.h
@@ -230,6 +230,7 @@ class AnfRuntimeAlgorithm {
   static std::vector<int64_t> GetOutputMaxShape(const AnfNodePtr &anf_node, size_t index);
   static std::vector<int64_t> GetOutputMinShape(const AnfNodePtr &anf_node, size_t index);
   static bool IsNodeDynamicShape(const AnfNodePtr &node);
+  static void InferShape(const CNodePtr &node);
   static std::vector<size_t> GetInputRealDeviceShapeIfExist(const AnfNodePtr &anf_node, size_t index);
   static std::vector<size_t> GetOutputRealDeviceShapeIfExist(const AnfNodePtr &anf_node, size_t index);
   // Find control_depend real input nodes.
diff --git a/mindspore/ccsrc/backend/session/cpu_session.cc b/mindspore/ccsrc/backend/session/cpu_session.cc
index bac5d1625b..32f2352dab 100644
--- a/mindspore/ccsrc/backend/session/cpu_session.cc
+++ b/mindspore/ccsrc/backend/session/cpu_session.cc
@@ -65,6 +65,8 @@ GraphId CPUSession::CompileGraphImpl(const AnfNodePtrList &lst, const AnfNodePtr
   auto graph_id = graph_sum_;
   auto graph = ConstructKernelGraph(lst, outputs);
   MS_EXCEPTION_IF_NULL(graph);
+  UpdateGraphDynamicShapeAttr(NOT_NULL(graph));
+  graph->UpdateGraphDynamicAttr();
   MS_LOG(INFO) << "Set kernel info";
   SetKernelInfo(graph.get());
 #if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
@@ -87,7 +89,7 @@ void CPUSession::CreateOutputTensors(const GraphId &graph_id, const std::vector<
                                      std::map<tensor::TensorPtr, session::KernelWithIndex> *tensor_to_node) {
   auto kernel_graph = GetGraph(graph_id);
   MS_EXCEPTION_IF_NULL(kernel_graph);
-  runtime_.CreateOutputTensors(kernel_graph.get(), input_tensors, outputs);
+  runtime_.CreateOutputTensors(kernel_graph.get(), input_tensors, outputs, tensor_to_node);
 }
 
 void CPUSession::RunGraphImpl(const GraphId &graph_id, const std::vector<tensor::TensorPtr> &inputs,
diff --git a/mindspore/ccsrc/backend/session/session_basic.cc b/mindspore/ccsrc/backend/session/session_basic.cc
index ddecff5a08..bac09b1434 100644
--- a/mindspore/ccsrc/backend/session/session_basic.cc
+++ b/mindspore/ccsrc/backend/session/session_basic.cc
@@ -47,6 +47,41 @@ static std::shared_ptr<std::map<ValuePtr, ParameterPtr>> python_paras;
 void ClearPythonParasMap() { python_paras = nullptr; }
 namespace {
 const int kSummaryGetItem = 2;
+// Returns true when at least one user of `node` recorded by `manager` is a real kernel.
+// Only the user node of each entry (its `first` member) is inspected.
+bool IsUsedByRealKernel(const FuncGraphManagerPtr &manager, const AnfNodePtr &node) {
+  MS_EXCEPTION_IF_NULL(manager);
+  MS_EXCEPTION_IF_NULL(node);
+  // Bind by reference so the user set and its entries are not copied on every query.
+  // NOTE(review): relies on node_users() returning a reference to the manager's map - confirm.
+  const auto &node_users = manager->node_users()[node];
+  return std::any_of(node_users.begin(), node_users.end(),
+                     [](const auto &user) { return AnfAlgo::IsRealKernel(user.first); });
+}
+
+// Returns true when at least one CNode user of `node` recorded by `manager` is reported as
+// dynamic-shaped by AnfAlgo::IsNodeDynamicShape. Users that are not CNodes are ignored.
+bool IsUsedByDynamicKernel(const FuncGraphManagerPtr &manager, const AnfNodePtr &node) {
+  MS_EXCEPTION_IF_NULL(manager);
+  MS_EXCEPTION_IF_NULL(node);
+  // Bind by reference so the user set and its entries are not copied on every query.
+  const auto &node_users = manager->node_users()[node];
+  return std::any_of(node_users.begin(), node_users.end(), [](const auto &user) {
+    return user.first->isa<CNode>() && AnfAlgo::IsNodeDynamicShape(user.first->cast<CNodePtr>());
+  });
+}
+
+// Returns true only for a Parameter whose is_used_by_real_kernel() flag is false; every other
+// kind of node yields false.
+bool CheckIfNeedCreateOutputTensor(const AnfNodePtr &node) {
+  MS_EXCEPTION_IF_NULL(node);
+  if (!node->isa<Parameter>()) {
+    return false;
+  }
+  const auto param = node->cast<ParameterPtr>();
+  MS_EXCEPTION_IF_NULL(param);
+  return !param->is_used_by_real_kernel();
+}
 
 ValuePtr GetParamDefaultValue(const AnfNodePtr &node) {
   if (node == nullptr) {
@@ -114,6 +149,8 @@ BaseRef CreateNodeOutputTensor(const session::KernelWithIndex &node_output_pair,
   MS_EXCEPTION_IF_NULL(node);
   MS_EXCEPTION_IF_NULL(graph);
   MS_EXCEPTION_IF_NULL(tensor_to_node);
+  auto ms_context = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(ms_context);
   MS_LOG(INFO) << "Create tensor for output[" << node->DebugString() << "] index[" << node_output_pair.second << "]";
   // if node is a value node, no need sync addr from device to host
   if (node->isa<ValueNode>()) {
@@ -121,7 +158,8 @@ BaseRef CreateNodeOutputTensor(const session::KernelWithIndex &node_output_pair,
     MS_EXCEPTION_IF_NULL(value_node);
     return value_node->value();
   }
-  if (!AnfAlgo::OutputAddrExist(node, output_index)) {
+  if (!AnfAlgo::OutputAddrExist(node, output_index) ||
+      (CheckIfNeedCreateOutputTensor(node) && ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) != kPynativeMode)) {
     if (node->isa<Parameter>()) {
       for (size_t input_idx = 0; input_idx < graph->inputs().size(); input_idx++) {
         if (input_idx >= input_tensors.size()) {
@@ -875,9 +913,21 @@ KernelGraphPtr SessionBasic::ConstructKernelGraph(const AnfNodePtrList &lst, con
 
   // Update Graph Dynamic Shape Attr
   UpdateGraphDynamicShapeAttr(NOT_NULL(graph));
-
   opt::BackendCommonOptimization(graph);
   graph->SetInputNodes();
+  auto input_nodes = graph->input_nodes();
+  for (auto input_node : input_nodes) {
+    if (input_node->isa<Parameter>()) {
+      auto node_ptr = input_node->cast<ParameterPtr>();
+      MS_EXCEPTION_IF_NULL(node_ptr);
+      if (!IsUsedByRealKernel(manager, input_node)) {
+        node_ptr->set_used_by_real_kernel();
+      }
+      if (IsUsedByDynamicKernel(manager, input_node)) {
+        node_ptr->set_used_by_dynamic_kernel();
+      }
+    }
+  }
   graph->SetOptimizerFlag();
   return graph;
 }
@@ -950,7 +1000,22 @@ std::shared_ptr<KernelGraph> SessionBasic::ConstructKernelGraph(const FuncGraphP
       MS_LOG_EXCEPTION << "construct func graph " << func_graph->ToString() << "fail!";
     }
   }
+
   AddParameterToGraphInputs(func_graph->parameters(), graph.get());
+  FuncGraphManagerPtr manager = MakeManager({graph});
+  auto input_nodes = graph->inputs();
+  for (auto input_node : input_nodes) {
+    if (input_node->isa<Parameter>()) {
+      auto node_ptr = input_node->cast<ParameterPtr>();
+      MS_EXCEPTION_IF_NULL(node_ptr);
+      if (!IsUsedByRealKernel(manager, input_node)) {
+        node_ptr->set_used_by_real_kernel();
+      }
+      if (IsUsedByDynamicKernel(manager, input_node)) {
+        node_ptr->set_used_by_dynamic_kernel();
+      }
+    }
+  }
   graph->SetExecOrderByDefault();
   if (ExistSummaryNode(graph.get())) {
     graph->set_summary_node_exist(true);
@@ -1021,14 +1086,23 @@ void SessionBasic::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_grap
     MS_EXCEPTION_IF_NULL(tensor);
     auto input_node = input_nodes[i];
     MS_EXCEPTION_IF_NULL(input_node);
+    auto size = LongToSize(tensor->data().nbytes());
+    if (input_node->isa<Parameter>() && input_node->cast<ParameterPtr>()->is_used_by_dynamic_kernel()) {
+      auto tensor_shape = tensor->shape();
+      std::vector<size_t> shape_tmp;
+      (void)std::transform(tensor_shape.begin(), tensor_shape.end(), std::back_inserter(shape_tmp), IntToSize);
+      AnfAlgo::SetOutputInferTypeAndShape({AnfAlgo::GetOutputInferDataType(input_node, 0)}, {shape_tmp},
+                                          input_node.get());
+      size = trans::ShapeSize(shape_tmp) * trans::TypeIdSize(tensor->data_type());
+    }
     if (input_node->isa<Parameter>() && AnfAlgo::OutputAddrExist(input_node, 0) && TensorNeedSync(input_node, tensor)) {
       auto device_address = AnfAlgo::GetMutableOutputAddr(input_node, 0);
       MS_EXCEPTION_IF_NULL(device_address);
-      if (!device_address->SyncHostToDevice(trans::GetRuntimePaddingShape(input_node, 0),
-                                            LongToSize(tensor->data().nbytes()), tensor->data_type(),
-                                            tensor->data_c())) {
+      if (size != 0 && !device_address->SyncHostToDevice(trans::GetRuntimePaddingShape(input_node, 0), size,
+                                                         tensor->data_type(), tensor->data_c())) {
         MS_LOG(EXCEPTION) << "SyncHostToDevice failed.";
       }
+
       if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode ||
           AnfAlgo::IsParameterWeight(input_node->cast<ParameterPtr>())) {
         tensor->set_device_address(device_address);
@@ -1543,55 +1617,6 @@ void SessionBasic::RunGraphAsync(const GraphId &graph_id, const std::vector<tens
   executor_->RunGraphAsync(shared_from_this(), graph_id, inputs, outputs);
 }
 
-bool IsDynamicShape(const NotNull<abstract::ShapePtr> &shape) {
-  return std::any_of(shape->shape().begin(), shape->shape().end(), [](int64_t s) { return s < 0; });
-}
-
-bool IsNodeOutputDynamicShape(const CNodePtr &anf_node_ptr) {
-  MS_EXCEPTION_IF_NULL(anf_node_ptr);
-  return AnfAlgo::IsNodeDynamicShape(anf_node_ptr);
-}
-
-bool IsNodeInputDynamicShape(const CNodePtr &anf_node_ptr) {
-  MS_EXCEPTION_IF_NULL(anf_node_ptr);
-  auto input_num = AnfAlgo::GetInputTensorNum(anf_node_ptr);
-  for (size_t i = 0; i < input_num; ++i) {
-    auto input_with_index = AnfAlgo::GetPrevNodeOutput(anf_node_ptr, i);
-    auto input = input_with_index.first;
-    auto index = input_with_index.second;
-    MS_EXCEPTION_IF_NULL(input);
-
-    auto base_shape = input->Shape();
-    if (base_shape == nullptr) {
-      MS_LOG(INFO) << "Invalid shape ptr, node:" << input->fullname_with_scope();
-      continue;
-    }
-    if (base_shape->isa<abstract::Shape>()) {
-      if (IsDynamicShape(NOT_NULL(base_shape->cast<abstract::ShapePtr>()))) {
-        return true;
-      }
-    } else if (base_shape->isa<abstract::TupleShape>()) {
-      auto tuple_shape = base_shape->cast<abstract::TupleShapePtr>();
-      MS_EXCEPTION_IF_NULL(tuple_shape);
-
-      if (index >= tuple_shape->size()) {
-        MS_LOG(INFO) << "Node:" << anf_node_ptr->fullname_with_scope() << "Invalid index:" << index
-                     << " and tuple_shape size:" << tuple_shape->size();
-        continue;
-      }
-
-      auto b_shp = (*tuple_shape)[index];
-      if (!b_shp->isa<abstract::Shape>()) {
-        continue;
-      }
-      if (IsDynamicShape(NOT_NULL(b_shp->cast<abstract::ShapePtr>()))) {
-        return true;
-      }
-    }
-  }
-  return false;
-}
-
 void SessionBasic::UpdateAllGraphDynamicShapeAttr(const std::vector<KernelGraphPtr> &all_graphs) {
   bool is_dynamic = false;
   for (const auto &graph : all_graphs) {
@@ -1605,20 +1630,10 @@ void SessionBasic::UpdateAllGraphDynamicShapeAttr(const std::vector<KernelGraphP
 
 void SessionBasic::UpdateGraphDynamicShapeAttr(const NotNull<KernelGraphPtr> &root_graph) {
   for (const auto &cnode : root_graph->execution_order()) {
-    auto output_dynamic = IsNodeOutputDynamicShape(NOT_NULL(cnode));
-    auto input_dynamic = IsNodeInputDynamicShape(NOT_NULL(cnode));
-    if (output_dynamic || input_dynamic) {
+    if (AnfAlgo::IsNodeDynamicShape(cnode)) {
       AnfAlgo::SetNodeAttr(kAttrIsDynamicShape, MakeValue(true), cnode);
       MS_LOG(INFO) << "Set Dynamic Shape Attr to Node:" << cnode->fullname_with_scope();
     }
-    if (output_dynamic) {
-      AnfAlgo::SetNodeAttr(kAttrOutputIsDynamicShape, MakeValue(true), cnode);
-      MS_LOG(INFO) << "Set Output Dynamic Shape Attr to Node:" << cnode->fullname_with_scope();
-    }
-    if (input_dynamic) {
-      AnfAlgo::SetNodeAttr(kAttrInputIsDynamicShape, MakeValue(true), cnode);
-      MS_LOG(INFO) << "Set Input Dynamic Shape Attr to Node:" << cnode->fullname_with_scope();
-    }
   }
   root_graph->UpdateGraphDynamicAttr();
 }
diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc
index 6e32793b81..8a08c796c9 100644
--- a/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc
@@ -532,7 +532,7 @@ bool AscendDeviceAddress::SyncHostToDevice(const ShapeVector &shape, size_t size
   }
   if (format_ == kOpFormat_NCHW || format_ == kOpFormat_DEFAULT || format_ == kOpFormat_NDHWC) {
     if (type_id_ == type) {
-      SyncMemory(ptr_, host_ptr, size_, RT_MEMCPY_HOST_TO_DEVICE);
+      SyncMemory(ptr_, host_ptr, size, RT_MEMCPY_HOST_TO_DEVICE);
       sync_ok = true;
     } else if (type_id_ == kNumberTypeFloat32 && type == kNumberTypeFloat64) {
       sync_ok = Float64ToFloatAndSyncHostToDevice(ptr_, size_, host_ptr, size);
diff --git a/mindspore/ccsrc/runtime/device/ascend/executor/ai_cpu_dynamic_kernel.cc b/mindspore/ccsrc/runtime/device/ascend/executor/ai_cpu_dynamic_kernel.cc
index 2f58516ec7..59b82717c5 100644
--- a/mindspore/ccsrc/runtime/device/ascend/executor/ai_cpu_dynamic_kernel.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/executor/ai_cpu_dynamic_kernel.cc
@@ -66,11 +66,15 @@ void AiCpuDynamicKernel::Initialize() {
   input_num_ = AnfAlgo::GetInputTensorNum(cnode_ptr_);
   output_num_ = AnfAlgo::GetOutputTensorNum(cnode_ptr_);
 
+  UnknowShapeOpType shape_type = UnknowShapeOpType::DEPEND_IN_SHAPE;
+  if (AnfAlgo::GetCNodeName(cnode_ptr_) == "Unique") {
+    shape_type = UnknowShapeOpType::DEPEND_COMPUTE;
+  }
   // Parse aicpu ext info
   if (is_dynamic_shape_) {
     MS_EXCEPTION_IF_NULL(cnode_ptr_);
     ext_info_handler_ =
-      std::make_shared<AicpuExtInfoHandler>(cnode_ptr_->fullname_with_scope(), input_num_, output_num_, DEPEND_COMPUTE);
+      std::make_shared<AicpuExtInfoHandler>(cnode_ptr_->fullname_with_scope(), input_num_, output_num_, shape_type);
     ext_info_handler_->Parse(ext_info_data_);
   }
 
diff --git a/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc
index f841a6e3c4..41d37dd135 100644
--- a/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc
+++ b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc
@@ -19,6 +19,7 @@
 #include <memory>
 #include <numeric>
 #include <utility>
+#include <algorithm>
 #include <functional>
 #include "backend/kernel_compiler/kernel.h"
 #include "runtime/device/cpu/cpu_device_address.h"
@@ -129,9 +130,11 @@ DeviceAddressPtr CPUKernelRuntime::CreateDeviceAddress(void *device_ptr, size_t
   return std::make_shared<CPUDeviceAddress>(device_ptr, device_size, format, type_id);
 }
 
-tensor::TensorPtr CPUKernelRuntime::CreatTensorForOutput(session::KernelGraph *kernel_graph, const CNodePtr &node,
-                                                         size_t index) {
+tensor::TensorPtr CPUKernelRuntime::CreatTensorForOutput(
+  session::KernelGraph *kernel_graph, const CNodePtr &node, size_t index,
+  std::map<tensor::TensorPtr, session::KernelWithIndex> *tensor_to_node) {
   MS_EXCEPTION_IF_NULL(node);
+  MS_EXCEPTION_IF_NULL(tensor_to_node);
   size_t output_size = AnfAlgo::GetOutputTensorNum(node);
   if (index >= output_size) {
     MS_LOG(EXCEPTION) << "Invalid input index " << index;
@@ -166,13 +169,16 @@ tensor::TensorPtr CPUKernelRuntime::CreatTensorForOutput(session::KernelGraph *k
     }
     (void)bound_addresses_.insert(address);
   }
+  session::KernelWithIndex node_index(node, index);
   tensor->SetNeedWait(true);
   tensor->SetIsGraphOutput();
+  (*tensor_to_node)[tensor] = node_index;
   return tensor;
 }
 
 BaseRef CPUKernelRuntime::CreatTensorForOutput(session::KernelGraph *kernel_graph,
-                                               const session::KernelWithIndex &kernel_with_index) {
+                                               const session::KernelWithIndex &kernel_with_index,
+                                               std::map<tensor::TensorPtr, session::KernelWithIndex> *tensor_to_node) {
   auto &input_node = kernel_with_index.first;
   auto index = kernel_with_index.second;
   MS_EXCEPTION_IF_NULL(input_node);
@@ -183,12 +189,12 @@ BaseRef CPUKernelRuntime::CreatTensorForOutput(session::KernelGraph *kernel_grap
       VectorRef ret;
       for (size_t i = 1; i < node->inputs().size(); i++) {
         auto item_with_index = AnfAlgo::VisitKernelWithReturnType(node->input(i), 0);
-        auto out = CreatTensorForOutput(kernel_graph, item_with_index);
+        auto out = CreatTensorForOutput(kernel_graph, item_with_index, tensor_to_node);
         ret.push_back(out);
       }
       return ret;
     }
-    return CreatTensorForOutput(kernel_graph, node, index);
+    return CreatTensorForOutput(kernel_graph, node, index, tensor_to_node);
   } else if (input_node->isa<Parameter>()) {
     auto iter = input_param_tensor_map_.find(input_node);
     if (iter != input_param_tensor_map_.end()) {
@@ -203,9 +209,11 @@ BaseRef CPUKernelRuntime::CreatTensorForOutput(session::KernelGraph *kernel_grap
 }
 
 void CPUKernelRuntime::CreateOutputTensors(session::KernelGraph *kernel_graph,
-                                           const std::vector<tensor::TensorPtr> &inputs, VectorRef *outputs) {
+                                           const std::vector<tensor::TensorPtr> &inputs, VectorRef *outputs,
+                                           std::map<tensor::TensorPtr, session::KernelWithIndex> *tensor_to_node) {
   MS_EXCEPTION_IF_NULL(kernel_graph);
   MS_EXCEPTION_IF_NULL(outputs);
+  MS_EXCEPTION_IF_NULL(tensor_to_node);
   auto &input_nodes = kernel_graph->inputs();
   if (input_nodes.size() != inputs.size()) {
     MS_LOG(EXCEPTION) << "Input size not equal to input node size!";
@@ -222,7 +230,7 @@ void CPUKernelRuntime::CreateOutputTensors(session::KernelGraph *kernel_graph,
   auto output_nodes = kernel_graph->outputs();
   for (const auto &item : output_nodes) {
     auto item_with_index = AnfAlgo::VisitKernelWithReturnType(item, 0, true);
-    auto out = CreatTensorForOutput(kernel_graph, item_with_index);
+    auto out = CreatTensorForOutput(kernel_graph, item_with_index, tensor_to_node);
     outputs->push_back(std::move(out));
   }
 }
@@ -258,6 +266,12 @@ void CPUKernelRuntime::BindInputTensorAddressPtr(const session::KernelGraph &ker
           MS_LOG(EXCEPTION) << "Parameter node sync host to device failed!";
         }
       }
+      if (item->cast<ParameterPtr>()->is_used_by_dynamic_kernel()) {
+        auto tensor_shape = tensor->shape();
+        std::vector<size_t> shape_tmp;
+        (void)std::transform(tensor_shape.begin(), tensor_shape.end(), std::back_inserter(shape_tmp), IntToSize);
+        AnfAlgo::SetOutputInferTypeAndShape({AnfAlgo::GetOutputInferDataType(item, 0)}, {shape_tmp}, item.get());
+      }
       address->ref_count_ = INIT_NODE_REF;
       tensor->set_device_address(address);
     }
@@ -325,6 +339,9 @@ bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool is_task_sink
 #ifdef ENABLE_PROFILE
     double start_time = GetTime();
 #endif
+    if (AnfAlgo::IsDynamicShape(kernel)) {
+      AnfAlgo::InferShape(kernel);
+    }
     std::vector<kernel::AddressPtr> kernel_inputs;
     std::vector<kernel::AddressPtr> kernel_workspaces;
     std::vector<kernel::AddressPtr> kernel_outputs;
diff --git a/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.h b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.h
index 928ca9430b..4574bb4fd9 100644
--- a/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.h
+++ b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.h
@@ -39,7 +39,7 @@ class CPUKernelRuntime : public KernelRuntime {
   bool Run(session::KernelGraph *graph, bool is_task_sink) override;
   void AssignKernelAddress(session::KernelGraph *kernel_graph);
   void CreateOutputTensors(session::KernelGraph *kernel_graph, const std::vector<tensor::TensorPtr> &inputs,
-                           VectorRef *outputs);
+                           VectorRef *outputs, std::map<tensor::TensorPtr, session::KernelWithIndex> *tensor_to_node);
   void BindInputOutput(session::KernelGraph *kernel_graph, const std::vector<tensor::TensorPtr> &inputs,
                        VectorRef *outputs);
   void IncreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs);
@@ -53,8 +53,10 @@ class CPUKernelRuntime : public KernelRuntime {
                                        TypeId type_id) override;
 
  private:
-  tensor::TensorPtr CreatTensorForOutput(session::KernelGraph *kernel_graph, const CNodePtr &node, size_t index);
-  BaseRef CreatTensorForOutput(session::KernelGraph *kernel_graph, const session::KernelWithIndex &kernel_with_index);
+  tensor::TensorPtr CreatTensorForOutput(session::KernelGraph *kernel_graph, const CNodePtr &node, size_t index,
+                                         std::map<tensor::TensorPtr, session::KernelWithIndex> *tensor_to_node);
+  BaseRef CreatTensorForOutput(session::KernelGraph *kernel_graph, const session::KernelWithIndex &kernel_with_index,
+                               std::map<tensor::TensorPtr, session::KernelWithIndex> *tensor_to_node);
   void BindInputTensorAddressPtr(const session::KernelGraph &graph, const std::vector<tensor::TensorPtr> &inputs);
   void BindOutputTensorAddressPtr(const VectorRef *outputs);
   void AssignValueNodeAddress(session::KernelGraph *kernel_graph);
diff --git a/mindspore/ccsrc/runtime/device/executor/dynamic_kernel.cc b/mindspore/ccsrc/runtime/device/executor/dynamic_kernel.cc
index bc3165133c..ec009b5604 100644
--- a/mindspore/ccsrc/runtime/device/executor/dynamic_kernel.cc
+++ b/mindspore/ccsrc/runtime/device/executor/dynamic_kernel.cc
@@ -51,17 +51,6 @@ void DynamicKernel::Initialize() {
 
 int DynamicKernel::GetKernelType() { return AnfAlgo::GetKernelType(cnode_ptr_); }
 
-bool IsTupleGetItem(const AnfNodePtr &anf_node) {
-  MS_EXCEPTION_IF_NULL(anf_node);
-  if (!anf_node->isa<CNode>()) {
-    return false;
-  }
-  auto cnode = anf_node->cast<CNodePtr>();
-  MS_EXCEPTION_IF_NULL(cnode);
-  auto input0 = cnode->input(0);
-  return IsPrimitive(input0, prim::kPrimTupleGetItem);
-}
-
 void DynamicKernel::RebuildDependTensor() {
   depend_tensor_map_.clear();
   for (auto depend : depend_list_) {
@@ -112,7 +101,7 @@ void DynamicKernel::InferShape() {
 
     auto cnode_input = cnode_ptr_->input(i + 1);
     MS_EXCEPTION_IF_NULL(cnode_input);
-    if (IsTupleGetItem(cnode_input)) {
+    if (AnfAlgo::CheckPrimitiveType(cnode_input, prim::kPrimTupleGetItem)) {
       auto base_shape = real_input->Shape();
       if (!base_shape->isa<abstract::TupleShape>()) {
         MS_LOG(EXCEPTION) << "Node:" << cnode_ptr_->fullname_with_scope()
diff --git a/mindspore/core/abstract/abstract_value.h b/mindspore/core/abstract/abstract_value.h
index 126398b70a..109336e481 100644
--- a/mindspore/core/abstract/abstract_value.h
+++ b/mindspore/core/abstract/abstract_value.h
@@ -259,6 +259,13 @@ class AbstractUndetermined : public AbstractBase {
     }
     set_shape(std::make_shared<Shape>(shape));
   }
+  explicit AbstractUndetermined(const TypePtr &element_type, const BaseShapePtr &shape = std::make_shared<Shape>())
+      : AbstractBase(kAnyValue), element_(std::make_shared<AbstractScalar>(kAnyValue, element_type)) {
+    if (element_type == nullptr) {  // runs after element_ was already built from element_type above
+      MS_LOG(EXCEPTION) << "element_type is nullptr";
+    }
+    set_shape(shape);  // hands the caller's BaseShapePtr to set_shape unchanged (default: default-constructed Shape)
+  }
   ~AbstractUndetermined() override = default;
   MS_DECLARE_PARENT(AbstractUndetermined, AbstractBase)
   TypePtr BuildType() const override { return std::make_shared<UndeterminedType>(); }
@@ -277,6 +284,8 @@ class AbstractTensor : public AbstractUndetermined {
       : AbstractUndetermined(element, shape) {}
   AbstractTensor(const TypePtr &element_type, const ShapeVector &shape) : AbstractUndetermined(element_type, shape) {}
   explicit AbstractTensor(const tensor::TensorPtr &tensor) : AbstractUndetermined(tensor->Dtype(), tensor->shape()) {}
+  explicit AbstractTensor(const TypePtr &element_type, const BaseShapePtr &shape = std::make_shared<Shape>())
+      : AbstractUndetermined(element_type, shape) {}  // forwards both arguments to the base-class constructor
   ~AbstractTensor() override = default;
   MS_DECLARE_PARENT(AbstractTensor, AbstractUndetermined)
 
diff --git a/mindspore/core/abstract/utils.cc b/mindspore/core/abstract/utils.cc
index b250a0b2d3..35d65f394d 100644
--- a/mindspore/core/abstract/utils.cc
+++ b/mindspore/core/abstract/utils.cc
@@ -26,6 +26,12 @@
 
 namespace mindspore {
 namespace abstract {
+const std::map<TypeId, size_t> type_map = {{kNumberTypeBool, 1},    {kNumberTypeInt, 4},     {kNumberTypeInt8, 1},
+                                           {kNumberTypeInt16, 2},   {kNumberTypeInt32, 4},   {kNumberTypeInt64, 8},
+                                           {kNumberTypeUInt, 4},    {kNumberTypeUInt8, 1},   {kNumberTypeUInt16, 2},
+                                           {kNumberTypeUInt32, 4},  {kNumberTypeUInt64, 8},  {kNumberTypeFloat, 4},
+                                           {kNumberTypeFloat16, 2}, {kNumberTypeFloat32, 4}, {kNumberTypeFloat64, 8}};
+
 ValuePtr ValueJoin(const ValuePtr &value1, const ValuePtr &value2) {
   MS_EXCEPTION_IF_NULL(value1);
   MS_EXCEPTION_IF_NULL(value2);
@@ -291,5 +297,18 @@ ShapePtr GetBroadcastShape(const std::string &op, const AbstractTensorPtr &tenso
   auto y_shape = tensor_y_shape->shape();
   return std::make_shared<Shape>(RealBroadcast(op, x_shape, y_shape));
 }
+
+// Looks up the per-element byte size of data_type in type_map; yields 0 when the type has no entry.
+size_t TypeIdSize(const TypeId data_type) {
+  const auto entry = type_map.find(data_type);
+  if (entry == type_map.end()) {
+    return 0;
+  }
+  return entry->second;
+}
+
+size_t ShapeSize(const std::vector<size_t> &shape) {  // product of every dimension; 1 for an empty shape
+  return std::accumulate(shape.cbegin(), shape.cend(), static_cast<size_t>(1), std::multiplies<size_t>());
+}
 }  // namespace abstract
 }  // namespace mindspore
diff --git a/mindspore/core/abstract/utils.h b/mindspore/core/abstract/utils.h
index 3287e15184..76e108bc2b 100644
--- a/mindspore/core/abstract/utils.h
+++ b/mindspore/core/abstract/utils.h
@@ -51,6 +51,9 @@ int64_t GetPositiveAxis(int64_t axis_value, size_t increment);
 
 ShapeVector BroadcastShape(ShapeVector shpx, ShapeVector shpy);
 
+size_t TypeIdSize(const TypeId data_type);  // bytes per element; 0 for a type missing from the lookup table
+size_t ShapeSize(const std::vector<size_t> &shape);  // product of all dimensions; 1 for an empty shape
+
 // Get broadcasted shape for binary element-wise operation
 ShapePtr GetBroadcastShape(const std::string &op, const AbstractTensorPtr &tensor_x, const AbstractTensorPtr &tensor_y);
 }  // namespace abstract
diff --git a/mindspore/core/ir/anf.h b/mindspore/core/ir/anf.h
index e3dbd23dac..62c1c164a7 100644
--- a/mindspore/core/ir/anf.h
+++ b/mindspore/core/ir/anf.h
@@ -322,9 +322,17 @@ class Parameter : public ANode {
     return shared_from_this() == other.shared_from_this();
   }
 
+  void set_used_by_real_kernel() { is_real_kernel_used_ = false; }  // NOTE(review): writes false; confirm intent
+  bool is_used_by_real_kernel() const { return is_real_kernel_used_; }  // read-only view of the flag
+
+  void set_used_by_dynamic_kernel() { is_used_by_dynamic_kernel_ = true; }  // one-way switch: only ever sets true
+  bool is_used_by_dynamic_kernel() const { return is_used_by_dynamic_kernel_; }  // read-only view of the flag
+
  private:
   std::string name_;
   bool has_default_;
+  bool is_real_kernel_used_ = true;  // starts true; set_used_by_real_kernel() assigns false
+  bool is_used_by_dynamic_kernel_ = false;  // starts false; set_used_by_dynamic_kernel() assigns true
   ValuePtr default_param_;
   // The count of graphs using the parameter.
   int used_graph_count_;
diff --git a/mindspore/core/ir/tensor.cc b/mindspore/core/ir/tensor.cc
index a814ed26e2..0096832d3a 100644
--- a/mindspore/core/ir/tensor.cc
+++ b/mindspore/core/ir/tensor.cc
@@ -29,6 +29,7 @@
 #include <type_traits>
 #include <typeinfo>
 
+#include "abstract/utils.h"
 #include "abstract/abstract_value.h"
 
 namespace mindspore {
@@ -581,8 +582,11 @@ void Tensor::data_sync(bool need_wait) const {
   if (device_sync_ == nullptr) {
     return;
   }
+  std::vector<size_t> shape_tmp;
+  (void)std::transform(shape().begin(), shape().end(), std::back_inserter(shape_tmp), IntToSize);
+  auto size = abstract::ShapeSize(shape_tmp) * abstract::TypeIdSize(data_type());
   auto address = device_sync_;
-  if (!address->SyncDeviceToHost(shape(), static_cast<size_t>(data().nbytes()), data_type(), data_c())) {
+  if (size != 0 && !address->SyncDeviceToHost(shape(), size, data_type(), data_c())) {
     MS_LOG(EXCEPTION) << "SyncDeviceToHost failed.";
   }
   sync_status_ = kNeedSyncHostToDevice;
diff --git a/tests/st/dynamic_shape/test_ascend_cpu.py b/tests/st/dynamic_shape/test_ascend_cpu.py
new file mode 100644
index 0000000000..e3c4938d60
--- /dev/null
+++ b/tests/st/dynamic_shape/test_ascend_cpu.py
@@ -0,0 +1,70 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+import numpy as np
+import pytest
+import mindspore.context as context
+import mindspore.nn as nn
+from mindspore import Tensor
+import mindspore.common.dtype as mstype
+from mindspore.ops import operations as P
+
+context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+
+
+class Net(nn.Cell):  # wraps a single Unique op whose "primitive_target" attribute is set to "CPU"
+    def __init__(self):
+        super(Net, self).__init__()
+        self.unique = P.Unique().add_prim_attr("primitive_target", "CPU")
+
+    def construct(self, x):
+        x, y = self.unique(x)  # unpack the op's two outputs
+        return (x, y)  # and hand them back as a tuple
+
+
+class UniqueSquare(nn.Cell):  # Unique (tagged with target "CPU") followed by Square on its first output
+    def __init__(self):
+        super(UniqueSquare, self).__init__()
+        self.unique = P.Unique().add_prim_attr("primitive_target", "CPU")
+        self.square = P.Square()  # no target attribute is set on this op
+
+    def construct(self, x):
+        x, _ = self.unique(x)  # keep only the first output; the second is discarded
+        return self.square(x)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_arm_ascend_training
+@pytest.mark.platform_x86_ascend_training
+@pytest.mark.env_onecard
+def test_unique_ascend():
+    """Unique on [1, 1, 2, 2, 3, 3] must return values [1, 2, 3] and indices [0, 0, 1, 1, 2, 2]."""
+    input_tensor = Tensor(np.array([1, 1, 2, 2, 3, 3]), mstype.int32)
+    results = Net()(input_tensor)
+    expected_values = np.array([1, 2, 3])
+    expected_indices = np.array([0, 0, 1, 1, 2, 2])
+    assert (results[0].asnumpy() == expected_values).all()
+    assert (results[1].asnumpy() == expected_indices).all()
+
+
+@pytest.mark.level0
+@pytest.mark.platform_arm_ascend_training
+@pytest.mark.platform_x86_ascend_training
+@pytest.mark.env_onecard
+def test_unique_square():
+    """Squaring the first Unique output for [1, 1, 2, 2, 3, 3] must give [1, 4, 9]."""
+    input_tensor = Tensor(np.array([1, 1, 2, 2, 3, 3]), mstype.int32)
+    squared = UniqueSquare()(input_tensor)
+    expected = np.array([1, 4, 9])
+    assert (squared.asnumpy() == expected).all()
diff --git a/tests/st/dynamic_shape/test_unique_cpu.py b/tests/st/dynamic_shape/test_unique_cpu.py
new file mode 100644
index 0000000000..6e2b57ab09
--- /dev/null
+++ b/tests/st/dynamic_shape/test_unique_cpu.py
@@ -0,0 +1,69 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+import numpy as np
+import pytest
+import mindspore.context as context
+import mindspore.nn as nn
+from mindspore import Tensor
+import mindspore.common.dtype as mstype
+from mindspore.ops import operations as P
+
+context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
+
+
+class Net(nn.Cell):  # thin wrapper around a single Unique op
+    def __init__(self):
+        super(Net, self).__init__()
+        self.unique = P.Unique()
+
+    def construct(self, x):
+        return self.unique(x)  # the op's result is returned as-is
+
+
+class UniqueSquare(nn.Cell):  # Unique followed by Square on its first output
+    def __init__(self):
+        super(UniqueSquare, self).__init__()
+        self.unique = P.Unique()
+        self.square = P.Square()
+
+    def construct(self, x):
+        x, _ = self.unique(x)  # keep only the first output; the second is discarded
+        return self.square(x)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_arm_ascend_training  # NOTE(review): Ascend mark, but device_target is "CPU" - confirm
+@pytest.mark.platform_x86_ascend_training
+@pytest.mark.env_onecard
+def test_unique_cpu():
+    """Unique on [1, 1, 2, 2, 3, 3] must return values [1, 2, 3] and indices [0, 0, 1, 1, 2, 2]."""
+    input_tensor = Tensor(np.array([1, 1, 2, 2, 3, 3]), mstype.int32)
+    results = Net()(input_tensor)
+    expected_values = np.array([1, 2, 3])
+    expected_indices = np.array([0, 0, 1, 1, 2, 2])
+    assert (results[0].asnumpy() == expected_values).all()
+    assert (results[1].asnumpy() == expected_indices).all()
+
+
+@pytest.mark.level0
+@pytest.mark.platform_arm_ascend_training  # NOTE(review): Ascend mark, but device_target is "CPU" - confirm
+@pytest.mark.platform_x86_ascend_training
+@pytest.mark.env_onecard
+def test_unique_square():
+    """Squaring the first Unique output for [1, 1, 2, 2, 3, 3] must give [1, 4, 9]."""
+    input_tensor = Tensor(np.array([1, 1, 2, 2, 3, 3]), mstype.int32)
+    squared = UniqueSquare()(input_tensor)
+    expected = np.array([1, 4, 9])
+    assert (squared.asnumpy() == expected).all()