diff --git a/CMakeLists.txt b/CMakeLists.txt index 987e4ae709..c4da105cac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,6 +17,10 @@ else() set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -Wl,--allow-shlib-undefined -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2") endif() +if (ENABLE_PYTHON) + add_compile_definitions(ENABLE_PYTHON) +endif() + set(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -g2 -ggdb -fno-inline-functions -fno-omit-frame-pointer -Wl,--allow-shlib-undefined -D_LIBCPP_INLINE_VISIBILITY='' -D'_LIBCPP_EXTERN_TEMPLATE(...)=' -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2 -Wno-cpp") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I/usr/local/include -std=c++17 -Werror -Wall -Wno-deprecated-declarations -fPIC") diff --git a/RELEASE.md b/RELEASE.md index 4b829152a2..def72cbb20 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -70,6 +70,22 @@ Alexey Shevlyakov, avakh, baihuawei, BowenK, buxue, caifubi, caojian05, Cathy Wo Contributions of any kind are welcome! +# Release 0.3.1-alpha + +## Major Features and Improvements + +### Ascend 910 Training and Inference Framework +* Frontend and User Interface + * Independent model init interface. +* Data processing, augmentation, and save format + * Support sample padding for minddataset. 
+ +## Bugfixes +* Python API + * Fix bugs in the lars optimizer([!1894](https://gitee.com/mindspore/mindspore/pulls/1894)) +* Data processing + * Fix accuracy problem of RandomCropDecodeResize ([!2340](https://gitee.com/mindspore/mindspore/pulls/2340)) + # Release 0.3.0-alpha ## Major Features and Improvements diff --git a/build.sh b/build.sh index 059478b9af..cfa657ff3e 100755 --- a/build.sh +++ b/build.sh @@ -24,8 +24,8 @@ usage() { echo "Usage:" echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\" - echo " [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\" - echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E]" + echo " [-a on|off] [-Q on|off] [-S on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\" + echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E] [-l on|off]" echo "" echo "Options:" echo " -d Debug mode" @@ -48,6 +48,7 @@ usage() echo " -P Enable dump anf graph to file in ProtoBuffer format, default on" echo " -Q Enable dump memory, default off" echo " -D Enable dumping of function graph ir, default on" + echo " -S Enable async data dump, default off" echo " -z Compile dataset & mindrecord, default on" echo " -M Enable MPI and NCCL for GPU training, gpu default on" echo " -V Specify the minimum required cuda version, default CUDA 10.1" @@ -56,6 +57,7 @@ usage() echo " -s Enable serving module, default off" echo " -B Enable debugger, default off" echo " -E Enable IBVERBS for parameter server, default off" + echo " -l Compile with python dependency, default on" } # check value of input is 'on' or 'off' @@ -87,6 +89,7 @@ checkopts() ENABLE_TIMELINE="off" ENABLE_DUMP2PROTO="on" ENABLE_DUMPE2E="off" + ENABLE_DATA_DUMP="off" ENABLE_DUMP_IR="on" COMPILE_MINDDATA="on" ENABLE_MPI="off" @@ -98,9 +101,10 @@ checkopts() ENABLE_SERVING="off" ENABLE_DEBUGGER="off" ENABLE_IBVERBS="off" + 
ENABLE_PYTHON="on" # Process the options - while getopts 'drvj:c:t:hsb:a:g:p:ie:m:I:LRP:Q:D:zM:V:K:sB:E' opt + while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:S:D:zM:V:K:sB:E' opt do OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]') case "${opt}" in @@ -151,6 +155,10 @@ checkopts() check_on_off $OPTARG p ENABLE_PROFILE="$OPTARG" ;; + l) + check_on_off $OPTARG l + ENABLE_PYTHON="$OPTARG" + ;; i) INC_BUILD="on" ;; @@ -212,6 +220,11 @@ checkopts() ENABLE_DUMPE2E="$OPTARG" echo "enable dump end to end" ;; + S) + check_on_off $OPTARG S + ENABLE_DATA_DUMP="$OPTARG" + echo "enable data dump" + ;; D) check_on_off $OPTARG D ENABLE_DUMP_IR="$OPTARG" @@ -315,7 +328,11 @@ build_mindspore() if [[ "X$ENABLE_DUMPE2E" = "Xon" ]]; then CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_E2E=ON" fi + if [[ "X$ENABLE_DATA_DUMP" = "Xon" ]]; then + CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DATA_DUMP=ON" + fi CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_IR=${ENABLE_DUMP_IR}" + CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_PYTHON=${ENABLE_PYTHON}" if [[ "X$ENABLE_MPI" = "Xon" ]]; then CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_MPI=ON" fi diff --git a/cmake/external_libs/icu4c.cmake b/cmake/external_libs/icu4c.cmake index 7d13e4fd2a..af69328e55 100644 --- a/cmake/external_libs/icu4c.cmake +++ b/cmake/external_libs/icu4c.cmake @@ -9,11 +9,11 @@ else() LIBS ${LIB_ICU_COMMON} ${LIB_ICU_DATA} ${LIB_ICU_I18N} URL https://github.com/unicode-org/icu/archive/release-67-1.tar.gz MD5 0c2662a2b0bc80b0eb56495205247c8f - CONFIGURE_COMMAND ./icu4c/source/runConfigureICU Linux --enable-rpath --disable-tests --disable-samples --disable-icuio --disable-extras ICU_DATA_FILTER_FILE=${CMAKE_SOURCE_DIR}/third_party/icu4c/filter.json + CONFIGURE_COMMAND ${CMAKE_SOURCE_DIR}/scripts/build_icu4c.sh ) include_directories(${icu4c_INC}) add_library(mindspore::icuuc ALIAS icu4c::${LIB_ICU_COMMON}) add_library(mindspore::icudata ALIAS icu4c::${LIB_ICU_DATA}) add_library(mindspore::icui18n ALIAS icu4c::${LIB_ICU_I18N}) add_definitions(-D ENABLE_ICU4C) -endif() \ 
No newline at end of file +endif() diff --git a/cmake/mind_expression.cmake b/cmake/mind_expression.cmake index 63a65cd533..9002c23976 100644 --- a/cmake/mind_expression.cmake +++ b/cmake/mind_expression.cmake @@ -15,7 +15,7 @@ include(${CMAKE_SOURCE_DIR}/cmake/external_libs/json.cmake) include(${CMAKE_SOURCE_DIR}/cmake/dependency_securec.cmake) include(${CMAKE_SOURCE_DIR}/cmake/external_libs/protobuf.cmake) -if (ENABLE_DEBUGGER) +if (ENABLE_DEBUGGER OR ENABLE_SERVING) # build dependencies of gRPC include(${CMAKE_SOURCE_DIR}/cmake/external_libs/absl.cmake) include(${CMAKE_SOURCE_DIR}/cmake/external_libs/c-ares.cmake) @@ -30,7 +30,7 @@ include(${CMAKE_SOURCE_DIR}/cmake/external_libs/flatbuffers.cmake) if(USE_GLOG) include(${CMAKE_SOURCE_DIR}/cmake/external_libs/glog.cmake) endif() -if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows") +if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows" AND NOT ENABLE_GE) include(${CMAKE_SOURCE_DIR}/cmake/external_libs/zeromq.cmake) include(${CMAKE_SOURCE_DIR}/cmake/external_libs/pslite.cmake) endif() diff --git a/cmake/options.cmake b/cmake/options.cmake index 18db942d68..2470c25a90 100644 --- a/cmake/options.cmake +++ b/cmake/options.cmake @@ -19,6 +19,7 @@ option(ENABLE_MPI "enable mpi" OFF) option(ENABLE_AKG "enable akg" OFF) option(ENABLE_DEBUGGER "enable debugger" OFF) option(ENABLE_IBVERBS "enable IBVERBS for parameter server" OFF) +option(ENABLE_PYTHON "Enable python" ON) if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") if (WIN32) @@ -115,6 +116,10 @@ if(ENABLE_DUMP_E2E) add_compile_definitions(ENABLE_DUMP_E2E) endif() +if(ENABLE_DATA_DUMP) + add_compile_definitions(ENABLE_DATA_DUMP) +endif() + if(ENABLE_DEBUGGER) add_compile_definitions(ENABLE_DEBUGGER) endif() diff --git a/cmake/package.cmake b/cmake/package.cmake index 2fde01af4f..7b3c2f7bb2 100644 --- a/cmake/package.cmake +++ b/cmake/package.cmake @@ -213,7 +213,6 @@ install( ${CMAKE_SOURCE_DIR}/mindspore/parallel ${CMAKE_SOURCE_DIR}/mindspore/mindrecord 
${CMAKE_SOURCE_DIR}/mindspore/train - ${CMAKE_SOURCE_DIR}/mindspore/model_zoo ${CMAKE_SOURCE_DIR}/mindspore/common ${CMAKE_SOURCE_DIR}/mindspore/ops ${CMAKE_SOURCE_DIR}/mindspore/communication @@ -261,3 +260,17 @@ if (EXISTS ${CMAKE_SOURCE_DIR}/mindspore/dataset) COMPONENT mindspore ) endif () + +if (ENABLE_SERVING) + install( + TARGETS ms_serving + DESTINATION ${INSTALL_BASE_DIR} + COMPONENT mindspore + ) + + install( + TARGETS inference + DESTINATION ${INSTALL_LIB_DIR} + COMPONENT mindspore + ) +endif () diff --git a/config/data_dump.json b/config/data_dump.json new file mode 100644 index 0000000000..fc08f78590 --- /dev/null +++ b/config/data_dump.json @@ -0,0 +1,15 @@ +{ + "DumpSettings": { + "net_name": "ResNet50", + "mode": 1, + "iteration": 0, + "kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"] + }, + + "DumpSettingsSpec": { + "net_name": "net name eg:ResNet50", + "mode": "0: dump all kernels, 1: dump kernels in kernels list", + "iteration": "specified iteration ", + "kernels": "op's full scope name which need to be dump" + } +} \ No newline at end of file diff --git a/config/op_info.config b/config/op_info.config new file mode 100644 index 0000000000..6ab9eba875 --- /dev/null +++ b/config/op_info.config @@ -0,0 +1,383 @@ +{"op_name": "InitData", "inputs": [], "outputs": [], "attr": [{"name": "queue_name", "type": "str"}], "fusion_type": "OPAQUE", "dtype_format": [], "imply_type": "AiCPU"} +{"op_name": "DropoutGenMask", "inputs": [{"index": 0, "name": "x1", "param_type": "required"}, {"index": 1, "name": "x2", "param_type": "required"}], "outputs": [{"index": 0, "name": "y", "param_type": "required"}], "attr": [{"name": "Seed0", "type": "int"}, {"name": "Seed1", "type": "int"}], "fusion_type": "OPAQUE", "dtype_format": [[["int32", "NCHW"], ["float16", "NCHW"], ["uint8", "NCHW"]]], "imply_type": "AiCPU"} +{"op_name": "GetNext", "inputs": [], "outputs": [{"index": 0, "name": "y", "param_type": "dynamic"}], "attr": [{"name": "shared_name", "type": 
"str"}], "fusion_type": "OPAQUE", "dtype_format": [[["bool", "DefaultFormat"]], [["int8", "DefaultFormat"]], [["int16", "DefaultFormat"]], [["int32", "DefaultFormat"]], [["int64", "DefaultFormat"]], [["float16", "DefaultFormat"]], [["uint8", "DefaultFormat"]], [["uint16", "DefaultFormat"]], [["uint32", "DefaultFormat"]], [["uint64", "DefaultFormat"]], [["float32", "DefaultFormat"]]], "imply_type": "AiCPU"} +{"op_name": "Print", "inputs": [{"index": 0, "name": "x", "param_type": "dynamic"}], "outputs": [{"index": 0, "name": "y", "param_type": "required"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["bool", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint16", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "AiCPU"} +{"op_name": "TopK", "inputs": [{"index": 0, "name": "intput", "param_type": "required"}, {"index": 1, "name": "k", "param_type": "required"}], "outputs": [{"index": 0, "name": "values", "param_type": "required"}, {"index": 1, "name": "indices", "param_type": "required"}], "attr": [{"name": "sorted", "type": "bool"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", 
"DefaultFormat"]]], "imply_type": "AiCPU"} +{"op_name": "IsFinite", "inputs": [{"index": 0, "name": "x", "param_type": "required"}], "outputs": [{"index": 0, "name": "y", "param_type": "required"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["bool", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int8", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int16", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int32", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int64", "DefaultFormat"], ["bool", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["bool", "DefaultFormat"]], [["uint16", "DefaultFormat"], ["bool", "DefaultFormat"]], [["uint32", "DefaultFormat"], ["bool", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["bool", "DefaultFormat"]], [["float16", "DefaultFormat"], ["bool", "DefaultFormat"]], [["float32", "DefaultFormat"], ["bool", "DefaultFormat"]], [["float64", "DefaultFormat"], ["bool", "DefaultFormat"]], [["bool", "NCHW"], ["bool", "NCHW"]], [["int8", "NCHW"], ["bool", "NCHW"]], [["int16", "NCHW"], ["bool", "NCHW"]], [["int32", "NCHW"], ["bool", "NCHW"]], [["int64", "NCHW"], ["bool", "NCHW"]], [["uint8", "NCHW"], ["bool", "NCHW"]], [["uint16", "NCHW"], ["bool", "NCHW"]], [["uint32", "NCHW"], ["bool", "NCHW"]], [["uint64", "NCHW"], ["bool", "NCHW"]], [["float16", "NCHW"], ["bool", "NCHW"]], [["float32", "NCHW"], ["bool", "NCHW"]], [["float64", "NCHW"], ["bool", "NCHW"]]], "imply_type": "AiCPU"} +{"op_name": "Reshape", "inputs": [{"index": 0, "name": "x", "param_type": "required"}], "outputs": [{"index": 0, "name": "y", "param_type": "required"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["bool", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], 
[["uint16", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float64", "DefaultFormat"], ["float64", "DefaultFormat"]], [["bool", "NCHW"], ["bool", "NCHW"]], [["int8", "NCHW"], ["int8", "NCHW"]], [["int16", "NCHW"], ["int16", "NCHW"]], [["int32", "NCHW"], ["int32", "NCHW"]], [["int64", "NCHW"], ["int64", "NCHW"]], [["uint8", "NCHW"], ["uint8", "NCHW"]], [["uint16", "NCHW"], ["uint16", "NCHW"]], [["uint32", "NCHW"], ["uint32", "NCHW"]], [["uint64", "NCHW"], ["uint64", "NCHW"]], [["float16", "NCHW"], ["float16", "NCHW"]], [["float32", "NCHW"], ["float32", "NCHW"]], [["float64", "NCHW"], ["float64", "NCHW"]]], "imply_type": "AiCPU"} +{"op_name": "Flatten", "inputs": [{"index": 0, "name": "x", "param_type": "required"}], "outputs": [{"index": 0, "name": "y", "param_type": "required"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint16", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int8", "NCHW"], ["int8", "NCHW"]], [["int16", "NCHW"], ["int16", "NCHW"]], [["int32", "NCHW"], ["int32", "NCHW"]], [["int64", "NCHW"], ["int64", "NCHW"]], [["uint8", "NCHW"], ["uint8", "NCHW"]], [["uint16", "NCHW"], ["uint16", "NCHW"]], [["uint32", "NCHW"], ["uint32", "NCHW"]], [["uint64", "NCHW"], ["uint64", 
"NCHW"]], [["float16", "NCHW"], ["float16", "NCHW"]], [["float32", "NCHW"], ["float32", "NCHW"]]], "imply_type": "AiCPU"} +{"op_name": "Squeeze", "inputs": [{"index": 0, "name": "x", "param_type": "required"}], "outputs": [{"index": 0, "name": "y", "param_type": "required"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["bool", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint16", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float64", "DefaultFormat"], ["float64", "DefaultFormat"]], [["bool", "NCHW"], ["bool", "NCHW"]], [["int8", "NCHW"], ["int8", "NCHW"]], [["int16", "NCHW"], ["int16", "NCHW"]], [["int32", "NCHW"], ["int32", "NCHW"]], [["int64", "NCHW"], ["int64", "NCHW"]], [["uint8", "NCHW"], ["uint8", "NCHW"]], [["uint16", "NCHW"], ["uint16", "NCHW"]], [["uint32", "NCHW"], ["uint32", "NCHW"]], [["uint64", "NCHW"], ["uint64", "NCHW"]], [["float16", "NCHW"], ["float16", "NCHW"]], [["float32", "NCHW"], ["float32", "NCHW"]], [["float64", "NCHW"], ["float64", "NCHW"]]], "imply_type": "AiCPU"} +{"op_name": "ExpandDims", "inputs": [{"index": 0, "name": "x", "param_type": "required"}], "outputs": [{"index": 0, "name": "y", "param_type": "required"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["bool", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], 
[["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint16", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float64", "DefaultFormat"], ["float64", "DefaultFormat"]], [["bool", "NCHW"], ["bool", "NCHW"]], [["int8", "NCHW"], ["int8", "NCHW"]], [["int16", "NCHW"], ["int16", "NCHW"]], [["int32", "NCHW"], ["int32", "NCHW"]], [["int64", "NCHW"], ["int64", "NCHW"]], [["uint8", "NCHW"], ["uint8", "NCHW"]], [["uint16", "NCHW"], ["uint16", "NCHW"]], [["uint32", "NCHW"], ["uint32", "NCHW"]], [["uint64", "NCHW"], ["uint64", "NCHW"]], [["float16", "NCHW"], ["float16", "NCHW"]], [["float32", "NCHW"], ["float32", "NCHW"]], [["float64", "NCHW"], ["float64", "NCHW"]]], "imply_type": "AiCPU"} +{"op_name": "RandomChoiceWithMask", "inputs": [{"index": 0, "name": "x", "param_type": "required"}], "outputs": [{"index": 0, "name": "y", "param_type": "required"}, {"index": 1, "name": "mask", "param_type": "required"}], "attr": [{"name": "count", "type": "int"}, {"name": "seed", "type": "int"}, {"name": "seed2", "type": "int"}], "fusion_type": "OPAQUE", "dtype_format": [[["bool", "NCHW"], ["int32", "NCHW"], ["bool", "NCHW"]], [["bool", "DefaultFormat"], ["int32", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "AiCPU"} +{"op_name": "Pack", "inputs": [{"index": 0, "name": "x", "param_type": "dynamic"}], "outputs": [{"index": 0, "name": "y", "param_type": "required"}], "attr": [{"name": "axis", "type": "int"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", 
"DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint16", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float64", "DefaultFormat"], ["float64", "DefaultFormat"]], [["bool", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "AiCPU"} +{"op_name": "Normal", "inputs": [{"index": 0, "name": "shape", "param_type": "required"}, {"index": 1, "name": "mean", "param_type": "required"}, {"index": 2, "name": "stddev", "param_type": "required"}], "outputs": [{"index": 0, "name": "y", "param_type": "required"}], "attr": [{"name": "seed", "type": "int"}], "fusion_type": "OPAQUE", "dtype_format": [[["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"]]], "imply_type": "AiCPU"} +{"op_name": "CTCLoss", "inputs": [{"index": 0, "name": "inputs", "param_type": "required"}, {"index": 1, "name": "labels_indices", "param_type": "required"}, {"index": 2, "name": "labels_values", "param_type": "required"}, {"index": 3, "name": "sequence_length", "param_type": "required"}], "outputs": [{"index": 0, "name": "loss", "param_type": "required"}, {"index": 1, "name": "gradient", "param_type": "required"}], "attr": [{"name": "preprocess_collapse_repeated", "type": "bool"}, {"name": "ctc_merge_repeated", "type": "bool"}, {"name": "ignore_longer_outputs_than_inputs", "type": "bool"}], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "DefaultFormat"], ["int64", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float64", "DefaultFormat"], ["int64", "DefaultFormat"], ["int32", 
"DefaultFormat"], ["int32", "DefaultFormat"], ["float64", "DefaultFormat"], ["float64", "DefaultFormat"]], [["float32", "NCHW"], ["int64", "NCHW"], ["int32", "NCHW"], ["int32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"]], [["float64", "NCHW"], ["int64", "NCHW"], ["int32", "NCHW"], ["int32", "NCHW"], ["float64", "NCHW"], ["float64", "NCHW"]]], "imply_type": "AiCPU"} +{"op_name": "ReverseSequence", "inputs": [{"index": 0, "name": "x", "param_type": "required"}, {"index": 1, "name": "seq_lengths", "param_type": "required"}], "outputs": [{"index": 0, "name": "y", "param_type": "required"}], "attr": [{"name": "seq_dim", "type": "int"}, {"name": "batch_dim", "type": "int"}], "fusion_type": "OPAQUE", "dtype_format": [[["bool", "DefaultFormat"], ["int32", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int32", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int16", "DefaultFormat"], ["int32", "DefaultFormat"], ["int16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int32", "DefaultFormat"], ["int64", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint16", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["uint32", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float64", "DefaultFormat"], ["int32", "DefaultFormat"], ["float64", "DefaultFormat"]], [["bool", "NCHW"], ["int32", "NCHW"], ["bool", "NCHW"]], [["int8", "NCHW"], ["int32", "NCHW"], ["int8", "NCHW"]], [["int16", "NCHW"], ["int32", "NCHW"], ["int16", "NCHW"]], [["int32", "NCHW"], ["int32", "NCHW"], ["int32", 
"NCHW"]], [["int64", "NCHW"], ["int32", "NCHW"], ["int64", "NCHW"]], [["uint8", "NCHW"], ["int32", "NCHW"], ["uint8", "NCHW"]], [["uint16", "NCHW"], ["int32", "NCHW"], ["uint16", "NCHW"]], [["uint32", "NCHW"], ["int32", "NCHW"], ["uint32", "NCHW"]], [["uint64", "NCHW"], ["int32", "NCHW"], ["uint64", "NCHW"]], [["float16", "NCHW"], ["int32", "NCHW"], ["float16", "NCHW"]], [["float32", "NCHW"], ["int32", "NCHW"], ["float32", "NCHW"]], [["float64", "NCHW"], ["int32", "NCHW"], ["float64", "NCHW"]], [["bool", "DefaultFormat"], ["int64", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int64", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int16", "DefaultFormat"], ["int64", "DefaultFormat"], ["int16", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["int64", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint16", "DefaultFormat"], ["int64", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["uint32", "DefaultFormat"], ["int64", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["int64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["float16", "DefaultFormat"], ["int64", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int64", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float64", "DefaultFormat"], ["int64", "DefaultFormat"], ["float64", "DefaultFormat"]], [["bool", "NCHW"], ["int64", "NCHW"], ["bool", "NCHW"]], [["int8", "NCHW"], ["int64", "NCHW"], ["int8", "NCHW"]], [["int16", "NCHW"], ["int64", "NCHW"], ["int16", "NCHW"]], [["int32", "NCHW"], ["int64", "NCHW"], ["int32", "NCHW"]], [["int64", "NCHW"], ["int64", "NCHW"], ["int64", "NCHW"]], [["uint8", "NCHW"], ["int64", "NCHW"], ["uint8", "NCHW"]], [["uint16", "NCHW"], ["int64", "NCHW"], ["uint16", "NCHW"]], [["uint32", "NCHW"], ["int64", "NCHW"], 
["uint32", "NCHW"]], [["uint64", "NCHW"], ["int64", "NCHW"], ["uint64", "NCHW"]], [["float16", "NCHW"], ["int64", "NCHW"], ["float16", "NCHW"]], [["float32", "NCHW"], ["int64", "NCHW"], ["float32", "NCHW"]], [["float64", "NCHW"], ["int64", "NCHW"], ["float64", "NCHW"]]], "imply_type": "AiCPU"} +{"op_name": "CropAndResize", "inputs": [{"index": 0, "name": "image", "param_type": "required"}, {"index": 1, "name": "boxes", "param_type": "required"}, {"index": 2, "name": "box_index", "param_type": "required"}, {"index": 3, "name": "crop_size", "param_type": "required"}], "outputs": [{"index": 0, "name": "y", "param_type": "required"}], "attr": [{"name": "method", "type": "str"}, {"name": "extrapolation_value", "type": "float"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int16", "DefaultFormat"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float64", "DefaultFormat"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["uint16", "DefaultFormat"], ["float32", 
"DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int8", "NHWC"], ["float32", "NHWC"], ["int32", "NHWC"], ["int32", "NHWC"], ["float32", "NHWC"]], [["int16", "NHWC"], ["float32", "NHWC"], ["int32", "NHWC"], ["int32", "NHWC"], ["float32", "NHWC"]], [["int32", "NHWC"], ["float32", "NHWC"], ["int32", "NHWC"], ["int32", "NHWC"], ["float32", "NHWC"]], [["int64", "NHWC"], ["float32", "NHWC"], ["int32", "NHWC"], ["int32", "NHWC"], ["float32", "NHWC"]], [["float16", "NHWC"], ["float32", "NHWC"], ["int32", "NHWC"], ["int32", "NHWC"], ["float32", "NHWC"]], [["float32", "NHWC"], ["float32", "NHWC"], ["int32", "NHWC"], ["int32", "NHWC"], ["float32", "NHWC"]], [["float64", "NHWC"], ["float32", "NHWC"], ["int32", "NHWC"], ["int32", "NHWC"], ["float32", "NHWC"]], [["uint8", "NHWC"], ["float32", "NHWC"], ["int32", "NHWC"], ["int32", "NHWC"], ["float32", "NHWC"]], [["uint16", "NHWC"], ["float32", "NHWC"], ["int32", "NHWC"], ["int32", "NHWC"], ["float32", "NHWC"]]], "imply_type": "AiCPU"} +{"op_name": "EndOfSequence", "inputs": [{"index": 0, "name": "x", "param_type": "required"}], "outputs": [{"index": 0, "name": "y", "param_type": "required"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]]], "imply_type": "AiCPU"} +{"op_name": "Abs", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "output"}]} +{"op_name": "AddN", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", 
"float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ"], "param_type": "dynamic", "name": "inputs"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "output"}]} +{"op_name": "TensorAdd", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "int32", "float16", "int32", "float32", "float32", "int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0", "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ"], "param_type": "required", "name": "x"}, {"index": 1, "dtype": ["float16", "int32", "float16", "int32", "float32", "float32", "int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0", "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ"], "param_type": "required", "name": "y"}], "outputs": [{"index": 0, "dtype": ["float16", "int32", "float16", "int32", "float32", "float32", "int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0", "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "output"}]} +{"op_name": "ApplyMomentum", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [{"name": "use_nesterov", "param_type": "optional", "type": "bool"}, {"name": "gradient_scale", "param_type": "optional", "type": "float"}], "inputs": [{"index": 0, "dtype": ["float32", "float32", "float32"], "format": ["DefaultFormat", "NC1HWC0", "FracZ"], "name": 
"variable"}, {"index": 1, "dtype": ["float32", "float32", "float32"], "format": ["DefaultFormat", "NC1HWC0", "FracZ"], "name": "accumulation"}, {"index": 2, "dtype": ["float32", "float32", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat"], "name": "learning_rate"}, {"index": 3, "dtype": ["float32", "float32", "float32"], "format": ["DefaultFormat", "NC1HWC0", "FracZ"], "name": "gradient"}, {"index": 4, "dtype": ["float32", "float32", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat"], "name": "momentum"}], "outputs": [{"index": 0, "dtype": ["float32", "float32", "float32"], "format": ["DefaultFormat", "NC1HWC0", "FracZ"], "name": "output"}]} +{"op_name": "Assign", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [], "inputs": [{"index": 0, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ"], "name": "ref"}, {"index": 1, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ"], "name": "value"}], "outputs": [{"index": 0, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ"], "name": "output"}]} +{"op_name": "InplaceAssign", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [{"name": "fake_output", "param_type": "optional", "type": "bool"}], "inputs": [{"index": 0, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ"], "name": 
"x"}, {"index": 1, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ"], "name": "y"}, {"index": 2, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ"], "name": "z"}], "outputs": [{"index": 0, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ"], "name": "output"}]} +{"op_name": "AssignAdd", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "ref"}, {"index": 1, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "value"}], "outputs": [{"index": 0, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "BiasAddGrad", "imply_type": "AutoDiff", "fusion_type": "COMMREDUCE", "attr": [{"name": "data_format", "param_type": "optional", "type": "listStr"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["NHWC", "NHWC", "NC1HWC0", "NC1HWC0", "DefaultFormat", "DefaultFormat"], "name": "dout"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", 
"DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "DefaultFormat"], "name": "output"}]} +{"op_name": "BiasAdd", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [{"name": "data_format", "param_type": "optional", "type": "listStr"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["NHWC", "NHWC", "NC1HWC0", "NC1HWC0", "DefaultFormat", "DefaultFormat"], "name": "x"}, {"index": 1, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["NHWC", "NHWC", "NC1HWC0", "NC1HWC0", "DefaultFormat", "DefaultFormat"], "name": "b"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "DefaultFormat"], "name": "output"}]} +{"op_name": "Cast", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [{"name": "dst_type", "param_type": "required", "type": "str"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "bool", "bool", "float16", "float32", "int32", "int32", "bool", "float16", "float32", "bool", "bool", "float16", "float32", "bool", "bool"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float32", "float16", "int32", "float16", "int32", "int32", "float16", "float32", "float32", "float32", "float16", "int32", "float32", "float32", "float16", "int32", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "output"}]} +{"op_name": 
"ClearZero", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [{"name": "pad_mod", "param_type": "optional", "type": "string"}, {"name": "window", "param_type": "optional", "type": "int"}, {"name": "pad", "param_type": "optional", "type": "int"}, {"name": "stride", "param_type": "optional", "type": "int"}], "inputs": [{"index": 0, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "x"}], "outputs": []} +{"op_name": "ConvBN1", "imply_type": "AutoDiff", "fusion_type": "CONVLUTION", "attr": [{"name": "x_shape", "param_type": "required", "type": "listInt"}, {"name": "w_shape", "param_type": "required", "type": "listInt"}, {"name": "pad_list", "param_type": "required", "type": "listInt"}, {"name": "stride", "param_type": "optional", "type": "int"}, {"name": "dilation", "param_type": "optional", "type": "int"}], "inputs": [{"index": 0, "dtype": ["float16"], "format": ["NC1HWC0"], "name": "x"}, {"index": 1, "dtype": ["float16"], "format": ["FracZ"], "name": "w"}], "outputs": [{"index": 0, "dtype": ["float16"], "format": ["NC1HWC0"], "name": "conv_res_16"}, {"index": 1, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "var_part"}, {"index": 2, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "mean"}]} +{"op_name": "Conv2DBackpropFilter", "imply_type": "AutoDiff", "fusion_type": "CONVLUTION", "attr": [{"name": "input_shape", "param_type": "required", "type": "listInt"}, {"name": "filter_sizes", "param_type": "required", "type": "listInt"}, {"name": "stride", "param_type": "optional", "type": "int"}, {"name": "pad_list", "param_type": "required", "type": "listInt"}, {"name": "dilation", "param_type": "optional", "type": "int"}], "inputs": [{"index": 0, "dtype": ["float16"], "format": ["NC1HWC0"], "name": "out_backprop"}, {"index": 1, "dtype": ["float16"], "format": ["NC1HWC0"], "name": "input"}], "outputs": [{"index": 0, "dtype": 
["float32"], "format": ["FracZ"], "name": "output"}]} +{"op_name": "Conv2DBackpropInput", "imply_type": "AutoDiff", "fusion_type": "CONVLUTION", "attr": [{"name": "input_sizes", "param_type": "required", "type": "listInt"}, {"name": "filter_shape", "param_type": "required", "type": "listInt"}, {"name": "stride", "param_type": "optional", "type": "int"}, {"name": "pad_list", "param_type": "required", "type": "listInt"}, {"name": "dilation", "param_type": "optional", "type": "int"}], "inputs": [{"index": 0, "dtype": ["float16"], "format": ["NC1HWC0"], "name": "out_backprop"}, {"index": 1, "dtype": ["float16"], "format": ["FracZ"], "name": "filter"}], "outputs": [{"index": 0, "dtype": ["float16"], "format": ["NC1HWC0"], "name": "output"}]} +{"op_name": "Conv2D", "imply_type": "AutoDiff", "fusion_type": "CONVLUTION", "attr": [{"name": "x_shape", "param_type": "required", "type": "listInt"}, {"name": "w_shape", "param_type": "required", "type": "listInt"}, {"name": "pad_list", "param_type": "required", "type": "listInt"}, {"name": "stride", "param_type": "optional", "type": "int"}, {"name": "dilation", "param_type": "optional", "type": "int"}], "inputs": [{"index": 0, "dtype": ["float16"], "format": ["NC1HWC0"], "name": "x"}, {"index": 1, "dtype": ["float16"], "format": ["FracZ"], "name": "w"}], "outputs": [{"index": 0, "dtype": ["float16"], "format": ["NC1HWC0"], "name": "output"}]} +{"op_name": "Div", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "x"}, {"index": 1, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "y"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": 
["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "EqualCount", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [], "inputs": [{"index": 0, "dtype": ["int32"], "format": ["DefaultFormat"], "name": "x"}, {"index": 1, "dtype": ["int32"], "format": ["DefaultFormat"], "name": "y"}], "outputs": [{"index": 0, "dtype": ["int32"], "format": ["DefaultFormat"], "name": "output"}]} +{"op_name": "Exp", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ"], "param_type": "required", "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "output"}]} +{"op_name": "Five2Four", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [{"name": "shape4d", "param_type": "required", "type": "listInt"}, {"name": "dstType", "param_type": "required", "type": "str"}, {"name": "output_format", "param_type": "required", "type": "str"}], "inputs": [{"index": 0, "dtype": ["float16", "float16", "float16", "float32", "float16", "float32"], "format": ["NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float16", "float32", "float32", "float32", "float32"], "format": ["DefaultFormat", "NHWC", "DefaultFormat", "DefaultFormat", "NHWC", "NHWC"], "name": "output"}]} +{"op_name": "Four2Five", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [{"name": "data_format", "param_type": "optional", "type": "listStr"}, {"name": "dst_type", "param_type": "required", "type": "str"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float32", "float16", "float32", 
"float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NHWC", "NHWC", "NHWC"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float16", "float32", "float16", "float16", "float32"], "format": ["NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "FusedBatchNormGrad", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [{"name": "data_format", "param_type": "optional", "type": "listStr"}], "inputs": [{"index": 0, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "dy"}, {"index": 1, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "x"}, {"index": 2, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "scale"}, {"index": 3, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "save_mean"}, {"index": 4, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "save_inv_variance"}], "outputs": [{"index": 0, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "dx"}, {"index": 1, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "bn_scale"}, {"index": 2, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "bn_bias"}]} +{"op_name": "FusedBatchNormInfer", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [{"name": "momentum", "param_type": "optional", "type": "float"}, {"name": "epsilon", "param_type": "optional", "type": "float"}, {"name": "data_format", "param_type": "optional", "type": "listStr"}], "inputs": [{"index": 0, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "x"}, {"index": 1, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "scale"}, {"index": 2, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "b"}, {"index": 3, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "mean"}, {"index": 4, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "variance"}], "outputs": [{"index": 0, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "y"}]} +{"op_name": "FusedBatchNorm", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [{"name": 
"momentum", "param_type": "optional", "type": "float"}, {"name": "epsilon", "param_type": "optional", "type": "float"}, {"name": "data_format", "param_type": "optional", "type": "listStr"}], "inputs": [{"index": 0, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "x"}, {"index": 1, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "scale"}, {"index": 2, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "b"}, {"index": 3, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "mean"}, {"index": 4, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "variance"}], "outputs": [{"index": 0, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "y"}, {"index": 1, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "running_mean"}, {"index": 2, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "running_variance"}, {"index": 3, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "save_mean"}, {"index": 4, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "save_inv_variance"}]} +{"op_name": "BNGrad1", "imply_type": "AutoDiff", "fusion_type": "COMMREDUCE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "dy"}, {"index": 1, "dtype": ["float16", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "data"}, {"index": 2, "dtype": ["float32", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "mean"}], "outputs": [{"index": 0, "dtype": ["float32", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "output"}, {"index": 1, "dtype": ["float32", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "output"}, {"index": 2, "dtype": ["float32", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "FusedBN1", "imply_type": "AutoDiff", "fusion_type": "COMMREDUCE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "data"}], "outputs": [{"index": 0, "dtype": ["float32", "float32"], "format": ["NC1HWC0", "NC1HWC0"], 
"name": "output"}, {"index": 1, "dtype": ["float32", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "BNGrad2", "imply_type": "AutoDiff", "fusion_type": "COMMREDUCE", "attr": [{"name": "eps", "param_type": "optional", "type": "float"}, {"name": "data_shape", "param_type": "optional", "type": "listInt"}], "inputs": [{"index": 0, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "dgamma_red_hw"}, {"index": 1, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "dbeta_red_hw"}, {"index": 2, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "variance"}, {"index": 3, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "gamma"}], "outputs": [{"index": 0, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "output"}, {"index": 1, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "output"}, {"index": 2, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "output"}, {"index": 3, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "output"}, {"index": 4, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "output"}]} +{"op_name": "FusedBN2", "imply_type": "AutoDiff", "fusion_type": "COMMREDUCE", "attr": [{"name": "momentum", "param_type": "optional", "type": "float"}], "inputs": [{"index": 0, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "mean"}, {"index": 1, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "var_part"}, {"index": 2, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "running_mean"}, {"index": 3, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "running_var"}], "outputs": [{"index": 0, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "output"}, {"index": 1, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "output"}, {"index": 2, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "output"}]} +{"op_name": "BNGrad3", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "dy"}, 
{"index": 1, "dtype": ["float32", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "rs"}, {"index": 2, "dtype": ["float32", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "dgamma_dx"}, {"index": 3, "dtype": ["float32", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "dbeta_dx"}, {"index": 4, "dtype": ["float32", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "data_minus_mean"}], "outputs": [{"index": 0, "dtype": ["float16", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "FusedBN3", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [{"name": "eps", "param_type": "optional", "type": "float"}], "inputs": [{"index": 0, "dtype": ["float16"], "format": ["NC1HWC0"], "name": "data"}, {"index": 1, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "mean"}, {"index": 2, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "variance"}, {"index": 3, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "gamma"}, {"index": 4, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "beta"}], "outputs": [{"index": 0, "dtype": ["float16"], "format": ["NC1HWC0"], "name": "output"}]} +{"op_name": "GatherV2", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [{"name": "axis", "param_type": "optional", "type": "int"}], "inputs": [{"index": 0, "dtype": ["int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat"], "name": "params"}, {"index": 1, "dtype": ["int32", "int32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat"], "name": "indices"}], "outputs": [{"index": 0, "dtype": ["int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat"], "name": "output"}]} +{"op_name": "Less", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float16"], "format": ["DefaultFormat", "NC1HWC0"], "name": "x"}, {"index": 1, "dtype": ["float16", "float16"], "format": ["DefaultFormat", 
"NC1HWC0"], "name": "y"}], "outputs": [{"index": 0, "dtype": ["bool", "bool"], "format": ["DefaultFormat", "NC1HWC0"], "name": "output"}]} +{"op_name": "Log", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ"], "param_type": "required", "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "output"}]} +{"op_name": "MatMul", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [{"name": "transpose_a", "param_type": "optional", "type": "bool"}, {"name": "transpose_b", "param_type": "optional", "type": "bool"}], "inputs": [{"index": 0, "dtype": ["float16", "float32"], "format": ["DefaultFormat", "DefaultFormat"], "name": "x1"}, {"index": 1, "dtype": ["float16", "float32"], "format": ["DefaultFormat", "DefaultFormat"], "name": "x2"}], "outputs": [{"index": 0, "dtype": ["float16", "float32"], "format": ["DefaultFormat", "DefaultFormat"], "name": "output"}]} +{"op_name": "BatchMatMul", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [{"name": "transpose_a", "param_type": "optional", "type": "bool"}, {"name": "transpose_b", "param_type": "optional", "type": "bool"}], "inputs": [{"index": 0, "dtype": ["float16"], "format": ["FRACTAL_NZ"], "name": "x1"}, {"index": 1, "dtype": ["float16"], "format": ["FRACTAL_NZ"], "name": "x2"}], "outputs": [{"index": 0, "dtype": ["float16"], "format": ["FRACTAL_NZ"], "name": "output"}]} +{"op_name": "MaxPoolGradWithArgmax", "imply_type": "AutoDiff", "fusion_type": "CONVLUTION", "attr": [{"name": "pad_mode", "param_type": "optional", "type": "str"}, {"name": "window", "param_type": "optional", "type": "int"}, {"name": "pad", "param_type": "optional", 
"type": "int"}, {"name": "stride", "param_type": "optional", "type": "int"}], "inputs": [{"index": 0, "dtype": ["float16", "float16"], "format": ["NC1HWC0", "NC1HWC0"], "name": "x"}, {"index": 1, "dtype": ["float16", "float32"], "format": ["DefaultFormat", "DefaultFormat"], "name": "argmax"}, {"index": 2, "dtype": ["float16", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "grad"}], "outputs": [{"index": 0, "dtype": ["float16", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "MaxPoolWithArgmax", "imply_type": "AutoDiff", "fusion_type": "CONVLUTION", "attr": [{"name": "pad_mode", "param_type": "optional", "type": "str"}, {"name": "window", "param_type": "optional", "type": "int"}, {"name": "pad", "param_type": "optional", "type": "int"}, {"name": "stride", "param_type": "optional", "type": "int"}], "inputs": [{"index": 0, "dtype": ["float16"], "format": ["NC1HWC0"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16"], "format": ["NC1HWC0"], "name": "output"}, {"index": 1, "dtype": ["float16"], "format": ["DefaultFormat"], "name": "argmax"}]} +{"op_name": "Max", "imply_type": "AutoDiff", "fusion_type": "COMMREDUCE", "attr": [{"name": "axis", "param_type": "required", "type": "listInt"}, {"name": "keep_dims", "param_type": "required", "type": "bool"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "Maximum", "imply_type": "AutoDiff", "fusion_type": "COMMREDUCE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", 
"NC1HWC0", "NC1HWC0"], "param_type": "required", "name": "x"}, {"index": 1, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "param_type": "required", "name": "y"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "SimpleMeanGrad", "imply_type": "AutoDiff", "fusion_type": "COMMREDUCE", "attr": [{"name": "input_shape", "param_type": "required", "type": "listInt"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "HEAD"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "SimpleMean", "imply_type": "AutoDiff", "fusion_type": "COMMREDUCE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "Minimum", "imply_type": "AutoDiff", "fusion_type": "COMMREDUCE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ"], "param_type": "required", "name": "x"}, {"index": 1, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", 
"NC1HWC0", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ"], "param_type": "required", "name": "y"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "output"}]} +{"op_name": "Mul", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [{"name": "x_shape", "param_type": "required", "type": "listInt"}, {"name": "y_shape", "param_type": "required", "type": "listInt"}, {"name": "data_format", "param_type": "required", "type": "listStr"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32", "float16", "float32"], "format": ["FracZ", "FracZ", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ"], "param_type": "required", "name": "x"}, {"index": 1, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32", "float16", "float32"], "format": ["FracZ", "FracZ", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ"], "param_type": "required", "name": "y"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32", "float16", "float32"], "format": ["FracZ", "FracZ", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "output"}]} +{"op_name": "Neg", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ"], "param_type": "required", "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32", "float16", 
"float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "output"}]} +{"op_name": "OneHot", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [{"name": "depth", "param_type": "required", "type": "int"}, {"name": "axis", "param_type": "required", "type": "int"}], "inputs": [{"index": 0, "dtype": ["int32", "int32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat"], "name": "indices"}, {"index": 1, "dtype": ["int32", "float32", "float16"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat"], "name": "on_value"}, {"index": 2, "dtype": ["int32", "float32", "float16"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat"], "name": "off_value"}], "outputs": [{"index": 0, "dtype": ["int32", "float32", "float16"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat"], "name": "output"}]} +{"op_name": "Pow", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "int32", "float16", "int32", "float32", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0"], "param_type": "required", "name": "x"}, {"index": 1, "dtype": ["float16", "int32", "float16", "int32", "float32", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0"], "param_type": "required", "name": "power"}], "outputs": [{"index": 0, "dtype": ["float16", "int32", "float16", "int32", "float32", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0"], "name": "output"}]} +{"op_name": "RealDiv", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", 
"FRACTAL_NZ"], "param_type": "required", "name": "x"}, {"index": 1, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ"], "param_type": "required", "name": "y"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "output"}]} +{"op_name": "Reciprocal", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "ReduceMax", "imply_type": "AutoDiff", "fusion_type": "COMMREDUCE", "attr": [{"name": "axis", "param_type": "required", "type": "listInt"}, {"name": "keep_dims", "param_type": "required", "type": "bool"}], "inputs": [{"index": 0, "dtype": ["float16", "float16"], "format": ["DefaultFormat", "NC1HWC0"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float16"], "format": ["DefaultFormat", "NC1HWC0"], "name": "output"}]} +{"op_name": "ReduceMean", "imply_type": "AutoDiff", "fusion_type": "COMMREDUCE", "attr": [{"name": "axis", "param_type": "required", "type": "listInt"}, {"name": "keep_dims", "param_type": "required", "type": "bool"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "ReduceSum", "imply_type": "AutoDiff", "fusion_type": 
"COMMREDUCE", "attr": [{"name": "axis", "param_type": "required", "type": "listInt"}, {"name": "keep_dims", "param_type": "required", "type": "bool"}, {"name": "atomic_add", "param_type": "optional", "type": "str"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ"], "param_type": "required", "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "output"}]} +{"op_name": "ReluGrad", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0"], "name": "y_backprop"}, {"index": 1, "dtype": ["float16", "float32", "float16"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0"], "name": "output"}]} +{"op_name": "ReLU", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0"], "name": "output"}]} +{"op_name": "Reshape", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [{"name": "shape", "param_type": "required", "type": "listInt"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "tensor"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", 
"NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "Round", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "Rsqrt", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "param_type": "required", "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "Select", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["bool", "bool", "bool", "bool", "bool", "bool"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0"], "param_type": "required", "name": "condition"}, {"index": 1, "dtype": ["float16", "int32", "float16", "int32", "float32", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0"], "param_type": "required", "name": "x"}, {"index": 2, "dtype": ["float16", "int32", "float16", "int32", "float32", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0"], "param_type": "required", "name": "y"}], "outputs": [{"index": 0, "dtype": ["float16", "int32", "float16", "int32", "float32", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0"], "name": "output"}]} +{"op_name": "Softmax", 
"imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [{"name": "axis", "param_type": "required", "type": "listInt"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "SparseSoftmaxCrossEntropyWithLogits", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [{"name": "is_grad", "param_type": "optional", "type": "bool"}, {"name": "sens", "param_type": "optional", "type": "float"}], "inputs": [{"index": 0, "dtype": ["float32"], "format": ["DefaultFormat"], "name": "features"}, {"index": 1, "dtype": ["int32"], "format": ["DefaultFormat"], "name": "labels"}], "outputs": [{"index": 0, "dtype": ["float32"], "format": ["DefaultFormat"], "name": "output"}]} +{"op_name": "Sqrt", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "param_type": "required", "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "StridedSlice", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [{"name": "begin", "param_type": "required", "type": "listInt"}, {"name": "end", "param_type": "required", "type": "listInt"}, {"name": "strides", "param_type": "required", "type": "listInt"}, {"name": "begin_mask", "param_type": "required", "type": "int"}, {"name": "end_mask", "param_type": "required", "type": "int"}, {"name": "ellipsis_mask", "param_type": "required", "type": "int"}, {"name": 
"new_axis_mask", "param_type": "required", "type": "int"}, {"name": "shrink_axis_mask", "param_type": "required", "type": "int"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "Sub", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ"], "param_type": "required", "name": "x"}, {"index": 1, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ"], "param_type": "required", "name": "y"}], "outputs": [{"index": 0, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "output"}]} +{"op_name": "Sum", "imply_type": "AutoDiff", "fusion_type": "COMMREDUCE", "attr": [{"name": "axis", "param_type": "required", "type": "listInt"}, {"name": "keepdims", "param_type": "required", "type": "bool"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", 
"float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ"], "param_type": "required", "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "output"}]} +{"op_name": "Tile", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [{"name": "multiples", "param_type": "required", "type": "listInt"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "ZerosLike", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "Argmax", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [{"name": "axis", "param_type": "optional", "type": "int"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["int32", "int32", "int32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "FloorDiv", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", 
"float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "x"}, {"index": 1, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "y"}], "outputs": [{"index": 0, "dtype": ["int32", "int32", "int32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "Equal", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "x"}, {"index": 1, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "y"}], "outputs": [{"index": 0, "dtype": ["bool", "bool", "bool", "bool", "bool", "bool"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "GreaterEqual", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "x"}, {"index": 1, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "y"}], "outputs": [{"index": 0, "dtype": ["bool", "bool", "bool", "bool", "bool", "bool"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "LessEqual", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32"], "format": 
["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "x"}, {"index": 1, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "y"}], "outputs": [{"index": 0, "dtype": ["bool", "bool", "bool", "bool", "bool", "bool"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "ExpandDims", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [{"name": "axis", "param_type": "required", "type": "int"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat"], "name": "y"}]} +{"op_name": "Greater", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float16", "float32", "float32"], "format": ["DefaultFormat", "NC1HWC0", "DefaultFormat", "NC1HWC0"], "name": "x"}, {"index": 1, "dtype": ["float16", "float16", "float32", "float32"], "format": ["DefaultFormat", "NC1HWC0", "DefaultFormat", "NC1HWC0"], "name": "y"}], "outputs": [{"index": 0, "dtype": ["bool", "bool", "bool", "bool"], "format": ["DefaultFormat", "NC1HWC0", "DefaultFormat", "NC1HWC0"], "name": "output"}]} +{"op_name": "EquivFormat", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["FRACTAL_NZ", "FRACTAL_NZ", "DefaultFormat", "DefaultFormat"], "name": "output"}]} +{"op_name": "Cast", "inputs": [{"index": 0, "name": "x"}], "outputs": 
[{"index": 0, "name": "output"}], "attr": [{"name": "dst_type", "param_type": "required", "type": "str"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["bool", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "Equal", "inputs": [{"index": 0, "name": "x"}, {"index": 1, "name": "y"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["bool", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "SimpleMean", "inputs": [{"index": 0, "name": "x"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "SimpleMeanGrad", "inputs": [{"index": 0, "name": "HEAD"}], "outputs": [{"index": 0, "name": "output"}], "attr": [{"name": "input_shape", "param_type": "required", "type": "listInt"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "Mul", "inputs": [{"index": 0, "name": "x"}, {"index": 1, "name": "y"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "AutoDiff", 
"processor": "cuda"} +{"op_name": "ReLU6", "inputs": [{"index": 0, "name": "x"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "ReLU6Grad", "inputs": [{"index": 0, "name": "y_grad"}, {"index": 1, "name": "x"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "Squeeze", "inputs": [{"index": 0, "name": "x"}], "outputs": [{"index": 0, "name": "output"}], "attr": [{"name": "axis", "param_type": "optional", "type": "listInt"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "SqueezeGrad", "inputs": [{"index": 0, "name": "y_grad"}], "outputs": [{"index": 0, "name": "output"}], "attr": [{"name": "x_shape", "param_type": "required", "type": "listInt"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "Tile", "inputs": [{"index": 0, "name": "x"}], "outputs": [{"index": 0, "name": "output"}], "attr": [{"name": "multiples", "param_type": "required", "type": "listInt"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": 
"HSigmoid", "inputs": [{"index": 0, "name": "x"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "HSigmoidGrad", "inputs": [{"index": 0, "name": "y_grad"}, {"index": 1, "name": "x"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "HSwish", "inputs": [{"index": 0, "name": "x"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "HSwishGrad", "inputs": [{"index": 0, "name": "y_grad"}, {"index": 1, "name": "x"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "Sub", "inputs": [{"index": 0, "name": "x"}, {"index": 1, "name": "y"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]]], "imply_type": "AutoDiff", 
"processor": "cuda"} +{"op_name": "LogicalAnd", "inputs": [{"index": 0, "name": "x"}, {"index": 1, "name": "y"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["bool", "DefaultFormat"], ["bool", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "LogicalNot", "inputs": [{"index": 0, "name": "x"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["bool", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "LogicalOr", "inputs": [{"index": 0, "name": "x"}, {"index": 1, "name": "y"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["bool", "DefaultFormat"], ["bool", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "LessEqual", "inputs": [{"index": 0, "name": "x"}, {"index": 1, "name": "y"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["bool", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "NotEqual", "inputs": [{"index": 0, "name": "x"}, {"index": 1, "name": "y"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["bool", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "GreaterEqual", "inputs": [{"index": 0, "name": 
"x"}, {"index": 1, "name": "y"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["bool", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "Abs", "inputs": [{"index": 0, "name": "x", "param_type": "required"}], "outputs": [{"index": 0, "name": "y", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]], [["int32", ""], ["int32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "abs.so", "compute_cost": 10, "kernel_name": "abs", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "InplaceAdd", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "v", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "indices", "param_type": "required", "type": "listInt", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "inplace_add_d.so", "compute_cost": 10, "kernel_name": "inplace_add_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "InplaceSub", "inputs": 
[{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "v", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "indices", "param_type": "required", "type": "listInt", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "inplace_sub_d.so", "compute_cost": 10, "kernel_name": "inplace_sub_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "AbsGrad", "inputs": [{"index": 0, "name": "y", "param_type": "required"}, {"index": 1, "name": "dy", "param_type": "required"}], "outputs": [{"index": 0, "name": "z", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "abs_grad.so", "compute_cost": 10, "kernel_name": "abs_grad", "partial_flag": true, 
"reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ACos", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "acos.so", "compute_cost": 10, "kernel_name": "acos", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "ACosGrad", "inputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "acos_grad.so", "compute_cost": 10, "kernel_name": "acos_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Acosh", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 
0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "acosh.so", "compute_cost": 10, "kernel_name": "acosh", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "AcoshGrad", "inputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "acosh_grad.so", "compute_cost": 10, "kernel_name": "acosh_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "AdamApplyOneWithDecay", "inputs": [{"index": 0, "name": "input0", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "input1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "input2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, 
"name": "input3", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "input4", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "mul0_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "mul1_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 7, "name": "mul2_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 8, "name": "mul3_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 9, "name": "mul4_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 10, "name": "add2_y", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output0", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "output1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "output2", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", 
"async_flag": false, "binfile_name": "adam_apply_one_with_decay.so", "compute_cost": 10, "kernel_name": "adam_apply_one_with_decay", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Add", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["", ""], ["", ""], ["", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "add.so", "compute_cost": 10, "kernel_name": "add", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "ApplyCenteredRMSProp", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "mg", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "ms", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "mom", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "lr", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "rho", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "momentum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 7, "name": "epsilon", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 8, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], 
["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", 
"DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "apply_centered_rms_prop.so", "compute_cost": 10, "kernel_name": "apply_centered_rms_prop", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "AddN", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "dynamic", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "n", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]], [["int32", ""], ["int32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "add_n.so", "compute_cost": 10, "kernel_name": "add_n", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "AccumulateNV2", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "dynamic", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "n", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]], [["int32", ""], ["int32", ""]], [["int8", ""], ["int8", ""]], [["uint8", ""], ["uint8", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "accumulate_n_v2.so", "compute_cost": 10, "kernel_name": "accumulate_n_v2", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "ApplyFtrl", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, 
{"index": 2, "name": "linear", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "lr", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "l1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "l2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 7, "name": "lr_power", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "linear", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", 
"DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "apply_ftrl.so", "compute_cost": 10, "kernel_name": "apply_ftrl", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ApplyMomentum", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "lr", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "momentum", "need_compile": false, "param_type": 
"required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "use_nesterov", "param_type": "optional", "type": "bool", "value": "true,false", "default_value": "false"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "FracZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "apply_momentum.so", 
"compute_cost": 10, "kernel_name": "apply_momentum", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Adam", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "m", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "v", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "beta1_power", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "beta2_power", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "lr", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "beta1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 7, "name": "beta2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 8, "name": "epsilon", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 9, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "m", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "v", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "use_locking", "param_type": "optional", "type": "bool", "value": "true,false", "default_value": "false"}, {"name": "use_nesterov", "param_type": "optional", "type": "bool", "value": "true,false", "default_value": "false"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", 
"DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", 
"DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "apply_adam.so", "compute_cost": 10, "kernel_name": "apply_adam", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ApplyAdaMax", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "m", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "v", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "beta1_power", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "lr", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "beta1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "beta2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 7, "name": "epsilon", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 8, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "m", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "v", "need_compile": false, "param_type": "required", "shape": "all"}], 
"attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", 
"FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "apply_ada_max_d.so", "compute_cost": 10, "kernel_name": "apply_ada_max_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ApplyAdadelta", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "accum_update", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "lr", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "rho", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "epsilon", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "accum_update", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", 
"dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], 
["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "apply_adadelta_d.so", "compute_cost": 10, "kernel_name": "apply_adadelta_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ApplyAdagrad", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "lr", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "update_slots", "param_type": "optional", "type": "bool", "value": "true,false", "default_value": "false"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", 
"FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "apply_adagrad_d.so", "compute_cost": 10, "kernel_name": "apply_adagrad_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ApplyAdagradV2", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "lr", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "epsilon", "param_type": "required", "type": "float", "value": "all"}, {"name": "update_slots", "param_type": "optional", "type": "bool", "value": "true,false", "default_value": "false"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], 
["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "apply_adagradv2_d.so", "compute_cost": 10, "kernel_name": "apply_adagradv2_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ApplyAddSign", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "m", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "lr", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "alpha", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "sign_decay", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "beta", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "m", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], 
"fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "FracZ"], 
["float32", "FracZ"], ["float32", "FracZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "apply_add_sign_d.so", "compute_cost": 10, "kernel_name": "apply_add_sign_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ApplyPowerSign", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "m", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "lr", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "logbase", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "sign_decay", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "beta", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "m", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], 
[["float16", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "apply_power_sign_d.so", "compute_cost": 10, "kernel_name": "apply_power_sign_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ApplyGradientDescent", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "alpha", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "delta", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", 
"DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "apply_gradient_descent.so", "compute_cost": 10, "kernel_name": "apply_gradient_descent", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ApplyProximalGradientDescent", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "alpha", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "l1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "l2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "delta", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", 
"DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "apply_proximal_gradient_descent.so", "compute_cost": 10, "kernel_name": "apply_proximal_gradient_descent", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "SparseApplyFtrlV2", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "linear", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": 
"all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "linear", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "lr", "param_type": "required", "type": "float", "value": "all"}, {"name": "l1", "param_type": "required", "type": "float", "value": "all"}, {"name": "l2", "param_type": "required", "type": "float", "value": "all"}, {"name": "l2_shrinkage", "param_type": "required", "type": "float", "value": "all"}, {"name": "lr_power", "param_type": "required", "type": "float", "value": "all"}, {"name": "use_locking", "param_type": "optional", "type": "bool", "value": "true,false", "default_value": "false"}], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["int32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"]], [["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["int32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "sparse_apply_ftrl_v2_d.so", "compute_cost": 10, "kernel_name": "sparse_apply_ftrl_v2_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "SparseApplyAdagradV2", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "indices", "need_compile": false, "param_type": "required", "shape": 
"all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "lr", "param_type": "required", "type": "float", "value": "all"}, {"name": "epsilon", "param_type": "required", "type": "float", "value": "all"}, {"name": "use_locking", "param_type": "optional", "type": "bool", "value": "all"}, {"name": "update_slots", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["int32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"]], [["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["int32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "sparse_apply_adagrad_v2_d.so", "compute_cost": 10, "kernel_name": "sparse_apply_adagrad_v2_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ApproximateEqual", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "tolerance", "param_type": "optional", "type": "float", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""], ["bool", ""]], [["float32", ""], ["float32", ""], ["bool", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "approximate_equal.so", "compute_cost": 10, "kernel_name": "approximate_equal", "partial_flag": true, 
"reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "AdamApplyOne", "inputs": [{"index": 0, "name": "input0", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "input1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "input2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "input3", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "input4", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "mul0_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "mul1_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 7, "name": "mul2_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 8, "name": "mul3_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 9, "name": "add2_y", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output0", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "output1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "output2", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", 
"DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "adam_apply_one.so", "compute_cost": 10, "kernel_name": "adam_apply_one", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Assign", "inputs": [{"index": 0, "name": "ref", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "value", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "ref", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["bool", "DefaultFormat"], ["bool", "DefaultFormat"], ["bool", "DefaultFormat"]], [["bool", "NC1HWC0"], ["bool", "NC1HWC0"], ["bool", "NC1HWC0"]], [["bool", "C1HWNCoC0"], ["bool", "C1HWNCoC0"], ["bool", "C1HWNCoC0"]], [["bool", "FracZ"], ["bool", "FracZ"], ["bool", "FracZ"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int8", "NC1HWC0"], ["int8", "NC1HWC0"], ["int8", "NC1HWC0"]], [["int8", "C1HWNCoC0"], ["int8", "C1HWNCoC0"], ["int8", "C1HWNCoC0"]], [["int8", "FracZ"], ["int8", "FracZ"], ["int8", "FracZ"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["uint8", "C1HWNCoC0"], ["uint8", "C1HWNCoC0"], ["uint8", "C1HWNCoC0"]], [["uint8", "FracZ"], ["uint8", "FracZ"], ["uint8", "FracZ"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], [["int16", "NC1HWC0"], ["int16", "NC1HWC0"], ["int16", "NC1HWC0"]], [["int16", "C1HWNCoC0"], ["int16", "C1HWNCoC0"], ["int16", "C1HWNCoC0"]], [["int16", "FracZ"], ["int16", "FracZ"], ["int16", "FracZ"]], [["uint16", "DefaultFormat"], 
["uint16", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["uint16", "NC1HWC0"], ["uint16", "NC1HWC0"], ["uint16", "NC1HWC0"]], [["uint16", "C1HWNCoC0"], ["uint16", "C1HWNCoC0"], ["uint16", "C1HWNCoC0"]], [["uint16", "FracZ"], ["uint16", "FracZ"], ["uint16", "FracZ"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "NC1HWC0"], ["int32", "NC1HWC0"], ["int32", "NC1HWC0"]], [["int32", "C1HWNCoC0"], ["int32", "C1HWNCoC0"], ["int32", "C1HWNCoC0"]], [["int32", "FracZ"], ["int32", "FracZ"], ["int32", "FracZ"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint32", "NC1HWC0"], ["uint32", "NC1HWC0"], ["uint32", "NC1HWC0"]], [["uint32", "C1HWNCoC0"], ["uint32", "C1HWNCoC0"], ["uint32", "C1HWNCoC0"]], [["uint32", "FracZ"], ["uint32", "FracZ"], ["uint32", "FracZ"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["int64", "NC1HWC0"], ["int64", "NC1HWC0"], ["int64", "NC1HWC0"]], [["int64", "C1HWNCoC0"], ["int64", "C1HWNCoC0"], ["int64", "C1HWNCoC0"]], [["int64", "FracZ"], ["int64", "FracZ"], ["int64", "FracZ"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["uint64", "NC1HWC0"], ["uint64", "NC1HWC0"], ["uint64", "NC1HWC0"]], [["uint64", "C1HWNCoC0"], ["uint64", "C1HWNCoC0"], ["uint64", "C1HWNCoC0"]], [["uint64", "FracZ"], ["uint64", "FracZ"], ["uint64", "FracZ"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], 
["float32", "C1HWNCoC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "assign.so", "compute_cost": 10, "kernel_name": "assign", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "AssignAdd", "inputs": [{"index": 0, "name": "ref", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "value", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "ref", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int8", "NC1HWC0"], ["int8", "NC1HWC0"], ["int8", "NC1HWC0"]], [["int8", "C1HWNCoC0"], ["int8", "C1HWNCoC0"], ["int8", "C1HWNCoC0"]], [["int8", "FracZ"], ["int8", "FracZ"], ["int8", "FracZ"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["uint8", "C1HWNCoC0"], ["uint8", "C1HWNCoC0"], ["uint8", "C1HWNCoC0"]], [["uint8", "FracZ"], ["uint8", "FracZ"], ["uint8", "FracZ"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "NC1HWC0"], ["int32", "NC1HWC0"], ["int32", "NC1HWC0"]], [["int32", "C1HWNCoC0"], ["int32", "C1HWNCoC0"], ["int32", "C1HWNCoC0"]], [["int32", "FracZ"], ["int32", "FracZ"], ["int32", "FracZ"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["int64", "NC1HWC0"], ["int64", "NC1HWC0"], ["int64", "NC1HWC0"]], [["int64", "C1HWNCoC0"], ["int64", "C1HWNCoC0"], ["int64", "C1HWNCoC0"]], [["int64", "FracZ"], ["int64", "FracZ"], ["int64", "FracZ"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], 
[["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "assignadd.so", "compute_cost": 10, "kernel_name": "assignadd", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "AssignSub", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "value", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int8", "NC1HWC0"], ["int8", "NC1HWC0"], ["int8", "NC1HWC0"]], [["int8", "C1HWNCoC0"], ["int8", "C1HWNCoC0"], ["int8", "C1HWNCoC0"]], [["int8", "FracZ"], ["int8", "FracZ"], ["int8", "FracZ"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["uint8", "C1HWNCoC0"], ["uint8", "C1HWNCoC0"], ["uint8", "C1HWNCoC0"]], [["uint8", "FracZ"], ["uint8", "FracZ"], ["uint8", "FracZ"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "NC1HWC0"], ["int32", "NC1HWC0"], ["int32", "NC1HWC0"]], [["int32", "C1HWNCoC0"], ["int32", "C1HWNCoC0"], ["int32", "C1HWNCoC0"]], [["int32", "FracZ"], ["int32", "FracZ"], ["int32", "FracZ"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", 
"DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "assign_sub.so", "compute_cost": 10, "kernel_name": "assign_sub", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "BatchMatMul", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "bias", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "transpose_x1", "param_type": "required", "type": "bool", "value": "all"}, {"name": "transpose_x2", "param_type": "required", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int32", ""], ["int32", ""], ["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "batch_matmul.so", "compute_cost": 10, "kernel_name": "batch_matmul", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "BatchNorm", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "scale", 
"need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "offset", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "mean", "need_compile": false, "param_type": "optional", "shape": "all"}, {"index": 4, "name": "variance", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "batch_mean", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "batch_variance", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "reserve_space_1", "need_compile": false, "param_type": "optional", "shape": "all"}, {"index": 4, "name": "reserve_space_2", "need_compile": false, "param_type": "optional", "shape": "all"}], "attr": [{"name": "epsilon", "param_type": "optional", "type": "float", "value": "all"}, {"name": "data_format", "param_type": "optional", "type": "str", "value": "all"}, {"name": "is_training", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float16", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], 
["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "batch_norm.so", "compute_cost": 10, "kernel_name": "batch_norm", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "BatchNormGrad", "inputs": [{"index": 0, "name": "y_backprop", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "scale", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "reserve_space_1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "reserve_space_2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "x_backprop", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "scale_backprop", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "offset_backprop", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "reserve_space_4", "need_compile": false, "param_type": "optional", "shape": "all"}, {"index": 4, "name": "reserve_space_5", "need_compile": false, "param_type": "optional", "shape": "all"}], "attr": [{"name": "epsilon", "param_type": "optional", "type": "float", "value": "all"}, {"name": "data_format", "param_type": "optional", "type": "str", "value": "all"}, {"name": "is_training", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", 
"DefaultFormat"], ["float16", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float16", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "batchnormgrad.so", "compute_cost": 10, "kernel_name": "batchnormgrad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "BiasAdd", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "bias", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "data_format", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "COMMREDUCE", "dtype_format": [[["int32", ""], ["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "bias_add.so", "compute_cost": 10, "kernel_name": "bias_add", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": 
"dynamicFormat"} +{"op_name": "BiasAddGrad", "inputs": [{"index": 0, "name": "output_backprop", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "data_format", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "COMMREDUCE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "FRACTAL_NZ"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FRACTAL_NZ"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "biasaddgrad.so", "compute_cost": 10, "kernel_name": "biasaddgrad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Cast", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "dst_type", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["bool", ""], ["float16", ""]], [["bool", ""], ["uint8", ""]], [["bool", ""], ["float32", ""]], [["bool", ""], ["int32", ""]], [["int8", ""], ["float16", ""]], [["int8", ""], ["float32", ""]], [["int8", ""], ["int32", ""]], [["uint8", ""], ["float16", ""]], [["uint8", ""], ["float32", ""]], [["uint8", ""], ["int32", ""]], [["int32", ""], ["bool", ""]], [["int32", ""], ["float16", ""]], [["int32", ""], ["float32", ""]], [["int32", ""], ["int8", ""]], [["int32", ""], ["uint8", ""]], [["float16", ""], ["uint8", ""]], [["float16", ""], ["float32", ""]], [["float16", ""], ["int32", ""]], [["float32", ""], ["float16", ""]], [["float32", ""], ["int32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "cast.so", "compute_cost": 10, "kernel_name": "cast", 
"partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "Conv2D", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "filter", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "bias", "need_compile": false, "param_type": "optional", "shape": "all"}, {"index": 3, "name": "offset_w", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [{"name": "stride", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "pad_list", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "dilation", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "offset_a", "param_type": "optional", "type": "int", "value": "all"}], "fusion_type": "CONVLUTION", "dtype_format": [[["float16", ""], ["float16", ""], ["float16", ""], ["int8", ""], ["float16", ""]], [["int8", ""], ["int8", ""], ["int32", ""], ["int8", ""], ["int32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "conv2d.so", "compute_cost": 10, "kernel_name": "conv2d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "Conv2DBackpropFilter", "inputs": [{"index": 0, "name": "out_backprop", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "filter_sizes", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "stride", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "pad_list", "param_type": "required", "type": "listInt", "value": "all"}, {"name": 
"dilation", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "groups", "param_type": "optional", "type": "int", "value": "all"}, {"name": "data_format", "param_type": "optional", "type": "str", "value": "all"}], "fusion_type": "CONVLUTION", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float32", "FracZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "conv2d_backprop_filter_d.so", "compute_cost": 10, "kernel_name": "conv2d_backprop_filter_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Conv2DBackpropInput", "inputs": [{"index": 0, "name": "out_backprop", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "filter", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [{"name": "input_sizes", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "stride", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "pad_list", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "dilation", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "group", "param_type": "optional", "type": "int", "value": "all"}, {"name": "data_format", "param_type": "optional", "type": "str", "value": "all"}], "fusion_type": "CONVLUTION", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "FracZ"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "conv2d_backprop_input_d.so", "compute_cost": 10, "kernel_name": "conv2d_backprop_input_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ConfusionMulGrad", "inputs": [{"index": 0, "name": "input0", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "input1", "need_compile": false, 
"param_type": "required", "shape": "all"}, {"index": 2, "name": "input2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output0", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "output1", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "keep_dims", "param_type": "required", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "", "compute_cost": 10, "kernel_name": "", "partial_flag": false, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "DropoutDoMask", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "mask", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "keep_prob", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["", ""], ["", ""], ["", ""], ["", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "drop_out_do_mask.so", "compute_cost": 10, "kernel_name": "drop_out_do_mask", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "Gelu", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": 
false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "gelu.so", "compute_cost": 10, "kernel_name": "gelu", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "GeluGrad", "inputs": [{"index": 0, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "gelu_grad.so", "compute_cost": 10, "kernel_name": "gelu_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "MaxPool", "inputs": [{"index": 0, "name": "input_data", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output_data", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": 
[{"name": "ksize", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "strides", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "padding", "param_type": "required", "type": "str", "value": "all"}, {"name": "data_format", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "max_pool.so", "compute_cost": 10, "kernel_name": "max_pool", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "MaxPoolGrad", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "ksize", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "strides", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "padding", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "max_pool_grad.so", "compute_cost": 10, "kernel_name": "max_pool_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "MaxPoolGradWithArgmax", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "argmax", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": 
[{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "ksize", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "strides", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "padding", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["uint16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["int64", "NC1HWC0"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "max_pool_grad_with_argmax.so", "compute_cost": 10, "kernel_name": "max_pool_grad_with_argmax", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "MaxPoolWithArgmax", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "argmax", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "ksize", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "strides", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "padding", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "CONVLUTION", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["uint16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "max_pool_with_argmax.so", "compute_cost": 10, "kernel_name": "max_pool_with_argmax", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Mul", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, 
"name": "output", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["", ""], ["", ""], ["", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "mul.so", "compute_cost": 10, "kernel_name": "mul", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "RealDiv", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "realdiv.so", "compute_cost": 10, "kernel_name": "realdiv", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "ReLU", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["int8", ""], ["int8", ""]], [["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "relu.so", "compute_cost": 10, "kernel_name": "relu", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "ReluGrad", "inputs": [{"index": 0, "name": "gradients", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "features", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "backprops", 
"need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["int8", ""], ["int8", ""], ["int8", ""]], [["uint8", ""], ["uint8", ""], ["uint8", ""]], [["int32", ""], ["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "relugrad.so", "compute_cost": 10, "kernel_name": "relugrad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "ReLU6", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]], [["int32", ""], ["int32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "relu6.so", "compute_cost": 10, "kernel_name": "relu6", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "ReLU6Grad", "inputs": [{"index": 0, "name": "gradients", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "features", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "backprops", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], 
[["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "relu6_grad.so", "compute_cost": 10, "kernel_name": "relu6_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ReLUV2", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "mask", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["uint8", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["uint8", "DefaultFormat"]], [["int32", "NC1HWC0"], ["int32", "NC1HWC0"], ["uint8", "DefaultFormat"]], [["int8", "NC1HWC0"], ["int8", "NC1HWC0"], ["uint8", "DefaultFormat"]], [["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"], ["uint8", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "relu_v2.so", "compute_cost": 10, "kernel_name": "relu_v2", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ReluGradV2", "inputs": [{"index": 0, "name": "gradients", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "mask", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "backprops", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "NC1HWC0"], ["uint8", "DefaultFormat"], ["float16", "NC1HWC0"]], [["float32", "NC1HWC0"], ["uint8", "DefaultFormat"], ["float32", "NC1HWC0"]], [["int32", 
"NC1HWC0"], ["uint8", "DefaultFormat"], ["int32", "NC1HWC0"]], [["int8", "NC1HWC0"], ["uint8", "DefaultFormat"], ["int8", "NC1HWC0"]], [["uint8", "NC1HWC0"], ["uint8", "DefaultFormat"], ["uint8", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "relu_grad_v2.so", "compute_cost": 10, "kernel_name": "relu_grad_v2", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "SoftmaxCrossEntropyWithLogits", "inputs": [{"index": 0, "name": "input_features", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "input_labels", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output_loss", "need_compile": true, "param_type": "required", "shape": "all"}, {"index": 1, "name": "output_backprop", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "softmax_cross_entropy_with_logits.so", "compute_cost": 10, "kernel_name": "softmax_cross_entropy_with_logits", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "SigmoidCrossEntropyWithLogits", "inputs": [{"index": 0, "name": "predict", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "target", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "loss", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], 
["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "sigmoid_cross_entropy_with_logits.so", "compute_cost": 10, "kernel_name": "sigmoid_cross_entropy_with_logits", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "SigmoidCrossEntropyWithLogitsGrad", "inputs": [{"index": 0, "name": "predict", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "target", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "dout", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "gradient", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "sigmoid_cross_entropy_with_logits_grad.so", "compute_cost": 10, "kernel_name": "sigmoid_cross_entropy_with_logits_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "TensorAdd", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": 
"required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "NC1HWC0"], ["int32", "NC1HWC0"], ["int32", "NC1HWC0"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "add.so", "compute_cost": 10, "kernel_name": "add", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "TransData", "inputs": [{"index": 0, "name": "src", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "dst", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "src_format", "param_type": "required", "type": "str", "value": "DefaultFormat, NC1HWC0, FracZ, FRACTAL_NZ, HWCN, C1HWNCoC0, NDHWC, NHWC"}, {"name": "dst_format", "param_type": "required", "type": "str", "value": "DefaultFormat, NC1HWC0, FracZ, FRACTAL_NZ, HWCN, C1HWNCoC0, NDHWC, NHWC"}], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "NHWC"], ["float32", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "NC1HWC0"]], [["float32", "NC1HWC0"], ["float32", "NHWC"]], [["float32", "NC1HWC0"], ["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "FracZ"]], [["float32", "HWCN"], ["float32", "FracZ"]], [["float32", "FracZ"], ["float32", "HWCN"]], [["float32", "C1HWNCoC0"], ["float32", "HWCN"]], [["float32", "HWCN"], ["float32", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "FracZ"]], [["float16", "NHWC"], ["float16", "FracZ"]], [["float16", "HWCN"], 
["float16", "FracZ"]], [["float16", "DefaultFormat"], ["float16", "NC1HWC0"]], [["float16", "NHWC"], ["float16", "NC1HWC0"]], [["float16", "HWCN"], ["float16", "NC1HWC0"]], [["float16", "NC1HWC0"], ["float16", "NHWC"]], [["float16", "NC1HWC0"], ["float16", "DefaultFormat"]], [["float16", "FracZ"], ["float16", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "FracZ"]], [["float16", "HWCN"], ["float16", "FracZ"]], [["float16", "FracZ"], ["float16", "HWCN"]], [["float16", "C1HWNCoC0"], ["float16", "HWCN"]], [["float16", "HWCN"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "FRACTAL_NZ"]], [["float32", "DefaultFormat"], ["float32", "FRACTAL_NZ"]], [["float16", "FRACTAL_NZ"], ["float16", "DefaultFormat"]], [["float32", "FRACTAL_NZ"], ["float32", "DefaultFormat"]], [["bool", "NHWC"], ["bool", "NC1HWC0"]], [["bool", "DefaultFormat"], ["bool", "NC1HWC0"]], [["bool", "NC1HWC0"], ["bool", "NHWC"]], [["bool", "NC1HWC0"], ["bool", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "NHWC"]], [["float16", "DefaultFormat"], ["float16", "HWCN"]], [["float16", "NHWC"], ["float16", "DefaultFormat"]], [["float16", "NHWC"], ["float16", "HWCN"]], [["float16", "HWCN"], ["float16", "DefaultFormat"]], [["float16", "HWCN"], ["float16", "NHWC"]], [["float32", "DefaultFormat"], ["float32", "NHWC"]], [["float32", "DefaultFormat"], ["float32", "HWCN"]], [["float32", "NHWC"], ["float32", "DefaultFormat"]], [["float32", "NHWC"], ["float32", "HWCN"]], [["float32", "HWCN"], ["float32", "DefaultFormat"]], [["float32", "HWCN"], ["float32", "NHWC"]], [["int8", "DefaultFormat"], ["int8", "FRACTAL_NZ"]], [["int8", "DefaultFormat"], ["int8", "FracZ"]], [["int8", "DefaultFormat"], ["int8", "NHWC"]], [["int8", "DefaultFormat"], ["int8", "HWCN"]], [["int8", "NHWC"], ["int8", "DefaultFormat"]], [["int8", "NHWC"], ["int8", "HWCN"]], [["int8", "HWCN"], ["int8", "DefaultFormat"]], [["int8", "HWCN"], ["int8", "NHWC"]], [["int16", "DefaultFormat"], ["int16", 
"NHWC"]], [["int16", "DefaultFormat"], ["int16", "HWCN"]], [["int16", "NHWC"], ["int16", "DefaultFormat"]], [["int16", "NHWC"], ["int16", "HWCN"]], [["int16", "HWCN"], ["int16", "DefaultFormat"]], [["int16", "HWCN"], ["int16", "NHWC"]], [["int32", "DefaultFormat"], ["int32", "NHWC"]], [["int32", "DefaultFormat"], ["int32", "HWCN"]], [["int32", "NHWC"], ["int32", "DefaultFormat"]], [["int32", "NHWC"], ["int32", "HWCN"]], [["int32", "HWCN"], ["int32", "DefaultFormat"]], [["int32", "HWCN"], ["int32", "NHWC"]], [["int64", "DefaultFormat"], ["int64", "NHWC"]], [["int64", "DefaultFormat"], ["int64", "HWCN"]], [["int64", "NHWC"], ["int64", "DefaultFormat"]], [["int64", "NHWC"], ["int64", "HWCN"]], [["int64", "HWCN"], ["int64", "DefaultFormat"]], [["int64", "HWCN"], ["int64", "NHWC"]], [["uint8", "DefaultFormat"], ["uint8", "NHWC"]], [["uint8", "DefaultFormat"], ["uint8", "HWCN"]], [["uint8", "NHWC"], ["uint8", "DefaultFormat"]], [["uint8", "NHWC"], ["uint8", "HWCN"]], [["uint8", "HWCN"], ["uint8", "DefaultFormat"]], [["uint8", "HWCN"], ["uint8", "NHWC"]], [["uint16", "DefaultFormat"], ["uint16", "NHWC"]], [["uint16", "DefaultFormat"], ["uint16", "HWCN"]], [["uint16", "NHWC"], ["uint16", "DefaultFormat"]], [["uint16", "NHWC"], ["uint16", "HWCN"]], [["uint16", "HWCN"], ["uint16", "DefaultFormat"]], [["uint16", "HWCN"], ["uint16", "NHWC"]], [["uint32", "DefaultFormat"], ["uint32", "NHWC"]], [["uint32", "DefaultFormat"], ["uint32", "HWCN"]], [["uint32", "NHWC"], ["uint32", "DefaultFormat"]], [["uint32", "NHWC"], ["uint32", "HWCN"]], [["uint32", "HWCN"], ["uint32", "DefaultFormat"]], [["uint32", "HWCN"], ["uint32", "NHWC"]], [["uint64", "DefaultFormat"], ["uint64", "NHWC"]], [["uint64", "DefaultFormat"], ["uint64", "HWCN"]], [["uint64", "NHWC"], ["uint64", "DefaultFormat"]], [["uint64", "NHWC"], ["uint64", "HWCN"]], [["uint64", "HWCN"], ["uint64", "DefaultFormat"]], [["uint64", "HWCN"], ["uint64", "NHWC"]], [["int32", "FRACTAL_NZ"], ["int32", "DefaultFormat"]], [["float16", 
"NDHWC"], ["float16", "NC1HWC0"]], [["float16", "NC1HWC0"], ["float16", "NDHWC"]], [["int8", "HWCN"], ["int8", "C1HWNCoC0"]], [["float16", "HWCN"], ["float16", "FracZ"]], [["float16", "FracZ"], ["float16", "HWCN"]], [["float16", "HWCN"], ["float16", "FRACTAL_NZ"]], [["float32", "HWCN"], ["float16", "FRACTAL_NZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "trans_data.so", "compute_cost": 10, "kernel_name": "trans_data", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "TopK", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "assist_seq", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "values", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "dim", "param_type": "optional", "type": "int", "value": "all"}, {"name": "k", "param_type": "required", "type": "int", "value": "all"}, {"name": "largest", "param_type": "optional", "type": "bool", "value": "all"}, {"name": "sorted", "param_type": "optional", "type": "bool", "value": "true"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["int32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "top_k_d.so", "compute_cost": 10, "kernel_name": "top_k_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "MatMul", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "bias", "need_compile": false, "param_type": "optional", "shape": "all"}, {"index": 3, "name": "offset_w", 
"need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "transpose_x1", "param_type": "required", "type": "bool", "value": "all"}, {"name": "transpose_x2", "param_type": "required", "type": "bool", "value": "all"}, {"name": "offset_x", "param_type": "optional", "type": "int", "value": "all"}], "fusion_type": "DYNAMIC", "dtype_format": [[["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int8", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "DefaultFormat"], ["int8", "DefaultFormat"], ["float16", "FRACTAL_NZ"]], [["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float32", "DefaultFormat"], ["int8", "DefaultFormat"], ["float32", "FRACTAL_NZ"]], [["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["int8", "DefaultFormat"], ["float32", "NHWC"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["int8", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "NHWC"], ["int32", "NHWC"], ["int32", "NHWC"], ["int8", "DefaultFormat"], ["int32", "NHWC"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "matmul.so", "compute_cost": 10, "kernel_name": "matmul", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Sub", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["int32", ""], ["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""]]], 
"imply_type": "TBE", "async_flag": false, "binfile_name": "sub.so", "compute_cost": 10, "kernel_name": "sub", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "ReduceMeanD", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "optional", "type": "listInt", "value": "all"}, {"name": "keep_dims", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", ""], ["int8", ""]], [["uint8", ""], ["uint8", ""]], [["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "reduce_mean_d.so", "compute_cost": 10, "kernel_name": "reduce_mean_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "reduce"} +{"op_name": "ScatterNd", "inputs": [{"index": 0, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "shape", "param_type": "optional", "type": "listInt", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["int32", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int32", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "scatter_nd_d.so", 
"compute_cost": 10, "kernel_name": "scatter_nd_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ScatterNdD", "inputs": [{"index": 0, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "shape", "param_type": "optional", "type": "listInt", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int32", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int32", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "scatter_nd_d.so", "compute_cost": 10, "kernel_name": "scatter_nd_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ReduceMean", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "optional", "type": "listInt", "value": "all"}, {"name": "keep_dims", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", ""], ["int8", ""]], [["uint8", ""], ["uint8", ""]], [["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "reduce_mean.so", "compute_cost": 10, "kernel_name": "reduce_mean", "partial_flag": true, "reshape_type": 
"", "dynamic_format": false, "op_pattern": "reduce"} +{"op_name": "Tile", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "multiples", "param_type": "optional", "type": "listInt", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "tile_d.so", "compute_cost": 10, "kernel_name": "tile_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "AtomicAddrClean", "inputs": [], "outputs": [], "attr": [{"name": "automic_add_mem_size", "param_type": "required", "type": "listUInt64", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [], "imply_type": "TBE", "async_flag": false, "binfile_name": "atomic_addr_clean.so", "compute_cost": 10, "kernel_name": "atomic_addr_clean", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "GatherV2", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "optional", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["int32", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int64", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int8", "NC1HWC0"], ["int32", "NC1HWC0"], ["int8", "NC1HWC0"]], [["int8", "NC1HWC0"], ["int64", "NC1HWC0"], ["int8", 
"NC1HWC0"]], [["int8", "FracZ"], ["int32", "FracZ"], ["int8", "FracZ"]], [["int8", "FracZ"], ["int64", "FracZ"], ["int8", "FracZ"]], [["uint8", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["int64", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint8", "NC1HWC0"], ["int32", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["uint8", "NC1HWC0"], ["int64", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["uint8", "FracZ"], ["int32", "FracZ"], ["uint8", "FracZ"]], [["uint8", "FracZ"], ["int64", "FracZ"], ["uint8", "FracZ"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int64", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "NC1HWC0"], ["int32", "NC1HWC0"], ["int32", "NC1HWC0"]], [["int32", "NC1HWC0"], ["int64", "NC1HWC0"], ["int32", "NC1HWC0"]], [["int32", "FracZ"], ["int32", "FracZ"], ["int32", "FracZ"]], [["int32", "FracZ"], ["int64", "FracZ"], ["int32", "FracZ"]], [["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "DefaultFormat"], ["int64", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["int32", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "NC1HWC0"], ["int64", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["int32", "FracZ"], ["float16", "FracZ"]], [["float16", "FracZ"], ["int64", "FracZ"], ["float16", "FracZ"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int64", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["int32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "NC1HWC0"], ["int64", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["int32", "FracZ"], ["float32", "FracZ"]], [["float32", "FracZ"], ["int64", "FracZ"], ["float32", "FracZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "gather_v2_d.so", 
"compute_cost": 10, "kernel_name": "gather_v2_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "GatherNd", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int64", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int64", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "DefaultFormat"], ["int64", "DefaultFormat"], ["float16", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int32", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int64", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["int64", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["bool", "DefaultFormat"], ["int32", "DefaultFormat"], ["bool", "DefaultFormat"]], [["bool", "DefaultFormat"], ["int64", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "gather_nd.so", "compute_cost": 10, "kernel_name": "gather_nd", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "BNTrainingReduce", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}], "outputs": [{"index": 0, "name": "sum", "need_compile": false, 
"param_type": "required", "shape": "all"}, {"index": 1, "name": "square_sum", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float32", ""], ["float32", ""]], [["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "bn_training_reduce.so", "compute_cost": 10, "kernel_name": "bn_training_reduce", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "BNTrainingReduceGrad", "inputs": [{"index": 0, "name": "grads", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}, {"index": 1, "name": "x_norm", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}, {"index": 2, "name": "diff_scale", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "diff_offset", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "scale", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "batch_mean", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "batch_variance", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}], "attr": [{"name": "epsilon", "param_type": "optional", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "bn_training_reduce_grad.so", "compute_cost": 10, "kernel_name": 
"bn_training_reduce_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "BNTrainingUpdate", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}, {"index": 1, "name": "sum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "square_sum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "scale", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "offset", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "mean", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "variance", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}, {"index": 1, "name": "mean", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "variance", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "batch_mean", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "batch_variance", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "factor", "param_type": "optional", "type": "float", "value": "all"}, {"name": "epsilon", "param_type": "optional", "type": "float", "value": "all"}, {"name": "isRef", "param_type": "optional", "type": "bool", "value": "all", "default_value": "true"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float16", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], 
[["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "bn_training_update.so", "compute_cost": 10, "kernel_name": "bn_training_update", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "BNTrainingUpdateGrad", "inputs": [{"index": 0, "name": "grads", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}, {"index": 2, "name": "batch_mean", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "batch_variance", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "diff_scale", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "diff_offset", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "epsilon", "param_type": "optional", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", ""], ["float16", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""]], [["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "bn_training_update_grad.so", "compute_cost": 10, "kernel_name": "bn_training_update_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "BNInfer", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}, {"index": 1, "name": "scale", 
"need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "offset", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "mean", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "variance", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}], "attr": [{"name": "epsilon", "param_type": "required", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "bn_infer.so", "compute_cost": 10, "kernel_name": "bn_infer", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "BNInferGrad", "inputs": [{"index": 0, "name": "grads", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}, {"index": 1, "name": "scale", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "batch_variance", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "x_backprop", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}], "attr": [{"name": "epsilon", "param_type": "optional", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": 
"bn_infer_grad.so", "compute_cost": 10, "kernel_name": "bn_infer_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Reciprocal", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["", ""], ["", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "reciprocal.so", "compute_cost": 10, "kernel_name": "reciprocal", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "StridedSlice", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "begin", "param_type": "optional", "type": "listInt", "value": "all"}, {"name": "end", "param_type": "optional", "type": "listInt", "value": "all"}, {"name": "strides", "param_type": "optional", "type": "listInt", "value": "all"}, {"name": "begin_mask", "param_type": "required", "type": "int", "value": "all"}, {"name": "end_mask", "param_type": "required", "type": "int", "value": "all"}, {"name": "ellipsis_mask", "param_type": "required", "type": "int", "value": "all"}, {"name": "new_axis_mask", "param_type": "required", "type": "int", "value": "all"}, {"name": "shrink_axis_mask", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["bool", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], 
"imply_type": "TBE", "async_flag": false, "binfile_name": "strided_slice_d.so", "compute_cost": 10, "kernel_name": "strided_slice_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "StridedSliceGrad", "inputs": [{"index": 0, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "shapex", "param_type": "optional", "type": "listInt", "value": "all"}, {"name": "begin", "param_type": "optional", "type": "listInt", "value": "all"}, {"name": "end", "param_type": "optional", "type": "listInt", "value": "all"}, {"name": "strides", "param_type": "optional", "type": "listInt", "value": "all"}, {"name": "begin_mask", "param_type": "optional", "type": "int", "value": "all"}, {"name": "end_mask", "param_type": "optional", "type": "int", "value": "all"}, {"name": "ellipsis_mask", "param_type": "optional", "type": "int", "value": "all"}, {"name": "new_axis_mask", "param_type": "optional", "type": "int", "value": "all"}, {"name": "shrink_axis_mask", "param_type": "optional", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "strided_slice_grad_d.so", "compute_cost": 10, "kernel_name": "strided_slice_grad_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Split", "inputs": [{"index": 0, "name": "value", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output", "need_compile": false, "param_type": 
"dynamic", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "int", "value": "all"}, {"name": "output_num", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["", ""], ["", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "split_d.so", "compute_cost": 10, "kernel_name": "split_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "Exp", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "exp.so", "compute_cost": 10, "kernel_name": "exp", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "Expm1", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "expm1.so", "compute_cost": 10, "kernel_name": "expm1", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "Elu", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "alpha", "param_type": "optional", "type": "float", "value": "all", "default_value": "1.0"}], "fusion_type": "ELEMWISE", 
"dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "elu.so", "compute_cost": 10, "kernel_name": "elu", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "EluGrad", "inputs": [{"index": 0, "name": "grads", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "activations", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "elu_grad.so", "compute_cost": 10, "kernel_name": "elu_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Div", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["int8", ""], ["int8", ""], ["int8", ""]], [["uint8", ""], ["uint8", ""], ["uint8", ""]], 
[["int32", ""], ["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "div.so", "compute_cost": 10, "kernel_name": "div", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "Log", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "log.so", "compute_cost": 10, "kernel_name": "log", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "FloorDiv", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["int8", ""], ["int8", ""], ["int8", ""]], [["uint8", ""], ["uint8", ""], ["uint8", ""]], [["int32", ""], ["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "floordiv.so", "compute_cost": 10, "kernel_name": "floordiv", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "ZerosLike", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": 
"all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["bool", ""], ["bool", ""]], [["int8", ""], ["int8", ""]], [["uint8", ""], ["uint8", ""]], [["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "zeros_like.so", "compute_cost": 10, "kernel_name": "zeros_like", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "Neg", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "neg.so", "compute_cost": 10, "kernel_name": "neg", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "NPUClearFloatStatus", "inputs": [{"index": 0, "name": "addr", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "data", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "n_p_u_clear_float_status.so", "compute_cost": 10, "kernel_name": "n_p_u_clear_float_status", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "NPUGetFloatStatus", "inputs": [{"index": 0, "name": "addr", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "data", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": 
[[["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "n_p_u_get_float_status.so", "compute_cost": 10, "kernel_name": "n_p_u_get_float_status", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "NPUAllocFloatStatus", "inputs": [], "outputs": [{"index": 0, "name": "data", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "n_p_u_alloc_float_status.so", "compute_cost": 10, "kernel_name": "n_p_u_alloc_float_status", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "OneHot", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "on_value", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "off_value", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "depth", "param_type": "required", "type": "int", "value": "all"}, {"name": "axis", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["uint8", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], 
[["int32", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int32", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "one_hot.so", "compute_cost": 10, "kernel_name": "one_hot", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Equal", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["int8", ""], ["int8", ""], ["bool", ""]], [["uint8", ""], ["uint8", ""], ["bool", ""]], [["int32", ""], ["int32", ""], ["bool", ""]], [["float16", ""], ["float16", ""], ["bool", ""]], [["float32", ""], ["float32", ""], ["bool", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "equal.so", "compute_cost": 10, "kernel_name": "equal", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "Less", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int8", 
""], ["int8", ""], ["bool", ""]], [["uint8", ""], ["uint8", ""], ["bool", ""]], [["int32", ""], ["int32", ""], ["bool", ""]], [["float16", ""], ["float16", ""], ["bool", ""]], [["float32", ""], ["float32", ""], ["bool", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "less.so", "compute_cost": 10, "kernel_name": "less", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "LessEqual", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "begin_norm_axis", "param_type": "required", "type": "int", "value": "all"}, {"name": "begin_params_axis", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", ""], ["int8", ""], ["bool", ""]], [["uint8", ""], ["uint8", ""], ["bool", ""]], [["int32", ""], ["int32", ""], ["bool", ""]], [["float16", ""], ["float16", ""], ["bool", ""]], [["float32", ""], ["float32", ""], ["bool", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "less_equal.so", "compute_cost": 10, "kernel_name": "less_equal", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "LogicalAnd", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["bool", ""], ["bool", ""], ["bool", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "logical_and.so", "compute_cost": 10, "kernel_name": "logical_and", 
"partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "LogicalNot", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["bool", ""], ["bool", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "logical_not.so", "compute_cost": 10, "kernel_name": "logical_not", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "LogicalOr", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["bool", ""], ["bool", ""], ["bool", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "logical_or.so", "compute_cost": 10, "kernel_name": "logical_or", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "ReduceMax", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "optional", "type": "listInt", "value": "all"}, {"name": "keep_dims", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["bool", ""], ["bool", ""]], [["int8", ""], ["int8", ""]], [["uint8", ""], ["uint8", ""]], [["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": 
"reduce_max_d.so", "compute_cost": 10, "kernel_name": "reduce_max_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "reduce"} +{"op_name": "ReduceMin", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "keep_dims", "param_type": "required", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", ""], ["int8", ""]], [["uint8", ""], ["uint8", ""]], [["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "reduce_min_d.so", "compute_cost": 10, "kernel_name": "reduce_min_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "reduce"} +{"op_name": "ReduceSum", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "optional", "type": "listInt", "value": "all"}, {"name": "keep_dims", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "reduce_sum_d.so", "compute_cost": 10, "kernel_name": "reduce_sum_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "reduce"} +{"op_name": "Round", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", 
""], ["float16", ""]], [["float32", ""], ["float32", ""]], [["int32", ""], ["int32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "round.so", "compute_cost": 10, "kernel_name": "round", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "Tanh", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "tanh.so", "compute_cost": 10, "kernel_name": "tanh", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "TanhGrad", "inputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "tanh_grad.so", "compute_cost": 10, "kernel_name": "tanh_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Softmax", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": 
"required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "optional", "type": "listInt", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "softmax.so", "compute_cost": 10, "kernel_name": "softmax", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Softsign", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "softsign.so", "compute_cost": 10, "kernel_name": "softsign", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "Softplus", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "softplus.so", "compute_cost": 10, "kernel_name": "softplus", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "SoftplusGrad", "inputs": [{"index": 0, "name": "gradients", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "features", "need_compile": 
false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "backprops", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "softplus_grad.so", "compute_cost": 10, "kernel_name": "softplus_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "SoftmaxGradExt", "inputs": [{"index": 0, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "keepdims", "param_type": "required", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["", ""], ["", ""], ["", ""], ["", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "softmax_grad_ext.so", "compute_cost": 10, "kernel_name": "softmax_grad_ext", "partial_flag": true, "reshape_type": "", "dynamic_format": true, "op_pattern": "dynamicFormat"} +{"op_name": "Square", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "square.so", "compute_cost": 10, "kernel_name": "square", "partial_flag": true, 
"reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "Sqrt", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "sqrt.so", "compute_cost": 10, "kernel_name": "sqrt", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "SparseApplyFtrl", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "linear", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "linear", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "lr", "param_type": "required", "type": "float", "value": "all"}, {"name": "l1", "param_type": "required", "type": "float", "value": "all"}, {"name": "l2", "param_type": "required", "type": "float", "value": "all"}, {"name": "lr_power", "param_type": "required", "type": "float", "value": "all"}, {"name": "use_locking", "param_type": "optional", "type": "bool", "value": "true,false", "default_value": "false"}], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "NCHW"], ["float32", "NCHW"], ["float32", 
"NCHW"], ["float32", "NCHW"], ["int32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"]], [["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["int32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "sparse_apply_ftrl.so", "compute_cost": 10, "kernel_name": "sparse_apply_ftrl", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "SparseApplyProximalAdagrad", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "lr", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "l1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "l2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "use_locking", "param_type": "optional", "type": "bool", "value": "true,false", "default_value": "false"}], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["int16", "NCHW"], ["float32", "NCHW"], 
["float32", "NCHW"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["int16", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["int16", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["int16", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["int16", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["int32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["int32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["int32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["int32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "NCHW"], ["float32", "NCHW"], 
["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["int64", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["int64", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["int64", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["int64", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["int64", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["uint16", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["uint16", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["uint16", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["uint16", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], 
["float32", "FracZ"], ["uint16", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["uint32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["uint32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["uint32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["uint32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["uint32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["uint64", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["uint64", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["uint64", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["uint64", "DefaultFormat"], ["float32", "DefaultFormat"], 
["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["uint64", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "sparse_apply_proximal_adagrad.so", "compute_cost": 10, "kernel_name": "sparse_apply_proximal_adagrad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ApplyProximalAdagrad", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "lr", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "l1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "l2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "use_locking", "param_type": "optional", "type": "bool", "value": "true,false", "default_value": "false"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", 
"DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "apply_proximal_adagrad_d.so", "compute_cost": 10, "kernel_name": "apply_proximal_adagrad_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Transpose", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "perm", "param_type": "optional", "type": "listInt", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], 
[["uint16", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "transpose_d.so", "compute_cost": 10, "kernel_name": "transpose_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "UnsortedSegmentSum", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "segment_ids", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "num_segments", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["", ""], ["", ""], ["", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "unsorted_segment_sum_d.so", "compute_cost": 10, "kernel_name": "unsorted_segment_sum_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "UnsortedSegmentProd", "inputs": [{"index": 0, "name": "data", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "segment_ids", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "num_segments", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["int32", "DefaultFormat"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["int32", 
"DefaultFormat"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["int32", "DefaultFormat"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["int32", "DefaultFormat"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["int32", "DefaultFormat"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["int32", "DefaultFormat"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "NC1HWC0"], ["int32", "DefaultFormat"], ["int32", "NC1HWC0"]], [["int32", "FracZ"], ["int32", "DefaultFormat"], ["int32", "FracZ"]], [["int32", "C1HWNCoC0"], ["int32", "DefaultFormat"], ["int32", "C1HWNCoC0"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "unsorted_segment_prod_d.so", "compute_cost": 10, "kernel_name": "unsorted_segment_prod_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "LogSoftmaxGrad", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "optional", "type": "listInt", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "log_softmax_grad.so", "compute_cost": 10, "kernel_name": "log_softmax_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "LogSoftmax", "inputs": 
[{"index": 0, "name": "logits", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "logsoftmax", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "optional", "type": "listInt", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "log_softmax.so", "compute_cost": 10, "kernel_name": "log_softmax", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Select", "inputs": [{"index": 0, "name": "condition", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["", ""], ["", ""], ["", ""], ["", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "select.so", "compute_cost": 10, "kernel_name": "select", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "Pow", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["int8", ""], ["int8", ""], ["int8", ""]], [["uint8", ""], ["uint8", ""], ["uint8", ""]], [["int32", ""], ["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""], ["float16", ""]], [["float32", 
""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "pow.so", "compute_cost": 10, "kernel_name": "pow", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "Maximum", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["int32", ""], ["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "maximum.so", "compute_cost": 10, "kernel_name": "maximum", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "Minimum", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["int32", ""], ["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "minimum.so", "compute_cost": 10, "kernel_name": "minimum", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "MinimumGrad", "inputs": [{"index": 0, "name": "grads", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "x2", 
"need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "y2", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "grad_x", "param_type": "optional", "type": "bool", "value": "all"}, {"name": "grad_y", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int32", ""], ["int32", ""], ["int32", ""], ["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""], ["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "minimum_grad.so", "compute_cost": 10, "kernel_name": "minimum_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "MaximumGrad", "inputs": [{"index": 0, "name": "grads", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "y2", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "grad_x", "param_type": "optional", "type": "bool", "value": "all"}, {"name": "grad_y", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int32", ""], ["int32", ""], ["int32", ""], ["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""], ["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": 
"maximum_grad.so", "compute_cost": 10, "kernel_name": "maximum_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "Concat", "inputs": [{"index": 0, "name": "input_values", "need_compile": false, "param_type": "dynamic", "shape": "all"}], "outputs": [{"index": 0, "name": "output_data", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["", ""], ["", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "concat_d.so", "compute_cost": 10, "kernel_name": "concat_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "Slice", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "begin", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "size", "param_type": "required", "type": "listInt", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], [["uint16", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "slice_d.so", "compute_cost": 10, "kernel_name": "slice_d", "partial_flag": true, "reshape_type": "", "dynamic_format": 
false, "op_pattern": ""} +{"op_name": "Sign", "inputs": [{"index": 0, "name": "x", "param_type": "required"}], "outputs": [{"index": 0, "name": "y", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]], [["int32", ""], ["int32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "sign.so", "compute_cost": 10, "kernel_name": "sign", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "Greater", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int8", ""], ["int8", ""], ["bool", ""]], [["uint8", ""], ["uint8", ""], ["bool", ""]], [["int32", ""], ["int32", ""], ["bool", ""]], [["float16", ""], ["float16", ""], ["bool", ""]], [["float32", ""], ["float32", ""], ["bool", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "greater.so", "compute_cost": 10, "kernel_name": "greater", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "ClipByNormNoDivSum", "inputs": [{"index": 0, "name": "input_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "input1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "input2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "input3", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output_y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": 
"ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "clip_by_norm_no_div_sum.so", "compute_cost": 10, "kernel_name": "clip_by_norm_no_div_sum", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ClipByValue", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "clip_value_min", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "clip_value_max", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "dst_type", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["int32", ""], ["int32", ""], ["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "clip_by_value.so", "compute_cost": 10, "kernel_name": "clip_by_value", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "LayerNormBetaGammaBackprop", "inputs": [{"index": 0, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "variance", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "mean", "need_compile": false, "param_type": "required", 
"shape": "all"}], "outputs": [{"index": 0, "name": "pd_gamma", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "pd_beta", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "shape_gamma", "param_type": "required", "type": "listInt", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", ""], ["float16", ""], ["float16", ""], ["float16", ""], ["float32", ""], ["float32", ""]], [["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "layer_norm_beta_gamma_backprop.so", "compute_cost": 10, "kernel_name": "layer_norm_beta_gamma_backprop", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "LayerNorm", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "gamma", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "beta", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "mean", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "variance", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "begin_norm_axis", "param_type": "required", "type": "int", "value": "all"}, {"name": "begin_params_axis", "param_type": "required", "type": "int", "value": "all"}, {"name": "epsilon", "param_type": "optional", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", ""], ["float16", ""], ["float16", ""], ["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", 
"async_flag": false, "binfile_name": "layer_norm.so", "compute_cost": 10, "kernel_name": "layer_norm", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "LayerNormGrad", "inputs": [{"index": 0, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "variance", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "mean", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "gamma", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "pd_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "pd_gamma", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "pd_beta", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, 
"binfile_name": "layer_norm_grad.so", "compute_cost": 10, "kernel_name": "layer_norm_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "LayerNormXBackprop", "inputs": [{"index": 0, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "variance", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "mean", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "gamma", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "pd_x", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", ""], ["float16", ""], ["float16", ""], ["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "layer_norm_x_backprop.so", "compute_cost": 10, "kernel_name": "layer_norm_x_backprop", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "L2Loss", "inputs": [{"index": 0, "name": "x", "param_type": "required"}], "outputs": [{"index": 0, "name": "y", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "FracZ"], ["float16", "DefaultFormat"]], [["float16", "FRACTAL_NZ"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "DefaultFormat"]], [["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "DefaultFormat"]], [["float32", "FRACTAL_NZ"], 
["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "DefaultFormat"]], [["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "l2_loss.so", "compute_cost": 10, "kernel_name": "l2_loss", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "L2Normalize", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "epsilon", "param_type": "required", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "l2_normalize.so", "compute_cost": 10, "kernel_name": "l2_normalize", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "L2NormalizeGrad", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "dx", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "epsilon", "param_type": "required", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", 
"DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "l2_normalize_grad.so", "compute_cost": 10, "kernel_name": "l2_normalize_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "SquareSumV1", "inputs": [{"index": 0, "name": "input_x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output1", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "optional", "type": "listInt", "value": "all"}, {"name": "keep_dims", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "square_sum_v1.so", "compute_cost": 10, "kernel_name": "square_sum_v1", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "SquareSumV2", "inputs": [{"index": 0, "name": "input_x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "output2", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "optional", "type": "listInt", "value": "all"}, {"name": "keep_dims", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "square_sum_v2.so", "compute_cost": 10, "kernel_name": "square_sum_v2", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": 
""} +{"op_name": "ConfusionTransposeD", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "perm", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "shape", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "transpose_first", "param_type": "required", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["", ""], ["", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "confusion_transpose_d.so", "compute_cost": 10, "kernel_name": "confusion_transpose_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "ConfusionSoftmaxGrad", "inputs": [{"index": 0, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "confusion_softmax_grad.so", "compute_cost": 10, "kernel_name": "confusion_softmax_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "LambUpdateWithLrV2", "inputs": [{"index": 0, 
"name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "x3", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "x4", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "x5", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "greater_y", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "select_e", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "lamb_update_with_lr_v2.so", "compute_cost": 10, "kernel_name": "lamb_update_with_lr_v2", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "LambNextMV", "inputs": [{"index": 0, "name": "input1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "input2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "input3", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "input4", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "input5", 
"need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "input6", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "input7", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 7, "name": "input8", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 8, "name": "input9", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 9, "name": "inputx0", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 10, "name": "inputx1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 11, "name": "inputx2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 12, "name": "inputx3", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "output2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "output3", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "output4", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", 
"DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "lamb_next_m_v.so", "compute_cost": 10, "kernel_name": "lamb_next_m_v", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "LambNextMVWithDecay", "inputs": [{"index": 0, "name": "input_mul3", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "input_mul2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "input_realdiv1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "input_mul1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "input_mul0", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "input_realdiv0", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "input_mul4", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 7, "name": "mul0_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 8, "name": "mul1_sub", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 9, "name": "mul2_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 10, "name": "mul3_sub1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 11, "name": "mul4_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 12, "name": "add2_y", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y1", "need_compile": true, "param_type": 
"required", "shape": "all"}, {"index": 1, "name": "y2", "need_compile": true, "param_type": "required", "shape": "all"}, {"index": 2, "name": "y3", "need_compile": true, "param_type": "required", "shape": "all"}, {"index": 3, "name": "y4", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "lamb_next_m_v_with_decay.so", "compute_cost": 10, "kernel_name": "lamb_next_m_v_with_decay", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "LambUpdateWithLR", "inputs": [{"index": 0, "name": "input1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "input2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "input3", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "input4", 
"need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "input5", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "input6", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "input7", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 7, "name": "input8", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 8, "name": "input9", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output_y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "lamb_update_with_lr.so", "compute_cost": 10, "kernel_name": "lamb_update_with_lr", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Rsqrt", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "rsqrt.so", "compute_cost": 10, 
"kernel_name": "rsqrt", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "Sigmoid", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "sigmoid.so", "compute_cost": 10, "kernel_name": "sigmoid", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "SigmoidGrad", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "sigmoid_grad.so", "compute_cost": 10, "kernel_name": "sigmoid_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ResizeNearestNeighbor", "inputs": [{"index": 0, "name": "images", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [{"name": "size", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "align_corners", 
"param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "NC1HWC0"], ["int8", "NC1HWC0"]], [["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["int32", "NC1HWC0"], ["int32", "NC1HWC0"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "resize_nearest_neighbor_d.so", "compute_cost": 10, "kernel_name": "resize_nearest_neighbor_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ResizeNearestNeighborGrad", "inputs": [{"index": 0, "name": "grads", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "size", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "align_corners", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "resize_nearest_neighbor_grad_d.so", "compute_cost": 10, "kernel_name": "resize_nearest_neighbor_grad_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Pad", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "paddings", "param_type": "optional", "type": "listListInt", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", 
"DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "pad_d.so", "compute_cost": 10, "kernel_name": "pad_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ArgMaxWithValue", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "indice", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "values", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "arg_max_with_value.so", "compute_cost": 10, "kernel_name": "arg_max_with_value", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ArgMinWithValue", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "indice", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "values", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "arg_min_with_value.so", "compute_cost": 10, "kernel_name": "arg_min_with_value", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": 
"SmoothL1Loss", "inputs": [{"index": 0, "name": "predict", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "label", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "loss", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "sigma", "param_type": "required", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "smooth_l1_loss.so", "compute_cost": 10, "kernel_name": "smooth_l1_loss", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "SmoothL1LossGrad", "inputs": [{"index": 0, "name": "predict", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "label", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "dout", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "loss", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "sigma", "param_type": "required", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], 
[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "smooth_l1_loss_grad.so", "compute_cost": 10, "kernel_name": "smooth_l1_loss_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "FusedMulAdd", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "x3", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["", ""], ["", ""], ["", ""], ["", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "fused_mul_add.so", "compute_cost": 10, "kernel_name": "fused_mul_add", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "FusedMulAddN", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "x3", "need_compile": false, "param_type": "required", "shape": 
"all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "FracZ"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "FracZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "fused_mul_add_n.so", "compute_cost": 10, "kernel_name": "fused_mul_add_n", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "FusedMulApplyMomentum", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "lr", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "momentum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, 
"name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "use_nesterov", "param_type": "optional", "type": "bool", "value": "true,false", "default_value": "false"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]]], "imply_type": "TBE", 
"async_flag": false, "binfile_name": "fused_mul_apply_momentum.so", "compute_cost": 10, "kernel_name": "fused_mul_apply_momentum", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Fill", "inputs": [{"index": 0, "name": "value", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "dims", "param_type": "required", "type": "listInt", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "NC1HWC0"], ["int32", "NC1HWC0"]], [["int32", "FracZ"], ["int32", "FracZ"]], [["int32", "C1HWNCoC0"], ["int32", "C1HWNCoC0"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int8", "NC1HWC0"], ["int8", "NC1HWC0"]], [["int8", "FracZ"], ["int8", "FracZ"]], [["int8", "C1HWNCoC0"], ["int8", "C1HWNCoC0"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["uint8", "FracZ"], ["uint8", "FracZ"]], [["uint8", "C1HWNCoC0"], ["uint8", "C1HWNCoC0"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "fill_d.so", "compute_cost": 10, "kernel_name": "fill_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Erf", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", 
"shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "erf.so", "compute_cost": 10, "kernel_name": "erf", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "Erfc", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "erfc.so", "compute_cost": 10, "kernel_name": "erfc", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "DepthwiseConv2dNative", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "filter", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "bias", "need_compile": false, "param_type": "optional", "shape": "all"}, {"index": 3, "name": "offset_w", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [{"name": "stride", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "dilation", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "pads", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "data_format", "param_type": "required", "type": "str", "value": "all"}, {"name": "offset_a", "param_type": "optional", "type": "int", "value": "all"}], "fusion_type": "CONVLUTION", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "C1HWNCoC0"], ["float16", 
"DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "depthwise_conv2d.so", "compute_cost": 10, "kernel_name": "depthwise_conv2d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "DepthwiseConv2dNativeBackpropFilter", "inputs": [{"index": 0, "name": "input", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "out_backprop", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "filter_grad", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "filter_size", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "stride", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "dilation", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "pads", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "data_format", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "CONVLUTION", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float32", "C1HWNCoC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "depthwise_conv2d_backprop_filter_d.so", "compute_cost": 10, "kernel_name": "depthwise_conv2d_backprop_filter_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "DepthwiseConv2dNativeBackpropInput", "inputs": [{"index": 0, "name": "filter", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "out_backprop", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "input_grad", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "input_size", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "stride", "param_type": "required", "type": 
"listInt", "value": "all"}, {"name": "dilation", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "pads", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "data_format", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "CONVLUTION", "dtype_format": [[["float16", "C1HWNCoC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "depthwise_conv2d_backprop_input_d.so", "compute_cost": 10, "kernel_name": "depthwise_conv2d_backprop_input_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "GreaterEqual", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int8", ""], ["int8", ""], ["bool", ""]], [["uint8", ""], ["uint8", ""], ["bool", ""]], [["int32", ""], ["int32", ""], ["bool", ""]], [["float16", ""], ["float16", ""], ["bool", ""]], [["float32", ""], ["float32", ""], ["bool", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "greater_equal.so", "compute_cost": 10, "kernel_name": "greater_equal", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "NotEqual", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["int8", ""], ["int8", ""], ["bool", ""]], [["uint8", ""], ["uint8", ""], ["bool", ""]], [["int32", ""], ["int32", 
""], ["bool", ""]], [["float16", ""], ["float16", ""], ["bool", ""]], [["float32", ""], ["float32", ""], ["bool", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "not_equal.so", "compute_cost": 10, "kernel_name": "not_equal", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "FloorMod", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""]], [["int32", ""], ["int32", ""], ["int32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "floor_mod.so", "compute_cost": 10, "kernel_name": "floor_mod", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "ScatterNdUpdate", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "updates", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "use_locking", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int32", "DefaultFormat"], ["int8", "DefaultFormat"], 
["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["bool", "DefaultFormat"], ["int32", "DefaultFormat"], ["bool", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "scatter_nd_update.so", "compute_cost": 10, "kernel_name": "scatter_nd_update", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "AvgPool", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "ksize", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "strides", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "padding", "param_type": "required", "type": "str", "value": "all"}, {"name": "data_format", "param_type": "optional", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "avg_pool.so", "compute_cost": 10, "kernel_name": "avg_pool", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "AvgPoolGrad", "inputs": [{"index": 0, "name": "input_grad", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "mean_matrix", "need_compile": false, "param_type": "optional", "shape": "all"}, {"index": 2, "name": "kernel_matrix", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "out_grad", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [{"name": "x_origin", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "ksize", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "strides", 
"param_type": "required", "type": "listInt", "value": "all"}, {"name": "padding", "param_type": "required", "type": "str", "value": "all"}, {"name": "data_format", "param_type": "optional", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "C1HWNCoC0"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "avg_pool_grad_d.so", "compute_cost": 10, "kernel_name": "avg_pool_grad_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "OnesLike", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["uint8", ""], ["uint8", ""]], [["int8", ""], ["int8", ""]], [["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "ones_like.so", "compute_cost": 10, "kernel_name": "ones_like", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "BatchToSpace", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "block_size", "param_type": "required", "type": "int", "value": "all"}, {"name": "crops", "param_type": "required", "type": "listListInt", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "batch_to_space_d.so", "compute_cost": 10, "kernel_name": "batch_to_space_d", "partial_flag": true, "reshape_type": "", 
"dynamic_format": false, "op_pattern": ""} +{"op_name": "SpaceToBatch", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "block_size", "param_type": "required", "type": "int", "value": "all"}, {"name": "paddings", "param_type": "required", "type": "listListInt", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "space_to_batch_d.so", "compute_cost": 10, "kernel_name": "space_to_batch_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "DepthToSpace", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "block_size", "param_type": "required", "type": "int", "value": "all"}, {"name": "data_format", "param_type": "optional", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NHWC"], ["float16", "NHWC"]], [["float32", "NHWC"], ["float32", "NHWC"]], [["int8", "NHWC"], ["int8", "NHWC"]], [["int16", "NHWC"], ["int16", "NHWC"]], [["int32", "NHWC"], ["int32", "NHWC"]], [["int64", "NHWC"], ["int64", "NHWC"]], [["uint8", "NHWC"], ["uint8", "NHWC"]], [["uint16", "NHWC"], ["uint16", "NHWC"]], [["uint32", "NHWC"], ["uint32", "NHWC"]], [["uint64", "NHWC"], ["uint64", "NHWC"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "depth_to_space.so", "compute_cost": 10, "kernel_name": "depth_to_space", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "SpaceToDepth", "inputs": [{"index": 0, "name": "x", "need_compile": false, 
"param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "block_size", "param_type": "required", "type": "int", "value": "all"}, {"name": "data_format", "param_type": "optional", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NHWC"], ["float16", "NHWC"]], [["float32", "NHWC"], ["float32", "NHWC"]], [["int8", "NHWC"], ["int8", "NHWC"]], [["int16", "NHWC"], ["int16", "NHWC"]], [["int32", "NHWC"], ["int32", "NHWC"]], [["int64", "NHWC"], ["int64", "NHWC"]], [["uint8", "NHWC"], ["uint8", "NHWC"]], [["uint16", "NHWC"], ["uint16", "NHWC"]], [["uint32", "NHWC"], ["uint32", "NHWC"]], [["uint64", "NHWC"], ["uint64", "NHWC"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "space_to_depth.so", "compute_cost": 10, "kernel_name": "space_to_depth", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Floor", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "floor.so", "compute_cost": 10, "kernel_name": "floor", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "Ceil", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "ceil.so", 
"compute_cost": 10, "kernel_name": "ceil", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "Log1p", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "log1p.so", "compute_cost": 10, "kernel_name": "log1p", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "ResizeBilinear", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "size", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "align_corners", "param_type": "optional", "type": "bool", "value": "all"}, {"name": "half_pixel_centers", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "resize_bilinear_v2_d.so", "compute_cost": 10, "kernel_name": "resize_bilinear_v2_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ResizeBilinearGrad", "inputs": [{"index": 0, "name": "grads", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "original_image", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "align_corners", "param_type": 
"optional", "type": "bool", "value": "all"}, {"name": "half_pixel_centers", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "resize_bilinear_v2_grad.so", "compute_cost": 10, "kernel_name": "resize_bilinear_v2_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Flatten", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], [["uint16", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "flatten.so", "compute_cost": 10, "kernel_name": "flatten", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ROIAlign", "inputs": [{"index": 0, "name": "features", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "rois", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "rois_n", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": 
"all"}], "attr": [{"name": "spatial_scale", "param_type": "required", "type": "float", "value": "all"}, {"name": "pooled_height", "param_type": "required", "type": "int", "value": "all"}, {"name": "pooled_width", "param_type": "required", "type": "int", "value": "all"}, {"name": "sample_num", "param_type": "optional", "type": "int", "value": "all", "default_value": "2"}, {"name": "roi_end_mode", "param_type": "optional", "type": "0,1", "value": "1"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "NC1HWC0"]], [["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "roi_align.so", "compute_cost": 10, "kernel_name": "roi_align", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ROIAlignGrad", "inputs": [{"index": 0, "name": "ydiff", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "rois", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "rois_n", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "xdiff", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "xdiff_shape", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "pooled_width", "param_type": "required", "type": "int", "value": "all"}, {"name": "pooled_height", "param_type": "required", "type": "int", "value": "all"}, {"name": "spatial_scale", "param_type": "required", "type": "float", "value": "all"}, {"name": "sample_num", "param_type": "optional", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": 
"roi_align_grad.so", "compute_cost": 10, "kernel_name": "roi_align_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "BoundingBoxDecode", "inputs": [{"index": 0, "name": "rois", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "deltas", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "bboxes", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "means", "param_type": "optional", "type": "listFloat", "value": "all"}, {"name": "stds", "param_type": "optional", "type": "listFloat", "value": "all"}, {"name": "max_shape", "param_type": "optional", "type": "listInt", "value": "all"}, {"name": "wh_ratio_clip", "param_type": "optional", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "bounding_box_decode.so", "compute_cost": 10, "kernel_name": "bounding_box_decode", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "BoundingBoxEncode", "inputs": [{"index": 0, "name": "anchor_box", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "ground_truth_box", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "delats", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "means", "param_type": "optional", "type": "listFloat", "value": "all"}, {"name": "stds", "param_type": "optional", "type": "listFloat", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]]], "imply_type": "TBE", 
"async_flag": false, "binfile_name": "bounding_box_encode.so", "compute_cost": 10, "kernel_name": "bounding_box_encode", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "CheckValid", "inputs": [{"index": 0, "name": "bbox_tensor", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "img_tas", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "valid_tensor", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", ""], ["float16", ""], ["int8", ""]], [["float16", ""], ["float16", ""], ["bool", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "check_valid.so", "compute_cost": 10, "kernel_name": "check_valid", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "IOU", "inputs": [{"index": 0, "name": "bboxes", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "gtboxes", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "overlap", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "mode", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "iou.so", "compute_cost": 10, "kernel_name": "iou", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Argmax", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "int", "value": 
"all"}, {"name": "output_dtype", "param_type": "optional", "type": "type", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "arg_max_d.so", "compute_cost": 10, "kernel_name": "arg_max_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "NMSWithMask", "inputs": [{"index": 0, "name": "box_scores", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "selected_boxes", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 0, "name": "selected_idx", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 0, "name": "selected_mask", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "iou_threshold", "param_type": "optional", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "nms_with_mask.so", "compute_cost": 10, "kernel_name": "nms_with_mask", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "SGD", "inputs": [{"index": 0, "name": "parameters", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "gradient", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "learning_rate", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "momentum", "need_compile": 
false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "stat", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "parameters", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "dampening", "param_type": "optional", "type": "float", "value": "all"}, {"name": "weight_decay", "param_type": "optional", "type": "float", "value": "all"}, {"name": "nesterov", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "FracZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "sgd.so", "compute_cost": 10, "kernel_name": "sgd", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "LARSUpdate", "inputs": [{"index": 0, "name": "w", "need_compile": false, "param_type": "required", 
"shape": "all"}, {"index": 1, "name": "g", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "w_square_sum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "g_square_sum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "weight_decay", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "learning_rate", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "g_new", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "hyperpara", "param_type": "optional", "type": "float", "value": "all"}, {"name": "epsilon", "param_type": "optional", "type": "float", "value": "all"}, {"name": "use_clip", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "lars_v2_update.so", "compute_cost": 10, "kernel_name": "lars_v2_update", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Argmin", "inputs": [{"index": 0, "name": "x", 
"need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "int", "value": "all"}, {"name": "output_dtype", "param_type": "optional", "type": "type", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "arg_min_d.so", "compute_cost": 10, "kernel_name": "arg_min_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "BNTrainingUpdateV2", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}, {"index": 1, "name": "sum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "square_sum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "scale", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "offset", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}, {"index": 1, "name": "batch_mean", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "batch_variance", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "epsilon", "param_type": "required", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float16", ""], ["float32", ""], ["float32", ""]], [["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", 
""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "bn_training_update_v2.so", "compute_cost": 10, "kernel_name": "bn_training_update_v2", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "BNTrainingUpdateV3", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}, {"index": 1, "name": "sum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "square_sum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "scale", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "offset", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}, {"index": 1, "name": "batch_mean", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "batch_variance", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "reserve_1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "reserve_2", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "epsilon", "param_type": "required", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float16", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, 
"binfile_name": "bn_training_update_v3.so", "compute_cost": 10, "kernel_name": "bn_training_update_v3", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "SquareSumAll", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "y2", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "square_sum_all.so", "compute_cost": 10, "kernel_name": "square_sum", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Pack", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "dynamic", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "optional", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint16", "DefaultFormat"], ["uint16", 
"DefaultFormat"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["bool", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int8", "NDHWC"], ["int8", "NDHWC"]], [["int16", "NDHWC"], ["int16", "NDHWC"]], [["int32", "NDHWC"], ["int32", "NDHWC"]], [["int64", "NDHWC"], ["int64", "NDHWC"]], [["uint8", "NDHWC"], ["uint8", "NDHWC"]], [["uint16", "NDHWC"], ["uint16", "NDHWC"]], [["uint32", "NDHWC"], ["uint32", "NDHWC"]], [["uint64", "NDHWC"], ["uint64", "NDHWC"]], [["float16", "NDHWC"], ["float16", "NDHWC"]], [["float32", "NDHWC"], ["float32", "NDHWC"]], [["bool", "NDHWC"], ["bool", "NDHWC"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "pack.so", "compute_cost": 10, "kernel_name": "pack", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Unpack", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "dynamic", "shape": "all"}], "attr": [{"name": "num", "param_type": "optional", "type": "int", "value": "all"}, {"name": "axis", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint16", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", 
"DefaultFormat"]], [["int8", "NC1HWC0"], ["int8", "NC1HWC0"]], [["int16", "NC1HWC0"], ["int16", "NC1HWC0"]], [["int32", "NC1HWC0"], ["int32", "NC1HWC0"]], [["int64", "NC1HWC0"], ["int64", "NC1HWC0"]], [["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["uint16", "NC1HWC0"], ["uint16", "NC1HWC0"]], [["uint32", "NC1HWC0"], ["uint32", "NC1HWC0"]], [["uint64", "NC1HWC0"], ["uint64", "NC1HWC0"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "unpack.so", "compute_cost": 10, "kernel_name": "unpack", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ScatterUpdate", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "updates", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "use_locking", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int32", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["bool", "DefaultFormat"], ["int32", "DefaultFormat"], ["bool", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "scatter_update.so", "compute_cost": 10, "kernel_name": "scatter_update", "partial_flag": true, 
"reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "PReLU", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "weight", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "NCHW"], ["float16", "DefaultFormat"], ["float16", "NCHW"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NCHW"], ["float32", "DefaultFormat"], ["float32", "NCHW"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "prelu.so", "compute_cost": 10, "kernel_name": "prelu", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "PReLUGrad", "inputs": [{"index": 0, "name": "grads", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "features", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "weights", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "dx", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "da", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float32", "NCHW"], ["float32", "NCHW"], ["float32", "DefaultFormat"], ["float32", "NCHW"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "DefaultFormat"], 
["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "prelu_grad.so", "compute_cost": 10, "kernel_name": "prelu_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "BinaryCrossEntropy", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "weight", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "output", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "reduction", "param_type": "optional", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["", ""], ["", ""], ["", ""], ["", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "binary_cross_entropy.so", "compute_cost": 10, "kernel_name": "binary_cross_entropy", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "BinaryCrossEntropyGrad", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "grad_output", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "weight", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "output", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "reduction", "param_type": "optional", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", 
"DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "binary_cross_entropy_grad.so", "compute_cost": 10, "kernel_name": "binary_cross_entropy_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Sin", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "sin.so", "compute_cost": 10, "kernel_name": "sin", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "Cos", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "cos.so", "compute_cost": 10, "kernel_name": "cos", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "CumSum", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": 
"required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "optional", "type": "int", "value": "all", "default_value": "0"}, {"name": "exclusive", "param_type": "optional", "type": "bool", "value": "true,false", "default_value": "false"}, {"name": "reverse", "param_type": "optional", "type": "bool", "value": "true,false", "default_value": "false"}], "fusion_type": "OPAQUE", "dtype_format": [[["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "cumsum_d.so", "compute_cost": 10, "kernel_name": "cumsum_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ApplyRMSProp", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "ms", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "mom", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "lr", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "ms", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "mom", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "rho", "param_type": "required", "type": "float", "value": "all"}, {"name": "momentum", "param_type": "required", "type": "float", "value": "all"}, {"name": "epsilon", "param_type": "required", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": 
[[["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "apply_rms_prop.so", "compute_cost": 10, "kernel_name": "apply_rms_prop_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "CumProd", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "optional", "type": "int", "value": "all"}, {"name": "exclusive", "param_type": "optional", "type": "bool", "value": "all"}, {"name": "reverse", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "cumprod_d.so", "compute_cost": 10, "kernel_name": 
"cumprod_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ReduceProd", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "keep_dims", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", ""], ["int8", ""]], [["uint8", ""], ["uint8", ""]], [["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "reduce_prod_d.so", "compute_cost": 10, "kernel_name": "reduce_prod_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "reduce"} +{"op_name": "FlattenGrad", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "shape", "param_type": "required", "type": "listInt", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "reshape.so", "compute_cost": 10, "kernel_name": "reshape", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ScatterAdd", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "updates", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": 
[{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "use_locking", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int32", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "scatter_add.so", "compute_cost": 10, "kernel_name": "scatter_add", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Atan2", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "atan2.so", "compute_cost": 10, "kernel_name": "atan2", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "BesselI0e", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", 
"dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "bessel_i0e.so", "compute_cost": 10, "kernel_name": "bessel_i0e", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "BesselI1e", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "bessel_i1e.so", "compute_cost": 10, "kernel_name": "bessel_i1e", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "BatchToSpaceND", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NH"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NH"}], "attr": [{"name": "block_shape", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "crops", "param_type": "required", "type": "listListInt", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "batch_to_space_nd_d.so", "compute_cost": 10, "kernel_name": "batch_to_space_nd_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "SpaceToBatchND", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NH"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all", 
"reshape_type": "NH"}], "attr": [{"name": "block_shape", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "paddings", "param_type": "required", "type": "listListInt", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "space_to_batch_nd_d.so", "compute_cost": 10, "kernel_name": "space_to_batch_nd_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "BitwiseAnd", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int16", ""], ["int16", ""], ["int16", ""]], [["uint16", ""], ["uint16", ""], ["uint16", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "bitwise_and.so", "compute_cost": 10, "kernel_name": "bitwise_and", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "BitwiseOr", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int16", ""], ["int16", ""], ["int16", ""]], [["uint16", ""], ["uint16", ""], ["uint16", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "bitwise_or.so", "compute_cost": 10, "kernel_name": "bitwise_or", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": 
"BitwiseXor", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int16", ""], ["int16", ""], ["int16", ""]], [["uint16", ""], ["uint16", ""], ["uint16", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "bitwise_xor.so", "compute_cost": 10, "kernel_name": "bitwise_xor", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "ReduceAll", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "keep_dims", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["bool", ""], ["bool", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "reduce_all_d.so", "compute_cost": 10, "kernel_name": "reduce_all_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "reduce"} +{"op_name": "SparseApplyAdagrad", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": 
"required", "shape": "all"}], "attr": [{"name": "lr", "param_type": "required", "type": "float", "value": "all"}, {"name": "update_slots", "param_type": "optional", "type": "bool", "value": "all"}, {"name": "use_locking", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["int32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"]], [["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["int32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "sparse_apply_adagrad_d.so", "compute_cost": 10, "kernel_name": "sparse_apply_adagrad_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "UnsortedSegmentMin", "inputs": [{"index": 0, "name": "data", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "segment_ids", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "num_segments", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["int32", "DefaultFormat"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["int32", "DefaultFormat"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["int32", "DefaultFormat"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["int32", "DefaultFormat"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["int32", "DefaultFormat"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["int32", 
"DefaultFormat"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "NC1HWC0"], ["int32", "DefaultFormat"], ["int32", "NC1HWC0"]], [["int32", "FracZ"], ["int32", "DefaultFormat"], ["int32", "FracZ"]], [["int32", "C1HWNCoC0"], ["int32", "DefaultFormat"], ["int32", "C1HWNCoC0"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "unsorted_segment_min_d.so", "compute_cost": 10, "kernel_name": "unsorted_segment_min_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Asin", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "asin.so", "compute_cost": 10, "kernel_name": "asin", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "AsinGrad", "inputs": [{"index": 0, "name": "y", "param_type": "required", "shape": "all"}, {"index": 1, "name": "dy", "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], 
["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "asin_grad.so", "compute_cost": 10, "kernel_name": "asin_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Asinh", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "asinh.so", "compute_cost": 10, "kernel_name": "asinh", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "AsinhGrad", "inputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", 
"DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "asinh_grad.so", "compute_cost": 10, "kernel_name": "asinh_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "DivNoNan", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["int8", ""], ["int8", ""], ["int8", ""]], [["uint8", ""], ["uint8", ""], ["uint8", ""]], [["int32", ""], ["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "div_no_nan.so", "compute_cost": 10, "kernel_name": "div_no_nan", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "Atan", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "atan.so", "compute_cost": 10, "kernel_name": "atan", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "AtanGrad", "inputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": 
"ELEMWISE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "atan_grad.so", "compute_cost": 10, "kernel_name": "atan_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Atanh", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "atanh.so", "compute_cost": 10, "kernel_name": "atanh", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "Cosh", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "cosh.so", "compute_cost": 10, "kernel_name": "cosh", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": 
"formatAgnostic"} +{"op_name": "Sinh", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "sinh.so", "compute_cost": 10, "kernel_name": "sinh", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "Inv", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int32", ""], ["int32", ""]], [["float32", ""], ["float32", ""]], [["float16", ""], ["float16", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "inv.so", "compute_cost": 10, "kernel_name": "inv", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "InvGrad", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""]], [["int32", ""], ["int32", ""], ["int32", ""]], [["int8", ""], ["int8", ""], ["int8", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "inv_grad.so", "compute_cost": 10, "kernel_name": "inv_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": 
"Invert", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int16", ""], ["int16", ""]], [["uint16", ""], ["uint16", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "invert.so", "compute_cost": 10, "kernel_name": "invert", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "BasicLSTMCell", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "h", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "c", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "w", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "b", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "mask", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "ct", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "ht", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "it", "need_compile": false, "param_type": "optional", "shape": "all"}, {"index": 3, "name": "jt", "need_compile": false, "param_type": "optional", "shape": "all"}, {"index": 4, "name": "ft", "need_compile": false, "param_type": "optional", "shape": "all"}, {"index": 5, "name": "ot", "need_compile": false, "param_type": "optional", "shape": "all"}, {"index": 6, "name": "tanhct", "need_compile": false, "param_type": "optional", "shape": "all"}], "attr": [{"name": "keep_prob", "param_type": "optional", "type": "float", "value": "all"}, {"name": "forget_bias", "param_type": "optional", "type": "float", "value": "all"}, 
{"name": "state_is_tuple", "param_type": "optional", "type": "bool", "value": "true"}, {"name": "activation", "param_type": "optional", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float16", "FracZ"], ["float32", "DefaultFormat"], ["uint8", "DefaultFormat"], ["float32", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"]], [["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["uint8", "DefaultFormat"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "basic_lstm_cell.so", "compute_cost": 10, "kernel_name": "basic_lstm_cell", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "BasicLSTMCellCStateGrad", "inputs": [{"index": 0, "name": "c", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "dht", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "dct", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "it", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "jt", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "ft", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "ot", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 7, "name": "tanhct", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "dgate", "need_compile": 
false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "dct_1", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "forget_bias", "param_type": "optional", "type": "float", "value": "all"}, {"name": "activation", "param_type": "optional", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"]], [["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "basic_lstm_cell_c_state_grad.so", "compute_cost": 10, "kernel_name": "basic_lstm_cell_c_state_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "BasicLSTMCellWeightGrad", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "h", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "dgate", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "dw", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "db", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FracZ"], ["float32", "DefaultFormat"]], [["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FracZ"], ["float16", 
"DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "basic_lstm_cell_weight_grad.so", "compute_cost": 10, "kernel_name": "basic_lstm_cell_weight_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "BasicLSTMCellInputGrad", "inputs": [{"index": 0, "name": "dgate", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "w", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "dropout_mask", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "dxt", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "dht", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "keep_prob", "param_type": "optional", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "FRACTAL_NZ"], ["float16", "FracZ"], ["uint8", "DefaultFormat"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"]], [["float16", "FRACTAL_NZ"], ["float16", "FracZ"], ["uint8", "DefaultFormat"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "basic_lstm_cell_input_grad.so", "compute_cost": 10, "kernel_name": "basic_lstm_cell_input_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ConfusionMatrix", "inputs": [{"index": 0, "name": "labels", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "predictions", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "weights", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "num_classes", "param_type": "required", "type": "int", "value": "all"}, {"name": 
"dtype", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], 
[["int8", "DefaultFormat"], ["int8", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "confusion_matrix.so", "compute_cost": 10, "kernel_name": "confusion_matrix", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "BroadcastTo", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "shape", "param_type": "required", "type": "listInt", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "broadcast_to_d.so", 
"compute_cost": 10, "kernel_name": "broadcast_to_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "StridedRead", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "int", "value": "all"}, {"name": "stride", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["int8", "NC1HWC0"], ["int8", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "strided_read.so", "compute_cost": 10, "kernel_name": "strided_read", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "StridedWrite", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "int", "value": "all"}, {"name": "stride", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["int8", "NC1HWC0"], ["int8", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "strided_write.so", "compute_cost": 10, "kernel_name": "strided_write", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Range", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "start", "param_type": "required", "type": "float", "value": "all"}, {"name": "limit", "param_type": 
"required", "type": "float", "value": "all"}, {"name": "delta", "param_type": "required", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "range_d.so", "compute_cost": 10, "kernel_name": "range_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "FusedMulAddNL2loss", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "x3", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "y2", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], 
["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "fused_mul_addn_l2loss.so", "compute_cost": 10, "kernel_name": "fused_mul_addn_l2loss", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "FusedMulApplyMomentumExtern", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "lr", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "momentum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "var_copy", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "var_copy", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "use_nesterov", "param_type": "optional", "type": "bool", "value": "true,false", "default_value": "false"}], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float32", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", 
"DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float32", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float16", "NC1HWC0"], ["float32", "NC1HWC0"], ["float16", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float16", "FracZ"], ["float32", "FracZ"], ["float16", "FracZ"], ["float32", "FracZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "fused_mul_apply_momentum_extern.so", "compute_cost": 10, "kernel_name": "fused_mul_apply_momentum_extern", "partial_flag": true, "reshape_type": "", 
"dynamic_format": false, "op_pattern": ""} +{"op_name": "LambNextRight", "inputs": [{"index": 0, "name": "input_square", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "input_mul2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "mul2_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "mul3_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "truediv1_recip", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "add2_y", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "y2", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "lamb_next_right.so", "compute_cost": 10, "kernel_name": "lamb_next_right", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "SparseGatherV2", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": 
"axis", "param_type": "optional", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["int32", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int64", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int8", "NC1HWC0"], ["int32", "NC1HWC0"], ["int8", "NC1HWC0"]], [["int8", "NC1HWC0"], ["int64", "NC1HWC0"], ["int8", "NC1HWC0"]], [["int8", "FracZ"], ["int32", "FracZ"], ["int8", "FracZ"]], [["int8", "FracZ"], ["int64", "FracZ"], ["int8", "FracZ"]], [["uint8", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["int64", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint8", "NC1HWC0"], ["int32", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["uint8", "NC1HWC0"], ["int64", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["uint8", "FracZ"], ["int32", "FracZ"], ["uint8", "FracZ"]], [["uint8", "FracZ"], ["int64", "FracZ"], ["uint8", "FracZ"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int64", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "NC1HWC0"], ["int32", "NC1HWC0"], ["int32", "NC1HWC0"]], [["int32", "NC1HWC0"], ["int64", "NC1HWC0"], ["int32", "NC1HWC0"]], [["int32", "FracZ"], ["int32", "FracZ"], ["int32", "FracZ"]], [["int32", "FracZ"], ["int64", "FracZ"], ["int32", "FracZ"]], [["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "DefaultFormat"], ["int64", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["int32", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "NC1HWC0"], ["int64", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["int32", "FracZ"], ["float16", "FracZ"]], [["float16", "FracZ"], ["int64", "FracZ"], ["float16", "FracZ"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int64", "DefaultFormat"], 
["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["int32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "NC1HWC0"], ["int64", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["int32", "FracZ"], ["float32", "FracZ"]], [["float32", "FracZ"], ["int64", "FracZ"], ["float32", "FracZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "gather_v2_d.so", "compute_cost": 10, "kernel_name": "gather_v2_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "DataFormatDimMap", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "dst_format", "param_type": "optional", "type": "str", "value": "all"}, {"name": "src_format", "param_type": "optional", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int32", "NC1HWC0"], ["int32", "NC1HWC0"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "data_format_dim_map.so", "compute_cost": 10, "kernel_name": "data_format_dim_map", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "HistogramFixedWidth", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "range", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "nbins", "param_type": "required", "type": "int", "value": "all"}, {"name": "dtype", "param_type": "optional", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", 
"DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "histogram_fixed_width_d.so", "compute_cost": 10, "kernel_name": "histogram_fixed_width_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "TensorScatterUpdate", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "updates", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int32", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "tensor_scatter_update.so", "compute_cost": 10, "kernel_name": "tensor_scatter_update", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "InplaceUpdate", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "v", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": 
"all"}], "attr": [{"name": "indices", "param_type": "required", "type": "listInt", "value": "all"}], "fusion_type": "INPLACE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "inplace_update_d.so", "compute_cost": 10, "kernel_name": "inplace_update_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "SplitV", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "dynamic", "shape": "all"}], "attr": [{"name": "size_splits", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "split_dim", "param_type": "required", "type": "int", "value": "all"}, {"name": "num_split", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["", ""], ["", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "split_v_d.so", "compute_cost": 10, "kernel_name": "split_v_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "InTopK", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "k", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, 
"binfile_name": "in_top_k.so", "compute_cost": 10, "kernel_name": "in_top_k", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "LinSpace", "inputs": [{"index": 0, "name": "assist", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "start", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "stop", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "num", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float32", ""], ["float32", ""], ["float32", ""], ["int32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "lin_space.so", "compute_cost": 10, "kernel_name": "lin_space", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "MatrixDiag", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "assist", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "NC1HWC0"], ["int32", "NC1HWC0"], ["int32", "NC1HWC0"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int8", "NC1HWC0"], ["int8", "NC1HWC0"], ["int8", "NC1HWC0"]], [["int8", 
"DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "matrix_diag_d.so", "compute_cost": 10, "kernel_name": "matrix_diag_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "MatrixDiagPart", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "assist", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "NC1HWC0"], ["int32", "NC1HWC0"], ["int32", "NC1HWC0"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int8", "NC1HWC0"], ["int8", "NC1HWC0"], ["int8", "NC1HWC0"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "matrix_diag_part_d.so", "compute_cost": 10, "kernel_name": "matrix_diag_part_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "MatrixSetDiag", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, 
"name": "diagonal", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "assist", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "NC1HWC0"], ["int32", "NC1HWC0"], ["int32", "NC1HWC0"], ["int32", "NC1HWC0"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int8", "NC1HWC0"], ["int8", "NC1HWC0"], ["int8", "NC1HWC0"], ["int8", "NC1HWC0"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "matrix_set_diag_d.so", "compute_cost": 10, "kernel_name": "matrix_set_diag_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "LRN", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "depth_radius", "param_type": "optional", "type": "int", "value": "all", "default_value": "5"}, {"name": "bias", "param_type": "optional", "type": 
"float", "value": "all", "default_value": "1.0"}, {"name": "alpha", "param_type": "optional", "type": "float", "value": "all", "default_value": "1.0"}, {"name": "beta", "param_type": "optional", "type": "float", "value": "all", "default_value": "0.5"}, {"name": "norm_region", "param_type": "optional", "type": "str", "value": "all", "default_value": "ACROSS_CHANNELS"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NCHW"], ["float16", "NCHW"]], [["float32", "NCHW"], ["float32", "NCHW"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "lrn.so", "compute_cost": 10, "kernel_name": "lrn", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "LRNGrad", "inputs": [{"index": 0, "name": "grads", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "depth_radius", "param_type": "optional", "type": "int", "value": "all"}, {"name": "bias", "param_type": "optional", "type": "float", "value": "all"}, {"name": "alpha", "param_type": "optional", "type": "float", "value": "all"}, {"name": "beta", "param_type": "optional", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NCHW"], ["float16", "NCHW"], ["float16", "NCHW"], ["float16", "NCHW"]], [["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "lrn_grad.so", "compute_cost": 10, "kernel_name": "lrn_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ScatterMax", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, 
{"index": 1, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "updates", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "use_locking", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "scatter_max.so", "compute_cost": 10, "kernel_name": "scatter_max", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ScatterMin", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "updates", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "use_locking", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": 
"scatter_min.so", "compute_cost": 10, "kernel_name": "scatter_min", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ScatterSub", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "updates", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "use_locking", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int32", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "scatter_sub.so", "compute_cost": 10, "kernel_name": "scatter_sub", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ScatterMul", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "updates", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "use_locking", "param_type": 
"optional", "type": "bool", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int32", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "scatter_mul.so", "compute_cost": 10, "kernel_name": "scatter_mul", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ScatterDiv", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "updates", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "use_locking", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int32", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint8", 
"DefaultFormat"], ["uint8", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "scatter_div.so", "compute_cost": 10, "kernel_name": "scatter_div", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Mod", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["int8", ""], ["int8", ""], ["int8", ""]], [["uint8", ""], ["uint8", ""], ["uint8", ""]], [["int32", ""], ["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "mod.so", "compute_cost": 10, "kernel_name": "mod", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "MaxPoolGradGrad", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "ksize", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "strides", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "padding", "param_type": "required", "type": "str", "value": "all"}, {"name": "data_format", "param_type": "optional", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]]], 
"imply_type": "TBE", "async_flag": false, "binfile_name": "max_pool_grad_grad.so", "compute_cost": 10, "kernel_name": "max_pool_grad_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "MaxPoolGradGradWithArgmax", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "argmax", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "ksize", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "strides", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "padding", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["uint16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["int64", "NC1HWC0"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "max_pool_grad_grad_with_argmax.so", "compute_cost": 10, "kernel_name": "max_pool_grad_grad_with_argmax", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "PopulationCount", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int16", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["int16", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint16", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["uint16", "DefaultFormat"], ["uint8", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "population_count.so", 
"compute_cost": 10, "kernel_name": "population_count", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ParallelConcat", "inputs": [{"index": 0, "name": "values", "need_compile": false, "param_type": "dynamic", "shape": "all"}], "outputs": [{"index": 0, "name": "output_data", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "shape", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "N", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["bool", "DefaultFormat"], ["bool", "DefaultFormat"]], [["bool", "NC1HWC0"], ["bool", "NC1HWC0"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int8", "NC1HWC0"], ["int8", "NC1HWC0"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], [["int16", "NC1HWC0"], ["int16", "NC1HWC0"]], [["uint16", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["uint16", "NC1HWC0"], ["uint16", "NC1HWC0"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "NC1HWC0"], ["int32", "NC1HWC0"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint32", "NC1HWC0"], ["uint32", "NC1HWC0"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["int64", "NC1HWC0"], ["int64", "NC1HWC0"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["uint64", "NC1HWC0"], ["uint64", "NC1HWC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["bool", "NHWC"], ["bool", "NHWC"]], [["bool", "NCHW"], ["bool", "NCHW"]], [["int8", "NHWC"], ["int8", "NHWC"]], [["int8", "NCHW"], ["int8", "NCHW"]], [["uint8", "NHWC"], ["uint8", "NHWC"]], [["uint8", "NCHW"], ["uint8", "NCHW"]], 
[["int16", "NHWC"], ["int16", "NHWC"]], [["int16", "NCHW"], ["int16", "NCHW"]], [["uint16", "NHWC"], ["uint16", "NHWC"]], [["uint16", "NCHW"], ["uint16", "NCHW"]], [["int32", "NHWC"], ["int32", "NHWC"]], [["int32", "NCHW"], ["int32", "NCHW"]], [["uint32", "NHWC"], ["uint32", "NHWC"]], [["uint32", "NCHW"], ["uint32", "NCHW"]], [["int64", "NHWC"], ["int64", "NHWC"]], [["int64", "NCHW"], ["int64", "NCHW"]], [["uint64", "NHWC"], ["uint64", "NHWC"]], [["uint64", "NCHW"], ["uint64", "NCHW"]], [["float16", "NHWC"], ["float16", "NHWC"]], [["float16", "NCHW"], ["float16", "NCHW"]], [["float32", "NHWC"], ["float32", "NHWC"]], [["float32", "NCHW"], ["float32", "NCHW"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "parallel_concat.so", "compute_cost": 10, "kernel_name": "parallel_concat", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} diff --git a/graphengine b/graphengine index 4084909d62..31aa96ef41 160000 --- a/graphengine +++ b/graphengine @@ -1 +1 @@ -Subproject commit 4084909d62c159da6ba316f61ad3d02a4857b34b +Subproject commit 31aa96ef41067a0ecdc4113ef245f8ede48f3457 diff --git a/include/ms_tensor.h b/include/ms_tensor.h index 1f9661df5e..fc59e12328 100644 --- a/include/ms_tensor.h +++ b/include/ms_tensor.h @@ -20,7 +20,7 @@ #include #include #include -#include "ir/dtype/type_id.h" +#include "mindspore/core/ir/dtype/type_id.h" namespace mindspore { #define MS_API __attribute__((visibility("default"))) diff --git a/mindspore/_extends/parse/parser.py b/mindspore/_extends/parse/parser.py index a6043eb787..9d715fdf53 100644 --- a/mindspore/_extends/parse/parser.py +++ b/mindspore/_extends/parse/parser.py @@ -334,7 +334,7 @@ class Parser: def __init__(self, fn: (types.FunctionType, types.MethodType), parse_method=None) -> None: self.fn = fn self.parse_method = parse_method - _, self.line_offset = inspect.getsourcelines(self.fn) + self.line_offset = 0 self.filename: str = inspect.getfile(self.fn) # Used to resolve the 
function's globals Namespace. @@ -350,7 +350,8 @@ class Parser: logger.debug("fn = %r", self.fn) tree = None if isinstance(self.fn, (types.FunctionType, types.MethodType)): - original_src = inspect.getsource(self.fn) + lines, self.line_offset = inspect.getsourcelines(self.fn) + original_src = ''.join(lines) hexstr = hashlib.sha256(original_src.encode()).hexdigest() tree = Parser.ast_cache.get(hexstr) if not tree: diff --git a/mindspore/_extends/parse/standard_method.py b/mindspore/_extends/parse/standard_method.py index 936099a4fb..d70c6edcf4 100644 --- a/mindspore/_extends/parse/standard_method.py +++ b/mindspore/_extends/parse/standard_method.py @@ -108,7 +108,8 @@ def enumerate_(x, start=0): """Enumerate list or tuple.""" x_type = F.typeof(x) ret = () - if check_is_tuple_or_list(x_type, "enumerate"): + op_name = "enumerate" + if check_is_tuple_or_list(x_type, op_name, "first input") and check_is_const_int(start, op_name, "start"): ret = zip(range(start, start + len(x)), x) return ret @@ -123,11 +124,22 @@ def while_cond(x): @constexpr -def check_is_tuple_or_list(x, op_name): +def check_is_tuple_or_list(x, op_name, arg_name): """check whether x is list or tuple.""" if isinstance(x, (mstype.list_type, mstype.tuple_type)): return True - raise TypeError(f"For '{op_name}', the input parameter should be tuple or list, but got {x}.") + raise TypeError(f"For '{op_name}', the '{arg_name}' should be tuple or list, but got {x}.") + + +@constexpr +def check_is_const_int(x, op_name, arg_name): + """check whether x is const int.""" + if x is None: + raise TypeError(f"For '{op_name}', the '{arg_name}' should be a const int number, but got not const.") + if not isinstance(x, int): + raise TypeError(f"For '{op_name}', the '{arg_name}' should be a const int number, but got {x}.") + return True + @constexpr def check_is_tensor_bool_cond(shp): diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt index 9dc1502aa5..bb02f338f6 100644 --- 
a/mindspore/ccsrc/CMakeLists.txt +++ b/mindspore/ccsrc/CMakeLists.txt @@ -1,4 +1,5 @@ ## common setting +include_directories(${CMAKE_SOURCE_DIR}/mindspore/core) include_directories(${CMAKE_CURRENT_SOURCE_DIR}) include_directories(${CMAKE_BINARY_DIR}) link_directories(${CMAKE_SOURCE_DIR}/build/mindspore/graphengine) @@ -35,20 +36,20 @@ if(ENABLE_GPU) include_directories(${CUDNN_PATH} ${CUDA_PATH} ${CUDA_INCLUDE_DIRS}) file(GLOB_RECURSE GPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "device/gpu/*.cc" - "device/gpu/*.cu" - "kernel/gpu/*.cu" - "kernel/akg/gpu/*.cc" - "kernel/akg/akg_kernel_build.cc" - "kernel/akg/akg_kernel_attrs_process.cc" + "runtime/device/gpu/*.cc" + "runtime/device/gpu/*.cu" + "backend/kernel_compiler/gpu/*.cu" + "backend/kernel_compiler/akg/gpu/*.cc" + "backend/kernel_compiler/akg/akg_kernel_build.cc" + "backend/kernel_compiler/akg/akg_kernel_attrs_process.cc" ) list(APPEND CUDA_NVCC_FLAGS -arch=sm_53) - list(REMOVE_ITEM GPU_SRC_LIST "device/gpu/blocking_queue.cc" "device/gpu/gpu_buffer_mgr.cc") - list(REMOVE_ITEM GPU_SRC_LIST "device/gpu/mpi/mpi_initializer.cc" - "device/gpu/distribution/collective_wrapper.cc" - "device/gpu/distribution/mpi_wrapper.cc" - "device/gpu/distribution/nccl_wrapper.cc" + list(REMOVE_ITEM GPU_SRC_LIST "runtime/device/gpu/blocking_queue.cc" "runtime/device/gpu/gpu_buffer_mgr.cc") + list(REMOVE_ITEM GPU_SRC_LIST "runtime/device/gpu/mpi/mpi_initializer.cc" + "runtime/device/gpu/distribution/collective_wrapper.cc" + "runtime/device/gpu/distribution/mpi_wrapper.cc" + "runtime/device/gpu/distribution/nccl_wrapper.cc" ) set(NVCC_TMP_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) @@ -56,6 +57,7 @@ if(ENABLE_GPU) set_property(SOURCE ${GPU_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE) cuda_add_library(gpu_cuda_lib STATIC ${GPU_SRC_LIST}) set(CMAKE_CXX_FLAGS ${NVCC_TMP_CMAKE_CXX_FLAGS}) + add_compile_definitions(ENABLE_GPU) endif () ## make flatuffer files @@ -101,16 +103,20 @@ if 
(ENABLE_DUMP_PROTO) endif () if (ENABLE_D) - include_directories("${CMAKE_BINARY_DIR}/kernel/aicpu") + include_directories("${CMAKE_BINARY_DIR}/backend/kernel_compiler/aicpu") include_directories("${CMAKE_BINARY_DIR}/predict/generator/ir") - file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "kernel/aicpu/proto/*.proto") + file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "backend/kernel_compiler/aicpu/proto/*.proto") ms_protobuf_generate(PROTOSRCS PROTOHDRS ${PROTO_IN}) file(GLOB_RECURSE PROTO_INNER RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "predict/proto/*.proto") ms_protobuf_generate(PREDICT_PROTOSRCS PREDICT_PROTOHDRS ${PROTO_INNER}) + file(GLOB_RECURSE PROTO_DUMP RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "runtime/device/ascend/dump/proto/*.proto") + ms_protobuf_generate(DUMP_PROTOSRCS PROTOHDRS ${PROTO_DUMP}) + list(APPEND MINDSPORE_PROTO_LIST ${PROTOSRCS}) list(APPEND MINDSPORE_PROTO_LIST ${PREDICT_PROTOSRCS}) + list(APPEND MINDSPORE_PROTO_LIST ${DUMP_PROTOSRCS}) add_compile_definitions(ENABLE_D) endif () @@ -121,18 +127,36 @@ if (MINDSPORE_PROTO_LIST) endif() ## make sub objects -set(SUB_COMP - transform pre_activate parallel pipeline device kernel common debug gvar ir onnx operator optimizer predict - pybind_api pynative session utils vm +set(SUB_COMP + transform/graph_ir + transform/onnx + backend/optimizer + backend/kernel_compiler + backend/session + runtime/device + frontend/optimizer + frontend/parallel + frontend/operator + pipeline/jit + pipeline/pynative + common debug gvar predict pybind_api utils vm ) foreach (_comp ${SUB_COMP}) add_subdirectory(${_comp}) - if (TARGET _mindspore_${_comp}_obj) - list(APPEND SUB_OBJECTS_SRC $) - add_dependencies(_mindspore_${_comp}_obj proto_input flat_input) + string(REPLACE "/" "_" sub ${_comp}) + if (TARGET _mindspore_${sub}_obj) + list(APPEND SUB_OBJECTS_SRC $) + add_dependencies(_mindspore_${sub}_obj proto_input flat_input) endif () endforeach () 
+add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/base base) +list(APPEND SUB_OBJECTS_SRC $) +add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/abstract abstract) +list(APPEND SUB_OBJECTS_SRC $) +add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/ir ir) +list(APPEND SUB_OBJECTS_SRC $) +add_dependencies(_mindspore_base_obj _mindspore_ir_obj _mindspore_abstract_obj proto_input flat_input) set_property(SOURCE ${SUB_OBJECTS_SRC} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_ME) add_library(mindspore STATIC ${SUB_OBJECTS_SRC}) @@ -204,8 +228,8 @@ endif() # set c_expression building set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) -set_property(SOURCE "pipeline/init.cc" PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PIPELINE) -pybind11_add_module(_c_expression "pipeline/init.cc") +set_property(SOURCE "pipeline/jit/init.cc" PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PIPELINE) +pybind11_add_module(_c_expression "pipeline/jit/init.cc") MESSAGE(STATUS "operation system is ${CMAKE_SYSTEM}") if (CMAKE_SYSTEM_NAME MATCHES "Linux") @@ -231,9 +255,11 @@ else () target_link_libraries(_c_expression PRIVATE -Wl,--whole-archive mindspore -Wl,--no-whole-archive) target_link_libraries(_c_expression PRIVATE mindspore::pybind11_module) target_link_libraries(_c_expression PRIVATE mindspore_gvar) - target_link_libraries(_c_expression PRIVATE mindspore::pslite mindspore::protobuf ${zeromq_DIRPATH}/zmq_install/lib/libzmq.a) - if (${ENABLE_IBVERBS} STREQUAL "ON") - target_link_libraries(_c_expression PRIVATE ibverbs rdmacm) + if (NOT ENABLE_GE) + target_link_libraries(_c_expression PRIVATE mindspore::pslite mindspore::protobuf ${zeromq_DIRPATH}/zmq_install/lib/libzmq.a) + if (${ENABLE_IBVERBS} STREQUAL "ON") + target_link_libraries(_c_expression PRIVATE ibverbs rdmacm) + endif() endif() endif () @@ -260,8 +286,8 @@ if (ENABLE_CPU) endif () if (ENABLE_MINDDATA) - add_subdirectory(mindrecord) - add_subdirectory(dataset) + 
add_subdirectory(minddata/mindrecord) + add_subdirectory(minddata/dataset) endif () # build inference @@ -270,7 +296,7 @@ set(LOAD_ONNX_SRC ${CMAKE_CURRENT_SOURCE_DIR}/utils/load_onnx/anf_model_parser.cc ) add_library(inference SHARED - ${CMAKE_CURRENT_SOURCE_DIR}/session/session.cc + ${CMAKE_CURRENT_SOURCE_DIR}/backend/session/session.cc ${LOAD_ONNX_SRC} ) target_link_libraries(inference PRIVATE ${PYTHON_LIBRARIES} ${SECUREC_LIBRARY} diff --git a/mindspore/ccsrc/kernel/CMakeLists.txt b/mindspore/ccsrc/backend/kernel_compiler/CMakeLists.txt similarity index 73% rename from mindspore/ccsrc/kernel/CMakeLists.txt rename to mindspore/ccsrc/backend/kernel_compiler/CMakeLists.txt index ceea6b1a99..b412d83d11 100644 --- a/mindspore/ccsrc/kernel/CMakeLists.txt +++ b/mindspore/ccsrc/backend/kernel_compiler/CMakeLists.txt @@ -25,7 +25,15 @@ if (ENABLE_CPU) file(GLOB_RECURSE CPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "cpu/*.cc" ) - + + list(REMOVE_ITEM CPU_SRC_LIST "cpu/ps/push_kernel.cc" + "cpu/ps/pull_kernel.cc" + "cpu/ps/embedding_look_up_ps_kernel.cc" + "cpu/ps/embedding_look_up_proxy_kernel.cc" + "cpu/ps/apply_momentum_ps_kernel.cc" + "cpu/ps/sparse_apply_adam_ps_kernel.cc" + "cpu/ps/sparse_apply_ftrl_ps_kernel.cc") + if (NOT ENABLE_MPI) list(REMOVE_ITEM CPU_SRC_LIST "cpu/allgather_cpu_kernel.cc") list(REMOVE_ITEM CPU_SRC_LIST "cpu/reduce_scatter_cpu_kernel.cc") @@ -55,4 +63,4 @@ endif() set_property(SOURCE ${KERNEL_SRC_LIST} ${CPU_SRC_LIST} ${GPU_SRC_LIST} ${D_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_KERNEL) -add_library(_mindspore_kernel_obj OBJECT ${KERNEL_SRC_LIST} ${CPU_SRC_LIST} ${GPU_SRC_LIST} ${D_SRC_LIST}) +add_library(_mindspore_backend_kernel_compiler_obj OBJECT ${KERNEL_SRC_LIST} ${CPU_SRC_LIST} ${GPU_SRC_LIST} ${D_SRC_LIST}) diff --git a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_build.cc similarity index 96% rename from 
mindspore/ccsrc/kernel/aicpu/aicpu_kernel_build.cc rename to mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_build.cc index 99e792216f..7e7fd20f39 100644 --- a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_build.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_build.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "kernel/aicpu/aicpu_kernel_build.h" +#include "backend/kernel_compiler/aicpu/aicpu_kernel_build.h" #include #include #include @@ -22,18 +22,18 @@ #include #include #include -#include "device/kernel_runtime.h" -#include "kernel/aicpu/aicpu_kernel_mod.h" -#include "kernel/akg/akg_kernel_build.h" +#include "runtime/device/kernel_runtime.h" +#include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h" +#include "backend/kernel_compiler/akg/akg_kernel_build.h" #include "proto/tensor.pb.h" #include "proto/tensor_shape.pb.h" #include "proto/attr.pb.h" #include "proto/node_def.pb.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "common/utils.h" -#include "kernel/aicpu/aicpu_util.h" -#include "session/kernel_graph.h" -#include "kernel/common_utils.h" +#include "backend/kernel_compiler/aicpu/aicpu_util.h" +#include "backend/session/kernel_graph.h" +#include "backend/kernel_compiler/common_utils.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_build.h b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_build.h similarity index 95% rename from mindspore/ccsrc/kernel/aicpu/aicpu_kernel_build.h rename to mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_build.h index a3c24ae49e..6e2ee3959b 100644 --- a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_build.h +++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_build.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_BUILD_H_ #define 
MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_BUILD_H_ #include -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_metadata.cc b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_metadata.cc similarity index 91% rename from mindspore/ccsrc/kernel/aicpu/aicpu_kernel_metadata.cc rename to mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_metadata.cc index 3670a2d76f..76c29b9f5c 100644 --- a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_metadata.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_metadata.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include "kernel/aicpu/aicpu_kernel_metadata.h" +#include "backend/kernel_compiler/aicpu/aicpu_kernel_metadata.h" #include #include -#include "kernel/oplib/oplib.h" -#include "kernel/common_utils.h" -#include "kernel/aicpu/aicpu_util.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/oplib/oplib.h" +#include "backend/kernel_compiler/common_utils.h" +#include "backend/kernel_compiler/aicpu/aicpu_util.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_metadata.h b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_metadata.h similarity index 95% rename from mindspore/ccsrc/kernel/aicpu/aicpu_kernel_metadata.h rename to mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_metadata.h index 74e667856e..e21f4eace4 100644 --- a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_metadata.h +++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_metadata.h @@ -20,7 +20,7 @@ #include #include #include -#include "kernel/kernel_build_info.h" +#include "backend/kernel_compiler/kernel_build_info.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_mod.cc 
b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_mod.cc similarity index 95% rename from mindspore/ccsrc/kernel/aicpu/aicpu_kernel_mod.cc rename to mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_mod.cc index 2213f176cc..e18b3169f3 100644 --- a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_mod.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_mod.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/aicpu/aicpu_kernel_mod.h" +#include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h" #include #include @@ -23,9 +23,10 @@ #include "runtime/mem.h" #include "runtime/rt.h" -#include "kernel/aicpu/aicpu_kernel_build.h" +#include "backend/kernel_compiler/aicpu/aicpu_kernel_build.h" #include "utils/convert_utils.h" -#include "kernel/aicpu/aicpu_util.h" +#include "backend/kernel_compiler/aicpu/aicpu_util.h" +#include "utils/context/ms_context.h" using AicpuTaskInfoPtr = std::shared_ptr; @@ -144,8 +145,9 @@ std::vector AicpuOpKernelMod::GenTask(const std::vector if (node_name_ == kTopK) { node_name_ = kTopKV2; } + AicpuTaskInfoPtr task_info_ptr = make_shared( - stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs); + kernel_name_, stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs, NeedDump()); MS_LOG(INFO) << "AicpuOpKernelMod GenTask end"; return {task_info_ptr}; diff --git a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_mod.h b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_mod.h similarity index 96% rename from mindspore/ccsrc/kernel/aicpu/aicpu_kernel_mod.h rename to mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_mod.h index 3ee9bd2a15..82260010ea 100644 --- a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_mod.h +++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_mod.h @@ -18,8 +18,8 @@ #include #include #include -#include "kernel/ascend_kernel_mod.h" -#include "kernel/aicpu/aicpu_util.h" +#include 
"backend/kernel_compiler/ascend_kernel_mod.h" +#include "backend/kernel_compiler/aicpu/aicpu_util.h" namespace mindspore { namespace kernel { class AicpuOpKernelMod : public AscendKernelMod { diff --git a/mindspore/ccsrc/kernel/aicpu/aicpu_util.cc b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_util.cc similarity index 95% rename from mindspore/ccsrc/kernel/aicpu/aicpu_util.cc rename to mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_util.cc index a617f56f8f..790319daa6 100644 --- a/mindspore/ccsrc/kernel/aicpu/aicpu_util.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_util.cc @@ -13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "kernel/aicpu/aicpu_util.h" +#include "backend/kernel_compiler/aicpu/aicpu_util.h" #include #include #include "proto/types.pb.h" #include "runtime/mem.h" #include "runtime/rt.h" #include "utils/convert_utils.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/aicpu/aicpu_util.h b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_util.h similarity index 97% rename from mindspore/ccsrc/kernel/aicpu/aicpu_util.h rename to mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_util.h index f2092abbe2..fd4495afeb 100644 --- a/mindspore/ccsrc/kernel/aicpu/aicpu_util.h +++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_util.h @@ -20,7 +20,7 @@ #include #include #include -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" namespace mindspore { namespace kernel { @@ -29,7 +29,6 @@ constexpr auto kInitData = "InitData"; constexpr auto kGetNext = "GetNext"; constexpr auto kPrint = "Print"; constexpr auto kPack = "Pack"; - constexpr auto kOutputTypes = "output_types"; constexpr auto kOutputShapes = "output_shapes"; constexpr auto kChannelName = "channel_name"; diff --git 
a/mindspore/ccsrc/kernel/aicpu/proto/attr.proto b/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/attr.proto similarity index 100% rename from mindspore/ccsrc/kernel/aicpu/proto/attr.proto rename to mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/attr.proto diff --git a/mindspore/ccsrc/kernel/aicpu/proto/node_def.proto b/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/node_def.proto similarity index 100% rename from mindspore/ccsrc/kernel/aicpu/proto/node_def.proto rename to mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/node_def.proto diff --git a/mindspore/ccsrc/kernel/aicpu/proto/tensor.proto b/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/tensor.proto similarity index 100% rename from mindspore/ccsrc/kernel/aicpu/proto/tensor.proto rename to mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/tensor.proto diff --git a/mindspore/ccsrc/kernel/aicpu/proto/tensor_shape.proto b/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/tensor_shape.proto similarity index 100% rename from mindspore/ccsrc/kernel/aicpu/proto/tensor_shape.proto rename to mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/tensor_shape.proto diff --git a/mindspore/ccsrc/kernel/aicpu/proto/types.proto b/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/types.proto similarity index 100% rename from mindspore/ccsrc/kernel/aicpu/proto/types.proto rename to mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/types.proto diff --git a/mindspore/ccsrc/kernel/akg/akg_kernel_attrs_process.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_attrs_process.cc similarity index 98% rename from mindspore/ccsrc/kernel/akg/akg_kernel_attrs_process.cc rename to mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_attrs_process.cc index 018fbe4f2a..73fdb5c11b 100644 --- a/mindspore/ccsrc/kernel/akg/akg_kernel_attrs_process.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_attrs_process.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing 
permissions and * limitations under the License. */ -#include "kernel/akg/akg_kernel_attrs_process.h" +#include "backend/kernel_compiler/akg/akg_kernel_attrs_process.h" #include -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/akg/akg_kernel_attrs_process.h b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_attrs_process.h similarity index 98% rename from mindspore/ccsrc/kernel/akg/akg_kernel_attrs_process.h rename to mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_attrs_process.h index 9d15d4f9e9..9ba724db42 100644 --- a/mindspore/ccsrc/kernel/akg/akg_kernel_attrs_process.h +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_attrs_process.h @@ -22,7 +22,7 @@ #include #include "ir/anf.h" #include "utils/utils.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/akg/akg_kernel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.cc similarity index 99% rename from mindspore/ccsrc/kernel/akg/akg_kernel_build.cc rename to mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.cc index 0e8d93d47f..9c13629b1b 100644 --- a/mindspore/ccsrc/kernel/akg/akg_kernel_build.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/akg/akg_kernel_build.h" +#include "backend/kernel_compiler/akg/akg_kernel_build.h" #include #include #include @@ -35,8 +35,8 @@ #include "utils/convert_utils.h" #include "utils/any.h" #include "utils/utils.h" -#include "session/anf_runtime_algorithm.h" -#include "kernel/akg/akg_kernel_attrs_process.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/akg/akg_kernel_attrs_process.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/akg/akg_kernel_build.h b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.h similarity index 95% rename from mindspore/ccsrc/kernel/akg/akg_kernel_build.h rename to mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.h index 15fa03f45b..7b6a2f0b86 100644 --- a/mindspore/ccsrc/kernel/akg/akg_kernel_build.h +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.h @@ -22,11 +22,11 @@ #include #include #include -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" #include "ir/dtype.h" #include -#include "kernel/common_utils.h" -#include "kernel/oplib/oplib.h" +#include "backend/kernel_compiler/common_utils.h" +#include "backend/kernel_compiler/oplib/oplib.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/akg/akg_kernel_metadata.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_metadata.cc similarity index 88% rename from mindspore/ccsrc/kernel/akg/akg_kernel_metadata.cc rename to mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_metadata.cc index 3515add1e0..f3567428d3 100644 --- a/mindspore/ccsrc/kernel/akg/akg_kernel_metadata.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_metadata.cc @@ -14,11 +14,11 @@ * limitations under the License. 
*/ -#include "kernel/akg/akg_kernel_metadata.h" +#include "backend/kernel_compiler/akg/akg_kernel_metadata.h" #include -#include "session/anf_runtime_algorithm.h" -#include "kernel/oplib/oplib.h" -#include "kernel/common_utils.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/oplib/oplib.h" +#include "backend/kernel_compiler/common_utils.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/akg/akg_kernel_metadata.h b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_metadata.h similarity index 95% rename from mindspore/ccsrc/kernel/akg/akg_kernel_metadata.h rename to mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_metadata.h index 5e329f0080..02785c6cdb 100644 --- a/mindspore/ccsrc/kernel/akg/akg_kernel_metadata.h +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_metadata.h @@ -21,7 +21,7 @@ #include #include #include -#include "kernel/kernel_build_info.h" +#include "backend/kernel_compiler/kernel_build_info.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.cc similarity index 97% rename from mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.cc rename to mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.cc index 7200a91ac0..d698c89bc9 100644 --- a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/akg/ascend/akg_ascend_kernel_build.h" +#include "backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.h" #include #include @@ -26,12 +26,12 @@ #include #include "ir/dtype.h" #include "ir/func_graph.h" -#include "kernel/kernel.h" -#include "kernel/common_utils.h" -#include "kernel/tbe/tbe_utils.h" -#include "kernel/akg/ascend/akg_ascend_kernel_mod.h" -#include "kernel/akg/akg_kernel_attrs_process.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/kernel_compiler/common_utils.h" +#include "backend/kernel_compiler/tbe/tbe_utils.h" +#include "backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.h" +#include "backend/kernel_compiler/akg/akg_kernel_attrs_process.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.h b/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.h similarity index 95% rename from mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.h rename to mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.h index 01752911ed..713b65a451 100644 --- a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.h +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.h @@ -22,8 +22,8 @@ #include #include #include "ir/anf.h" -#include "kernel/kernel.h" -#include "kernel/akg/akg_kernel_build.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/kernel_compiler/akg/akg_kernel_build.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.cc similarity index 94% rename from mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.cc rename to mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.cc index 69fc82aad3..8bb4940778 
100644 --- a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/akg/ascend/akg_ascend_kernel_mod.h" +#include "backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.h" #include #include #include @@ -26,6 +26,7 @@ #include "runtime/rt.h" #include "utils/log_adapter.h" #include "utils/convert_utils.h" +#include "utils/context/ms_context.h" namespace mindspore { namespace kernel { @@ -123,8 +124,8 @@ std::vector AkgKernelMod::GenTask(const std::vector &in MS_LOG(DEBUG) << "The block_dim is:" << block_dim; TbeTaskInfoPtr task_info_ptr = make_shared( - stream_id, stub_func, block_dim, args, args_size, sm_desc, binary, binary_size, meta_data, input_data_addrs, - output_data_addrs, workspace_addrs); + kernel_name_, stream_id, stub_func, block_dim, args, args_size, sm_desc, binary, binary_size, meta_data, + input_data_addrs, output_data_addrs, workspace_addrs, NeedDump()); return {task_info_ptr}; } } // namespace kernel diff --git a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.h b/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.h similarity index 95% rename from mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.h rename to mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.h index 18d342f629..3ea36f1a23 100644 --- a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.h +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.h @@ -19,8 +19,8 @@ #include #include #include -#include "kernel/ascend_kernel_mod.h" -#include "kernel/tbe/tbe_utils.h" +#include "backend/kernel_compiler/ascend_kernel_mod.h" +#include "backend/kernel_compiler/tbe/tbe_utils.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_build.cc 
b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.cc similarity index 85% rename from mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_build.cc rename to mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.cc index 534e355802..96fcd1869e 100644 --- a/mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_build.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.cc @@ -14,12 +14,12 @@ * limitations under the License. */ -#include "kernel/akg/gpu/akg_gpu_kernel_build.h" +#include "backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.h" #include #include -#include "kernel/kernel.h" -#include "kernel/akg/akg_kernel_build.h" -#include "kernel/akg/gpu/akg_gpu_kernel_mod.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/kernel_compiler/akg/akg_kernel_build.h" +#include "backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h" #include "common/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_build.h b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.h similarity index 93% rename from mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_build.h rename to mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.h index 3a1145140f..abb6d1f030 100644 --- a/mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_build.h +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.h @@ -16,8 +16,8 @@ #ifndef MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_BUILD_H_ #define MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_BUILD_H_ -#include "kernel/kernel.h" -#include "ir/base.h" +#include "backend/kernel_compiler/kernel.h" +#include "base/base.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_mod.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.cc similarity index 98% rename from mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_mod.cc rename to 
mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.cc index 64590cd9b8..d527f8ec76 100644 --- a/mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_mod.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/akg/gpu/akg_gpu_kernel_mod.h" +#include "backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h" #include #include #include "nlohmann/json.hpp" diff --git a/mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_mod.h b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h similarity index 98% rename from mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_mod.h rename to mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h index df9cb069f7..a6a17d033f 100644 --- a/mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_mod.h +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h @@ -21,7 +21,7 @@ #include #include #include -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/ascend_kernel_mod.h b/mindspore/ccsrc/backend/kernel_compiler/ascend_kernel_mod.h similarity index 84% rename from mindspore/ccsrc/kernel/ascend_kernel_mod.h rename to mindspore/ccsrc/backend/kernel_compiler/ascend_kernel_mod.h index 0aee881f7d..c6398eda9e 100644 --- a/mindspore/ccsrc/kernel/ascend_kernel_mod.h +++ b/mindspore/ccsrc/backend/kernel_compiler/ascend_kernel_mod.h @@ -20,7 +20,10 @@ #include #include #include "framework/ge_runtime/task_info.h" -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" +#ifdef ENABLE_DATA_DUMP +#include "debug/data_dump_parser.h" +#endif using TaskInfoPtr = std::shared_ptr; namespace mindspore { @@ -31,6 +34,13 @@ class AscendKernelMod : public KernelMod { const std::vector &, uint32_t) = 0; uint32_t block_dim() { return block_dim_; } uint32_t stream_id() { return stream_id_; } + virtual bool NeedDump() { 
+#ifdef ENABLE_DATA_DUMP + return DataDumpParser::GetInstance().NeedDump(kernel_name_); +#else + return false; +#endif + } protected: uint32_t block_dim_{1}; diff --git a/mindspore/ccsrc/kernel/common_utils.cc b/mindspore/ccsrc/backend/kernel_compiler/common_utils.cc similarity index 83% rename from mindspore/ccsrc/kernel/common_utils.cc rename to mindspore/ccsrc/backend/kernel_compiler/common_utils.cc index ab4f59e549..f4495cdb9d 100644 --- a/mindspore/ccsrc/kernel/common_utils.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/common_utils.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/common_utils.h" +#include "backend/kernel_compiler/common_utils.h" #include #include #include @@ -22,16 +22,18 @@ #include #include #include "nlohmann/json.hpp" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "common/utils.h" #include "ir/manager.h" #include "ir/meta_tensor.h" #include "ir/func_graph.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "utils/graph_utils.h" namespace mindspore { namespace kernel { +constexpr char kAxis[] = "axis"; +constexpr char kTypeInt32[] = "Int32"; const std::unordered_map type_id_maps = { {"float", TypeId::kNumberTypeFloat32}, {"float16", TypeId::kNumberTypeFloat16}, {"float32", TypeId::kNumberTypeFloat32}, {"float64", TypeId::kNumberTypeFloat64}, @@ -579,8 +581,40 @@ void WorkerForReduceSparseGradient(WorkerParamsForReduceSparseGradient param) { } } +void RunMultiThreadReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, + size_t outer_dim, std::vector> *sorted_indices, + std::vector *slice_positions) { + MS_LOG(DEBUG) << "Start"; + size_t thread_num = 24; + if (slice_positions->size() < thread_num) { + thread_num = slice_positions->size(); + } + size_t stride = (slice_positions->size() + thread_num - 1) / thread_num; + thread_num = (slice_positions->size() + stride - 1) / stride; + std::vector 
threads; + size_t max_length = sorted_indices->size() * outer_dim; + for (size_t i = 0; i < thread_num; ++i) { + size_t slice_start = i * stride; + size_t slice_end = 0; + if (i == thread_num - 1) { + slice_end = slice_positions->size(); + } else { + slice_end = slice_start + stride; + } + WorkerParamsForReduceSparseGradient params{ + slice_start, slice_end, max_length, outer_dim, sorted_indices, slice_positions, origin_sparse_grad.value_, + unique_grad}; + threads.emplace_back(std::thread(WorkerForReduceSparseGradient, params)); + } + for (size_t i = 0; i < thread_num; ++i) { + threads[i].join(); + } + MS_LOG(DEBUG) << "End"; +} + void ReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim, - size_t outer_dim) { + size_t outer_dim, bool use_multi_threads) { + MS_LOG(DEBUG) << "Start"; MS_EXCEPTION_IF_NULL(origin_sparse_grad.value_); MS_EXCEPTION_IF_NULL(origin_sparse_grad.indices_); MS_EXCEPTION_IF_NULL(unique_grad); @@ -599,37 +633,102 @@ void ReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradie [](const std::pair &left, const std::pair &right) { return left.first < right.first; }); int last_index = 0; std::vector slice_positions; + slice_positions.reserve(sorted_indices.size()); for (size_t i = 0; i < sorted_indices.size(); ++i) { if (i == 0 || last_index != sorted_indices[i].first) { slice_positions.emplace_back(i); } last_index = sorted_indices[i].first; } - size_t thread_num = 8; - if (slice_positions.size() < thread_num) { - thread_num = slice_positions.size(); + if (use_multi_threads) { + RunMultiThreadReduceSparseGradient(origin_sparse_grad, unique_grad, outer_dim, &sorted_indices, &slice_positions); + } else { + size_t max_length = sorted_indices.size() * outer_dim; + WorkerParamsForReduceSparseGradient params{0, + slice_positions.size(), + max_length, + outer_dim, + &sorted_indices, + &slice_positions, + origin_sparse_grad.value_, + unique_grad}; + 
WorkerForReduceSparseGradient(params); + } + unique_grad->indices_size_ = slice_positions.size(); + MS_LOG(DEBUG) << "End"; +} + +void ReduceMultiSparseGradient(const std::vector> &unique_slice_grads, + SparseGradient *tmp_grad, SparseGradient *unique_grad, size_t first_dim, + size_t outer_dim) { + MS_LOG(DEBUG) << "Start"; + if (unique_slice_grads.empty()) { + return; + } + size_t index_data_size = outer_dim * sizeof(float); + size_t unique_indices_size = 0; + for (size_t i = 0; i < unique_slice_grads.size(); ++i) { + auto &slice_grad = unique_slice_grads[i]; + auto ret_code = memcpy_s(tmp_grad->value_ + unique_indices_size * outer_dim, + (tmp_grad->indices_size_ - unique_indices_size) * index_data_size, slice_grad->value_, + slice_grad->indices_size_ * index_data_size); + if (ret_code != EOK) { + MS_LOG(EXCEPTION) << "Failed to copy data!"; + } + ret_code = + memcpy_s(tmp_grad->indices_ + unique_indices_size, (tmp_grad->indices_size_ - unique_indices_size) * sizeof(int), + slice_grad->indices_, slice_grad->indices_size_ * sizeof(int)); + if (ret_code != EOK) { + MS_LOG(EXCEPTION) << "Failed to copy data!"; + } + unique_indices_size += slice_grad->indices_size_; + } + tmp_grad->indices_size_ = unique_indices_size; + ReduceSparseGradient(*tmp_grad, unique_grad, first_dim, outer_dim); + MS_LOG(DEBUG) << "End"; +} + +void TwoLevelReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *tmp_grad, + SparseGradient *unique_grad, size_t first_dim, size_t outer_dim) { + MS_LOG(DEBUG) << "Start"; + MS_EXCEPTION_IF_NULL(origin_sparse_grad.value_); + MS_EXCEPTION_IF_NULL(origin_sparse_grad.indices_); + MS_EXCEPTION_IF_NULL(unique_grad); + MS_EXCEPTION_IF_NULL(unique_grad->value_); + MS_EXCEPTION_IF_NULL(unique_grad->indices_); + MS_EXCEPTION_IF_NULL(tmp_grad); + MS_EXCEPTION_IF_NULL(tmp_grad->value_); + MS_EXCEPTION_IF_NULL(tmp_grad->indices_); + size_t thread_num = 24; + if (origin_sparse_grad.indices_size_ < thread_num) { + thread_num = 
origin_sparse_grad.indices_size_; } - size_t stride = (slice_positions.size() + thread_num - 1) / thread_num; - thread_num = (slice_positions.size() + stride - 1) / stride; + size_t thread_indices_size = origin_sparse_grad.indices_size_ / thread_num; + size_t left_indices_size = origin_sparse_grad.indices_size_ % thread_num; std::vector threads; - size_t max_length = sorted_indices.size() * outer_dim; + threads.reserve(thread_num); + std::vector> unique_slice_grads; for (size_t i = 0; i < thread_num; ++i) { - size_t slice_start = i * stride; - size_t slice_end = 0; + size_t indices_size = thread_indices_size; if (i == thread_num - 1) { - slice_end = slice_positions.size(); - } else { - slice_end = slice_start + stride; + indices_size = thread_indices_size + left_indices_size; } - WorkerParamsForReduceSparseGradient params{ - slice_start, slice_end, max_length, outer_dim, &sorted_indices, &slice_positions, origin_sparse_grad.value_, - unique_grad}; - threads.emplace_back(std::thread(WorkerForReduceSparseGradient, params)); + size_t value_offset = i * thread_indices_size * outer_dim; + size_t indices_offset = i * thread_indices_size; + auto slice_grad = SparseGradient( + {origin_sparse_grad.value_ + value_offset, origin_sparse_grad.indices_ + indices_offset, indices_size}); + unique_slice_grads.emplace_back(std::make_shared()); + unique_slice_grads[i]->value_ = unique_grad->value_ + value_offset; + unique_slice_grads[i]->indices_ = unique_grad->indices_ + indices_offset; + unique_slice_grads[i]->indices_size_ = indices_size; + threads.emplace_back( + std::thread(ReduceSparseGradient, slice_grad, unique_slice_grads[i].get(), first_dim, outer_dim, false)); } for (size_t i = 0; i < thread_num; ++i) { threads[i].join(); } - unique_grad->indices_size_ = slice_positions.size(); + ReduceMultiSparseGradient(unique_slice_grads, tmp_grad, unique_grad, first_dim, outer_dim); + MS_LOG(DEBUG) << "End"; } std::pair GetKernelInput(const AnfNodePtr &anf_node, size_t index) { @@ 
-892,5 +991,39 @@ void MultiThreadCompute(const MultiThreadComputeFunc &func, MultiThreadComputePa threads[i].join(); } } + +std::vector GetReduceAttrAxis(const CNodePtr &cnode) { + if (AnfAlgo::GetInputTensorNum(cnode) != AnfAlgo::GetOutputTensorNum(cnode) && + AnfAlgo::GetInputTensorNum(cnode) != 1) { + MS_LOG(EXCEPTION) << "the kind of reduce node [" << cnode->DebugString() + << "] is not single input or single output "; + } + std::vector axis; + auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(cnode, 0); + auto primitive = AnfAlgo::GetCNodePrimitive(cnode); + MS_EXCEPTION_IF_NULL(primitive); + auto axis_attr = primitive->GetAttr(kAxis); + if (axis_attr == nullptr) { + MS_LOG(ERROR) << "This node does't have axie attr."; + return std::vector(); + } + auto type = axis_attr->type(); + MS_EXCEPTION_IF_NULL(type); + std::vector axis_list; + if (type->ToString() == kTypeInt32) { + axis_list.emplace_back(GetValue(axis_attr)); + } else { + axis_list = GetValue>(axis_attr); + } + for (const auto &elem : axis_list) { + if (elem < 0) { + axis.emplace_back(input_shape.size() + elem); + } else { + axis.emplace_back(elem); + } + } + AnfAlgo::SetNodeAttr(kAttrAxis, MakeValue(axis), cnode); + return axis; +} } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/common_utils.h b/mindspore/ccsrc/backend/kernel_compiler/common_utils.h similarity index 83% rename from mindspore/ccsrc/kernel/common_utils.h rename to mindspore/ccsrc/backend/kernel_compiler/common_utils.h index e9d72848f6..8c9ea84b34 100644 --- a/mindspore/ccsrc/kernel/common_utils.h +++ b/mindspore/ccsrc/backend/kernel_compiler/common_utils.h @@ -26,9 +26,9 @@ #include #include #include -#include "kernel/kernel.h" -#include "kernel/oplib/opinfo.h" -#include "kernel/kernel_build_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/kernel_compiler/oplib/opinfo.h" +#include "backend/kernel_compiler/kernel_build_info.h" namespace mindspore { namespace kernel { @@ 
-115,7 +115,7 @@ int Sign(float x); void DeduplicateIndexedSlices(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim, size_t outer_dim); void ReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim, - size_t outer_dim); + size_t outer_dim, bool use_multi_threads = true); std::pair GetKernelInput(const AnfNodePtr &anf_node, size_t index); std::vector>> GetInputIndex(const std::vector &node_list, const std::vector &input_list); @@ -130,6 +130,15 @@ void GetGraphRealOutput(const FuncGraphPtr &func_graph, std::vector> *sorted_indices, + std::vector *slice_positions); +void ReduceMultiSparseGradient(const std::vector> &unique_slice_grads, + SparseGradient *tmp_grad, SparseGradient *unique_grad, size_t first_dim, + size_t outer_dim); +void TwoLevelReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *tmp_grad, + SparseGradient *unique_grad, size_t first_dim, size_t outer_dim); +std::vector GetReduceAttrAxis(const CNodePtr &cnode); } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/cpu/addn_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/addn_cpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/cpu/addn_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/addn_cpu_kernel.cc index 5b3194608e..1300847d40 100644 --- a/mindspore/ccsrc/kernel/cpu/addn_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/addn_cpu_kernel.cc @@ -14,9 +14,8 @@ * limitations under the License. 
*/ -#include "kernel/cpu/addn_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" -#include "ir/primitive.h" +#include "backend/kernel_compiler/cpu/addn_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/addn_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/addn_cpu_kernel.h similarity index 93% rename from mindspore/ccsrc/kernel/cpu/addn_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/addn_cpu_kernel.h index 1a1a9157d9..925f0fab50 100644 --- a/mindspore/ccsrc/kernel/cpu/addn_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/addn_cpu_kernel.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_KERNEL_CPU_ADDN_CPU_KERNEL_H_ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/allgather_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/allgather_cpu_kernel.cc similarity index 92% rename from mindspore/ccsrc/kernel/cpu/allgather_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/allgather_cpu_kernel.cc index 9cc5126c08..55afecb8fa 100644 --- a/mindspore/ccsrc/kernel/cpu/allgather_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/allgather_cpu_kernel.cc @@ -13,10 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/allgather_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" -#include "device/cpu/mpi/mpi_adapter.h" -#include "ir/primitive.h" +#include "backend/kernel_compiler/cpu/allgather_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" +#include "runtime/device/cpu/mpi/mpi_adapter.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/cpu/allgather_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/allgather_cpu_kernel.h similarity index 92% rename from mindspore/ccsrc/kernel/cpu/allgather_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/allgather_cpu_kernel.h index 1dddf810ef..42c83ccf0b 100644 --- a/mindspore/ccsrc/kernel/cpu/allgather_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/allgather_cpu_kernel.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_KERNEL_CPU_REDUCE_SCATTER_CPU_KERNEL_H_ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/apply_momentum_cpu_kernel.cc similarity index 90% rename from mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/apply_momentum_cpu_kernel.cc index 3cd6c57413..c1ff8d54bd 100644 --- a/mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/apply_momentum_cpu_kernel.cc @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/apply_momentum_cpu_kernel.h" -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/apply_momentum_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" +#include "runtime/device/cpu/cpu_device_address.h" #include "common/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/apply_momentum_cpu_kernel.h similarity index 97% rename from mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/apply_momentum_cpu_kernel.h index c0ca581974..23e8488890 100644 --- a/mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/apply_momentum_cpu_kernel.h @@ -18,7 +18,7 @@ #include #include -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/argmax_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/argmax_cpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/cpu/argmax_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/argmax_cpu_kernel.cc index ee328df721..d67c4d47ff 100644 --- a/mindspore/ccsrc/kernel/cpu/argmax_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/argmax_cpu_kernel.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/argmax_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/argmax_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/argmax_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/argmax_cpu_kernel.h similarity index 92% rename from mindspore/ccsrc/kernel/cpu/argmax_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/argmax_cpu_kernel.h index aae7435c5c..3883344f96 100644 --- a/mindspore/ccsrc/kernel/cpu/argmax_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/argmax_cpu_kernel.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_KERNEL_CPU_ARGMAX_CPU_KERNEL_H_ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/bias_add_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_cpu_kernel.cc similarity index 97% rename from mindspore/ccsrc/kernel/cpu/bias_add_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_cpu_kernel.cc index 00f3017231..f42bb6807d 100644 --- a/mindspore/ccsrc/kernel/cpu/bias_add_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_cpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/cpu/bias_add_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/bias_add_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/bias_add_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_cpu_kernel.h similarity index 93% rename from mindspore/ccsrc/kernel/cpu/bias_add_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_cpu_kernel.h index 516a21147b..c572f68230 100644 --- a/mindspore/ccsrc/kernel/cpu/bias_add_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_cpu_kernel.h @@ -18,8 +18,8 @@ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/bias_add_grad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_grad_cpu_kernel.cc similarity index 97% rename from mindspore/ccsrc/kernel/cpu/bias_add_grad_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_grad_cpu_kernel.cc index 1d9c7d076e..8b6e2d0188 100644 --- a/mindspore/ccsrc/kernel/cpu/bias_add_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_grad_cpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/cpu/bias_add_grad_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/bias_add_grad_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/bias_add_grad_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_grad_cpu_kernel.h similarity index 93% rename from mindspore/ccsrc/kernel/cpu/bias_add_grad_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_grad_cpu_kernel.h index e3ac896096..a5743879a7 100644 --- a/mindspore/ccsrc/kernel/cpu/bias_add_grad_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_grad_cpu_kernel.h @@ -19,8 +19,8 @@ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/concat_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/concat_cpu_kernel.cc similarity index 97% rename from mindspore/ccsrc/kernel/cpu/concat_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/concat_cpu_kernel.cc index d8f2ef421b..6776c0f154 100644 --- a/mindspore/ccsrc/kernel/cpu/concat_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/concat_cpu_kernel.cc @@ -14,9 +14,8 @@ * limitations under the License. 
*/ -#include "kernel/cpu/concat_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" -#include "ir/primitive.h" +#include "backend/kernel_compiler/cpu/concat_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/concat_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/concat_cpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/cpu/concat_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/concat_cpu_kernel.h index 46f9078178..94e4ad40f3 100644 --- a/mindspore/ccsrc/kernel/cpu/concat_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/concat_cpu_kernel.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_KERNEL_CPU_CONCAT_CPU_KERNEL_H_ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/cpu/cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.cc index 2be05038d6..fb9398e7c4 100644 --- a/mindspore/ccsrc/kernel/cpu/cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h similarity index 93% rename from mindspore/ccsrc/kernel/cpu/cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h index 0836529840..f2aa292c6e 100644 --- a/mindspore/ccsrc/kernel/cpu/cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h @@ -21,9 +21,9 @@ #include #include #include -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" #include "ir/anf.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" using mindspore::kernel::Address; using mindspore::kernel::AddressPtr; @@ -55,7 +55,7 @@ class CPUKernel : public kernel::KernelMod { public: CPUKernel() = default; ~CPUKernel() override = default; - void Init(const CNodePtr &kernel_node); + virtual void Init(const CNodePtr &kernel_node); virtual void InitKernel(const CNodePtr &kernel_node) = 0; bool Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs, void * /*stream_ptr*/) override { diff --git a/mindspore/ccsrc/kernel/cpu/cpu_kernel_factory.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.cc similarity index 95% rename from mindspore/ccsrc/kernel/cpu/cpu_kernel_factory.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.cc index bcda7af9fd..accd742976 100644 --- a/mindspore/ccsrc/kernel/cpu/cpu_kernel_factory.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.cc @@ -14,13 +14,13 @@ * limitations under the License. 
*/ -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" #include #include #include -#include "device/kernel_info.h" +#include "runtime/device/kernel_info.h" namespace mindspore { namespace kernel { @@ -38,7 +38,7 @@ void CPUKernelFactory::Register(const std::string &kernel_name, const KernelAttr } std::shared_ptr CPUKernelFactory::Create(const std::string &kernel_name, const CNodePtr &apply_kernel) { - auto kernel_info = apply_kernel->kernel_info(); + auto kernel_info = dynamic_cast(apply_kernel->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); const KernelBuildInfo *kernel_build_Info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(kernel_build_Info); diff --git a/mindspore/ccsrc/kernel/cpu/cpu_kernel_factory.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.h similarity index 85% rename from mindspore/ccsrc/kernel/cpu/cpu_kernel_factory.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.h index 52eda12ba7..80f9a342ac 100644 --- a/mindspore/ccsrc/kernel/cpu/cpu_kernel_factory.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.h @@ -24,8 +24,8 @@ #include #include "common/utils.h" -#include "kernel/cpu/cpu_kernel.h" -#include "device/cpu/kernel_select_cpu.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "runtime/device/cpu/kernel_select_cpu.h" namespace mindspore { namespace kernel { @@ -62,10 +62,12 @@ class CPUKernelRegistrar { static const CPUKernelRegistrar g_cpu_kernel_##COUNT##_reg(#OPNAME, ATTR, \ []() { return std::make_shared(); }); -#define MS_REG_CPU_KERNEL_T(OPNAME, ATTR, OPCLASS, T) \ +#define MS_REG_CPU_KERNEL_T(OPNAME, ATTR, OPCLASS, T) MS_REG_CPU_KERNEL_T_(__COUNTER__, OPNAME, ATTR, OPCLASS, T) +#define MS_REG_CPU_KERNEL_T_(COUNT, OPNAME, ATTR, OPCLASS, T) _MS_REG_CPU_KERNEL_T_(COUNT, OPNAME, ATTR, OPCLASS, T) +#define _MS_REG_CPU_KERNEL_T_(COUNT, OPNAME, ATTR, OPCLASS, T) \ 
static_assert(std::is_base_of>::value, " must be base of CPUKernel"); \ - static const CPUKernelRegistrar g_cpu_kernel_##OPNAME##_##T##_reg(#OPNAME, ATTR, \ - []() { return std::make_shared>(); }); + static const CPUKernelRegistrar g_cpu_kernel_##COUNT##_##OPNAME##_##T##_reg( \ + #OPNAME, ATTR, []() { return std::make_shared>(); }); #define MS_REG_CPU_KERNEL_T_S(OPNAME, ATTR, OPCLASS, T, S) \ static_assert(std::is_base_of>::value, " must be base of CPUKernel"); \ diff --git a/mindspore/ccsrc/kernel/cpu/debug_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/debug_cpu_kernel.cc similarity index 93% rename from mindspore/ccsrc/kernel/cpu/debug_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/debug_cpu_kernel.cc index a1dcaca3f3..344f03cc53 100644 --- a/mindspore/ccsrc/kernel/cpu/debug_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/debug_cpu_kernel.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/debug_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/debug_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" #include "common/utils.h" #ifdef ENABLE_DEBUGGER #include "debug/debugger/debugger.h" diff --git a/mindspore/ccsrc/kernel/cpu/debug_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/debug_cpu_kernel.h similarity index 92% rename from mindspore/ccsrc/kernel/cpu/debug_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/debug_cpu_kernel.h index da9f3286b9..18302e8992 100644 --- a/mindspore/ccsrc/kernel/cpu/debug_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/debug_cpu_kernel.h @@ -18,8 +18,8 @@ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/embedding_look_up_comm_grad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_comm_grad_cpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/cpu/embedding_look_up_comm_grad_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_comm_grad_cpu_kernel.cc index 07da3dcc25..1bcc36faa4 100644 --- a/mindspore/ccsrc/kernel/cpu/embedding_look_up_comm_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_comm_grad_cpu_kernel.cc @@ -14,10 +14,9 @@ * limitations under the License. 
*/ #include -#include "kernel/cpu/embedding_look_up_comm_grad_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" -#include "device/cpu/mpi/mpi_adapter.h" -#include "ir/primitive.h" +#include "backend/kernel_compiler/cpu/embedding_look_up_comm_grad_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" +#include "runtime/device/cpu/mpi/mpi_adapter.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/embedding_look_up_comm_grad_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_comm_grad_cpu_kernel.h similarity index 93% rename from mindspore/ccsrc/kernel/cpu/embedding_look_up_comm_grad_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_comm_grad_cpu_kernel.h index 7222bd9be1..3e3807f58e 100644 --- a/mindspore/ccsrc/kernel/cpu/embedding_look_up_comm_grad_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_comm_grad_cpu_kernel.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_COMM_GRAD_CPU_KERNEL_H_ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/embedding_look_up_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/cpu/embedding_look_up_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.cc index c8c2c667ad..b2feb9204f 100644 --- a/mindspore/ccsrc/kernel/cpu/embedding_look_up_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.cc @@ -15,9 +15,9 @@ */ #include #include -#include "kernel/cpu/embedding_look_up_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" -#include 
"device/cpu/mpi/mpi_adapter.h" +#include "backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" +#include "runtime/device/cpu/mpi/mpi_adapter.h" #include "ir/primitive.h" namespace mindspore { @@ -36,7 +36,9 @@ void EmbeddingLookUpCPUKernel::InitKernel(const CNodePtr &kernel_node) { } output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0); axis_ = 4 - input_shape_.size(); - reduce_scatter_flag_ = AnfAlgo::GetNodeAttr(kernel_node, "reduce_scatter_flag"); + if (AnfAlgo::HasNodeAttr(kAttrReduceScatterFlag, kernel_node)) { + reduce_scatter_flag_ = AnfAlgo::GetNodeAttr(kernel_node, kAttrReduceScatterFlag); + } #ifdef ENABLE_MPI if (reduce_scatter_flag_) { size_t gatherv2_out_lens = 1; @@ -65,7 +67,9 @@ void EmbeddingLookUpCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_LOG(EXCEPTION) << "Not Enable MPI, please build version with -M on when set reduce_scatter_flag true"; } #endif - offset_ = AnfAlgo::GetNodeAttr(kernel_node, "offset"); + if (AnfAlgo::HasNodeAttr(kAttrOffset, kernel_node)) { + offset_ = AnfAlgo::GetNodeAttr(kernel_node, kAttrOffset); + } CPUKernelUtils::ExpandDimsTo4(&input_shape_); CPUKernelUtils::ExpandDimsTo4(&output_shape_); } diff --git a/mindspore/ccsrc/kernel/cpu/embedding_look_up_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/cpu/embedding_look_up_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.h index d839571caa..6c61ee346c 100644 --- a/mindspore/ccsrc/kernel/cpu/embedding_look_up_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_CPU_KERNEL_H_ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include 
"backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/equal_count_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/equal_count_cpu_kernel.cc similarity index 93% rename from mindspore/ccsrc/kernel/cpu/equal_count_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/equal_count_cpu_kernel.cc index 60e7eafa78..a61cd185c6 100644 --- a/mindspore/ccsrc/kernel/cpu/equal_count_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/equal_count_cpu_kernel.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "kernel/cpu/equal_count_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/equal_count_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/equal_count_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/equal_count_cpu_kernel.h similarity index 92% rename from mindspore/ccsrc/kernel/cpu/equal_count_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/equal_count_cpu_kernel.h index 13083889d0..6e4ed6d5f1 100644 --- a/mindspore/ccsrc/kernel/cpu/equal_count_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/equal_count_cpu_kernel.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_KERNEL_CPU_EQUAL_COUNT_CPU_KERNEL_H_ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/gather_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/gather_cpu_kernel.cc similarity index 97% rename from mindspore/ccsrc/kernel/cpu/gather_cpu_kernel.cc rename to 
mindspore/ccsrc/backend/kernel_compiler/cpu/gather_cpu_kernel.cc index 28090817cb..73b11f1c01 100644 --- a/mindspore/ccsrc/kernel/cpu/gather_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/gather_cpu_kernel.cc @@ -13,9 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "kernel/cpu/gather_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" -#include "ir/primitive.h" +#include "backend/kernel_compiler/cpu/gather_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/gather_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/gather_cpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/cpu/gather_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/gather_cpu_kernel.h index 2ffd7df4d4..8fdac0dfde 100644 --- a/mindspore/ccsrc/kernel/cpu/gather_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/gather_cpu_kernel.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_KERNEL_CPU_GATHER_CPU_KERNEL_H_ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_cpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_cpu_kernel.cc index 657c85dc48..e58b1d319c 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_cpu_kernel.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/mkldnn/conv2d_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/conv2d_cpu_kernel.h" #include #include "common/utils.h" -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_cpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_cpu_kernel.h index 1cb100299e..c0c64ba4da 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_cpu_kernel.h @@ -18,7 +18,7 @@ #include #include -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.cc index fbfebaf56e..3fa6a91405 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h" #include #include "common/utils.h" -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h index 49559f452b..ae8269c142 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h @@ -18,7 +18,7 @@ #include #include -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_input_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_input_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.cc index ff0b8633d4..1f02d70f86 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_input_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h" #include -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" +#include "runtime/device/cpu/cpu_device_address.h" #include "common/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h index 9fb024a279..6f699130a8 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h @@ -18,7 +18,7 @@ #include #include -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_cpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_cpu_kernel.cc index 0a343785f7..626fd1934e 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_cpu_kernel.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/mkldnn/lstm_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/lstm_cpu_kernel.h" #include #include "common/utils.h" -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_cpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_cpu_kernel.h index d42ff803f0..761494a931 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_cpu_kernel.h @@ -24,7 +24,7 @@ #endif #include #include -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { namespace kernel { class LstmCPUKernel : public MKLCPUKernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_grad_cpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_grad_cpu_kernel.cc index d7e7701d85..56da8ec808 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_grad_cpu_kernel.cc @@ -13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/mkldnn/lstm_grad_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/lstm_grad_cpu_kernel.h" #include #include #include #include #include "common/utils.h" -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_grad_cpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_grad_cpu_kernel.h index 1f3fb824c0..b95b5ba792 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_grad_cpu_kernel.h @@ -18,7 +18,7 @@ #include #include -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/matmul_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/matmul_cpu_kernel.cc similarity index 93% rename from mindspore/ccsrc/kernel/cpu/mkldnn/matmul_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/matmul_cpu_kernel.cc index 28266f2aa0..4bbaa6459f 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/matmul_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/matmul_cpu_kernel.cc @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/mkldnn/matmul_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/matmul_cpu_kernel.h" #include #include -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" #include "common/utils.h" -#include "device/cpu/cpu_device_address.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/matmul_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/matmul_cpu_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/cpu/mkldnn/matmul_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/matmul_cpu_kernel.h index 10276d01fa..ef52f652d0 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/matmul_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/matmul_cpu_kernel.h @@ -18,7 +18,7 @@ #include #include -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.cc index a38470e3a3..c71abe809d 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.cc @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" #include #include #include #include "common/utils.h" -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h index 10a860afff..fc7128b10e 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h @@ -21,8 +21,8 @@ #include #include #include "dnnl.hpp" -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.cc similarity index 95% rename from mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.cc index 5ae9791b12..777668f960 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" #include "utils/log_adapter.h" #include "dnnl.hpp" diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h similarity index 100% rename from mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/mul_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.cc similarity index 93% rename from mindspore/ccsrc/kernel/cpu/mkldnn/mul_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.cc index 4f77508004..fddd769047 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/mul_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.cc @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/mkldnn/mul_cpu_kernel.h" -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" +#include "runtime/device/cpu/cpu_device_address.h" #include "common/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/mul_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/cpu/mkldnn/mul_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.h index 1131fd594c..182679f59d 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/mul_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.h @@ -18,7 +18,7 @@ #include #include -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_cpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/cpu/mkldnn/pooling_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_cpu_kernel.cc index 5225050dc1..e4bedf23b9 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_cpu_kernel.cc @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/mkldnn/pooling_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/pooling_cpu_kernel.h" #include #include #include "common/utils.h" -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_cpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/cpu/mkldnn/pooling_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_cpu_kernel.h index 4993d0834d..8187eaffda 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_cpu_kernel.h @@ -18,7 +18,7 @@ #include #include -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_grad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_grad_cpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/cpu/mkldnn/pooling_grad_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_grad_cpu_kernel.cc index c0459de790..8189df07ff 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_grad_cpu_kernel.cc @@ -13,13 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/mkldnn/pooling_grad_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/pooling_grad_cpu_kernel.h" #include #include #include #include "common/utils.h" -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_grad_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_grad_cpu_kernel.h similarity index 97% rename from mindspore/ccsrc/kernel/cpu/mkldnn/pooling_grad_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_grad_cpu_kernel.h index cdb2c69ef0..95a7bb3f66 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_grad_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_grad_cpu_kernel.h @@ -19,7 +19,7 @@ #include #include #include -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/relu_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.cc similarity index 91% rename from mindspore/ccsrc/kernel/cpu/mkldnn/relu_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.cc index d5ef20a25e..29ac9a1062 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/relu_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.cc @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/mkldnn/relu_cpu_kernel.h" -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" +#include "runtime/device/cpu/cpu_device_address.h" #include "common/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/relu_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/cpu/mkldnn/relu_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.h index 26905e267d..a2da2480e2 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/relu_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.h @@ -18,7 +18,7 @@ #include #include -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/relu_grad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.cc similarity index 93% rename from mindspore/ccsrc/kernel/cpu/mkldnn/relu_grad_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.cc index 4a6213ddf2..9139aa7862 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/relu_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.cc @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/mkldnn/relu_grad_cpu_kernel.h" -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" +#include "runtime/device/cpu/cpu_device_address.h" #include "common/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/relu_grad_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/cpu/mkldnn/relu_grad_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.h index f0a77ee282..c895ab2756 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/relu_grad_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.h @@ -18,7 +18,7 @@ #include #include -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cpu_kernel.cc similarity index 92% rename from mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cpu_kernel.cc index 7fa740cfc0..94271b8a69 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cpu_kernel.cc @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/mkldnn/softmax_cpu_kernel.h" -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/mkldnn/softmax_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" +#include "runtime/device/cpu/cpu_device_address.h" #include "common/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cpu_kernel.h index 6acb9e5b9b..2812dd31af 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cpu_kernel.h @@ -18,7 +18,7 @@ #include #include -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.cc similarity index 93% rename from mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.cc index 05b1a79924..889e2abdec 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.cc @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.h" #include #include #include -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" +#include "runtime/device/cpu/cpu_device_address.h" #include "common/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.h index f663508059..d05cb49b7b 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.h @@ -18,7 +18,7 @@ #include #include -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.cc index c33fcd246f..b8bf7b318a 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.cc @@ -13,12 
+13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "kernel/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.h" #include #include #include -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" +#include "runtime/device/cpu/cpu_device_address.h" #include "common/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.h index 6391b27de6..0d79b0514b 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.h @@ -18,7 +18,7 @@ #include #include -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/one_hot_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/one_hot_cpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/cpu/one_hot_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/one_hot_cpu_kernel.cc index 00dfe73f28..5bbc9f49a2 100644 --- a/mindspore/ccsrc/kernel/cpu/one_hot_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/one_hot_cpu_kernel.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions 
and * limitations under the License. */ -#include "kernel/cpu/one_hot_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/one_hot_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/one_hot_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/one_hot_cpu_kernel.h similarity index 93% rename from mindspore/ccsrc/kernel/cpu/one_hot_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/one_hot_cpu_kernel.h index ef13047343..393b0e8c41 100644 --- a/mindspore/ccsrc/kernel/cpu/one_hot_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/one_hot_cpu_kernel.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_KERNEL_CPU_ONE_HOT_CPU_KERNEL_H_ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/apply_momentum_ps_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/apply_momentum_ps_kernel.cc new file mode 100644 index 0000000000..6537c88840 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/apply_momentum_ps_kernel.cc @@ -0,0 +1,33 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "backend/kernel_compiler/cpu/ps/apply_momentum_ps_kernel.h" + +namespace mindspore { +namespace kernel { +namespace ps { +bool ApplyMomentumPSKernel::Execute(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) { + return Launch(inputs, workspace, outputs); +} + +const std::vector &ApplyMomentumPSKernel::input_sizes() const { return GetInputSizeList(); } + +const std::vector &ApplyMomentumPSKernel::output_sizes() const { return GetOutputSizeList(); } + +const std::vector &ApplyMomentumPSKernel::workspace_sizes() const { return GetWorkspaceSizeList(); } +} // namespace ps +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/apply_momentum_ps_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/apply_momentum_ps_kernel.h new file mode 100644 index 0000000000..a78f40d04b --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/apply_momentum_ps_kernel.h @@ -0,0 +1,43 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_APPLY_MOMENTUM_PS_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_APPLY_MOMENTUM_PS_KERNEL_H_ + +#include +#include +#include "backend/kernel_compiler/cpu/ps/pserver_kernel.h" +#include "backend/kernel_compiler/cpu/apply_momentum_cpu_kernel.h" + +namespace mindspore { +namespace kernel { +namespace ps { +class ApplyMomentumPSKernel : public ApplyMomentumCPUKernel, public PServerKernel { + public: + ApplyMomentumPSKernel(size_t rank_id, size_t pserver_num) : PServerKernel(rank_id, pserver_num) {} + ~ApplyMomentumPSKernel() override = default; + + bool Execute(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) override; + + const std::vector &input_sizes() const override; + const std::vector &output_sizes() const override; + const std::vector &workspace_sizes() const override; +}; +} // namespace ps +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_CPU_APPLY_MOMENTUM_PS_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_proxy_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_proxy_kernel.cc new file mode 100644 index 0000000000..59ab65014b --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_proxy_kernel.cc @@ -0,0 +1,75 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "backend/kernel_compiler/cpu/ps/embedding_look_up_proxy_kernel.h" +#include +#include "frontend/parallel/ps/worker.h" + +namespace mindspore { +namespace kernel { +namespace ps { +void EmbeddingLookUpProxyKernel::InitKernel(const CNodePtr &kernel_node) { + EmbeddingLookUpCPUKernel::InitKernel(kernel_node); + + for (auto dim : input_shape_) { + input_dims_ *= dim; + } + + if (mindspore::parallel::ps::Util::IsRoleOfWorker()) { + key_ = AnfAlgo::GetNodeAttr(kernel_node, kAttrPsKey); + } + std::vector keys{key_, key_, key_}; + std::vector values; + values.insert(values.end(), input_shape_.begin(), input_shape_.end()); + values.insert(values.end(), indices_shape_.begin(), indices_shape_.end()); + values.insert(values.end(), output_shape_.begin(), output_shape_.end()); + std::vector lens{SizeToInt(input_shape_.size()), SizeToInt(indices_shape_.size()), + SizeToInt(output_shape_.size())}; + const char *env_role = getenv(mindspore::parallel::ps::kEnvRole); + if (env_role != nullptr && strcmp(env_role, mindspore::parallel::ps::kEnvRoleOfWorker) == 0) { + parallel::ps::Worker::GetInstance().AddEmbeddingTable(key_, input_shape_[axis_]); + parallel::ps::Worker::GetInstance().InitPSEmbeddingTable(keys, values, lens); + } +} + +bool EmbeddingLookUpProxyKernel::Launch(const std::vector &inputs, + const std::vector & /*workspace*/, + const std::vector &outputs) { + auto indices_addr = reinterpret_cast(inputs[1]->addr); + auto output_addr = reinterpret_cast(outputs[0]->addr); + size_t input_size = inputs[1]->size; + size_t output_size = outputs[0]->size; + + size_t size = input_size / sizeof(float); + ::ps::SArray lookup_ids(size, 0); + ::ps::SArray lengths{size}; + ::ps::SArray lookup_result; + + auto ret = memcpy_s(lookup_ids.data(), input_size, indices_addr, input_size); + if (ret != EOK) { + MS_LOG(EXCEPTION) << "Lookup id memcpy failed."; + } + parallel::ps::Worker::GetInstance().DoPSEmbeddingLookup({key_}, lookup_ids, lengths, lookup_result, + 
parallel::ps::kEmbeddingLookupCmd); + + auto ret2 = memcpy_s(output_addr, output_size, lookup_result.data(), output_size); + if (ret2 != EOK) { + MS_LOG(EXCEPTION) << "Lookup result memcpy failed."; + } + return true; +} +} // namespace ps +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_proxy_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_proxy_kernel.h new file mode 100644 index 0000000000..45e0a23fcb --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_proxy_kernel.h @@ -0,0 +1,49 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_PROXY_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_PROXY_KERNEL_H_ + +#include "backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.h" +#include +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { +namespace ps { +class EmbeddingLookUpProxyKernel : public EmbeddingLookUpCPUKernel { + public: + EmbeddingLookUpProxyKernel() = default; + ~EmbeddingLookUpProxyKernel() override = default; + + void InitKernel(const CNodePtr &kernel_node) override; + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) override; + + private: + size_t key_{0}; + size_t input_dims_{1}; +}; + +MS_REG_CPU_KERNEL( + EmbeddingLookupProxy, + KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32), + EmbeddingLookUpProxyKernel); +} // namespace ps +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_PROXY_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_ps_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_ps_kernel.cc new file mode 100644 index 0000000000..bcb3ca8ae8 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_ps_kernel.cc @@ -0,0 +1,87 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/kernel_compiler/cpu/ps/embedding_look_up_ps_kernel.h" +#include +#include +#include +#include "backend/kernel_compiler/common_utils.h" +#include "frontend/parallel/ps/util.h" + +namespace mindspore { +namespace kernel { +namespace ps { +using mindspore::parallel::ps::Util; +void EmbeddingLookUpPSKernel::InitKernel( + const std::shared_ptr>>> &shapes) { + const std::vector>> &shape_vec = *shapes; + input_shape_ = *(shape_vec[0]); + input_lens_ = 1; + for (auto shape : input_shape_) { + input_lens_ = input_lens_ * shape; + } + indices_shape_ = *(shape_vec[1]); + indices_lens_ = 1; + for (auto shape : indices_shape_) { + indices_lens_ = indices_lens_ * shape; + } + output_shape_ = *(shape_vec[2]); + axis_ = 2; + reduce_scatter_flag_ = false; + + size_t offset = 0; + for (size_t i = 0; i < rank_id_; i++) { + offset += Util::LocalShard(input_shape_[axis_], i, pserver_num_); + } + offset_ = offset; + split_num_ = pserver_num_; + + // input shape should be sharded after computing offset_; + Shard(input_shape_, axis_); + + size_t output_size = + std::accumulate(output_shape_.begin(), output_shape_.end(), sizeof(float), std::multiplies()); + output_size_list_.emplace_back(output_size); + CPUKernelUtils::ExpandDimsTo4(&input_shape_); + CPUKernelUtils::ExpandDimsTo4(&output_shape_); +} + +void EmbeddingLookUpPSKernel::ReInit(const std::shared_ptr>>> &shapes) { + const std::vector>> &shape_vec = *shapes; + const auto &indices_shape_ = *(shape_vec[0]); + indices_lens_ = indices_shape_[0]; + + size_t output_size = sizeof(float) * indices_lens_; + for (size_t i = axis_ + 1; i < input_shape_.size(); i++) { + output_size *= input_shape_[i]; + } + output_size_list_.clear(); + output_size_list_.emplace_back(output_size); +} + +bool EmbeddingLookUpPSKernel::Execute(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) { + 
return Launch(inputs, workspace, outputs); +} + +const std::vector &EmbeddingLookUpPSKernel::input_sizes() const { return input_shape_; } + +const std::vector &EmbeddingLookUpPSKernel::output_sizes() const { return GetOutputSizeList(); } + +const std::vector &EmbeddingLookUpPSKernel::workspace_sizes() const { return GetWorkspaceSizeList(); } +} // namespace ps +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_ps_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_ps_kernel.h new file mode 100644 index 0000000000..e23a90a11c --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_ps_kernel.h @@ -0,0 +1,46 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_PS_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_PS_KERNEL_H_ + +#include +#include +#include "backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/ps/pserver_kernel.h" + +namespace mindspore { +namespace kernel { +namespace ps { +class EmbeddingLookUpPSKernel : public EmbeddingLookUpCPUKernel, public PServerKernel { + public: + EmbeddingLookUpPSKernel(size_t rank_id, size_t pserver_num) : PServerKernel(rank_id, pserver_num) {} + ~EmbeddingLookUpPSKernel() override = default; + + void InitKernel(const std::shared_ptr>>> &) override; + void ReInit(const std::shared_ptr>>> &) override; + + bool Execute(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) override; + + const std::vector &input_sizes() const override; + const std::vector &output_sizes() const override; + const std::vector &workspace_sizes() const override; +}; +} // namespace ps +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_PS_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pserver_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pserver_kernel.cc new file mode 100644 index 0000000000..3aa421881a --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pserver_kernel.cc @@ -0,0 +1,24 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/kernel_compiler/cpu/ps/pserver_kernel.h" +#include "frontend/parallel/ps/util.h" + +namespace mindspore { +namespace kernel { +namespace ps {} // namespace ps +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pserver_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pserver_kernel.h new file mode 100644 index 0000000000..a2b6c4fa61 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pserver_kernel.h @@ -0,0 +1,57 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_KERNEL_PS_PSERVER_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_PS_PSERVER_KERNEL_H_ + +#include +#include +#include "backend/kernel_compiler/kernel.h" +#include "frontend/parallel/ps/util.h" + +namespace mindspore { +namespace kernel { +namespace ps { +using mindspore::parallel::ps::Util; +class PServerKernel { + public: + PServerKernel(size_t rank_id, size_t pserver_num) : rank_id_(rank_id), pserver_num_(pserver_num) {} + ~PServerKernel() = default; + PServerKernel(const PServerKernel &) = delete; + PServerKernel &operator=(const PServerKernel &) = delete; + + virtual void InitKernel(const std::shared_ptr>>> &) {} + virtual void ReInit(const std::shared_ptr>>> &) {} + virtual bool Execute(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) = 0; + + virtual const std::vector &input_sizes() const = 0; + virtual const std::vector &output_sizes() const = 0; + virtual const std::vector &workspace_sizes() const = 0; + + protected: + virtual void ReInit(const std::vector &) {} + void Shard(std::vector *shape, int axis) { + (*shape)[axis] = Util::LocalShard((*shape)[axis], rank_id_, pserver_num_); + } + + size_t rank_id_; + size_t pserver_num_; +}; +} // namespace ps +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_PS_PSERVER_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pull_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pull_kernel.cc new file mode 100644 index 0000000000..92c901d4c8 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pull_kernel.cc @@ -0,0 +1,25 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/kernel_compiler/cpu/ps/pull_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_CPU_KERNEL_T( + Pull, KernelAttr().AddInputAttr(kNumberTypeUInt64).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + PullKernel, float); +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pull_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pull_kernel.h new file mode 100644 index 0000000000..84dd9b819e --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pull_kernel.h @@ -0,0 +1,85 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_KERNEL_PS_PULL_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_PS_PULL_KERNEL_H_ + +#include +#include +#include "frontend/parallel/ps/worker.h" +#include "frontend/parallel/ps/util.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { +template +class PullKernel : public CPUKernel { + public: + PullKernel() : keys_size_(sizeof(size_t)), var_size_(sizeof(size_t)) {} + ~PullKernel() override = default; + + bool Launch(const std::vector &inputs, const std::vector &, const std::vector &) { + // If the paramter is embedding table, don't Pull from PServer. + if (param_name_.find("embedding") == std::string::npos && param_name_.find("wide_w") == std::string::npos) { + parallel::ps::Worker::GetInstance().Pull(key_, inputs[1]->addr, inputs[1]->size); + } + return true; + } + void Init(const CNodePtr &kernel_node) { + size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); + if (input_num != 2) { + MS_LOG(ERROR) << "Input number is " << input_num << ", but pull needs 2 inputs."; + return; + } + + auto key_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + for (size_t i = 0; i < key_shape.size(); i++) { + keys_size_ *= key_shape[i]; + } + auto var_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + for (size_t i = 0; i < var_shape.size(); i++) { + var_size_ *= var_shape[i]; + } + auto param_node = AnfAlgo::GetInputNode(kernel_node, 1); + MS_EXCEPTION_IF_NULL(param_node); + param_name_ = param_node->fullname_with_scope(); + + if (mindspore::parallel::ps::Util::IsRoleOfWorker()) { + key_ = AnfAlgo::GetNodeAttr(kernel_node, kAttrPsKey); + } + InitSizeLists(); + return; + } + void InitKernel(const CNodePtr &kernel_node) { return; } + + protected: + void InitSizeLists() { + input_size_list_.push_back(keys_size_); + input_size_list_.push_back(var_size_); + output_size_list_.push_back(0); + } + + private: + size_t key_; + 
size_t keys_size_; + size_t var_size_; + std::string param_name_; +}; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_PS_PULL_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/push_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/push_kernel.cc new file mode 100644 index 0000000000..96c1f15bda --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/push_kernel.cc @@ -0,0 +1,38 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "backend/kernel_compiler/cpu/ps/push_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_CPU_KERNEL_T(Push, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeUInt64), + PushKernel, float); + +MS_REG_CPU_KERNEL_T( + Push, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeUInt64), + PushKernel, float); +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/push_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/push_kernel.h new file mode 100644 index 0000000000..938792f3bf --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/push_kernel.h @@ -0,0 +1,80 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_KERNEL_PS_PUSH_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_PS_PUSH_KERNEL_H_ + +#include +#include +#include "frontend/parallel/ps/worker.h" +#include "frontend/parallel/ps/util.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { +template +class PushKernel : public CPUKernel { + public: + PushKernel() : key_(UINT64_MAX) {} + ~PushKernel() override = default; + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs) { + std::vector keys; + std::vector addrs; + std::vector sizes; + for (auto input : inputs) { + keys.push_back(key_); + addrs.push_back(reinterpret_cast(input->addr)); + sizes.push_back(SizeToInt(input->size) / sizeof(T)); + } + parallel::ps::Worker::GetInstance().Push(keys, addrs, sizes); + memcpy(outputs[0]->addr, &key_, sizeof(size_t)); + return true; + } + + void Init(const CNodePtr &kernel_node) { + key_ = AnfAlgo::GetNodeAttr(kernel_node, kAttrPsKey); + auto optim_input_shapes = AnfAlgo::GetNodeAttr>>(kernel_node, "optim_input_shapes"); + std::vector only_shape_indices = AnfAlgo::GetNodeAttr>(kernel_node, "only_shape_indices"); + MS_LOG(INFO) << "Key " << key_ << " optimizer input shapes are:" << optim_input_shapes; + MS_LOG(INFO) << "Only init shape indices are " << only_shape_indices; + for (size_t i = 0; i < optim_input_shapes.size(); i++) { + auto shape = optim_input_shapes[i]; + mindspore::parallel::ps::Worker::GetInstance().SetOptimInputShapes(key_, shape); + if (std::count(only_shape_indices.begin(), only_shape_indices.end(), i) == 0) { + size_t size = sizeof(T); + for (size_t j = 0; j < shape.size(); j++) { + size *= shape[j]; + } + input_size_list_.push_back(size); + } + } + + output_size_list_.push_back(sizeof(size_t)); + return; + } + + void InitKernel(const CNodePtr &kernel_node) { return; } + + private: + size_t key_; +}; +} // namespace kernel +} // namespace 
mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_PS_PUSH_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_adam_ps_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_adam_ps_kernel.cc new file mode 100644 index 0000000000..c7283954f8 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_adam_ps_kernel.cc @@ -0,0 +1,100 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "backend/kernel_compiler/cpu/ps/sparse_apply_adam_ps_kernel.h" +#include +#include "backend/kernel_compiler/common_utils.h" +#include "runtime/device/cpu/cpu_device_address.h" +#include "frontend/parallel/ps/util.h" + +namespace mindspore { +namespace kernel { +namespace ps { +void SparseApplyAdamPSKernel::InitKernel( + const std::shared_ptr>>> &shapes) { + const std::vector>> &shape_vec = *shapes; + std::vector &var_shape = *(shape_vec[0]); + std::vector &m_shape = *(shape_vec[1]); + std::vector &v_shape = *(shape_vec[2]); + const std::vector &grad_shape = *(shape_vec[9]); + const std::vector &indices_shape = *(shape_vec[10]); + + Shard(&var_shape, 0); + Shard(&m_shape, 0); + Shard(&v_shape, 0); + + if (!IsSameShape(var_shape, m_shape)) { + MS_LOG(EXCEPTION) << "var and m should have the same shape"; + } + if (!IsSameShape(var_shape, v_shape)) { + MS_LOG(EXCEPTION) << "var and v should have the same shape"; + } + var_first_dim_size_ = var_shape[0]; + for (size_t i = 1; i < var_shape.size(); ++i) { + if (var_shape[i] != grad_shape[i]) { + MS_LOG(EXCEPTION) << "The shape of var and grad must equal in dimension " << i; + } + var_outer_dim_size_ *= var_shape[i]; + } + if (indices_shape.size() != 1) { + MS_LOG(EXCEPTION) << "indices must be 1D"; + } + indices_size_ = indices_shape[0]; + if (grad_shape[0] != indices_size_) { + MS_LOG(ERROR) << "The first dimension of grad shape must be equal to indices"; + } + /* + if (AnfAlgo::HasNodeAttr(USE_NESTEROV, kernel_node)) { + use_nesterov_ = AnfAlgo::GetNodeAttr(kernel_node, "use_nesterov"); + } + */ + workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float)); + workspace_size_list_.emplace_back(indices_size_ * sizeof(int)); + workspace_size_list_.emplace_back(var_first_dim_size_ * var_outer_dim_size_ * sizeof(float)); +} + +void SparseApplyAdamPSKernel::ReInit(const std::shared_ptr>>> &shapes) { + const std::vector>> &shape_vec = *shapes; + const std::vector &indices_shape = 
*(shape_vec[0]); + indices_size_ = indices_shape[0]; + workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float); + workspace_size_list_[1] = indices_size_ * sizeof(int); +} + +void SparseApplyAdamPSKernel::ReInit(const std::vector &inputs) { + const auto &indices_addr = inputs[10]; + indices_size_ = indices_addr->size; + workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float); + workspace_size_list_[1] = indices_size_ * sizeof(int); +} + +bool SparseApplyAdamPSKernel::Execute(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) { + ReInit(inputs); + int *indices = reinterpret_cast(inputs[10]->addr); + for (size_t i = 0; i < inputs[10]->size / sizeof(int); i++) { + indices[i] -= rank_id_ * var_first_dim_size_; + } + return Launch(inputs, workspace, outputs); +} + +const std::vector &SparseApplyAdamPSKernel::input_sizes() const { return GetInputSizeList(); } + +const std::vector &SparseApplyAdamPSKernel::output_sizes() const { return GetOutputSizeList(); } + +const std::vector &SparseApplyAdamPSKernel::workspace_sizes() const { return GetWorkspaceSizeList(); } +} // namespace ps +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_adam_ps_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_adam_ps_kernel.h new file mode 100644 index 0000000000..337fcb3bf0 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_adam_ps_kernel.h @@ -0,0 +1,49 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_SPARSE_APPLY_ADAM_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_SPARSE_APPLY_ADAM_PS_KERNEL_H_ + +#include +#include +#include "backend/kernel_compiler/cpu/ps/pserver_kernel.h" +#include "backend/kernel_compiler/cpu/sparse_apply_adam_cpu_kernel.h" + +namespace mindspore { +namespace kernel { +namespace ps { +using mindspore::kernel::SparseApplyAdamCPUKernel; +class SparseApplyAdamPSKernel : public SparseApplyAdamCPUKernel, public PServerKernel { + public: + SparseApplyAdamPSKernel(size_t rank_id, size_t pserver_num) : PServerKernel(rank_id, pserver_num) {} + ~SparseApplyAdamPSKernel() override = default; + + void InitKernel(const std::shared_ptr>>> &) override; + void ReInit(const std::shared_ptr>>> &) override; + bool Execute(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) override; + + const std::vector &input_sizes() const override; + const std::vector &output_sizes() const override; + const std::vector &workspace_sizes() const override; + + protected: + void ReInit(const std::vector &) override; +}; +} // namespace ps +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_CPU_SPARSE_APPLY_ADAM_PS_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_ftrl_ps_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_ftrl_ps_kernel.cc new file mode 100644 index 0000000000..0392bd5a69 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_ftrl_ps_kernel.cc @@ -0,0 +1,89 
@@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "backend/kernel_compiler/cpu/ps/sparse_apply_ftrl_ps_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" + +namespace mindspore { +namespace kernel { +namespace ps { +void SparseApplyFtrlPSKernel::InitKernel( + const std::shared_ptr>>> &shapes) { + const std::vector>> &shape_vec = *shapes; + std::vector var_shape = *(shape_vec[0]); + std::vector accum_shape = *(shape_vec[1]); + std::vector linear_shape = *(shape_vec[2]); + std::vector grad_shape = *(shape_vec[3]); + std::vector indices_shape = *(shape_vec[4]); + + Shard(&var_shape, 0); + Shard(&accum_shape, 0); + Shard(&linear_shape, 0); + + var_first_dim_size_ = var_shape[0]; + for (size_t i = 1; i < var_shape.size(); ++i) { + if (var_shape[i] != grad_shape[i]) { + MS_LOG(EXCEPTION) << "The shape of var and grad must equal in dimension " << i; + } + var_outer_dim_size_ *= var_shape[i]; + } + if (indices_shape.size() != 1) { + MS_LOG(EXCEPTION) << "indices must be a 1D vector"; + } + indices_size_ = indices_shape[0]; + if (grad_shape[0] != indices_size_) { + MS_LOG(EXCEPTION) << "The first dimension of grad shape must be equal to indices"; + } + lr_ = 0.01; + l1_ = 1e-8; + l2_ = 1e-8; + lr_power_ = -0.5; + workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float)); + workspace_size_list_.emplace_back(indices_size_ * sizeof(int)); +} + +void 
SparseApplyFtrlPSKernel::ReInit(const std::shared_ptr>>> &shapes) { + const std::vector>> &shape_vec = *shapes; + std::vector indices_shape = *(shape_vec[0]); + indices_size_ = indices_shape[0]; + workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float); + workspace_size_list_[1] = indices_size_ * sizeof(int); +} + +void SparseApplyFtrlPSKernel::ReInit(const std::vector &inputs) { + const auto &indices_addr = inputs[4]; + indices_size_ = indices_addr->size; + workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float); + workspace_size_list_[1] = indices_size_ * sizeof(int); +} + +bool SparseApplyFtrlPSKernel::Execute(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) { + ReInit(inputs); + int *indices = reinterpret_cast(inputs[4]->addr); + for (size_t i = 0; i < inputs[4]->size / sizeof(int); i++) { + indices[i] -= rank_id_ * var_first_dim_size_; + } + return Launch(inputs, workspace, outputs); +} + +const std::vector &SparseApplyFtrlPSKernel::input_sizes() const { return GetInputSizeList(); } + +const std::vector &SparseApplyFtrlPSKernel::output_sizes() const { return GetOutputSizeList(); } + +const std::vector &SparseApplyFtrlPSKernel::workspace_sizes() const { return GetWorkspaceSizeList(); } +} // namespace ps +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_ftrl_ps_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_ftrl_ps_kernel.h new file mode 100644 index 0000000000..d97f19d349 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_ftrl_ps_kernel.h @@ -0,0 +1,50 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_SPARSE_APPLY_FTRL_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_SPARSE_APPLY_FTRL_PS_KERNEL_H_ + +#include +#include +#include "backend/kernel_compiler/cpu/ps/pserver_kernel.h" +#include "backend/kernel_compiler/cpu/sparse_apply_ftrl_cpu_kernel.h" + +namespace mindspore { +namespace kernel { +namespace ps { +using mindspore::kernel::SparseApplyFtrlCPUKernel; +class SparseApplyFtrlPSKernel : public SparseApplyFtrlCPUKernel, public PServerKernel { + public: + SparseApplyFtrlPSKernel(size_t rank_id, size_t pserver_num) : PServerKernel(rank_id, pserver_num) {} + ~SparseApplyFtrlPSKernel() override = default; + + void InitKernel(const std::shared_ptr>>> &) override; + void ReInit(const std::shared_ptr>>> &) override; + + bool Execute(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) override; + + const std::vector &input_sizes() const override; + const std::vector &output_sizes() const override; + const std::vector &workspace_sizes() const override; + + protected: + void ReInit(const std::vector &) override; +}; +} // namespace ps +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_CPU_SPARSE_APPLY_FTRL_PS_KERNEL_H_ diff --git a/mindspore/ccsrc/kernel/cpu/reduce_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/cpu/reduce_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.cc index e56f2af8c7..0dddf1d3c4 100644 --- 
a/mindspore/ccsrc/kernel/cpu/reduce_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.cc @@ -16,8 +16,8 @@ #include #include #include -#include "kernel/cpu/reduce_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/reduce_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/reduce_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.h similarity index 92% rename from mindspore/ccsrc/kernel/cpu/reduce_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.h index 3317ec72ed..a9696bad49 100644 --- a/mindspore/ccsrc/kernel/cpu/reduce_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.h @@ -18,8 +18,8 @@ #include #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/reduce_scatter_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_scatter_cpu_kernel.cc similarity index 92% rename from mindspore/ccsrc/kernel/cpu/reduce_scatter_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_scatter_cpu_kernel.cc index 19a4e907a0..f44c109ace 100644 --- a/mindspore/ccsrc/kernel/cpu/reduce_scatter_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_scatter_cpu_kernel.cc @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/reduce_scatter_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" -#include "device/cpu/mpi/mpi_adapter.h" +#include "backend/kernel_compiler/cpu/reduce_scatter_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" +#include "runtime/device/cpu/mpi/mpi_adapter.h" #include "ir/primitive.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/cpu/reduce_scatter_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_scatter_cpu_kernel.h similarity index 93% rename from mindspore/ccsrc/kernel/cpu/reduce_scatter_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_scatter_cpu_kernel.h index 5c6907602a..317d7df443 100644 --- a/mindspore/ccsrc/kernel/cpu/reduce_scatter_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_scatter_cpu_kernel.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_KERNEL_CPU_REDUCE_SCATTER_CPU_KERNEL_H_ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/reshape_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/reshape_cpu_kernel.cc similarity index 93% rename from mindspore/ccsrc/kernel/cpu/reshape_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/reshape_cpu_kernel.cc index 7342a19e99..6370fdc78a 100644 --- a/mindspore/ccsrc/kernel/cpu/reshape_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/reshape_cpu_kernel.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/reshape_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/reshape_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/reshape_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/reshape_cpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/cpu/reshape_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/reshape_cpu_kernel.h index 6ca746f4ac..04f1db3304 100644 --- a/mindspore/ccsrc/kernel/cpu/reshape_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/reshape_cpu_kernel.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_KERNEL_CPU_RESHAPE_CPU_KERNEL_H_ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/slice_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/slice_cpu_kernel.cc similarity index 98% rename from mindspore/ccsrc/kernel/cpu/slice_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/slice_cpu_kernel.cc index d2530430e9..c6657a845a 100644 --- a/mindspore/ccsrc/kernel/cpu/slice_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/slice_cpu_kernel.cc @@ -13,9 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/slice_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" -#include "ir/primitive.h" +#include "backend/kernel_compiler/cpu/slice_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/slice_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/slice_cpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/cpu/slice_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/slice_cpu_kernel.h index 913c993d7a..03b7ecdc17 100644 --- a/mindspore/ccsrc/kernel/cpu/slice_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/slice_cpu_kernel.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_KERNEL_CPU_SLICE_CPU_KERNEL_H_ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/slice_grad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/slice_grad_cpu_kernel.cc similarity index 98% rename from mindspore/ccsrc/kernel/cpu/slice_grad_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/slice_grad_cpu_kernel.cc index 92eaffe8c6..20904e0504 100644 --- a/mindspore/ccsrc/kernel/cpu/slice_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/slice_grad_cpu_kernel.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/slice_grad_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/slice_grad_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" #include "ir/primitive.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/cpu/slice_grad_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/slice_grad_cpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/cpu/slice_grad_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/slice_grad_cpu_kernel.h index 1e42c8ac68..ec480d7e80 100644 --- a/mindspore/ccsrc/kernel/cpu/slice_grad_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/slice_grad_cpu_kernel.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_KERNEL_CPU_SLICE_GRAD_CPU_KERNEL_H_ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/sparse_apply_adam_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_adam_cpu_kernel.cc similarity index 97% rename from mindspore/ccsrc/kernel/cpu/sparse_apply_adam_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_adam_cpu_kernel.cc index ef3db78275..2ff8e77fcd 100644 --- a/mindspore/ccsrc/kernel/cpu/sparse_apply_adam_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_adam_cpu_kernel.cc @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/sparse_apply_adam_cpu_kernel.h" -#include "kernel/common_utils.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/sparse_apply_adam_cpu_kernel.h" +#include "backend/kernel_compiler/common_utils.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/sparse_apply_adam_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_adam_cpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/cpu/sparse_apply_adam_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_adam_cpu_kernel.h index c2770d0ebd..5d3d4193f7 100644 --- a/mindspore/ccsrc/kernel/cpu/sparse_apply_adam_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_adam_cpu_kernel.h @@ -18,8 +18,8 @@ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { @@ -33,7 +33,7 @@ class SparseApplyAdamCPUKernel : public CPUKernel { bool Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs) override; - private: + protected: size_t indices_size_{0}; size_t var_first_dim_size_{0}; size_t var_outer_dim_size_{1}; diff --git a/mindspore/ccsrc/kernel/cpu/sparse_apply_ftrl_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_ftrl_cpu_kernel.cc similarity index 89% rename from mindspore/ccsrc/kernel/cpu/sparse_apply_ftrl_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_ftrl_cpu_kernel.cc index 0537e746f3..2662604e19 100644 --- a/mindspore/ccsrc/kernel/cpu/sparse_apply_ftrl_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_ftrl_cpu_kernel.cc @@ -13,9 +13,9 @@ * See the License for the specific language governing 
permissions and * limitations under the License. */ -#include "kernel/cpu/sparse_apply_ftrl_cpu_kernel.h" -#include "kernel/common_utils.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/sparse_apply_ftrl_cpu_kernel.h" +#include "backend/kernel_compiler/common_utils.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { @@ -66,6 +66,8 @@ void SparseApplyFtrlCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) MS_EXCEPTION_IF_NULL(kernel_node); workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float)); workspace_size_list_.emplace_back(indices_size_ * sizeof(int)); + workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float)); + workspace_size_list_.emplace_back(indices_size_ * sizeof(int)); } void SparseApplyFtrlCPUKernel::InitKernel(const CNodePtr &kernel_node) { @@ -130,9 +132,12 @@ bool SparseApplyFtrlCPUKernel::Launch(const std::vector &inp auto indices = reinterpret_cast(inputs[4]->addr); auto new_grad = reinterpret_cast(workspace[0]->addr); auto new_indices = reinterpret_cast(workspace[1]->addr); + auto tmp_grad = reinterpret_cast(workspace[2]->addr); + auto tmp_indices = reinterpret_cast(workspace[3]->addr); SparseGradient unique_sparse_grad({new_grad, new_indices, indices_size_}); - ReduceSparseGradient(SparseGradient({grad, indices, indices_size_}), &unique_sparse_grad, var_first_dim_size_, - var_outer_dim_size_); + SparseGradient tmp_sparse_grad({tmp_grad, tmp_indices, indices_size_}); + TwoLevelReduceSparseGradient(SparseGradient({grad, indices, indices_size_}), &tmp_sparse_grad, &unique_sparse_grad, + var_first_dim_size_, var_outer_dim_size_); MultiThreadComputeParams input_params; input_params.var_ = var; diff --git a/mindspore/ccsrc/kernel/cpu/sparse_apply_ftrl_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_ftrl_cpu_kernel.h similarity index 95% rename from 
mindspore/ccsrc/kernel/cpu/sparse_apply_ftrl_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_ftrl_cpu_kernel.h index 9e79dc83c7..af8796d8a5 100644 --- a/mindspore/ccsrc/kernel/cpu/sparse_apply_ftrl_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_ftrl_cpu_kernel.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_KERNEL_CPU_SPARSE_APPLY_FTRL_CPU_KERNEL_H_ #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { @@ -32,7 +32,7 @@ class SparseApplyFtrlCPUKernel : public CPUKernel { bool Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs) override; - private: + protected: size_t indices_size_{0}; size_t var_first_dim_size_{0}; size_t var_outer_dim_size_{1}; diff --git a/mindspore/ccsrc/kernel/cpu/sparse_apply_lazy_adam_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_lazy_adam_cpu_kernel.cc similarity index 89% rename from mindspore/ccsrc/kernel/cpu/sparse_apply_lazy_adam_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_lazy_adam_cpu_kernel.cc index 16cb901b04..636d92dcbb 100644 --- a/mindspore/ccsrc/kernel/cpu/sparse_apply_lazy_adam_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_lazy_adam_cpu_kernel.cc @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/sparse_apply_lazy_adam_cpu_kernel.h" -#include "kernel/common_utils.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/sparse_apply_lazy_adam_cpu_kernel.h" +#include "backend/kernel_compiler/common_utils.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { @@ -61,6 +61,8 @@ void SparseApplyLazyAdamCPUKernel::InitInputOutputSize(const CNodePtr &kernel_no MS_EXCEPTION_IF_NULL(kernel_node); workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float)); workspace_size_list_.emplace_back(indices_size_ * sizeof(int)); + workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float)); + workspace_size_list_.emplace_back(indices_size_ * sizeof(int)); } void SparseApplyLazyAdamCPUKernel::InitKernel(const CNodePtr &kernel_node) { @@ -121,10 +123,13 @@ bool SparseApplyLazyAdamCPUKernel::Launch(const std::vector auto indices = reinterpret_cast(inputs[10]->addr); auto new_grad = reinterpret_cast(workspace[0]->addr); auto new_indices = reinterpret_cast(workspace[1]->addr); + auto tmp_grad = reinterpret_cast(workspace[2]->addr); + auto tmp_indices = reinterpret_cast(workspace[3]->addr); SparseGradient unique_sparse_grad({new_grad, new_indices, indices_size_}); - ReduceSparseGradient(SparseGradient({grad, indices, indices_size_}), &unique_sparse_grad, var_first_dim_size_, - var_outer_dim_size_); + SparseGradient tmp_sparse_grad({tmp_grad, tmp_indices, indices_size_}); + TwoLevelReduceSparseGradient(SparseGradient({grad, indices, indices_size_}), &tmp_sparse_grad, &unique_sparse_grad, + var_first_dim_size_, var_outer_dim_size_); lr = lr * std::sqrt(1 - beta2_power) / (1 - beta1_power); MultiThreadComputeParams input_params; diff --git a/mindspore/ccsrc/kernel/cpu/sparse_apply_lazy_adam_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_lazy_adam_cpu_kernel.h similarity index 95% rename from 
mindspore/ccsrc/kernel/cpu/sparse_apply_lazy_adam_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_lazy_adam_cpu_kernel.h index 795568a64d..ee95db8f33 100644 --- a/mindspore/ccsrc/kernel/cpu/sparse_apply_lazy_adam_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_lazy_adam_cpu_kernel.h @@ -18,8 +18,8 @@ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_proximal_adagrad_cpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_proximal_adagrad_cpu_kernel.cc index 6069fb708e..efba35ad8c 100644 --- a/mindspore/ccsrc/kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_proximal_adagrad_cpu_kernel.cc @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel.h" -#include "kernel/common_utils.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/sparse_apply_proximal_adagrad_cpu_kernel.h" +#include "backend/kernel_compiler/common_utils.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_proximal_adagrad_cpu_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_proximal_adagrad_cpu_kernel.h index ff7da7966c..56b180ec0b 100644 --- a/mindspore/ccsrc/kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_proximal_adagrad_cpu_kernel.h @@ -18,8 +18,8 @@ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/sub_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/sub_cpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/cpu/sub_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/sub_cpu_kernel.cc index 543f0e5cdd..1e759390a2 100644 --- a/mindspore/ccsrc/kernel/cpu/sub_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sub_cpu_kernel.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ #include -#include "kernel/cpu/sub_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/sub_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/sub_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/sub_cpu_kernel.h similarity index 92% rename from mindspore/ccsrc/kernel/cpu/sub_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/sub_cpu_kernel.h index 54b2c8951a..d1b55ded90 100644 --- a/mindspore/ccsrc/kernel/cpu/sub_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sub_cpu_kernel.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_KERNEL_CPU_SUB_CPU_KERNEL_H_ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/transpose_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/transpose_cpu_kernel.cc similarity index 93% rename from mindspore/ccsrc/kernel/cpu/transpose_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/transpose_cpu_kernel.cc index f2ac9350cb..8ec3698cf6 100644 --- a/mindspore/ccsrc/kernel/cpu/transpose_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/transpose_cpu_kernel.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "kernel/cpu/transpose_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/transpose_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { const size_t kMaxDim = 100; diff --git a/mindspore/ccsrc/kernel/cpu/transpose_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/transpose_cpu_kernel.h similarity index 90% rename from mindspore/ccsrc/kernel/cpu/transpose_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/transpose_cpu_kernel.h index d882f4fa51..15796f9f3c 100644 --- a/mindspore/ccsrc/kernel/cpu/transpose_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/transpose_cpu_kernel.h @@ -18,8 +18,8 @@ #include #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { class TransposeCPUFwdKernel : public CPUKernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/argmax_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/argmax_gpu_kernel.cc similarity index 91% rename from mindspore/ccsrc/kernel/gpu/arrays/argmax_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/argmax_gpu_kernel.cc index 71f612d07c..39f535a2af 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/argmax_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/argmax_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/arrays/argmax_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/arrays/argmax_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/argmax_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/argmax_gpu_kernel.h similarity index 93% rename from mindspore/ccsrc/kernel/gpu/arrays/argmax_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/argmax_gpu_kernel.h index 3df70d0960..61a53c5b40 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/argmax_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/argmax_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_ARGMAXGPUKERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/argmax_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/argmax_impl.cuh" namespace mindspore { namespace kernel { #define ARGMAX_MAX_DIMENSION 2 diff --git a/mindspore/ccsrc/kernel/gpu/arrays/argmaxwithvalue_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/argmaxwithvalue_gpu_kernel.cc similarity index 93% rename from mindspore/ccsrc/kernel/gpu/arrays/argmaxwithvalue_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/argmaxwithvalue_gpu_kernel.cc index 24c8a9a730..5ead387ccc 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/argmaxwithvalue_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/argmaxwithvalue_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/arrays/argmaxwithvalue_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/arrays/argmaxwithvalue_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/argmaxwithvalue_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/argmaxwithvalue_gpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/gpu/arrays/argmaxwithvalue_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/argmaxwithvalue_gpu_kernel.h index 304f0ab161..d2369023fb 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/argmaxwithvalue_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/argmaxwithvalue_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_ARGMAXWITHVALUEGPUKERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/argmaxwithvalue_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/argmaxwithvalue_impl.cuh" namespace mindspore { namespace kernel { template diff --git a/mindspore/ccsrc/kernel/gpu/arrays/array_reduce_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/gpu/arrays/array_reduce_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.cc index f378604624..5d34a1c9c2 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/array_reduce_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/arrays/array_reduce_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/array_reduce_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.h similarity index 98% rename from mindspore/ccsrc/kernel/gpu/arrays/array_reduce_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.h index 4a52439305..b96f63670d 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/array_reduce_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.h @@ -20,9 +20,9 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { const std::map kReduceTypeMap = { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/concatv2_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/arrays/concatv2_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.cc index 3bca6a69d3..f5979dc62d 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/concatv2_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/arrays/concatv2_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/concatv2_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/gpu/arrays/concatv2_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h index a91c50ce69..15ccedcaec 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/concatv2_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_CONCATV2_GPU_KERNEL_H #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/concatv2_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/gather_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/gather_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/arrays/gather_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/gather_gpu_kernel.cc index dc595e4793..8d3c06e805 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/gather_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/gather_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/arrays/gather_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/arrays/gather_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/gather_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/gather_gpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/gpu/arrays/gather_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/gather_gpu_kernel.h index 72a05b0915..2211361cee 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/gather_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/gather_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_GATHER_GPU_KERNEL_H #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/gather.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/gather.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/one_hot_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/one_hot_gpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/gpu/arrays/one_hot_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/one_hot_gpu_kernel.cc index 7c160f8f58..e764a08dc8 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/one_hot_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/one_hot_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/arrays/one_hot_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/arrays/one_hot_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/one_hot_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/one_hot_gpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/gpu/arrays/one_hot_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/one_hot_gpu_kernel.h index c8b64e7243..6c46a63e69 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/one_hot_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/one_hot_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_ONEHOT_GPU_KERNEL_H #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/one_hot_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/one_hot_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/select_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/select_gpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/gpu/arrays/select_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/select_gpu_kernel.cc index 41c9c2243f..3c1323de07 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/select_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/select_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/arrays/select_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/arrays/select_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/select_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/select_gpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/gpu/arrays/select_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/select_gpu_kernel.h index f1b6c5853a..73e60c44bd 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/select_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/select_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_SELECT_GPU_KERNEL_H #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/select_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/select_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/slice_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/gpu/arrays/slice_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.cc index 53161c29c2..4c9ff2b7f4 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/slice_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/arrays/slice_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/slice_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.h similarity index 97% rename from mindspore/ccsrc/kernel/gpu/arrays/slice_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.h index 7f71e548ad..f8ecb9ccf0 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/slice_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_SLICE_GPU_KERNEL_H #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/slice_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/slice_grad_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/gpu/arrays/slice_grad_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.cc index b91aafb734..2eeb3acf73 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/slice_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/arrays/slice_grad_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/slice_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/gpu/arrays/slice_grad_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.h index bf24272d93..006cbf0266 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/slice_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_SLICE_GRAD_GPU_KERNEL_H #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/slice_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/transpose_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/transpose_gpu_kernel.cc similarity index 91% rename from mindspore/ccsrc/kernel/gpu/arrays/transpose_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/transpose_gpu_kernel.cc index 338e7a4093..77e7de6fef 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/transpose_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/transpose_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/arrays/transpose_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/arrays/transpose_gpu_kernel.h" namespace mindspore { namespace kernel { MS_REG_GPU_KERNEL_ONE(Transpose, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), diff --git a/mindspore/ccsrc/kernel/gpu/arrays/transpose_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/transpose_gpu_kernel.h similarity index 93% rename from mindspore/ccsrc/kernel/gpu/arrays/transpose_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/transpose_gpu_kernel.h index 61be9b68fe..0f9c710e3e 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/transpose_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/transpose_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_TRANSPOSE_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/transpose_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/transpose_impl.cuh" namespace mindspore { namespace kernel { template diff --git a/mindspore/ccsrc/kernel/gpu/arrays/unsorted_segment_sum_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unsorted_segment_sum_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/arrays/unsorted_segment_sum_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unsorted_segment_sum_gpu_kernel.cc index 9962d55988..4be887ec79 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/unsorted_segment_sum_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unsorted_segment_sum_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/arrays/unsorted_segment_sum_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/arrays/unsorted_segment_sum_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/unsorted_segment_sum_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unsorted_segment_sum_gpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/gpu/arrays/unsorted_segment_sum_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unsorted_segment_sum_gpu_kernel.h index a20375ee29..1f7884c650 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/unsorted_segment_sum_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unsorted_segment_sum_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_UNSORT_SEGMENT_SUM_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/unsorted_segment_sum.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/unsorted_segment_sum.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/control/recv_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/control/recv_gpu_kernel.cc similarity index 92% rename from mindspore/ccsrc/kernel/gpu/control/recv_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/control/recv_gpu_kernel.cc index 5468aa6500..a89d4e9baf 100644 --- a/mindspore/ccsrc/kernel/gpu/control/recv_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/control/recv_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/control/recv_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/control/recv_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/control/recv_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/control/recv_gpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/gpu/control/recv_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/control/recv_gpu_kernel.h index 12b4eed132..7de32ade4f 100644 --- a/mindspore/ccsrc/kernel/gpu/control/recv_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/control/recv_gpu_kernel.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_CONTROL_RECV_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/control/send_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/control/send_gpu_kernel.cc similarity index 92% rename from mindspore/ccsrc/kernel/gpu/control/send_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/control/send_gpu_kernel.cc index c417c30bb3..946038bb18 100644 --- a/mindspore/ccsrc/kernel/gpu/control/send_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/control/send_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/control/send_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/control/send_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/control/send_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/control/send_gpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/gpu/control/send_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/control/send_gpu_kernel.h index a26e41aa1e..beea19a435 100644 --- a/mindspore/ccsrc/kernel/gpu/control/send_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/control/send_gpu_kernel.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_CONTROL_SEND_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/adam_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/adam_impl.cu similarity index 97% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/adam_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/adam_impl.cu index 3ec63ee03a..615b94723d 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/adam_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/adam_impl.cu @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/cuda_impl/adam_impl.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/adam_impl.cuh" template __device__ __forceinline__ T SqrtFunc(T input) { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/adam_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/adam_impl.cuh similarity index 95% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/adam_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/adam_impl.cuh index f48a113c26..7fc4a3e949 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/adam_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/adam_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_ADAM_IMPL_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_ADAM_IMPL_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void ApplyAdam(const size_t size, const T *gradient, const T *beta1_power, const T *beta2_power, const T *learning_rate, const T *beta1, const T *beta2, const T *epsilon, T *variable, T *m, T *v, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/adam_weight_decay_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/adam_weight_decay_impl.cu similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/adam_weight_decay_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/adam_weight_decay_impl.cu index dfadaa09d6..3bad9a61e1 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/adam_weight_decay_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/adam_weight_decay_impl.cu @@ -15,7 +15,7 @@ */ #include "adam_weight_decay_impl.cuh" -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template __global__ void AdamWeightDecayKernel(const int element_num_, const bool need_decay, const float *beta1, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/adam_weight_decay_impl.cuh 
b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/adam_weight_decay_impl.cuh similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/adam_weight_decay_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/adam_weight_decay_impl.cuh diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/argmax_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/argmax_impl.cu similarity index 95% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/argmax_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/argmax_impl.cu index e8fab27dda..a4f1f6680b 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/argmax_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/argmax_impl.cu @@ -15,7 +15,7 @@ */ #include "argmax_impl.cuh" -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" #include "include/cuda_fp16.h" template __global__ void Argmax1D(const T* input, const int channel_size, int* output) { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/argmax_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/argmax_impl.cuh similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/argmax_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/argmax_impl.cuh diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/argmaxwithvalue_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/argmaxwithvalue_impl.cu similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/argmaxwithvalue_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/argmaxwithvalue_impl.cu index 3313fc6853..46a8a75af9 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/argmaxwithvalue_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/argmaxwithvalue_impl.cu @@ -15,7 +15,7 @@ */ #include "argmaxwithvalue_impl.cuh" -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" #include "include/cuda_fp16.h" template 
__global__ void ArgmaxWithValue(const T* input, const int bound, int outerSize, int innerSize, S* index, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/argmaxwithvalue_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/argmaxwithvalue_impl.cuh similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/argmaxwithvalue_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/argmaxwithvalue_impl.cuh diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/assign_add_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/assign_add_impl.cu similarity index 97% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/assign_add_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/assign_add_impl.cu index d44ad99202..604391ccf3 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/assign_add_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/assign_add_impl.cu @@ -15,7 +15,7 @@ */ #include "assign_add_impl.cuh" -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" #include "include/cuda_fp16.h" template __global__ void AssignAdd(const size_t size, T* ref, const T* value, T* output) { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/assign_add_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/assign_add_impl.cuh similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/assign_add_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/assign_add_impl.cuh diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/batchnorm_fold2_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/batchnorm_fold2_impl.cu similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/batchnorm_fold2_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/batchnorm_fold2_impl.cu diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/batchnorm_fold2_impl.cuh 
b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/batchnorm_fold2_impl.cuh similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/batchnorm_fold2_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/batchnorm_fold2_impl.cuh index c3ce08dfd0..3a895405b1 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/batchnorm_fold2_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/batchnorm_fold2_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_BATCHNORMFOLD2_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_BATCHNORMFOLD2_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void BatchNormFold2Forward(const T *x, const T *beta, const T *gamma, const T *batch_std, const T *batch_mean, const T *running_std, const T *running_mean, const int *global_step, T *y, int freeze_bn, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/batchnorm_fold_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/batchnorm_fold_impl.cu similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/batchnorm_fold_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/batchnorm_fold_impl.cu index ddc2803f56..dae9a7d629 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/batchnorm_fold_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/batchnorm_fold_impl.cu @@ -18,7 +18,7 @@ #include #include #include "batchnorm_fold_impl.cuh" -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template __global__ void UpdateRunningStd(int channel_size, const double epsilon, T* running_std) { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/batchnorm_fold_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/batchnorm_fold_impl.cuh similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/batchnorm_fold_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/batchnorm_fold_impl.cuh 
diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_grad_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_grad_impl.cu similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_grad_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_grad_impl.cu index 5aa087e7f5..262d4c438d 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_grad_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_grad_impl.cu @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "kernel/gpu/cuda_impl/broadcast_grad_impl.cuh" -#include "device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/broadcast_grad_impl.cuh" +#include "runtime/device/gpu/cuda_common.h" template struct MinimumGradFunc { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_grad_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_grad_impl.cuh similarity index 97% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_grad_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_grad_impl.cuh index d154eddd4c..7742043592 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_grad_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_grad_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_BROADCAST_GRAD_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_BROADCAST_GRAD_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" enum BroadcastGradOpType { BROADCAST_GRAD_TYPE_MAXIMUM = 0, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu index afa94fc56c..a72daa4234 100644 --- 
a/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "kernel/gpu/cuda_impl/broadcast_impl.cuh" -#include "device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cuh" +#include "runtime/device/gpu/cuda_common.h" template struct GreaterFunc { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cuh similarity index 97% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cuh index 5f6992511d..dfc4c75c93 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_BROADCAST_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_BROADCAST_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" enum BroadcastOpType { BROADCAST_TYPE_GREATER = 0, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/concatv2_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cu similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/concatv2_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cu index 5cccf183ea..147782591a 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/concatv2_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cu @@ -17,7 +17,7 @@ #include #include #include -#include "kernel/gpu/cuda_impl/concatv2_impl.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cuh" template __global__ void Concat(const size_t size, const int w1, const int w2, const T* input_1, const T* input_2, T* output) { for (size_t pos = blockIdx.x * blockDim.x + 
threadIdx.x; pos < (size); pos += blockDim.x * gridDim.x) { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/concatv2_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cuh similarity index 97% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/concatv2_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cuh index b6932aa4a1..7bd32c140f 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/concatv2_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_CONCATV2IMPL_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_CONCATV2IMPL_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void ConcatKernel(const size_t size, const int w1, const int w2, const T* input_1, const T* input_2, T* output, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/correction_mul_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/correction_mul_impl.cu similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/correction_mul_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/correction_mul_impl.cu index ac2f99ed9a..87aaf1351c 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/correction_mul_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/correction_mul_impl.cu @@ -16,7 +16,7 @@ #include #include "correction_mul_impl.cuh" -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template __global__ void CorrectionMul(const T* weight, const T* gamma, const T* running_std, const int batchsize, const int chw, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/correction_mul_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/correction_mul_impl.cuh similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/correction_mul_impl.cuh rename to 
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/correction_mul_impl.cuh diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/cross_entropy_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/cross_entropy_impl.cu similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/cross_entropy_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/cross_entropy_impl.cu diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/cross_entropy_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/cross_entropy_impl.cuh similarity index 97% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/cross_entropy_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/cross_entropy_impl.cuh index 54ae072892..cb4ccc2c44 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/cross_entropy_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/cross_entropy_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_CROSSENTROPY_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_CROSSENTROPY_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void CrossEntropyWithSparse(const T *logits, const S *labels, const size_t batch_size, const size_t class_num, T *loss, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/dropout_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/dropout_impl.cu similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/dropout_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/dropout_impl.cu diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/dropout_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/dropout_impl.cuh similarity index 96% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/dropout_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/dropout_impl.cuh index f89d42ce49..3ba27eeeea 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/dropout_impl.cuh +++ 
b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/dropout_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_DROPOUT_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_DROPOUT_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void DropoutForward(const T *input, T *mask, T *output, float *mask_f, size_t num_count, float keep_prob, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/equalcount_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/equalcount_impl.cu similarity index 94% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/equalcount_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/equalcount_impl.cu index 38dd79c441..e6f424c661 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/equalcount_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/equalcount_impl.cu @@ -15,7 +15,7 @@ */ #include "equalcount_impl.cuh" -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template __global__ void EqualCount(const int size, const T* input1, const T* input2, T* output) { T equal_count = 0; diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/equalcount_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/equalcount_impl.cuh similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/equalcount_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/equalcount_impl.cuh diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_perchannel_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/fake_quant_perchannel_impl.cu similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_perchannel_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/fake_quant_perchannel_impl.cu diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_perchannel_impl.cuh 
b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/fake_quant_perchannel_impl.cuh similarity index 95% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_perchannel_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/fake_quant_perchannel_impl.cuh index ad2e387b08..e17615db67 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_perchannel_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/fake_quant_perchannel_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_FAKE_QUANT_PERCHANNEL_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_FAKE_QUANT_PERCHANNEL_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" void CalNudgePerChannel(float *input_min, float *input_max, const float quant_min, const float quant_max, float *nudge_min, float *nudge_max, float *scale, const int channel_num, const bool symmetric, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_perlayer_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/fake_quant_perlayer_impl.cu similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_perlayer_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/fake_quant_perlayer_impl.cu diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_perlayer_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/fake_quant_perlayer_impl.cuh similarity index 95% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_perlayer_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/fake_quant_perlayer_impl.cuh index dda95ed781..5f6675b2d7 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_perlayer_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/fake_quant_perlayer_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_FAKE_QUANT_PERLAYER_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_FAKE_QUANT_PERLAYER_H_ -#include 
"device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" void CalNudgePerLayer(float *input_min, float *input_max, const float quant_min, const float quant_max, float *nudge_min, float *nudge_max, float *scale, const bool symmetric, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/float_status_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/float_status_impl.cu similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/float_status_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/float_status_impl.cu index c2fd5ecd70..bc400eb704 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/float_status_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/float_status_impl.cu @@ -15,7 +15,7 @@ */ #include "include/cuda_runtime.h" -#include "kernel/gpu/cuda_impl/float_status_impl.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/float_status_impl.cuh" template __global__ void IsNan(const size_t size, const T* input, bool* out) { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/float_status_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/float_status_impl.cuh similarity index 96% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/float_status_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/float_status_impl.cuh index da488ff937..fbe063e72a 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/float_status_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/float_status_impl.cuh @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_FLOATSTATUS_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_FLOATSTATUS_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void CalFloatStatus(const size_t size, const T *input, T *output, cudaStream_t stream); template diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/ftrl_impl.cu 
b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/ftrl_impl.cu similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/ftrl_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/ftrl_impl.cu index ea6ffdbbdc..be4415d509 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/ftrl_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/ftrl_impl.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/gpu/cuda_impl/ftrl_impl.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/ftrl_impl.cuh" template __device__ __forceinline__ T PowFunc(T x, T y) { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/ftrl_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/ftrl_impl.cuh similarity index 96% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/ftrl_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/ftrl_impl.cuh index ba4a8fa816..b5f0f82afe 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/ftrl_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/ftrl_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_FTRL_IMPL_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_FTRL_IMPL_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void ApplyFtrl(const size_t size, const T *gradient, const T *learning_rate, const T *l1_regularization, const T *l2_regularization, const T *learning_rate_power, T *variable, T *accumulation, T *linear, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/gather.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/gather.cu similarity index 95% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/gather.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/gather.cu index 6bde359d9b..03b58b81a0 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/gather.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/gather.cu @@ -15,8 +15,8 @@ */ #include -#include 
"kernel/gpu/cuda_impl/gather.cuh" -#include "device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/gather.cuh" +#include "runtime/device/gpu/cuda_common.h" template __global__ void GatherKernel(T *input, S *indices, T *output, size_t output_dim0, size_t output_dim1, size_t output_dim2, size_t input_dim1) { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/gather.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/gather.cuh similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/gather.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/gather.cuh diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/gelu_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/gelu_impl.cu similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/gelu_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/gelu_impl.cu index e460caec9e..a4dc6648cc 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/gelu_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/gelu_impl.cu @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "kernel/gpu/cuda_impl/gelu_impl.cuh" -#include "device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/gelu_impl.cuh" +#include "runtime/device/gpu/cuda_common.h" template __global__ void GeluKernel(size_t size, T *input_addr, T *output_addr) { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/gelu_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/gelu_impl.cuh similarity index 95% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/gelu_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/gelu_impl.cuh index 7a8e1fae8a..1e69f26d57 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/gelu_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/gelu_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_GELU_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_GELU_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void Gelu(size_t input_size, T* input_addr, T* output_addr, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_grad_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/layer_norm_grad_impl.cu similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_grad_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/layer_norm_grad_impl.cu index e887b98eca..fcb7418952 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_grad_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/layer_norm_grad_impl.cu @@ -17,8 +17,8 @@ #include #include #include -#include "kernel/gpu/cuda_impl/layer_norm_grad_impl.cuh" -#include "kernel/gpu/cuda_impl/layer_norm_impl.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/layer_norm_grad_impl.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/layer_norm_impl.cuh" constexpr int NUM_PER_THREAD_REDUCE = 4; constexpr int WARP_SIZE = 32; diff --git 
a/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_grad_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/layer_norm_grad_impl.cuh similarity index 96% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_grad_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/layer_norm_grad_impl.cuh index 9f7d57cdb9..13d7a58614 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_grad_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/layer_norm_grad_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_LAYER_NORM_GRAD_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_LAYER_NORM_GRAD_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void LayerNormGrad(const int& row_dim, const int& col_dim, const int& param_dim, const T& epsilon, const T* dy, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/layer_norm_impl.cu similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/layer_norm_impl.cu index cfb60f0ba6..138300b303 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/layer_norm_impl.cu @@ -17,7 +17,7 @@ #include #include #include -#include "kernel/gpu/cuda_impl/layer_norm_impl.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/layer_norm_impl.cuh" constexpr int NUM_PER_THREAD_REDUCE = 4; constexpr int WARP_SIZE = 32; diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/layer_norm_impl.cuh similarity index 96% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/layer_norm_impl.cuh index c06a698384..9548b30d44 100644 --- 
a/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/layer_norm_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_LAYER_NORM_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_LAYER_NORM_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template struct DynamicSharedMem; diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/minmax_update_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/minmax_update_impl.cu similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/minmax_update_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/minmax_update_impl.cu index 27b2cb0232..3915dba172 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/minmax_update_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/minmax_update_impl.cu @@ -20,7 +20,7 @@ #include #include #include "minmax_update_impl.cuh" -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" __global__ void UpdateInputMinMaxPerLayerWithEMA(const float *input_min, const float *input_max, float *output_min, float *output_max, const float min, const float max, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/minmax_update_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/minmax_update_impl.cuh similarity index 96% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/minmax_update_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/minmax_update_impl.cuh index 5e9becab38..b4b4d582ee 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/minmax_update_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/minmax_update_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_MIN_MAX_UPDATE_IMPL_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_MIN_MAX_UPDATE_IMPL_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" void CalMinMaxPerChannel(float 
*input, float *input_min, float *input_max, float *output_min, float *output_max, const int total_num, const int channel_num, const float ema_decay, const bool ema, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/momentum_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/momentum_impl.cu similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/momentum_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/momentum_impl.cu diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/momentum_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/momentum_impl.cuh similarity index 95% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/momentum_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/momentum_impl.cuh index 5405f5ef1d..62708663ad 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/momentum_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/momentum_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_MOMENTUMIMPL_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_MOMENTUMIMPL_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void MomentumUpdateVariable(const size_t size, T *variable, T *accumulation, const S *learning_rate, const T *gradient, const S *momentum, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/one_hot_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/one_hot_impl.cu similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/one_hot_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/one_hot_impl.cu index cf5dc7ecd0..6dc4d676f2 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/one_hot_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/one_hot_impl.cu @@ -15,7 +15,7 @@ */ #include "one_hot_impl.cuh" -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template __global__ void 
OneHotKernel(size_t size, const S *indices, size_t depth, const T *on_value, const T *off_value, size_t left_dim_size, size_t right_dim_size, T *output) { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/one_hot_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/one_hot_impl.cuh similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/one_hot_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/one_hot_impl.cuh diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/pad_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/pad_impl.cu similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/pad_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/pad_impl.cu index ddc615d94b..3bb4d04a01 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/pad_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/pad_impl.cu @@ -16,7 +16,7 @@ #include #include -#include "kernel/gpu/cuda_impl/pad_impl.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/pad_impl.cuh" template __global__ void Pad(const size_t size, const T* input, const int num, const int channels, const int old_height, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/pad_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/pad_impl.cuh similarity index 97% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/pad_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/pad_impl.cuh index dc3036b8b6..b10804fdab 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/pad_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/pad_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_PADIMPL_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_PADIMPL_H_ #include -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void CalPad(const size_t size, const T* input, const int num, const int channels, const int old_height, diff --git 
a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/random_op_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/random_op_impl.cu new file mode 100644 index 0000000000..6f99394562 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/random_op_impl.cu @@ -0,0 +1,42 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "random_op_impl.cuh" +template +__global__ void NormalKernel(int seed, curandState *globalState, T *output, size_t count) { + for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) { + curand_init(seed, i, 0, &globalState[i]); + output[i] = curand_normal(&globalState[i]); + } + return; +} + +template +void StandardNormal(int seed, int seed2, curandState *globalState, T *output, size_t count, cudaStream_t cuda_stream) { + int RNG_seed = 0; + if (seed2 != 0) { + RNG_seed = seed2; + } else if (seed != 0) { + RNG_seed = seed; + } else { + RNG_seed = time(NULL); + } + NormalKernel<<>>(RNG_seed, globalState, output, count); + return; +} + +template void StandardNormal(int seed, int seed2, curandState *globalState, + float *output, size_t count, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/random_op_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/random_op_impl.cuh new file mode 100644 index 0000000000..b099ead9bf --- /dev/null +++ 
b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/random_op_impl.cuh @@ -0,0 +1,26 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_RANDOMOPIMPL_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_RANDOMOPIMPL_H_ + +#include +#include "runtime/device/gpu/cuda_common.h" + +template +void StandardNormal(int seed, int seed2, curandState *globalState, + T *output, size_t count, cudaStream_t cuda_stream); +#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_RANDOMOPIMPL_H_ diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/rmsprop_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/rmsprop_impl.cu similarity index 97% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/rmsprop_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/rmsprop_impl.cu index 913aaa3b8d..80806b552f 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/rmsprop_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/rmsprop_impl.cu @@ -15,8 +15,8 @@ */ #include -#include "kernel/gpu/cuda_impl/rmsprop_impl.cuh" -#include "device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/rmsprop_impl.cuh" +#include "runtime/device/gpu/cuda_common.h" template __global__ void RmsPropKernel(const T* learning_rate, const T decay, const T momentum, const T epsilon, T* variable, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/rmsprop_impl.cuh 
b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/rmsprop_impl.cuh similarity index 96% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/rmsprop_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/rmsprop_impl.cuh index b5802dbb67..16ad611381 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/rmsprop_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/rmsprop_impl.cuh @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_RMSPROP_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_RMSPROP_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void RmsProp(const T* learning_rate, const T decay, const T momentum, const T epsilon, T* variable, T* mean_square, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/select_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/select_impl.cu similarity index 96% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/select_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/select_impl.cu index f07a820e75..f7086f8093 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/select_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/select_impl.cu @@ -17,7 +17,7 @@ #include #include #include -#include "kernel/gpu/cuda_impl/select_impl.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/select_impl.cuh" template __global__ void Select(const size_t size, const bool* cond, const T* input_x, const T* input_y, T* output) { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/select_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/select_impl.cuh similarity index 95% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/select_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/select_impl.cuh index da2d7d9a7f..e201ab352c 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/select_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/select_impl.cuh @@ -17,7 +17,7 @@ #ifndef 
MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SELECT_IMPL_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SELECT_IMPL_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void CalSelect(const size_t size, const bool* cond, const T* input_x, const T* input_y, T* output, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cu similarity index 95% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cu index a0082b84c8..f0c64bfb01 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cu @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cuh" template __global__ void SigmoidCrossEntropyWithLogitsGradKernel(const size_t size, const T *logits, const S *labels, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cuh similarity index 96% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cuh index 2cd4922d25..6b444d6c02 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_GRAD_IMPL_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_GRAD_IMPL_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void SigmoidCrossEntropyWithLogitsGrad(const size_t size, const T *logits, const S *labels, T *outputs, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cu similarity index 94% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cu index 3766f367db..7425ac3809 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cu +++ 
b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cuh" template __global__ void SigmoidCrossEntropyWithLogitsKernel(const size_t size, const T *logits, const S *labels, T *outputs) { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cuh similarity index 96% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cuh index 575605bde0..7e9130857f 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_IMPL_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_IMPL_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void SigmoidCrossEntropyWithLogits(const size_t size, const T *logits, const S *labels, T *outputs, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/slice_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/slice_impl.cu similarity index 99% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/slice_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/slice_impl.cu index e49a22bb46..dd4effc174 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/slice_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/slice_impl.cu @@ -18,7 +18,7 @@ #include #include 
#include -#include "kernel/gpu/cuda_impl/slice_impl.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh" template __global__ void Slice4D(const int s1, const int s2, const int s3, const int s4, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/slice_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh similarity index 97% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/slice_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh index 9513d6ed24..e04f277c3d 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/slice_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh @@ -19,7 +19,7 @@ #include #include -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/smooth_l1_loss_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/smooth_l1_loss_impl.cu new file mode 100644 index 0000000000..9050044b7f --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/smooth_l1_loss_impl.cu @@ -0,0 +1,64 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "smooth_l1_loss_impl.cuh" +#include "runtime/device/gpu/cuda_common.h" + +template +__global__ void SmoothL1LossKernel(const int input_size, const float sigma, const T *prediction, const T *target, + T *loss) { + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < input_size; i += blockDim.x * gridDim.x) { + T value = (prediction[i] - target[i]) > 0 ? (prediction[i] - target[i]) : (target[i] - prediction[i]); + if (value < sigma) { + loss[i] = static_cast(0.5) * value * value; + } else { + loss[i] = value - static_cast(0.5); + } + } +} + +template +void SmoothL1Loss(const int &input_size, const float &sigma, const T *prediction, const T *target, T *loss, + cudaStream_t stream) { + SmoothL1LossKernel<<>>(input_size, sigma, prediction, target, loss); +} + +template +__global__ void SmoothL1LossGradKernel(const int input_size, const float sigma, const T *prediction, const T *target, + const T *dloss, T *dx) { + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < input_size; i += blockDim.x * gridDim.x) { + T value = prediction[i] - target[i]; + if (value > static_cast(sigma)) { + dx[i] = dloss[i]; + } else if (value < static_cast(-sigma)) { + dx[i] = -dloss[i]; + } else { + dx[i] = value * dloss[i]; + } + } +} + +template +void SmoothL1LossGrad(const int &input_size, const float &sigma, const T *prediction, const T *target, const T *dloss, + T *dx, cudaStream_t stream) { + SmoothL1LossGradKernel<<>>(input_size, sigma, prediction, target, + dloss, dx); +} + +template void SmoothL1Loss(const int &input_size, const float &sigma, const float *prediction, const float *target, + float *loss, cudaStream_t stream); +template void SmoothL1LossGrad(const int &input_size, const float &sigma, const float *prediction, const float *target, + const float *dloss, float *dx, cudaStream_t stream); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/smooth_l1_loss_impl.cuh 
b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/smooth_l1_loss_impl.cuh new file mode 100644 index 0000000000..7938e18a3b --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/smooth_l1_loss_impl.cuh @@ -0,0 +1,25 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SMOOTH_L1_LOSS_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SMOOTH_L1_LOSS_H_ +template +void SmoothL1Loss(const int &input_size, const float &sigma, const T *prediction, const T *target, T *loss, + cudaStream_t stream); +template +void SmoothL1LossGrad(const int &input_size, const float &sigma, const T *prediction, const T *target, const T *dloss, + T *dx, cudaStream_t stream); +#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SMOOTH_L1_LOSS_H_ diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/sparse_cross_entropy_cuda_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sparse_cross_entropy_cuda_impl.cu similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/sparse_cross_entropy_cuda_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sparse_cross_entropy_cuda_impl.cu diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/sparse_cross_entropy_cuda_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sparse_cross_entropy_cuda_impl.cuh similarity index 96% rename from 
mindspore/ccsrc/kernel/gpu/cuda_impl/sparse_cross_entropy_cuda_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sparse_cross_entropy_cuda_impl.cuh index d16131470c..fa32260381 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/sparse_cross_entropy_cuda_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sparse_cross_entropy_cuda_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPARSECROSSENTROPYCUDAIMPL_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPARSECROSSENTROPYCUDAIMPL_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void CalCrossEntropy(const float *logits, T *labels, const int batch_size, const int class_num, float *loss, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/transpose_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/transpose_impl.cu similarity index 96% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/transpose_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/transpose_impl.cu index a0fea90136..ffcb2c8052 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/transpose_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/transpose_impl.cu @@ -16,7 +16,7 @@ #include #include "transpose_impl.cuh" -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template __global__ void Transpose(const int size, const T* input, const int* input_shape, const int* input_axis, const int shape_size, T* output) { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/transpose_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/transpose_impl.cuh similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/transpose_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/transpose_impl.cuh diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/unary_op_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unary_op_impl.cu similarity index 
100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/unary_op_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unary_op_impl.cu diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/unary_op_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unary_op_impl.cuh similarity index 97% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/unary_op_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unary_op_impl.cuh index 623b1a8c03..cf8b30866e 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/unary_op_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unary_op_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_UNARYOPIMPL_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_UNARYOPIMPL_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void Exponential(T *input, T *output, size_t count, cudaStream_t cuda_stream); template diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/unsorted_segment_sum.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unsorted_segment_sum.cu similarity index 97% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/unsorted_segment_sum.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unsorted_segment_sum.cu index a7affd4705..3d299c2352 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/unsorted_segment_sum.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unsorted_segment_sum.cu @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/cuda_impl/unsorted_segment_sum.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/unsorted_segment_sum.cuh" template __global__ void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0, size_t output_dim1, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/unsorted_segment_sum.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unsorted_segment_sum.cuh similarity index 96% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/unsorted_segment_sum.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unsorted_segment_sum.cuh index ef95032996..315677fde4 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/unsorted_segment_sum.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unsorted_segment_sum.cuh @@ -18,7 +18,7 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_UNSORT_SEGMENT_SUM_H_ #include -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0, size_t output_dim1, diff --git a/mindspore/ccsrc/kernel/gpu/data/dataset_init_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_init_kernel.cc similarity index 91% rename from mindspore/ccsrc/kernel/gpu/data/dataset_init_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_init_kernel.cc index 777310cebc..3c88b88c74 100644 --- a/mindspore/ccsrc/kernel/gpu/data/dataset_init_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_init_kernel.cc @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#include "kernel/gpu/data/dataset_init_kernel.h" -#include "kernel/gpu/data/dataset_utils.h" -#include "device/gpu/gpu_buffer_mgr.h" -#include "device/gpu/gpu_memory_allocator.h" +#include "backend/kernel_compiler/gpu/data/dataset_init_kernel.h" +#include "backend/kernel_compiler/gpu/data/dataset_utils.h" +#include "runtime/device/gpu/gpu_buffer_mgr.h" +#include "runtime/device/gpu/gpu_memory_allocator.h" #include "utils/convert_utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/gpu/data/dataset_init_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_init_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/gpu/data/dataset_init_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_init_kernel.h index 318049f4ad..f8cc9b19ea 100644 --- a/mindspore/ccsrc/kernel/gpu/data/dataset_init_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_init_kernel.h @@ -19,8 +19,8 @@ #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/data/dataset_iterator_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_iterator_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/data/dataset_iterator_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_iterator_kernel.cc index 13ca191b0b..67a487ce28 100644 --- a/mindspore/ccsrc/kernel/gpu/data/dataset_iterator_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_iterator_kernel.cc @@ -14,13 +14,13 @@ * limitations under the License. 
*/ -#include "kernel/gpu/data/dataset_iterator_kernel.h" +#include "backend/kernel_compiler/gpu/data/dataset_iterator_kernel.h" #include #include #include -#include "device/gpu/gpu_buffer_mgr.h" -#include "device/gpu/gpu_common.h" -#include "kernel/gpu/data/dataset_utils.h" +#include "runtime/device/gpu/gpu_buffer_mgr.h" +#include "runtime/device/gpu/gpu_common.h" +#include "backend/kernel_compiler/gpu/data/dataset_utils.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/data/dataset_iterator_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_iterator_kernel.h similarity index 93% rename from mindspore/ccsrc/kernel/gpu/data/dataset_iterator_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_iterator_kernel.h index cdd7a47e7b..746aed3294 100644 --- a/mindspore/ccsrc/kernel/gpu/data/dataset_iterator_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_iterator_kernel.h @@ -19,8 +19,8 @@ #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/data/dataset_utils.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_utils.cc similarity index 96% rename from mindspore/ccsrc/kernel/gpu/data/dataset_utils.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_utils.cc index 846a63f84f..cb014a3d2b 100644 --- a/mindspore/ccsrc/kernel/gpu/data/dataset_utils.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_utils.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/data/dataset_utils.h" +#include "backend/kernel_compiler/gpu/data/dataset_utils.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/data/dataset_utils.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_utils.h similarity index 100% rename from mindspore/ccsrc/kernel/gpu/data/dataset_utils.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_utils.h diff --git a/mindspore/ccsrc/kernel/gpu/gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel.h similarity index 91% rename from mindspore/ccsrc/kernel/gpu/gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel.h index c935798f06..4c179f2173 100644 --- a/mindspore/ccsrc/kernel/gpu/gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel.h @@ -21,11 +21,11 @@ #include #include #include -#include "kernel/kernel.h" -#include "kernel/gpu/kernel_constants.h" -#include "device/gpu/gpu_device_manager.h" -#include "device/gpu/gpu_common.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" +#include "runtime/device/gpu/gpu_device_manager.h" +#include "runtime/device/gpu/gpu_common.h" +#include "backend/session/anf_runtime_algorithm.h" using AnfAlgo = mindspore::session::AnfRuntimeAlgorithm; namespace mindspore { diff --git a/mindspore/ccsrc/kernel/gpu/gpu_kernel_factory.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.cc similarity index 95% rename from mindspore/ccsrc/kernel/gpu/gpu_kernel_factory.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.cc index b00b5c263d..4a0191abd7 100644 --- a/mindspore/ccsrc/kernel/gpu/gpu_kernel_factory.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.cc @@ -14,15 +14,15 @@ * limitations under the License. 
*/ -#include "kernel/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" #include #include #include "common/utils.h" -#include "device/kernel_info.h" -#include "device/gpu/cuda_common.h" -#include "kernel/common_utils.h" +#include "runtime/device/kernel_info.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/common_utils.h" namespace mindspore { namespace kernel { @@ -137,7 +137,7 @@ std::pair GpuKernelFactory::GpuKernelAttrCheck(const std::string & } GpuKernel *GpuKernelFactory::Create(const std::string &kernel_name, const CNodePtr &apply_kernel) { - auto kernel_info = apply_kernel->kernel_info(); + auto kernel_info = dynamic_cast(apply_kernel->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); const KernelBuildInfo *kernel_build_Info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(kernel_build_Info); diff --git a/mindspore/ccsrc/kernel/gpu/gpu_kernel_factory.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.h similarity index 96% rename from mindspore/ccsrc/kernel/gpu/gpu_kernel_factory.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.h index dc5f61a315..8834fa0f1a 100644 --- a/mindspore/ccsrc/kernel/gpu/gpu_kernel_factory.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.h @@ -21,9 +21,9 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "device/gpu/kernel_info_setter.h" -#include "kernel/kernel_build_info.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "runtime/device/gpu/kernel_info_setter.h" +#include "backend/kernel_compiler/kernel_build_info.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/kernel_constants.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/kernel_constants.h similarity index 100% rename from mindspore/ccsrc/kernel/gpu/kernel_constants.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/kernel_constants.h diff --git 
a/mindspore/ccsrc/kernel/gpu/math/addn_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/addn_gpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/gpu/math/addn_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/addn_gpu_kernel.cc index 4683f015ae..86c7d8c108 100644 --- a/mindspore/ccsrc/kernel/gpu/math/addn_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/addn_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/gpu/math/addn_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/math/addn_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/math/addn_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/addn_gpu_kernel.h similarity index 78% rename from mindspore/ccsrc/kernel/gpu/math/addn_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/addn_gpu_kernel.h index 1498da777f..b69bd20216 100644 --- a/mindspore/ccsrc/kernel/gpu/math/addn_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/addn_gpu_kernel.h @@ -19,9 +19,11 @@ #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { @@ -43,18 +45,26 @@ class AddNGpuFwdKernel : public GpuKernel { const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } bool Launch(const std::vector &inputs, const std::vector &, - const std::vector &outputs, void *) override { + const std::vector &outputs, void *stream_ptr) override { if (is_null_input_) { return true; } T *output_addr = 
GetDeviceAddress(outputs, 0); + if (cudnn_data_type_ == CUDNN_DATA_INT32) { + FillDeviceArray(outputs[0]->size / sizeof(T), output_addr, 0.0f, reinterpret_cast(stream_ptr)); + } const float alpha = 1; const float beta = 0; for (size_t i = 0; i < IntToSize(num_input_); i++) { T *input_addr = GetDeviceAddress(inputs, i); - CHECK_CUDNN_RET_WITH_EXCEPT(cudnnAddTensor(cudnn_handle_, &alpha, input_descriptor_, input_addr, - &(i > 0 ? alpha : beta), input_descriptor_, output_addr), - "cudnnAddTensor failed"); + if (cudnn_data_type_ == CUDNN_DATA_INT32) { + NoBroadcast(outputs[0]->size / sizeof(T), BROADCAST_TYPE_ADD, input_addr, output_addr, output_addr, + reinterpret_cast(stream_ptr)); + } else { + CHECK_CUDNN_RET_WITH_EXCEPT(cudnnAddTensor(cudnn_handle_, &alpha, input_descriptor_, input_addr, + &(i > 0 ? alpha : beta), input_descriptor_, output_addr), + "cudnnAddTensor failed"); + } } return true; } @@ -100,9 +110,8 @@ class AddNGpuFwdKernel : public GpuKernel { } void InitSizeLists() override { if (!is_null_input_) { - CHECK_CUDNN_RET_WITH_EXCEPT( - cudnnGetTensorSizeInBytes(input_descriptor_, reinterpret_cast(&input_size_)), - "cudnnGetTensorSizeInBytes failed"); + CHECK_CUDNN_RET_WITH_EXCEPT(cudnnGetTensorSizeInBytes(input_descriptor_, &input_size_), + "cudnnGetTensorSizeInBytes failed"); } for (int i = 0; i < num_input_; i++) { input_size_list_.push_back(input_size_); diff --git a/mindspore/ccsrc/kernel/gpu/math/assign_add_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/assign_add_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/math/assign_add_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/assign_add_gpu_kernel.cc index 2ae1728ca3..bffcca158b 100644 --- a/mindspore/ccsrc/kernel/gpu/math/assign_add_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/assign_add_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/math/assign_add_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/math/assign_add_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/math/assign_add_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/assign_add_gpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/gpu/math/assign_add_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/assign_add_gpu_kernel.h index db69fd7be6..04a74b3412 100644 --- a/mindspore/ccsrc/kernel/gpu/math/assign_add_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/assign_add_gpu_kernel.h @@ -19,9 +19,9 @@ #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/assign_add_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/assign_add_impl.cuh" namespace mindspore { namespace kernel { template diff --git a/mindspore/ccsrc/kernel/gpu/math/bias_add_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/bias_add_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/math/bias_add_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/bias_add_gpu_kernel.cc index 5684f0c424..a07fb6ddf6 100644 --- a/mindspore/ccsrc/kernel/gpu/math/bias_add_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/bias_add_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/math/bias_add_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/math/bias_add_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/math/bias_add_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/bias_add_gpu_kernel.h similarity index 97% rename from mindspore/ccsrc/kernel/gpu/math/bias_add_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/bias_add_gpu_kernel.h index 5a664db2e1..fd344be28a 100644 --- a/mindspore/ccsrc/kernel/gpu/math/bias_add_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/bias_add_gpu_kernel.h @@ -21,9 +21,9 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/math/broadcast_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.cc similarity index 98% rename from mindspore/ccsrc/kernel/gpu/math/broadcast_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.cc index 96d51b704c..41e7147328 100644 --- a/mindspore/ccsrc/kernel/gpu/math/broadcast_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/math/broadcast_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/math/broadcast_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/gpu/math/broadcast_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.h index be7d3a19d4..aaf827723a 100644 --- a/mindspore/ccsrc/kernel/gpu/math/broadcast_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.h @@ -21,10 +21,10 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/broadcast_impl.cuh" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cuh" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { template diff --git a/mindspore/ccsrc/kernel/gpu/math/broadcast_grad_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_grad_gpu_kernel.cc similarity index 97% rename from mindspore/ccsrc/kernel/gpu/math/broadcast_grad_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_grad_gpu_kernel.cc index 85598cf940..49be2fd9a6 100644 --- a/mindspore/ccsrc/kernel/gpu/math/broadcast_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_grad_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/math/broadcast_grad_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/math/broadcast_grad_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/math/broadcast_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_grad_gpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/gpu/math/broadcast_grad_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_grad_gpu_kernel.h index f1eb5fecf9..6258c5c4e2 100644 --- a/mindspore/ccsrc/kernel/gpu/math/broadcast_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_grad_gpu_kernel.h @@ -21,10 +21,10 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/broadcast_grad_impl.cuh" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/broadcast_grad_impl.cuh" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { template diff --git a/mindspore/ccsrc/kernel/gpu/math/equalcount_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/equalcount_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/math/equalcount_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/equalcount_gpu_kernel.cc index f3c3b6164d..3103f30f52 100644 --- a/mindspore/ccsrc/kernel/gpu/math/equalcount_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/equalcount_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/math/equalcount_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/math/equalcount_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/math/equalcount_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/equalcount_gpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/gpu/math/equalcount_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/equalcount_gpu_kernel.h index 7d3f74970f..eae7a893b7 100644 --- a/mindspore/ccsrc/kernel/gpu/math/equalcount_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/equalcount_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_EQUALCOUNT_GPU_KERNEL_H #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/equalcount_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/equalcount_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/math/float_status_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/float_status_gpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/gpu/math/float_status_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/float_status_gpu_kernel.cc index 374644eaf5..313669a647 100644 --- a/mindspore/ccsrc/kernel/gpu/math/float_status_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/float_status_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/math/float_status_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/math/float_status_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/math/float_status_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/float_status_gpu_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/gpu/math/float_status_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/float_status_gpu_kernel.h index 1aa9b18684..be74f2e9dc 100644 --- a/mindspore/ccsrc/kernel/gpu/math/float_status_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/float_status_gpu_kernel.h @@ -21,9 +21,9 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/float_status_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/float_status_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/math/matmul_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/matmul_gpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/gpu/math/matmul_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/matmul_gpu_kernel.cc index 808d599853..471c394598 100644 --- a/mindspore/ccsrc/kernel/gpu/math/matmul_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/matmul_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/math/matmul_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/math/matmul_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/math/matmul_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/matmul_gpu_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/gpu/math/matmul_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/matmul_gpu_kernel.h index 3ee3493ed6..7888d442c9 100644 --- a/mindspore/ccsrc/kernel/gpu/math/matmul_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/matmul_gpu_kernel.h @@ -20,9 +20,9 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" #include "utils/convert_utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/random_op_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/random_op_gpu_kernel.cc new file mode 100644 index 0000000000..c72c271c52 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/random_op_gpu_kernel.cc @@ -0,0 +1,24 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "backend/kernel_compiler/gpu/math/random_op_gpu_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_ONE(StandardNormal, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32), + RandomOpGpuKernel, float) +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/random_op_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/random_op_gpu_kernel.h new file mode 100644 index 0000000000..785ac02ee5 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/random_op_gpu_kernel.h @@ -0,0 +1,121 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_RANDOMOP_GPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_RANDOMOP_GPU_KERNEL_H_ + +#include +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/random_op_impl.cuh" + +namespace mindspore { +namespace kernel { +enum RandomOptype { RANDOM_OP_NORMAL = 0, RANDOM_OP_INVALID_TYPE = 255 }; + +const std::map kRandomOpTypeMap = {{"StandardNormal", RANDOM_OP_NORMAL}}; +template +class RandomOpGpuKernel : public GpuKernel { + public: + RandomOpGpuKernel() + : random_op_type_(RANDOM_OP_INVALID_TYPE), + input_size_0_(0), + output_size_(sizeof(T)), + workspace_size_(sizeof(curandState)) {} + ~RandomOpGpuKernel() override = default; + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) override { + void *workspace_addr = GetDeviceAddress(workspace, 0); + curandState *devStates = reinterpret_cast(workspace_addr); + T *output_addr = GetDeviceAddress(outputs, 0); + + switch (random_op_type_) { + case RANDOM_OP_NORMAL: { + StandardNormal(seed_, seed2_, devStates, output_addr, outputs[0]->size / sizeof(T), + reinterpret_cast(stream_ptr)); + break; + } + default: { + MS_LOG(EXCEPTION) << "Random operation " << random_op_type_ << " is not supported."; + } + } + return true; + } + bool Init(const CNodePtr &kernel_node) override { + std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); + auto iter = kRandomOpTypeMap.find(kernel_name); + if (iter == kRandomOpTypeMap.end()) { + MS_LOG(EXCEPTION) << "Random operation " << kernel_name << " is not supported."; + } else 
{ + random_op_type_ = iter->second; + } + size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); + if (input_num != 1) { + MS_LOG(ERROR) << "Input number is " << input_num << ", but random op needs 1 input."; + return false; + } + size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); + if (output_num != 1) { + MS_LOG(ERROR) << "Output number is " << output_num << ", but random op needs 1 output."; + return false; + } + auto input_shape_0 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + for (size_t i = 0; i < input_shape_0.size(); i++) { + input_size_0_ += input_shape_0[i]; + } + input_size_0_ *= sizeof(int); + auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); + for (size_t i = 0; i < output_shape.size(); i++) { + output_size_ *= output_shape[i]; + workspace_size_ *= output_shape[i]; + } + seed_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("seed")); + seed2_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("seed2")); + InitSizeLists(); + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(input_size_0_); + output_size_list_.push_back(output_size_); + workspace_size_list_.push_back(workspace_size_); + } + + private: + RandomOptype random_op_type_; + size_t input_size_0_; + size_t output_size_; + size_t workspace_size_; + int seed_; + int seed2_; + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; +}; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_GPU_RANDOMOP_GPU_KERNEL_H_ diff --git a/mindspore/ccsrc/kernel/gpu/math/unary_op_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/unary_op_gpu_kernel.cc similarity index 97% rename from mindspore/ccsrc/kernel/gpu/math/unary_op_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/unary_op_gpu_kernel.cc index 77f53fc417..ae8e7bbd0b 100644 --- a/mindspore/ccsrc/kernel/gpu/math/unary_op_gpu_kernel.cc 
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/unary_op_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/gpu/math/unary_op_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/math/unary_op_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/math/unary_op_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/unary_op_gpu_kernel.h similarity index 97% rename from mindspore/ccsrc/kernel/gpu/math/unary_op_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/unary_op_gpu_kernel.h index 4503b805f6..26993bc3bd 100644 --- a/mindspore/ccsrc/kernel/gpu/math/unary_op_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/unary_op_gpu_kernel.h @@ -21,9 +21,9 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/unary_op_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/unary_op_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nccl/nccl_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nccl/nccl_gpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/gpu/nccl/nccl_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nccl/nccl_gpu_kernel.cc index 6993085a75..c6e3c4c043 100644 --- a/mindspore/ccsrc/kernel/gpu/nccl/nccl_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nccl/nccl_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nccl/nccl_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nccl/nccl_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nccl/nccl_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nccl/nccl_gpu_kernel.h similarity index 88% rename from mindspore/ccsrc/kernel/gpu/nccl/nccl_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nccl/nccl_gpu_kernel.h index b5ab46a67d..9701738bfc 100644 --- a/mindspore/ccsrc/kernel/gpu/nccl/nccl_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nccl/nccl_gpu_kernel.h @@ -23,10 +23,10 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" -#include "device/gpu/distribution/collective_init.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" +#include "runtime/device/gpu/distribution/collective_init.h" namespace mindspore { namespace kernel { @@ -40,9 +40,11 @@ const std::map kNcclTypeMap = { static std::map kNcclDtypeMap = { {"kNumberTypeFloat32", ncclFloat}, {"kNumberTypeFloat16", ncclHalf}, {"kNumberTypeInt32", ncclInt}}; -typedef ncclResult_t (*AllReduce)(const void *, void *, size_t, ncclDataType_t, ncclRedOp_t, cudaStream_t); -typedef ncclResult_t (*AllGather)(const void *, void *, size_t, ncclDataType_t, cudaStream_t); -typedef ncclResult_t (*ReduceScatter)(const void *, void *, size_t, ncclDataType_t, ncclRedOp_t, cudaStream_t); +typedef ncclResult_t (*AllReduce)(const void *, void *, size_t, ncclDataType_t, ncclRedOp_t, cudaStream_t, + const std::string &); +typedef ncclResult_t (*AllGather)(const void *, void *, size_t, ncclDataType_t, cudaStream_t, const std::string &); +typedef ncclResult_t (*ReduceScatter)(const void *, void *, size_t, ncclDataType_t, ncclRedOp_t, cudaStream_t, + const std::string &); 
template class NcclGpuKernel : public GpuKernel { @@ -50,6 +52,7 @@ class NcclGpuKernel : public GpuKernel { NcclGpuKernel() : nccl_kernel_type_(NCCL_INVALID_TYPE), nccl_reduce_type_(ncclSum), + group_name_(""), input_size_(0), output_size_(0), collective_handle_(nullptr), @@ -71,7 +74,7 @@ class NcclGpuKernel : public GpuKernel { reinterpret_cast(dlsym(const_cast(collective_handle_), "AllReduce")); MS_EXCEPTION_IF_NULL(all_reduce_funcptr); CHECK_NCCL_RET_WITH_EXCEPT((*all_reduce_funcptr)(input_addr, output_addr, output_size_ / sizeof(T), - nccl_data_type_, nccl_reduce_type_, stream), + nccl_data_type_, nccl_reduce_type_, stream, group_name_), "ncclAllReduce failed"); break; } @@ -80,7 +83,7 @@ class NcclGpuKernel : public GpuKernel { reinterpret_cast(dlsym(const_cast(collective_handle_), "AllGather")); MS_EXCEPTION_IF_NULL(all_gather_funcptr); CHECK_NCCL_RET_WITH_EXCEPT( - (*all_gather_funcptr)(input_addr, output_addr, input_size_ / sizeof(T), nccl_data_type_, stream), + (*all_gather_funcptr)(input_addr, output_addr, input_size_ / sizeof(T), nccl_data_type_, stream, group_name_), "ncclAllGather failed"); break; } @@ -89,7 +92,7 @@ class NcclGpuKernel : public GpuKernel { reinterpret_cast(dlsym(const_cast(collective_handle_), "ReduceScatter")); MS_EXCEPTION_IF_NULL(reduce_scatter_funcptr); CHECK_NCCL_RET_WITH_EXCEPT((*reduce_scatter_funcptr)(input_addr, output_addr, output_size_ / sizeof(T), - nccl_data_type_, nccl_reduce_type_, stream), + nccl_data_type_, nccl_reduce_type_, stream, group_name_), "ncclReduceScatter failed"); break; } @@ -121,15 +124,18 @@ class NcclGpuKernel : public GpuKernel { output_size_list_.push_back(size); output_size_ += size; } - InferCommType(kernel_node); - collective_handle_ = device::gpu::CollectiveInitializer::instance().collective_handle(); - MS_EXCEPTION_IF_NULL(collective_handle_); + InferCommType(kernel_node); + group_name_ = GetAttr(kernel_node, kAttrGroup); + MS_LOG(INFO) << AnfAlgo::GetCNodeName(kernel_node) << " for group " << 
group_name_; auto comm_stream_attr = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("stream_id"); if (comm_stream_attr) { comm_stream_ = reinterpret_cast(GetValue(comm_stream_attr)); MS_EXCEPTION_IF_NULL(comm_stream_); } + + collective_handle_ = device::gpu::CollectiveInitializer::instance().collective_handle(); + MS_EXCEPTION_IF_NULL(collective_handle_); return true; } @@ -146,7 +152,7 @@ class NcclGpuKernel : public GpuKernel { nccl_kernel_type_ = iter->second; } - auto reduce_op = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("op"); + auto reduce_op = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr(kAttrOp); if (reduce_op) { std::string type = GetValue(reduce_op); if (type == "sum") { @@ -167,6 +173,7 @@ class NcclGpuKernel : public GpuKernel { NcclKernelType nccl_kernel_type_; ncclRedOp_t nccl_reduce_type_; ncclDataType_t nccl_data_type_; + std::string group_name_; size_t input_size_; size_t output_size_; std::vector input_size_list_; diff --git a/mindspore/ccsrc/kernel/gpu/nn/activation_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/activation_gpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/gpu/nn/activation_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/activation_gpu_kernel.cc index 5e80cccd75..334550b213 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/activation_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/activation_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/activation_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/activation_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/activation_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/activation_gpu_kernel.h similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/activation_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/activation_gpu_kernel.h index bf6cfa7b23..d651da75e0 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/activation_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/activation_gpu_kernel.h @@ -20,9 +20,9 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/activation_grad_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/activation_grad_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/gpu/nn/activation_grad_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/activation_grad_kernel.cc index 35d11f8b47..8fd486c08c 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/activation_grad_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/activation_grad_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/activation_grad_kernel.h" +#include "backend/kernel_compiler/gpu/nn/activation_grad_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/activation_grad_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/activation_grad_kernel.h similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/activation_grad_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/activation_grad_kernel.h index 38e34eb752..ffdb618098 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/activation_grad_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/activation_grad_kernel.h @@ -20,9 +20,9 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/adam_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/adam_gpu_kernel.cc similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/adam_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/adam_gpu_kernel.cc index 049a5cc280..0f89eb4419 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/adam_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/adam_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/adam_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/adam_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/adam_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/adam_gpu_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/gpu/nn/adam_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/adam_gpu_kernel.h index 93c6381ab3..e2fc87ed51 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/adam_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/adam_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_ADAM_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/adam_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/adam_impl.cuh" namespace mindspore { namespace kernel { template diff --git a/mindspore/ccsrc/kernel/gpu/nn/bias_add_grad_gpu_kenel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/bias_add_grad_gpu_kenel.cc similarity index 93% rename from mindspore/ccsrc/kernel/gpu/nn/bias_add_grad_gpu_kenel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/bias_add_grad_gpu_kenel.cc index ce6c9beeb7..6131aa8568 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/bias_add_grad_gpu_kenel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/bias_add_grad_gpu_kenel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/bias_add_grad_gpu_kenel.h" +#include "backend/kernel_compiler/gpu/nn/bias_add_grad_gpu_kenel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/bias_add_grad_gpu_kenel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/bias_add_grad_gpu_kenel.h similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/bias_add_grad_gpu_kenel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/bias_add_grad_gpu_kenel.h index 9b4f18d24c..3e15b818be 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/bias_add_grad_gpu_kenel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/bias_add_grad_gpu_kenel.h @@ -23,9 +23,9 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.cc index df6825e079..f9bb710b94 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/conv2d_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.h similarity index 98% rename from mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.h index f51cbfef33..6072614e22 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.h @@ -20,10 +20,10 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/pad_impl.cuh" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/pad_impl.cuh" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.cc similarity index 93% rename from mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.cc index 28e9a10ccc..ca16e1a18c 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.h similarity index 98% rename from mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.h index 0d7be25772..638da4a99f 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.h @@ -20,10 +20,10 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/pad_impl.cuh" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/pad_impl.cuh" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.cc similarity index 93% rename from mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.cc index 12b6f91537..d8441fb67c 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.h similarity index 98% rename from mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.h index a33ea5b4da..a9a1e5c0cc 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.h @@ -20,10 +20,10 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/pad_impl.cuh" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/pad_impl.cuh" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/ctcloss_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/ctcloss_gpu_kernel.cc new file mode 100644 index 0000000000..155451875c --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/ctcloss_gpu_kernel.cc @@ -0,0 +1,32 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/kernel_compiler/gpu/nn/ctcloss_gpu_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_ONE(CTCLossV2, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + CtcLossGpuKernel, float) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/ctcloss_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/ctcloss_gpu_kernel.h new file mode 100644 index 0000000000..8b02354516 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/ctcloss_gpu_kernel.h @@ -0,0 +1,166 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_NN_CTCLOSS_GPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_NN_CTCLOSS_GPU_KERNEL_H_ + +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/gpu_memory_allocator.h" + +namespace mindspore { +namespace kernel { +template +class CtcLossGpuKernel : public GpuKernel { + public: + CtcLossGpuKernel() + : cudnn_handle_(nullptr), + probs_desc_(nullptr), + ctcloss_desc_(nullptr), + label_size_(0), + input_lengths_size_(0), + label_lengths_size_(0) {} + ~CtcLossGpuKernel() override { DestroyResource(); } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) override { + float *probs = GetDeviceAddress(inputs, 0); + int *labels = GetDeviceAddress(inputs, 1); + int *input_lengths = GetDeviceAddress(inputs, 2); + int *label_lengths = GetDeviceAddress(inputs, 3); + float *costs = GetDeviceAddress(outputs, 0); + float *grads = GetDeviceAddress(outputs, 1); + + // Copy labels/input_lengths/label_length to host as cudnn7.x.x requires + void *labels_host = nullptr; + void *input_lengths_host = nullptr; + void *label_lengths_host = nullptr; + CHECK_CUDA_RET_WITH_EXCEPT(cudaMallocHost(&labels_host, inputs[1]->size), "cudaMallocHost failed."); + CHECK_CUDA_RET_WITH_EXCEPT(cudaMallocHost(&input_lengths_host, inputs[2]->size), "cudaMallocHost failed."); + CHECK_CUDA_RET_WITH_EXCEPT(cudaMallocHost(&label_lengths_host, inputs[3]->size), "cudaMallocHost failed."); + cudaStream_t stream = reinterpret_cast(stream_ptr); + CHECK_CUDA_RET_WITH_EXCEPT(cudaMemcpyAsync(labels_host, labels, inputs[1]->size, cudaMemcpyDeviceToHost, 
stream), + "cudaMemcpyAsync failed."); + CHECK_CUDA_RET_WITH_EXCEPT( + cudaMemcpyAsync(input_lengths_host, input_lengths, inputs[2]->size, cudaMemcpyDeviceToHost, stream), + "cudaMemcpyAsync failed."); + CHECK_CUDA_RET_WITH_EXCEPT( + cudaMemcpyAsync(label_lengths_host, label_lengths, inputs[3]->size, cudaMemcpyDeviceToHost, stream), + "cudaMemcpyAsync failed."); + + CHECK_CUDA_RET_WITH_EXCEPT(cudaStreamSynchronize(stream), "cudaStreamSynchronize failed."); + size_t workspace_size = 0; + CHECK_CUDNN_RET_WITH_EXCEPT( + cudnnGetCTCLossWorkspaceSize(cudnn_handle_, probs_desc_, probs_desc_, reinterpret_cast(labels_host), + reinterpret_cast(label_lengths_host), + reinterpret_cast(input_lengths_host), CUDNN_CTC_LOSS_ALGO_DETERMINISTIC, + ctcloss_desc_, &workspace_size), + "cudnnGetCTCLossWorkspaceSize failed."); + void *workspace = device::gpu::GPUMemoryAllocator::GetInstance().AllocTensorMem(workspace_size); + if (workspace == nullptr) { + MS_LOG(EXCEPTION) << "Failed to alloc workspace, size: " << workspace_size; + } + + CHECK_CUDNN_RET_WITH_EXCEPT( + cudnnCTCLoss(cudnn_handle_, probs_desc_, probs, reinterpret_cast(labels_host), + reinterpret_cast(label_lengths_host), reinterpret_cast(input_lengths_host), costs, + probs_desc_, grads, CUDNN_CTC_LOSS_ALGO_DETERMINISTIC, ctcloss_desc_, workspace, workspace_size), + "cudnnCtcLoss failed."); + CHECK_CUDA_RET_WITH_EXCEPT(cudaStreamSynchronize(stream), "cudaStreamSynchronize failed."); + + device::gpu::GPUMemoryAllocator::GetInstance().FreeTensorMem(workspace); + CHECK_CUDA_RET_WITH_EXCEPT(cudaFreeHost(label_lengths_host), "cudaFreeHost failed."); + CHECK_CUDA_RET_WITH_EXCEPT(cudaFreeHost(input_lengths_host), "cudaFreeHost failed."); + CHECK_CUDA_RET_WITH_EXCEPT(cudaFreeHost(labels_host), "cudaFreeHost failed."); + return true; + } + bool Init(const CNodePtr &kernel_node) override { + InitResource(); + auto probs_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + if (probs_shape.size() != 3) { + MS_LOG(EXCEPTION) 
<< "probs dims: " << probs_shape.size() << " not support."; + } + probs_dims_[0] = probs_shape[0]; + probs_dims_[1] = probs_shape[1]; + probs_dims_[2] = probs_shape[2]; + + auto labels_dims = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + if (labels_dims.size() != 1 && labels_dims.size() != 2) { + MS_LOG(EXCEPTION) << "labels dims: " << labels_dims.size() << " not support."; + } + label_size_ = sizeof(int); + for (auto i : labels_dims) { + label_size_ *= i; + } + + auto input_length_dims = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + input_lengths_size_ = input_length_dims[0] * sizeof(int); + auto label_length_dims = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); + label_lengths_size_ = label_length_dims[0] * sizeof(int); + CHECK_CUDNN_RET_WITH_EXCEPT( + cudnnSetTensorNdDescriptorEx(probs_desc_, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 3, probs_dims_), + "cudnnSetTensorNdDescriptorEx failed."); + CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetCTCLossDescriptorEx(ctcloss_desc_, CUDNN_DATA_FLOAT, + CUDNN_LOSS_NORMALIZATION_SOFTMAX, CUDNN_PROPAGATE_NAN), + "cudnnSetCTCLossDescriptorEx failed."); + InitSizeLists(); + return true; + } + + protected: + void InitResource() override { + cudnn_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCudnnHandle(); + CHECK_CUDNN_RET_WITH_EXCEPT(cudnnCreateTensorDescriptor(&probs_desc_), "cudnnCreateTensorDescriptor failed."); + CHECK_CUDNN_RET_WITH_EXCEPT(cudnnCreateCTCLossDescriptor(&ctcloss_desc_), "cudnnCreateCTCLossDescriptor failed."); + } + + void InitSizeLists() override { + input_size_list_.push_back(probs_dims_[0] * probs_dims_[1] * probs_dims_[2] * sizeof(float)); + input_size_list_.push_back(label_size_); + input_size_list_.push_back(input_lengths_size_); + input_size_list_.push_back(label_lengths_size_); + + output_size_list_.push_back(probs_dims_[1] * sizeof(float)); + output_size_list_.push_back(probs_dims_[0] * probs_dims_[1] * probs_dims_[2] * sizeof(float)); + } + + private: + void 
DestroyResource() noexcept { + CHECK_CUDNN_RET_WITH_ERROR(cudnnDestroyCTCLossDescriptor(ctcloss_desc_), "cudnnDestroyCTCLossDescriptor failed."); + CHECK_CUDNN_RET_WITH_ERROR(cudnnDestroyTensorDescriptor(probs_desc_), "cudnnDestroyTensorDescriptor failed."); + } + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + + cudnnHandle_t cudnn_handle_; + cudnnTensorDescriptor_t probs_desc_; + cudnnCTCLossDescriptor_t ctcloss_desc_; + int probs_dims_[3] = {0}; + int label_size_; + int input_lengths_size_; + int label_lengths_size_; +}; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_GPU_NN_CTCLOSS_GPU_KERNEL_H_ diff --git a/mindspore/ccsrc/kernel/gpu/nn/dropout_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/dropout_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/nn/dropout_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/dropout_gpu_kernel.cc index 459010e9e9..423a230b6e 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/dropout_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/dropout_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/dropout_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/dropout_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/dropout_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/dropout_gpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/gpu/nn/dropout_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/dropout_gpu_kernel.h index 4dfacb7ca1..2104d7af35 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/dropout_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/dropout_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_DROPOUT_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/dropout_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/dropout_impl.cuh" #include "include/curand.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/gpu/nn/dropout_grad_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/dropout_grad_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/nn/dropout_grad_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/dropout_grad_kernel.cc index 2fd21c96ee..faf884c2eb 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/dropout_grad_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/dropout_grad_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/dropout_grad_kernel.h" +#include "backend/kernel_compiler/gpu/nn/dropout_grad_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/dropout_grad_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/dropout_grad_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/gpu/nn/dropout_grad_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/dropout_grad_kernel.h index e6683e15dd..a3a7250c9b 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/dropout_grad_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/dropout_grad_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_DROPOUT_GRAD_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/dropout_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/dropout_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/flatten_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/flatten_gpu_kernel.cc similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/flatten_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/flatten_gpu_kernel.cc index f9c993d31d..d8206aedcd 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/flatten_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/flatten_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/flatten_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/flatten_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/flatten_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/flatten_gpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/gpu/nn/flatten_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/flatten_gpu_kernel.h index 3b0ad8c946..a140579a3c 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/flatten_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/flatten_gpu_kernel.h @@ -19,8 +19,8 @@ #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/flatten_grad_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/flatten_grad_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/nn/flatten_grad_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/flatten_grad_gpu_kernel.cc index 0e079d137b..c07126a2ed 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/flatten_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/flatten_grad_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/flatten_grad_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/flatten_grad_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/flatten_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/flatten_grad_gpu_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/gpu/nn/flatten_grad_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/flatten_grad_gpu_kernel.h index 0748dc77db..b21327bc3b 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/flatten_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/flatten_grad_gpu_kernel.h @@ -19,8 +19,8 @@ #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/ftrl_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/ftrl_gpu_kernel.cc similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/ftrl_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/ftrl_gpu_kernel.cc index 4d30130931..0186153745 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/ftrl_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/ftrl_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/ftrl_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/ftrl_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/ftrl_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/ftrl_gpu_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/gpu/nn/ftrl_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/ftrl_gpu_kernel.h index 9e2153965b..ea08741dba 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/ftrl_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/ftrl_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_FTRL_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/ftrl_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/ftrl_impl.cuh" namespace mindspore { namespace kernel { template diff --git a/mindspore/ccsrc/kernel/gpu/nn/fused_adam_weight_decay.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_adam_weight_decay.cc similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/fused_adam_weight_decay.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_adam_weight_decay.cc index 77cb7f8608..5ef2fd8786 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/fused_adam_weight_decay.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_adam_weight_decay.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/fused_adam_weight_decay.h" +#include "backend/kernel_compiler/gpu/nn/fused_adam_weight_decay.h" namespace mindspore { namespace kernel { @@ -47,6 +47,5 @@ MS_REG_GPU_KERNEL_ONE(FusedAdam, .AddInputAttr(kNumberTypeFloat32) .AddOutputAttr(kNumberTypeFloat32), FusedAdamWeightDecayGpuKernel, float) - } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/gpu/nn/fused_adam_weight_decay.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_adam_weight_decay.h similarity index 93% rename from mindspore/ccsrc/kernel/gpu/nn/fused_adam_weight_decay.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_adam_weight_decay.h index f13f6ed59f..c4fd31a737 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/fused_adam_weight_decay.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_adam_weight_decay.h @@ -18,10 +18,10 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_FUSED_ADAM_WEIGHT_DECAY_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" -#include "kernel/gpu/cuda_impl/adam_weight_decay_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/cuda_impl/adam_weight_decay_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/fused_batch_norm_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_batch_norm_gpu_kernel.cc similarity index 98% rename from mindspore/ccsrc/kernel/gpu/nn/fused_batch_norm_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_batch_norm_gpu_kernel.cc index 91747d24d8..2ce39b63a0 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/fused_batch_norm_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_batch_norm_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations 
under the License. */ -#include "kernel/gpu/nn/fused_batch_norm_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/fused_batch_norm_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/fused_batch_norm_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_batch_norm_gpu_kernel.h similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/fused_batch_norm_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_batch_norm_gpu_kernel.h index b0a898209b..774428dc40 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/fused_batch_norm_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_batch_norm_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_FUSED_BATCH_NORM_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/fused_batchnorm_grad_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_batchnorm_grad_gpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/gpu/nn/fused_batchnorm_grad_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_batchnorm_grad_gpu_kernel.cc index 3947aaea9a..546e034f6b 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/fused_batchnorm_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_batchnorm_grad_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/fused_batchnorm_grad_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/fused_batchnorm_grad_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/fused_batchnorm_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_batchnorm_grad_gpu_kernel.h similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/fused_batchnorm_grad_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_batchnorm_grad_gpu_kernel.h index 712354b17c..a2d0d741b1 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/fused_batchnorm_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_batchnorm_grad_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_FUSED_BATCHNORM_GRAD_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/gelu_grad_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_grad_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/gpu/nn/gelu_grad_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_grad_kernel.cc index 32d91be80a..274e4896c9 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/gelu_grad_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_grad_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/gelu_grad_kernel.h" +#include "backend/kernel_compiler/gpu/nn/gelu_grad_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/gelu_grad_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_grad_kernel.h similarity index 91% rename from mindspore/ccsrc/kernel/gpu/nn/gelu_grad_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_grad_kernel.h index 6415349012..823da1fe9f 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/gelu_grad_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_grad_kernel.h @@ -18,10 +18,10 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_GELU_GRAD_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" -#include "kernel/gpu/cuda_impl/gelu_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/cuda_impl/gelu_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/gelu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/nn/gelu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_kernel.cc index ca54ff68ad..03cd9a155b 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/gelu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/gelu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/gelu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/gelu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_kernel.h similarity index 90% rename from mindspore/ccsrc/kernel/gpu/nn/gelu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_kernel.h index 60968d109b..76d3861d55 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/gelu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_kernel.h @@ -18,10 +18,10 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_GELU_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" -#include "kernel/gpu/cuda_impl/gelu_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/cuda_impl/gelu_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/layer_norm_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/layer_norm_gpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/gpu/nn/layer_norm_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/layer_norm_gpu_kernel.cc index 19e4dc17a6..49f556ae64 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/layer_norm_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/layer_norm_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/layer_norm_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/layer_norm_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/layer_norm_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/layer_norm_gpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/gpu/nn/layer_norm_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/layer_norm_gpu_kernel.h index d5ec3ff8f2..74669e03de 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/layer_norm_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/layer_norm_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_LAYER_NORM_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/layer_norm_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/layer_norm_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/layer_norm_grad_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/layer_norm_grad_gpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/gpu/nn/layer_norm_grad_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/layer_norm_grad_gpu_kernel.cc index 7991d42499..b59f95b8a2 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/layer_norm_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/layer_norm_grad_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/layer_norm_grad_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/layer_norm_grad_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/layer_norm_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/layer_norm_grad_gpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/gpu/nn/layer_norm_grad_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/layer_norm_grad_gpu_kernel.h index 83bdedb9b3..93967adad3 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/layer_norm_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/layer_norm_grad_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_LAYER_NORM_GRAD_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/layer_norm_grad_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/layer_norm_grad_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/lstm_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_gpu_kernel.cc similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/lstm_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_gpu_kernel.cc index c745c216f7..a24aaeeb96 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/lstm_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/lstm_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/lstm_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/lstm_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_gpu_kernel.h similarity index 98% rename from mindspore/ccsrc/kernel/gpu/nn/lstm_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_gpu_kernel.h index 42eda96b02..ad3e588f00 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/lstm_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_gpu_kernel.h @@ -20,9 +20,9 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_data_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_data_gpu_kernel.cc similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/lstm_grad_data_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_data_gpu_kernel.cc index ab88308d4e..1fa47690b3 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_data_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_data_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/lstm_grad_data_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/lstm_grad_data_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_data_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_data_gpu_kernel.h similarity index 98% rename from mindspore/ccsrc/kernel/gpu/nn/lstm_grad_data_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_data_gpu_kernel.h index 6eeefa262c..6d6bed5555 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_data_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_data_gpu_kernel.h @@ -20,9 +20,9 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_weight_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_weight_gpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/gpu/nn/lstm_grad_weight_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_weight_gpu_kernel.cc index 856a986e07..9ec239491f 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_weight_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_weight_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/lstm_grad_weight_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/lstm_grad_weight_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_weight_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_weight_gpu_kernel.h similarity index 98% rename from mindspore/ccsrc/kernel/gpu/nn/lstm_grad_weight_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_weight_gpu_kernel.h index a1a4852c84..445d2ce199 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_weight_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_weight_gpu_kernel.h @@ -20,9 +20,9 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { template diff --git a/mindspore/ccsrc/kernel/gpu/nn/momentum_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/momentum_gpu_kernel.cc similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/momentum_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/momentum_gpu_kernel.cc index e8b2b17706..99ae2affe8 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/momentum_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/momentum_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/momentum_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/momentum_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/momentum_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/momentum_gpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/gpu/nn/momentum_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/momentum_gpu_kernel.h index 5abfb9e97b..32d3fbb079 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/momentum_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/momentum_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_MOMENTUM_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/momentum_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/momentum_impl.cuh" namespace mindspore { namespace kernel { template diff --git a/mindspore/ccsrc/kernel/gpu/nn/pooling_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_gpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/gpu/nn/pooling_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_gpu_kernel.cc index e871af360a..902b0d9faf 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/pooling_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/pooling_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/pooling_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/pooling_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_gpu_kernel.h similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/pooling_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_gpu_kernel.h index 0dda1e8998..908a4e9b99 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/pooling_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_gpu_kernel.h @@ -20,10 +20,10 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/pad_impl.cuh" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/pad_impl.cuh" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/pooling_grad_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_grad_gpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/gpu/nn/pooling_grad_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_grad_gpu_kernel.cc index c3d4a44943..2948c900d2 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/pooling_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_grad_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/pooling_grad_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/pooling_grad_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/pooling_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_grad_gpu_kernel.h similarity index 98% rename from mindspore/ccsrc/kernel/gpu/nn/pooling_grad_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_grad_gpu_kernel.h index e8f1ebc1af..a066eacfa0 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/pooling_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_grad_gpu_kernel.h @@ -20,10 +20,10 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/pad_impl.cuh" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/pad_impl.cuh" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/rmsprop_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/rmsprop_gpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/gpu/nn/rmsprop_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/rmsprop_gpu_kernel.cc index 032e8eeec4..c33909a82b 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/rmsprop_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/rmsprop_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/rmsprop_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/rmsprop_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/rmsprop_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/rmsprop_gpu_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/gpu/nn/rmsprop_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/rmsprop_gpu_kernel.h index 9e148b690d..9811c71094 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/rmsprop_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/rmsprop_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_RMSPROP_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/rmsprop_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/rmsprop_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.cc similarity index 91% rename from mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.cc index 1e650811fd..96d2d29549 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.h index 8d0efe90b4..a2d3aabb68 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.cc similarity index 92% rename from mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.cc index dabc4df850..05c9a4234b 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.cc 
@@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.h index 01f416f6b7..88ab46a6ba 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_GRAD_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/smooth_l1_loss_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/smooth_l1_loss_gpu_kernel.cc new file mode 100644 index 0000000000..ea40bea6a4 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/smooth_l1_loss_gpu_kernel.cc @@ -0,0 +1,26 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/kernel_compiler/gpu/nn/smooth_l1_loss_gpu_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_ONE( + SmoothL1Loss, + KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + SmoothL1LossGpuKernel, float) +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/smooth_l1_loss_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/smooth_l1_loss_gpu_kernel.h new file mode 100644 index 0000000000..dc20f75077 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/smooth_l1_loss_gpu_kernel.h @@ -0,0 +1,75 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_NN_SMOOTH_L1_LOSS_GPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_NN_SMOOTH_L1_LOSS_GPU_KERNEL_H_ + +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/smooth_l1_loss_impl.cuh" +namespace mindspore { +namespace kernel { +template +class SmoothL1LossGpuKernel : public GpuKernel { + public: + SmoothL1LossGpuKernel() : input_size_(1), sigma_(1.0) {} + ~SmoothL1LossGpuKernel() override = default; + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) override { + T *prediction = GetDeviceAddress(inputs, 0); + T *target = GetDeviceAddress(inputs, 1); + T *loss = GetDeviceAddress(outputs, 0); + + SmoothL1Loss(input_size_, sigma_, prediction, target, loss, reinterpret_cast(stream_ptr)); + return true; + } + + bool Init(const CNodePtr &kernel_node) override { + auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + for (size_t i = 0; i < input_shape.size(); i++) { + input_size_ *= input_shape[i]; + } + + sigma_ = GetAttr(kernel_node, "sigma"); + InitSizeLists(); + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(input_size_ * sizeof(T)); + input_size_list_.push_back(input_size_ * sizeof(T)); + output_size_list_.push_back(input_size_ * sizeof(T)); + } + + private: + size_t input_size_; + float sigma_; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; +}; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_GPU_NN_SMOOTH_L1_LOSS_GPU_KERNEL_H_ diff 
--git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/smooth_l1_loss_grad_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/smooth_l1_loss_grad_gpu_kernel.cc new file mode 100644 index 0000000000..8a4fb38460 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/smooth_l1_loss_grad_gpu_kernel.cc @@ -0,0 +1,29 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/kernel_compiler/gpu/nn/smooth_l1_loss_grad_gpu_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_ONE(SmoothL1LossGrad, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + SmoothL1LossGradGpuKernel, float) +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/smooth_l1_loss_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/smooth_l1_loss_grad_gpu_kernel.h new file mode 100644 index 0000000000..02be336932 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/smooth_l1_loss_grad_gpu_kernel.h @@ -0,0 +1,76 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_NN_SMOOTH_L1_LOSS_GRAD_GPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_NN_SMOOTH_L1_LOSS_GRAD_GPU_KERNEL_H_ + +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/smooth_l1_loss_impl.cuh" +namespace mindspore { +namespace kernel { +template +class SmoothL1LossGradGpuKernel : public GpuKernel { + public: + SmoothL1LossGradGpuKernel() : input_size_(1), sigma_(1.0) {} + ~SmoothL1LossGradGpuKernel() override = default; + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) override { + T *prediction = GetDeviceAddress(inputs, 0); + T *target = GetDeviceAddress(inputs, 1); + T *dloss = GetDeviceAddress(inputs, 2); + T *dx = GetDeviceAddress(outputs, 0); + + SmoothL1LossGrad(input_size_, sigma_, prediction, target, dloss, dx, reinterpret_cast(stream_ptr)); + return true; + } + + bool Init(const CNodePtr &kernel_node) override { + auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + for (size_t i = 0; i < input_shape.size(); i++) { + input_size_ *= input_shape[i]; + } + + sigma_ = GetAttr(kernel_node, "sigma"); + InitSizeLists(); + return true; + } + + 
protected: + void InitSizeLists() override { + input_size_list_.push_back(input_size_ * sizeof(T)); + input_size_list_.push_back(input_size_ * sizeof(T)); + output_size_list_.push_back(input_size_ * sizeof(T)); + } + + private: + size_t input_size_; + float sigma_; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; +}; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_GPU_NN_SMOOTH_L1_LOSS_GRAD_GPU_KERNEL_H_ diff --git a/mindspore/ccsrc/kernel/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.cc similarity index 92% rename from mindspore/ccsrc/kernel/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.cc index 160a26d200..8a64762c0a 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.h similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.h index 8256174bcb..e56cb96fd7 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.h @@ -19,10 +19,10 @@ #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/cross_entropy_impl.cuh" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/cross_entropy_impl.cuh" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/softmax_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_gpu_kernel.cc similarity index 93% rename from mindspore/ccsrc/kernel/gpu/nn/softmax_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_gpu_kernel.cc index b9667ed85b..24c2c12601 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/softmax_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/softmax_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/softmax_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/softmax_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_gpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/gpu/nn/softmax_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_gpu_kernel.h index 9d5a2a24e1..279bac3aa9 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/softmax_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_gpu_kernel.h @@ -18,10 +18,10 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_SOFTMAX_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" -#include "kernel/gpu/cuda_impl/transpose_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/cuda_impl/transpose_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/softmax_grad_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_grad_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/nn/softmax_grad_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_grad_gpu_kernel.cc index 5b07136522..bd20413d08 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/softmax_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_grad_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/softmax_grad_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/softmax_grad_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/softmax_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_grad_gpu_kernel.h similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/softmax_grad_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_grad_gpu_kernel.h index d73503d5a5..b814be9969 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/softmax_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_grad_gpu_kernel.h @@ -18,10 +18,10 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_SOFTMAX_GRAD_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" -#include "kernel/gpu/cuda_impl/transpose_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/cuda_impl/transpose_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.cc similarity index 92% rename from mindspore/ccsrc/kernel/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.cc index 537eeb5726..81b46f520c 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.h similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.h index 6950f0e308..bcb8a6b333 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.h @@ -19,10 +19,10 @@ #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/cross_entropy_impl.cuh" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/cross_entropy_impl.cuh" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/other/assign_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/other/assign_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/other/assign_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/other/assign_gpu_kernel.cc index 0f3e0c95f4..4e07463a6c 100644 --- a/mindspore/ccsrc/kernel/gpu/other/assign_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/other/assign_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/other/assign_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/other/assign_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/other/assign_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/other/assign_gpu_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/gpu/other/assign_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/other/assign_gpu_kernel.h index b41d583a43..76e863393c 100644 --- a/mindspore/ccsrc/kernel/gpu/other/assign_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/other/assign_gpu_kernel.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_ASSIGN_GPU_KERNEL_H #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold2_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold2_gpu_kernel.cc index af95767407..92652f67f9 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold2_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/quant/batchnorm_fold2_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/quant/batchnorm_fold2_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold2_gpu_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold2_gpu_kernel.h index b898f34689..83600e20df 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold2_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_BATCHNORMFOLD2_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/batchnorm_fold2_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/batchnorm_fold2_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_grad_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold2_grad_gpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_grad_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold2_grad_gpu_kernel.cc index 93862aeedd..6fc080713a 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold2_grad_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/quant/batchnorm_fold2_grad_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/quant/batchnorm_fold2_grad_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold2_grad_gpu_kernel.h similarity index 97% rename from mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_grad_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold2_grad_gpu_kernel.h index e0bafdb96a..3335210925 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold2_grad_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_BATCHNORMFOLD2_GRAD_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/batchnorm_fold2_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/batchnorm_fold2_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold_gpu_kernel.cc index 4f968a0fa3..95349c84aa 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/quant/batchnorm_fold_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/quant/batchnorm_fold_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold_gpu_kernel.h similarity index 97% rename from mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold_gpu_kernel.h index 6cd001fd2e..11b150686c 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold_gpu_kernel.h @@ -18,10 +18,10 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_BATCHNORM_FOLD_GPUKERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" -#include "kernel/gpu/cuda_impl/batchnorm_fold_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/cuda_impl/batchnorm_fold_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_grad_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold_grad_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_grad_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold_grad_gpu_kernel.cc index 93ea66258d..b727c6c7df 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold_grad_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/quant/batchnorm_fold_grad_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/quant/batchnorm_fold_grad_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold_grad_gpu_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_grad_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold_grad_gpu_kernel.h index 7a3ed7ef91..93a3cbf46e 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold_grad_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_BATCHNORM_FOLD_GRAD_GPUKERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/batchnorm_fold_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/batchnorm_fold_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/correction_mul_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/correction_mul_gpu_kernel.cc similarity index 93% rename from mindspore/ccsrc/kernel/gpu/quant/correction_mul_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/correction_mul_gpu_kernel.cc index a914b6ec14..9af5451c53 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/correction_mul_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/correction_mul_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/quant/correction_mul_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/quant/correction_mul_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/correction_mul_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/correction_mul_gpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/gpu/quant/correction_mul_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/correction_mul_gpu_kernel.h index 29aeabb03a..4ba6285e4b 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/correction_mul_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/correction_mul_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_CORRECTIONMUL_GPUKERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/correction_mul_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/correction_mul_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/correction_mul_grad_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/correction_mul_grad_gpu_kernel.cc similarity index 88% rename from mindspore/ccsrc/kernel/gpu/quant/correction_mul_grad_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/correction_mul_grad_gpu_kernel.cc index 28b5d56e68..63a47bc452 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/correction_mul_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/correction_mul_grad_gpu_kernel.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "kernel/gpu/quant/correction_mul_grad_gpu_kernel.h" -#include "kernel/gpu/cuda_impl/correction_mul_impl.cuh" +#include "backend/kernel_compiler/gpu/quant/correction_mul_grad_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/cuda_impl/correction_mul_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/correction_mul_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/correction_mul_grad_gpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/gpu/quant/correction_mul_grad_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/correction_mul_grad_gpu_kernel.h index 3feffa586b..b9fcbf0787 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/correction_mul_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/correction_mul_grad_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_CORRECTIONMULGRAD_GPUKERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/correction_mul_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/correction_mul_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perchannel_gpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perchannel_gpu_kernel.cc index 8db6ddd848..8a43ce0941 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perchannel_gpu_kernel.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "kernel/gpu/quant/fake_quant_perchannel_gpu_kernel.h" -#include "kernel/gpu/cuda_impl/fake_quant_perchannel_impl.cuh" +#include "backend/kernel_compiler/gpu/quant/fake_quant_perchannel_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/cuda_impl/fake_quant_perchannel_impl.cuh" #include #include #include diff --git a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perchannel_gpu_kernel.h similarity index 92% rename from mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perchannel_gpu_kernel.h index 122fe96af3..8e2c9524b2 100755 --- a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perchannel_gpu_kernel.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_FAKEQUANT_PER_CHANNEL_GPUKERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.cc index 5c774c05ed..598a6a960d 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "kernel/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.h" -#include "kernel/gpu/cuda_impl/fake_quant_perchannel_impl.cuh" +#include "backend/kernel_compiler/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/cuda_impl/fake_quant_perchannel_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.h similarity index 91% rename from mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.h index d863a2c99f..c2611ab8a2 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_FAKEQUANT_PER_CHANNEL_GRAD_GPUKERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perlayer_gpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perlayer_gpu_kernel.cc index 44869983eb..24edec97a9 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perlayer_gpu_kernel.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "kernel/gpu/quant/fake_quant_perlayer_gpu_kernel.h" -#include "kernel/gpu/cuda_impl/fake_quant_perlayer_impl.cuh" +#include "backend/kernel_compiler/gpu/quant/fake_quant_perlayer_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/cuda_impl/fake_quant_perlayer_impl.cuh" #include #include #include diff --git a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perlayer_gpu_kernel.h similarity index 91% rename from mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perlayer_gpu_kernel.h index 38810e06df..6df4da3104 100755 --- a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perlayer_gpu_kernel.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_FAKEQUANT_PERLAYER_GPUKERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.cc index c8d57b2bb1..f96b6a48d2 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "kernel/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.h" -#include "kernel/gpu/cuda_impl/fake_quant_perlayer_impl.cuh" +#include "backend/kernel_compiler/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/cuda_impl/fake_quant_perlayer_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.h similarity index 91% rename from mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.h index ae2ea5bfac..475723f684 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_FAKEQUANT_PERLAYER_GRAD_GPUKERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/minmax_update_perchannel_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/minmax_update_perchannel_gpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/gpu/quant/minmax_update_perchannel_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/minmax_update_perchannel_gpu_kernel.cc index a8ce72148b..742a9b8c55 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/minmax_update_perchannel_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/minmax_update_perchannel_gpu_kernel.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "kernel/gpu/quant/minmax_update_perchannel_gpu_kernel.h" -#include "kernel/gpu/cuda_impl/minmax_update_impl.cuh" +#include "backend/kernel_compiler/gpu/quant/minmax_update_perchannel_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/cuda_impl/minmax_update_impl.cuh" #include #include #include diff --git a/mindspore/ccsrc/kernel/gpu/quant/minmax_update_perchannel_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/minmax_update_perchannel_gpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/gpu/quant/minmax_update_perchannel_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/minmax_update_perchannel_gpu_kernel.h index 563a583ca1..9a0fe23e6a 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/minmax_update_perchannel_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/minmax_update_perchannel_gpu_kernel.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_MINMAX_UPDATE_PERCHANNEL_GPUKERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/minmax_update_perlayer_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/minmax_update_perlayer_gpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/gpu/quant/minmax_update_perlayer_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/minmax_update_perlayer_gpu_kernel.cc index 3659665b23..8f11e907e1 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/minmax_update_perlayer_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/minmax_update_perlayer_gpu_kernel.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "kernel/gpu/quant/minmax_update_perlayer_gpu_kernel.h" -#include "kernel/gpu/cuda_impl/minmax_update_impl.cuh" +#include "backend/kernel_compiler/gpu/quant/minmax_update_perlayer_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/cuda_impl/minmax_update_impl.cuh" #include #include #include diff --git a/mindspore/ccsrc/kernel/gpu/quant/minmax_update_perlayer_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/minmax_update_perlayer_gpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/gpu/quant/minmax_update_perlayer_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/minmax_update_perlayer_gpu_kernel.h index a237b6dc26..80ce6185c0 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/minmax_update_perlayer_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/minmax_update_perlayer_gpu_kernel.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_MINMAX_UPDATE_PERLAYER_GPUKERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/hccl/hccl_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.cc similarity index 89% rename from mindspore/ccsrc/kernel/hccl/hccl_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.cc index 87fb8d743d..5ec4f52574 100644 --- a/mindspore/ccsrc/kernel/hccl/hccl_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.cc @@ -14,10 +14,11 @@ * limitations under the License. 
*/ -#include "kernel/hccl/hccl_kernel.h" -#include "device/ascend/tasksink/runtime_utils.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/hccl/hccl_kernel.h" +#include "runtime/device/ascend/tasksink/runtime_utils.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" +#include "utils/context/ms_context.h" using HcclTaskInfoPtr = std::shared_ptr; using ge::model_runner::HcclTaskInfo; @@ -146,10 +147,12 @@ std::vector HcclKernel::GenTask(const std::vector &inpu << ", root_id=" << root_id_ << ", op_type=" << static_cast(op_type_) << ", data_type=" << static_cast(data_type); + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); HcclTaskInfoPtr task_info_ptr = std::make_shared( - stream_id, hccl_type, input_data_addr, output_data_addr, workspace_address, workspace_num, 0, private_def, nullptr, - hccl_count_, root_id_, op_type_, data_type, group_, RuntimeUtils::HcomBindModel, RuntimeUtils::HcomUnbindModel, - RuntimeUtils::HcomDistribute); + kernel_name_, stream_id, hccl_type, input_data_addr, output_data_addr, workspace_address, workspace_num, 0, + private_def, nullptr, hccl_count_, root_id_, op_type_, data_type, group_, RuntimeUtils::HcomBindModel, + RuntimeUtils::HcomUnbindModel, RuntimeUtils::HcomDistribute, NeedDump()); MS_EXCEPTION_IF_NULL(task_info_ptr); return {task_info_ptr}; } diff --git a/mindspore/ccsrc/kernel/hccl/hccl_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/hccl/hccl_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.h index 72e202591f..db7a0fbf7c 100644 --- a/mindspore/ccsrc/kernel/hccl/hccl_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.h @@ -23,8 +23,8 @@ #include #include #include -#include "kernel/ascend_kernel_mod.h" -#include "kernel/hccl/hcom_util.h" +#include "backend/kernel_compiler/ascend_kernel_mod.h" +#include 
"backend/kernel_compiler/hccl/hcom_util.h" #include "hccl/hcom.h" #include "common/utils.h" diff --git a/mindspore/ccsrc/kernel/hccl/hccl_kernel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_build.cc similarity index 88% rename from mindspore/ccsrc/kernel/hccl/hccl_kernel_build.cc rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_build.cc index d6e4aa09b9..8297be0b6d 100644 --- a/mindspore/ccsrc/kernel/hccl/hccl_kernel_build.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_build.cc @@ -14,14 +14,14 @@ * limitations under the License. */ -#include "kernel/hccl/hccl_kernel_build.h" +#include "backend/kernel_compiler/hccl/hccl_kernel_build.h" #include #include #include -#include "kernel/hccl/hccl_kernel.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/hccl/hccl_kernel.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/hccl/hccl_kernel_build.h b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_build.h similarity index 95% rename from mindspore/ccsrc/kernel/hccl/hccl_kernel_build.h rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_build.h index f20760a3eb..21b34d6522 100644 --- a/mindspore/ccsrc/kernel/hccl/hccl_kernel_build.h +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_build.h @@ -19,7 +19,7 @@ #include #include -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/hccl/hccl_kernel_metadata.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_metadata.cc similarity index 68% rename from mindspore/ccsrc/kernel/hccl/hccl_kernel_metadata.cc rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_metadata.cc index 601d5cf1ea..55742d383c 100755 --- a/mindspore/ccsrc/kernel/hccl/hccl_kernel_metadata.cc +++ 
b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_metadata.cc @@ -14,14 +14,32 @@ * limitations under the License. */ -#include "kernel/hccl/hccl_kernel_metadata.h" +#include "backend/kernel_compiler/hccl/hccl_kernel_metadata.h" #include +#include #include "utils/utils.h" -#include "kernel/hccl/hcom_util.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/hccl/hcom_util.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace kernel { +namespace { +std::string GetKernelFormat(const CNodePtr &kernel_node, size_t index) { + const std::set kReduceNoSupportedSet = {kOpFormat_FRAC_Z, kOpFormat_FRACTAL_Z_C04, kOpFormat_C1HWNCoC0}; + auto op_name = AnfAlgo::GetCNodeName(kernel_node); + auto format = AnfAlgo::GetPrevNodeOutputFormat(kernel_node, index); + if (op_name != kReduceScatter && op_name != kAllGatherOpName) { + return format; + } + if (format == kOpFormat_FRAC_NZ && AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, index).size() <= 2) { + return kOpFormat_DEFAULT; + } + if (kReduceNoSupportedSet.find(format) != kReduceNoSupportedSet.end()) { + return kOpFormat_DEFAULT; + } + return format; +} +} // namespace void HcclMetadataInfo(const CNodePtr &kernel_node, std::vector> *kernel_info_list) { const std::vector kHcclSupportTypes = {kNumberTypeInt8, kNumberTypeInt32, kNumberTypeFloat16, kNumberTypeFloat32, kNumberTypeInt16}; @@ -36,13 +54,13 @@ void HcclMetadataInfo(const CNodePtr &kernel_node, std::vector inputs_format{}; std::vector inputs_type{}; for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(kernel_node); ++input_index) { - inputs_format.emplace_back(AnfAlgo::GetPrevNodeOutputFormat(kernel_node, input_index)); + inputs_format.emplace_back(GetKernelFormat(kernel_node, input_index)); inputs_type.push_back(type); } std::vector outputs_format; std::vector outputs_type; for (size_t output_index = 0; output_index < AnfAlgo::GetOutputTensorNum(kernel_node); ++output_index) { - 
outputs_format.emplace_back(AnfAlgo::GetPrevNodeOutputFormat(kernel_node, output_index)); + outputs_format.emplace_back(GetKernelFormat(kernel_node, output_index)); outputs_type.push_back(type); } auto builder = KernelBuildInfo::KernelBuildInfoBuilder(); diff --git a/mindspore/ccsrc/kernel/hccl/hccl_kernel_metadata.h b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_metadata.h similarity index 95% rename from mindspore/ccsrc/kernel/hccl/hccl_kernel_metadata.h rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_metadata.h index b13393d3bd..25891fdaf6 100755 --- a/mindspore/ccsrc/kernel/hccl/hccl_kernel_metadata.h +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_metadata.h @@ -18,7 +18,7 @@ #include #include #include -#include "kernel/kernel_build_info.h" +#include "backend/kernel_compiler/kernel_build_info.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/hccl/hcom_all_broadcast.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_broadcast.cc similarity index 96% rename from mindspore/ccsrc/kernel/hccl/hcom_all_broadcast.cc rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_broadcast.cc index 9dbe708ef9..e9fb4c9314 100644 --- a/mindspore/ccsrc/kernel/hccl/hcom_all_broadcast.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_broadcast.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/hccl/hcom_all_broadcast.h" +#include "backend/kernel_compiler/hccl/hcom_all_broadcast.h" #include #include diff --git a/mindspore/ccsrc/kernel/hccl/hcom_all_broadcast.h b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_broadcast.h similarity index 96% rename from mindspore/ccsrc/kernel/hccl/hcom_all_broadcast.h rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_broadcast.h index ca8eba91af..6434b5fb9c 100644 --- a/mindspore/ccsrc/kernel/hccl/hcom_all_broadcast.h +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_broadcast.h @@ -20,7 +20,7 @@ #include #include #include "hccl/hcom.h" -#include "kernel/hccl/hccl_kernel.h" +#include "backend/kernel_compiler/hccl/hccl_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/hccl/hcom_all_gather.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.cc similarity index 96% rename from mindspore/ccsrc/kernel/hccl/hcom_all_gather.cc rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.cc index 6494f7fd12..201071dcb5 100644 --- a/mindspore/ccsrc/kernel/hccl/hcom_all_gather.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/hccl/hcom_all_gather.h" +#include "backend/kernel_compiler/hccl/hcom_all_gather.h" #include #include diff --git a/mindspore/ccsrc/kernel/hccl/hcom_all_gather.h b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.h similarity index 95% rename from mindspore/ccsrc/kernel/hccl/hcom_all_gather.h rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.h index 5de2c513cf..21d8ffa484 100644 --- a/mindspore/ccsrc/kernel/hccl/hcom_all_gather.h +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.h @@ -20,7 +20,7 @@ #include #include #include "hccl/hcom.h" -#include "kernel/hccl/hccl_kernel.h" +#include "backend/kernel_compiler/hccl/hccl_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/hccl/hcom_all_reduce.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce.cc similarity index 96% rename from mindspore/ccsrc/kernel/hccl/hcom_all_reduce.cc rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce.cc index 35a058e766..533ce1b087 100644 --- a/mindspore/ccsrc/kernel/hccl/hcom_all_reduce.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/hccl/hcom_all_reduce.h" +#include "backend/kernel_compiler/hccl/hcom_all_reduce.h" #include #include diff --git a/mindspore/ccsrc/kernel/hccl/hcom_all_reduce.h b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce.h similarity index 95% rename from mindspore/ccsrc/kernel/hccl/hcom_all_reduce.h rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce.h index 939abd9de7..39641f7448 100644 --- a/mindspore/ccsrc/kernel/hccl/hcom_all_reduce.h +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce.h @@ -19,7 +19,7 @@ #include #include -#include "kernel/hccl/hccl_kernel.h" +#include "backend/kernel_compiler/hccl/hccl_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/hccl/hcom_all_reduce_scatter.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.cc similarity index 96% rename from mindspore/ccsrc/kernel/hccl/hcom_all_reduce_scatter.cc rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.cc index dea516885d..32c6dacb01 100644 --- a/mindspore/ccsrc/kernel/hccl/hcom_all_reduce_scatter.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/hccl/hcom_all_reduce_scatter.h" +#include "backend/kernel_compiler/hccl/hcom_all_reduce_scatter.h" #include #include diff --git a/mindspore/ccsrc/kernel/hccl/hcom_all_reduce_scatter.h b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.h similarity index 96% rename from mindspore/ccsrc/kernel/hccl/hcom_all_reduce_scatter.h rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.h index c734b517c6..2f4ace5aea 100644 --- a/mindspore/ccsrc/kernel/hccl/hcom_all_reduce_scatter.h +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.h @@ -20,7 +20,7 @@ #include #include #include "hccl/hcom.h" -#include "kernel/hccl/hccl_kernel.h" +#include "backend/kernel_compiler/hccl/hccl_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/hccl/hcom_util.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.cc similarity index 97% rename from mindspore/ccsrc/kernel/hccl/hcom_util.cc rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.cc index 088dbe59d5..721c1b6ba0 100644 --- a/mindspore/ccsrc/kernel/hccl/hcom_util.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.cc @@ -14,12 +14,12 @@ * limitations under the License. 
*/ -#include "kernel/hccl/hcom_util.h" +#include "backend/kernel_compiler/hccl/hcom_util.h" #include -#include "kernel/common_utils.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/common_utils.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/hccl/hcom_util.h b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.h similarity index 100% rename from mindspore/ccsrc/kernel/hccl/hcom_util.h rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.h diff --git a/mindspore/ccsrc/kernel/kash/kernel_pack.cc b/mindspore/ccsrc/backend/kernel_compiler/kash/kernel_pack.cc similarity index 98% rename from mindspore/ccsrc/kernel/kash/kernel_pack.cc rename to mindspore/ccsrc/backend/kernel_compiler/kash/kernel_pack.cc index a87441031b..9933826f2b 100644 --- a/mindspore/ccsrc/kernel/kash/kernel_pack.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/kash/kernel_pack.cc @@ -15,12 +15,11 @@ */ #include -#include "mindspore/ccsrc/kernel/kernel.h" -#include "kernel/kernel.h" -#include "kernel/akg/akg_kernel_build.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/kernel_compiler/akg/akg_kernel_build.h" #include "nlohmann/json.hpp" #include "securec/include/securec.h" -#include "pipeline/parse/python_adapter.h" +#include "pipeline/jit/parse/python_adapter.h" #include "utils/log_adapter.h" #include "utils/convert_utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/kernel.h b/mindspore/ccsrc/backend/kernel_compiler/kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/kernel.h index 7bccce49c3..2d240338f3 100644 --- a/mindspore/ccsrc/kernel/kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/kernel.h @@ -23,7 +23,7 @@ #include "ir/dtype.h" #include "utils/utils.h" #include "ir/tensor.h" -#include "pipeline/static_analysis/dshape.h" +#include 
"abstract/dshape.h" #include "utils/log_adapter.h" namespace mindspore { @@ -129,6 +129,10 @@ class KernelMod { virtual std::vector GenParameters() { return {}; } virtual ~KernelMod() = default; + void set_kernel_name(const std::string &kernel_name) { kernel_name_ = kernel_name; } + + protected: + std::string kernel_name_; }; using KernelModPtr = std::shared_ptr; } // namespace kernel diff --git a/mindspore/ccsrc/kernel/kernel_build_info.cc b/mindspore/ccsrc/backend/kernel_compiler/kernel_build_info.cc similarity index 97% rename from mindspore/ccsrc/kernel/kernel_build_info.cc rename to mindspore/ccsrc/backend/kernel_compiler/kernel_build_info.cc index c912a0c199..68392d1871 100644 --- a/mindspore/ccsrc/kernel/kernel_build_info.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/kernel_build_info.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/kernel_build_info.h" +#include "backend/kernel_compiler/kernel_build_info.h" #include #include "utils/log_adapter.h" #include "debug/anf_ir_dump.h" @@ -119,6 +119,8 @@ bool KernelBuildInfo::IsInputDefaultPadding() const { return input_reshape_type_ bool KernelBuildInfo::IsOutputDefaultPadding() const { return output_reshape_type_.empty(); } +bool KernelBuildInfo::operator!=(const KernelBuildInfo &other) const { return !((*this) == other); } + void KernelBuildInfo::KernelBuildInfoBuilder::SetKernelType(const KernelType &kernel_type) { MS_EXCEPTION_IF_NULL(kernel_build_info_); kernel_build_info_->kernel_type_ = kernel_type; diff --git a/mindspore/ccsrc/kernel/kernel_build_info.h b/mindspore/ccsrc/backend/kernel_compiler/kernel_build_info.h similarity index 97% rename from mindspore/ccsrc/kernel/kernel_build_info.h rename to mindspore/ccsrc/backend/kernel_compiler/kernel_build_info.h index ca1083fd68..be243c9ae0 100644 --- a/mindspore/ccsrc/kernel/kernel_build_info.h +++ b/mindspore/ccsrc/backend/kernel_compiler/kernel_build_info.h @@ -22,7 +22,7 @@ #include #include #include "ir/dtype.h" -#include 
"kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" namespace mindspore { namespace kernel { @@ -85,6 +85,8 @@ class KernelBuildInfo { bool operator==(const KernelBuildInfo &other) const; + bool operator!=(const KernelBuildInfo &other) const; + public: static auto constexpr kInvalidFormat = "InvalidFormat"; diff --git a/mindspore/ccsrc/kernel/kernel_fusion.cc b/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.cc similarity index 94% rename from mindspore/ccsrc/kernel/kernel_fusion.cc rename to mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.cc index be79eca15a..0045e49bef 100644 --- a/mindspore/ccsrc/kernel/kernel_fusion.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include #include @@ -22,10 +22,10 @@ #include #include "common/utils.h" -#include "kernel/tbe/tbe_kernel_build.h" -#include "kernel/tbe/tbe_kernel_parallel_build.h" -#include "kernel/tbe/tbe_utils.h" -#include "kernel/tbe/tbe_convert_utils.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_build.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h" +#include "backend/kernel_compiler/tbe/tbe_utils.h" +#include "backend/kernel_compiler/tbe/tbe_convert_utils.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/kernel_fusion.h b/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.h similarity index 96% rename from mindspore/ccsrc/kernel/kernel_fusion.h rename to mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.h index 8ded21787c..2fb3a05b4b 100644 --- a/mindspore/ccsrc/kernel/kernel_fusion.h +++ b/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.h @@ -18,7 +18,7 @@ #define MINDSPORE_CCSRC_KERNEL_KERNELFUSION_H_ #include #include -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" namespace mindspore { namespace kernel { /* diff --git 
a/mindspore/ccsrc/kernel/kernel_query.cc b/mindspore/ccsrc/backend/kernel_compiler/kernel_query.cc similarity index 93% rename from mindspore/ccsrc/kernel/kernel_query.cc rename to mindspore/ccsrc/backend/kernel_compiler/kernel_query.cc index 4a8ae81afa..81b5d0f996 100755 --- a/mindspore/ccsrc/kernel/kernel_query.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/kernel_query.cc @@ -14,15 +14,15 @@ * limitations under the License. */ -#include "kernel/kernel_query.h" +#include "backend/kernel_compiler/kernel_query.h" #include #include -#include "kernel/aicpu/aicpu_kernel_metadata.h" -#include "kernel/rts/rt_kernel_info.h" -#include "kernel/hccl/hccl_kernel_metadata.h" -#include "kernel/tbe/tbe_kernel_select/tbe_kernel_select.h" -#include "kernel/akg/akg_kernel_metadata.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/aicpu/aicpu_kernel_metadata.h" +#include "backend/kernel_compiler/rts/rt_kernel_info.h" +#include "backend/kernel_compiler/hccl/hccl_kernel_metadata.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_select.h" +#include "backend/kernel_compiler/akg/akg_kernel_metadata.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/context/ms_context.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/kernel_query.h b/mindspore/ccsrc/backend/kernel_compiler/kernel_query.h similarity index 93% rename from mindspore/ccsrc/kernel/kernel_query.h rename to mindspore/ccsrc/backend/kernel_compiler/kernel_query.h index 257b0cf073..20458f48d0 100644 --- a/mindspore/ccsrc/kernel/kernel_query.h +++ b/mindspore/ccsrc/backend/kernel_compiler/kernel_query.h @@ -20,8 +20,8 @@ #include #include #include -#include "kernel/kernel.h" -#include "kernel/kernel_build_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/kernel_compiler/kernel_build_info.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/oplib/opinfo.h 
b/mindspore/ccsrc/backend/kernel_compiler/oplib/opinfo.h similarity index 91% rename from mindspore/ccsrc/kernel/oplib/opinfo.h rename to mindspore/ccsrc/backend/kernel_compiler/oplib/opinfo.h index f224a97efc..64ae1009d1 100644 --- a/mindspore/ccsrc/kernel/oplib/opinfo.h +++ b/mindspore/ccsrc/backend/kernel_compiler/oplib/opinfo.h @@ -21,7 +21,7 @@ #include #include #include "ir/dtype.h" -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" namespace mindspore { namespace kernel { @@ -103,13 +103,14 @@ class OpInfo { partial_flag_ = opinfo.partial_flag_; dynamic_format_ = opinfo.dynamic_format_; op_pattern_ = opinfo.op_pattern(); - for (auto attr : opinfo.attrs_ptr()) { + processor_ = opinfo.processor_; + for (const auto &attr : opinfo.attrs_ptr()) { attrs_ptr_.push_back(std::make_shared(*attr)); } - for (auto input : opinfo.inputs_ptr()) { + for (const auto &input : opinfo.inputs_ptr()) { inputs_ptr_.push_back(std::make_shared(*input)); } - for (auto output : opinfo.outputs_ptr()) { + for (const auto &output : opinfo.outputs_ptr()) { outputs_ptr_.push_back(std::make_shared(*output)); } ref_infos_ = opinfo.ref_infos(); @@ -121,6 +122,7 @@ class OpInfo { std::string fusion_type() const { return fusion_type_; } std::string kernel_name() const { return kernel_name_; } OpPattern op_pattern() const { return op_pattern_; } + std::string processor() const { return processor_; } std::vector> attrs_ptr() const { return attrs_ptr_; } std::vector> inputs_ptr() const { return inputs_ptr_; } std::vector> outputs_ptr() const { return outputs_ptr_; } @@ -136,6 +138,7 @@ class OpInfo { void set_kernel_name(const std::string &kernel_name) { kernel_name_ = kernel_name; } void set_partial_flag(const bool partial_flag) { partial_flag_ = partial_flag; } void set_op_pattern(const OpPattern op_pattern) { op_pattern_ = op_pattern; } + void set_processor(const std::string &processor) { processor_ = processor; } void add_attrs_ptr(const std::shared_ptr &attr) { 
attrs_ptr_.push_back(attr); } void add_inputs_ptr(const std::shared_ptr &input) { inputs_ptr_.push_back(input); } void add_outputs_ptr(const std::shared_ptr &output) { outputs_ptr_.push_back(output); } @@ -144,6 +147,10 @@ class OpInfo { void add_ref_pair(size_t out_index, size_t in_index) { (void)ref_infos_.emplace(out_index, in_index); } void ClearInputs() { (void)inputs_ptr_.clear(); } void ClearOutputs() { (void)outputs_ptr_.clear(); } + bool equals_to(const std::shared_ptr &other_info) const { + return this->op_name_ == other_info->op_name_ && this->imply_type_ == other_info->imply_type_ && + this->processor_ == other_info->processor_; + } private: std::string op_name_; @@ -157,6 +164,7 @@ class OpInfo { bool partial_flag_ = false; bool dynamic_format_ = false; OpPattern op_pattern_ = kCommonPattern; + std::string processor_; std::vector> attrs_ptr_; std::vector> inputs_ptr_; std::vector> outputs_ptr_; diff --git a/mindspore/ccsrc/kernel/oplib/oplib.cc b/mindspore/ccsrc/backend/kernel_compiler/oplib/oplib.cc similarity index 80% rename from mindspore/ccsrc/kernel/oplib/oplib.cc rename to mindspore/ccsrc/backend/kernel_compiler/oplib/oplib.cc index e01bbe9162..69c4ca7db1 100644 --- a/mindspore/ccsrc/kernel/oplib/oplib.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/oplib/oplib.cc @@ -14,11 +14,12 @@ * limitations under the License. 
*/ -#include "kernel/oplib/oplib.h" +#include "backend/kernel_compiler/oplib/oplib.h" #include #include #include #include +#include #include "utils/log_adapter.h" #include "utils/overload.h" #include "utils/context/ms_context.h" @@ -44,9 +45,10 @@ constexpr auto kAttr = "attr"; constexpr auto kIputs = "inputs"; constexpr auto kOutputs = "outputs"; constexpr auto kAiCPU = "AiCPU"; +constexpr auto kAiCore = "AiCore"; +constexpr auto kCUDA = "CUDA"; constexpr auto kTbe = "TBE"; -constexpr auto kAkg = "akg"; -constexpr auto kAutodiff = "AutoDiff"; +constexpr auto kAkg = "AKG"; constexpr auto kName = "name"; constexpr auto kParamType = "param_type"; constexpr auto kDtype = "dtype"; @@ -57,9 +59,10 @@ constexpr auto kIndex = "index"; constexpr auto kFormat = "format"; constexpr auto kNeedCompile = "need_compile"; constexpr auto kShape = "shape"; +constexpr auto kProcessor = "processor"; std::vector> OpLib::op_info_; -std::string ImplTypeToStr(OpImplyType impl_type) { +static std::string ImplTypeToStr(OpImplyType impl_type) { switch (impl_type) { case kTBE: return kTbe; @@ -80,7 +83,7 @@ bool OpLib::RegOp(const std::string &json_string, const std::string &impl_path) if (imply_type_string == kTbe) { OpImplyType imply_type = kTBE; ret = DecodeOpInfo(op_json, imply_type, impl_path); - } else if (imply_type_string == kAutodiff) { + } else if (imply_type_string == kAkg) { OpImplyType imply_type = kAKG; ret = DecodeOpInfo(op_json, imply_type, impl_path); } else if (imply_type_string == kAiCPU) { @@ -124,6 +127,55 @@ void OpLib::DecodeTBESpecificInfo(const nlohmann::json &obj, const std::shared_p } } +void OpLib::DecodeAKGSpecificInfo(const nlohmann::json &obj, const std::shared_ptr &op_info) { + MS_EXCEPTION_IF_NULL(op_info); + op_info->set_processor(obj.at(kProcessor)); +} + +bool OpLib::RegOpFromLocalInfo() { + MS_LOG(INFO) << "Start"; + static bool has_load = false; + if (has_load) { + return true; + } + has_load = true; + std::string dir = 
common::GetEnv("MINDSPORE_OP_INFO_PATH"); + if (dir.empty()) { + MS_LOG(INFO) << "MindSpore op info path does not been setted. use op info from python pass."; + return true; + } + char real_path[PATH_MAX] = {0}; + if (dir.size() >= PATH_MAX) { + MS_LOG(ERROR) << "Op info path is invalid: " << dir; + return false; + } +#if defined(_WIN32) || defined(_WIN64) + if (_fullpath(real_path, common::SafeCStr(dir), PATH_MAX) == nullptr) { + MS_LOG(ERROR) << "Op info path is invalid: " << dir; + return false; + } +#else + if (realpath(common::SafeCStr(dir), real_path) == nullptr) { + MS_LOG(ERROR) << "Op info path is invalid: " << dir; + return false; + } +#endif + MS_LOG(INFO) << "Start to read op info from local file."; + std::ifstream file(real_path); + if (!file.is_open()) { + MS_LOG(ERROR) << "Find op info file failed."; + return false; + } + std::string line; + while (getline(file, line)) { + if (!line.empty()) { + (void)OpLib::RegOp(line, ""); + } + } + MS_LOG(INFO) << "End"; + return true; +} + bool OpLib::DecodeOpInfo(const nlohmann::json &obj, const mindspore::kernel::OpImplyType imply_type, const std::string &impl_path) { std::shared_ptr op_info = std::make_shared(); @@ -134,6 +186,8 @@ bool OpLib::DecodeOpInfo(const nlohmann::json &obj, const mindspore::kernel::OpI op_info->set_fusion_type(obj.at(kFusionType)); if (imply_type == kTBE) { DecodeTBESpecificInfo(obj, op_info); + } else if (imply_type == kAKG) { + DecodeAKGSpecificInfo(obj, op_info); } auto attrs = obj.at(kAttr); for (const auto &attr : attrs) { @@ -160,14 +214,16 @@ bool OpLib::DecodeOpInfo(const nlohmann::json &obj, const mindspore::kernel::OpI return false; } } + if (CheckRepetition(op_info)) { + MS_LOG(WARNING) << "This op info has been already registed. 
op name: " << op_info->op_name() + << ", impl type: " << ImplTypeToStr(op_info->imply_type()) + << ", impl path: " << op_info->impl_path(); + return true; + } if (!GetRefInfo(op_info)) { MS_LOG(ERROR) << "GetRefInfo Failed"; return false; } - if (!CheckRepetition(op_info)) { - MS_LOG(ERROR) << "CheckRepetition Failed"; - return false; - } op_info_.push_back(op_info); return true; } @@ -269,6 +325,9 @@ bool OpLib::DecodeInputOutput(const nlohmann::json &obj, const OpImplyType imply } std::shared_ptr OpLib::FindOp(const std::string &op_name, OpImplyType imply_type) { + if (!OpLib::RegOpFromLocalInfo()) { + MS_LOG(INFO) << "Warning reg local op info failed."; + } auto context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context); bool is_gpu = (context->device_target() == kGPUDevice); @@ -280,11 +339,16 @@ std::shared_ptr OpLib::FindOp(const std::string &op_name, OpImplyType im for (const auto &op_info : op_info_) { MS_EXCEPTION_IF_NULL(op_info); if (op_info->op_name() == op_name && op_info->imply_type() == imply_type) { - return op_info; + auto akg_processor_match = [&]() { + return is_gpu ? 
op_info->processor() == kCUDA : op_info->processor() == kAiCore; + }; + if (imply_type != kAKG || akg_processor_match()) { + return op_info; + } } } - MS_LOG(DEBUG) << "FindOp failed: opname: " << op_name << ", imply_type: " << ImplTypeToStr(imply_type) - << ", current op num: " << op_info_.size(); + MS_LOG(INFO) << "FindOp failed: opname: " << op_name << ", imply_type: " << ImplTypeToStr(imply_type) + << ", current op num: " << op_info_.size(); return nullptr; } @@ -316,14 +380,11 @@ bool OpLib::CheckRepetition(const std::shared_ptr &op_info) { MS_EXCEPTION_IF_NULL(op_info); for (const auto &exist_op_info : op_info_) { MS_EXCEPTION_IF_NULL(exist_op_info); - if (exist_op_info->op_name() == op_info->op_name() && exist_op_info->imply_type() == op_info->imply_type() && - exist_op_info->impl_path() != op_info->impl_path()) { - MS_LOG(ERROR) << "Op has already exist, please use other name, op name: " << op_info->op_name() - << " op type: " << ImplTypeToStr(op_info->imply_type()); - return false; + if (exist_op_info->equals_to(op_info)) { + return true; } } - return true; + return false; } } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/oplib/oplib.h b/mindspore/ccsrc/backend/kernel_compiler/oplib/oplib.h similarity index 85% rename from mindspore/ccsrc/kernel/oplib/oplib.h rename to mindspore/ccsrc/backend/kernel_compiler/oplib/oplib.h index 47183455a2..845edbfc2a 100644 --- a/mindspore/ccsrc/kernel/oplib/oplib.h +++ b/mindspore/ccsrc/backend/kernel_compiler/oplib/oplib.h @@ -20,7 +20,7 @@ #include #include #include -#include "kernel/oplib/opinfo.h" +#include "backend/kernel_compiler/oplib/opinfo.h" namespace mindspore { namespace kernel { @@ -28,11 +28,8 @@ class OpLib { public: OpLib() = default; virtual ~OpLib() = default; - bool RegOp(const std::string &json_string, const std::string &impl_path); - static void RegOpInfo(std::shared_ptr opinfo) { - op_info_.emplace_back(opinfo); - return; - } + static bool RegOp(const std::string 
&json_string, const std::string &impl_path); + static void RegOpInfo(const std::shared_ptr &opinfo) { op_info_.emplace_back(opinfo); } static std::shared_ptr FindOp(const std::string &op_name, OpImplyType imply_type); static const std::vector> &GetAllOpsInfo() { return op_info_; } @@ -40,12 +37,14 @@ class OpLib { static std::vector> op_info_; private: + static bool RegOpFromLocalInfo(); static bool DecodeOpInfo(const nlohmann::json &obj, const OpImplyType imply_type, const std::string &impl_path); static bool DecodeAttr(const nlohmann::json &obj, const OpImplyType imply_type, const std::shared_ptr &op_info); static bool DecodeDtypeFormat(const nlohmann::json &dtype_format, const std::shared_ptr &op_io, size_t index); static void DecodeTBESpecificInfo(const nlohmann::json &obj, const std::shared_ptr &op_info); + static void DecodeAKGSpecificInfo(const nlohmann::json &obj, const std::shared_ptr &op_info); static bool DecodeInputOutput(const nlohmann::json &obj, const OpImplyType imply_type, const OpIOType io_type, const std::shared_ptr &op_info, const nlohmann::json &dtype_format); static bool GetRefInfo(const std::shared_ptr &op_info); diff --git a/mindspore/ccsrc/kernel/oplib/oploader.h b/mindspore/ccsrc/backend/kernel_compiler/oplib/oploader.h similarity index 96% rename from mindspore/ccsrc/kernel/oplib/oploader.h rename to mindspore/ccsrc/backend/kernel_compiler/oplib/oploader.h index dd4c37e80b..6b2981e5b3 100644 --- a/mindspore/ccsrc/kernel/oplib/oploader.h +++ b/mindspore/ccsrc/backend/kernel_compiler/oplib/oploader.h @@ -18,7 +18,7 @@ #define MINDSPORE_OPLOADER_H #include -#include "kernel/oplib/oplib.h" +#include "backend/kernel_compiler/oplib/oplib.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/rts/assign.cc b/mindspore/ccsrc/backend/kernel_compiler/rts/assign.cc similarity index 87% rename from mindspore/ccsrc/kernel/rts/assign.cc rename to mindspore/ccsrc/backend/kernel_compiler/rts/assign.cc index 7f214b6e6f..552468bb71 
100644 --- a/mindspore/ccsrc/kernel/rts/assign.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/assign.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/rts/assign.h" +#include "backend/kernel_compiler/rts/assign.h" #include @@ -58,8 +58,9 @@ std::vector AssignKernel::GenTask(const std::vector &in } stream_id_ = stream_id; - std::shared_ptr task_info_ptr = std::make_shared( - stream_id, inputs[0]->addr, inputs[0]->size, inputs[1]->addr, inputs[1]->size, RT_MEMCPY_DEVICE_TO_DEVICE); + std::shared_ptr task_info_ptr = + std::make_shared(kernel_name_, stream_id, inputs[0]->addr, inputs[0]->size, inputs[1]->addr, + inputs[1]->size, RT_MEMCPY_DEVICE_TO_DEVICE, false); MS_EXCEPTION_IF_NULL(task_info_ptr); return {task_info_ptr}; } diff --git a/mindspore/ccsrc/kernel/rts/assign.h b/mindspore/ccsrc/backend/kernel_compiler/rts/assign.h similarity index 92% rename from mindspore/ccsrc/kernel/rts/assign.h rename to mindspore/ccsrc/backend/kernel_compiler/rts/assign.h index 0e7e52d48f..cff946cc36 100644 --- a/mindspore/ccsrc/kernel/rts/assign.h +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/assign.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_KERNEL_RTS_ASSIGN_H #include -#include "kernel/rts/rt_kernel.h" -#include "kernel/rts/rt_kernel_info.h" +#include "backend/kernel_compiler/rts/rt_kernel.h" +#include "backend/kernel_compiler/rts/rt_kernel_info.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/rts/label_goto.cc b/mindspore/ccsrc/backend/kernel_compiler/rts/label_goto.cc similarity index 91% rename from mindspore/ccsrc/kernel/rts/label_goto.cc rename to mindspore/ccsrc/backend/kernel_compiler/rts/label_goto.cc index 7bcf42a210..8ec460fe0b 100644 --- a/mindspore/ccsrc/kernel/rts/label_goto.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/label_goto.cc @@ -14,12 +14,12 @@ * limitations under the License. 
*/ -#include "kernel/rts/label_goto.h" +#include "backend/kernel_compiler/rts/label_goto.h" #include #include #include "runtime/stream.h" #include "framework/ge_runtime/task_info.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "common/utils.h" using ge::model_runner::LabelGotoTaskInfo; @@ -55,7 +55,8 @@ std::vector LabelGotoKernel::GenTask(const std::vector const std::vector &, uint32_t stream_id) { MS_LOG(INFO) << "LabelGotoKernel GenTask label:" << label_ << ", stream id:" << stream_id; std::vector task_info_list; - std::shared_ptr task_info_ptr = std::make_shared(stream_id, label_); + std::shared_ptr task_info_ptr = + std::make_shared(kernel_name_, stream_id, label_); MS_EXCEPTION_IF_NULL(task_info_ptr); task_info_list.emplace_back(task_info_ptr); return task_info_list; diff --git a/mindspore/ccsrc/kernel/rts/label_goto.h b/mindspore/ccsrc/backend/kernel_compiler/rts/label_goto.h similarity index 93% rename from mindspore/ccsrc/kernel/rts/label_goto.h rename to mindspore/ccsrc/backend/kernel_compiler/rts/label_goto.h index efccc12d6f..2680d916a5 100644 --- a/mindspore/ccsrc/kernel/rts/label_goto.h +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/label_goto.h @@ -19,8 +19,8 @@ #include #include -#include "kernel/rts/rt_kernel.h" -#include "kernel/rts/rt_kernel_info.h" +#include "backend/kernel_compiler/rts/rt_kernel.h" +#include "backend/kernel_compiler/rts/rt_kernel_info.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/rts/label_set.cc b/mindspore/ccsrc/backend/kernel_compiler/rts/label_set.cc similarity index 93% rename from mindspore/ccsrc/kernel/rts/label_set.cc rename to mindspore/ccsrc/backend/kernel_compiler/rts/label_set.cc index 5aedd012dc..909885ff17 100644 --- a/mindspore/ccsrc/kernel/rts/label_set.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/label_set.cc @@ -14,12 +14,12 @@ * limitations under the License. 
*/ -#include "kernel/rts/label_set.h" +#include "backend/kernel_compiler/rts/label_set.h" #include #include #include "runtime/stream.h" #include "framework/ge_runtime/task_info.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "common/utils.h" using ge::model_runner::LabelSetTaskInfo; @@ -55,7 +55,7 @@ std::vector LabelSetKernel::GenTask(const std::vector & const std::vector &, uint32_t stream_id) { MS_LOG(INFO) << "LabelSetKernel GenTask label:" << label_ << ", stream id:" << stream_id; std::vector task_info_list; - std::shared_ptr task_info_ptr = std::make_shared(stream_id, label_); + std::shared_ptr task_info_ptr = std::make_shared(kernel_name_, stream_id, label_); MS_EXCEPTION_IF_NULL(task_info_ptr); task_info_list.emplace_back(task_info_ptr); return task_info_list; diff --git a/mindspore/ccsrc/kernel/rts/label_set.h b/mindspore/ccsrc/backend/kernel_compiler/rts/label_set.h similarity index 93% rename from mindspore/ccsrc/kernel/rts/label_set.h rename to mindspore/ccsrc/backend/kernel_compiler/rts/label_set.h index d05d81f898..8d0cfdfb20 100644 --- a/mindspore/ccsrc/kernel/rts/label_set.h +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/label_set.h @@ -19,8 +19,8 @@ #include #include -#include "kernel/rts/rt_kernel.h" -#include "kernel/rts/rt_kernel_info.h" +#include "backend/kernel_compiler/rts/rt_kernel.h" +#include "backend/kernel_compiler/rts/rt_kernel_info.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/rts/label_switch.cc b/mindspore/ccsrc/backend/kernel_compiler/rts/label_switch.cc similarity index 94% rename from mindspore/ccsrc/kernel/rts/label_switch.cc rename to mindspore/ccsrc/backend/kernel_compiler/rts/label_switch.cc index fb1ad1601a..ccb49d9497 100644 --- a/mindspore/ccsrc/kernel/rts/label_switch.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/label_switch.cc @@ -14,13 +14,13 @@ * limitations under the License. 
*/ -#include "kernel/rts/label_switch.h" +#include "backend/kernel_compiler/rts/label_switch.h" #include #include #include #include "runtime/stream.h" #include "framework/ge_runtime/task_info.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "common/utils.h" using ge::model_runner::LabelSwitchTaskInfo; @@ -67,7 +67,7 @@ std::vector LabelSwitchKernel::GenTask(const std::vector task_info_list; cond_ = inputs[0]->addr; - auto task_info_ptr = std::make_shared(stream_id, label_size_, label_list_, cond_); + auto task_info_ptr = std::make_shared(kernel_name_, stream_id, label_size_, label_list_, cond_); MS_EXCEPTION_IF_NULL(task_info_ptr); task_info_list.emplace_back(task_info_ptr); return task_info_list; diff --git a/mindspore/ccsrc/kernel/rts/label_switch.h b/mindspore/ccsrc/backend/kernel_compiler/rts/label_switch.h similarity index 94% rename from mindspore/ccsrc/kernel/rts/label_switch.h rename to mindspore/ccsrc/backend/kernel_compiler/rts/label_switch.h index 858f851b2a..1860d38d74 100644 --- a/mindspore/ccsrc/kernel/rts/label_switch.h +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/label_switch.h @@ -19,8 +19,8 @@ #include #include -#include "kernel/rts/rt_kernel.h" -#include "kernel/rts/rt_kernel_info.h" +#include "backend/kernel_compiler/rts/rt_kernel.h" +#include "backend/kernel_compiler/rts/rt_kernel_info.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/rts/memcpy_async.cc b/mindspore/ccsrc/backend/kernel_compiler/rts/memcpy_async.cc similarity index 93% rename from mindspore/ccsrc/kernel/rts/memcpy_async.cc rename to mindspore/ccsrc/backend/kernel_compiler/rts/memcpy_async.cc index f5fbec6e56..ca1114a83f 100644 --- a/mindspore/ccsrc/kernel/rts/memcpy_async.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/memcpy_async.cc @@ -14,15 +14,16 @@ * limitations under the License. 
*/ -#include "kernel/rts/memcpy_async.h" +#include "backend/kernel_compiler/rts/memcpy_async.h" #include #include #include "runtime/mem.h" #include "common/utils.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "common/trans.h" +#include "utils/context/ms_context.h" using ge::model_runner::MemcpyAsyncTaskInfo; using MemcpyAsyncTaskInfoPtr = std::shared_ptr; @@ -118,8 +119,9 @@ std::vector MemCpyAsyncKernel::GenTask(const std::vector task_info_ptr = std::make_shared( - stream_id, outputs[0]->addr, outputs[0]->size, inputs[0]->addr, inputs[0]->size, RT_MEMCPY_DEVICE_TO_DEVICE); + std::shared_ptr task_info_ptr = + std::make_shared(kernel_name_, stream_id, outputs[0]->addr, outputs[0]->size, inputs[0]->addr, + inputs[0]->size, RT_MEMCPY_DEVICE_TO_DEVICE, NeedDump()); MS_EXCEPTION_IF_NULL(task_info_ptr); return {task_info_ptr}; } diff --git a/mindspore/ccsrc/kernel/rts/memcpy_async.h b/mindspore/ccsrc/backend/kernel_compiler/rts/memcpy_async.h similarity index 94% rename from mindspore/ccsrc/kernel/rts/memcpy_async.h rename to mindspore/ccsrc/backend/kernel_compiler/rts/memcpy_async.h index 94bbf1ca1c..07a782be50 100644 --- a/mindspore/ccsrc/kernel/rts/memcpy_async.h +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/memcpy_async.h @@ -19,8 +19,8 @@ #include #include -#include "kernel/rts/rt_kernel.h" -#include "kernel/rts/rt_kernel_info.h" +#include "backend/kernel_compiler/rts/rt_kernel.h" +#include "backend/kernel_compiler/rts/rt_kernel_info.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/rts/profiling_kernel_mod.cc b/mindspore/ccsrc/backend/kernel_compiler/rts/profiling_kernel_mod.cc similarity index 90% rename from mindspore/ccsrc/kernel/rts/profiling_kernel_mod.cc rename to mindspore/ccsrc/backend/kernel_compiler/rts/profiling_kernel_mod.cc index ff005f399b..8213468b48 100644 --- a/mindspore/ccsrc/kernel/rts/profiling_kernel_mod.cc +++ 
b/mindspore/ccsrc/backend/kernel_compiler/rts/profiling_kernel_mod.cc @@ -14,15 +14,15 @@ * limitations under the License. */ -#include "kernel/rts/profiling_kernel_mod.h" +#include "backend/kernel_compiler/rts/profiling_kernel_mod.h" #include #include #include #include "framework/ge_runtime/task_info.h" -#include "device/ascend/profiling/profiling_utils.h" -#include "session/anf_runtime_algorithm.h" +#include "runtime/device/ascend/profiling/profiling_utils.h" +#include "backend/session/anf_runtime_algorithm.h" using ProfilerTraceTaskInfo = ge::model_runner::ProfilerTraceTaskInfo; using mindspore::device::ascend::ProfilingUtils; @@ -63,7 +63,7 @@ std::vector ProfilingKernelMod::GenTask(const std::vector task_info_ptr = - std::make_shared(stream_id, log_id_, notify_, flags_); + std::make_shared(kernel_name_, stream_id, log_id_, notify_, flags_); return {task_info_ptr}; } } // namespace kernel diff --git a/mindspore/ccsrc/kernel/rts/profiling_kernel_mod.h b/mindspore/ccsrc/backend/kernel_compiler/rts/profiling_kernel_mod.h similarity index 96% rename from mindspore/ccsrc/kernel/rts/profiling_kernel_mod.h rename to mindspore/ccsrc/backend/kernel_compiler/rts/profiling_kernel_mod.h index f77f3b5c67..cdb43afb3e 100644 --- a/mindspore/ccsrc/kernel/rts/profiling_kernel_mod.h +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/profiling_kernel_mod.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_RTS_PROFILING_KERNEL_MOD_H_ #define MINDSPORE_MINDSPORE_CCSRC_KERNEL_RTS_PROFILING_KERNEL_MOD_H_ #include -#include "kernel/rts/rt_kernel.h" +#include "backend/kernel_compiler/rts/rt_kernel.h" namespace mindspore { namespace kernel { class ProfilingKernelMod : public RtKernel { diff --git a/mindspore/ccsrc/kernel/rts/recv.cc b/mindspore/ccsrc/backend/kernel_compiler/rts/recv.cc similarity index 92% rename from mindspore/ccsrc/kernel/rts/recv.cc rename to mindspore/ccsrc/backend/kernel_compiler/rts/recv.cc index c195fd1c92..cee0ef2fdc 100644 --- 
a/mindspore/ccsrc/kernel/rts/recv.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/recv.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include "kernel/rts/recv.h" +#include "backend/kernel_compiler/rts/recv.h" #include #include "runtime/stream.h" #include "utils/context/ms_context.h" -#include "device/ascend/ascend_stream_assign.h" +#include "runtime/device/ascend/ascend_stream_assign.h" #include "framework/ge_runtime/task_info.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "common/utils.h" namespace mindspore { @@ -60,7 +60,7 @@ std::vector RecvKernel::GenTask(const std::vector &, co const std::vector &, uint32_t stream_id) { MS_LOG(INFO) << "RecvKernel GenTask event_id_:" << event_id_ << ", stream_id_:" << stream_id; stream_id_ = stream_id; - EventWaitTaskInfoPtr task_info_ptr = std::make_shared(stream_id, event_id_); + EventWaitTaskInfoPtr task_info_ptr = std::make_shared(kernel_name_, stream_id, event_id_); MS_EXCEPTION_IF_NULL(task_info_ptr); return {task_info_ptr}; } diff --git a/mindspore/ccsrc/kernel/rts/recv.h b/mindspore/ccsrc/backend/kernel_compiler/rts/recv.h similarity index 93% rename from mindspore/ccsrc/kernel/rts/recv.h rename to mindspore/ccsrc/backend/kernel_compiler/rts/recv.h index 68f0b69cc5..73e0214eae 100644 --- a/mindspore/ccsrc/kernel/rts/recv.h +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/recv.h @@ -19,8 +19,8 @@ #include #include -#include "kernel/rts/rt_kernel.h" -#include "kernel/rts/rt_kernel_info.h" +#include "backend/kernel_compiler/rts/rt_kernel.h" +#include "backend/kernel_compiler/rts/rt_kernel_info.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/rts/rt_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/rts/rt_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel.cc index 9e81372383..9279a84cf0 100644 --- 
a/mindspore/ccsrc/kernel/rts/rt_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/rts/rt_kernel.h" +#include "backend/kernel_compiler/rts/rt_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/rts/rt_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/rts/rt_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel.h index 44d55dca31..dc0aa3e283 100644 --- a/mindspore/ccsrc/kernel/rts/rt_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel.h @@ -22,8 +22,8 @@ #include #include #include -#include "kernel/ascend_kernel_mod.h" -#include "kernel/task_stream.h" +#include "backend/kernel_compiler/ascend_kernel_mod.h" +#include "backend/kernel_compiler/task_stream.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/rts/rt_kernel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel_build.cc similarity index 88% rename from mindspore/ccsrc/kernel/rts/rt_kernel_build.cc rename to mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel_build.cc index 164605fe9b..9704a9b97f 100644 --- a/mindspore/ccsrc/kernel/rts/rt_kernel_build.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel_build.cc @@ -14,15 +14,15 @@ * limitations under the License. 
*/ -#include "kernel/rts/rt_kernel_build.h" +#include "backend/kernel_compiler/rts/rt_kernel_build.h" #include #include #include #include -#include "kernel/rts/rt_kernel.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/rts/rt_kernel.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/rts/rt_kernel_build.h b/mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel_build.h similarity index 95% rename from mindspore/ccsrc/kernel/rts/rt_kernel_build.h rename to mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel_build.h index cbd674b751..ccfb8d923b 100644 --- a/mindspore/ccsrc/kernel/rts/rt_kernel_build.h +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel_build.h @@ -19,7 +19,7 @@ #include #include -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" namespace mindspore { namespace kernel { KernelModPtr RtOpBuild(const AnfNodePtr &anf_node); diff --git a/mindspore/ccsrc/kernel/rts/rt_kernel_info.cc b/mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel_info.cc similarity index 97% rename from mindspore/ccsrc/kernel/rts/rt_kernel_info.cc rename to mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel_info.cc index 14f5a60a07..9501aed5f2 100755 --- a/mindspore/ccsrc/kernel/rts/rt_kernel_info.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel_info.cc @@ -14,13 +14,13 @@ * limitations under the License. 
*/ -#include "kernel/rts/rt_kernel_info.h" +#include "backend/kernel_compiler/rts/rt_kernel_info.h" #include #include #include "utils/convert_utils.h" #include "utils/utils.h" #include "common/utils.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/rts/rt_kernel_info.h b/mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel_info.h similarity index 95% rename from mindspore/ccsrc/kernel/rts/rt_kernel_info.h rename to mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel_info.h index ae3753b4c8..6048fb3779 100644 --- a/mindspore/ccsrc/kernel/rts/rt_kernel_info.h +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel_info.h @@ -26,8 +26,8 @@ #include #include "ir/dtype.h" -#include "kernel/kernel_build_info.h" -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "backend/kernel_compiler/kernel.h" #include "utils/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/rts/send.cc b/mindspore/ccsrc/backend/kernel_compiler/rts/send.cc similarity index 93% rename from mindspore/ccsrc/kernel/rts/send.cc rename to mindspore/ccsrc/backend/kernel_compiler/rts/send.cc index ccdd43ebb6..11c0a7d668 100644 --- a/mindspore/ccsrc/kernel/rts/send.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/send.cc @@ -14,11 +14,11 @@ * limitations under the License. 
*/ -#include "kernel/rts/send.h" +#include "backend/kernel_compiler/rts/send.h" #include #include "runtime/event.h" #include "framework/ge_runtime/task_info.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "common/utils.h" using ge::model_runner::EventRecordTaskInfo; @@ -57,7 +57,7 @@ std::vector SendKernel::GenTask(const std::vector &, co const std::vector &, uint32_t stream_id) { MS_LOG(INFO) << "SendKernel GenTask event id:" << event_id_ << ", stream id:" << stream_id; stream_id_ = stream_id; - EventRecordTaskInfoPtr task_info_ptr = std::make_shared(stream_id, event_id_); + EventRecordTaskInfoPtr task_info_ptr = std::make_shared(kernel_name_, stream_id, event_id_); MS_EXCEPTION_IF_NULL(task_info_ptr); return {task_info_ptr}; } diff --git a/mindspore/ccsrc/kernel/rts/send.h b/mindspore/ccsrc/backend/kernel_compiler/rts/send.h similarity index 93% rename from mindspore/ccsrc/kernel/rts/send.h rename to mindspore/ccsrc/backend/kernel_compiler/rts/send.h index 5c5b7cf09e..dbadb1ef44 100644 --- a/mindspore/ccsrc/kernel/rts/send.h +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/send.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_KERNEL_RTS_SEND_H #include #include -#include "kernel/rts/rt_kernel.h" -#include "kernel/rts/rt_kernel_info.h" +#include "backend/kernel_compiler/rts/rt_kernel.h" +#include "backend/kernel_compiler/rts/rt_kernel_info.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/rts/stream_active.cc b/mindspore/ccsrc/backend/kernel_compiler/rts/stream_active.cc similarity index 92% rename from mindspore/ccsrc/kernel/rts/stream_active.cc rename to mindspore/ccsrc/backend/kernel_compiler/rts/stream_active.cc index 4f0895a0be..e33549973d 100644 --- a/mindspore/ccsrc/kernel/rts/stream_active.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/stream_active.cc @@ -14,12 +14,12 @@ * limitations under the License. 
*/ -#include "kernel/rts/stream_active.h" +#include "backend/kernel_compiler/rts/stream_active.h" #include #include #include "runtime/stream.h" #include "framework/ge_runtime/task_info.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "common/utils.h" using ge::model_runner::StreamActiveTaskInfo; @@ -72,7 +72,8 @@ std::vector StreamActiveKernel::GenTask(const std::vector task_info_list; for (auto &index : active_streams_index_) { - std::shared_ptr task_info_ptr = std::make_shared(stream_id, index); + std::shared_ptr task_info_ptr = + std::make_shared(kernel_name_, stream_id, index); MS_EXCEPTION_IF_NULL(task_info_ptr); task_info_list.emplace_back(task_info_ptr); MS_LOG(INFO) << "StreamActiveKernel GenTask: streamId:" << stream_id << ", Active streamId:" << index; diff --git a/mindspore/ccsrc/kernel/rts/stream_active.h b/mindspore/ccsrc/backend/kernel_compiler/rts/stream_active.h similarity index 93% rename from mindspore/ccsrc/kernel/rts/stream_active.h rename to mindspore/ccsrc/backend/kernel_compiler/rts/stream_active.h index 68c422e7c2..409c3437dc 100644 --- a/mindspore/ccsrc/kernel/rts/stream_active.h +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/stream_active.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_KERNEL_RTS_STREAM_ACTIVE_H #include #include -#include "kernel/rts/rt_kernel.h" -#include "kernel/rts/rt_kernel_info.h" +#include "backend/kernel_compiler/rts/rt_kernel.h" +#include "backend/kernel_compiler/rts/rt_kernel_info.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/rts/stream_switch.cc b/mindspore/ccsrc/backend/kernel_compiler/rts/stream_switch.cc similarity index 92% rename from mindspore/ccsrc/kernel/rts/stream_switch.cc rename to mindspore/ccsrc/backend/kernel_compiler/rts/stream_switch.cc index bab6b04366..5fe03b1960 100644 --- a/mindspore/ccsrc/kernel/rts/stream_switch.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/stream_switch.cc @@ -14,14 +14,14 @@ 
* limitations under the License. */ -#include "kernel/rts/stream_switch.h" +#include "backend/kernel_compiler/rts/stream_switch.h" #include #include #include "runtime/stream.h" #include "framework/ge_runtime/task_info.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "common/utils.h" using ge::model_runner::StreamSwitchTaskInfo; @@ -91,8 +91,8 @@ std::vector StreamSwitchKernel::GenTask(const std::vectoraddr; MS_LOG(INFO) << "cond_:" << static_cast(cond_) << ", true_stream_index_:" << true_stream_index_ << ", stream_id:" << stream_id; - std::shared_ptr task_info_ptr = - std::make_shared(stream_id, true_stream_index_, loop_cnt, ites_per_loop, cond_, data_type_); + std::shared_ptr task_info_ptr = std::make_shared( + kernel_name_, stream_id, true_stream_index_, loop_cnt, ites_per_loop, cond_, data_type_); MS_EXCEPTION_IF_NULL(task_info_ptr); return {task_info_ptr}; } diff --git a/mindspore/ccsrc/kernel/rts/stream_switch.h b/mindspore/ccsrc/backend/kernel_compiler/rts/stream_switch.h similarity index 93% rename from mindspore/ccsrc/kernel/rts/stream_switch.h rename to mindspore/ccsrc/backend/kernel_compiler/rts/stream_switch.h index 4e927f3059..64a51f68bf 100644 --- a/mindspore/ccsrc/kernel/rts/stream_switch.h +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/stream_switch.h @@ -19,8 +19,8 @@ #include #include -#include "kernel/rts/rt_kernel.h" -#include "kernel/rts/rt_kernel_info.h" +#include "backend/kernel_compiler/rts/rt_kernel.h" +#include "backend/kernel_compiler/rts/rt_kernel_info.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/task_stream.h b/mindspore/ccsrc/backend/kernel_compiler/task_stream.h similarity index 100% rename from mindspore/ccsrc/kernel/task_stream.h rename to mindspore/ccsrc/backend/kernel_compiler/task_stream.h diff --git a/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.cc similarity index 98% rename from 
mindspore/ccsrc/kernel/tbe/tbe_adapter.cc rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.cc index c38f48763e..449a9f4556 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/tbe/tbe_adapter.h" +#include "backend/kernel_compiler/tbe/tbe_adapter.h" #include #include @@ -23,8 +23,8 @@ #include #include -#include "session/anf_runtime_algorithm.h" -#include "kernel/oplib/opinfo.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/oplib/opinfo.h" namespace mindspore { namespace kernel { @@ -84,6 +84,7 @@ static std::map tbe_func_adapter_map = { {"transpose", "transpose_d"}, {"fill", "fill_d"}, {"unsorted_segment_sum", "unsorted_segment_sum_d"}, + {"unsorted_segment_prod", "unsorted_segment_prod_d"}, {"concat", "concat_d"}, {"slice", "slice_d"}, {"reduce_sum", "reduce_sum_d"}, diff --git a/mindspore/ccsrc/kernel/tbe/tbe_adapter.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.h similarity index 97% rename from mindspore/ccsrc/kernel/tbe/tbe_adapter.h rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.h index 51c4cfd777..aa09efc11f 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_adapter.h +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.h @@ -21,8 +21,8 @@ #include #include #include "nlohmann/json.hpp" -#include "ir/base.h" -#include "kernel/oplib/opinfo.h" +#include "base/base.h" +#include "backend/kernel_compiler/oplib/opinfo.h" // Note: This file is mainly used to adapt the ME front-end operator description and // the TBE back-end operator implementation difference namespace mindspore { diff --git a/mindspore/ccsrc/kernel/tbe/tbe_convert_utils.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_convert_utils.cc similarity index 97% rename from mindspore/ccsrc/kernel/tbe/tbe_convert_utils.cc rename to 
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_convert_utils.cc index 90c5557253..e7fd94ef84 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_convert_utils.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_convert_utils.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include "kernel/tbe/tbe_convert_utils.h" +#include "backend/kernel_compiler/tbe/tbe_convert_utils.h" #include #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "common/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/tbe/tbe_convert_utils.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_convert_utils.h similarity index 94% rename from mindspore/ccsrc/kernel/tbe/tbe_convert_utils.h rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_convert_utils.h index 2c8d3008b9..dea058cd56 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_convert_utils.h +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_convert_utils.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_KERNEL_TBE_COMMON_UTILS_H_ #include -#include "kernel/kernel.h" -#include "ir/base.h" +#include "backend/kernel_compiler/kernel.h" +#include "base/base.h" #include "ir/dtype/type.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.cc similarity index 99% rename from mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.cc index 645a195f5e..73642b291a 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.cc @@ -14,17 +14,17 @@ * limitations under the License. 
*/ -#include "kernel/tbe/tbe_kernel_build.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_build.h" #include #include #include -#include "operator/ops.h" -#include "parallel/ops_info/ops_utils.h" -#include "session/anf_runtime_algorithm.h" -#include "kernel/tbe/tbe_adapter.h" -#include "kernel/tbe/tbe_python_funcs.h" -#include "kernel/tbe/tbe_convert_utils.h" -#include "kernel/tbe/tbe_utils.h" +#include "frontend/operator/ops.h" +#include "frontend/parallel/ops_info/ops_utils.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/tbe/tbe_adapter.h" +#include "backend/kernel_compiler/tbe/tbe_python_funcs.h" +#include "backend/kernel_compiler/tbe/tbe_convert_utils.h" +#include "backend/kernel_compiler/tbe/tbe_utils.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.h similarity index 97% rename from mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.h index eef02efa87..768f811055 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.h @@ -25,10 +25,10 @@ #include #include #include "ir/dtype.h" -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" #include "pybind11/stl.h" -#include "kernel/oplib/oplib.h" -#include "kernel/tbe/tbe_adapter.h" +#include "backend/kernel_compiler/oplib/oplib.h" +#include "backend/kernel_compiler/tbe/tbe_adapter.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_mod.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_mod.cc similarity index 93% rename from mindspore/ccsrc/kernel/tbe/tbe_kernel_mod.cc rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_mod.cc index 0f377940da..e6cb4cf30d 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_mod.cc +++ 
b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_mod.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "kernel/tbe/tbe_kernel_mod.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_mod.h" #include #include "runtime/rt.h" -#include "nlohmann/json.hpp" +#include "utils/context/ms_context.h" #include "graphengine/inc/framework/ge_runtime/task_info.h" namespace mindspore { @@ -99,9 +99,9 @@ std::vector TbeKernelMod::GenTask(const std::vector &in MS_LOG(INFO) << "block_dim is:" << block_dim_; - TbeTaskInfoPtr task_info_ptr = - make_shared(stream_id, stub_func, block_dim_, args, 0, sm_desc, nullptr, 0, - meta_data, input_data_addrs, output_data_addrs, workspace_addrs); + TbeTaskInfoPtr task_info_ptr = make_shared( + kernel_name_, stream_id, stub_func, block_dim_, args, 0, sm_desc, nullptr, 0, meta_data, input_data_addrs, + output_data_addrs, workspace_addrs, NeedDump()); return {task_info_ptr}; } diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_mod.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_mod.h similarity index 95% rename from mindspore/ccsrc/kernel/tbe/tbe_kernel_mod.h rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_mod.h index e0e7ab4646..de48c83d9b 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_mod.h +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_mod.h @@ -21,8 +21,8 @@ #include #include #include -#include "kernel/ascend_kernel_mod.h" -#include "kernel/tbe/tbe_utils.h" +#include "backend/kernel_compiler/ascend_kernel_mod.h" +#include "backend/kernel_compiler/tbe/tbe_utils.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_parallel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.cc similarity index 96% rename from mindspore/ccsrc/kernel/tbe/tbe_kernel_parallel_build.cc rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.cc index 43d492f397..48223f40c6 100644 --- 
a/mindspore/ccsrc/kernel/tbe/tbe_kernel_parallel_build.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/tbe/tbe_kernel_parallel_build.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h" #include #include @@ -24,14 +24,14 @@ #include #include "utils/context/ms_context.h" -#include "kernel/tbe/tbe_adapter.h" -#include "kernel/tbe/tbe_kernel_build.h" -#include "kernel/tbe/tbe_kernel_mod.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/tbe/tbe_adapter.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_build.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_mod.h" +#include "backend/session/anf_runtime_algorithm.h" #include "./common.h" -#include "kernel/tbe/tbe_python_funcs.h" -#include "kernel/tbe/tbe_convert_utils.h" -#include "kernel/tbe/tbe_utils.h" +#include "backend/kernel_compiler/tbe/tbe_python_funcs.h" +#include "backend/kernel_compiler/tbe/tbe_convert_utils.h" +#include "backend/kernel_compiler/tbe/tbe_utils.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_parallel_build.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h similarity index 98% rename from mindspore/ccsrc/kernel/tbe/tbe_kernel_parallel_build.h rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h index 637c03bce3..a29469b47c 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_parallel_build.h +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h @@ -21,7 +21,7 @@ #include #include #include -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" #include "pybind11/stl.h" #include namespace mindspore { diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/common_utils.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/common_utils.h similarity index 100% rename from 
mindspore/ccsrc/kernel/tbe/tbe_kernel_select/common_utils.h rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/common_utils.h diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.cc similarity index 98% rename from mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.cc rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.cc index 8050f02f95..c5e882949b 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "kernel/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.h" #include "utils/utils.h" -#include "session/anf_runtime_algorithm.h" -#include "kernel/tbe/tbe_kernel_select/common_utils.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_select/common_utils.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.h similarity index 96% rename from mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.h rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.h index af711ddf29..4685df6724 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.h +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.h @@ -21,7 +21,7 @@ #include 
#include #include "ir/anf.h" -#include "kernel/tbe/tbe_kernel_select/common_utils.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_select/common_utils.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.cc similarity index 84% rename from mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.cc rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.cc index 3f8e5b85c3..61aa9dfb91 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.cc @@ -14,17 +14,16 @@ * limitations under the License. */ -#include "kernel/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.h" #include #include #include "utils/utils.h" -#include "session/anf_runtime_algorithm.h" -#include "kernel/tbe/tbe_kernel_select/common_utils.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_select/common_utils.h" +#include "backend/kernel_compiler/common_utils.h" namespace mindspore { namespace kernel { -constexpr char kAxis[] = "axis"; -constexpr char kTypeInt32[] = "Int32"; constexpr size_t kInputIndex_0 = 0; constexpr size_t kOutputIndex_0 = 0; constexpr size_t kChannelN = 0; @@ -50,7 +49,7 @@ bool TbeKernelReduceSelecter::GetShapeInfo(SupportFormat *support_format) { // get keep dim attr GetReduceAttrKeepDim(); // get axis attr - GetReduceAttrAxis(); + axis_ = GetReduceAttrAxis(cnode_ptr_); AssignSupportFormat(kOpFormat_DEFAULT, support_format); return true; } @@ -121,31 +120,6 @@ bool TbeKernelReduceSelecter::IsFracZAndC1HWNCoC0Common(const std::string &forma return true; } -void 
TbeKernelReduceSelecter::GetReduceAttrAxis() { - auto primitive = AnfAlgo::GetCNodePrimitive(cnode_ptr_); - MS_EXCEPTION_IF_NULL(primitive); - auto axis = primitive->GetAttr(kAxis); - if (axis == nullptr) { - MS_LOG(INFO) << "This node does't have axie attr."; - return; - } - auto type = axis->type(); - MS_EXCEPTION_IF_NULL(type); - std::vector axis_list; - if (type->ToString() == kTypeInt32) { - axis_list.emplace_back(GetValue(axis)); - } else { - axis_list = GetValue>(axis); - } - for (const auto &elem : axis_list) { - if (elem < 0) { - axis_.emplace_back(input_shape_.size() + elem); - } else { - axis_.emplace_back(IntToSize(elem)); - } - } -} - void TbeKernelReduceSelecter::GetReduceAttrKeepDim() { if (!AnfAlgo::HasNodeAttr(kAttrKeepDims, cnode_ptr_)) { MS_LOG(INFO) << "This node does't have keep_attr."; diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.h similarity index 94% rename from mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.h rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.h index e66525fd64..196bb7b06a 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.h +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.h @@ -20,7 +20,7 @@ #include #include #include "ir/anf.h" -#include "kernel/tbe/tbe_kernel_select/common_utils.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_select/common_utils.h" namespace mindspore { namespace kernel { class TbeKernelReduceSelecter { @@ -36,7 +36,6 @@ class TbeKernelReduceSelecter { private: bool IsFracZAndC1HWNCoC0Common(const std::string &format, SupportFormat *support_format) const; - void GetReduceAttrAxis(); void GetReduceAttrKeepDim(); void AssignSupportFormat(const std::string &support_format_str, SupportFormat *support_format) const; bool 
Is4DShape(const std::vector &shape) const; @@ -44,7 +43,7 @@ class TbeKernelReduceSelecter { CNodePtr cnode_ptr_; std::vector input_shape_{}; std::vector output_shape_{}; - std::vector axis_{}; + std::vector axis_{}; bool keep_dims_ = false; }; } // namespace kernel diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_select.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_select.cc similarity index 97% rename from mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_select.cc rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_select.cc index 9951321f5e..21f2347629 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_select.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_select.cc @@ -14,23 +14,23 @@ * limitations under the License. */ -#include "kernel/tbe/tbe_kernel_select/tbe_kernel_select.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_select.h" #include #include #include #include -#include "session/anf_runtime_algorithm.h" -#include "kernel/oplib/oplib.h" -#include "kernel/tbe/tbe_kernel_build.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/oplib/oplib.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_build.h" #include "nlohmann/json.hpp" #include "utils/context/ms_context.h" -#include "kernel/tbe/tbe_python_funcs.h" -#include "pre_activate/common/helper.h" -#include "kernel/tbe/tbe_convert_utils.h" -#include "parallel/ops_info/ops_utils.h" -#include "kernel/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.h" -#include "kernel/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.h" -#include "kernel/tbe/tbe_kernel_select/common_utils.h" +#include "backend/kernel_compiler/tbe/tbe_python_funcs.h" +#include "backend/optimizer/common/helper.h" +#include "backend/kernel_compiler/tbe/tbe_convert_utils.h" +#include "frontend/parallel/ops_info/ops_utils.h" +#include 
"backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_select/common_utils.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_select.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_select.h similarity index 95% rename from mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_select.h rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_select.h index c400bdbb6f..679c56379f 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_select.h +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_select.h @@ -20,9 +20,9 @@ #include #include #include -#include "kernel/oplib/opinfo.h" -#include "kernel/kernel_build_info.h" -#include "kernel/tbe/tbe_kernel_select/common_utils.h" +#include "backend/kernel_compiler/oplib/opinfo.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_select/common_utils.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/tbe/tbe_python_funcs.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_python_funcs.cc similarity index 98% rename from mindspore/ccsrc/kernel/tbe/tbe_python_funcs.cc rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_python_funcs.cc index 7204fb7f96..facb07991a 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_python_funcs.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_python_funcs.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "kernel/tbe/tbe_python_funcs.h" -#include "kernel/tbe/tbe_utils.h" +#include "backend/kernel_compiler/tbe/tbe_python_funcs.h" +#include "backend/kernel_compiler/tbe/tbe_utils.h" #include "common/utils.h" #include "utils/context/ms_context.h" diff --git a/mindspore/ccsrc/kernel/tbe/tbe_python_funcs.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_python_funcs.h similarity index 100% rename from mindspore/ccsrc/kernel/tbe/tbe_python_funcs.h rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_python_funcs.h diff --git a/mindspore/ccsrc/kernel/tbe/tbe_utils.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.cc similarity index 96% rename from mindspore/ccsrc/kernel/tbe/tbe_utils.cc rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.cc index ae7e5cb6d5..76ef7b08d5 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_utils.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/tbe/tbe_utils.h" +#include "backend/kernel_compiler/tbe/tbe_utils.h" #include #include @@ -27,15 +27,15 @@ #include #include "runtime/kernel.h" -#include "kernel/oplib/oplib.h" +#include "backend/kernel_compiler/oplib/oplib.h" #include "utils/utils.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "common/utils.h" -#include "device/kernel_info.h" +#include "runtime/device/kernel_info.h" #include "ir/dtype/type.h" -#include "kernel/tbe/tbe_convert_utils.h" +#include "backend/kernel_compiler/tbe/tbe_convert_utils.h" #include "securec/include/securec.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/tbe/tbe_utils.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.h similarity index 96% rename from mindspore/ccsrc/kernel/tbe/tbe_utils.h rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.h index 56fbe7967a..39ddaaa73d 
100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_utils.h +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.h @@ -23,9 +23,9 @@ #include #include -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" #include "ir/anf.h" -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/pre_activate/CMakeLists.txt b/mindspore/ccsrc/backend/optimizer/CMakeLists.txt similarity index 84% rename from mindspore/ccsrc/pre_activate/CMakeLists.txt rename to mindspore/ccsrc/backend/optimizer/CMakeLists.txt index 239757fb17..ee1532a416 100644 --- a/mindspore/ccsrc/pre_activate/CMakeLists.txt +++ b/mindspore/ccsrc/backend/optimizer/CMakeLists.txt @@ -11,4 +11,4 @@ if (ENABLE_D) endif () set_property(SOURCE ${_PREACTIVATE_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PRE_ACT) -add_library(_mindspore_pre_activate_obj OBJECT ${_PREACTIVATE_SRC_LIST}) +add_library(_mindspore_backend_optimizer_obj OBJECT ${_PREACTIVATE_SRC_LIST}) diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc b/mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.cc similarity index 74% rename from mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.cc index 48ce87629c..64d76ab358 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.cc @@ -13,88 +13,90 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/ascend_backend_optimization.h" +#include "backend/optimizer/ascend/ascend_backend_optimization.h" #include #include #include -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ir_fission/bn_split.h" -#include "pre_activate/ascend/ir_fission/bn_grad_split.h" -#include "pre_activate/ascend/ir_fission/batch_norm_grad_split.h" -#include "pre_activate/ascend/ir_fission/batch_norm_bert_fission.h" -#include "pre_activate/ascend/ir_fission/single_batch_norm_fission.h" -#include "pre_activate/ascend/ir_fission/tensor_scatter_update_fission.h" -#include "pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h" -#include "pre_activate/ascend/ir_fission/layer_norm_grad_split.h" -#include "pre_activate/pass/communication_op_fusion.h" -#include "pre_activate/ascend/ir_fusion/square_sum_fusion.h" -#include "pre_activate/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.h" -#include "pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.h" -#include "pre_activate/ascend/ir_fusion/clip_by_value_fusion.h" -#include "pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.h" -#include "pre_activate/ascend/ir_fusion/lamb_next_mv_rule.h" -#include "pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_rule.h" -#include "pre_activate/ascend/ir_fusion/lamb_next_right_rule.h" -#include "pre_activate/ascend/ir_fusion/lamb_update_with_lr_v2.h" -#include "pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.h" -#include "pre_activate/ascend/ir_fusion/reshape_transpose_fusion.h" -#include "pre_activate/ascend/ir_fusion/transpose_reshape_fusion.h" -#include "pre_activate/ascend/ir_fusion/adam_apply_one_fusion.h" -#include "pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule.h" -#include "pre_activate/ascend/ir_fusion/parameter_and_transop_fusion.h" -#include "pre_activate/ascend/ir_fusion/refresh_parameter_format.h" -#include "pre_activate/ascend/ir_fusion/transpose_transdata_fusion.h" -#include 
"pre_activate/ascend/ir_fission/transdata_split.h" -#include "pre_activate/ascend/ir_fission/topk_split.h" -#include "pre_activate/ascend/ir_fusion/momentum_lossscale_fusion.h" -#include "pre_activate/ascend/ir_fusion/mul_add_fusion.h" -#include "pre_activate/ascend/ir_fusion/mul_addn_fusion.h" -#include "pre_activate/ascend/ir_fusion/matmul_biasadd_fusion.h" -#include "pre_activate/ascend/ir_fusion/remove_reshape_pair.h" -#include "pre_activate/ascend/ir_fusion/derelu_fusion.h" -#include "pre_activate/ascend/ir_fusion/batchnorm_to_bninfer.h" -#include "pre_activate/ascend/ir_fusion/batchnormgrad_to_bninfergrad.h" -#include "pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.h" -#include "pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.h" -#include "pre_activate/ascend/format_type/insert_trans_op.h" -#include "pre_activate/ascend/format_type/rectify_do_mask_kernel_info.h" -#include "pre_activate/pass/getitem_tuple.h" -#include "pre_activate/pass/optimize_dependence.h" -#include "pre_activate/pass/erase_visit_attr.h" -#include "pre_activate/ascend/format_type/insert_cast.h" -#include "pre_activate/ascend/format_type/convert_unsupported_transnode_to_aicpu.h" -#include "pre_activate/pass/eliminate_redundant_op.h" -#include "pre_activate/pass/common_subexpression_elimination.h" -#include "pre_activate/pass/fuse_graph_kernel.h" -#include "pre_activate/pass/fuse_basic.h" -#include "pre_activate/pass/add_atomic_clean.h" -#include "pre_activate/ascend/format_type/merge_cast_to_op.h" -#include "pre_activate/ascend/format_type/check_consistency.h" -#include "pre_activate/ascend/buffer_fusion/ub_pattern_fusion.h" -#include "pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.h" -#include 
"pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.h" -#include "pre_activate/ascend/format_type/deal_ref_trans_and_cast.h" -#include "pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op.h" -#include "pre_activate/ascend/enhancer/insert_pad_for_nms_with_mask.h" -#include "pre_activate/ascend/format_type/insert_transdata_for_runop.h" -#include "pre_activate/ascend/enhancer/getnext_memcpy_elimination.h" -#include "pre_activate/ascend/ir_fission/addn_fission.h" -#include "pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.h" -#include "pre_activate/ascend/ir_fission/batch_norm_grad_infer_fission.h" -#include "pre_activate/ascend/ir_fission/split_fission.h" -#include "pre_activate/ascend/format_type/modify_ops_attrs.h" -#include "pre_activate/ascend/format_type/remove_no_use_reshape_op.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ir_fission/bn_split.h" +#include "backend/optimizer/ascend/ir_fission/bn_grad_split.h" +#include "backend/optimizer/ascend/ir_fission/batch_norm_grad_split.h" +#include "backend/optimizer/ascend/ir_fission/batch_norm_bert_fission.h" +#include "backend/optimizer/ascend/ir_fission/single_batch_norm_fission.h" +#include "backend/optimizer/ascend/ir_fission/tensor_scatter_update_fission.h" +#include "backend/optimizer/ascend/ir_fusion/fused_batch_norm_fusion.h" 
+#include "backend/optimizer/ascend/ir_fission/layer_norm_grad_split.h" +#include "backend/optimizer/pass/communication_op_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/square_sum_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/clip_by_value_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/confusion_softmax_grad_rule.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_next_mv_rule.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_rule.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_next_right_rule.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_v2.h" +#include "backend/optimizer/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/reshape_transpose_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/transpose_reshape_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/adam_apply_one_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/adam_apply_one_with_decay_rule.h" +#include "backend/optimizer/ascend/ir_fusion/parameter_and_transop_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/refresh_parameter_format.h" +#include "backend/optimizer/ascend/ir_fusion/transpose_transdata_fusion.h" +#include "backend/optimizer/ascend/ir_fission/transdata_split.h" +#include "backend/optimizer/ascend/ir_fission/topk_split.h" +#include "backend/optimizer/ascend/ir_fusion/momentum_lossscale_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/mul_add_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/mul_addn_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/matmul_biasadd_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/remove_reshape_pair.h" +#include "backend/optimizer/ascend/ir_fusion/derelu_fusion.h" +#include 
"backend/optimizer/ascend/ir_fusion/batchnorm_to_bninfer.h" +#include "backend/optimizer/ascend/ir_fusion/batchnormgrad_to_bninfergrad.h" +#include "backend/optimizer/ascend/ir_fusion/confusion_mul_grad_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/softmax_grad_ext_fusion.h" +#include "backend/optimizer/ascend/format_type/insert_trans_op.h" +#include "backend/optimizer/ascend/format_type/rectify_do_mask_kernel_info.h" +#include "backend/optimizer/ascend/format_type/chang_axis_of_reduce_kernel.h" +#include "backend/optimizer/pass/getitem_tuple.h" +#include "backend/optimizer/pass/optimize_dependence.h" +#include "backend/optimizer/pass/erase_visit_attr.h" +#include "backend/optimizer/ascend/format_type/insert_cast.h" +#include "backend/optimizer/ascend/format_type/convert_unsupported_transnode_to_aicpu.h" +#include "backend/optimizer/pass/eliminate_redundant_op.h" +#include "backend/optimizer/pass/common_subexpression_elimination.h" +#include "backend/optimizer/pass/fuse_graph_kernel.h" +#include "backend/optimizer/pass/fuse_basic.h" +#include "backend/optimizer/pass/add_atomic_clean.h" +#include "backend/optimizer/ascend/format_type/merge_cast_to_op.h" +#include "backend/optimizer/ascend/format_type/check_consistency.h" +#include "backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.h" +#include "backend/optimizer/ascend/buffer_fusion/eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/multi_output_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/conv_single_in_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/conv_double_in_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h" +#include 
"backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/segment_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/format_type/deal_ref_trans_and_cast.h" +#include "backend/optimizer/ascend/enhancer/insert_memcpy_async_for_hccl_op.h" +#include "backend/optimizer/ascend/enhancer/insert_pad_for_nms_with_mask.h" +#include "backend/optimizer/ascend/format_type/insert_transdata_for_runop.h" +#include "backend/optimizer/ascend/enhancer/getnext_memcpy_elimination.h" +#include "backend/optimizer/ascend/ir_fission/addn_fission.h" +#include "backend/optimizer/ascend/enhancer/insert_memcpy_async_for_getnext.h" +#include "backend/optimizer/ascend/ir_fission/batch_norm_grad_infer_fission.h" +#include "backend/optimizer/ascend/ir_fission/split_fission.h" +#include "backend/optimizer/ascend/format_type/modify_ops_attrs.h" +#include "backend/optimizer/ascend/format_type/remove_no_use_reshape_op.h" +#include "backend/optimizer/ascend/ir_fusion/add_input_to_output.h" #include "utils/context/ms_context.h" #include "utils/config_manager.h" #include "debug/anf_ir_dump.h" @@ -159,6 +161,7 @@ void RunOpAscendDataLayout(const std::shared_ptr &kernel_g MS_EXCEPTION_IF_NULL(kernel_graph); auto optimizer = std::make_shared(); auto data_layout_pm = std::make_shared("pynative_transop_pm"); + data_layout_pm->AddPass(std::make_shared()); data_layout_pm->AddPass(std::make_shared()); data_layout_pm->AddPass(std::make_shared()); data_layout_pm->AddPass(std::make_shared()); @@ -240,11 +243,16 @@ void AscendBackendIRFusionOptimization(const std::shared_ptr(); auto ir_fusion_pm = std::make_shared("ir_fusion_pm"); - ir_fusion_pm->AddPass(std::make_shared()); - 
ir_fusion_pm->AddPass(std::make_shared()); - ir_fusion_pm->AddPass(std::make_shared()); - ir_fusion_pm->AddPass(std::make_shared()); - ir_fusion_pm->AddPass(std::make_shared()); + if (context_ptr->execution_mode() == kPynativeMode) { + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + } else { + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + } ir_fusion_pm->AddPass(std::make_shared()); if (context_ptr->ir_fusion_flag()) { AddAscendBackendOptionalIRFusion(ir_fusion_pm.get()); @@ -256,6 +264,7 @@ void AscendBackendIRFusionOptimization(const std::shared_ptrAddPass(std::make_shared()); } ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); optimizer->AddPassManager(ir_fusion_pm); (void)optimizer->Optimize(kernel_graph); kernel_graph->SetExecOrderByDefault(); @@ -284,11 +293,8 @@ void RunOpAscendBackendIRFusionOptimization(const std::shared_ptr(); auto ir_fusion_pm = std::make_shared("ir_fusion_pm"); - ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); - ir_fusion_pm->AddPass(std::make_shared()); - ir_fusion_pm->AddPass(std::make_shared()); - ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); @@ -352,7 +358,7 @@ void AscendBackendOptimization(const std::shared_ptr &kern std::string file_path = save_graphs_path + "/" + "hwopt_d_end" + "_graph_" + std::to_string(kernel_graph->graph_id()) + ".ir"; DumpIR(file_path, kernel_graph, true); - DumpIRProto(kernel_graph, "after_hwopt"); + DumpIRProto(kernel_graph, "after_hwopt_" + std::to_string(kernel_graph->graph_id())); kernel_graph->DumpFuncGraph("hwopt_d_end"); } } diff --git 
a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.h b/mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.h similarity index 98% rename from mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.h rename to mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.h index 222c4b90b5..8194ab467b 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ASCEND_BACKEND_OPTIMIZATION_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ASCEND_BACKEND_OPTIMIZATION_H_ #include -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { void RunOpAscendDataLayout(const std::shared_ptr &kernel_graph); diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_helper.cc b/mindspore/ccsrc/backend/optimizer/ascend/ascend_helper.cc similarity index 97% rename from mindspore/ccsrc/pre_activate/ascend/ascend_helper.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ascend_helper.cc index 9c498bd736..fd4c0e5952 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ascend_helper.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ascend_helper.cc @@ -14,18 +14,18 @@ * limitations under the License. 
*/ -#include "pre_activate/ascend/ascend_helper.h" +#include "backend/optimizer/ascend/ascend_helper.h" #include #include "common/trans.h" #include "common/utils.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/helper.h" #include "utils/utils.h" -#include "device/kernel_info.h" -#include "kernel/oplib/oplib.h" -#include "kernel/common_utils.h" -#include "operator/ops.h" -#include "session/anf_runtime_algorithm.h" -#include "session/kernel_graph.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/oplib/oplib.h" +#include "backend/kernel_compiler/common_utils.h" +#include "frontend/operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/session/kernel_graph.h" #include "utils/context/ms_context.h" namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_helper.h b/mindspore/ccsrc/backend/optimizer/ascend/ascend_helper.h similarity index 86% rename from mindspore/ccsrc/pre_activate/ascend/ascend_helper.h rename to mindspore/ccsrc/backend/optimizer/ascend/ascend_helper.h index ad48ca5291..cb308a09a0 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ascend_helper.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ascend_helper.h @@ -19,10 +19,10 @@ #include #include #include -#include "device/ascend/kernel_select_ascend.h" -#include "kernel/kernel_query.h" -#include "kernel/oplib/oplib.h" -#include "session/anf_runtime_algorithm.h" +#include "runtime/device/ascend/kernel_select_ascend.h" +#include "backend/kernel_compiler/kernel_query.h" +#include "backend/kernel_compiler/oplib/oplib.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace opt { @@ -70,6 +70,21 @@ class KernelQuery { } }; using KernelQueryPtr = std::shared_ptr; + +class OpFinder { + public: + OpFinder() = default; + virtual ~OpFinder() = default; + virtual int GetOpRegisteredOutputNum(const std::string &op_name) { + auto op_info = kernel::OpLib::FindOp(op_name, 
kernel::kTBE); + if (op_info == nullptr) { + return -1; + } + return op_info->outputs_ptr().size(); + } +}; +using OpFinderPtr = std::shared_ptr; + void RefreshKernelBuildInfo(const std::string &input_format, const std::string &output_format, const AnfNodePtr &trans_data, const std::vector &reshape_type = {}); diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.cc similarity index 92% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.cc index 94318d63ca..22183c9050 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.cc @@ -13,17 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h" #include #include #include #include -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" +#include "backend/optimizer/common/fusion_id_allocator.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h similarity index 85% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h index 6cdc5885f6..dfc45b4688 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h @@ -19,13 +19,13 @@ #include #include -#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git 
a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.cc similarity index 92% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.cc index 1f7fef9e62..59915d43d4 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.cc @@ -13,17 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h" #include #include #include #include -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" +#include "backend/optimizer/common/fusion_id_allocator.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h similarity index 85% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h index b5688f3a36..abaf264d2e 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h @@ 
-19,13 +19,13 @@ #include #include -#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.cc similarity index 91% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.cc index 6091eb572d..1bfff1b50e 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.cc @@ -13,17 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.h" #include #include #include #include -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" +#include "backend/optimizer/common/fusion_id_allocator.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.h similarity index 85% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.h index 7d779d35f8..6bf74d5268 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.h @@ -19,13 +19,13 @@ #include #include -#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" 
namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.cc similarity index 90% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.cc index 963f1885fe..144ab4b53f 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.cc @@ -13,17 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.h" #include #include #include #include -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" +#include "backend/optimizer/common/fusion_id_allocator.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.h similarity index 85% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.h index 171352de9b..93aa324566 100644 --- 
a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.h @@ -19,13 +19,13 @@ #include #include -#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_bnreduce_fusion_pass.cc similarity index 89% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_bnreduce_fusion_pass.cc index 63e7dcf6b8..a2ebfbe79e 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_bnreduce_fusion_pass.cc @@ -13,18 +13,18 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h" #include #include #include #include -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" +#include "backend/optimizer/common/fusion_id_allocator.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h similarity index 84% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h index 7a06faa624..224422530b 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h @@ -19,13 +19,13 @@ #include #include -#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.cc 
b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_double_in_fusion_pass.cc similarity index 91% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_double_in_fusion_pass.cc index a126143811..1a67e3c39b 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_double_in_fusion_pass.cc @@ -13,17 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/conv_double_in_fusion_pass.h" #include #include #include #include -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" +#include "backend/optimizer/common/fusion_id_allocator.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_double_in_fusion_pass.h similarity index 84% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_double_in_fusion_pass.h index 062b8182fb..911cf744de 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_double_in_fusion_pass.h @@ -19,13 +19,13 @@ #include #include -#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include 
"backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_single_in_fusion_pass.cc similarity index 91% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_single_in_fusion_pass.cc index d83b32a888..1eb26b12bc 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_single_in_fusion_pass.cc @@ -13,17 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/conv_single_in_fusion_pass.h" #include #include #include #include -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" +#include "backend/optimizer/common/fusion_id_allocator.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_single_in_fusion_pass.h similarity index 84% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_single_in_fusion_pass.h index bf7e581dff..6dddd600c2 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_single_in_fusion_pass.h @@ -19,13 +19,13 @@ #include #include -#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git 
a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.cc similarity index 92% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.cc index 98a6838bed..285b8f6c07 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.cc @@ -13,18 +13,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h" #include #include #include #include -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" +#include "backend/optimizer/common/fusion_id_allocator.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h similarity index 85% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h index c2e72f26ff..6746dad984 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h +++ 
b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h @@ -19,13 +19,13 @@ #include #include -#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/eltwise_fusion_pass.cc similarity index 90% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/eltwise_fusion_pass.cc index 2f04e16692..1e24cce0e4 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/eltwise_fusion_pass.cc @@ -13,17 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/eltwise_fusion_pass.h" #include #include #include #include -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" +#include "backend/optimizer/common/fusion_id_allocator.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/eltwise_fusion_pass.h similarity index 84% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/eltwise_fusion_pass.h index 54ff0f5982..ae63687631 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/eltwise_fusion_pass.h @@ -19,13 +19,13 @@ #include #include -#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.cc 
b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/fusion_base_pass.cc similarity index 95% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/fusion_base_pass.cc index a516f04442..27a7a786d1 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/fusion_base_pass.cc @@ -13,13 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include #include #include "debug/anf_ir_dump.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h similarity index 91% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h index 8d6eca774c..dced2c2fa2 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h @@ -21,11 +21,11 @@ #include #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include 
"backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/matmul_eltwise_fusion_pass.cc similarity index 90% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/matmul_eltwise_fusion_pass.cc index d1ef5dc83b..7fcc6e45e0 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/matmul_eltwise_fusion_pass.cc @@ -13,17 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h" #include #include #include #include -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" +#include "backend/optimizer/common/fusion_id_allocator.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h similarity index 84% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h index 5baaa6db86..e0d08bb58d 100644 --- 
a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h @@ -19,13 +19,13 @@ #include #include -#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/multi_output_fusion_pass.cc similarity index 91% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/multi_output_fusion_pass.cc index be4d2af1cb..58a219aec7 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/multi_output_fusion_pass.cc @@ -13,17 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/multi_output_fusion_pass.h" #include #include #include #include -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" +#include "backend/optimizer/common/fusion_id_allocator.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/multi_output_fusion_pass.h similarity index 84% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/multi_output_fusion_pass.h index 0e2510128a..40a45360a1 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/multi_output_fusion_pass.h @@ -19,13 +19,13 @@ #include #include -#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.cc 
b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/reduce_eltwise_fusion_pass.cc similarity index 92% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/reduce_eltwise_fusion_pass.cc index 623f0e3426..95955818eb 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/reduce_eltwise_fusion_pass.cc @@ -13,18 +13,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h" #include #include #include #include #include -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" +#include "backend/optimizer/common/fusion_id_allocator.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h similarity index 84% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h index 42d896e96b..4d56eee7b3 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h @@ -19,13 +19,13 @@ #include #include -#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" 
+#include "backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/segment_eltwise_fusion_pass.cc similarity index 92% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/segment_eltwise_fusion_pass.cc index 0dcf2362bc..f2117f9374 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/segment_eltwise_fusion_pass.cc @@ -13,17 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/segment_eltwise_fusion_pass.h" #include #include #include #include -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" +#include "backend/optimizer/common/fusion_id_allocator.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/segment_eltwise_fusion_pass.h similarity index 84% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/segment_eltwise_fusion_pass.h index 41f06ba1f9..f3b97f8357 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/segment_eltwise_fusion_pass.h @@ -19,13 +19,13 @@ #include #include -#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git 
a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.cc similarity index 92% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.cc index 5bc0fdced7..d93b47b66c 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.cc @@ -13,18 +13,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.h" #include #include #include #include -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" +#include "backend/optimizer/common/fusion_id_allocator.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.h similarity index 85% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.h index c6c5fe88dc..371c206399 100644 --- 
a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.h @@ -19,13 +19,13 @@ #include #include -#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/ub_pattern_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.cc similarity index 98% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/ub_pattern_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.cc index faa5169c40..9685530705 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/ub_pattern_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/buffer_fusion/ub_pattern_fusion.h" +#include "backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.h" #include #include #include @@ -23,11 +23,11 @@ #include #include #include -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" -#include "device/kernel_info.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" +#include "runtime/device/kernel_info.h" #include "utils/context/ms_context.h" namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/ub_pattern_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.h similarity index 85% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/ub_pattern_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.h index 7099c92772..69eb0f43d4 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/ub_pattern_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.h @@ -19,13 +19,13 @@ #include #include -#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/enhancer/getnext_memcpy_elimination.cc b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/getnext_memcpy_elimination.cc similarity 
index 94% rename from mindspore/ccsrc/pre_activate/ascend/enhancer/getnext_memcpy_elimination.cc rename to mindspore/ccsrc/backend/optimizer/ascend/enhancer/getnext_memcpy_elimination.cc index 6d0906363e..a729cdd0f9 100644 --- a/mindspore/ccsrc/pre_activate/ascend/enhancer/getnext_memcpy_elimination.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/getnext_memcpy_elimination.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "pre_activate/ascend/enhancer/getnext_memcpy_elimination.h" +#include "backend/optimizer/ascend/enhancer/getnext_memcpy_elimination.h" #include -#include "session/anf_runtime_algorithm.h" -#include "optimizer/opt.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/optimizer/opt.h" namespace mindspore::opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/enhancer/getnext_memcpy_elimination.h b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/getnext_memcpy_elimination.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/enhancer/getnext_memcpy_elimination.h rename to mindspore/ccsrc/backend/optimizer/ascend/enhancer/getnext_memcpy_elimination.h index 523fc87a38..365088b34a 100644 --- a/mindspore/ccsrc/pre_activate/ascend/enhancer/getnext_memcpy_elimination.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/getnext_memcpy_elimination.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_GETNEXT_MEMCPY_ELIMINATION_H #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_GETNEXT_MEMCPY_ELIMINATION_H -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.cc b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_memcpy_async_for_getnext.cc similarity index 92% rename from mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.cc rename to 
mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_memcpy_async_for_getnext.cc index 01a3f789e7..bac9f54ace 100644 --- a/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_memcpy_async_for_getnext.cc @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.h" +#include "backend/optimizer/ascend/enhancer/insert_memcpy_async_for_getnext.h" #include #include -#include "pre_activate/ascend/ascend_helper.h" -#include "pre_activate/common/helper.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/ascend/ascend_helper.h" +#include "backend/optimizer/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.h b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_memcpy_async_for_getnext.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.h rename to mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_memcpy_async_for_getnext.h index eb3b78d33f..6fefc32230 100644 --- a/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_memcpy_async_for_getnext.h @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_INSERT_MEMCPY_ASYNC_FOR_GETNEXT_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_INSERT_MEMCPY_ASYNC_FOR_GETNEXT_H_ -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op.cc 
b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_memcpy_async_for_hccl_op.cc similarity index 95% rename from mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op.cc rename to mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_memcpy_async_for_hccl_op.cc index 63ea59d744..2585006be6 100644 --- a/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_memcpy_async_for_hccl_op.cc @@ -13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op.h" +#include "backend/optimizer/ascend/enhancer/insert_memcpy_async_for_hccl_op.h" #include #include #include #include "utils/utils.h" -#include "session/anf_runtime_algorithm.h" -#include "optimizer/opt.h" -#include "pre_activate/ascend/ascend_helper.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/optimizer/opt.h" +#include "backend/optimizer/ascend/ascend_helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op.h b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_memcpy_async_for_hccl_op.h similarity index 94% rename from mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op.h rename to mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_memcpy_async_for_hccl_op.h index e2f3b781ed..7bd730a84d 100644 --- a/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_memcpy_async_for_hccl_op.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_INSERT_MEMCPY_ASYNC_FOR_HCCL_OP_H_ #include -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ascend_helper.h" +#include "backend/optimizer/common/optimizer.h" 
+#include "backend/optimizer/ascend/ascend_helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_pad_for_nms_with_mask.cc b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_pad_for_nms_with_mask.cc similarity index 89% rename from mindspore/ccsrc/pre_activate/ascend/enhancer/insert_pad_for_nms_with_mask.cc rename to mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_pad_for_nms_with_mask.cc index b73fe6c83c..be61833fe4 100644 --- a/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_pad_for_nms_with_mask.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_pad_for_nms_with_mask.cc @@ -14,17 +14,17 @@ * limitations under the License. */ -#include "pre_activate/ascend/enhancer/insert_pad_for_nms_with_mask.h" +#include "backend/optimizer/ascend/enhancer/insert_pad_for_nms_with_mask.h" #include #include #include -#include "pre_activate/ascend/ascend_helper.h" -#include "pre_activate/common/helper.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/ascend/ascend_helper.h" +#include "backend/optimizer/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" -#include "device/kernel_info.h" -#include "kernel//oplib/oplib.h" -#include "operator/ops.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler//oplib/oplib.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_pad_for_nms_with_mask.h b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_pad_for_nms_with_mask.h similarity index 93% rename from mindspore/ccsrc/pre_activate/ascend/enhancer/insert_pad_for_nms_with_mask.h rename to mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_pad_for_nms_with_mask.h index bfc201ed11..6aed678ff2 100644 --- a/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_pad_for_nms_with_mask.h +++ 
b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_pad_for_nms_with_mask.h @@ -16,8 +16,8 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_INSERT_PAD_FOR_NMS_WITH_MASK_H #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_INSERT_PAD_FOR_NMS_WITH_MASK_H -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/pass.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/pass.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/backend/optimizer/ascend/format_type/chang_axis_of_reduce_kernel.cc b/mindspore/ccsrc/backend/optimizer/ascend/format_type/chang_axis_of_reduce_kernel.cc new file mode 100644 index 0000000000..f508bb2868 --- /dev/null +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/chang_axis_of_reduce_kernel.cc @@ -0,0 +1,103 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "backend/optimizer/ascend/format_type/chang_axis_of_reduce_kernel.h" + +#include +#include +#include +#include + +#include "utils/utils.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "common/utils.h" +#include "backend/kernel_compiler/common_utils.h" + +namespace mindspore { +namespace opt { +namespace { +using ConvertFunction = std::function; + +void ConvertReduceAttrFraczAnd6HD(const CNodePtr &cnode); +const size_t kAxis_H = 2; +const size_t kAxis_W = 3; +const size_t kAxis_6HD_H = 1; +const size_t kAxis_6HD_W = 2; +const std::map kReduceConvertMap = {{kOpFormat_FRAC_Z, ConvertReduceAttrFraczAnd6HD}, + {kOpFormat_C1HWNCoC0, ConvertReduceAttrFraczAnd6HD}}; +void SafeCheckFunction(const CNodePtr &cnode, const std::vector &reduce_axis) { + if (reduce_axis.empty()) { + MS_LOG(EXCEPTION) << "The node " << cnode->DebugString() << "'s reduce axis got a empty vector"; + } + if (AnfAlgo::GetInputTensorNum(cnode) != AnfAlgo::GetOutputTensorNum(cnode) && + AnfAlgo::GetInputTensorNum(cnode) != 1) { + MS_LOG(EXCEPTION) << "the kind of reduce node [" << cnode->DebugString() + << "] is not single input or single output "; + } + for (auto elem : reduce_axis) { + if (elem > 4) { + MS_LOG(INFO) << "reduce axis is larger than 4 dims reduce axis : [" << elem << "]"; + } + } +} + +void ConvertReduceAttrFraczAnd6HD(const CNodePtr &cnode) { + auto axis = kernel::GetReduceAttrAxis(cnode); + std::vector convert_axis; + SafeCheckFunction(cnode, axis); + auto format = AnfAlgo::GetInputFormat(cnode, 0); + if (format != kOpFormat_FRAC_Z || format != kOpFormat_C1HWNCoC0) { + MS_LOG(EXCEPTION) << "The node [" << cnode->DebugString() << "] format " << format << " is not 5hd"; + } + for (auto elem : axis) { + switch (elem) { + case kAxis_H: + convert_axis.emplace_back(kAxis_6HD_H); + break; + case kAxis_W: + convert_axis.emplace_back(kAxis_6HD_W); + break; + default: + MS_LOG(INFO) << "reduce axis is axis : [" << elem << "]" + << " but the format is not supported 
this reduce axis"; + } + } + AnfAlgo::SetNodeAttr(kAttrAxis, MakeValue(convert_axis), cnode); +} +} // namespace + +const BaseRef ChangeAxisOfReduceKernel::DefinePattern() const { + VarPtr X = std::make_shared(); + VarPtr Xs = std::make_shared(); + return VectorRef({X, Xs}); +} + +const AnfNodePtr ChangeAxisOfReduceKernel::Process(const FuncGraphPtr &, const AnfNodePtr &node, + const EquivPtr &) const { + if (node == nullptr || !node->isa() || !AnfAlgo::IsRealKernel(node)) { + return nullptr; + } + if (AnfAlgo::GetOpPattern(node) != kernel::kReducePattern) { + return nullptr; + } + auto convert_map = kReduceConvertMap.find(AnfAlgo::GetInputFormat(node, 0)); + if (convert_map == kReduceConvertMap.end()) { + return nullptr; + } + convert_map->second(node->cast()); + return nullptr; +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/optimizer/ascend/format_type/chang_axis_of_reduce_kernel.h b/mindspore/ccsrc/backend/optimizer/ascend/format_type/chang_axis_of_reduce_kernel.h new file mode 100644 index 0000000000..6bf1287ae7 --- /dev/null +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/chang_axis_of_reduce_kernel.h @@ -0,0 +1,33 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_CHANGE_AXIS_OF_REDUCE_KENRNEL_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_CHANGE_AXIS_OF_REDUCE_KENRNEL_H_ + +#include "backend/optimizer/common/optimizer.h" + +namespace mindspore { +namespace opt { +class ChangeAxisOfReduceKernel : public PatternProcessPass { + public: + explicit ChangeAxisOfReduceKernel(bool multigraph = true) + : PatternProcessPass("change_axis_of_reduce_kernel", multigraph) {} + ~ChangeAxisOfReduceKernel() override = default; + const BaseRef DefinePattern() const override; + const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; +}; +} // namespace opt +} // namespace mindspore +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_CHANGE_AXIS_OF_REDUCE_KENRNEL_H_ diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/check_consistency.cc b/mindspore/ccsrc/backend/optimizer/ascend/format_type/check_consistency.cc similarity index 95% rename from mindspore/ccsrc/pre_activate/ascend/format_type/check_consistency.cc rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/check_consistency.cc index 7c8fb70fda..7da0027310 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/check_consistency.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/check_consistency.cc @@ -13,16 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/format_type/check_consistency.h" +#include "backend/optimizer/ascend/format_type/check_consistency.h" #include #include #include #include "utils/utils.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "common/utils.h" -#include "kernel/common_utils.h" +#include "backend/kernel_compiler/common_utils.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/check_consistency.h b/mindspore/ccsrc/backend/optimizer/ascend/format_type/check_consistency.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/format_type/check_consistency.h rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/check_consistency.h index e134547dc8..bf956895de 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/check_consistency.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/check_consistency.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_CHECK_CONSISTENCY_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_CHECK_CONSISTENCY_H_ -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/convert_unsupported_transnode_to_aicpu.cc b/mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_unsupported_transnode_to_aicpu.cc similarity index 89% rename from mindspore/ccsrc/pre_activate/ascend/format_type/convert_unsupported_transnode_to_aicpu.cc rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_unsupported_transnode_to_aicpu.cc index c0f99ed415..48948dca06 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/convert_unsupported_transnode_to_aicpu.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_unsupported_transnode_to_aicpu.cc @@ -14,11 +14,11 @@ * limitations under the License. 
*/ -#include "pre_activate/ascend/format_type/convert_unsupported_transnode_to_aicpu.h" +#include "backend/optimizer/ascend/format_type/convert_unsupported_transnode_to_aicpu.h" #include -#include "session/anf_runtime_algorithm.h" -#include "kernel/kernel_build_info.h" -#include "kernel/kernel_query.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "backend/kernel_compiler/kernel_query.h" namespace mindspore { namespace opt { const BaseRef ConvertUnSupportNodeToAICPU::DefinePattern() const { diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/convert_unsupported_transnode_to_aicpu.h b/mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_unsupported_transnode_to_aicpu.h similarity index 93% rename from mindspore/ccsrc/pre_activate/ascend/format_type/convert_unsupported_transnode_to_aicpu.h rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_unsupported_transnode_to_aicpu.h index 80cc8170ac..e534a851ad 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/convert_unsupported_transnode_to_aicpu.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_unsupported_transnode_to_aicpu.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ #include -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ascend_helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ascend_helper.h" #ifndef MINDSPORE_CONVERT_UNSUPPORTED_NODE_TO_AICPU_H #define MINDSPORE_CONVERT_UNSUPPORTED_NODE_TO_AICPU_H namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/deal_ref_trans_and_cast.cc b/mindspore/ccsrc/backend/optimizer/ascend/format_type/deal_ref_trans_and_cast.cc similarity index 97% rename from mindspore/ccsrc/pre_activate/ascend/format_type/deal_ref_trans_and_cast.cc rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/deal_ref_trans_and_cast.cc index 3241684c62..4375a08031 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/deal_ref_trans_and_cast.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/deal_ref_trans_and_cast.cc @@ -14,15 +14,15 @@ * limitations under the License. */ -#include "pre_activate/ascend/format_type/deal_ref_trans_and_cast.h" +#include "backend/optimizer/ascend/format_type/deal_ref_trans_and_cast.h" #include #include #include #include -#include "kernel/oplib/oplib.h" -#include "session/anf_runtime_algorithm.h" -#include "session/kernel_graph.h" -#include "pre_activate/common/helper.h" +#include "backend/kernel_compiler/oplib/oplib.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/session/kernel_graph.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/deal_ref_trans_and_cast.h b/mindspore/ccsrc/backend/optimizer/ascend/format_type/deal_ref_trans_and_cast.h similarity index 89% rename from mindspore/ccsrc/pre_activate/ascend/format_type/deal_ref_trans_and_cast.h rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/deal_ref_trans_and_cast.h index 1b54a7b111..cb3b13dc49 100644 --- 
a/mindspore/ccsrc/pre_activate/ascend/format_type/deal_ref_trans_and_cast.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/deal_ref_trans_and_cast.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_DEAL_REF_TRANS_AND_CAST_H_ #include "ir/anf.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/pattern_engine.h" -#include "pre_activate/ascend/ascend_helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/pattern_engine.h" +#include "backend/optimizer/ascend/ascend_helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_cast.cc b/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_cast.cc similarity index 91% rename from mindspore/ccsrc/pre_activate/ascend/format_type/insert_cast.cc rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_cast.cc index 3d09233d99..c3f7900645 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_cast.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_cast.cc @@ -13,22 +13,22 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/format_type/insert_cast.h" +#include "backend/optimizer/ascend/format_type/insert_cast.h" #include #include #include #include -#include "device/kernel_info.h" -#include "pre_activate/ascend/ascend_helper.h" -#include "pre_activate/common/helper.h" -#include "kernel/kernel_build_info.h" -#include "kernel/oplib/oplib.h" -#include "session/anf_runtime_algorithm.h" -#include "session/kernel_graph.h" +#include "runtime/device/kernel_info.h" +#include "backend/optimizer/ascend/ascend_helper.h" +#include "backend/optimizer/common/helper.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "backend/kernel_compiler/oplib/oplib.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/session/kernel_graph.h" #include "utils/utils.h" -#include "kernel/common_utils.h" +#include "backend/kernel_compiler/common_utils.h" namespace mindspore { namespace opt { @@ -181,15 +181,6 @@ const AnfNodePtr InsertCast::Process(const FuncGraphPtr &func_graph, const AnfNo if (AnfAlgo::IsGraphKernel(node)) { return ProcessGraphKernelOp(func_graph, node); - } else { - // insert cast for single op. - AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), node); - // process input - CNodePtr cnode = node->cast(); - MS_EXCEPTION_IF_NULL(cnode); - auto new_node = InsertCastForInput(func_graph, cnode); - // process output - return InsertCastForOutput(func_graph, new_node, std::vector(AnfAlgo::GetOutputTensorNum(new_node), true)); } // insert cast for single op. 
AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), node); diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_cast.h b/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_cast.h similarity index 92% rename from mindspore/ccsrc/pre_activate/ascend/format_type/insert_cast.h rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_cast.h index a7f93ec8f3..19c282aac9 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_cast.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_cast.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_INSERT_CAST_H_ #include -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/pattern_engine.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/pattern_engine.h" #include "ir/anf.h" namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_trans_op.cc b/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_trans_op.cc similarity index 90% rename from mindspore/ccsrc/pre_activate/ascend/format_type/insert_trans_op.cc rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_trans_op.cc index 3f77c68f86..a22a1faa5f 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_trans_op.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_trans_op.cc @@ -14,14 +14,14 @@ * limitations under the License. 
*/ -#include "pre_activate/ascend/format_type/insert_trans_op.h" +#include "backend/optimizer/ascend/format_type/insert_trans_op.h" #include #include #include "utils/utils.h" -#include "pre_activate/ascend/ascend_helper.h" -#include "session/anf_runtime_algorithm.h" -#include "device/kernel_info.h" -#include "kernel/oplib/oplib.h" +#include "backend/optimizer/ascend/ascend_helper.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/oplib/oplib.h" #include "utils/context/ms_context.h" namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_trans_op.h b/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_trans_op.h similarity index 90% rename from mindspore/ccsrc/pre_activate/ascend/format_type/insert_trans_op.h rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_trans_op.h index eb6cfa9542..0b21375327 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_trans_op.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_trans_op.h @@ -20,9 +20,9 @@ #include #include #include -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/helper.h" -#include "pre_activate/ascend/ascend_helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/helper.h" +#include "backend/optimizer/ascend/ascend_helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_transdata_for_runop.cc b/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_transdata_for_runop.cc similarity index 84% rename from mindspore/ccsrc/pre_activate/ascend/format_type/insert_transdata_for_runop.cc rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_transdata_for_runop.cc index 3df513a19f..d0b92b250d 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_transdata_for_runop.cc +++ 
b/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_transdata_for_runop.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include "pre_activate/ascend/format_type/insert_transdata_for_runop.h" +#include "backend/optimizer/ascend/format_type/insert_transdata_for_runop.h" #include #include "utils/utils.h" -#include "pre_activate/ascend/ascend_helper.h" -#include "session/anf_runtime_algorithm.h" -#include "device/kernel_info.h" -#include "kernel/oplib/oplib.h" +#include "backend/optimizer/ascend/ascend_helper.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/oplib/oplib.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_transdata_for_runop.h b/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_transdata_for_runop.h similarity index 91% rename from mindspore/ccsrc/pre_activate/ascend/format_type/insert_transdata_for_runop.h rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_transdata_for_runop.h index f699cdd580..82ff5f2b9a 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_transdata_for_runop.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_transdata_for_runop.h @@ -20,9 +20,9 @@ #include #include #include -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/helper.h" -#include "pre_activate/ascend/ascend_helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/helper.h" +#include "backend/optimizer/ascend/ascend_helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/merge_cast_to_op.cc b/mindspore/ccsrc/backend/optimizer/ascend/format_type/merge_cast_to_op.cc similarity index 98% rename from mindspore/ccsrc/pre_activate/ascend/format_type/merge_cast_to_op.cc rename to 
mindspore/ccsrc/backend/optimizer/ascend/format_type/merge_cast_to_op.cc index b1817cec3d..88e9fa77b8 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/merge_cast_to_op.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/merge_cast_to_op.cc @@ -14,16 +14,16 @@ * limitations under the License. */ -#include "pre_activate/ascend/format_type/merge_cast_to_op.h" +#include "backend/optimizer/ascend/format_type/merge_cast_to_op.h" #include #include #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/merge_cast_to_op.h b/mindspore/ccsrc/backend/optimizer/ascend/format_type/merge_cast_to_op.h similarity index 90% rename from mindspore/ccsrc/pre_activate/ascend/format_type/merge_cast_to_op.h rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/merge_cast_to_op.h index 7e05c8a02a..d0e467b7a3 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/merge_cast_to_op.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/merge_cast_to_op.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_MERGE_CAST_TO_OP_H #include -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/helper.h" -#include "pre_activate/ascend/ascend_helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/helper.h" +#include "backend/optimizer/ascend/ascend_helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/modify_ops_attrs.cc b/mindspore/ccsrc/backend/optimizer/ascend/format_type/modify_ops_attrs.cc similarity index 92% rename from mindspore/ccsrc/pre_activate/ascend/format_type/modify_ops_attrs.cc rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/modify_ops_attrs.cc index 
42061957b9..adca536f04 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/modify_ops_attrs.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/modify_ops_attrs.cc @@ -14,14 +14,14 @@ * limitations under the License. */ -#include "pre_activate/ascend/format_type/modify_ops_attrs.h" +#include "backend/optimizer/ascend/format_type/modify_ops_attrs.h" #include #include #include "utils/utils.h" -#include "pre_activate/common/helper.h" -#include "kernel/common_utils.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/optimizer/common/helper.h" +#include "backend/kernel_compiler/common_utils.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/modify_ops_attrs.h b/mindspore/ccsrc/backend/optimizer/ascend/format_type/modify_ops_attrs.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/format_type/modify_ops_attrs.h rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/modify_ops_attrs.h index 25ec94b6b4..f5608db05a 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/modify_ops_attrs.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/modify_ops_attrs.h @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_MODIFY_OPS_ATTRS_H #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_MODIFY_OPS_ATTRS_H -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/rectify_do_mask_kernel_info.cc b/mindspore/ccsrc/backend/optimizer/ascend/format_type/rectify_do_mask_kernel_info.cc similarity index 62% rename from mindspore/ccsrc/pre_activate/ascend/format_type/rectify_do_mask_kernel_info.cc rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/rectify_do_mask_kernel_info.cc index 
d81a8c90ce..91b9326cc1 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/rectify_do_mask_kernel_info.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/rectify_do_mask_kernel_info.cc @@ -14,18 +14,19 @@ * limitations under the License. */ -#include "pre_activate/ascend/format_type/rectify_do_mask_kernel_info.h" +#include "backend/optimizer/ascend/format_type/rectify_do_mask_kernel_info.h" #include #include #include #include -#include "session/anf_runtime_algorithm.h" -#include "kernel/kernel_build_info.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/kernel_build_info.h" #include "utils/utils.h" -#include "kernel/common_utils.h" +#include "backend/kernel_compiler/common_utils.h" #include "utils/context/ms_context.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { @@ -50,16 +51,11 @@ const AnfNodePtr RectifyDoMaskKernelInfo::Process(const FuncGraphPtr &graph, con return nullptr; } std::vector do_mask_node_list; - auto manager = graph->manager(); - MS_EXCEPTION_IF_NULL(manager); - auto node_map = manager->node_users(); - auto iter = node_map.find(node); - if (iter == node_map.end()) { - MS_LOG(EXCEPTION) << "Cannot find the node " << node->DebugString() << " in the graph manager!"; - } - auto gen_mask_output_nodes = iter->second; - for (const auto &output_node : gen_mask_output_nodes) { + auto gen_mask_output_nodes = GetRealNodeUsedList(graph, cnode); + MS_EXCEPTION_IF_NULL(gen_mask_output_nodes); + for (const auto &output_node : *gen_mask_output_nodes) { if (AnfAlgo::GetCNodeName(output_node.first) == prim::kPrimDropoutDoMask->name()) { + MS_EXCEPTION_IF_NULL(output_node.first); auto output_cnode = output_node.first->cast(); do_mask_node_list.push_back(output_cnode); } @@ -76,11 +72,12 @@ const AnfNodePtr RectifyDoMaskKernelInfo::Process(const FuncGraphPtr &graph, con << " GenMask " << node->DebugString(); } } - RectifyKernelInfo(do_mask_node_list); + 
RectifyKernelInfo(do_mask_node_list, graph); return nullptr; } -void RectifyDoMaskKernelInfo::RectifyKernelInfo(const std::vector &do_mask_node_list) const { +void RectifyDoMaskKernelInfo::RectifyKernelInfo(const std::vector &do_mask_node_list, + const FuncGraphPtr &graph) const { std::map format_counter; std::string special_format; std::string convert_format; @@ -94,17 +91,6 @@ void RectifyDoMaskKernelInfo::RectifyKernelInfo(const std::vector &do_ } else { format_counter[do_mask_data_format] = format_counter[do_mask_data_format] + 1; } - // if has two or more special format we need change all domask's format to default that can avoid insert more - // transdata - if (format_counter.size() > 2) { - convert_format = kOpFormat_DEFAULT; - break; - } - if (kHWSpecialFormatSet.find(do_mask_data_format) != kHWSpecialFormatSet.end() && - special_format != do_mask_data_format) { - convert_format = kOpFormat_DEFAULT; - break; - } } if (format_counter.size() == 1) { return; @@ -112,17 +98,23 @@ void RectifyDoMaskKernelInfo::RectifyKernelInfo(const std::vector &do_ if (convert_format.empty()) { convert_format = GetConvertFormat(format_counter); } - RectifyDropOutDoMaskKernelInfo(do_mask_node_list, convert_format); + RectifyDropOutDoMaskKernelInfo(do_mask_node_list, convert_format, graph); } std::string RectifyDoMaskKernelInfo::GetConvertFormat(const std::map &format_counter) const { - std::string convert_format; - const size_t counter = 0; + std::string convert_format = kOpFormat_DEFAULT; + size_t counter = 0; + if (format_counter.size() > 2) { + return kOpFormat_DEFAULT; + } + if (format_counter.size() == 2 && format_counter.find(kOpFormat_DEFAULT) == format_counter.end()) { + return kOpFormat_DEFAULT; + } for (const auto &iter : format_counter) { if (counter < iter.second) { convert_format = iter.first; - } - if (counter == iter.second && kHWSpecialFormatSet.find(convert_format) == kHWSpecialFormatSet.end()) { + counter = iter.second; + } else if (counter == iter.second && 
kHWSpecialFormatSet.find(iter.first) != kHWSpecialFormatSet.end()) { convert_format = iter.first; } } @@ -130,13 +122,17 @@ std::string RectifyDoMaskKernelInfo::GetConvertFormat(const std::map &do_mask_node_list, - const std::string &format) const { + const std::string &format, + const FuncGraphPtr &graph) const { for (const auto &do_mask : do_mask_node_list) { - auto builder = - std::make_shared(AnfAlgo::GetSelectKernelBuildInfo(do_mask)); - builder->SetInputFormat(format, 0); - builder->SetOutputFormat(format, 0); - AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), do_mask.get()); + if (AnfAlgo::GetInputFormat(do_mask, 0) != format) { + auto builder = + std::make_shared(AnfAlgo::GetSelectKernelBuildInfo(do_mask)); + builder->SetInputFormat(format, 0); + builder->SetOutputFormat(format, 0); + AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), do_mask.get()); + ReSelecChildNodeKernelInfo(do_mask, graph); + } } } @@ -159,5 +155,30 @@ AnfNodePtr RectifyDoMaskKernelInfo::RectifyKernelInfoInPynativeProcess(const Anf } return nullptr; } + +void RectifyDoMaskKernelInfo::ReSelecChildNodeKernelInfo(const CNodePtr &cnode, const FuncGraphPtr &graph) const { + MS_EXCEPTION_IF_NULL(cnode); + auto output_node_list = GetRealNodeUsedList(graph, cnode); + MS_EXCEPTION_IF_NULL(output_node_list); + for (const auto &out_node_info : *output_node_list) { + MS_EXCEPTION_IF_NULL(out_node_info.first); + auto out_node = out_node_info.first->cast(); + if (AnfAlgo::IsRealKernel(out_node_info.first)) { + auto ori_build_info = AnfAlgo::GetSelectKernelBuildInfo(out_node); + kernel_selecter->SelectKernel(out_node); + auto new_build_info = AnfAlgo::GetSelectKernelBuildInfo(out_node); + MS_EXCEPTION_IF_NULL(new_build_info); + MS_EXCEPTION_IF_NULL(ori_build_info); + if ((*new_build_info) != (*ori_build_info)) { + ReSelecChildNodeKernelInfo(out_node, graph); + } + } else if (AnfAlgo::GetCNodeName(out_node) == prim::kPrimTupleGetItem->name() || + AnfAlgo::GetCNodeName(out_node) == 
prim::kPrimDepend->name()) { + ReSelecChildNodeKernelInfo(out_node, graph); + } else { + MS_LOG(INFO) << "Reselected the node " << cnode->DebugString() << " failed"; + } + } +} } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/rectify_do_mask_kernel_info.h b/mindspore/ccsrc/backend/optimizer/ascend/format_type/rectify_do_mask_kernel_info.h similarity index 78% rename from mindspore/ccsrc/pre_activate/ascend/format_type/rectify_do_mask_kernel_info.h rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/rectify_do_mask_kernel_info.h index 81bad4d8f8..cc9333a013 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/rectify_do_mask_kernel_info.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/rectify_do_mask_kernel_info.h @@ -19,23 +19,28 @@ #include #include #include +#include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ascend_helper.h" namespace mindspore { namespace opt { class RectifyDoMaskKernelInfo : public PatternProcessPass { public: explicit RectifyDoMaskKernelInfo(bool multigraph = true) - : PatternProcessPass("batch_norm_bert_fission", multigraph) {} + : PatternProcessPass("batch_norm_bert_fission", multigraph), kernel_selecter(std::make_shared()) {} ~RectifyDoMaskKernelInfo() override = default; const BaseRef DefinePattern() const override; const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; private: - void RectifyKernelInfo(const std::vector &do_mask_node_list) const; + void RectifyKernelInfo(const std::vector &do_mask_node_list, const FuncGraphPtr &graph) const; AnfNodePtr RectifyKernelInfoInPynativeProcess(const AnfNodePtr &node) const; std::string GetConvertFormat(const std::map &format_counter) const; - void RectifyDropOutDoMaskKernelInfo(const std::vector &do_mask_node_list, const std::string &format) const; + void 
RectifyDropOutDoMaskKernelInfo(const std::vector &do_mask_node_list, const std::string &format, + const FuncGraphPtr &graph) const; + void ReSelecChildNodeKernelInfo(const CNodePtr &cnode, const FuncGraphPtr &graph) const; + KernelSelectPtr kernel_selecter; }; } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/remove_no_use_reshape_op.cc b/mindspore/ccsrc/backend/optimizer/ascend/format_type/remove_no_use_reshape_op.cc similarity index 88% rename from mindspore/ccsrc/pre_activate/ascend/format_type/remove_no_use_reshape_op.cc rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/remove_no_use_reshape_op.cc index dde40a5090..09992005a4 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/remove_no_use_reshape_op.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/remove_no_use_reshape_op.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include "pre_activate/ascend/format_type/remove_no_use_reshape_op.h" +#include "backend/optimizer/ascend/format_type/remove_no_use_reshape_op.h" #include #include -#include "pre_activate/common/helper.h" -#include "kernel/common_utils.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/optimizer/common/helper.h" +#include "backend/kernel_compiler/common_utils.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/remove_no_use_reshape_op.h b/mindspore/ccsrc/backend/optimizer/ascend/format_type/remove_no_use_reshape_op.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/format_type/remove_no_use_reshape_op.h rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/remove_no_use_reshape_op.h index 4942c2fc08..135f11f52c 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/remove_no_use_reshape_op.h +++ 
b/mindspore/ccsrc/backend/optimizer/ascend/format_type/remove_no_use_reshape_op.h @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_REMOVE_NO_USE_RESHAPE_OP_H #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_REMOVE_NO_USE_RESHAPE_OP_H -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/addn_fission.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/addn_fission.cc similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/addn_fission.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/addn_fission.cc index b9a86f7bcb..a3fd704bc5 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/addn_fission.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/addn_fission.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/ir_fission/addn_fission.h" +#include "backend/optimizer/ascend/ir_fission/addn_fission.h" #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/addn_fission.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/addn_fission.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/addn_fission.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/addn_fission.h index 3c62391f9a..e04cdfdf7b 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/addn_fission.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/addn_fission.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_ADDN_FISSION_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_ADDN_FISSION_H_ -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_bert_fission.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_bert_fission.cc similarity index 97% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_bert_fission.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_bert_fission.cc index e6a8864e46..f0edefd5f5 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_bert_fission.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_bert_fission.cc @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/ir_fission/batch_norm_bert_fission.h" +#include "backend/optimizer/ascend/ir_fission/batch_norm_bert_fission.h" #include #include #include -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_bert_fission.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_bert_fission.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_bert_fission.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_bert_fission.h index fc214817fc..23f0e56035 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_bert_fission.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_bert_fission.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_BATCH_NORM_BERT_FISSION_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_BATCH_NORM_BERT_FISSION_H_ -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_infer_fission.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_grad_infer_fission.cc similarity index 97% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_infer_fission.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_grad_infer_fission.cc index 5e41111660..97c67e4441 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_infer_fission.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_grad_infer_fission.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/ir_fission/batch_norm_grad_infer_fission.h" +#include "backend/optimizer/ascend/ir_fission/batch_norm_grad_infer_fission.h" #include -#include "pre_activate/common/helper.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_infer_fission.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_grad_infer_fission.h similarity index 97% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_infer_fission.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_grad_infer_fission.h index a8eefdaa85..97100de284 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_infer_fission.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_grad_infer_fission.h @@ -17,7 +17,7 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_BATCH_NORM_GRAD_INFER_FISSION_H_ #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_split.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_grad_split.cc similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_split.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_grad_split.cc index 270b02cb00..97122386c6 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_split.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_grad_split.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/ir_fission/batch_norm_grad_split.h" +#include "backend/optimizer/ascend/ir_fission/batch_norm_grad_split.h" #include #include @@ -22,9 +22,9 @@ #include "utils/utils.h" #include "utils/context/ms_context.h" #include "common/utils.h" -#include "pre_activate/common/helper.h" -#include "device/kernel_info.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" +#include "runtime/device/kernel_info.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_split.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_grad_split.h similarity index 93% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_split.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_grad_split.h index e539fdb27c..e5378d8332 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_split.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_grad_split.h @@ -16,8 +16,8 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_BATCH_NORM_GRAD_SPLIT_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_BATCH_NORM_GRAD_SPLIT_H_ -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/bn_grad_split.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/bn_grad_split.cc similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/bn_grad_split.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/bn_grad_split.cc index 6282ed4f76..6c4e226120 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/bn_grad_split.cc +++ 
b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/bn_grad_split.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fission/bn_grad_split.h" +#include "backend/optimizer/ascend/ir_fission/bn_grad_split.h" #include #include @@ -22,9 +22,9 @@ #include "utils/utils.h" #include "utils/context/ms_context.h" #include "common/utils.h" -#include "pre_activate/common/helper.h" -#include "device/kernel_info.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" +#include "runtime/device/kernel_info.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/bn_grad_split.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/bn_grad_split.h similarity index 93% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/bn_grad_split.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/bn_grad_split.h index 17e1f9b98e..6fe78d4724 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/bn_grad_split.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/bn_grad_split.h @@ -16,8 +16,8 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_BN_GRAD_SPLIT_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_BN_GRAD_SPLIT_H_ -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/bn_split.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/bn_split.cc similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/bn_split.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/bn_split.cc index 66ffa24bf1..33670e5703 100644 --- 
a/mindspore/ccsrc/pre_activate/ascend/ir_fission/bn_split.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/bn_split.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fission/bn_split.h" +#include "backend/optimizer/ascend/ir_fission/bn_split.h" #include #include @@ -21,9 +21,9 @@ #include "utils/utils.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/helper.h" -#include "device/kernel_info.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" +#include "runtime/device/kernel_info.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/bn_split.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/bn_split.h similarity index 92% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/bn_split.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/bn_split.h index bc5975af17..4340ba0af6 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/bn_split.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/bn_split.h @@ -16,8 +16,8 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_BN_SPLIT_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_BN_SPLIT_H_ -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/lars_v2_fission.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/lars_v2_fission.cc similarity index 95% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/lars_v2_fission.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/lars_v2_fission.cc index 479e00e4c0..e8a778b36f 100644 --- 
a/mindspore/ccsrc/pre_activate/ascend/ir_fission/lars_v2_fission.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/lars_v2_fission.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fission/lars_v2_fission.h" +#include "backend/optimizer/ascend/ir_fission/lars_v2_fission.h" #include #include -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" #include "utils/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/lars_v2_fission.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/lars_v2_fission.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/lars_v2_fission.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/lars_v2_fission.h index 846d221c53..3a165f2b29 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/lars_v2_fission.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/lars_v2_fission.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_LARS_V2_FISSION_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_LARS_V2_FISSION_H_ -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/layer_norm_grad_split.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/layer_norm_grad_split.cc similarity index 97% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/layer_norm_grad_split.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/layer_norm_grad_split.cc index 1a25d83650..1d19def787 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/layer_norm_grad_split.cc +++ 
b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/layer_norm_grad_split.cc @@ -13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fission/layer_norm_grad_split.h" +#include "backend/optimizer/ascend/ir_fission/layer_norm_grad_split.h" #include #include #include -#include "session/anf_runtime_algorithm.h" -#include "device/kernel_info.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "runtime/device/kernel_info.h" #include "ir/primitive.h" #include "common/utils.h" #include "utils/utils.h" diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/layer_norm_grad_split.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/layer_norm_grad_split.h similarity index 92% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/layer_norm_grad_split.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/layer_norm_grad_split.h index f442446b01..c1501b1593 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/layer_norm_grad_split.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/layer_norm_grad_split.h @@ -18,9 +18,9 @@ #include #include -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/helper.h" -#include "pre_activate/ascend/ascend_helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/helper.h" +#include "backend/optimizer/ascend/ascend_helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/single_batch_norm_fission.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/single_batch_norm_fission.cc similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/single_batch_norm_fission.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/single_batch_norm_fission.cc index 159be2ac3b..133d51734f 100644 --- 
a/mindspore/ccsrc/pre_activate/ascend/ir_fission/single_batch_norm_fission.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/single_batch_norm_fission.cc @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fission/single_batch_norm_fission.h" +#include "backend/optimizer/ascend/ir_fission/single_batch_norm_fission.h" #include #include #include -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/single_batch_norm_fission.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/single_batch_norm_fission.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/single_batch_norm_fission.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/single_batch_norm_fission.h index 145603132b..fb641c12d6 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/single_batch_norm_fission.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/single_batch_norm_fission.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_SINGLE_BATCH_NORM_FISSION_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_SINGLE_BATCH_NORM_FISSION_H_ -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/split_fission.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/split_fission.cc similarity index 97% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/split_fission.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/split_fission.cc index c39a5e01e6..063f81a1ca 100644 --- 
a/mindspore/ccsrc/pre_activate/ascend/ir_fission/split_fission.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/split_fission.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fission/split_fission.h" +#include "backend/optimizer/ascend/ir_fission/split_fission.h" #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace opt { @@ -82,6 +82,9 @@ void CreateOutputShapeAndTypeId(const CNodePtr &origin_cnode, int split_dim, int MS_EXCEPTION_IF_NULL(new_type_ids); MS_EXCEPTION_IF_NULL(new_output_shapes); auto output_shape = AnfAlgo::GetOutputInferShape(origin_cnode, 0); + if (split_dim < 0) { + split_dim += output_shape.size(); + } output_shape[split_dim] = split_size; TypeId type_id = AnfAlgo::GetOutputInferDataType(origin_cnode, 0); for (int i = 0; i < num_split; ++i) { @@ -97,6 +100,9 @@ void SetAttrAndAbstractForBaseSplitv(const CNodePtr &origin_cnode, const CNodePt std::vector> base_output_shapes_base; auto output_shape = AnfAlgo::GetOutputInferShape(origin_cnode, 0); TypeId type_id = AnfAlgo::GetOutputInferDataType(origin_cnode, 0); + if (split_dim < 0) { + split_dim += output_shape.size(); + } for (int i = 0; i < num_split; ++i) { output_shape[split_dim] = size_splits_base[i]; base_output_shapes_base.emplace_back(output_shape); diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/split_fission.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/split_fission.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/split_fission.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/split_fission.h index c2763bb714..6428a21e73 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/split_fission.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/split_fission.h @@ -16,7 +16,7 @@ #ifndef 
MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_SPLIT_FISSION_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_SPLIT_FISSION_H_ -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/tensor_scatter_update_fission.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/tensor_scatter_update_fission.cc similarity index 94% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/tensor_scatter_update_fission.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/tensor_scatter_update_fission.cc index 6e6cea5ae5..c9a879e921 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/tensor_scatter_update_fission.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/tensor_scatter_update_fission.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fission/tensor_scatter_update_fission.h" +#include "backend/optimizer/ascend/ir_fission/tensor_scatter_update_fission.h" #include #include -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/tensor_scatter_update_fission.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/tensor_scatter_update_fission.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/tensor_scatter_update_fission.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/tensor_scatter_update_fission.h index 0ada93ac70..0f7efb029c 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/tensor_scatter_update_fission.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/tensor_scatter_update_fission.h @@ -16,7 +16,7 
@@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_TENSOR_SCATTER_UPDATE_FISSION_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_TENSOR_SCATTER_UPDATE_FISSION_H_ -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/topk_split.cc similarity index 95% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/topk_split.cc index c8477353f9..6eeb7a61f7 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/topk_split.cc @@ -13,17 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fission/topk_split.h" +#include "backend/optimizer/ascend/ir_fission/topk_split.h" #include #include #include #include -#include "pre_activate/common/helper.h" -#include "kernel/kernel_build_info.h" +#include "backend/optimizer/common/helper.h" +#include "backend/kernel_compiler/kernel_build_info.h" #include "utils/utils.h" -#include "session/kernel_graph.h" -#include "session/anf_runtime_algorithm.h" -#include "device/kernel_info.h" +#include "backend/session/kernel_graph.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "runtime/device/kernel_info.h" #include "utils/context/ms_context.h" namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/topk_split.h similarity index 93% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/topk_split.h index e7293e1fa3..e005a83a2f 100644 --- 
a/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/topk_split.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_TOPK_SPLIT_H_ #include -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ascend_helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ascend_helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/transdata_split.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/transdata_split.cc similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/transdata_split.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/transdata_split.cc index bfb7e50486..057cf8deed 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/transdata_split.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/transdata_split.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/ir_fission/transdata_split.h" +#include "backend/optimizer/ascend/ir_fission/transdata_split.h" #include -#include "pre_activate/ascend/ascend_helper.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/ascend/ascend_helper.h" +#include "backend/session/anf_runtime_algorithm.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/transdata_split.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/transdata_split.h similarity index 86% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/transdata_split.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/transdata_split.h index f450897db1..bc681944c3 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/transdata_split.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/transdata_split.h @@ -20,12 +20,12 @@ #include #include -#include "pre_activate/common/pass.h" +#include "backend/optimizer/common/pass.h" #include "ir/func_graph.h" #include "ir/anf.h" -#include "pre_activate/common/helper.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ascend_helper.h" +#include "backend/optimizer/common/helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ascend_helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/adam_apply_one_fusion.cc similarity index 98% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/adam_apply_one_fusion.cc index 59be003b15..189ac94546 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/adam_apply_one_fusion.cc @@ -13,9 +13,8 @@ * See the License for the specific language 
governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/adam_apply_one_fusion.h" -#include "pre_activate/common/helper.h" - +#include "backend/optimizer/ascend/ir_fusion/adam_apply_one_fusion.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { AnfNodePtr AdamApplyOneFusion::CreateAdamApplyOneNode(const FuncGraphPtr &func_graph, const EquivPtr &equiv) const { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/adam_apply_one_fusion.h similarity index 98% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/adam_apply_one_fusion.h index 5ee8a86cfb..683a345cdb 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/adam_apply_one_fusion.h @@ -19,7 +19,7 @@ #include #include #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" #include "utils/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/adam_apply_one_with_decay_rule.cc similarity index 97% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/adam_apply_one_with_decay_rule.cc index f6077c95f2..b1afa338d4 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/adam_apply_one_with_decay_rule.cc @@ -13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule.h" +#include "backend/optimizer/ascend/ir_fusion/adam_apply_one_with_decay_rule.h" #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "ir/primitive.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/adam_apply_one_with_decay_rule.h similarity index 98% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/adam_apply_one_with_decay_rule.h index 742295dd9c..2d599a8cc9 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/adam_apply_one_with_decay_rule.h @@ -19,7 +19,7 @@ #include #include #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" #include "utils/utils.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/add_input_to_output.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/add_input_to_output.cc new file mode 100644 index 0000000000..cc58d2b057 --- /dev/null +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/add_input_to_output.cc @@ -0,0 +1,115 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "backend/optimizer/ascend/ir_fusion/add_input_to_output.h" +#include +#include +#include "backend/optimizer/ascend/ir_fusion/input_to_output_registry.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/oplib/oplib.h" + +namespace mindspore { +namespace opt { +namespace { +void GetInputOrOutputNames(const CNodePtr &cnode, const std::string &attr_name, std::vector *names_vec) { + MS_EXCEPTION_IF_NULL(names_vec); + auto primitive = AnfAlgo::GetCNodePrimitive(cnode); + MS_EXCEPTION_IF_NULL(primitive); + ValuePtr names_value = primitive->GetAttr(attr_name); + if (names_value == nullptr) { + return; + } + *names_vec = GetValue>(names_value); +} + +void AddOutputs(const CNodePtr &cnode, const std::vector &input_indices) { + MS_EXCEPTION_IF_NULL(cnode); + std::vector input_names_vec; + GetInputOrOutputNames(cnode, kAttrInputNames, &input_names_vec); + std::vector output_names_vec; + GetInputOrOutputNames(cnode, kAttrOutputNames, &output_names_vec); + AbstractBasePtrList abstract_list; + auto origin_abstract = cnode->abstract(); + MS_EXCEPTION_IF_NULL(origin_abstract); + if (origin_abstract->isa()) { + auto origin_abstract_tuple = dyn_cast(origin_abstract); + MS_EXCEPTION_IF_NULL(origin_abstract_tuple); + AbstractBasePtrList origin_abstract_list = origin_abstract_tuple->elements(); + (void)std::copy(origin_abstract_list.begin(), origin_abstract_list.end(), std::back_inserter(abstract_list)); + } else { + abstract_list.emplace_back(origin_abstract); + } + + for (size_t i = 0; i < input_indices.size(); ++i) 
{ + size_t index = input_indices[i]; + if (index + 1 >= cnode->inputs().size()) { + MS_LOG(INFO) << "The input index " << index << " for converting to output is out of range, " + << "node: " << cnode->DebugString(); + continue; + } + auto node_to_output = cnode->input(index + 1); + MS_EXCEPTION_IF_NULL(node_to_output); + abstract_list.emplace_back(node_to_output->abstract()); + if (!input_names_vec.empty() && !output_names_vec.empty() && index < input_names_vec.size()) { + output_names_vec.emplace_back(input_names_vec[index]); + } + } + if (!output_names_vec.empty()) { + AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(output_names_vec), cnode); + } + auto abstract_tuple = std::make_shared(abstract_list); + cnode->set_abstract(abstract_tuple); +} +} // namespace + +const AnfNodePtr AddInputToOutput::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, + const EquivPtr &) const { + if (node == nullptr || !AnfAlgo::IsRealCNodeKernel(node)) { + return nullptr; + } + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + std::string op_name = AnfAlgo::GetCNodeName(cnode); + InputToOutputRegister reg; + if (!InputToOutputRegistry::Instance().GetRegisterByOpName(op_name, ®)) { + return nullptr; + } + int output_num = op_finder_->GetOpRegisteredOutputNum(op_name); + // No need add output when it is not a tbe op. + if (output_num == -1) { + return nullptr; + } + // No need add output if the output num matches the registered output num for tbe. + if (AnfAlgo::GetOutputTensorNum(cnode) >= IntToSize(output_num)) { + return nullptr; + } + bool is_origin_tuple_output = AnfAlgo::IsTupleOutput(cnode); + AddOutputs(cnode, reg.input_indices()); + // No need to create tuple_getitem if the origin output is a tuple because there has already been some tuple_getitems + // pointed to the outputs. 
+ if (is_origin_tuple_output) { + return nullptr; + } + std::vector new_outputs; + auto new_abstract_tuple = dyn_cast(cnode->abstract()); + MS_EXCEPTION_IF_NULL(new_abstract_tuple); + CreateMultipleOutputsOfAnfNode(func_graph, cnode, new_abstract_tuple->size(), &new_outputs); + if (new_outputs.size() != new_abstract_tuple->size()) { + MS_LOG(EXCEPTION) << "Failed to create outputs of " << cnode->DebugString(); + } + return new_outputs[0]; +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/add_input_to_output.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/add_input_to_output.h new file mode 100644 index 0000000000..6e5560bfb0 --- /dev/null +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/add_input_to_output.h @@ -0,0 +1,39 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_ADD_INPUT_TO_OUTPUT_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_ADD_INPUT_TO_OUTPUT_H_ + +#include +#include +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ascend_helper.h" + +namespace mindspore { +namespace opt { +class AddInputToOutput : public PatternProcessPass { + public: + explicit AddInputToOutput(bool multigraph = true) + : PatternProcessPass("add_input_to_output", multigraph), op_finder_(std::make_shared()) {} + ~AddInputToOutput() override = default; + const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; + + private: + OpFinderPtr op_finder_; +}; +} // namespace opt +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_ADD_INPUT_TO_OUTPUT_H_ diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/batchnorm_to_bninfer.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnorm_to_bninfer.cc similarity index 94% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/batchnorm_to_bninfer.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnorm_to_bninfer.cc index 1a62b7a5be..51bcd880cd 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/batchnorm_to_bninfer.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnorm_to_bninfer.cc @@ -13,15 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/ir_fusion/batchnorm_to_bninfer.h" +#include "backend/optimizer/ascend/ir_fusion/batchnorm_to_bninfer.h" #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "ir/primitive.h" #include "utils/utils.h" -#include "operator/ops.h" -#include "pipeline/static_analysis/abstract_value.h" -#include "pre_activate/common/helper.h" +#include "frontend/operator/ops.h" +#include "abstract/abstract_value.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/batchnorm_to_bninfer.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnorm_to_bninfer.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/batchnorm_to_bninfer.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnorm_to_bninfer.h index 551fe0f6f9..46872aa959 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/batchnorm_to_bninfer.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnorm_to_bninfer.h @@ -17,7 +17,7 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_BATCHNORM_TO_BNINFER_H_ #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/batchnormgrad_to_bninfergrad.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnormgrad_to_bninfergrad.cc similarity index 94% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/batchnormgrad_to_bninfergrad.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnormgrad_to_bninfergrad.cc index 424d3a12c1..defb011396 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/batchnormgrad_to_bninfergrad.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnormgrad_to_bninfergrad.cc @@ -13,15 +13,15 @@ * See the License for the specific 
language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/batchnormgrad_to_bninfergrad.h" +#include "backend/optimizer/ascend/ir_fusion/batchnormgrad_to_bninfergrad.h" #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "ir/primitive.h" #include "utils/utils.h" -#include "operator/ops.h" -#include "pipeline/static_analysis/abstract_value.h" -#include "pre_activate/common/helper.h" +#include "frontend/operator/ops.h" +#include "abstract/abstract_value.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/batchnormgrad_to_bninfergrad.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnormgrad_to_bninfergrad.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/batchnormgrad_to_bninfergrad.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnormgrad_to_bninfergrad.h index 020dc1a999..0676f8a040 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/batchnormgrad_to_bninfergrad.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnormgrad_to_bninfergrad.h @@ -17,7 +17,7 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_BATCHNORMGRAD_TO_BNINFERGRAD_H_ #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.cc similarity index 95% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.cc index 2af3afbf19..1d89bfd388 100644 --- 
a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.cc @@ -13,13 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.h" #include #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "ir/primitive.h" #include "common/utils.h" #include "utils/utils.h" diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.h similarity index 97% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.h index 126480603e..9282b75527 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.h @@ -18,7 +18,7 @@ #include #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/clip_by_value_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/clip_by_value_fusion.cc similarity index 95% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/clip_by_value_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/clip_by_value_fusion.cc index df94e897ec..e1b0cb81e3 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/clip_by_value_fusion.cc +++ 
b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/clip_by_value_fusion.cc @@ -13,16 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/clip_by_value_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/clip_by_value_fusion.h" #include #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "ir/primitive.h" #include "utils/utils.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/clip_by_value_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/clip_by_value_fusion.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/clip_by_value_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/clip_by_value_fusion.h index 309b7cedd0..05bf713bdd 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/clip_by_value_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/clip_by_value_fusion.h @@ -17,7 +17,7 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_CLIP_BY_VALUE_FUSION_H_ #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/confusion_mul_grad_fusion.cc similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/confusion_mul_grad_fusion.cc index d49b2d47f3..6ccf3e29bd 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/confusion_mul_grad_fusion.cc @@ -13,17 +13,17 @@ 
* See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/confusion_mul_grad_fusion.h" #include #include #include #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "ir/primitive.h" #include "utils/utils.h" -#include "pipeline/static_analysis/abstract_value.h" -#include "pre_activate/common/helper.h" +#include "abstract/abstract_value.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/confusion_mul_grad_fusion.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/confusion_mul_grad_fusion.h index 170df5b0e4..932f0d2890 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/confusion_mul_grad_fusion.h @@ -17,7 +17,7 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_CONFUSION_MUL_GRAD_FUSION_H_ #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/confusion_softmax_grad_rule.cc similarity index 92% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/confusion_softmax_grad_rule.cc index 9e2c6374ce..a8cf0af465 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.cc +++ 
b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/confusion_softmax_grad_rule.cc @@ -13,15 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.h" +#include "backend/optimizer/ascend/ir_fusion/confusion_softmax_grad_rule.h" #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "ir/primitive.h" #include "utils/utils.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/confusion_softmax_grad_rule.h similarity index 97% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/confusion_softmax_grad_rule.h index a4d0d1ce7a..e3a86e22c9 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/confusion_softmax_grad_rule.h @@ -17,7 +17,7 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_CONFUSION_SOFTMAX_GRAD_RULE_H_ #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/derelu_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/derelu_fusion.cc similarity index 95% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/derelu_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/derelu_fusion.cc index 2f3c998bb8..0fe042dc4e 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/derelu_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/derelu_fusion.cc @@ -13,14 +13,14 @@ * See the 
License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/derelu_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/derelu_fusion.h" #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "ir/primitive.h" #include "utils/utils.h" -#include "pipeline/static_analysis/abstract_value.h" -#include "pre_activate/common/helper.h" +#include "abstract/abstract_value.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/derelu_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/derelu_fusion.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/derelu_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/derelu_fusion.h index e1811f4db4..7506960ecb 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/derelu_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/derelu_fusion.h @@ -17,7 +17,7 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_DERELU_FUSION_H_ #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/fused_batch_norm_fusion.cc similarity index 99% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/fused_batch_norm_fusion.cc index efc9ee7934..dbff0374f3 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/fused_batch_norm_fusion.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the 
License. */ -#include "pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/fused_batch_norm_fusion.h" #include #include -#include "pre_activate/common/helper.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/fused_batch_norm_fusion.h similarity index 98% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/fused_batch_norm_fusion.h index f476e96062..b3bbedc36e 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/fused_batch_norm_fusion.h @@ -19,7 +19,7 @@ #include #include #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" #include "utils/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/input_to_output_registry.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/input_to_output_registry.cc new file mode 100644 index 0000000000..2fb42f9bd6 --- /dev/null +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/input_to_output_registry.cc @@ -0,0 +1,122 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "backend/optimizer/ascend/ir_fusion/input_to_output_registry.h" +#include +#include "utils/utils.h" +#include "backend/session/anf_runtime_algorithm.h" + +namespace mindspore { +namespace opt { +namespace { +bool ApplyRMSPropPreCheck(const CNodePtr &node) { + return !(AnfAlgo::GetPrevNodeOutputInferDataType(node, 0) != kNumberTypeFloat32); +} + +bool FusedMulApplyMomentumPreCheck(const CNodePtr &node) { + TypeId data_type = AnfAlgo::GetPrevNodeOutputInferDataType(node, 0); + return !(data_type != kNumberTypeFloat32 && data_type != kNumberTypeFloat16); +} + +bool SparseApplyRMSPropPreCheck(const CNodePtr &node) { + return !(AnfAlgo::GetPrevNodeOutputInferDataType(node, 0) != kNumberTypeFloat32); +} + +bool ApplyAdagradV2PreCheck(const CNodePtr &node) { + TypeId data_type = AnfAlgo::GetPrevNodeOutputInferDataType(node, 0); + return !(data_type != kNumberTypeFloat32 && data_type != kNumberTypeFloat16); +} + +bool ApplyKerasMomentumPreCheck(const CNodePtr &node) { + TypeId data_type = AnfAlgo::GetPrevNodeOutputInferDataType(node, 0); + return !(data_type != kNumberTypeFloat32 && data_type != kNumberTypeFloat16); +} + +bool SparseApplyFtrlPreCheck(const CNodePtr &node) { + return !(AnfAlgo::GetPrevNodeOutputInferDataType(node, 0) != kNumberTypeFloat32); +} + +bool SparseApplyFtrlV2PreCheck(const CNodePtr &node) { + return !(AnfAlgo::GetPrevNodeOutputInferDataType(node, 0) != kNumberTypeFloat32); +} + +bool SparseApplyAdagradV2PreCheck(const CNodePtr &node) { + return !(AnfAlgo::GetPrevNodeOutputInferDataType(node, 0) != kNumberTypeFloat32); +} + +bool SparseApplyAdadeltaPreCheck(const CNodePtr &node) { + return !(AnfAlgo::GetPrevNodeOutputInferDataType(node, 0) != kNumberTypeFloat32); +} +} // namespace +InputToOutputRegistry::InputToOutputRegistry() { + Register(kApplyRMSPropOpName, {1, 2}, ApplyRMSPropPreCheck); + 
Register(kFusedMulApplyMomentumOpName, {1}, FusedMulApplyMomentumPreCheck); + Register(kApplyAdagradOpName, {1}); + Register(kApplyAdagradDAName, {1, 2}); + Register(kApplyAdadeltaOpName, {1, 2}); + Register(kApplyPowerSignOpName, {1}); + Register(kApplyProximalAdagradOpName, {1}); + Register(kApplyAdaMaxOpName, {1, 2}); + Register(kApplyAdagradV2OpName, {1}, ApplyAdagradV2PreCheck); + Register(kApplyKerasMomentumOpName, {1}, ApplyKerasMomentumPreCheck); + Register(kSparseApplyFtrlOpName, {1, 2}, SparseApplyFtrlPreCheck); + Register(kSparseApplyFtrlV2OpName, {1, 2}, SparseApplyFtrlV2PreCheck); + Register(kSparseApplyAdagradV2OpName, {1}, SparseApplyAdagradV2PreCheck); + Register(kSparseApplyProximalAdagradOpName, {1}); + Register(kSparseApplyAdagradOpName, {1}); + Register(kApplyFtrlV2OpName, {1, 2}); + Register(kApplyMomentumOpName, {1}); + Register(kApplyFtrlOpName, {1, 2}); + Register(kApplyAdamOpName, {1, 2}); + Register(kApplyCenteredRMSPropOpName, {1, 2, 3}); + Register(kApplyAddSignOpName, {1}); + Register(kSparseApplyRMSPropOpName, {1, 2}, SparseApplyRMSPropPreCheck); + Register(kSparseApplyAdadeltaOpName, {1, 2}, SparseApplyAdadeltaPreCheck); + Register(kApplyAdamWithAmsgradOpName, {1, 2}); +} + +InputToOutputRegistry &InputToOutputRegistry::Instance() { + static InputToOutputRegistry instance; + return instance; +} + +void InputToOutputRegistry::Register(const InputToOutputRegister ®) { + auto op_name = reg.op_name(); + if (op_input_to_output_map_.find(op_name) == op_input_to_output_map_.end()) { + (void)op_input_to_output_map_.insert(make_pair(op_name, reg)); + MS_LOG(DEBUG) << op_name << " input2output register successfully!"; + } +} + +void InputToOutputRegistry::Register(const std::string &op_name, const std::vector &input_indices, + const PreCheckFunc &pre_check_func) { + if (op_input_to_output_map_.find(op_name) == op_input_to_output_map_.end()) { + InputToOutputRegister reg(op_name, pre_check_func); + reg.set_input_indices(input_indices); + 
(void)op_input_to_output_map_.insert(make_pair(op_name, reg)); + MS_LOG(DEBUG) << op_name << " input2output register successfully!"; + } +} + +bool InputToOutputRegistry::GetRegisterByOpName(const std::string &op_name, InputToOutputRegister *reg) const { + if (op_input_to_output_map_.find(op_name) != op_input_to_output_map_.end()) { + *reg = op_input_to_output_map_.at(op_name); + MS_LOG(DEBUG) << op_name << " input2output find in registry."; + return true; + } + return false; +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/input_to_output_registry.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/input_to_output_registry.h new file mode 100644 index 0000000000..45738c289c --- /dev/null +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/input_to_output_registry.h @@ -0,0 +1,64 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_IR_FUSION_INPUT_TO_OUTPUT_REGISTRY_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_IR_FUSION_INPUT_TO_OUTPUT_REGISTRY_H_ +#include +#include +#include +#include +#include "ir/anf.h" +#include "common/utils.h" + +namespace mindspore { +namespace opt { +using PreCheckFunc = std::function; +class InputToOutputRegister { + public: + explicit InputToOutputRegister( + const std::string &op_name = "", const PreCheckFunc &pre_check_func = [](const CNodePtr &node) { return true; }) + : op_name_(op_name), pre_check_func_(pre_check_func) {} + virtual ~InputToOutputRegister() = default; + + void set_input_indices(const std::vector &input_indices) { input_indices_ = input_indices; } + + const std::vector &input_indices() const { return input_indices_; } + const std::string &op_name() const { return op_name_; } + + private: + std::string op_name_; + std::vector input_indices_; + PreCheckFunc pre_check_func_; +}; + +class InputToOutputRegistry { + public: + static InputToOutputRegistry &Instance(); + void Register(const InputToOutputRegister ®); + void Register( + const std::string &op_name, const std::vector &input_indices, + const PreCheckFunc &pre_check_func = [](const CNodePtr &node) { return true; }); + bool GetRegisterByOpName(const std::string &op_name, InputToOutputRegister *reg) const; + + private: + InputToOutputRegistry(); + ~InputToOutputRegistry() = default; + DISABLE_COPY_AND_ASSIGN(InputToOutputRegistry) + std::unordered_map op_input_to_output_map_; +}; +} // namespace opt +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_IR_FUSION_INPUT_TO_OUTPUT_REGISTRY_H_ diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_rule.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_rule.cc similarity index 98% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_rule.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_rule.cc index 
42e37df3e4..fd9fd31f12 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_rule.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_rule.cc @@ -14,16 +14,16 @@ * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/lamb_next_mv_rule.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_next_mv_rule.h" #include #include #include #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" -#include "pre_activate/common/helper.h" -#include "operator/ops.h" +#include "backend/optimizer/common/helper.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_rule.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_rule.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_rule.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_rule.h index 0089c33f87..d14ce6e3fe 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_rule.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_rule.h @@ -23,9 +23,9 @@ #include #include #include "ir/anf.h" -#include "pre_activate/common/pattern_engine.h" -#include "pre_activate/common/helper.h" -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/pattern_engine.h" +#include "backend/optimizer/common/helper.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_rule.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_rule.cc similarity index 98% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_rule.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_rule.cc index 
0e3cd28a66..4ef3fa269f 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_rule.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_rule.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_rule.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_rule.h" #include -#include "session/anf_runtime_algorithm.h" -#include "optimizer/opt.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/optimizer/opt.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_rule.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_rule.h similarity index 98% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_rule.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_rule.h index 5d61975197..23114c37ee 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_rule.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_rule.h @@ -19,8 +19,8 @@ #include #include #include -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.cc similarity index 98% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.cc index 26828f2137..f21433b3c6 100644 --- 
a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.cc @@ -13,15 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.h" #include #include #include #include -#include "session/anf_runtime_algorithm.h" -#include "optimizer/opt.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/optimizer/opt.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.h index ff14a253dd..58f05c37ba 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.h @@ -18,8 +18,8 @@ #include #include -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_right_rule.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_right_rule.cc similarity index 97% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_right_rule.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_right_rule.cc index 5065c4c5ba..03bc1e0484 100644 --- 
a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_right_rule.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_right_rule.cc @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/lamb_next_right_rule.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_next_right_rule.h" #include -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_right_rule.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_right_rule.h similarity index 97% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_right_rule.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_right_rule.h index 3d15001da2..67687cc037 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_right_rule.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_right_rule.h @@ -17,7 +17,7 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_LAMB_NEXT_RIGHT_RULE_H_ #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" #include "utils/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.cc similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.cc index b5b6d2bb08..8e38c3cc2e 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.cc @@ -13,13 +13,13 @@ * See the License for the specific language 
governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.h" #include #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "ir/primitive.h" #include "common/utils.h" #include "utils/utils.h" diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.h similarity index 97% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.h index cb3939549f..5ea01ccf65 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.h @@ -17,7 +17,7 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_LAMB_UPDATE_WITH_LR_RULE_FUSION_H_ #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_v2.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_v2.cc similarity index 95% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_v2.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_v2.cc index 43e1872163..59511a611a 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_v2.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_v2.cc @@ -14,12 +14,12 @@ * limitations under the License. 
*/ -#include "pre_activate/ascend/ir_fusion/lamb_update_with_lr_v2.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_v2.h" #include #include #include #include "utils/utils.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_v2.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_v2.h similarity index 91% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_v2.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_v2.h index ea614d3d2d..c5396178a5 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_v2.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_v2.h @@ -23,9 +23,9 @@ #include #include #include "ir/anf.h" -#include "pre_activate/common/pattern_engine.h" -#include "pre_activate/common/helper.h" -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/pattern_engine.h" +#include "backend/optimizer/common/helper.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.cc similarity index 98% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.cc index b16387d8f1..fa1e92120d 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.h" #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.h similarity index 91% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.h index 2655c0f14d..5bf1608143 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.h @@ -17,9 +17,9 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_LAYER_NORM_BETA_GAMMA_BACKPROP_FUSION_H_ #include -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/helper.h" -#include "pre_activate/ascend/ascend_helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/helper.h" +#include "backend/optimizer/ascend/ascend_helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/matmul_biasadd_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/matmul_biasadd_fusion.cc similarity index 91% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/matmul_biasadd_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/matmul_biasadd_fusion.cc index e81c804b71..fdd390677a 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/matmul_biasadd_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/matmul_biasadd_fusion.cc @@ -13,10 +13,10 @@ * See the License for the 
specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/matmul_biasadd_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/matmul_biasadd_fusion.h" #include -#include "pre_activate/common/helper.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/matmul_biasadd_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/matmul_biasadd_fusion.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/matmul_biasadd_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/matmul_biasadd_fusion.h index 56675243de..8c762435a9 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/matmul_biasadd_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/matmul_biasadd_fusion.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_MATMUL_BIASADD_FUSION_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_MATMUL_BIASADD_FUSION_H_ -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/momentum_lossscale_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/momentum_lossscale_fusion.cc similarity index 95% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/momentum_lossscale_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/momentum_lossscale_fusion.cc index e7a73a9c7f..90c5ac19a9 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/momentum_lossscale_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/momentum_lossscale_fusion.cc @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/ir_fusion/momentum_lossscale_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/momentum_lossscale_fusion.h" #include #include #include -#include "pre_activate/common/helper.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/momentum_lossscale_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/momentum_lossscale_fusion.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/momentum_lossscale_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/momentum_lossscale_fusion.h index c092e0ca22..8d36684a11 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/momentum_lossscale_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/momentum_lossscale_fusion.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_MOMENTUM_LOSSSCALE_FUSION_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_MOMENTUM_LOSSSCALE_FUSION_H_ -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/mul_add_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/mul_add_fusion.cc similarity index 94% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/mul_add_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/mul_add_fusion.cc index 2536255fc1..2d766891a0 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/mul_add_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/mul_add_fusion.cc @@ -13,15 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/ir_fusion/mul_add_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/mul_add_fusion.h" #include #include #include #include #include -#include "session/anf_runtime_algorithm.h" -#include "optimizer/opt.h" -#include "pre_activate/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/optimizer/opt.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/mul_add_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/mul_add_fusion.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/mul_add_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/mul_add_fusion.h index 4b4db2b312..0ad13e10e6 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/mul_add_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/mul_add_fusion.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_MUL_ADD_FUSION_H #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_MUL_ADD_FUSION_H -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/mul_addn_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/mul_addn_fusion.cc similarity index 94% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/mul_addn_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/mul_addn_fusion.cc index a5e4675c8f..3567864e2f 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/mul_addn_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/mul_addn_fusion.cc @@ -13,15 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/ir_fusion/mul_addn_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/mul_addn_fusion.h" #include #include #include #include #include -#include "session/anf_runtime_algorithm.h" -#include "optimizer/opt.h" -#include "pre_activate/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/optimizer/opt.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/mul_addn_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/mul_addn_fusion.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/mul_addn_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/mul_addn_fusion.h index d03309bf73..484cb75237 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/mul_addn_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/mul_addn_fusion.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_PASS_MUL_ADDN_FUSION_H #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_PASS_MUL_ADDN_FUSION_H -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/parameter_and_transop_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/parameter_and_transop_fusion.cc similarity index 91% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/parameter_and_transop_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/parameter_and_transop_fusion.cc index a3c87dad5d..0c2667e4d9 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/parameter_and_transop_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/parameter_and_transop_fusion.cc @@ -14,15 +14,15 @@ * limitations under the License. 
*/ -#include "pre_activate/ascend/ir_fusion/parameter_and_transop_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/parameter_and_transop_fusion.h" #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" -#include "operator/ops.h" -#include "device/kernel_info.h" -#include "pre_activate/common/helper.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ascend_helper.h" +#include "frontend/operator/ops.h" +#include "runtime/device/kernel_info.h" +#include "backend/optimizer/common/helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ascend_helper.h" namespace mindspore { namespace opt { @@ -63,7 +63,7 @@ const AnfNodePtr ParamTransRoad(const FuncGraphPtr &func_graph, const AnfNodePtr kernel::KernelBuildInfoPtr GetKernelBuildInfo(const CNodePtr &cast, const string &format, TypeId input_type, TypeId output_type) { MS_EXCEPTION_IF_NULL(cast); - auto kernel_info = cast->kernel_info(); + auto kernel_info = dynamic_cast(cast->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto cast_build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(cast_build_info); diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/parameter_and_transop_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/parameter_and_transop_fusion.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/parameter_and_transop_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/parameter_and_transop_fusion.h index 823ec083b1..0479fd3d63 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/parameter_and_transop_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/parameter_and_transop_fusion.h @@ -22,7 +22,7 @@ #include #include #include "ir/anf.h" -#include "pre_activate/common/pass.h" +#include "backend/optimizer/common/pass.h" namespace mindspore { namespace opt { diff 
--git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/refresh_parameter_format.cc similarity index 86% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/refresh_parameter_format.cc index 857670a384..ebaa429ebf 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/refresh_parameter_format.cc @@ -14,14 +14,14 @@ * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/refresh_parameter_format.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/ascend/ir_fusion/refresh_parameter_format.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" -#include "operator/ops.h" -#include "device/kernel_info.h" -#include "pre_activate/common/helper.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ascend_helper.h" +#include "frontend/operator/ops.h" +#include "runtime/device/kernel_info.h" +#include "backend/optimizer/common/helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ascend_helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/refresh_parameter_format.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/refresh_parameter_format.h index 0ba688b134..122bdf55ca 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/refresh_parameter_format.h @@ -21,7 +21,7 @@ #include #include #include "ir/anf.h" -#include "pre_activate/common/pass.h" +#include 
"backend/optimizer/common/pass.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/remove_reshape_pair.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/remove_reshape_pair.cc similarity index 92% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/remove_reshape_pair.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/remove_reshape_pair.cc index fa2815ff62..6f48eabbc5 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/remove_reshape_pair.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/remove_reshape_pair.cc @@ -14,11 +14,11 @@ * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/remove_reshape_pair.h" +#include "backend/optimizer/ascend/ir_fusion/remove_reshape_pair.h" #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/remove_reshape_pair.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/remove_reshape_pair.h similarity index 90% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/remove_reshape_pair.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/remove_reshape_pair.h index ddb25df70c..848713201a 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/remove_reshape_pair.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/remove_reshape_pair.h @@ -20,9 +20,9 @@ #include #include #include "ir/anf.h" -#include "pre_activate/common/pattern_engine.h" -#include "pre_activate/common/helper.h" -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/pattern_engine.h" +#include "backend/optimizer/common/helper.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git 
a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/reshape_transpose_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/reshape_transpose_fusion.cc similarity index 93% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/reshape_transpose_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/reshape_transpose_fusion.cc index 9b13002798..02a866930c 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/reshape_transpose_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/reshape_transpose_fusion.cc @@ -14,12 +14,12 @@ * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/reshape_transpose_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/reshape_transpose_fusion.h" #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" -#include "pre_activate/common/helper.h" -#include "operator/ops.h" +#include "backend/optimizer/common/helper.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/reshape_transpose_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/reshape_transpose_fusion.h similarity index 91% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/reshape_transpose_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/reshape_transpose_fusion.h index 5abf3e0d53..a76538019e 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/reshape_transpose_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/reshape_transpose_fusion.h @@ -22,9 +22,9 @@ #include #include #include "ir/anf.h" -#include "pre_activate/common/pattern_engine.h" -#include "pre_activate/common/helper.h" -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/pattern_engine.h" +#include "backend/optimizer/common/helper.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { 
namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/softmax_grad_ext_fusion.cc similarity index 94% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/softmax_grad_ext_fusion.cc index f95406e5e1..a3706bfb68 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/softmax_grad_ext_fusion.cc @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/softmax_grad_ext_fusion.h" #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "ir/primitive.h" #include "utils/utils.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/softmax_grad_ext_fusion.h similarity index 97% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/softmax_grad_ext_fusion.h index 59032e6973..1b884b2726 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/softmax_grad_ext_fusion.h @@ -18,7 +18,7 @@ #include #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/square_sum_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/square_sum_fusion.cc 
similarity index 91% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/square_sum_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/square_sum_fusion.cc index 6261b63882..67c881759a 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/square_sum_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/square_sum_fusion.cc @@ -13,19 +13,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/square_sum_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/square_sum_fusion.h" #include #include #include #include -#include "session/anf_runtime_algorithm.h" -#include "common/utils.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" -#include "operator/ops.h" -#include "pre_activate/common/helper.h" -#include "device/kernel_info.h" +#include "frontend/operator/ops.h" +#include "backend/optimizer/common/helper.h" +#include "runtime/device/kernel_info.h" namespace mindspore { namespace opt { @@ -51,7 +50,7 @@ CNodePtr GenerateSquareSumV1(const FuncGraphPtr &graph, const CNodePtr &square, square_sumv1->set_scope(sum->scope()); AnfAlgo::CopyNodeAttr(kAttrAxis, sum, square_sumv1); AnfAlgo::CopyNodeAttr(kAttrKeepDims, sum, square_sumv1); - auto names = MakeValue>({prim::kPrimSquare->name(), prim::kPrimReduceSum->name()}); + auto names = MakeValue>({square->fullname_with_scope(), sum->fullname_with_scope()}); AnfAlgo::SetNodeAttr(kAttrDatadumpOriginalNames, names, square_sumv1); return square_sumv1; } @@ -74,7 +73,7 @@ CNodePtr GenerateSquareSumV2(const FuncGraphPtr &graph, const CNodePtr &square, square_sumv2->set_scope(sum->scope()); AnfAlgo::CopyNodeAttr(kAttrAxis, sum, square_sumv2); AnfAlgo::CopyNodeAttr(kAttrKeepDims, sum, square_sumv2); - auto names = MakeValue>({prim::kPrimSquare->name(), prim::kPrimReduceSum->name()}); + auto names = MakeValue>({square->fullname_with_scope(), 
sum->fullname_with_scope()}); AnfAlgo::SetNodeAttr(kAttrDatadumpOriginalNames, names, square_sumv2); return square_sumv2; } diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/square_sum_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/square_sum_fusion.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/square_sum_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/square_sum_fusion.h index 5a694a5585..54189606ba 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/square_sum_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/square_sum_fusion.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_SQUARE_SUM_FUSION_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_SQUARE_SUM_FUSION_H_ -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/transpose_reshape_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transpose_reshape_fusion.cc similarity index 93% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/transpose_reshape_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transpose_reshape_fusion.cc index 250f86d9b1..46bf2a8604 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/transpose_reshape_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transpose_reshape_fusion.cc @@ -14,12 +14,12 @@ * limitations under the License. 
*/ -#include "pre_activate/ascend/ir_fusion/transpose_reshape_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/transpose_reshape_fusion.h" #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" -#include "pre_activate/common/helper.h" -#include "operator/ops.h" +#include "backend/optimizer/common/helper.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/transpose_reshape_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transpose_reshape_fusion.h similarity index 91% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/transpose_reshape_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transpose_reshape_fusion.h index 8b979f869d..39b8fe4687 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/transpose_reshape_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transpose_reshape_fusion.h @@ -22,9 +22,9 @@ #include #include #include "ir/anf.h" -#include "pre_activate/common/pattern_engine.h" -#include "pre_activate/common/helper.h" -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/pattern_engine.h" +#include "backend/optimizer/common/helper.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/transpose_transdata_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transpose_transdata_fusion.cc similarity index 95% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/transpose_transdata_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transpose_transdata_fusion.cc index e45fc2637f..b6da588e89 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/transpose_transdata_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transpose_transdata_fusion.cc @@ -14,11 +14,11 @@ * 
limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/transpose_transdata_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/transpose_transdata_fusion.h" #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/transpose_transdata_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transpose_transdata_fusion.h similarity index 89% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/transpose_transdata_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transpose_transdata_fusion.h index 833588cf45..852d5194ec 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/transpose_transdata_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transpose_transdata_fusion.h @@ -22,10 +22,10 @@ #include #include #include "ir/anf.h" -#include "pre_activate/common/pattern_engine.h" -#include "pre_activate/common/helper.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ascend_helper.h" +#include "backend/optimizer/common/pattern_engine.h" +#include "backend/optimizer/common/helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ascend_helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/common/common_backend_optimization.cc b/mindspore/ccsrc/backend/optimizer/common/common_backend_optimization.cc similarity index 83% rename from mindspore/ccsrc/pre_activate/common/common_backend_optimization.cc rename to mindspore/ccsrc/backend/optimizer/common/common_backend_optimization.cc index b930ac69c9..887b9a76a1 100644 --- a/mindspore/ccsrc/pre_activate/common/common_backend_optimization.cc +++ b/mindspore/ccsrc/backend/optimizer/common/common_backend_optimization.cc @@ -13,15 
+13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/common/common_backend_optimization.h" +#include "backend/optimizer/common/common_backend_optimization.h" #include #include -#include "pre_activate/common/optimizer.h" -#include "pre_activate/pass/convert_const_input_to_attr.h" -#include "pre_activate/pass/convert_tuple_output_to_maketuple.h" -#include "pre_activate/pass/convert_const_input_to_tensor_input.h" -#include "pre_activate/pass/convert_tuple_input_to_dynamic_input.h" -#include "pre_activate/pass/const_to_attr_strided_slice_grad.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/pass/convert_const_input_to_attr.h" +#include "backend/optimizer/pass/convert_tuple_output_to_maketuple.h" +#include "backend/optimizer/pass/convert_const_input_to_tensor_input.h" +#include "backend/optimizer/pass/convert_tuple_input_to_dynamic_input.h" +#include "backend/optimizer/pass/const_to_attr_strided_slice_grad.h" #include "utils/context/ms_context.h" #include "debug/anf_ir_dump.h" diff --git a/mindspore/ccsrc/pre_activate/common/common_backend_optimization.h b/mindspore/ccsrc/backend/optimizer/common/common_backend_optimization.h similarity index 96% rename from mindspore/ccsrc/pre_activate/common/common_backend_optimization.h rename to mindspore/ccsrc/backend/optimizer/common/common_backend_optimization.h index 6ce92da0dc..4127fc05de 100644 --- a/mindspore/ccsrc/pre_activate/common/common_backend_optimization.h +++ b/mindspore/ccsrc/backend/optimizer/common/common_backend_optimization.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_COMMON_COMMON_BACKEND_OPTIMIZATION_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_COMMON_COMMON_BACKEND_OPTIMIZATION_H_ #include -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { void BackendCommonOptimization(const std::shared_ptr &kernel_graph); diff --git 
a/mindspore/ccsrc/pre_activate/common/fusion_id_allocator.cc b/mindspore/ccsrc/backend/optimizer/common/fusion_id_allocator.cc similarity index 93% rename from mindspore/ccsrc/pre_activate/common/fusion_id_allocator.cc rename to mindspore/ccsrc/backend/optimizer/common/fusion_id_allocator.cc index 2b45fc6579..d21cabe54a 100644 --- a/mindspore/ccsrc/pre_activate/common/fusion_id_allocator.cc +++ b/mindspore/ccsrc/backend/optimizer/common/fusion_id_allocator.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/common/fusion_id_allocator.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/common/fusion_id_allocator.h b/mindspore/ccsrc/backend/optimizer/common/fusion_id_allocator.h similarity index 98% rename from mindspore/ccsrc/pre_activate/common/fusion_id_allocator.h rename to mindspore/ccsrc/backend/optimizer/common/fusion_id_allocator.h index 91e83600f2..bdee5ee84a 100644 --- a/mindspore/ccsrc/pre_activate/common/fusion_id_allocator.h +++ b/mindspore/ccsrc/backend/optimizer/common/fusion_id_allocator.h @@ -17,7 +17,7 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_COMMON_FUSION_ID_ALLOCATOR_H_ #include -#include "ir/base.h" +#include "base/base.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/common/helper.cc b/mindspore/ccsrc/backend/optimizer/common/helper.cc similarity index 99% rename from mindspore/ccsrc/pre_activate/common/helper.cc rename to mindspore/ccsrc/backend/optimizer/common/helper.cc index e1db0ed6ed..266130c6b1 100644 --- a/mindspore/ccsrc/pre_activate/common/helper.cc +++ b/mindspore/ccsrc/backend/optimizer/common/helper.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/helper.h" #include #include #include @@ -24,10 +24,10 @@ #include #include "utils/utils.h" #include "utils/base_ref.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "common/utils.h" -#include "device/kernel_info.h" +#include "runtime/device/kernel_info.h" #include "utils/context/ms_context.h" namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/common/helper.h b/mindspore/ccsrc/backend/optimizer/common/helper.h similarity index 98% rename from mindspore/ccsrc/pre_activate/common/helper.h rename to mindspore/ccsrc/backend/optimizer/common/helper.h index 49a1d47d0c..a267e65b53 100644 --- a/mindspore/ccsrc/pre_activate/common/helper.h +++ b/mindspore/ccsrc/backend/optimizer/common/helper.h @@ -23,9 +23,9 @@ #include #include #include "ir/func_graph.h" -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" #include "common/utils.h" -#include "pre_activate/common/pattern_engine.h" +#include "backend/optimizer/common/pattern_engine.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/common/node_pass.cc b/mindspore/ccsrc/backend/optimizer/common/node_pass.cc similarity index 95% rename from mindspore/ccsrc/pre_activate/common/node_pass.cc rename to mindspore/ccsrc/backend/optimizer/common/node_pass.cc index 876da8667b..16f5284a57 100644 --- a/mindspore/ccsrc/pre_activate/common/node_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/common/node_pass.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/common/node_pass.h" +#include "backend/optimizer/common/node_pass.h" #include #include @@ -22,7 +22,7 @@ #include "ir/anf.h" #include "ir/func_graph.h" #include "ir/manager.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/common/node_pass.h b/mindspore/ccsrc/backend/optimizer/common/node_pass.h similarity index 94% rename from mindspore/ccsrc/pre_activate/common/node_pass.h rename to mindspore/ccsrc/backend/optimizer/common/node_pass.h index 7750a59e59..780ae1a056 100644 --- a/mindspore/ccsrc/pre_activate/common/node_pass.h +++ b/mindspore/ccsrc/backend/optimizer/common/node_pass.h @@ -18,7 +18,7 @@ #include #include -#include "pre_activate/common/pass.h" +#include "backend/optimizer/common/pass.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/common/optimizer.cc b/mindspore/ccsrc/backend/optimizer/common/optimizer.cc similarity index 96% rename from mindspore/ccsrc/pre_activate/common/optimizer.cc rename to mindspore/ccsrc/backend/optimizer/common/optimizer.cc index 71a523ea1d..01e9111e86 100644 --- a/mindspore/ccsrc/pre_activate/common/optimizer.cc +++ b/mindspore/ccsrc/backend/optimizer/common/optimizer.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" #include #include @@ -23,8 +23,8 @@ #include #include -#include "pre_activate/common/pass_manager.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/pass_manager.h" +#include "backend/session/anf_runtime_algorithm.h" #include "ir/manager.h" namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/common/optimizer.h b/mindspore/ccsrc/backend/optimizer/common/optimizer.h similarity index 95% rename from mindspore/ccsrc/pre_activate/common/optimizer.h rename to mindspore/ccsrc/backend/optimizer/common/optimizer.h index 1f9961df6b..0b03c9c0ee 100644 --- a/mindspore/ccsrc/pre_activate/common/optimizer.h +++ b/mindspore/ccsrc/backend/optimizer/common/optimizer.h @@ -24,11 +24,11 @@ #include "ir/anf.h" #include "ir/func_graph.h" #include "ir/primitive.h" -#include "pre_activate/common/pass_manager.h" -#include "pre_activate/common/pattern_engine.h" +#include "backend/optimizer/common/pass_manager.h" +#include "backend/optimizer/common/pattern_engine.h" #include "utils/graph_utils.h" #include "common/utils.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/common/pass.h b/mindspore/ccsrc/backend/optimizer/common/pass.h similarity index 94% rename from mindspore/ccsrc/pre_activate/common/pass.h rename to mindspore/ccsrc/backend/optimizer/common/pass.h index 3d2468cddb..6e35fb1dc4 100644 --- a/mindspore/ccsrc/pre_activate/common/pass.h +++ b/mindspore/ccsrc/backend/optimizer/common/pass.h @@ -19,7 +19,7 @@ #include #include "ir/anf.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/common/pass_manager.cc b/mindspore/ccsrc/backend/optimizer/common/pass_manager.cc similarity index 95% rename from 
mindspore/ccsrc/pre_activate/common/pass_manager.cc rename to mindspore/ccsrc/backend/optimizer/common/pass_manager.cc index 3213b8a6d2..f9f41237e0 100644 --- a/mindspore/ccsrc/pre_activate/common/pass_manager.cc +++ b/mindspore/ccsrc/backend/optimizer/common/pass_manager.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/common/pass_manager.h" +#include "backend/optimizer/common/pass_manager.h" #include #include diff --git a/mindspore/ccsrc/pre_activate/common/pass_manager.h b/mindspore/ccsrc/backend/optimizer/common/pass_manager.h similarity index 93% rename from mindspore/ccsrc/pre_activate/common/pass_manager.h rename to mindspore/ccsrc/backend/optimizer/common/pass_manager.h index 38fe49b94c..51db27d250 100644 --- a/mindspore/ccsrc/pre_activate/common/pass_manager.h +++ b/mindspore/ccsrc/backend/optimizer/common/pass_manager.h @@ -21,8 +21,8 @@ #include #include -#include "pre_activate/common/pass.h" -#include "pre_activate/common/node_pass.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/node_pass.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/common/pattern_engine.cc b/mindspore/ccsrc/backend/optimizer/common/pattern_engine.cc similarity index 99% rename from mindspore/ccsrc/pre_activate/common/pattern_engine.cc rename to mindspore/ccsrc/backend/optimizer/common/pattern_engine.cc index 42f966aa3d..bd4efd82ef 100644 --- a/mindspore/ccsrc/pre_activate/common/pattern_engine.cc +++ b/mindspore/ccsrc/backend/optimizer/common/pattern_engine.cc @@ -16,14 +16,14 @@ * limitations under the License. 
*/ -#include "pre_activate/common/pattern_engine.h" +#include "backend/optimizer/common/pattern_engine.h" #include #include #include #include -#include "optimizer/opt.h" +#include "frontend/optimizer/opt.h" #include "ir/anf.h" #include "utils/convert_utils_base.h" diff --git a/mindspore/ccsrc/pre_activate/common/pattern_engine.h b/mindspore/ccsrc/backend/optimizer/common/pattern_engine.h similarity index 99% rename from mindspore/ccsrc/pre_activate/common/pattern_engine.h rename to mindspore/ccsrc/backend/optimizer/common/pattern_engine.h index 858b1aecb8..51fa8801b2 100644 --- a/mindspore/ccsrc/pre_activate/common/pattern_engine.h +++ b/mindspore/ccsrc/backend/optimizer/common/pattern_engine.h @@ -33,8 +33,8 @@ #include #include -#include "pre_activate/common/visit.h" -#include "ir/base.h" +#include "backend/optimizer/common/visit.h" +#include "base/base.h" #include "utils/log_adapter.h" #include "utils/base_ref.h" diff --git a/mindspore/ccsrc/pre_activate/common/visit.cc b/mindspore/ccsrc/backend/optimizer/common/visit.cc similarity index 98% rename from mindspore/ccsrc/pre_activate/common/visit.cc rename to mindspore/ccsrc/backend/optimizer/common/visit.cc index 179177dd67..d0b52609f8 100644 --- a/mindspore/ccsrc/pre_activate/common/visit.cc +++ b/mindspore/ccsrc/backend/optimizer/common/visit.cc @@ -16,14 +16,14 @@ * limitations under the License. 
*/ -#include "pre_activate/common/visit.h" +#include "backend/optimizer/common/visit.h" #include #include #include #include -#include "pre_activate/common/pattern_engine.h" +#include "backend/optimizer/common/pattern_engine.h" #include "utils/any.h" #include "ir/anf.h" #include "ir/func_graph.h" diff --git a/mindspore/ccsrc/pre_activate/common/visit.h b/mindspore/ccsrc/backend/optimizer/common/visit.h similarity index 98% rename from mindspore/ccsrc/pre_activate/common/visit.h rename to mindspore/ccsrc/backend/optimizer/common/visit.h index 2017b03b2f..9799d3f9c1 100644 --- a/mindspore/ccsrc/pre_activate/common/visit.h +++ b/mindspore/ccsrc/backend/optimizer/common/visit.h @@ -26,7 +26,7 @@ #include #include -#include "ir/base.h" +#include "base/base.h" #include "utils/base_ref.h" // namespace to support utils definition diff --git a/mindspore/ccsrc/pre_activate/gpu/adam_fusion.cc b/mindspore/ccsrc/backend/optimizer/gpu/adam_fusion.cc similarity index 97% rename from mindspore/ccsrc/pre_activate/gpu/adam_fusion.cc rename to mindspore/ccsrc/backend/optimizer/gpu/adam_fusion.cc index 8111ee429d..41e4abee27 100644 --- a/mindspore/ccsrc/pre_activate/gpu/adam_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/gpu/adam_fusion.cc @@ -13,16 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/gpu/adam_fusion.h" +#include "backend/optimizer/gpu/adam_fusion.h" #include #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "ir/primitive.h" #include "utils/utils.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/gpu/adam_fusion.h b/mindspore/ccsrc/backend/optimizer/gpu/adam_fusion.h similarity index 97% rename from mindspore/ccsrc/pre_activate/gpu/adam_fusion.h rename to mindspore/ccsrc/backend/optimizer/gpu/adam_fusion.h index d8c10a0986..f87defc04c 100644 --- a/mindspore/ccsrc/pre_activate/gpu/adam_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/gpu/adam_fusion.h @@ -17,7 +17,7 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_GPU_IR_FUSION_ADAM_FUSION_H_ #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/gpu/adam_weight_decay_fusion.cc b/mindspore/ccsrc/backend/optimizer/gpu/adam_weight_decay_fusion.cc similarity index 97% rename from mindspore/ccsrc/pre_activate/gpu/adam_weight_decay_fusion.cc rename to mindspore/ccsrc/backend/optimizer/gpu/adam_weight_decay_fusion.cc index c950cbd56f..c95945c980 100644 --- a/mindspore/ccsrc/pre_activate/gpu/adam_weight_decay_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/gpu/adam_weight_decay_fusion.cc @@ -13,16 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/gpu/adam_weight_decay_fusion.h" +#include "backend/optimizer/gpu/adam_weight_decay_fusion.h" #include #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "ir/primitive.h" #include "utils/utils.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/gpu/adam_weight_decay_fusion.h b/mindspore/ccsrc/backend/optimizer/gpu/adam_weight_decay_fusion.h similarity index 97% rename from mindspore/ccsrc/pre_activate/gpu/adam_weight_decay_fusion.h rename to mindspore/ccsrc/backend/optimizer/gpu/adam_weight_decay_fusion.h index 0ada5756e3..53477ec898 100644 --- a/mindspore/ccsrc/pre_activate/gpu/adam_weight_decay_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/gpu/adam_weight_decay_fusion.h @@ -17,7 +17,7 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_GPU_IR_FUSION_ADAM_WEIGHT_DECAY_FUSION_H_ #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/kernel_refcount.cc b/mindspore/ccsrc/backend/optimizer/mem_reuse/kernel_refcount.cc similarity index 97% rename from mindspore/ccsrc/pre_activate/mem_reuse/kernel_refcount.cc rename to mindspore/ccsrc/backend/optimizer/mem_reuse/kernel_refcount.cc index c75860a8df..b531b0caa5 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/kernel_refcount.cc +++ b/mindspore/ccsrc/backend/optimizer/mem_reuse/kernel_refcount.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/mem_reuse/kernel_refcount.h" +#include "backend/optimizer/mem_reuse/kernel_refcount.h" #include #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/kernel_refcount.h b/mindspore/ccsrc/backend/optimizer/mem_reuse/kernel_refcount.h similarity index 100% rename from mindspore/ccsrc/pre_activate/mem_reuse/kernel_refcount.h rename to mindspore/ccsrc/backend/optimizer/mem_reuse/kernel_refcount.h diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_copy_manager.h b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_copy_manager.h similarity index 97% rename from mindspore/ccsrc/pre_activate/mem_reuse/mem_copy_manager.h rename to mindspore/ccsrc/backend/optimizer/mem_reuse/mem_copy_manager.h index ea9947b41b..1952415515 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_copy_manager.h +++ b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_copy_manager.h @@ -22,8 +22,8 @@ #include #include #include -#include "session/kernel_graph.h" -#include "kernel/kernel.h" +#include "backend/session/kernel_graph.h" +#include "backend/kernel_compiler/kernel.h" using HostAddress = mindspore::kernel::Address; namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_dynamic_allocator.cc b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.cc similarity index 96% rename from mindspore/ccsrc/pre_activate/mem_reuse/mem_dynamic_allocator.cc rename to mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.cc index 095f8f6495..8f705be556 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_dynamic_allocator.cc +++ b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "pre_activate/mem_reuse/mem_dynamic_allocator.h" +#include "backend/optimizer/mem_reuse/mem_dynamic_allocator.h" #include "common/utils.h" #include "utils/convert_utils.h" #include "utils/log_adapter.h" @@ -184,14 +184,16 @@ DynamicMemBlockPtr DynamicMemPoolBestFit::FindMemBlock(const DeviceMemPtr device if (iter != global_mem_block_list_.begin()) { return *(--iter); } - MS_LOG(ERROR) << "Can't find the mem_block of the device address[" << device_addr << "]."; return nullptr; } void DynamicMemPoolBestFit::FreeTensorMem(const DeviceMemPtr device_addr) { MS_EXCEPTION_IF_NULL(device_addr); auto mem_block = FindMemBlock(device_addr); - MS_EXCEPTION_IF_NULL(mem_block); + if (mem_block == nullptr) { + MS_LOG(WARNING) << "Can't find the mem_block of the device address[" << device_addr << "]."; + return; + } CombineMemBuf(mem_block, device_addr); } diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_dynamic_allocator.h b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.h similarity index 100% rename from mindspore/ccsrc/pre_activate/mem_reuse/mem_dynamic_allocator.h rename to mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.h diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse.cc b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse.cc similarity index 96% rename from mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse.cc rename to mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse.cc index d550b77bba..263ceaec63 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse.cc +++ b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse.cc @@ -14,11 +14,11 @@ * limitations under the License. 
*/ -#include "pre_activate/mem_reuse/mem_reuse.h" +#include "backend/optimizer/mem_reuse/mem_reuse.h" #include #include -#include "pre_activate/mem_reuse/mem_reuse_checker.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/mem_reuse/mem_reuse_checker.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace memreuse { @@ -329,22 +329,25 @@ void MemReuseUtil::SetSummaryNodesRefCount() { return; } + size_t total_summary_size = 0; for (auto &node_item : summary_nodes) { auto node = node_item.second.first; size_t index = IntToSize(node_item.second.second); - MS_LOG(INFO) << "set summary node's ref count, node: " << node->fullname_with_scope() << " index: " << index; if (kernel_output_refs_.find(node.get()) != kernel_output_refs_.end()) { KernelRefCountPtr kernel_ref = kernel_output_refs_[node.get()][index]; kernel_ref->ref_count_ = kMaxRefCount; kernel_ref->ref_count_dynamic_use_ = kMaxRefCount; + total_summary_size += kernel_ref->size_; + MS_LOG(INFO) << "Set summary node's ref count, node: " << node->fullname_with_scope() << " index: " << index; } else { - MS_LOG(WARNING) << "can't find summary node's kernel_def " << node->fullname_with_scope(); + MS_LOG(WARNING) << "Can't find summary node's kernel_def " << node->fullname_with_scope() << " index: " << index; } } #ifdef MEM_REUSE_DEBUG auto graph = *graph_; MemReuseChecker::GetInstance().CheckMemReuseIR(total_refs_list_, kernel_def_ptr_list_, &graph); #endif + MS_LOG(INFO) << "Special Tensor total size: SummaryNodes: " << total_summary_size; } void MemReuseUtil::SetGraphOutputRefCount() { diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse.h b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse.h similarity index 95% rename from mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse.h rename to mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse.h index 37281a7128..b286bcbc2c 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse.h +++ 
b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse.h @@ -19,10 +19,10 @@ #include #include #include -#include "pre_activate/mem_reuse/kernel_refcount.h" -#include "session/anf_runtime_algorithm.h" -#include "session/kernel_graph.h" -#include "kernel/tbe/tbe_utils.h" +#include "backend/optimizer/mem_reuse/kernel_refcount.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/session/kernel_graph.h" +#include "backend/kernel_compiler/tbe/tbe_utils.h" using mindspore::kernel::tbe::TbeUtils; namespace mindspore { namespace memreuse { diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_allocator.cc b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_allocator.cc similarity index 84% rename from mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_allocator.cc rename to mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_allocator.cc index b36147f9bb..d1a50a0dfe 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_allocator.cc +++ b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_allocator.cc @@ -13,10 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - -#include "pre_activate/mem_reuse/mem_reuse_allocator.h" -#include "pre_activate/mem_reuse/mem_reuse.h" -#include "pre_activate/mem_reuse/mem_reuse_checker.h" +#include "backend/optimizer/mem_reuse/mem_reuse_allocator.h" +#include "backend/optimizer/mem_reuse/mem_reuse.h" +#include "backend/optimizer/mem_reuse/mem_reuse_checker.h" +#ifdef ENABLE_D +#include "runtime/device/ascend/ascend_stream_assign.h" +#endif +#ifdef ENABLE_DEBUGGER +#include "debug/debugger/debugger.h" +#include "debug/debug_services.h" +#endif namespace mindspore { namespace memreuse { @@ -34,6 +40,9 @@ void BestFitMemReuse::InitMemReuseInfo(const MemReuseUtil *mem_reuse_util_ptr) { wk->size_ = AlignMemorySize(wk->size_); wk->ref_count_ = 1; } +#ifdef ENABLE_D + stream_groups_ = device::ascend::AscendStreamAssign::GetInstance().get_stream_group(); +#endif } void BestFitMemReuse::InitKernelDependence() { @@ -63,21 +72,67 @@ void BestFitMemReuse::InitKernelDependence() { } } -bool BestFitMemReuse::IsUsable(const KernelDefPtr &kernel_curr, const KernelDefPtr &kernel_prev) { +bool BestFitMemReuse::IsUsable(const KernelDefPtr &kernel_curr, const MembufPtr &mem_buf) { // determine whether the kernel_curr can reuse kernel_prev's output tensor membuf MS_EXCEPTION_IF_NULL(kernel_curr); + MS_EXCEPTION_IF_NULL(mem_buf); + auto kernel_prev = mem_buf->used_kernel_; MS_EXCEPTION_IF_NULL(kernel_prev); +#ifdef ENABLE_DEBUGGER + auto debugger_ = mindspore::Debugger::GetInstance(); + DebugServices *debug_services = debugger_->debug_services(); + auto watchpoint_table = debug_services->GetWatchpointTable(); + std::string current_kernel_name = kernel_curr->scope_full_name(); + if (debug_services->IsWatchPoint(current_kernel_name, watchpoint_table)) { + return false; + } +#endif auto curr_stream_id = kernel_curr->stream_id(); auto prev_stream_id = kernel_prev->stream_id(); if (curr_stream_id == prev_stream_id) { + mem_buf->type_ = IN_STREAM_REUSE; + return true; + } + + bool reuse_between_streams = true; + for 
(auto &stream_group : stream_groups_) { + size_t cur_index = UINT32_MAX; + size_t prev_index = UINT32_MAX; + for (size_t index = 0; index < stream_group.size(); index++) { + if (curr_stream_id == stream_group[index]) { + cur_index = index; + continue; + } + if (prev_stream_id == stream_group[index]) { + prev_index = index; + continue; + } + } + if ((prev_index != UINT32_MAX) && (cur_index == UINT32_MAX || (prev_index > cur_index))) { + // previous stream and current stream are not in the same group can't be reused + // previous stream is behind current stream can't be reused + reuse_between_streams = false; + break; + } + } + + if (reuse_between_streams) { + mem_buf->type_ = BETWEEN_STREAMS_REUSE; return true; } + auto iter = kernel_front_map_.find(kernel_curr); if (iter == kernel_front_map_.end()) { MS_LOG(EXCEPTION) << kernel_curr->scope_full_name() << " is not init."; } auto kernel_curr_front = iter->second; - return kernel_curr_front.count(kernel_prev); + auto depend_count = kernel_curr_front.count(kernel_prev); + if (depend_count) { + mem_buf->type_ = KERNEL_DEPENDENCE_REUSE; + return true; + } + + return false; } void BestFitMemReuse::AssignNodeOutputOffset() { @@ -135,7 +190,7 @@ std::map BestFitMemReuse::GetReusableMembufMap(size_t tensor_siz auto membuf = membuf_ptr_list_[i]; auto index = i; bool is_membuf_ok = membuf->status_ == kUnused && membuf->size_ >= tensor_size; - if (is_membuf_ok && IsUsable(current_kernel_, membuf->used_kernel_)) { + if (is_membuf_ok && IsUsable(current_kernel_, membuf)) { (void)size_map.insert(std::make_pair(membuf->size_, index)); break; } @@ -163,8 +218,8 @@ void BestFitMemReuse::SplitMembuf(const KernelRefCount *tensor_desc, size_t memb auto bias = membuf->size_ - tensor_desc->size_; membuf->size_ = tensor_desc->size_; // to check if spilt membuf can be merge - auto new_membuf = - std::make_shared(kUnused, bias, membuf->offset_ + membuf->size_, kInvalidIndex, current_kernel_); + auto new_membuf = std::make_shared(kUnused, 
bias, membuf->offset_ + membuf->size_, kInvalidIndex, + membuf->type_, current_kernel_); (void)membuf_ptr_list_.insert(membuf_ptr_list_.begin() + SizeToInt(membuf_index + 1), new_membuf); } @@ -176,7 +231,7 @@ void BestFitMemReuse::AddNewMembufPtr(KernelRefCount *tensor_desc, int flag) { } auto membuf_size = tensor_desc->size_; auto real_index = GetRealIndex(IntToSize(tensor_desc->index_), flag); - auto membuf = std::make_shared(kReused, membuf_size, membuf_offset, real_index, current_kernel_); + auto membuf = std::make_shared(kReused, membuf_size, membuf_offset, real_index, NEW, current_kernel_); membuf_ptr_list_.push_back(membuf); tensor_desc->offset_ = membuf_offset; } @@ -242,7 +297,7 @@ void BestFitMemReuse::ReleaseMembuf(size_t tensor_index, int flag) { auto membuf_next = (*next_iter); MS_EXCEPTION_IF_NULL(membuf_next); if (membuf_next->status_ == kUnused) { - bool is_merge = IsUsable(current_kernel_, membuf_next->used_kernel_); + bool is_merge = IsUsable(current_kernel_, membuf_next); if (is_merge) { membuf->size_ += membuf_next->size_; (void)membuf_ptr_list_.erase(next_iter); @@ -254,7 +309,7 @@ void BestFitMemReuse::ReleaseMembuf(size_t tensor_index, int flag) { auto membuf_prev = (*prev_iter); MS_EXCEPTION_IF_NULL(membuf_prev); if (membuf_prev->status_ == kUnused) { - bool is_merge = IsUsable(current_kernel_, membuf_prev->used_kernel_); + bool is_merge = IsUsable(current_kernel_, membuf_prev); if (is_merge) { membuf->size_ += membuf_prev->size_; membuf->offset_ = membuf_prev->offset_; diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_allocator.h b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_allocator.h similarity index 91% rename from mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_allocator.h rename to mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_allocator.h index 9aeda05dc3..ef1cfd3e11 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_allocator.h +++ 
b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_allocator.h @@ -30,8 +30,8 @@ #include #include #include -#include "pre_activate/mem_reuse/kernel_refcount.h" -#include "pre_activate/mem_reuse/mem_reuse.h" +#include "backend/optimizer/mem_reuse/kernel_refcount.h" +#include "backend/optimizer/mem_reuse/mem_reuse.h" namespace mindspore { namespace memreuse { @@ -40,11 +40,12 @@ static constexpr int kDynamicMem = -1; static constexpr int kWorkspaceMem = 1; static constexpr size_t kTotalSize = 0; enum Status { kUnused, kReused }; +enum MEMTYPE { NEW, IN_STREAM_REUSE, BETWEEN_STREAMS_REUSE, KERNEL_DEPENDENCE_REUSE }; class Membuf { public: Membuf() = default; - Membuf(Status status, size_t size, size_t offset, int index, const KernelDefPtr &used_kernel) - : status_(status), size_(size), offset_(offset), index_(index), used_kernel_(used_kernel) {} + Membuf(Status status, size_t size, size_t offset, int index, MEMTYPE type, const KernelDefPtr &used_kernel) + : status_(status), size_(size), offset_(offset), index_(index), type_(type), used_kernel_(used_kernel) {} ~Membuf() = default; // Memory block status flags Status status_ = kUnused; @@ -52,6 +53,7 @@ class Membuf { size_t offset_{0}; // Store the tensor index stored in this memory block at a certain moment int index_{0}; + MEMTYPE type_{NEW}; KernelDefPtr used_kernel_; }; using MembufPtr = std::shared_ptr; @@ -122,10 +124,10 @@ class BestFitMemReuse { /** * determine if the kernel_curr can reuse the output tensor add of kernel_prev * @param kernel_curr, current kernel - * @param kernel_prev, the membuf used by this kernel + * @param mem_buf, the membuf * @return bool */ - bool IsUsable(const KernelDefPtr &kernel_curr, const KernelDefPtr &kernel_prev); + bool IsUsable(const KernelDefPtr &kernel_curr, const MembufPtr &mem_buf); /** * init the dependence of all kernels in the graph */ @@ -150,6 +152,7 @@ class BestFitMemReuse { std::vector membuf_ptr_list_; // kernel_front_map_, key: the kernel_def, value: kernels 
before this kernel_def std::map> kernel_front_map_; + std::vector> stream_groups_; }; } // namespace memreuse } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_checker.cc b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_checker.cc similarity index 97% rename from mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_checker.cc rename to mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_checker.cc index 5cd6a5f50e..b93bf42f9f 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_checker.cc +++ b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_checker.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "pre_activate/mem_reuse/mem_reuse_checker.h" +#include "backend/optimizer/mem_reuse/mem_reuse_checker.h" #include #include #include @@ -413,7 +413,8 @@ void MemReuseChecker::CheckNormalIR(const session::KernelGraph *graph) { void MemReuseChecker::SetMembuInfos(const KernelDef *op_def, const std::vector &membuf_ptr_list) { std::vector curr_mem_infos; for (const auto &mem : membuf_ptr_list) { - auto mem_checker = std::make_shared(mem->status_, mem->size_, mem->offset_, mem->index_, mem->used_kernel_); + auto mem_checker = + std::make_shared(mem->status_, mem->size_, mem->offset_, mem->index_, mem->type_, mem->used_kernel_); curr_mem_infos.push_back(mem_checker); } membuf_all_infos_.push_back(curr_mem_infos); @@ -427,7 +428,8 @@ void MemReuseChecker::SetAddNewMembuInfos(const KernelDef *op_def, const std::ve std::vector add_new_curr_mem; for (const auto &mem : membuf_ptr_list) { - auto mem_checker = std::make_shared(mem->status_, mem->size_, mem->offset_, mem->index_, mem->used_kernel_); + auto mem_checker = + std::make_shared(mem->status_, mem->size_, mem->offset_, mem->index_, mem->type_, mem->used_kernel_); add_new_curr_mem.push_back(mem_checker); } add_new_mem_infos_.push_back(add_new_curr_mem); @@ -451,6 +453,7 @@ void MemReuseChecker::ExportEachMembufInfo(std::ofstream &ofs) { << "mem_size\t" << 
"mem_head\t" << "mem_tail\t" + << "mem_type\t" << "used_kernel\n"; size_t curr_used = 0; size_t curr_allocated = 0; @@ -461,8 +464,8 @@ void MemReuseChecker::ExportEachMembufInfo(std::ofstream &ofs) { << "streamID[@" << membuf->used_kernel_->stream_id() << "]" << "\t" << "#" << static_cast(membuf->status_) << "\t%" << membuf->index_ << "T" - << "\t" << membuf->size_ << "\t" << membuf->offset_ << "\t" << membuf->offset_ + membuf->size_ << "\t" - << GetSplitName(used_kernel) << "\n"; + << "\t" << membuf->size_ << "\t" << membuf->offset_ << "\t\t" << membuf->offset_ + membuf->size_ << "\t" + << "\t" << static_cast(membuf->type_) << "\t" << GetSplitName(used_kernel) << "\n"; if (membuf->status_ == kReused) { curr_used += membuf->size_; } diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_checker.h b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_checker.h similarity index 94% rename from mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_checker.h rename to mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_checker.h index 5fd3d0f5ae..3c4a00a3ca 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_checker.h +++ b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_checker.h @@ -22,11 +22,11 @@ #include #include #include -#include "mindspore/ccsrc/ir/anf.h" -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/mem_reuse/mem_reuse.h" -#include "kernel/common_utils.h" -#include "pre_activate/mem_reuse/mem_reuse_allocator.h" +#include "mindspore/core/ir/anf.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/mem_reuse/mem_reuse.h" +#include "backend/kernel_compiler/common_utils.h" +#include "backend/optimizer/mem_reuse/mem_reuse_allocator.h" namespace mindspore { namespace memreuse { constexpr auto kSend = "Send"; diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_manager.cc b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_swap_manager.cc similarity index 98% rename from 
mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_manager.cc rename to mindspore/ccsrc/backend/optimizer/mem_reuse/mem_swap_manager.cc index 14073bfbc9..41bf5460c3 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_manager.cc +++ b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_swap_manager.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "pre_activate/mem_reuse/mem_swap_manager.h" +#include "backend/optimizer/mem_reuse/mem_swap_manager.h" #include -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_manager.h b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_swap_manager.h similarity index 98% rename from mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_manager.h rename to mindspore/ccsrc/backend/optimizer/mem_reuse/mem_swap_manager.h index 1969dadb54..d8620c8516 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_manager.h +++ b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_swap_manager.h @@ -23,7 +23,7 @@ #include #include #include -#include "pre_activate/mem_reuse/mem_copy_manager.h" +#include "backend/optimizer/mem_reuse/mem_copy_manager.h" using PerformPair = std::pair; namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/pass/add_atomic_clean.cc b/mindspore/ccsrc/backend/optimizer/pass/add_atomic_clean.cc similarity index 96% rename from mindspore/ccsrc/pre_activate/pass/add_atomic_clean.cc rename to mindspore/ccsrc/backend/optimizer/pass/add_atomic_clean.cc index 9df34a1c59..900dd0d563 100644 --- a/mindspore/ccsrc/pre_activate/pass/add_atomic_clean.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/add_atomic_clean.cc @@ -14,16 +14,16 @@ * limitations under the License. 
*/ -#include "pre_activate/pass/add_atomic_clean.h" +#include "backend/optimizer/pass/add_atomic_clean.h" #include #include #include -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "utils/utils.h" #include "utils/graph_utils.h" #include "utils/log_adapter.h" -#include "session/anf_runtime_algorithm.h" -#include "session/kernel_graph.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/session/kernel_graph.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/pass/add_atomic_clean.h b/mindspore/ccsrc/backend/optimizer/pass/add_atomic_clean.h similarity index 95% rename from mindspore/ccsrc/pre_activate/pass/add_atomic_clean.h rename to mindspore/ccsrc/backend/optimizer/pass/add_atomic_clean.h index bb1edb0e35..7e3fbdb472 100644 --- a/mindspore/ccsrc/pre_activate/pass/add_atomic_clean.h +++ b/mindspore/ccsrc/backend/optimizer/pass/add_atomic_clean.h @@ -18,7 +18,7 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_ADD_ATOMIC_CLEAN_H_ #include -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/common_subexpression_elimination.cc b/mindspore/ccsrc/backend/optimizer/pass/common_subexpression_elimination.cc similarity index 88% rename from mindspore/ccsrc/pre_activate/pass/common_subexpression_elimination.cc rename to mindspore/ccsrc/backend/optimizer/pass/common_subexpression_elimination.cc index 297a167aa8..133a7e764a 100644 --- a/mindspore/ccsrc/pre_activate/pass/common_subexpression_elimination.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/common_subexpression_elimination.cc @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/pass/common_subexpression_elimination.h" +#include "backend/optimizer/pass/common_subexpression_elimination.h" #include -#include "device/kernel_info.h" +#include "runtime/device/kernel_info.h" namespace mindspore { namespace opt { @@ -23,8 +23,8 @@ namespace { bool CheckEqualKernelBuildInfo(const AnfNodePtr &main, const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(main); MS_EXCEPTION_IF_NULL(node); - auto main_kernel_info = main->kernel_info(); - auto node_kernel_info = node->kernel_info(); + auto main_kernel_info = dynamic_cast(main->kernel_info()); + auto node_kernel_info = dynamic_cast(node->kernel_info()); if (main_kernel_info == nullptr && node_kernel_info == nullptr) { return true; } diff --git a/mindspore/ccsrc/pre_activate/pass/common_subexpression_elimination.h b/mindspore/ccsrc/backend/optimizer/pass/common_subexpression_elimination.h similarity index 94% rename from mindspore/ccsrc/pre_activate/pass/common_subexpression_elimination.h rename to mindspore/ccsrc/backend/optimizer/pass/common_subexpression_elimination.h index 18f433ab95..bac870e59f 100644 --- a/mindspore/ccsrc/pre_activate/pass/common_subexpression_elimination.h +++ b/mindspore/ccsrc/backend/optimizer/pass/common_subexpression_elimination.h @@ -15,8 +15,8 @@ */ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_COMMON_SUBEXPRESSION_ELIMINATION_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_COMMON_SUBEXPRESSION_ELIMINATION_H_ -#include "pre_activate/common/pass.h" -#include "optimizer/cse.h" +#include "backend/optimizer/common/pass.h" +#include "frontend/optimizer/cse.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/communication_op_fusion.cc b/mindspore/ccsrc/backend/optimizer/pass/communication_op_fusion.cc similarity index 95% rename from mindspore/ccsrc/pre_activate/pass/communication_op_fusion.cc rename to mindspore/ccsrc/backend/optimizer/pass/communication_op_fusion.cc index aa4690abcb..3ba055880c 100644 --- 
a/mindspore/ccsrc/pre_activate/pass/communication_op_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/communication_op_fusion.cc @@ -13,18 +13,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/pass/communication_op_fusion.h" +#include "backend/optimizer/pass/communication_op_fusion.h" #include #include #include #include "utils/graph_utils.h" -#include "operator/ops.h" -#include "device/kernel_info.h" -#include "session/anf_runtime_algorithm.h" -#include "kernel/kernel_build_info.h" -#include "parallel/context.h" +#include "frontend/operator/ops.h" +#include "runtime/device/kernel_info.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "frontend/parallel/context.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/communication_op_fusion.h b/mindspore/ccsrc/backend/optimizer/pass/communication_op_fusion.h similarity index 96% rename from mindspore/ccsrc/pre_activate/pass/communication_op_fusion.h rename to mindspore/ccsrc/backend/optimizer/pass/communication_op_fusion.h index d00180f97f..0e7cf9762d 100644 --- a/mindspore/ccsrc/pre_activate/pass/communication_op_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/pass/communication_op_fusion.h @@ -19,7 +19,7 @@ #include #include -#include "pre_activate/common/pass.h" +#include "backend/optimizer/common/pass.h" #include "ir/func_graph.h" #include "ir/anf.h" #include "utils/utils.h" diff --git a/mindspore/ccsrc/pre_activate/pass/const_input_to_attr_registry.cc b/mindspore/ccsrc/backend/optimizer/pass/const_input_to_attr_registry.cc similarity index 97% rename from mindspore/ccsrc/pre_activate/pass/const_input_to_attr_registry.cc rename to mindspore/ccsrc/backend/optimizer/pass/const_input_to_attr_registry.cc index 6a557388ad..814ad9567c 100644 --- a/mindspore/ccsrc/pre_activate/pass/const_input_to_attr_registry.cc +++ 
b/mindspore/ccsrc/backend/optimizer/pass/const_input_to_attr_registry.cc @@ -13,13 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/pass/const_input_to_attr_registry.h" +#include "backend/optimizer/pass/const_input_to_attr_registry.h" #include #include "utils/utils.h" #include "utils/log_adapter.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { @@ -72,6 +72,7 @@ ConstInputToAttrInfoRegistry::ConstInputToAttrInfoRegistry() { Register(kSpaceToBatchOpName, {1}); Register(kBatchToSpaceOpName, {1}); Register(kPadOpName, {1}); + Register(kPushOpName, {1}); } ConstInputToAttrInfoRegistry &ConstInputToAttrInfoRegistry::Instance() { diff --git a/mindspore/ccsrc/pre_activate/pass/const_input_to_attr_registry.h b/mindspore/ccsrc/backend/optimizer/pass/const_input_to_attr_registry.h similarity index 100% rename from mindspore/ccsrc/pre_activate/pass/const_input_to_attr_registry.h rename to mindspore/ccsrc/backend/optimizer/pass/const_input_to_attr_registry.h diff --git a/mindspore/ccsrc/pre_activate/pass/const_to_attr_strided_slice_grad.cc b/mindspore/ccsrc/backend/optimizer/pass/const_to_attr_strided_slice_grad.cc similarity index 95% rename from mindspore/ccsrc/pre_activate/pass/const_to_attr_strided_slice_grad.cc rename to mindspore/ccsrc/backend/optimizer/pass/const_to_attr_strided_slice_grad.cc index b0e2ab044c..51d399bbcd 100644 --- a/mindspore/ccsrc/pre_activate/pass/const_to_attr_strided_slice_grad.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/const_to_attr_strided_slice_grad.cc @@ -13,15 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/pass/const_to_attr_strided_slice_grad.h" +#include "backend/optimizer/pass/const_to_attr_strided_slice_grad.h" #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "ir/primitive.h" #include "utils/context/ms_context.h" #include "utils/utils.h" -#include "pipeline/static_analysis/abstract_value.h" -#include "pre_activate/common/helper.h" +#include "abstract/abstract_value.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/const_to_attr_strided_slice_grad.h b/mindspore/ccsrc/backend/optimizer/pass/const_to_attr_strided_slice_grad.h similarity index 96% rename from mindspore/ccsrc/pre_activate/pass/const_to_attr_strided_slice_grad.h rename to mindspore/ccsrc/backend/optimizer/pass/const_to_attr_strided_slice_grad.h index 2e364244bf..83b44d5f51 100644 --- a/mindspore/ccsrc/pre_activate/pass/const_to_attr_strided_slice_grad.h +++ b/mindspore/ccsrc/backend/optimizer/pass/const_to_attr_strided_slice_grad.h @@ -17,7 +17,7 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_CONST_TO_ATTR_STRIDED_SLICE_GRAD_H_ #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_attr.cc b/mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_attr.cc similarity index 76% rename from mindspore/ccsrc/pre_activate/pass/convert_const_input_to_attr.cc rename to mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_attr.cc index 89834cbc65..f2e35351b4 100644 --- a/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_attr.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_attr.cc @@ -13,20 +13,20 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/pass/convert_const_input_to_attr.h" +#include "backend/optimizer/pass/convert_const_input_to_attr.h" #include #include #include #include -#include "pre_activate/pass/const_input_to_attr_registry.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/pass/const_input_to_attr_registry.h" +#include "backend/optimizer/common/helper.h" #include "utils/utils.h" #include "utils/context/ms_context.h" -#include "operator/ops.h" -#include "session/anf_runtime_algorithm.h" -#include "kernel/common_utils.h" +#include "frontend/operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/common_utils.h" namespace mindspore { namespace opt { @@ -50,12 +50,6 @@ const AnfNodePtr ConvertConstInputToAttr::Process(const FuncGraphPtr &, const An if (!ConstInputToAttrInfoRegistry::Instance().GetRegisterByOpName(AnfAlgo::GetCNodeName(cnode), ®)) { continue; } - if (AnfAlgo::GetCNodeName(cnode) == prim::kPrimEmbeddingLookup->name() || - AnfAlgo::GetCNodeName(cnode) == prim::kPrimEmbeddingLookupCommGrad->name()) { - if (!AnfAlgo::HasNodeAttr(kAttrPrimitiveTarget, cnode)) { - continue; - } - } ConstInputToAttr(cnode, reg.GetConstInputAttrInfo()); } return node; diff --git a/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_attr.h b/mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_attr.h similarity index 96% rename from mindspore/ccsrc/pre_activate/pass/convert_const_input_to_attr.h rename to mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_attr.h index e124ff8cf4..e6def42fa1 100644 --- a/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_attr.h +++ b/mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_attr.h @@ -20,7 +20,7 @@ #include #include "ir/anf.h" -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git 
a/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_tensor_input.cc b/mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_tensor_input.cc similarity index 94% rename from mindspore/ccsrc/pre_activate/pass/convert_const_input_to_tensor_input.cc rename to mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_tensor_input.cc index b4f98cc6d7..f204841f3c 100644 --- a/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_tensor_input.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_tensor_input.cc @@ -13,18 +13,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/pass/convert_const_input_to_tensor_input.h" +#include "backend/optimizer/pass/convert_const_input_to_tensor_input.h" #include #include #include #include "utils/graph_utils.h" -#include "pre_activate/common/helper.h" -#include "session/anf_runtime_algorithm.h" -#include "session/kernel_graph.h" -#include "kernel/common_utils.h" -#include "device/kernel_info.h" +#include "backend/optimizer/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/session/kernel_graph.h" +#include "backend/kernel_compiler/common_utils.h" +#include "runtime/device/kernel_info.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_tensor_input.h b/mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_tensor_input.h similarity index 96% rename from mindspore/ccsrc/pre_activate/pass/convert_const_input_to_tensor_input.h rename to mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_tensor_input.h index 1cc2bdf0ec..072652497a 100644 --- a/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_tensor_input.h +++ b/mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_tensor_input.h @@ -18,7 +18,7 @@ #include #include "ir/anf.h" -#include "pre_activate/common/optimizer.h" +#include 
"backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/convert_tuple_input_to_dynamic_input.cc b/mindspore/ccsrc/backend/optimizer/pass/convert_tuple_input_to_dynamic_input.cc similarity index 95% rename from mindspore/ccsrc/pre_activate/pass/convert_tuple_input_to_dynamic_input.cc rename to mindspore/ccsrc/backend/optimizer/pass/convert_tuple_input_to_dynamic_input.cc index a03087c1a4..b96a7af8f3 100644 --- a/mindspore/ccsrc/pre_activate/pass/convert_tuple_input_to_dynamic_input.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/convert_tuple_input_to_dynamic_input.cc @@ -13,16 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/pass/convert_tuple_input_to_dynamic_input.h" +#include "backend/optimizer/pass/convert_tuple_input_to_dynamic_input.h" #include #include -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/helper.h" -#include "session/kernel_graph.h" -#include "kernel/common_utils.h" -#include "device/kernel_info.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" +#include "backend/session/kernel_graph.h" +#include "backend/kernel_compiler/common_utils.h" +#include "runtime/device/kernel_info.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/convert_tuple_input_to_dynamic_input.h b/mindspore/ccsrc/backend/optimizer/pass/convert_tuple_input_to_dynamic_input.h similarity index 96% rename from mindspore/ccsrc/pre_activate/pass/convert_tuple_input_to_dynamic_input.h rename to mindspore/ccsrc/backend/optimizer/pass/convert_tuple_input_to_dynamic_input.h index b3d8e25d6e..63d2415dc5 100644 --- a/mindspore/ccsrc/pre_activate/pass/convert_tuple_input_to_dynamic_input.h +++ b/mindspore/ccsrc/backend/optimizer/pass/convert_tuple_input_to_dynamic_input.h @@ -20,7 +20,7 @@ #include #include "ir/anf.h" 
-#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/convert_tuple_output_to_maketuple.cc b/mindspore/ccsrc/backend/optimizer/pass/convert_tuple_output_to_maketuple.cc similarity index 93% rename from mindspore/ccsrc/pre_activate/pass/convert_tuple_output_to_maketuple.cc rename to mindspore/ccsrc/backend/optimizer/pass/convert_tuple_output_to_maketuple.cc index a5e51411bc..34ba83ef17 100644 --- a/mindspore/ccsrc/pre_activate/pass/convert_tuple_output_to_maketuple.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/convert_tuple_output_to_maketuple.cc @@ -13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/pass/convert_tuple_output_to_maketuple.h" +#include "backend/optimizer/pass/convert_tuple_output_to_maketuple.h" #include #include -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/helper.h" -#include "session/kernel_graph.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/convert_tuple_output_to_maketuple.h b/mindspore/ccsrc/backend/optimizer/pass/convert_tuple_output_to_maketuple.h similarity index 96% rename from mindspore/ccsrc/pre_activate/pass/convert_tuple_output_to_maketuple.h rename to mindspore/ccsrc/backend/optimizer/pass/convert_tuple_output_to_maketuple.h index a16ffaf674..9ff5ca91ed 100644 --- a/mindspore/ccsrc/pre_activate/pass/convert_tuple_output_to_maketuple.h +++ b/mindspore/ccsrc/backend/optimizer/pass/convert_tuple_output_to_maketuple.h @@ -20,7 +20,7 @@ #include #include "ir/anf.h" -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git 
a/mindspore/ccsrc/pre_activate/pass/eliminate_redundant_op.cc b/mindspore/ccsrc/backend/optimizer/pass/eliminate_redundant_op.cc similarity index 96% rename from mindspore/ccsrc/pre_activate/pass/eliminate_redundant_op.cc rename to mindspore/ccsrc/backend/optimizer/pass/eliminate_redundant_op.cc index 4d3dcfccc0..3ef912bcec 100644 --- a/mindspore/ccsrc/pre_activate/pass/eliminate_redundant_op.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/eliminate_redundant_op.cc @@ -14,16 +14,16 @@ * limitations under the License. */ -#include "pre_activate/pass/eliminate_redundant_op.h" +#include "backend/optimizer/pass/eliminate_redundant_op.h" #include #include #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" -#include "pre_activate/common/helper.h" -#include "operator/ops.h" -#include "kernel/common_utils.h" +#include "backend/optimizer/common/helper.h" +#include "frontend/operator/ops.h" +#include "backend/kernel_compiler/common_utils.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/eliminate_redundant_op.h b/mindspore/ccsrc/backend/optimizer/pass/eliminate_redundant_op.h similarity index 94% rename from mindspore/ccsrc/pre_activate/pass/eliminate_redundant_op.h rename to mindspore/ccsrc/backend/optimizer/pass/eliminate_redundant_op.h index c44190f645..2fb4715cff 100644 --- a/mindspore/ccsrc/pre_activate/pass/eliminate_redundant_op.h +++ b/mindspore/ccsrc/backend/optimizer/pass/eliminate_redundant_op.h @@ -22,8 +22,8 @@ #include #include #include "ir/anf.h" -#include "pre_activate/common/pattern_engine.h" -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/pattern_engine.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/erase_visit_attr.cc b/mindspore/ccsrc/backend/optimizer/pass/erase_visit_attr.cc similarity index 88% rename from 
mindspore/ccsrc/pre_activate/pass/erase_visit_attr.cc rename to mindspore/ccsrc/backend/optimizer/pass/erase_visit_attr.cc index 3b566b4f7c..8c6cb4beb5 100644 --- a/mindspore/ccsrc/pre_activate/pass/erase_visit_attr.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/erase_visit_attr.cc @@ -14,12 +14,12 @@ * limitations under the License. */ -#include "pre_activate/pass/erase_visit_attr.h" +#include "backend/optimizer/pass/erase_visit_attr.h" #include #include -#include "kernel/common_utils.h" -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/helper.h" +#include "backend/kernel_compiler/common_utils.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/erase_visit_attr.h b/mindspore/ccsrc/backend/optimizer/pass/erase_visit_attr.h similarity index 96% rename from mindspore/ccsrc/pre_activate/pass/erase_visit_attr.h rename to mindspore/ccsrc/backend/optimizer/pass/erase_visit_attr.h index a986aad83a..37b88a4e39 100644 --- a/mindspore/ccsrc/pre_activate/pass/erase_visit_attr.h +++ b/mindspore/ccsrc/backend/optimizer/pass/erase_visit_attr.h @@ -18,7 +18,7 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_ERASE_VISIT_ATTR_H_ #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/fuse_basic.cc b/mindspore/ccsrc/backend/optimizer/pass/fuse_basic.cc similarity index 97% rename from mindspore/ccsrc/pre_activate/pass/fuse_basic.cc rename to mindspore/ccsrc/backend/optimizer/pass/fuse_basic.cc index 84edd5c5e2..32655f1ec2 100644 --- a/mindspore/ccsrc/pre_activate/pass/fuse_basic.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/fuse_basic.cc @@ -14,8 +14,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/pass/fuse_basic.h" -#include "pre_activate/pass/fuse_graph_kernel.h" +#include "backend/optimizer/pass/fuse_basic.h" +#include "backend/optimizer/pass/fuse_graph_kernel.h" #include #include @@ -24,11 +24,11 @@ #include #include -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "utils/utils.h" #include "utils/graph_utils.h" -#include "pre_activate/common/helper.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" #include "vm/segment_runner.h" #include "debug/draw.h" #include "debug/anf_ir_dump.h" diff --git a/mindspore/ccsrc/pre_activate/pass/fuse_basic.h b/mindspore/ccsrc/backend/optimizer/pass/fuse_basic.h similarity index 91% rename from mindspore/ccsrc/pre_activate/pass/fuse_basic.h rename to mindspore/ccsrc/backend/optimizer/pass/fuse_basic.h index fbbf5d9937..9b3916fe28 100644 --- a/mindspore/ccsrc/pre_activate/pass/fuse_basic.h +++ b/mindspore/ccsrc/backend/optimizer/pass/fuse_basic.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_FUSE_BASIC_H_ #include -#include "pre_activate/common/optimizer.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/fuse_graph_kernel.cc b/mindspore/ccsrc/backend/optimizer/pass/fuse_graph_kernel.cc similarity index 99% rename from mindspore/ccsrc/pre_activate/pass/fuse_graph_kernel.cc rename to mindspore/ccsrc/backend/optimizer/pass/fuse_graph_kernel.cc index 0e287587a2..e04110d8a0 100644 --- a/mindspore/ccsrc/pre_activate/pass/fuse_graph_kernel.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/fuse_graph_kernel.cc @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/pass/fuse_graph_kernel.h" +#include "backend/optimizer/pass/fuse_graph_kernel.h" #include #include @@ -25,11 +25,11 @@ #include #include -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "utils/utils.h" #include "utils/graph_utils.h" -#include "pre_activate/common/helper.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" #include "vm/segment_runner.h" #include "debug/draw.h" #include "debug/anf_ir_dump.h" diff --git a/mindspore/ccsrc/pre_activate/pass/fuse_graph_kernel.h b/mindspore/ccsrc/backend/optimizer/pass/fuse_graph_kernel.h similarity index 96% rename from mindspore/ccsrc/pre_activate/pass/fuse_graph_kernel.h rename to mindspore/ccsrc/backend/optimizer/pass/fuse_graph_kernel.h index a5a26765a3..e14661dfdf 100644 --- a/mindspore/ccsrc/pre_activate/pass/fuse_graph_kernel.h +++ b/mindspore/ccsrc/backend/optimizer/pass/fuse_graph_kernel.h @@ -21,8 +21,8 @@ #include #include #include -#include "pre_activate/common/optimizer.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/getitem_tuple.cc b/mindspore/ccsrc/backend/optimizer/pass/getitem_tuple.cc similarity index 94% rename from mindspore/ccsrc/pre_activate/pass/getitem_tuple.cc rename to mindspore/ccsrc/backend/optimizer/pass/getitem_tuple.cc index af16017a7c..a51a6bab42 100644 --- a/mindspore/ccsrc/pre_activate/pass/getitem_tuple.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/getitem_tuple.cc @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/pass/getitem_tuple.h" +#include "backend/optimizer/pass/getitem_tuple.h" #include -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "utils/utils.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/getitem_tuple.h b/mindspore/ccsrc/backend/optimizer/pass/getitem_tuple.h similarity index 96% rename from mindspore/ccsrc/pre_activate/pass/getitem_tuple.h rename to mindspore/ccsrc/backend/optimizer/pass/getitem_tuple.h index 0fc42a15dc..9a25b924bd 100644 --- a/mindspore/ccsrc/pre_activate/pass/getitem_tuple.h +++ b/mindspore/ccsrc/backend/optimizer/pass/getitem_tuple.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_GETITEM_TUPLE_SPLIT_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_GETITEM_TUPLE_SPLIT_H_ -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/optimize_dependence.cc b/mindspore/ccsrc/backend/optimizer/pass/optimize_dependence.cc similarity index 96% rename from mindspore/ccsrc/pre_activate/pass/optimize_dependence.cc rename to mindspore/ccsrc/backend/optimizer/pass/optimize_dependence.cc index 1d5f909e7d..710e130a85 100644 --- a/mindspore/ccsrc/pre_activate/pass/optimize_dependence.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/optimize_dependence.cc @@ -14,15 +14,15 @@ * limitations under the License. 
*/ -#include "pre_activate/pass/optimize_dependence.h" +#include "backend/optimizer/pass/optimize_dependence.h" #include #include #include -#include "pre_activate/common/helper.h" -#include "operator/ops.h" +#include "backend/optimizer/common/helper.h" +#include "frontend/operator/ops.h" #include "utils/utils.h" -#include "session/kernel_graph.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/kernel_graph.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/optimize_dependence.h b/mindspore/ccsrc/backend/optimizer/pass/optimize_dependence.h similarity index 96% rename from mindspore/ccsrc/pre_activate/pass/optimize_dependence.h rename to mindspore/ccsrc/backend/optimizer/pass/optimize_dependence.h index 30027b790a..8ddd4d662e 100644 --- a/mindspore/ccsrc/pre_activate/pass/optimize_dependence.h +++ b/mindspore/ccsrc/backend/optimizer/pass/optimize_dependence.h @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_OPTIMIZE_DEPENDENCE_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_OPTIMIZE_DEPENDENCE_H_ -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/backend/optimizer/pass/replace_node_by_proxy.cc b/mindspore/ccsrc/backend/optimizer/pass/replace_node_by_proxy.cc new file mode 100644 index 0000000000..cd34464cda --- /dev/null +++ b/mindspore/ccsrc/backend/optimizer/pass/replace_node_by_proxy.cc @@ -0,0 +1,92 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "backend/optimizer/pass/replace_node_by_proxy.h" +#include +#include +#include "runtime/device/kernel_info.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/kernel_build_info.h" + +namespace mindspore { +namespace opt { +kernel::KernelBuildInfoPtr ReplaceNodeByProxy::GenerateKernelBuildInfo(const CNodePtr &cnode) { + MS_EXCEPTION_IF_NULL(cnode); + std::vector inputs_device_format; + std::vector outputs_device_format; + std::vector inputs_device_type; + std::vector outputs_device_type; + std::vector> outputs_shape; + kernel::KernelBuildInfo::KernelBuildInfoBuilder builder; + for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(cnode); ++input_index) { + inputs_device_format.push_back(AnfAlgo::GetInputFormat(cnode, input_index)); + inputs_device_type.push_back(AnfAlgo::GetInputDeviceDataType(cnode, input_index)); + } + for (size_t output_index = 0; output_index < AnfAlgo::GetOutputTensorNum(cnode); ++output_index) { + outputs_device_format.push_back(AnfAlgo::GetOutputFormat(cnode, output_index)); + outputs_device_type.push_back(AnfAlgo::GetOutputDeviceDataType(cnode, output_index)); + outputs_shape.push_back(AnfAlgo::GetOutputInferShape(cnode, output_index)); + } + builder.SetFusionType(AnfAlgo::GetFusionType(cnode)); + builder.SetProcessor(AnfAlgo::GetProcessor(cnode)); + builder.SetKernelType(AnfAlgo::GetKernelType(cnode)); + + builder.SetInputsFormat(inputs_device_format); + builder.SetOutputsFormat(outputs_device_format); + builder.SetInputsDeviceType(inputs_device_type); + 
builder.SetOutputsDeviceType(outputs_device_type); + return builder.Build(); +} + +bool ReplaceNodeByProxy::Run(const FuncGraphPtr &func_graph) { + MS_EXCEPTION_IF_NULL(func_graph); + auto manager = func_graph->manager(); + MS_EXCEPTION_IF_NULL(manager); + std::vector node_list = TopoSort(func_graph->get_return()); + for (auto node : node_list) { + if (node != nullptr && node->isa() && AnfAlgo::GetCNodeName(node) == kEmbeddingLookupOpName) { + CNodePtr cnode = node->cast(); + auto prim = std::make_shared(kEmbeddingLookupProxyOpName); + MS_EXCEPTION_IF_NULL(prim); + std::vector proxy_inputs = {NewValueNode(prim)}; + proxy_inputs.insert(proxy_inputs.end(), cnode->inputs().begin() + 1, cnode->inputs().end()); + AnfNodePtr proxy_node = func_graph->NewCNode(proxy_inputs); + MS_EXCEPTION_IF_NULL(proxy_node); + + auto kernel_info = std::make_shared(); + MS_EXCEPTION_IF_NULL(kernel_info); + proxy_node->set_kernel_info(kernel_info); + + AbstractBasePtrList abstract_list; + AnfAlgo::CopyNodeAttr(kAttrPsKey, cnode, proxy_node); + AnfAlgo::CopyNodeAttr("reduce_scatter_flag", cnode, proxy_node); + AnfAlgo::CopyNodeAttr("offset", cnode, proxy_node); + abstract_list.push_back(cnode->abstract()); + auto abstract_tuple = std::make_shared(abstract_list); + MS_EXCEPTION_IF_NULL(abstract_tuple); + proxy_node->set_abstract(abstract_tuple); + + auto kernel_build_info = GenerateKernelBuildInfo(cnode); + AnfAlgo::SetSelectKernelBuildInfo(kernel_build_info, proxy_node.get()); + + if (!manager->Replace(cnode, proxy_node)) { + MS_LOG(EXCEPTION) << "Replace node by proxy node failed."; + } + } + } + return true; +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/optimizer/pass/replace_node_by_proxy.h b/mindspore/ccsrc/backend/optimizer/pass/replace_node_by_proxy.h new file mode 100644 index 0000000000..382b08304f --- /dev/null +++ b/mindspore/ccsrc/backend/optimizer/pass/replace_node_by_proxy.h @@ -0,0 +1,41 @@ +/** + * Copyright 2020 Huawei Technologies 
Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_REPLACE_NODE_BY_PROXY_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_REPLACE_NODE_BY_PROXY_H_ +#include +#include +#include + +#include "backend/optimizer/common/pass.h" +#include "ir/func_graph.h" +#include "ir/anf.h" +#include "utils/utils.h" +#include "backend/kernel_compiler/kernel_build_info.h" + +namespace mindspore { +namespace opt { +class ReplaceNodeByProxy : public Pass { + public: + explicit ReplaceNodeByProxy(const std::string &name) : Pass(name) {} + ~ReplaceNodeByProxy() override = default; + bool Run(const FuncGraphPtr &graph) override; + + private: + kernel::KernelBuildInfoPtr GenerateKernelBuildInfo(const CNodePtr &cnode); +}; +} // namespace opt +} // namespace mindspore +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_REPLACE_NODE_BY_PROXY_H_ diff --git a/mindspore/ccsrc/session/CMakeLists.txt b/mindspore/ccsrc/backend/session/CMakeLists.txt similarity index 89% rename from mindspore/ccsrc/session/CMakeLists.txt rename to mindspore/ccsrc/backend/session/CMakeLists.txt index 782eb51183..b7b791ada9 100644 --- a/mindspore/ccsrc/session/CMakeLists.txt +++ b/mindspore/ccsrc/backend/session/CMakeLists.txt @@ -29,4 +29,4 @@ if (ENABLE_D) endif () set_property(SOURCE ${_SESSION_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_SESSION) -add_library(_mindspore_session_obj OBJECT ${_SESSION_SRC_LIST}) 
+add_library(_mindspore_backend_session_obj OBJECT ${_SESSION_SRC_LIST}) diff --git a/mindspore/ccsrc/session/anf_runtime_algorithm.cc b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc similarity index 87% rename from mindspore/ccsrc/session/anf_runtime_algorithm.cc rename to mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc index 81ad02e787..38c040e6b1 100644 --- a/mindspore/ccsrc/session/anf_runtime_algorithm.cc +++ b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc @@ -13,20 +13,20 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include #include #include #include #include "ir/anf.h" #include "ir/func_graph.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "utils/utils.h" -#include "device/kernel_info.h" -#include "device/device_address.h" -#include "pre_activate/common/helper.h" -#include "kernel/kernel.h" -#include "kernel/kernel_build_info.h" +#include "runtime/device/kernel_info.h" +#include "runtime/device/device_address.h" +#include "backend/optimizer/common/helper.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/kernel_compiler/kernel_build_info.h" #include "common/utils.h" #include "common/trans.h" @@ -40,6 +40,9 @@ using kernel::KernelBuildInfoPtr; using kernel::KernelMod; using kernel::KernelModPtr; namespace { +constexpr size_t kNopNodeInputSize = 2; +constexpr size_t kNopNodeRealInputIndex = 1; + std::vector TransShapeToSizet(const abstract::ShapePtr &shape) { MS_EXCEPTION_IF_NULL(shape); std::vector shape_size_t; @@ -48,6 +51,26 @@ std::vector TransShapeToSizet(const abstract::ShapePtr &shape) { } } // namespace +AnfNodePtr AnfRuntimeAlgorithm::GetTupleGetItemRealInput(const CNodePtr &tuple_get_item) { + MS_EXCEPTION_IF_NULL(tuple_get_item); + if (tuple_get_item->size() != kTupleGetItemInputSize) { + MS_LOG(EXCEPTION) << "The 
node tuple_get_item must have 2 inputs!"; + } + return tuple_get_item->input(kRealInputNodeIndexInTupleGetItem); +} + +size_t AnfRuntimeAlgorithm::GetTupleGetItemOutIndex(const CNodePtr &tuple_get_item) { + MS_EXCEPTION_IF_NULL(tuple_get_item); + if (tuple_get_item->size() != kTupleGetItemInputSize) { + MS_LOG(EXCEPTION) << "The node tuple_get_item must have 2 inputs!"; + } + auto output_index_value_node = tuple_get_item->input(kInputNodeOutputIndexInTupleGetItem); + MS_EXCEPTION_IF_NULL(output_index_value_node); + auto value_node = output_index_value_node->cast(); + MS_EXCEPTION_IF_NULL(value_node); + return IntToSize(GetValue(value_node->value())); +} + KernelWithIndex AnfRuntimeAlgorithm::VisitKernel(const AnfNodePtr &anf_node, size_t index) { MS_EXCEPTION_IF_NULL(anf_node); if (anf_node->isa()) { @@ -83,49 +106,47 @@ KernelWithIndex AnfRuntimeAlgorithm::VisitKernel(const AnfNodePtr &anf_node, siz } } -KernelWithIndex AnfRuntimeAlgorithm::VisitKernelWithReturnType(const AnfNodePtr &anf_node, size_t index, +KernelWithIndex AnfRuntimeAlgorithm::VisitKernelWithReturnType(const AnfNodePtr &anf_node, int index, bool visit_nop_node, const std::vector &return_types) { MS_EXCEPTION_IF_NULL(anf_node); - for (const auto &prim_type : return_types) { - if (CheckPrimitiveType(anf_node, prim_type)) { - return std::make_pair(anf_node, index); - } + if (std::any_of(return_types.begin(), return_types.end(), [&anf_node](const PrimitivePtr &prim_type) -> bool { + return CheckPrimitiveType(anf_node, prim_type); + })) { + return KernelWithIndex(anf_node, index); } - if (anf_node->isa()) { - return std::make_pair(anf_node, 0); - } else if (anf_node->isa()) { - return std::make_pair(anf_node, 0); - } else if (anf_node->isa()) { - auto cnode = anf_node->cast(); - MS_EXCEPTION_IF_NULL(cnode); - auto input0 = cnode->input(0); - MS_EXCEPTION_IF_NULL(input0); - if (IsPrimitive(input0, prim::kPrimTupleGetItem)) { - if (cnode->inputs().size() != kTupleGetItemInputSize) { - MS_LOG(EXCEPTION) 
<< "The node tuple_get_item must have 2 inputs!"; - } - auto input2 = cnode->input(kInputNodeOutputIndexInTupleGetItem); - MS_EXCEPTION_IF_NULL(input2); - auto value_node = input2->cast(); - MS_EXCEPTION_IF_NULL(value_node); - int item_idx = GetValue(value_node->value()); - return VisitKernelWithReturnType(cnode->input(kRealInputNodeIndexInTupleGetItem), IntToSize(item_idx), - visit_nop_node, return_types); - } else if (IsPrimitive(input0, prim::kPrimDepend) || IsPrimitive(input0, prim::kPrimControlDepend)) { - return VisitKernelWithReturnType(cnode->input(kRealInputIndexInDepend), 0, visit_nop_node, return_types); - } else if (opt::IsNopNode(cnode) && visit_nop_node) { - if (cnode->inputs().size() == 2) { - return VisitKernelWithReturnType(cnode->input(1), 0, visit_nop_node, return_types); - } else { - MS_LOG(EXCEPTION) << cnode->DebugString() << "Invalid nop node"; + if (!anf_node->isa()) { + return KernelWithIndex(anf_node, 0); + } + auto cnode = anf_node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + if (CheckPrimitiveType(cnode, prim::kPrimTupleGetItem)) { + auto item_with_index_tmp = VisitKernelWithReturnType(GetTupleGetItemRealInput(cnode), + GetTupleGetItemOutIndex(cnode), visit_nop_node, return_types); + if (CheckPrimitiveType(item_with_index_tmp.first, prim::kPrimMakeTuple)) { + MS_EXCEPTION_IF_NULL(item_with_index_tmp.first); + auto make_tuple = item_with_index_tmp.first->cast(); + MS_EXCEPTION_IF_NULL(make_tuple); + const std::vector &make_tuple_inputs = make_tuple->inputs(); + size_t make_tuple_input_index = item_with_index_tmp.second + 1; + if (make_tuple_input_index >= make_tuple_inputs.size()) { + MS_LOG(EXCEPTION) << "Index[" << make_tuple_input_index << "] out of range[" << make_tuple_inputs.size() + << "]."; } - } else { - return std::make_pair(anf_node, index); + return VisitKernelWithReturnType(make_tuple_inputs[make_tuple_input_index], 0, visit_nop_node, return_types); } - } else { - MS_LOG(EXCEPTION) << "The input is invalid"; + return 
item_with_index_tmp; + } + if (CheckPrimitiveType(cnode, prim::kPrimDepend) || CheckPrimitiveType(cnode, prim::kPrimControlDepend)) { + return VisitKernelWithReturnType(cnode->input(kRealInputIndexInDepend), index, visit_nop_node, return_types); + } + if (opt::IsNopNode(cnode) && visit_nop_node) { + if (cnode->size() != kNopNodeInputSize) { + MS_LOG(EXCEPTION) << "Invalid nop node " << cnode->DebugString(); + } + return VisitKernelWithReturnType(cnode->input(kNopNodeRealInputIndex), 0, visit_nop_node, return_types); } + return KernelWithIndex(anf_node, index); } std::vector AnfRuntimeAlgorithm::GetAllOutput(const AnfNodePtr &node, @@ -338,7 +359,7 @@ std::string AnfRuntimeAlgorithm::GetOutputFormat(const AnfNodePtr &node, size_t if (!AnfAlgo::IsRealKernel(node)) { return AnfAlgo::GetPrevNodeOutputFormat(node, output_idx); } - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -360,7 +381,7 @@ std::string AnfRuntimeAlgorithm::GetInputFormat(const AnfNodePtr &node, size_t i if (!IsRealKernel(node)) { GetPrevNodeOutputFormat(node, input_idx); } - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -467,7 +488,7 @@ std::vector AnfRuntimeAlgorithm::GetInputReshapeType(const AnfNode if (!IsRealKernel(node)) { return GetPrevNodeOutputReshapeType(node, input_idx); } - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -486,7 +507,7 @@ std::vector AnfRuntimeAlgorithm::GetOutputReshapeType(const AnfNod if (!IsRealKernel(node)) { return 
GetPrevNodeOutputReshapeType(node, output_idx); } - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -546,7 +567,7 @@ TypeId AnfRuntimeAlgorithm::GetOutputDeviceDataType(const AnfNodePtr &node, size if (!IsRealKernel(node)) { return GetPrevNodeOutputDeviceDataType(node, output_idx); } - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -567,7 +588,7 @@ TypeId AnfRuntimeAlgorithm::GetInputDeviceDataType(const AnfNodePtr &node, size_ if (!IsRealKernel(node)) { return GetPrevNodeOutputDeviceDataType(node, 0); } - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -591,13 +612,13 @@ const DeviceAddress *AnfRuntimeAlgorithm::GetOutputAddr(const AnfNodePtr &node, if (opt::IsNopNode(node) && visit_nop_node) { auto cnode = node->cast(); MS_EXCEPTION_IF_NULL(cnode); - if (cnode->inputs().size() == 2) { + if (cnode->size() == kNopNodeInputSize) { return AnfRuntimeAlgorithm::GetPrevNodeOutputAddr(cnode, 0); } else { MS_LOG(EXCEPTION) << node->DebugString() << "Invalid nop node"; } } - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto addr = kernel_info->GetOutputAddr(output_idx); if (addr == nullptr) { @@ -613,13 +634,13 @@ DeviceAddressPtr AnfRuntimeAlgorithm::GetMutableOutputAddr(const AnfNodePtr &nod if (opt::IsNopNode(node) && visit_nop_node) { auto cnode = node->cast(); MS_EXCEPTION_IF_NULL(cnode); - if (cnode->inputs().size() == 2) { + if (cnode->inputs().size() == 
kNopNodeInputSize) { return AnfRuntimeAlgorithm::GetPrevNodeMutableOutputAddr(cnode, 0); } else { MS_LOG(EXCEPTION) << node->DebugString() << "Invalid nop node."; } } - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto addr = kernel_info->GetMutableOutputAddr(output_idx); if (addr == nullptr) { @@ -636,7 +657,7 @@ bool AnfRuntimeAlgorithm::OutputAddrExist(const AnfNodePtr &node, size_t output_ MS_LOG(EXCEPTION) << "The index [" << output_idx << "] is out of range of the node's output size [ " << GetOutputTensorNum(node) << "#node:[ " << node->DebugString() << "]"; } - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->OutputAddrExist(output_idx); } @@ -656,7 +677,7 @@ DeviceAddressPtr AnfRuntimeAlgorithm::GetPrevNodeMutableOutputAddr(const AnfNode // set output device addr of anf_node void AnfRuntimeAlgorithm::SetOutputAddr(const DeviceAddressPtr &addr, size_t output_idx, AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); if (!kernel_info->SetOutputAddr(addr, output_idx)) { MS_LOG(EXCEPTION) << "Node " << node->DebugString() << "set adr" << output_idx << " fail"; @@ -666,7 +687,7 @@ void AnfRuntimeAlgorithm::SetOutputAddr(const DeviceAddressPtr &addr, size_t out // set workspace device addr of anf_node void AnfRuntimeAlgorithm::SetWorkspaceAddr(const DeviceAddressPtr &addr, size_t output_idx, AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); if (!kernel_info->SetWorkspaceAddr(addr, output_idx)) { MS_LOG(EXCEPTION) << "Node " << node->DebugString() << "set adr" << output_idx << " fail"; @@ -676,7 +697,7 @@ void 
AnfRuntimeAlgorithm::SetWorkspaceAddr(const DeviceAddressPtr &addr, size_t // get workspace device addr of anf_node DeviceAddress *AnfRuntimeAlgorithm::GetWorkspaceAddr(const AnfNodePtr &node, size_t output_idx) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto addr = kernel_info->GetWorkspaceAddr(output_idx); if (addr == nullptr) { @@ -720,7 +741,7 @@ void AnfRuntimeAlgorithm::CopyAbstract(const AnfNodePtr &from_node, AnfNode *to_ kernel::OpPattern AnfRuntimeAlgorithm::GetOpPattern(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); // select_kernel_build_info() has checked whether return pointer is null auto build_info = kernel_info->select_kernel_build_info(); @@ -731,7 +752,7 @@ kernel::OpPattern AnfRuntimeAlgorithm::GetOpPattern(const AnfNodePtr &node) { // get KernelBuildType of node, such as ATT,RT,FWK and so on KernelType AnfRuntimeAlgorithm::GetKernelType(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); // select_kernel_build_info() has checked whether return pointer is null auto build_info = kernel_info->select_kernel_build_info(); @@ -741,7 +762,7 @@ KernelType AnfRuntimeAlgorithm::GetKernelType(const AnfNodePtr &node) { kernel::Processor AnfRuntimeAlgorithm::GetProcessor(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -750,7 +771,7 @@ kernel::Processor AnfRuntimeAlgorithm::GetProcessor(const AnfNodePtr &node) { kernel::FusionType 
AnfRuntimeAlgorithm::GetFusionType(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -760,7 +781,7 @@ kernel::FusionType AnfRuntimeAlgorithm::GetFusionType(const AnfNodePtr &node) { // set select kernel_build_info void AnfRuntimeAlgorithm::SetSelectKernelBuildInfo(const KernelBuildInfoPtr &select_kernel_build_info, AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->set_select_kernel_build_info(select_kernel_build_info); } @@ -768,7 +789,7 @@ void AnfRuntimeAlgorithm::SetSelectKernelBuildInfo(const KernelBuildInfoPtr &sel // get select kernel_build_info KernelBuildInfoPtr AnfRuntimeAlgorithm::GetSelectKernelBuildInfo(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->GetMutableSelectKernelBuildInfo(); } @@ -776,7 +797,7 @@ KernelBuildInfoPtr AnfRuntimeAlgorithm::GetSelectKernelBuildInfo(const AnfNodePt // get kernelMode KernelMod *AnfRuntimeAlgorithm::GetKernelMod(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->MutableKernelMod(); } @@ -784,7 +805,7 @@ KernelMod *AnfRuntimeAlgorithm::GetKernelMod(const AnfNodePtr &node) { // set kernel mod void AnfRuntimeAlgorithm::SetKernelMod(const KernelModPtr &kernel_mod, AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); 
kernel_info->set_kernel_mod(kernel_mod); } @@ -806,7 +827,7 @@ bool AnfRuntimeAlgorithm::IsRealKernel(const AnfNodePtr &node) { IsPrimitive(input, prim::kPrimHistogramSummary) || IsPrimitive(input, prim::kPrimMakeTuple) || IsPrimitive(input, prim::kPrimStateSetItem) || IsPrimitive(input, prim::kPrimDepend) || IsPrimitive(input, prim::kPrimTupleGetItem) || IsPrimitive(input, prim::kPrimControlDepend) || - IsPrimitive(input, prim::kPrimReturn); + IsPrimitive(input, prim::kPrimReturn) || IsPrimitive(input, prim::kPrimPartial); return !is_virtual_node; } @@ -850,42 +871,42 @@ bool AnfRuntimeAlgorithm::IsParameterWeight(const ParameterPtr &node) { void AnfRuntimeAlgorithm::SetStreamId(uint32_t stream_id, AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); kernel_info->set_stream_id(stream_id); } uint32_t AnfRuntimeAlgorithm::GetStreamId(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->stream_id(); } void AnfRuntimeAlgorithm::SetStreamDistinctionLabel(uint32_t stream_label, AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); kernel_info->set_stream_distinction_label(stream_label); } uint32_t AnfRuntimeAlgorithm::GetStreamDistinctionLabel(const AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->stream_distinction_label(); } void AnfRuntimeAlgorithm::SetGraphId(uint32_t graph_id, AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); 
MS_EXCEPTION_IF_NULL(kernel_info); kernel_info->set_graph_id(graph_id); } uint32_t AnfRuntimeAlgorithm::GetGraphId(const AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->graph_id(); } @@ -913,7 +934,7 @@ bool AnfRuntimeAlgorithm::IsFeatureMapOutput(const AnfNodePtr &node) { if (node->isa()) { return false; } - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->is_feature_map(); } @@ -1117,5 +1138,14 @@ TypeId AnfRuntimeAlgorithm::GetPrevNodeOutputPrecision(const AnfNodePtr &node, s } return GetCNodeOutputPrecision(kernel_with_index.first); } + +bool AnfRuntimeAlgorithm::IsCondControlKernel(const CNodePtr &node) { + MS_EXCEPTION_IF_NULL(node); + if (node->inputs().empty()) { + MS_LOG(EXCEPTION) << "Illegal null input of cnode."; + } + auto input = node->input(kAnfPrimitiveIndex); + return IsPrimitive(input, prim::kPrimLabelGoto) || IsPrimitive(input, prim::kPrimLabelSwitch); +} } // namespace session } // namespace mindspore diff --git a/mindspore/ccsrc/session/anf_runtime_algorithm.h b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.h similarity index 94% rename from mindspore/ccsrc/session/anf_runtime_algorithm.h rename to mindspore/ccsrc/backend/session/anf_runtime_algorithm.h index 8205619793..4fa3150e36 100644 --- a/mindspore/ccsrc/session/anf_runtime_algorithm.h +++ b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.h @@ -25,24 +25,29 @@ #include #include "ir/anf.h" #include "ir/dtype.h" -#include "ir/base.h" +#include "base/base.h" #include "ir/primitive.h" -#include "device/device_address.h" -#include "kernel/kernel.h" -#include "kernel/kernel_build_info.h" -#include "operator/ops.h" +#include "runtime/device/device_address.h" +#include "backend/kernel_compiler/kernel.h" +#include 
"backend/kernel_compiler/kernel_build_info.h" +#include "frontend/operator/ops.h" #include "utils/contract.h" -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace session { using AnfVisitFuncion = std::function; using KernelWithIndex = std::pair; +using DeviceAddress = device::DeviceAddress; +using DeviceAddressPtr = device::DeviceAddressPtr; class AnfRuntimeAlgorithm { public: + // get real input node of tuple_get_item + static AnfNodePtr GetTupleGetItemRealInput(const CNodePtr &tuple_get_item); + static size_t GetTupleGetItemOutIndex(const CNodePtr &tuple_get_item); // get input_anf_node's real kernel by recurse static KernelWithIndex VisitKernel(const AnfNodePtr &input_anf_node, size_t output_index); - static KernelWithIndex VisitKernelWithReturnType(const AnfNodePtr &input_anf_node, size_t output_index, + static KernelWithIndex VisitKernelWithReturnType(const AnfNodePtr &input_anf_node, int output_index, bool visit_nop_node = false, const std::vector &return_types = { prim::kPrimMakeTuple}); @@ -203,6 +208,7 @@ class AnfRuntimeAlgorithm { static TypeId GetCNodeOutputPrecision(const AnfNodePtr &node); // get fix output precision from prev node, input_idx is the input index of current node related to prev node. static TypeId GetPrevNodeOutputPrecision(const AnfNodePtr &node, size_t input_idx); + static bool IsCondControlKernel(const CNodePtr &node); }; } // namespace session using AnfAlgo = session::AnfRuntimeAlgorithm; diff --git a/mindspore/ccsrc/session/ascend_control_parser.cc b/mindspore/ccsrc/backend/session/ascend_control_parser.cc similarity index 54% rename from mindspore/ccsrc/session/ascend_control_parser.cc rename to mindspore/ccsrc/backend/session/ascend_control_parser.cc index 0c97116c6e..274b355679 100644 --- a/mindspore/ccsrc/session/ascend_control_parser.cc +++ b/mindspore/ccsrc/backend/session/ascend_control_parser.cc @@ -14,12 +14,13 @@ * limitations under the License. 
*/ -#include "session/ascend_control_parser.h" +#include "backend/session/ascend_control_parser.h" #include #include -#include "session/anf_runtime_algorithm.h" +#include +#include "backend/session/anf_runtime_algorithm.h" #include "utils/union_find_set.h" -#include "device/ascend/ascend_label_assign.h" +#include "runtime/device/ascend/ascend_label_assign.h" static constexpr size_t kCNodePrim = 0; static constexpr size_t kCNodeCallArg = 1; @@ -31,94 +32,11 @@ static constexpr size_t kCNodePartialLength = 2; static constexpr size_t kCNodePartialFunc = 1; static constexpr size_t kCNodeSwitchLayerBranch = 2; static constexpr size_t kCNodeSwitchLayerLength = 3; +static constexpr size_t kCNodeAssignTarget = 1; +static constexpr size_t kCNodeAssignSource = 2; namespace mindspore { namespace session { -static CNodePtr GetJumpNode(NotNull parent_graph, NotNull child_graph) { - auto &nodes = parent_graph->execution_order(); - CNodePtr last_jump_node = nullptr; - for (auto &node : nodes) { - if (IsPrimitiveCNode(node, prim::kPrimLabelGoto)) { - if (child_graph->get_start_label() == node->input(kCNodeCallArg)) { - return node; - } - last_jump_node = node; - } else if (IsPrimitiveCNode(node, prim::kPrimLabelSwitch)) { - if (child_graph->get_start_label() == node->input(kCNodeSwitchFalse) || - child_graph->get_start_label() == node->input(kCNodeSwitchTrue)) { - return node; - } - last_jump_node = node; - } - } - if (last_jump_node == nullptr) { - MS_LOG(EXCEPTION) << "Cannot find jump node from " << parent_graph->ToString() << " to " << child_graph->ToString(); - } - return last_jump_node; -} - -static void InitUnionFindSet(NotNull kg, const NotNull *> union_find_set, - const NotNull *> memo) { - if (memo->find(kg.get()) != memo->end()) { - return; - } - memo->insert(kg.get()); - - const std::vector>> &real_inputs = kg->real_inputs(); - for (auto &iter : real_inputs) { - auto ¶ = iter.first; - MS_EXCEPTION_IF_NULL(para); - if (para->isa()) { - union_find_set->Add(para); - } - 
for (auto &arg : iter.second) { - MS_EXCEPTION_IF_NULL(arg); - if (!arg->isa()) { - continue; - } - union_find_set->Add(arg); - } - } - for (auto &child : kg->child_graph_order()) { - InitUnionFindSet(NOT_NULL(child), union_find_set, memo); - } -} - -static void UnionParentParameter(NotNull kg, const NotNull *> union_find_set, - const NotNull *> memo) { - if (memo->find(kg.get()) != memo->end()) { - return; - } - memo->insert(kg.get()); - - const std::vector>> &real_inputs = kg->real_inputs(); - for (auto &iter : real_inputs) { - auto ¶ = iter.first; - for (auto &arg : iter.second) { - MS_EXCEPTION_IF_NULL(arg); - if (!arg->isa()) { - continue; - } - if (kg->unreuse_args().find(arg) != kg->unreuse_args().end()) { - continue; - } - union_find_set->Union(arg, para); - } - } - for (auto &child : kg->child_graph_order()) { - UnionParentParameter(NOT_NULL(child), union_find_set, memo); - } -} - -static UnionFindSet MakeUnionFindSet(NotNull root_kg) { - UnionFindSet result; - std::set memo; - InitUnionFindSet(root_kg, NOT_NULL(&result), NOT_NULL(&memo)); - memo.clear(); - UnionParentParameter(root_kg, NOT_NULL(&result), NOT_NULL(&memo)); - return result; -} - static void RecursiveReplaceNode(NotNull kg, NotNull main_parameter, const std::set ¶meter_reuse_set, const NotNull *> memo) { @@ -135,8 +53,9 @@ static void RecursiveReplaceNode(NotNull kg, NotNull continue; } MS_EXCEPTION_IF_NULL(para); - MS_LOG(INFO) << "Replace " << para->DebugString() << " of graph " << AnfAlgo::GetGraphId(para.get()) << " to " - << main_parameter->DebugString() << " of graph " << AnfAlgo::GetGraphId(main_parameter.get().get()); + MS_LOG(INFO) << "In " << kg->ToString() << " replace " << para->DebugString() << " of graph " + << AnfAlgo::GetGraphId(para.get()) << " to " << main_parameter->DebugString() << " of graph " + << AnfAlgo::GetGraphId(main_parameter.get().get()); kg->ReplaceNode(NOT_NULL(para), main_parameter); } @@ -145,7 +64,7 @@ static void RecursiveReplaceNode(NotNull kg, NotNull } } 
-static AnfNodePtr GetMainParameter(NotNull root_kg, const AnfNodePtr key, +static AnfNodePtr GetMainParameter(NotNull root_kg, const AnfNodePtr &key, const std::set ¶meter_reuse_set) { AnfNodePtr main_parameter = key; std::set root_inputs_set; @@ -160,8 +79,19 @@ static AnfNodePtr GetMainParameter(NotNull root_kg, const AnfNod return main_parameter; } -static void ReuseParameter(NotNull root_kg, NotNull *> parameter_set) { - auto parameter_reuse_sets = parameter_set->GetSets(); +static void ReuseParameter(NotNull root_kg, + const std::vector> &link_list) { + // make union find set + UnionFindSet union_find_set; + for (auto &[param, arg] : link_list) { + union_find_set.Add(param); + union_find_set.Add(arg); + } + for (auto &[param, arg] : link_list) { + union_find_set.Union(param, arg); + } + auto parameter_reuse_sets = union_find_set.GetSets(); + for (auto &[key, parameter_reuse_set] : parameter_reuse_sets) { if (parameter_reuse_set.size() <= 1) { continue; @@ -172,7 +102,7 @@ static void ReuseParameter(NotNull root_kg, NotNull &list, size_t start) { +static CNodePtr GetNextRealKernel(const std::vector &list, size_t start) { for (size_t i = start; i < list.size() - 1; ++i) { if (!IsPrimitiveCNode(list[i], prim::kPrimPartial) && AnfAlgo::IsRealKernel(list[i])) { return list[i]; @@ -181,71 +111,287 @@ CNodePtr GetNextRealKernel(const std::vector &list, size_t start) { return nullptr; } +static void UpdateLabelIdToLabelSetMap(const std::vector &exec_order, + const NotNull *> label_id_to_label_set) { + for (auto &node : exec_order) { + MS_EXCEPTION_IF_NULL(node); + if (!IsPrimitiveCNode(node, prim::kPrimLabelSet)) { + continue; + } + if (!AnfAlgo::HasNodeAttr(kAttrLabelIndex, node)) { + MS_LOG(EXCEPTION) << node->DebugString() << " has no attr kAttrLabelIndex"; + } + uint32_t label_id = AnfAlgo::GetNodeAttr(node, kAttrLabelIndex); + if (auto iter = label_id_to_label_set->find(label_id); iter != label_id_to_label_set->end()) { + MS_LOG(EXCEPTION) << "There are more 
than one node has same label id " << label_id + << ", node: " << iter->second->DebugString() << " and " << node->DebugString(); + } + (*label_id_to_label_set)[label_id] = node; + } +} + +static std::vector GetTargetLabelSetNodes(NotNull jump_node, + const std::map &label_id_to_label_set) { + std::vector target_label_list; + std::vector target_labelset_nodes; + if (IsPrimitiveCNode(jump_node.get(), prim::kPrimLabelGoto)) { + if (!AnfAlgo::HasNodeAttr(kAttrLabelIndex, jump_node)) { + MS_LOG(EXCEPTION) << jump_node->DebugString() << " has no attr kAttrLabelIndex"; + } + uint32_t label_id = AnfAlgo::GetNodeAttr(jump_node.get(), kAttrLabelIndex); + target_label_list.push_back(label_id); + } else if (IsPrimitiveCNode(jump_node.get(), prim::kPrimLabelSwitch)) { + if (!AnfAlgo::HasNodeAttr(kAttrLabelSwitchList, jump_node)) { + MS_LOG(EXCEPTION) << jump_node->DebugString() << " has no attr kPrimLabelSwitch"; + } + target_label_list = AnfAlgo::GetNodeAttr>(jump_node.get(), kAttrLabelSwitchList); + } else { + MS_LOG(EXCEPTION) << "Unknown type jump node " << jump_node->DebugString(); + } + + for (auto label_id : target_label_list) { + auto iter = label_id_to_label_set.find(label_id); + if (iter == label_id_to_label_set.end()) { + MS_LOG(EXCEPTION) << "Connot find LabelSet node has label id " << label_id; + } + target_labelset_nodes.push_back(iter->second); + } + return target_labelset_nodes; +} + +static void EraseNodeFromExecOrder(const AnfNodePtr &node, const NotNull *> exec_order) { + MS_EXCEPTION_IF_NULL(node); + auto exec_iter = std::find(exec_order->begin(), exec_order->end(), node); + if (exec_iter == exec_order->end()) { + MS_LOG(EXCEPTION) << "Cannot find " << node->DebugString() << " in exec order."; + } + exec_order->erase(exec_iter); +} + void AscendControlParser::LinkGraph(NotNull kg) { std::set memo; + std::vector> link_list; + // Insert Assign + ChildGraphDataAssign(kg, NOT_NULL(&link_list), NOT_NULL(&memo)); + // Reuse Parameter + ReuseParameter(kg, 
link_list); + // replace call by label goto / label switch + memo.clear(); (void)ProcessKernelGraph(kg, nullptr, nullptr, NOT_NULL(&memo)); + // assign label resource device::ascend::AscendLabelAssign::GetInstance().AssignLabel(kg); - std::map graph_id_map; - for (auto &g : memo) { - MS_EXCEPTION_IF_NULL(g); - if (graph_id_map.find(g->graph_id()) != graph_id_map.end()) { - MS_LOG(EXCEPTION) << "Two graph has same graph id " << g->graph_id() - << ", graph: " << graph_id_map[g->graph_id()]->ToString() << " " << g->ToString(); +} + +void AscendControlParser::EraseParameter(NotNull root_graph, + const std::set &graph_list) { + std::vector exec_order = root_graph->execution_order(); + std::set search_list(exec_order.begin(), exec_order.end()); + std::set root_inputs(root_graph->inputs().begin(), root_graph->inputs().end()); + auto ref_map = root_graph->GetRefMap(); + ReferenceCounter parameter_count([](int32_t read, int32_t write) -> bool { return write == 1; }); + std::multimap> ref_multimap; + std::transform(ref_map.begin(), ref_map.end(), std::inserter(ref_multimap, ref_multimap.end()), + [](const std::pair, std::pair> &p) + -> std::pair> { + return {p.first.first, {p.first.second, p.second.first, p.second.second}}; + }); + std::set all_nodes; + std::map para_to_written_node; + for (auto &graph : graph_list) { + auto out = graph->get_return(); + MS_EXCEPTION_IF_NULL(out); + search_list.insert(out->cast()); + auto nodes = TopoSort(out); + for (auto &node : nodes) { + MS_EXCEPTION_IF_NULL(node); + auto cnode = node->cast(); + if (cnode != nullptr) { + all_nodes.insert(cnode); + } + } + } + // prepare referance count + for (auto &node : search_list) { + MS_EXCEPTION_IF_NULL(node); + // if assign node + std::set refed_parameters; + for (auto [iter, end] = ref_multimap.equal_range(node); iter != end; ++iter) { + refed_parameters.insert(std::get<1>(iter->second)); + } + + for (auto &in : node->inputs()) { + auto visit_node = AnfAlgo::VisitKernelWithReturnType(in, 0).first; 
+ if (!visit_node->isa() || root_inputs.find(visit_node) != root_inputs.end()) { + continue; + } + if (refed_parameters.find(visit_node) != refed_parameters.end()) { + parameter_count.AddWriteCount(visit_node, 1); + para_to_written_node[visit_node] = node; + } else { + parameter_count.AddReadCount(visit_node, 1); + } } - graph_id_map[g->graph_id()] = g; } - // Insert Assign - ChildGraphDataAssign(graph_id_map); - // Make UnionFindSet - UnionFindSet parameter_set = MakeUnionFindSet(kg); - // Reuse Parameter - ReuseParameter(kg, NOT_NULL(¶meter_set)); + while (parameter_count.HasValidElem()) { + auto [para, read, written] = parameter_count.GetOneValidElem(); + MS_LOG(INFO) << para->DebugString() << " was read " << read << " times, written " << written << " times."; + auto assign_iter = para_to_written_node.find(para); + if (assign_iter == para_to_written_node.end()) { + MS_LOG(EXCEPTION) << "Cannot find assign node that write " << para->DebugString(); + } + auto &assign_node = assign_iter->second; + MS_EXCEPTION_IF_NULL(assign_node); + if (!IsPrimitiveCNode(assign_node, prim::kPrimAssign)) { + parameter_count.EraseElem(para); + continue; + } + MS_LOG(INFO) << "Erase " << assign_node->DebugString(5); + EraseNodeFromExecOrder(assign_node, NOT_NULL(&exec_order)); + + auto source = AnfAlgo::VisitKernelWithReturnType(assign_node->input(kCNodeAssignSource), 0).first; + parameter_count.AddReadCount(source, -1); + parameter_count.AddWriteCount(para, -1); + for (auto &node : all_nodes) { + for (size_t i = 0; i < node->size(); ++i) { + if (node->input(i) == para) { + MS_LOG_INFO << "Replace " << node->DebugString() << " input " << i << " by " << source->DebugString(); + node->set_input(i, source); + } + } + } + parameter_count.AddReadCount(source, 1); + parameter_count.AddReadCount(para, -1); + } + root_graph->set_execution_order(exec_order); +} + +void AscendControlParser::EraseLabel(NotNull root_graph) { + std::vector exec_order = root_graph->execution_order(); + 
ReferenceCounter label_count([](int32_t read, int32_t write) -> bool { return read <= 1; }); + std::map label_to_written_node; + std::map label_id_to_label_set; + UpdateLabelIdToLabelSetMap(exec_order, NOT_NULL(&label_id_to_label_set)); + CNodePtr last_node = nullptr; + for (auto &cur_node : exec_order) { + MS_EXCEPTION_IF_NULL(cur_node); + if (AnfAlgo::IsCondControlKernel(cur_node)) { + std::vector target_labelset_nodes = GetTargetLabelSetNodes(NOT_NULL(cur_node), label_id_to_label_set); + for (auto &label_set : target_labelset_nodes) { + label_count.AddReadCount(label_set, 1); + label_to_written_node[label_set] = cur_node; + } + } else if (IsPrimitiveCNode(cur_node, prim::kPrimLabelSet)) { + label_count.AddWriteCount(cur_node, 1); + if (last_node != nullptr && !AnfAlgo::IsCondControlKernel(last_node)) { + label_count.AddReadCount(cur_node, 1); + label_to_written_node[cur_node] = last_node; + } + } + last_node = cur_node; + } + + while (label_count.HasValidElem()) { + auto [label_set, read, written] = label_count.GetOneValidElem(); + MS_LOG(INFO) << label_set->DebugString() << " was read " << read << " times, written " << written << " times."; + auto iter = label_to_written_node.find(label_set); + if (read > 0 && iter == label_to_written_node.end()) { + MS_LOG(EXCEPTION) << "Cannot find node jump to " << label_set->DebugString(); + } + CNodePtr jump_node = read > 0 ? 
iter->second : nullptr; + if (jump_node == nullptr || IsPrimitiveCNode(jump_node, prim::kPrimLabelGoto)) { + MS_LOG(INFO) << "Erase node " << label_set->DebugString(); + EraseNodeFromExecOrder(label_set, NOT_NULL(&exec_order)); + } + if (jump_node != nullptr && IsPrimitiveCNode(jump_node, prim::kPrimLabelGoto)) { + MS_LOG(INFO) << "Erase node " << jump_node->DebugString(); + EraseNodeFromExecOrder(jump_node, NOT_NULL(&exec_order)); + } + label_count.EraseElem(label_set); + } + + root_graph->set_execution_order(exec_order); } void AscendControlParser::ExecutorValidate(NotNull root_graph) { std::set memo; (void)RecurseGraph(root_graph, NOT_NULL(&memo)); + EraseParameter(root_graph, memo); + EraseLabel(root_graph); } -void AscendControlParser::ChildGraphDataAssign(const std::map &graph_id_map) { - for (auto &iter : graph_id_map) { - auto &kg = iter.second; - MS_LOG(INFO) << "Data assign graph:" << kg->graph_id(); - MS_EXCEPTION_IF_NULL(kg); - std::set> memo; - const std::vector>> &real_inputs = kg->real_inputs(); - for (auto &it : real_inputs) { - auto ¶meter = it.first; - auto &args = it.second; - for (auto &arg : args) { - MS_EXCEPTION_IF_NULL(arg); - if (memo.find({parameter, arg}) != memo.end()) { - continue; - } else { - memo.emplace(parameter, arg); - } - auto unreuse_args_map = kg->unreuse_args(); - auto unreuse_arg_iter = unreuse_args_map.find(arg); - if (unreuse_arg_iter == unreuse_args_map.end()) { - MS_EXCEPTION_IF_NULL(arg); - MS_EXCEPTION_IF_NULL(parameter); - if (!arg->isa()) { - MS_LOG(EXCEPTION) << "Reused arg must be parameter, arg:" << arg->DebugString() << "."; - } - MS_LOG(DEBUG) << "Parameter should be reused, no need insert assign, parameter: " << parameter->DebugString() - << ", arg:" << arg->DebugString(); +std::vector>> AscendControlParser::ParseCallNode( + NotNull call_node) { + std::vector>> ret; + if (!IsPrimitiveCNode(call_node.get(), prim::kPrimCall)) { + MS_LOG(EXCEPTION) << "Node " << call_node->DebugString() << " is not a call node."; 
+ } + if (call_node->size() <= kCNodeCallArg) { + MS_LOG(EXCEPTION) << "Node " << call_node->DebugString() << " has invalid inputs size " << call_node->size(); + } + const std::vector &call_node_inputs = call_node->inputs(); + auto call_arg = call_node_inputs[kCNodeCallArg]; + MS_EXCEPTION_IF_NULL(call_arg); + if (IsValueNode(call_arg)) { + ret.emplace_back(GetValueNode(call_arg), + std::vector(call_node_inputs.begin() + kCNodeCallArg + 1, call_node_inputs.end())); + } else if (IsPrimitiveCNode(call_arg, prim::kPrimSwitch)) { + auto switch_cnode = call_arg->cast(); + MS_EXCEPTION_IF_NULL(switch_cnode); + const std::vector &switch_inputs = switch_cnode->inputs(); + if (switch_inputs.size() <= kCNodeSwitchCond) { + MS_LOG(EXCEPTION) << "Node " << switch_cnode->DebugString() << " has invalid inputs size " + << switch_inputs.size(); + } + for (auto iter = switch_inputs.begin() + kCNodeSwitchCond + 1; iter != switch_inputs.end(); ++iter) { + const auto &[target_graph, args] = ParsePartial(NOT_NULL(*iter)); + ret.emplace_back(target_graph, args); + } + } else { + MS_LOG(EXCEPTION) << "Unsupport call node: " << call_node->DebugString(5); + } + return ret; +} + +void AscendControlParser::ChildGraphDataAssign( + NotNull kg, const NotNull> *> link_list, + const NotNull *> memo) { + if (memo->find(kg) != memo->end()) { + return; + } + memo->insert(kg.get()); + + MS_LOG(INFO) << "Start link data for " << kg->ToString(); + const std::vector &nodes = kg->execution_order(); + + for (auto &node : nodes) { + if (!IsPrimitiveCNode(node, prim::kPrimCall)) { + continue; + } + + auto child_graph_list = ParseCallNode(NOT_NULL(node)); + for (auto &[child_graph, args] : child_graph_list) { + MS_EXCEPTION_IF_NULL(child_graph); + const std::vector ¶ms = child_graph->inputs(); + if (args.size() != params.size()) { + MS_LOG(EXCEPTION) << child_graph->ToString() << " needs " << params.size() << " inputs but call node " + << node->DebugString(5) << " gives " << args.size(); + } + for (size_t i 
= 0; i < args.size(); ++i) { + if (args[i]->isa() && memo->find(child_graph) == memo->end()) { + MS_LOG(INFO) << args[i]->DebugString() << " to " << params[i]->DebugString() + << " should be reused, continue."; + link_list->emplace_back(args[i], params[i]); continue; } - auto target_graph_iter = graph_id_map.find(AnfAlgo::GetGraphId(arg.get())); - if (target_graph_iter == graph_id_map.end()) { - MS_LOG(EXCEPTION) << "Graph id " << AnfAlgo::GetGraphId(arg.get()) << " not found."; - } - InsertMultipleAssignToGraph(NOT_NULL(target_graph_iter->second), NOT_NULL(kg), NOT_NULL(arg), - NOT_NULL(parameter)); + + InsertMultipleAssignToGraph(kg, node, NOT_NULL(args[i]), NOT_NULL(params[i])); } } - kg->SetExecOrderByDefault(); + } + kg->SetExecOrderByDefault(); + for (auto &child_graph : kg->child_graph_order()) { + ChildGraphDataAssign(NOT_NULL(child_graph), link_list, memo); } } @@ -325,7 +471,7 @@ void AscendControlParser::InsertDependToGraph(NotNull kg, NotNul std::vector inputs = {NewValueNode(std::make_shared(prim::kPrimDepend->name())), return_node->input(kFirstDataInputIndex), attch_node.get()}; auto depend_node = kg->NewCNode(inputs); - return_node->set_input(1, depend_node); + return_node->set_input(kFirstDataInputIndex, depend_node); } void AscendControlParser::InsertControlDependToGraph(NotNull kg, NotNull first_node, @@ -381,6 +527,7 @@ void AscendControlParser::RecurseCall(NotNull kg, NotNullset_inputs(new_inputs); cur_node->set_abstract(nullptr); + AnfAlgo::SetNodeAttr(kAttrChildGraph, MakeValue>({call_kg}), cur_node.get()); MS_LOG(INFO) << "Succeed processing call func " << cur_node->DebugString(); } @@ -409,9 +556,12 @@ void AscendControlParser::RecurseSwitch(NotNull kg, NotNull new_switch_inputs = { std::make_shared(std::make_shared(kLabelSwitchOpName)), origin_switch_inputs[kCNodeSwitchCond]}; + std::vector child_graphs; for (size_t i = kCNodeSwitchCond + 1; i < kCNodeSwitchLength; ++i) { // 3.1 branch kernel graph and args - KernelGraphPtr branch_fg = 
ParsePartial(NOT_NULL(origin_switch_inputs[i])); + KernelGraphPtr branch_fg; + std::tie(branch_fg, std::ignore) = ParsePartial(NOT_NULL(origin_switch_inputs[i])); + child_graphs.push_back(branch_fg); // 3.2 recurse sub graph CNodePtr branch_label = ProcessKernelGraph(NOT_NULL(branch_fg), cur_node, back_label, memo); new_switch_inputs.push_back(branch_label); @@ -420,6 +570,7 @@ void AscendControlParser::RecurseSwitch(NotNull kg, NotNullset_inputs(new_switch_inputs); cur_node->set_abstract(nullptr); + AnfAlgo::SetNodeAttr(kAttrChildGraph, MakeValue>(child_graphs), cur_node.get()); MS_LOG(INFO) << "Succeed processing switch func " << cur_node->DebugString(); } @@ -453,9 +604,12 @@ void AscendControlParser::RecurseSwitchLayer(NotNull kg, NotNull std::vector new_switch_inputs = { std::make_shared(std::make_shared(kLabelSwitchOpName)), origin_switch_inputs[kCNodeSwitchCond]}; + std::vector child_graphs; for (size_t i = 0; i < branch_partial.size(); ++i) { // 3.1 branch kernel graph and args - KernelGraphPtr branch_fg = ParsePartial(NOT_NULL(origin_switch_inputs[i])); + KernelGraphPtr branch_fg; + std::tie(branch_fg, std::ignore) = ParsePartial(NOT_NULL(origin_switch_inputs[i])); + child_graphs.push_back(branch_fg); // 3.2 recurse sub graph CNodePtr branch_label = ProcessKernelGraph(NOT_NULL(branch_fg), cur_node, back_label, memo); new_switch_inputs.push_back(branch_label); @@ -463,13 +617,14 @@ void AscendControlParser::RecurseSwitchLayer(NotNull kg, NotNull new_switch_inputs.insert(new_switch_inputs.end(), branch_partial.begin(), branch_partial.end()); cur_node->set_inputs(new_switch_inputs); cur_node->set_abstract(nullptr); + AnfAlgo::SetNodeAttr(kAttrChildGraph, MakeValue>(child_graphs), cur_node.get()); MS_LOG(INFO) << "Succeed processing switch layer " << cur_node->DebugString(); } -KernelGraphPtr AscendControlParser::ParsePartial(NotNull node) { +std::tuple> AscendControlParser::ParsePartial(NotNull node) { if (!node.get()->isa()) { if (IsValueNode(node)) { - 
return GetValueNode(node); + return {GetValueNode(node), {}}; } MS_LOG(EXCEPTION) << "Switch branches must be partial, node: " << node->DebugString(); } @@ -485,12 +640,11 @@ KernelGraphPtr AscendControlParser::ParsePartial(NotNull node) { MS_LOG(EXCEPTION) << "Index out of range:" << partial_inputs.size() << "."; } auto branch_kg = GetValueNode(partial_inputs[kCNodePartialFunc]); - return branch_kg; + return {branch_kg, std::vector(partial_inputs.begin() + kCNodePartialFunc + 1, partial_inputs.end())}; } -void AscendControlParser::InsertMultipleAssignToGraph(NotNull from_graph, - NotNull to_graph, NotNull from, - NotNull to) { +void AscendControlParser::InsertMultipleAssignToGraph(NotNull from_graph, const AnfNodePtr &jump_node, + NotNull from, NotNull to) { std::vector from_outputs = AnfAlgo::GetAllOutput(from, {prim::kPrimTupleGetItem}); std::vector to_outputs = AnfAlgo::GetAllOutput(to, {prim::kPrimTupleGetItem}); MS_LOG(INFO) << "Insert multi-assign from [" << from->DebugString() << "] to [" << to->DebugString() << "]"; @@ -500,22 +654,35 @@ void AscendControlParser::InsertMultipleAssignToGraph(NotNull fr } for (size_t i = 0; i < from_outputs.size(); i++) { auto assign_node = InsertAssignToGraph(from_graph, NOT_NULL(from_outputs[i]), NOT_NULL(to_outputs[i])); - if (assign_node != nullptr) { - auto jump_node = GetJumpNode(from_graph, to_graph); - const auto &from_graph_exe_order = from_graph->execution_order(); - auto jump_node_iter = std::find(from_graph_exe_order.begin(), from_graph_exe_order.end(), jump_node); - if (jump_node_iter == from_graph_exe_order.end()) { - MS_EXCEPTION_IF_NULL(jump_node); - MS_LOG(EXCEPTION) << "Can't find node:" << jump_node->DebugString() << " in graph:" << from_graph->graph_id(); - } - // insert assign between jump_node -1 and jump_node - if (jump_node_iter != from_graph_exe_order.begin()) { - InsertControlDependToGraph(from_graph, NOT_NULL(*(jump_node_iter - 1)), NOT_NULL(assign_node)); - } - if (jump_node != nullptr) { - 
InsertControlDependToGraph(from_graph, NOT_NULL(assign_node), NOT_NULL(jump_node)); + const auto &from_graph_exe_order = from_graph->execution_order(); + std::vector real_exe_order(from_graph_exe_order.size()); + size_t real_exe_order_size = 0; + std::copy_if(from_graph_exe_order.begin(), from_graph_exe_order.end(), real_exe_order.begin(), + [&real_exe_order_size](const CNodePtr &node) -> bool { + return (IsPrimitiveCNode(node, prim::kPrimSwitch) || IsPrimitiveCNode(node, prim::kPrimPartial)) + ? false + : (++real_exe_order_size, true); + }); + real_exe_order.resize(real_exe_order_size); + if (jump_node == nullptr) { + if (!real_exe_order.empty()) { + InsertControlDependToGraph(from_graph, NOT_NULL(*(real_exe_order.rbegin())), NOT_NULL(assign_node)); + } else { + InsertDependToGraph(from_graph, NOT_NULL(assign_node)); } + continue; + } + + auto jump_node_iter = std::find(real_exe_order.begin(), real_exe_order.end(), jump_node); + if (jump_node_iter == real_exe_order.end()) { + MS_LOG(EXCEPTION) << "Cannot find jump node " << jump_node->DebugString() << " in graph " + << from_graph->ToString(); } + // insert assign between jump_node -1 and jump_node + if (jump_node_iter != real_exe_order.begin()) { + InsertControlDependToGraph(from_graph, NOT_NULL(*(jump_node_iter - 1)), NOT_NULL(assign_node)); + } + InsertControlDependToGraph(from_graph, NOT_NULL(assign_node), NOT_NULL(jump_node)); } } @@ -618,26 +785,45 @@ bool AscendControlParser::CheckLabelIndex(uint32_t order_index, uint32_t label_i } } -void AscendControlParser::UpdateChildGraphOrder(NotNull kg) { - MS_LOG(INFO) << "Graph id:" << kg->graph_id(); - kg->SetExecOrderByDefault(); - auto call_nodes = kg->FindNodeByPrimitive(std::make_shared(prim::kPrimCall->name())); - std::vector child_graph_order; - for (auto &call_node : call_nodes) { - MS_EXCEPTION_IF_NULL(call_node); - auto call_child_graphs = AnfAlgo::GetCallNodeKernelGraph(call_node->cast()); - for (const auto &child_graph : call_child_graphs) { - 
MS_EXCEPTION_IF_NULL(child_graph); - if (child_graph != kg->parent_graph()) { - child_graph->set_parent_graph(kg.get()); - } - child_graph_order.push_back(child_graph); - } +void AscendControlParser::ReferenceCounter::AddReadCount(const AnfNodePtr &key, int32_t num) { + auto iter = count_.find(key); + if (iter != count_.end()) { + iter->second.first += num; + } else { + count_[key] = {num, 0}; } - for (size_t i = 0; i < child_graph_order.size(); i++) { - MS_LOG(INFO) << "Child graph[" << i << "][id:" << child_graph_order[i]->graph_id() << "]"; +} + +void AscendControlParser::ReferenceCounter::AddWriteCount(const AnfNodePtr &key, int32_t num) { + auto iter = count_.find(key); + if (iter != count_.end()) { + iter->second.second += num; + } else { + count_[key] = {0, num}; + } +} + +void AscendControlParser::ReferenceCounter::EraseElem(const AnfNodePtr &key) { count_.erase(key); } + +bool AscendControlParser::ReferenceCounter::HasValidElem() const { + auto it = std::find_if(count_.begin(), count_.end(), + [this](const std::pair> &p) -> bool { + auto &[read, written] = p.second; + return predicate_(read, written); + }); + return it != count_.end(); +} + +std::tuple AscendControlParser::ReferenceCounter::GetOneValidElem() const { + auto it = std::find_if(count_.begin(), count_.end(), + [this](const std::pair> &p) -> bool { + auto &[read, written] = p.second; + return predicate_(read, written); + }); + if (it == count_.end()) { + MS_LOG(EXCEPTION) << "No valid parameter."; } - kg->set_child_graph_order(child_graph_order); + return {it->first, it->second.first, it->second.second}; } } // namespace session } // namespace mindspore diff --git a/mindspore/ccsrc/session/ascend_control_parser.h b/mindspore/ccsrc/backend/session/ascend_control_parser.h similarity index 70% rename from mindspore/ccsrc/session/ascend_control_parser.h rename to mindspore/ccsrc/backend/session/ascend_control_parser.h index 7530f2019e..ac24735139 100644 --- 
a/mindspore/ccsrc/session/ascend_control_parser.h +++ b/mindspore/ccsrc/backend/session/ascend_control_parser.h @@ -20,7 +20,9 @@ #include #include #include -#include "session/kernel_graph.h" +#include +#include +#include "backend/session/kernel_graph.h" #include "utils/base_ref.h" #include "utils/contract.h" #include "utils/union_find_set.h" @@ -29,16 +31,23 @@ namespace mindspore { namespace session { class AscendControlParser { public: - static void ChildGraphDataAssign(const std::map &graph_id_map); static void LinkGraph(NotNull kg); static void InsertDependToGraph(NotNull kg, NotNull attch_node); static void InsertControlDependToGraph(NotNull kg, NotNull first_node, NotNull second_node); static void ExecutorValidate(NotNull root_graph); - static void UpdateChildGraphOrder(NotNull kg); + static void InsertMultipleAssignToGraph(NotNull from_graph, const AnfNodePtr &jump_node, + NotNull from, NotNull to); private: + class ReferenceCounter; + + static void EraseParameter(NotNull root_graph, const std::set &graph_list); + static void EraseLabel(NotNull root_graph); + static void ChildGraphDataAssign(NotNull kg, + const NotNull> *> link_list, + const NotNull *> memo); static NotNull GetStartLabel(NotNull kg, const CNodePtr &last_node, const CNodePtr &last_label); static NotNull ProcessKernelGraph(NotNull kg, const CNodePtr &last_node, @@ -53,11 +62,10 @@ class AscendControlParser { static void LinkParentGraph(NotNull kg, const CNodePtr &from_graph_call_node, const CNodePtr &last_label); - static KernelGraphPtr ParsePartial(NotNull node); - static void InsertMultipleAssignToGraph(NotNull from_graph, NotNull to_graph, - NotNull from, NotNull to); static AnfNodePtr InsertAssignToGraph(NotNull kg, NotNull from, NotNull to); + static std::vector>> ParseCallNode(NotNull call_node); + static std::tuple> ParsePartial(NotNull node); // root graph order static bool CheckLabelIndex(uint32_t order_index, uint32_t label_index, const CNodePtr &cnode, @@ -65,6 +73,19 @@ class 
AscendControlParser { static std::vector RecurseGraph(NotNull graph, const NotNull *> memo); }; +class AscendControlParser::ReferenceCounter { + public: + explicit ReferenceCounter(std::function func) : predicate_(func), count_() {} + void AddReadCount(const AnfNodePtr &key, int32_t num); + void AddWriteCount(const AnfNodePtr &key, int32_t num); + void EraseElem(const AnfNodePtr &key); + bool HasValidElem() const; + std::tuple GetOneValidElem() const; + + private: + std::function predicate_; + std::map> count_; +}; } // namespace session } // namespace mindspore diff --git a/mindspore/ccsrc/session/ascend_inference_session.cc b/mindspore/ccsrc/backend/session/ascend_inference_session.cc similarity index 50% rename from mindspore/ccsrc/session/ascend_inference_session.cc rename to mindspore/ccsrc/backend/session/ascend_inference_session.cc index aef7738d0b..d251eb2039 100644 --- a/mindspore/ccsrc/session/ascend_inference_session.cc +++ b/mindspore/ccsrc/backend/session/ascend_inference_session.cc @@ -13,81 +13,21 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "session/ascend_inference_session.h" -#include "operator/ops.h" +#include "backend/session/ascend_inference_session.h" +#include "frontend/operator/ops.h" #include "ir/tensor.h" -#include "ir/tensor_py.h" #include "ir/anf.h" -#include "ir/param_value_py.h" -#include "device/kernel_runtime.h" -#include "session/anf_runtime_algorithm.h" +#include "ir/param_value.h" +#include "runtime/device/kernel_runtime.h" +#include "backend/session/anf_runtime_algorithm.h" #include "common/utils.h" #include "common/trans.h" -#include "kernel/tbe/tbe_python_funcs.h" +#include "backend/kernel_compiler/tbe/tbe_python_funcs.h" #include "utils/config_manager.h" #include "utils/base_ref_extends.h" -using mindspore::tensor::TensorPy; - namespace mindspore { namespace session { -namespace { -std::set weight_infos; -static TypeId GetDataType(const py::buffer_info &buf) { - if (buf.format.size() == 1) { - switch (buf.format.front()) { - case 'e': - case 'f': - case 'd': - switch (buf.itemsize) { - case 2: - return TypeId::kNumberTypeFloat16; - case 4: - return TypeId::kNumberTypeFloat32; - case 8: - return TypeId::kNumberTypeFloat64; - } - break; - case 'b': - case 'h': - case 'i': - case 'l': - case 'q': - switch (buf.itemsize) { - case 1: - return TypeId::kNumberTypeInt8; - case 2: - return TypeId::kNumberTypeInt16; - case 4: - return TypeId::kNumberTypeInt32; - case 8: - return TypeId::kNumberTypeInt64; - } - break; - case 'B': - case 'H': - case 'I': - case 'L': - case 'Q': - switch (buf.itemsize) { - case 1: - return TypeId::kNumberTypeUInt8; - case 2: - return TypeId::kNumberTypeUInt16; - case 4: - return TypeId::kNumberTypeUInt32; - case 8: - return TypeId::kNumberTypeUInt64; - } - break; - case '?': - return TypeId::kNumberTypeBool; - } - } - MS_LOG(WARNING) << "Unsupported DataType format " << buf.format << " item size " << buf.itemsize; - return TypeId::kTypeUnknown; -} -} // namespace void AscendInferenceSession::LoadInputData(const std::shared_ptr &kernel_graph, 
const std::vector &inputs_const) const { MS_EXCEPTION_IF_NULL(kernel_graph); @@ -105,24 +45,37 @@ void AscendInferenceSession::LoadInputData(const std::shared_ptr &k MS_EXCEPTION_IF_NULL(pk_node); auto device_address = AnfAlgo::GetMutableOutputAddr(pk_node, 0); MS_EXCEPTION_IF_NULL(device_address); - if (AnfAlgo::IsParameterWeight(pk_node)) { - if (weight_infos.count(pk_node) != 0) { - continue; - } - auto param_value = std::dynamic_pointer_cast(pk_node->default_param()); - MS_EXCEPTION_IF_NULL(param_value); - auto py_param = param_value->value(); - MS_EXCEPTION_IF_NULL(py_param); - py::array py_array = py_param.cast(); - py::buffer_info buf = py_array.request(); - auto buf_type = GetDataType(buf); + if (!AnfAlgo::IsParameterWeight(pk_node)) { + tensor = inputs[no_weight_input++]; if (!device_address->SyncHostToDevice(trans::GetRuntimePaddingShape(pk_node, 0), - LongToSize(buf.size * buf.itemsize), buf_type, buf.ptr)) { + LongToSize(tensor->data().nbytes()), tensor->data_type(), + tensor->data_c())) { MS_LOG(EXCEPTION) << "SyncHostToDevice failed."; } - weight_infos.insert(pk_node); - } else { - tensor = inputs[no_weight_input++]; + } + } +} + +GraphId AscendInferenceSession::CompileGraph(NotNull func_graph) { + auto graph_id = AscendSession::CompileGraph(func_graph); + auto kernel_graph = GetGraph(graph_id); + MS_EXCEPTION_IF_NULL(kernel_graph); + // load weight data to device + auto input_nodes = kernel_graph->inputs(); + for (size_t i = 0; i < input_nodes.size(); ++i) { + if (!input_nodes[i]->isa()) { + MS_LOG(ERROR) << "Kernel graph inputs have anfnode which is not Parameter"; + continue; + } + auto pk_node = input_nodes[i]->cast(); + MS_EXCEPTION_IF_NULL(pk_node); + auto device_address = AnfAlgo::GetMutableOutputAddr(pk_node, 0); + MS_EXCEPTION_IF_NULL(device_address); + if (AnfAlgo::IsParameterWeight(pk_node)) { + const auto ¶m_value = pk_node->default_param(); + MS_EXCEPTION_IF_NULL(param_value); + auto tensor = 
std::dynamic_pointer_cast(param_value->value()); + MS_EXCEPTION_IF_NULL(tensor); if (!device_address->SyncHostToDevice(trans::GetRuntimePaddingShape(pk_node, 0), LongToSize(tensor->data().nbytes()), tensor->data_type(), tensor->data_c())) { @@ -130,6 +83,7 @@ void AscendInferenceSession::LoadInputData(const std::shared_ptr &k } } } + return graph_id; } } // namespace session } // namespace mindspore diff --git a/mindspore/ccsrc/session/ascend_inference_session.h b/mindspore/ccsrc/backend/session/ascend_inference_session.h similarity index 82% rename from mindspore/ccsrc/session/ascend_inference_session.h rename to mindspore/ccsrc/backend/session/ascend_inference_session.h index 53be881f93..5364ae8d4e 100644 --- a/mindspore/ccsrc/session/ascend_inference_session.h +++ b/mindspore/ccsrc/backend/session/ascend_inference_session.h @@ -24,11 +24,11 @@ #include #include #include -#include "session/ascend_session.h" -#include "session/kernel_graph.h" -#include "kernel/kernel.h" -#include "session/session_factory.h" -#include "session/ascend_control_parser.h" +#include "backend/session/ascend_session.h" +#include "backend/session/kernel_graph.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/session_factory.h" +#include "backend/session/ascend_control_parser.h" namespace mindspore { namespace session { @@ -38,6 +38,7 @@ class AscendInferenceSession : public AscendSession { ~AscendInferenceSession() = default; void LoadInputData(const std::shared_ptr &kernel_graph, const std::vector &inputs_const) const; + GraphId CompileGraph(NotNull func_graph) override; }; MS_REG_SESSION(kDavinciInferenceDevice, AscendInferenceSession); } // namespace session diff --git a/mindspore/ccsrc/session/ascend_session.cc b/mindspore/ccsrc/backend/session/ascend_session.cc similarity index 86% rename from mindspore/ccsrc/session/ascend_session.cc rename to mindspore/ccsrc/backend/session/ascend_session.cc index f361cb26ca..75bc4e2d05 100644 --- 
a/mindspore/ccsrc/session/ascend_session.cc +++ b/mindspore/ccsrc/backend/session/ascend_session.cc @@ -13,37 +13,37 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "session/ascend_session.h" +#include "backend/session/ascend_session.h" #include #include #include #include #include #include -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "ir/tensor.h" #include "ir/anf.h" #include "common/trans.h" -#include "device/kernel_runtime.h" -#include "device/ascend/kernel_select_ascend.h" -#include "device/ascend/kernel_build_ascend.h" -#include "device/ascend/ascend_kernel_runtime.h" -#include "device/ascend/ascend_device_address.h" -#include "pre_activate/ascend/ascend_backend_optimization.h" -#include "pre_activate/common/common_backend_optimization.h" -#include "device/kernel_adjust.h" -#include "device/ascend/ascend_stream_assign.h" -#include "device/ascend/ascend_label_assign.h" +#include "runtime/device/kernel_runtime.h" +#include "runtime/device/ascend/kernel_select_ascend.h" +#include "runtime/device/ascend/kernel_build_ascend.h" +#include "runtime/device/ascend/ascend_kernel_runtime.h" +#include "runtime/device/ascend/ascend_device_address.h" +#include "backend/optimizer/ascend/ascend_backend_optimization.h" +#include "backend/optimizer/common/common_backend_optimization.h" +#include "runtime/device/kernel_adjust.h" +#include "runtime/device/ascend/ascend_stream_assign.h" +#include "runtime/device/ascend/ascend_label_assign.h" #include "predict/predict.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "ir/scalar.h" #include "debug/anf_ir_dump.h" #include "debug/anf_ir_utils.h" #include "debug/draw.h" #include "common/utils.h" -#include "pre_activate/common/helper.h" -#include "device/kernel_runtime_manager.h" -#include "kernel/tbe/tbe_python_funcs.h" +#include "backend/optimizer/common/helper.h" +#include 
"runtime/device/kernel_runtime_manager.h" +#include "backend/kernel_compiler/tbe/tbe_python_funcs.h" #include "utils/config_manager.h" #include "utils/base_ref_extends.h" #include "debug/tensor_load.h" @@ -51,6 +51,7 @@ namespace mindspore { namespace session { const size_t kInvalidIndex = SIZE_MAX; +constexpr size_t kReturnDataIndex = 1; namespace { void DumpGraphExeOrder(const std::vector &execution_order, const std::string &tag = "") { MS_LOG(INFO) << "Dump execution_order size " << execution_order.size(); @@ -288,6 +289,17 @@ static void RecurseToUpdateCallRealInput(NotNull graph, // this action should from bottom to top graph->UpdateCallRealInput(); } + +void InsertMakeTupleForOutput(NotNull root_graph) { + auto return_node = root_graph->get_return(); + MS_EXCEPTION_IF_NULL(return_node); + if (return_node->size() <= kReturnDataIndex) { + return; + } + auto make_tuple = root_graph->NewCNode( + {NewValueNode(std::make_shared(prim::kPrimMakeTuple->name())), root_graph->output()}); + root_graph->set_output(make_tuple); +} } // namespace GraphId AscendSession::CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrList &outputs) { @@ -307,19 +319,36 @@ GraphId AscendSession::CompileGraph(NotNull func_graph) { // empty graph dont entry to backend if (root_graph->execution_order().empty()) { MS_LOG(INFO) << root_graph->ToString() << " is empty graph."; + InsertMakeTupleForOutput(NOT_NULL(root_graph)); root_graph->set_executable(false); InitRuntimeResource(); return root_graph->graph_id(); } - // split switch - SplitGraphs(NOT_NULL(root_graph)); + // create parameter for multiple branch + std::set memo; + CreateMultiBranchOutput(NOT_NULL(root_graph), NOT_NULL(&memo)); + memo.clear(); // insert goto labels and label_sets LinkChildGraphs(NOT_NULL(root_graph)); // resource initialize InitRuntimeResource(); - // recurse compile child root_graph - std::set memo; - RecurseCompileGraph(NOT_NULL(root_graph), NOT_NULL(&memo)); + + IrFusionPass(NOT_NULL(root_graph), 
NOT_NULL(&memo)); + memo.clear(); + + SelectKernel(NOT_NULL(root_graph)); + memo.clear(); + + HardwareOptimize(NOT_NULL(root_graph), NOT_NULL(&memo)); + memo.clear(); + + AssignStaticMemory(NOT_NULL(root_graph), NOT_NULL(&memo)); + memo.clear(); + + UpdateRefOutputMap(NOT_NULL(root_graph), NOT_NULL(&memo)); + memo.clear(); + // add make_tuple to the output graph + InsertMakeTupleForOutput(NOT_NULL(root_graph)); // root root_graph valiate,include genearte execute order and so on RootGraphExecutorValidate(NOT_NULL(root_graph)); // adjust kernel @@ -330,12 +359,18 @@ GraphId AscendSession::CompileGraph(NotNull func_graph) { device::KernelAdjust::GetInstance().Profiling(NOT_NULL(root_graph.get())); // build kernel BuildKernel(root_graph); +#ifdef ENABLE_DEBUGGER + if (debugger_) { + debugger_->PreExecute(root_graph); + } +#endif // alloc mem MemoryAlloc(root_graph.get()); // task generate GenerateTaskInfo(root_graph); // load task into device LoadTask(root_graph); + DumpAllGraphs(all_graphs); // return the root_graph id to backend auto graph_id = root_graph->graph_id(); return graph_id; @@ -405,6 +440,11 @@ void AscendSession::BuildGraph(GraphId graph_id) { BuildKernel(graph); auto ms_context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(ms_context); +#ifdef ENABLE_DEBUGGER + if (debugger_) { + debugger_->PreExecute(graph); + } +#endif if (ms_context->precompile_only()) { MS_LOG(INFO) << "Precompile only, stop in build kernel step"; } else { @@ -417,7 +457,7 @@ void AscendSession::BuildGraph(GraphId graph_id) { } // sync the inital const tensor to device SyncInitialTenosrToDevice(); - ExportChildGraphs(graph_id); + DumpAllGraphs({graph}); MS_LOG(INFO) << "End"; } @@ -473,12 +513,6 @@ void AscendSession::RunGraph(const GraphId &graph_id, const std::vectorPreExecute(kernel_graph); - } -#endif { py::gil_scoped_release release; // run task on device @@ -761,7 +795,7 @@ void AscendSession::Dump(const std::shared_ptr &kernel_graph) const MS_LOG(INFO) << "Finish!"; } -void 
AscendSession::ExportChildGraphs(const GraphId graph_id) { +void AscendSession::DumpAllGraphs(const std::vector &all_graphs) { #ifdef ENABLE_DUMP_IR auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); @@ -773,21 +807,11 @@ void AscendSession::ExportChildGraphs(const GraphId graph_id) { if (save_graphs_path.empty()) { save_graphs_path = "."; } - if (graph_id == final_graph_id_) { - const auto &graph_order = GetGraphOrder(final_graph_id_); - const auto &graph_type = GetGraphOrderType(final_graph_id_); - for (size_t i = 0; i < graph_order.size(); i++) { - if (graph_type[i] == BRANCH_END || graph_type[i] == BRANCH_START) { - continue; - } - const auto child_graph = GetGraph(graph_order[i]); - MS_LOG(DEBUG) << "Start export child graph " << graph_order[i]; - MS_EXCEPTION_IF_NULL(child_graph); - std::string file_path = save_graphs_path + "/graph_build_" + std::to_string(child_graph->graph_id()) + ".ir"; - DumpIR(file_path, child_graph, true); - DumpIRProto(child_graph, "vm_build_" + std::to_string(child_graph->graph_id())); - MS_LOG(DEBUG) << "End export child graph " << graph_order[i]; - } + for (auto &graph : all_graphs) { + MS_EXCEPTION_IF_NULL(graph); + std::string file_path = save_graphs_path + "/graph_build_" + std::to_string(graph->graph_id()) + ".ir"; + DumpIR(file_path, graph, true); + DumpIRProto(graph, "vm_build_" + std::to_string(graph->graph_id())); } #endif } @@ -798,12 +822,14 @@ void AscendSession::LoadTensor(const std::shared_ptr &kernel_graph) #ifdef ENABLE_DEBUGGER auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_); MS_EXCEPTION_IF_NULL(runtime_instance); - DebugServices *debug_services = debugger_->get_debug_services(); - TensorLoader *tensor_loader = debug_services->get_tensor_loader(); + DebugServices *debug_services = debugger_->debug_services(); + TensorLoader *tensor_loader = debug_services->tensor_loader(); + // TensorData will be freed up here 
tensor_loader->EmptyTensor(); uint32_t iter_num = tensor_loader->GetIterNum(); tensor_loader->set_iter_num(++iter_num); (void)runtime_instance->LoadData(kernel_graph.get(), debugger_.get()); + tensor_loader->EmptyPrevTensor(); #endif MS_LOG(INFO) << "Finish!"; } @@ -1027,7 +1053,7 @@ void AscendSession::InsertSwitchToGraph(GraphId condition_graph_id, GraphId true // append switch at the end of condition graph auto return_node = condition_graph->get_return(); MS_EXCEPTION_IF_NULL(return_node); - InsertControlDependToGraph(condition_graph_id, return_node->input(1), switch_node); + InsertControlDependToGraph(condition_graph_id, return_node->input(kReturnDataIndex), switch_node); MS_LOG(INFO) << "Finish!"; } @@ -1477,7 +1503,7 @@ void AscendSession::InsertStreamActiveToGraph(GraphId graph_id, uint32_t actived // append the active node at the end of from graph auto return_node = from_graph->get_return(); MS_EXCEPTION_IF_NULL(return_node); - InsertControlDependToGraph(graph_id, return_node->input(1), active_node); + InsertControlDependToGraph(graph_id, return_node->input(kReturnDataIndex), active_node); } void AscendSession::InsertDependToGraph(GraphId graph_id, const AnfNodePtr &attch_node) { @@ -1630,6 +1656,10 @@ void AscendSession::BackendOptimization(const std::vector &all_g void AscendSession::SplitGraphs(NotNull root_graph) { std::set memo; + // if output of graph is nullptr,no need insert maketuple at the end of graph + if (root_graph->output() == nullptr) { + return; + } // if root graph output is a call node ,the root graph is condition graph of 'if' sentence auto root_graph_output = AnfAlgo::VisitKernelWithReturnType(root_graph->output(), 0).first; if (AnfAlgo::CheckPrimitiveType(root_graph_output, prim::kPrimCall)) { @@ -1680,7 +1710,7 @@ void AscendSession::SplitGraph(NotNull graph, const std::setget_return())); // update the root graph child graph order - AscendControlParser::UpdateChildGraphOrder(graph); + graph->UpdateChildGraphOrder(); // get child list 
from current graph std::vector> child_graph_lists = GetChildList(apply_list, cut_prims); if (child_graph_lists.size() > 1) { @@ -1712,7 +1742,7 @@ void AscendSession::SplitGraph(NotNull graph, const std::setUpdateChildGraphOrder(); UpdateRealInput(graph, split_flag, memo); MS_LOG(INFO) << "Split graph[" << graph->graph_id() << "] end"; } @@ -1751,5 +1781,216 @@ void AscendSession::RecurseCompileGraph(NotNull graph, const Not } } } + +void AscendSession::CreateMultiBranchOutput(NotNull graph, NotNull *> memo) { + if (memo->find(graph.get()) != memo->end()) { + return; + } + memo->insert(graph.get()); + + graph->UpdateChildGraphOrder(); + for (auto &child_graph : graph->child_graph_order()) { + CreateMultiBranchOutput(NOT_NULL(child_graph), memo); + } + + std::map need_replace_list; + auto node_list = GetCNodes(TopoSort(graph->get_return())); + for (auto &node : node_list) { + if (AnfAlgo::CheckPrimitiveType(node, prim::kPrimCall)) { + // create a parameter to store the output of multiple branch and set the parameter as the condition graph's output + // auto multi_output_param = graph->NewParameter(); + auto origin_inputs = graph->inputs(); + auto output_param = CreateNewParameterFromCNode(node, true, graph.get().get()); + MS_EXCEPTION_IF_NULL(graph->MutableInputs()); + graph->MutableInputs()->operator=(origin_inputs); + graph->AddChildGraphResult(output_param); + + std::vector depend_inputs = { + graph->NewValueNode(NewValueNode(std::make_shared(prim::kPrimDepend->name()))), output_param, node}; + auto depend = graph->NewCNode(depend_inputs); + need_replace_list.emplace(node, depend); + MS_LOG(INFO) << "Create parameter " << output_param->DebugString() << " for call node " << node->DebugString() + << ", depend node is " << depend->DebugString(); + // insert assign in order to transfer child graph output to parameter + auto child_graphs = AnfAlgo::GetCallNodeKernelGraph(node); + for (auto &child_graph : child_graphs) { + MS_EXCEPTION_IF_NULL(child_graph); + if 
(child_graph->get_output_null()) { + continue; + } + auto graph_output = child_graph->output(); + AscendControlParser::InsertMultipleAssignToGraph(NOT_NULL(child_graph), nullptr, NOT_NULL(graph_output), + NOT_NULL(output_param)); + } + } + } + // searching for nodes' input to replace call by depend(parameter, call) + for (auto &node : node_list) { + for (size_t i = 0; i < node->size(); ++i) { + auto input = node->input(i); + auto iter = need_replace_list.find(input); + if (iter != need_replace_list.end()) { + node->set_input(i, iter->second); + } + } + } +} + +void AscendSession::IrFusionPass(const NotNull graph, NotNull *> memo) { + if (memo->find(graph) != memo->end()) { + return; + } + memo->insert(graph.get()); + + opt::AscendBackendIRFusionOptimization(graph); + opt::AscendBackendFuseBasicOpt(graph, true); + opt::AscendBackendGraphKernelOpt(graph, true); + graph->SetExecOrderByDefault(); + + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + bool save_graphs = context_ptr->save_graphs_flag(); + auto save_graphs_path = context_ptr->save_graphs_path(); + if (save_graphs) { + if (save_graphs_path.empty()) { + save_graphs_path = "."; + } + std::string file_path = + save_graphs_path + "/" + "select_kernel_before" + "_graph_" + std::to_string(graph->graph_id()) + ".ir"; + DumpIR(file_path, graph.get()); + } + + for (auto &child_graph : graph->child_graph_order()) { + IrFusionPass(NOT_NULL(child_graph), memo); + } +} + +void AscendSession::SelectKernel(NotNull root_graph) { + MS_LOG(INFO) << "Start select kernel."; + size_t raise_precision_count = 0; + size_t reduce_precision_count = 0; + + std::set memo; + (void)RecurseSelectKernelInfo(root_graph, NOT_NULL(&memo), &raise_precision_count, &reduce_precision_count); + memo.clear(); + + auto ms_context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(ms_context); + if (ms_context->execution_mode() == kGraphMode) { + if (raise_precision_count > 0) { + MS_LOG(WARNING) << "There has " << 
raise_precision_count + << " node/nodes used raise precision to selected the kernel!"; + } + if (reduce_precision_count > 0) { + MS_LOG(WARNING) << "There has " << raise_precision_count + << " node/nodes used reduce precision to selected the kernel!"; + } + } + MS_LOG(INFO) << "Finish!"; +} + +void AscendSession::RecurseSelectKernelInfo(NotNull graph, + NotNull *> const memo, + size_t *const raise_precision_count, + size_t *const reduce_precision_count) const { + if (memo->find(graph) != memo->end()) { + return; + } + memo->insert(graph.get()); + MS_LOG(INFO) << "Start to select kernel info in graph: " << graph->graph_id(); + + for (const auto &cnode : graph->execution_order()) { + if (AnfAlgo::IsCondControlKernel(cnode)) { + std::vector child_graphs; + if (AnfAlgo::HasNodeAttr(kAttrChildGraph, cnode)) { + child_graphs = AnfAlgo::GetNodeAttr>(cnode, kAttrChildGraph); + } + for (auto &child_graph : child_graphs) { + RecurseSelectKernelInfo(NOT_NULL(child_graph), memo, raise_precision_count, reduce_precision_count); + } + } + + auto status = device::ascend::SelectKernelInfo(cnode); + if (status == device::ascend::kStatusRaisePrecision) { + (*raise_precision_count)++; + } else if (status == device::ascend::kStatusReducePrecision) { + (*reduce_precision_count)++; + } + MS_LOG(INFO) << "Select ApplyKernel: " << cnode->DebugString(); + } + + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + bool save_graphs = context_ptr->save_graphs_flag(); + auto save_graphs_path = context_ptr->save_graphs_path(); + if (save_graphs) { + if (save_graphs_path.empty()) { + save_graphs_path = "."; + } + std::string file_path = + save_graphs_path + "/" + "select_kernel_after" + "_graph_" + std::to_string(graph->graph_id()) + ".ir"; + DumpIR(file_path, graph.get()); + } + MS_LOG(INFO) << "Finish selecting kernel info in graph: " << graph->graph_id(); +} + +void AscendSession::HardwareOptimize(NotNull graph, + NotNull *> const memo) const { + if 
(memo->find(graph) != memo->end()) { + return; + } + memo->insert(graph.get()); + + MS_LOG(INFO) << "Start to do HardwareOptimize in graph: " << graph->graph_id(); + // convert kernel Graph to model + predictmodel::StepConvertGraph(graph.get()); + + HardwareOptimize(graph.get()); + for (auto &child_graph : graph->child_graph_order()) { + HardwareOptimize(NOT_NULL(child_graph), memo); + } + MS_LOG(INFO) << "Finish doing HardwareOptimize in graph: " << graph->graph_id(); +} + +void AscendSession::AssignStaticMemory(NotNull graph, + NotNull *> const memo) const { + if (memo->find(graph) != memo->end()) { + return; + } + memo->insert(graph.get()); + + MS_LOG(INFO) << "Start to assign static memory for parameter in graph: " << graph->graph_id(); + // assign static memory for parameters + auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_); + MS_EXCEPTION_IF_NULL(runtime_instance); + runtime_instance->AssignStaticMemoryInput(graph.get().get()); + runtime_instance->AssignStaticMemoryValueNode(graph.get().get()); + for (auto &child_graph : graph->child_graph_order()) { + AssignStaticMemory(NOT_NULL(child_graph), memo); + } + MS_LOG(INFO) << "Finish assigning static memory for parameter in graph: " << graph->graph_id(); +} + +void AscendSession::UpdateRefOutputMap(NotNull graph, + NotNull *> const memo) const { + if (memo->find(graph) != memo->end()) { + return; + } + memo->insert(graph.get()); + + for (auto &child_graph : graph->child_graph_order()) { + UpdateRefOutputMap(NOT_NULL(child_graph), memo); + // copy ref map to final graph + auto child_ref_map = child_graph->GetRefMap(); + for (auto &item : child_ref_map) { + if (graph->IsInRefOutputMap(item.first)) { + MS_LOG(WARNING) << "The ref pair <" << item.first.first->DebugString() << ", " << item.first.second + << "> is already in " << graph->ToString(); + continue; + } + graph->AddRefCorrespondPairs(item.first, item.second); + } + } +} } // namespace session } // 
namespace mindspore diff --git a/mindspore/ccsrc/session/ascend_session.h b/mindspore/ccsrc/backend/session/ascend_session.h similarity index 88% rename from mindspore/ccsrc/session/ascend_session.h rename to mindspore/ccsrc/backend/session/ascend_session.h index 531860c379..11cb1c92d2 100755 --- a/mindspore/ccsrc/session/ascend_session.h +++ b/mindspore/ccsrc/backend/session/ascend_session.h @@ -24,11 +24,11 @@ #include #include #include -#include "session/session_basic.h" -#include "session/kernel_graph.h" -#include "kernel/kernel.h" -#include "session/session_factory.h" -#include "session/ascend_control_parser.h" +#include "backend/session/session_basic.h" +#include "backend/session/kernel_graph.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/session_factory.h" +#include "backend/session/ascend_control_parser.h" namespace mindspore { namespace session { @@ -85,7 +85,7 @@ class AscendSession : public SessionBasic { void LoadTask(const std::shared_ptr &kernel_graph) const; void ExecTask(const std::shared_ptr &kernel_graph) const; void Dump(const std::shared_ptr &kernel_graph) const; - void ExportChildGraphs(const GraphId graph_id); + void DumpAllGraphs(const std::vector &all_graphs); void LoadTensor(const std::shared_ptr &kernel_graph) const; // below functions are used for run op void RunOpHardwareOptimize(const std::shared_ptr &kernel_graph) const; @@ -151,6 +151,15 @@ class AscendSession : public SessionBasic { // sync intial tensors' data to device void SyncInitialTenosrToDevice(); void SetFinalGraphSummaryFlag(const std::shared_ptr &kernel_graph); + // create parameter to receive data from multiple branch output + void CreateMultiBranchOutput(NotNull graph, NotNull *> memo); + void SelectKernel(NotNull root_graph); + void RecurseSelectKernelInfo(NotNull graph, NotNull *> const memo, + size_t *const raise_precision_count, size_t *const reduce_precision_count) const; + void IrFusionPass(const NotNull graph, NotNull *> memo); + void 
HardwareOptimize(const NotNull graph, NotNull *> memo) const; + void AssignStaticMemory(const NotNull graph, NotNull *> memo) const; + void UpdateRefOutputMap(const NotNull graph, NotNull *> memo) const; // member variables // key is final_graph_id,value is child graph execute order of final graph diff --git a/mindspore/ccsrc/session/cpu_session.cc b/mindspore/ccsrc/backend/session/cpu_session.cc similarity index 94% rename from mindspore/ccsrc/session/cpu_session.cc rename to mindspore/ccsrc/backend/session/cpu_session.cc index 1927df2f49..ca1c78d206 100644 --- a/mindspore/ccsrc/session/cpu_session.cc +++ b/mindspore/ccsrc/backend/session/cpu_session.cc @@ -14,17 +14,17 @@ * limitations under the License. */ -#include "session/cpu_session.h" +#include "backend/session/cpu_session.h" #include #include "ir/tensor.h" #include "ir/anf.h" -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" #include "common/utils.h" -#include "session/anf_runtime_algorithm.h" -#include "device/kernel_runtime.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "runtime/device/kernel_runtime.h" #include "predict/predict.h" -#include "kernel/cpu/cpu_kernel_factory.h" -#include "device/cpu/kernel_select_cpu.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" +#include "runtime/device/cpu/kernel_select_cpu.h" #ifdef ENABLE_DEBUGGER #include "debug/debugger/debugger.h" #endif diff --git a/mindspore/ccsrc/session/cpu_session.h b/mindspore/ccsrc/backend/session/cpu_session.h similarity index 90% rename from mindspore/ccsrc/session/cpu_session.h rename to mindspore/ccsrc/backend/session/cpu_session.h index 36b987e840..b0dbd1cc2b 100644 --- a/mindspore/ccsrc/session/cpu_session.h +++ b/mindspore/ccsrc/backend/session/cpu_session.h @@ -18,10 +18,10 @@ #include #include #include -#include "session/session_basic.h" -#include "session/kernel_graph.h" -#include "device/cpu/cpu_kernel_runtime.h" -#include "session/session_factory.h" +#include 
"backend/session/session_basic.h" +#include "backend/session/kernel_graph.h" +#include "runtime/device/cpu/cpu_kernel_runtime.h" +#include "backend/session/session_factory.h" namespace mindspore { namespace session { class CPUSession : public SessionBasic { diff --git a/mindspore/ccsrc/session/gpu_session.cc b/mindspore/ccsrc/backend/session/gpu_session.cc similarity index 92% rename from mindspore/ccsrc/session/gpu_session.cc rename to mindspore/ccsrc/backend/session/gpu_session.cc index 7765e93758..14e30c1a44 100644 --- a/mindspore/ccsrc/session/gpu_session.cc +++ b/mindspore/ccsrc/backend/session/gpu_session.cc @@ -13,19 +13,19 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "session/gpu_session.h" -#include "device/gpu/kernel_info_setter.h" -#include "device/gpu/gpu_kernel_build.h" -#include "device/gpu/gpu_kernel_runtime.h" -#include "device/gpu/gpu_stream_assign.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/pass_manager.h" -#include "pre_activate/common/helper.h" -#include "pre_activate/pass/communication_op_fusion.h" -#include "pre_activate/pass/getitem_tuple.h" -#include "pre_activate/gpu/adam_weight_decay_fusion.h" -#include "pre_activate/gpu/adam_fusion.h" -#include "device/kernel_runtime_manager.h" +#include "backend/session/gpu_session.h" +#include "runtime/device/gpu/kernel_info_setter.h" +#include "runtime/device/gpu/gpu_kernel_build.h" +#include "runtime/device/gpu/gpu_kernel_runtime.h" +#include "runtime/device/gpu/gpu_stream_assign.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/pass_manager.h" +#include "backend/optimizer/common/helper.h" +#include "backend/optimizer/pass/communication_op_fusion.h" +#include "backend/optimizer/pass/getitem_tuple.h" +#include "backend/optimizer/gpu/adam_weight_decay_fusion.h" +#include "backend/optimizer/gpu/adam_fusion.h" +#include "runtime/device/kernel_runtime_manager.h" 
#include "predict/predict.h" #include "common/utils.h" #include "common/trans.h" @@ -121,7 +121,7 @@ void GPUSession::LoadInputData(const std::shared_ptr &kernel_graph, if (input_node->isa() && AnfAlgo::OutputAddrExist(input_node, 0)) { auto pk_node = input_node->cast(); auto device_address = AnfAlgo::GetMutableOutputAddr(pk_node, 0); - auto tensor_address = tensor->device_address(); + auto tensor_address = std::dynamic_pointer_cast(tensor->device_address()); bool need_sync = false; if (ms_context->enable_pynative_infer()) { if (tensor_address == nullptr || tensor_address != device_address) { @@ -187,8 +187,7 @@ GraphId GPUSession::CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrList GetSummaryNodes(graph.get()); // Remove NoOp from execution graph opt::RemoveNopNode(graph.get()); - // Alloc memory, including static memory and dynamic memory - AllocateMemory(graph.get()); + // Set graph manager. MS_EXCEPTION_IF_NULL(context_); FuncGraphManagerPtr manager = MakeManager({graph}); context_->AddManager(manager); @@ -196,6 +195,8 @@ GraphId GPUSession::CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrList manager->AddFuncGraph(graph); graph->set_manager(manager); } + // Alloc memory, including static memory and dynamic memory + AllocateMemory(graph.get()); return graph_id; } diff --git a/mindspore/ccsrc/session/gpu_session.h b/mindspore/ccsrc/backend/session/gpu_session.h similarity index 95% rename from mindspore/ccsrc/session/gpu_session.h rename to mindspore/ccsrc/backend/session/gpu_session.h index 4e46c2138d..7e07dfbcbd 100644 --- a/mindspore/ccsrc/session/gpu_session.h +++ b/mindspore/ccsrc/backend/session/gpu_session.h @@ -18,9 +18,9 @@ #include #include -#include "session/session_basic.h" -#include "session/kernel_graph.h" -#include "session/session_factory.h" +#include "backend/session/session_basic.h" +#include "backend/session/kernel_graph.h" +#include "backend/session/session_factory.h" using KernelGraph = mindspore::session::KernelGraph; 
namespace mindspore { diff --git a/mindspore/ccsrc/session/kernel_graph.cc b/mindspore/ccsrc/backend/session/kernel_graph.cc similarity index 95% rename from mindspore/ccsrc/session/kernel_graph.cc rename to mindspore/ccsrc/backend/session/kernel_graph.cc index 264e2c661b..df810fe6ef 100644 --- a/mindspore/ccsrc/session/kernel_graph.cc +++ b/mindspore/ccsrc/backend/session/kernel_graph.cc @@ -13,18 +13,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" #include #include #include #include -#include "operator/ops.h" -#include "ir/param_value_py.h" -#include "session/anf_runtime_algorithm.h" -#include "device/kernel_info.h" -#include "kernel/kernel_build_info.h" -#include "device/kernel_runtime_manager.h" -#include "kernel/common_utils.h" +#include "frontend/operator/ops.h" +#include "ir/param_value.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "runtime/device/kernel_runtime_manager.h" +#include "backend/kernel_compiler/common_utils.h" namespace mindspore { namespace session { @@ -380,9 +380,7 @@ ParameterPtr KernelGraph::NewParameter(const ParameterPtr ¶meter) { new_parameter->set_abstract(parameter->abstract()); new_parameter->set_name(parameter->name()); if (AnfAlgo::IsParameterWeight(parameter)) { - auto param_value = std::dynamic_pointer_cast(parameter->default_param()); - auto param_value_new = std::make_shared(param_value->value()); - new_parameter->set_default_param(param_value_new); + new_parameter->set_default_param(parameter->default_param()); kernel_info->SetFeatureMapFlag(false); } else { kernel_info->SetFeatureMapFlag(true); @@ -618,8 +616,8 @@ void KernelGraph::UpdateControlDependRelations(const std::vector &de if (AnfAlgo::HasNodeAttr(kControlDependMode, cnode)) { depend_mode = 
AnfAlgo::GetNodeAttr(cnode, kControlDependMode); } - MS_LOG(INFO) << "Prior node[" << prior_node->DebugString() << "], depend node[" << depend_node->DebugString() - << "], depend_mode :" << depend_mode << "."; + MS_LOG(DEBUG) << "Prior node[" << prior_node->DebugString() << "], depend node[" << depend_node->DebugString() + << "], depend_mode :" << depend_mode << "."; if (prior_node->isa() && depend_mode == 1) { prior_nodes = GetOutputNodes(prior_node); } @@ -649,7 +647,8 @@ void KernelGraph::UpdateControlDependRelations(const std::vector &de } MS_EXCEPTION_IF_NULL(first_node); MS_EXCEPTION_IF_NULL(second_node); - MS_LOG(INFO) << "Add first node:" << first_node->DebugString() << ",second node:" << second_node->DebugString(); + MS_LOG(DEBUG) << "Add first node:" << first_node->DebugString() + << ",second node:" << second_node->DebugString(); AddDependEdge(second_node, first_node, 1); } } @@ -750,6 +749,10 @@ bool KernelGraph::RemoveValueNodeFromGraph(const ValueNodePtr &value_node) { void KernelGraph::ReplaceNode(NotNull old_anf_node, NotNull new_anf_node) { MS_EXCEPTION_IF_NULL(inputs_); + { + std::queue seed_nodes; + UpdateNodeEdgeList(&seed_nodes); + } auto it = node_output_edges_.find(old_anf_node); if (it != node_output_edges_.end()) { const auto &outputs = it->second; @@ -780,8 +783,10 @@ void KernelGraph::ReplaceNode(NotNull old_anf_node, NotNull seed_nodes; + UpdateNodeEdgeList(&seed_nodes); + } // update graph inputs in child graph auto it_real_inputs = std::find_if(real_inputs_.begin(), real_inputs_.end(), [&old_anf_node](const std::pair> &n) -> bool { @@ -987,6 +992,30 @@ bool KernelGraph::IsFinalOutputKernel(const AnfNodePtr &node) const { return false; } +void KernelGraph::UpdateChildGraphOrder() { + MS_LOG(INFO) << "Update " << ToString() << " child graph order."; + SetExecOrderByDefault(); + auto call_nodes = FindNodeByPrimitive(std::make_shared(prim::kPrimCall->name())); + std::vector child_graph_order; + for (auto &call_node : call_nodes) { + 
MS_EXCEPTION_IF_NULL(call_node); + auto call_child_graphs = AnfAlgo::GetCallNodeKernelGraph(call_node->cast()); + for (const auto &child_graph : call_child_graphs) { + MS_EXCEPTION_IF_NULL(child_graph); + if (child_graph != parent_graph_) { + auto shared_this = std::dynamic_pointer_cast(shared_from_this()); + MS_EXCEPTION_IF_NULL(shared_this); + child_graph->set_parent_graph(shared_this); + } + child_graph_order.push_back(child_graph); + } + } + for (size_t i = 0; i < child_graph_order.size(); ++i) { + MS_LOG(INFO) << "Child graph[" << i << "][id:" << child_graph_order[i]->graph_id() << "]"; + } + child_graph_order_ = child_graph_order; +} + std::string KernelGraph::ToString() const { return std::string("kernel_graph_").append(std::to_string(graph_id_)); } KernelGraph::~KernelGraph() { device::KernelRuntimeManager::Instance().ClearGraphResource(graph_id_); } diff --git a/mindspore/ccsrc/session/kernel_graph.h b/mindspore/ccsrc/backend/session/kernel_graph.h similarity index 94% rename from mindspore/ccsrc/session/kernel_graph.h rename to mindspore/ccsrc/backend/session/kernel_graph.h index 6861d43de0..48df351120 100644 --- a/mindspore/ccsrc/session/kernel_graph.h +++ b/mindspore/ccsrc/backend/session/kernel_graph.h @@ -29,14 +29,14 @@ #include "ir/anf.h" #include "utils/graph_utils.h" #include "utils/contract.h" -#include "device/kernel_info.h" +#include "runtime/device/kernel_info.h" namespace mindspore { namespace session { using AnfWithOutIndex = std::pair; class KernelGraph : public FuncGraph { public: - KernelGraph() : graph_id_(0), start_label_(nullptr), end_goto_(nullptr), null_output_(false) { + KernelGraph() : graph_id_(0), start_label_(nullptr), end_goto_(nullptr), null_output_(false), current_epoch_(0) { inputs_ = std::make_shared>(); execution_order_ = {}; executable_ = true; @@ -154,6 +154,14 @@ class KernelGraph : public FuncGraph { AnfNodePtr GetFrontNodeByInternalOutput(const AnfNodePtr &node) const; void AddFinalOutputKernel(const AnfNodePtr 
&node); bool IsFinalOutputKernel(const AnfNodePtr &node) const; + uint32_t current_epoch() const { return current_epoch_; } + void set_current_epoch(uint32_t epoch) { current_epoch_ = epoch; } + void UpdateChildGraphOrder(); + const std::vector &child_graph_result() const { return child_graph_result_; } + void AddChildGraphResult(const AnfNodePtr ¶meter) { child_graph_result_.push_back(parameter); } + void set_child_graph_result(const std::vector &child_graph_result) { + child_graph_result_ = child_graph_result; + } private: // remove value node form graph @@ -171,6 +179,7 @@ class KernelGraph : public FuncGraph { void UpdateControlDependRelations(const std::vector &depends); std::shared_ptr> inputs_; + std::vector child_graph_result_; std::vector execution_order_; uint32_t graph_id_; uint32_t stream_distinction_label_; @@ -216,6 +225,7 @@ class KernelGraph : public FuncGraph { std::unordered_map front_to_internal_outputs_map_; std::unordered_map internal_outputs_to_front_map_; std::set final_output_kernels_; + uint32_t current_epoch_; }; } // namespace session using KernelGraphPtr = std::shared_ptr; diff --git a/mindspore/ccsrc/session/session.cc b/mindspore/ccsrc/backend/session/session.cc similarity index 96% rename from mindspore/ccsrc/session/session.cc rename to mindspore/ccsrc/backend/session/session.cc index ae70fc77aa..95484a1113 100644 --- a/mindspore/ccsrc/session/session.cc +++ b/mindspore/ccsrc/backend/session/session.cc @@ -17,17 +17,17 @@ #include #include #include "include/inference.h" -#include "session/session.h" +#include "backend/session/session.h" #include "utils/load_onnx/anf_converter.h" -#include "session/session_basic.h" -#include "session/session_factory.h" +#include "backend/session/session_basic.h" +#include "backend/session/session_factory.h" #include "utils/base_ref_utils.h" -#include "kernel/oplib/oplib.h" +#include "backend/kernel_compiler/oplib/oplib.h" #ifdef ENABLE_D #include "utils/context/ms_context.h" -#include 
"session/ascend_session.h" +#include "backend/session/ascend_session.h" #else -#include "session/cpu_session.h" +#include "backend/session/cpu_session.h" #endif namespace py = pybind11; diff --git a/mindspore/ccsrc/session/session.h b/mindspore/ccsrc/backend/session/session.h similarity index 97% rename from mindspore/ccsrc/session/session.h rename to mindspore/ccsrc/backend/session/session.h index b608163067..6ea9cfaa47 100644 --- a/mindspore/ccsrc/session/session.h +++ b/mindspore/ccsrc/backend/session/session.h @@ -23,7 +23,7 @@ #include #include -#include "session/session_basic.h" +#include "backend/session/session_basic.h" #include "ir/anf.h" #include "include/inference.h" diff --git a/mindspore/ccsrc/session/session_basic.cc b/mindspore/ccsrc/backend/session/session_basic.cc similarity index 89% rename from mindspore/ccsrc/session/session_basic.cc rename to mindspore/ccsrc/backend/session/session_basic.cc index 91e430182c..9755dfc7d0 100644 --- a/mindspore/ccsrc/session/session_basic.cc +++ b/mindspore/ccsrc/backend/session/session_basic.cc @@ -13,24 +13,24 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "session/session_basic.h" +#include "backend/session/session_basic.h" #include #include #include #include -#include "pipeline/parse/data_converter.h" +#include "pipeline/jit/parse/data_converter.h" #include "ir/manager.h" -#include "ir/param_value_py.h" -#include "kernel/common_utils.h" -#include "operator/ops.h" +#include "ir/param_value.h" +#include "backend/kernel_compiler/common_utils.h" +#include "frontend/operator/ops.h" #include "common/trans.h" #include "utils/context/ms_context.h" #include "utils/config_manager.h" -#include "session/anf_runtime_algorithm.h" -#include "kernel/oplib/oplib.h" -#include "pre_activate/common/common_backend_optimization.h" -#include "pre_activate/pass/const_input_to_attr_registry.h" -#include "pre_activate/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/oplib/oplib.h" +#include "backend/optimizer/common/common_backend_optimization.h" +#include "backend/optimizer/pass/const_input_to_attr_registry.h" +#include "backend/optimizer/common/helper.h" #include "common/utils.h" #include "ir/dtype.h" #include "ir/anf.h" @@ -38,12 +38,12 @@ namespace mindspore { namespace session { -static std::shared_ptr> python_paras_; -void ClearPythonParasMap() { python_paras_ = nullptr; } +static std::shared_ptr> python_paras; +void ClearPythonParasMap() { python_paras = nullptr; } namespace { const int kSummaryGetItem = 2; -PyObject *GetParamDefaultInputTensor(const AnfNodePtr &node) { +ParamValuePtr GetParamDefaultValue(const AnfNodePtr &node) { if (node == nullptr) { return nullptr; } @@ -51,10 +51,7 @@ PyObject *GetParamDefaultInputTensor(const AnfNodePtr &node) { if (parameter == nullptr || !parameter->has_default()) { return nullptr; } - auto param_value = std::dynamic_pointer_cast(parameter->default_param()); - MS_EXCEPTION_IF_NULL(param_value); - auto py_param = param_value->value(); - return py_param.ptr(); + return parameter->default_param(); } BaseRef CreateOneTensor(const 
AnfNodePtr &node, size_t output_index, const KernelGraph &graph, @@ -77,7 +74,7 @@ BaseRef CreateOneTensor(const AnfNodePtr &node, size_t output_index, const Kerne return input_tensors[input_idx]; } } - MS_LOG(EXCEPTION) << "Parameter : " << node->DebugString() << "has no output addr"; + MS_LOG(EXCEPTION) << "Parameter : " << node->DebugString() << " has no output addr"; } } // if proccess reach here,it remarks item_with_index is a real node(Parameter,or executable CNode) @@ -110,8 +107,8 @@ BaseRef CreateOneTensor(const AnfNodePtr &node, size_t output_index, const Kerne return tensor; } -BaseRef CreatTensorForOutput(const AnfNodePtr &anf, const KernelGraph &graph, - const std::vector &input_tensors) { +BaseRef CreateTensorForOutput(const AnfNodePtr &anf, const KernelGraph &graph, + const std::vector &input_tensors) { MS_EXCEPTION_IF_NULL(anf); MS_LOG(INFO) << "Create tensor for output[" << anf->DebugString() << "]"; auto item_with_index = AnfAlgo::VisitKernelWithReturnType(anf, 0); @@ -123,7 +120,7 @@ BaseRef CreatTensorForOutput(const AnfNodePtr &anf, const KernelGraph &graph, MS_EXCEPTION_IF_NULL(cnode); VectorRef ret; for (size_t i = 1; i < cnode->inputs().size(); ++i) { - auto out = CreatTensorForOutput(cnode->input(i), graph, input_tensors); + auto out = CreateTensorForOutput(cnode->input(i), graph, input_tensors); ret.push_back(out); } return ret; @@ -136,25 +133,6 @@ BaseRef CreatTensorForOutput(const AnfNodePtr &anf, const KernelGraph &graph, return CreateOneTensor(item_with_index.first, item_with_index.second, graph, input_tensors); } -BaseRef CreatTupleForOutput(const AnfNodePtr &anf, const KernelGraph &graph, - const std::vector &input_tensors) { - MS_EXCEPTION_IF_NULL(anf); - if (!AnfAlgo::IsRealKernel(anf)) { - MS_LOG(EXCEPTION) << "Anf[" << anf->DebugString() << "] should be a executable kernel"; - } - if (anf->isa()) { - return CreateOneTensor(anf, 0, graph, input_tensors); - } - VectorRef ret; - if (anf->isa() && AnfAlgo::GetCNodeName(anf) != 
prim::kPrimMakeTuple->name()) { - for (size_t i = 0; i < AnfAlgo::GetOutputTensorNum(anf); ++i) { - auto out = CreateOneTensor(anf, i, graph, input_tensors); - ret.emplace_back(out); - } - } - return ret; -} - ValueNodePtr CreateNewValueNode(const AnfNodePtr &anf, KernelGraph *graph) { MS_EXCEPTION_IF_NULL(anf); MS_EXCEPTION_IF_NULL(graph); @@ -178,8 +156,8 @@ size_t LoadCtrlInputTensor(const std::shared_ptr &graph, std::vecto if (inputs_params == nullptr) { return 0; } - if (inputs_params->empty()) { - MS_LOG(EXCEPTION) << "Illegal empty inputs_params"; + if (inputs_params->size() < 2) { + MS_LOG(EXCEPTION) << "Illegal inputs_params size"; } auto tensor = (*inputs_params)[0]; MS_EXCEPTION_IF_NULL(tensor); @@ -190,6 +168,18 @@ size_t LoadCtrlInputTensor(const std::shared_ptr &graph, std::vecto // set loop_count to zero MS_EXCEPTION_IF_NULL(inputs); inputs->push_back(tensor); + + auto epoch_tensor = (*inputs_params)[1]; + MS_EXCEPTION_IF_NULL(epoch_tensor); + auto *epoch_val = static_cast(epoch_tensor->data_c()); + MS_EXCEPTION_IF_NULL(epoch_val); + *epoch_val = graph->current_epoch(); + epoch_tensor->set_dirty(true); + inputs->push_back(epoch_tensor); + MS_LOG(INFO) << "Load epoch_val:" << *epoch_val; + + graph->set_current_epoch(graph->current_epoch() + 1); + return inputs_params->size(); } @@ -215,20 +205,20 @@ ParameterPtr ConstructRunOpParameter(const std::shared_ptr &graph, auto param = graph->NewParameter(); MS_EXCEPTION_IF_NULL(param); if (tensor_mask == kParameterWeightTensorMask) { - py::object obj; - auto param_value_new = std::make_shared(obj); + auto param_value_new = std::make_shared(); param->set_default_param(param_value_new); } // set the kernel info of parameter auto kernel_build_info_builder = std::make_shared(); MS_EXCEPTION_IF_NULL(input_tensor); - if (input_tensor->device_address().get() == nullptr) { + auto device_address = std::dynamic_pointer_cast(input_tensor->device_address()); + if (device_address == nullptr) { 
kernel_build_info_builder->SetOutputsFormat(std::vector{kOpFormat_DEFAULT}); TypeId param_init_data_type = AnfAlgo::IsParameterWeight(param) ? kTypeUnknown : input_tensor->data_type(); kernel_build_info_builder->SetOutputsDeviceType(std::vector{param_init_data_type}); } else { - kernel_build_info_builder->SetOutputsFormat(std::vector{input_tensor->device_address()->format()}); - kernel_build_info_builder->SetOutputsDeviceType(std::vector{input_tensor->device_address()->type_id()}); + kernel_build_info_builder->SetOutputsFormat(std::vector{device_address->format()}); + kernel_build_info_builder->SetOutputsDeviceType(std::vector{device_address->type_id()}); } AnfAlgo::SetSelectKernelBuildInfo(kernel_build_info_builder->Build(), param.get()); // construct abstract of parameter @@ -311,7 +301,7 @@ void SessionBasic::InitInternalOutputParameter(const AnfNodePtr &out_node, const if (ref_real_node->isa() && node_graph->IsInternalOutput(ref_real_node) && node_graph->IsFinalOutputKernel(ref_real_node)) { auto kernel_info = ref_real_node->kernel_info(); - if (kernel_info == nullptr || kernel_info->select_kernel_build_info() == nullptr) { + if (kernel_info == nullptr || !kernel_info->has_build_info()) { MS_LOG(INFO) << "No kernel info"; return; } @@ -322,9 +312,9 @@ void SessionBasic::InitInternalOutputParameter(const AnfNodePtr &out_node, const } auto format = AnfAlgo::GetOutputFormat(ref_real_node, ref_real_node_index); auto type = AnfAlgo::GetOutputDeviceDataType(ref_real_node, ref_real_node_index); - parameter->set_kernel_info(std::make_shared()); - auto d_kernel_info = parameter->kernel_info(); + auto d_kernel_info = std::make_shared(); MS_EXCEPTION_IF_NULL(d_kernel_info); + parameter->set_kernel_info(d_kernel_info); kernel::KernelBuildInfo::KernelBuildInfoBuilder builder; builder.SetOutputsDeviceType({type}); builder.SetOutputsFormat({format}); @@ -384,24 +374,24 @@ ParameterPtr SessionBasic::CreateNewParameterFromParameter(const AnfNodePtr &anf MS_LOG(EXCEPTION) << 
"Anf[" << anf->DebugString() << "] is not a parameter"; } MS_EXCEPTION_IF_NULL(graph); - auto m_tensor = GetParamDefaultInputTensor(anf); + auto param_value = GetParamDefaultValue(anf); auto valid_inputs = graph->MutableValidInputs(); MS_EXCEPTION_IF_NULL(valid_inputs); auto graph_inputs = graph->MutableInputs(); MS_EXCEPTION_IF_NULL(graph_inputs); ParameterPtr new_parameter = nullptr; // if parameter's python parameter has been exist a backend parameter, reuse the exist parameter - if (python_paras_ == nullptr) { - python_paras_ = std::make_shared>(); + if (python_paras == nullptr) { + python_paras = std::make_shared>(); } - auto iter = python_paras_->find(m_tensor); - if (iter != python_paras_->end()) { + auto iter = python_paras->find(param_value); + if (iter != python_paras->end()) { new_parameter = iter->second; } else { TraceManager::DebugTrace(std::make_shared(anf->debug_info())); new_parameter = graph->NewParameter(anf->cast()); - if (m_tensor != nullptr) { - (*python_paras_)[m_tensor] = new_parameter; + if (param_value != nullptr) { + (*python_paras)[param_value] = new_parameter; } TraceManager::EndTrace(); } @@ -485,15 +475,13 @@ CNodePtr SessionBasic::CreateNewCNode(const CNodePtr &cnode, bool valid_input, K } else if (optimize_depend && input_idx == kDependAttachNodeIndex) { cnode_inputs.push_back(origin_inputs[kRealInputIndexInDepend]); continue; - } else if (anf->isa()) { + } else { *from_other_graph = true; // the input node is a cnode from other graph auto parameter_from_cnode = CreateNewParameterFromCNode(anf, valid_input, graph); cnode_inputs.push_back(parameter_from_cnode); (*other_graph_cnode)[anf] = parameter_from_cnode; - continue; } - MS_LOG(EXCEPTION) << "Unexpected input[" << anf->DebugString() << "]"; } TraceManager::DebugTrace(std::make_shared(cnode->debug_info())); auto new_cnode = graph->NewCNode(cnode_inputs); @@ -501,7 +489,50 @@ CNodePtr SessionBasic::CreateNewCNode(const CNodePtr &cnode, bool valid_input, K return new_cnode; } 
-static std::vector CreateSwitchOrPartialNode(const CNodePtr &cnode, KernelGraph *graph) { +CNodePtr SessionBasic::CreateSwitchInput(const AnfNodePtr &node_input, KernelGraph *graph) { + MS_EXCEPTION_IF_NULL(node_input); + MS_EXCEPTION_IF_NULL(graph); + // switch input generalizes partial + if (AnfAlgo::CheckPrimitiveType(node_input, prim::kPrimPartial) || + AnfAlgo::CheckPrimitiveType(node_input, prim::kPrimCall)) { + return node_input->cast(); + } + if (node_input->isa()) { + MS_LOG(EXCEPTION) << "If switch input is " << node_input->DebugString() << ", it mast be partial or call."; + } + std::vector partial_inputs = {NewValueNode(std::make_shared(prim::kPrimPartial->name()))}; + if (node_input->isa() && IsValueNode(node_input)) { + partial_inputs.emplace_back(node_input); + auto partial_node = graph->NewCNode(partial_inputs); + return partial_node; + } + KernelGraphPtr kernel_graph = NewKernelGraph(); + MS_EXCEPTION_IF_NULL(kernel_graph); + kernel_graph->set_output(graph->GetBackendAnfByFrontAnf(node_input)); + partial_inputs.emplace_back(std::make_shared(kernel_graph)); + auto partial_node = graph->NewCNode(partial_inputs); + return partial_node; +} + +CNodePtr SessionBasic::HandleSwitchInputs(const AnfNodePtr &anf_node, KernelGraph *graph) { + MS_EXCEPTION_IF_NULL(anf_node); + MS_EXCEPTION_IF_NULL(graph); + auto node = anf_node->cast(); + MS_EXCEPTION_IF_NULL(node); + if (node->inputs().size() < kSwitchInputSize) { + MS_LOG(EXCEPTION) << "Switch input size less than " << kSwitchInputSize; + } + auto primitive = NewValueNode(std::make_shared(prim::kPrimSwitch->name())); + std::vector switch_inputs = {primitive, node->input(1)}; + for (size_t index = 2; index < node->inputs().size(); index++) { + auto input = CreateSwitchInput(node->input(index), graph); + switch_inputs.emplace_back(input); + } + auto switch_node = graph->NewCNode(switch_inputs); + return switch_node; +} + +std::vector SessionBasic::CreateSwitchOrPartialNode(const CNodePtr &cnode, KernelGraph 
*graph) { MS_EXCEPTION_IF_NULL(cnode); MS_EXCEPTION_IF_NULL(graph); // create primitive of cnode:call(partial or switch) @@ -526,7 +557,8 @@ static std::vector CreateSwitchOrPartialNode(const CNodePtr &cnode, }); return cnode_inputs; } else if (AnfAlgo::CheckPrimitiveType(cnode_input, prim::kPrimSwitch)) { - cnode_inputs.emplace_back(cnode_input); + auto switch_node = HandleSwitchInputs(cnode_input, graph); + cnode_inputs.emplace_back(switch_node); return cnode_inputs; } MS_LOG(EXCEPTION) << "CNode input[0] must be partial or switch."; @@ -618,19 +650,19 @@ ParameterPtr SessionBasic::CreateNewParameter(const AnfNodePtr &anf, KernelGraph MS_LOG(EXCEPTION) << "Anf[" << anf->DebugString() << "] is not a parameter"; } - auto m_tensor = GetParamDefaultInputTensor(anf); + auto param_value = GetParamDefaultValue(anf); ParameterPtr new_parameter = nullptr; - if (python_paras_ == nullptr) { - python_paras_ = std::make_shared>(); + if (python_paras == nullptr) { + python_paras = std::make_shared>(); } - auto iter = python_paras_->find(m_tensor); - if (iter != python_paras_->end()) { + auto iter = python_paras->find(param_value); + if (iter != python_paras->end()) { new_parameter = iter->second; } else { TraceManager::DebugTrace(std::make_shared(anf->debug_info())); new_parameter = graph->NewParameter(anf->cast()); - if (m_tensor != nullptr) { - (*python_paras_)[m_tensor] = new_parameter; + if (param_value != nullptr) { + (*python_paras)[param_value] = new_parameter; } TraceManager::EndTrace(); } @@ -776,13 +808,13 @@ void SessionBasic::AddParameterToGraphInputs(const std::vector ¶ void SessionBasic::LoadInputData(const std::shared_ptr &kernel_graph, const std::vector &inputs_const) const { std::vector inputs(inputs_const); - size_t input_ctrl_size = 1; + size_t input_ctrl_size = 2; MS_EXCEPTION_IF_NULL(kernel_graph); if (kernel_graph->input_ctrl_tensors()) { input_ctrl_size = LoadCtrlInputTensor(kernel_graph, &inputs); } auto input_nodes = kernel_graph->inputs(); - if 
((inputs.size() + input_ctrl_size) - 1 != input_nodes.size()) { + if ((inputs.size() + input_ctrl_size) - 2 != input_nodes.size()) { MS_LOG(EXCEPTION) << "Tensor input:" << inputs.size() << " is not equal graph inputs:" << input_nodes.size() << ", input_ctrl_size:" << input_ctrl_size; } @@ -829,20 +861,11 @@ void SessionBasic::UpdateOutputs(const std::shared_ptr &kernel_grap const std::vector &input_tensors) const { MS_EXCEPTION_IF_NULL(kernel_graph); MS_EXCEPTION_IF_NULL(outputs); - if (!kernel_graph->child_graph_order().empty()) { - // use the last child graph output as the root graph output - UpdateOutputs(kernel_graph->child_graph_order().back(), outputs, input_tensors); - return; - } auto anf_outputs = kernel_graph->outputs(); for (auto &item : anf_outputs) { MS_EXCEPTION_IF_NULL(item); MS_LOG(INFO) << "Update output[" << item->DebugString() << "]"; - if (AnfAlgo::IsTupleOutput(item) && AnfAlgo::IsRealKernel(item)) { - outputs->emplace_back(CreatTupleForOutput(item, *kernel_graph, input_tensors)); - continue; - } - outputs->emplace_back(CreatTensorForOutput(item, *kernel_graph, input_tensors)); + outputs->emplace_back(CreateTensorForOutput(item, *kernel_graph, input_tensors)); } } @@ -931,6 +954,11 @@ CNodePtr SessionBasic::ConstructOutput(const AnfNodePtrList &outputs, const std: auto FindEqu = [graph, outputs](const AnfNodePtr &out) -> AnfNodePtr { auto backend_anf = graph->GetBackendAnfByFrontAnf(out); if (backend_anf != nullptr) { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + if (context_ptr->execution_mode() == kPynativeMode) { + return backend_anf; + } auto front_real_kernel = AnfAlgo::VisitKernel(out, 0); auto backend_real_kernel = AnfAlgo::VisitKernel(backend_anf, 0); MS_EXCEPTION_IF_NULL(out); diff --git a/mindspore/ccsrc/session/session_basic.h b/mindspore/ccsrc/backend/session/session_basic.h similarity index 94% rename from mindspore/ccsrc/session/session_basic.h rename to 
mindspore/ccsrc/backend/session/session_basic.h index cf85dd0225..c662e3978b 100755 --- a/mindspore/ccsrc/session/session_basic.h +++ b/mindspore/ccsrc/backend/session/session_basic.h @@ -24,14 +24,14 @@ #include #include "utils/base_ref_extends.h" -#include "session/session_context.h" -#include "session/kernel_graph.h" +#include "backend/session/session_context.h" +#include "backend/session/kernel_graph.h" #include "ir/anf.h" #include "ir/tensor.h" #include "utils/any.h" #include "utils/contract.h" -#include "pynative/pynative_execute.h" -#include "device/kernel_info.h" +#include "pipeline/pynative/pynative_execute.h" +#include "runtime/device/kernel_info.h" #ifdef ENABLE_DEBUGGER #include "debug/debugger/debugger.h" #endif @@ -87,6 +87,10 @@ class SessionBasic { std::unordered_map *other_graph_cnode); CNodePtr CreateNewCNode(const CNodePtr &cnode, KernelGraph *graph); + CNodePtr CreateSwitchInput(const AnfNodePtr &node_input, KernelGraph *graph); + CNodePtr HandleSwitchInputs(const AnfNodePtr &anf_node, KernelGraph *graph); + std::vector CreateSwitchOrPartialNode(const CNodePtr &cnode, KernelGraph *graph); + // set parameters of final graph virtual GraphId SetFinalGraphInput(const std::vector &) { return kInvalidGraphId; } // set output of final graph diff --git a/mindspore/ccsrc/session/session_context.cc b/mindspore/ccsrc/backend/session/session_context.cc similarity index 95% rename from mindspore/ccsrc/session/session_context.cc rename to mindspore/ccsrc/backend/session/session_context.cc index 2b6ebf6b84..f5ec49c090 100644 --- a/mindspore/ccsrc/session/session_context.cc +++ b/mindspore/ccsrc/backend/session/session_context.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "session/session_context.h" +#include "backend/session/session_context.h" namespace mindspore { namespace session { std::shared_ptr Context::GetInstance() { diff --git a/mindspore/ccsrc/session/session_context.h b/mindspore/ccsrc/backend/session/session_context.h similarity index 97% rename from mindspore/ccsrc/session/session_context.h rename to mindspore/ccsrc/backend/session/session_context.h index 78794c348e..22cc0c813a 100644 --- a/mindspore/ccsrc/session/session_context.h +++ b/mindspore/ccsrc/backend/session/session_context.h @@ -23,7 +23,7 @@ #include #include "ir/tensor.h" -#include "pipeline/resource.h" +#include "pipeline/jit/resource.h" #include "utils/context/ms_context.h" namespace mindspore { namespace session { diff --git a/mindspore/ccsrc/session/session_factory.cc b/mindspore/ccsrc/backend/session/session_factory.cc similarity index 96% rename from mindspore/ccsrc/session/session_factory.cc rename to mindspore/ccsrc/backend/session/session_factory.cc index 4cd0481f8c..8a8f9a9cea 100644 --- a/mindspore/ccsrc/session/session_factory.cc +++ b/mindspore/ccsrc/backend/session/session_factory.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "session/session_factory.h" +#include "backend/session/session_factory.h" #include #include #include diff --git a/mindspore/ccsrc/session/session_factory.h b/mindspore/ccsrc/backend/session/session_factory.h similarity index 97% rename from mindspore/ccsrc/session/session_factory.h rename to mindspore/ccsrc/backend/session/session_factory.h index 99db0afeb7..054f03cf4b 100644 --- a/mindspore/ccsrc/session/session_factory.h +++ b/mindspore/ccsrc/backend/session/session_factory.h @@ -22,7 +22,7 @@ #include #include #include "common/utils.h" -#include "session/session_basic.h" +#include "backend/session/session_basic.h" namespace mindspore { namespace session { using SessionCreator = std::function()>; diff --git a/mindspore/ccsrc/common.h b/mindspore/ccsrc/common.h index 0928dcfcf6..6b882a15d4 100644 --- a/mindspore/ccsrc/common.h +++ b/mindspore/ccsrc/common.h @@ -23,13 +23,13 @@ #include "pybind11/pybind11.h" #include "pybind11/stl.h" -#include "pipeline/static_analysis/dshape.h" -#include "pipeline/static_analysis/abstract_value.h" -#include "pipeline/static_analysis/abstract_function.h" -#include "pipeline/parse/python_adapter.h" -#include "pipeline/parse/parse.h" -#include "pipeline/parse/parse_base.h" -#include "pipeline/parse/resolve.h" +#include "abstract/dshape.h" +#include "abstract/abstract_value.h" +#include "pipeline/jit/static_analysis/abstract_function.h" +#include "pipeline/jit/parse/python_adapter.h" +#include "pipeline/jit/parse/parse.h" +#include "pipeline/jit/parse/parse_base.h" +#include "pipeline/jit/parse/resolve.h" namespace py = pybind11; #endif // MINDSPORE_CCSRC_COMMON_H_ diff --git a/mindspore/ccsrc/common/trans.cc b/mindspore/ccsrc/common/trans.cc index 9cf6eb3a5a..1841826ca9 100644 --- a/mindspore/ccsrc/common/trans.cc +++ b/mindspore/ccsrc/common/trans.cc @@ -18,9 +18,9 @@ #include #include #include "common/utils.h" -#include "session/anf_runtime_algorithm.h" -#include "kernel/kernel.h" -#include 
"device/convert_tensor_utils.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/kernel.h" +#include "runtime/device/convert_tensor_utils.h" #include "utils/convert_utils.h" #include "utils/log_adapter.h" #include "utils/utils.h" diff --git a/mindspore/ccsrc/common/trans.h b/mindspore/ccsrc/common/trans.h index a8fc7c8a00..286c76afd0 100644 --- a/mindspore/ccsrc/common/trans.h +++ b/mindspore/ccsrc/common/trans.h @@ -24,7 +24,7 @@ #include #include #include "ir/dtype.h" -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" #include "ir/dtype/type.h" namespace mindspore { diff --git a/mindspore/ccsrc/common/utils.h b/mindspore/ccsrc/common/utils.h index 8f6e8f7c0c..23d08f8f28 100644 --- a/mindspore/ccsrc/common/utils.h +++ b/mindspore/ccsrc/common/utils.h @@ -38,6 +38,14 @@ static inline std::string GetEnv(const std::string &envvar) { return std::string(value); } + +static inline int SetEnv(const char *envname, const char *envvar, int overwrite = 1) { +#if defined(_WIN32) + return 0; +#else + return ::setenv(envname, envvar, overwrite); +#endif +} } // namespace common } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/core/client.h b/mindspore/ccsrc/dataset/core/client.h deleted file mode 100644 index a10cb4596e..0000000000 --- a/mindspore/ccsrc/dataset/core/client.h +++ /dev/null @@ -1,57 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef DATASET_CORE_CLIENT_H_ -#define DATASET_CORE_CLIENT_H_ - -// client.h -// Include file for DE client functions - -#include "dataset/core/constants.h" -#include "dataset/core/data_type.h" -#include "dataset/core/tensor.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/engine/data_schema.h" -#include "dataset/engine/dataset_iterator.h" -#include "dataset/engine/datasetops/barrier_op.h" -#include "dataset/engine/datasetops/batch_op.h" -#include "dataset/engine/datasetops/build_vocab_op.h" -#include "dataset/engine/datasetops/dataset_op.h" -#include "dataset/engine/datasetops/device_queue_op.h" -#include "dataset/engine/datasetops/map_op.h" -#include "dataset/engine/datasetops/project_op.h" -#include "dataset/engine/datasetops/rename_op.h" -#include "dataset/engine/datasetops/filter_op.h" -#include "dataset/engine/datasetops/repeat_op.h" -#include "dataset/engine/datasetops/skip_op.h" -#include "dataset/engine/datasetops/shuffle_op.h" -#include "dataset/engine/datasetops/source/generator_op.h" -#include "dataset/engine/datasetops/source/mindrecord_op.h" -#include "dataset/engine/datasetops/source/tf_reader_op.h" -#include "dataset/engine/datasetops/take_op.h" -#include "dataset/engine/datasetops/zip_op.h" -#include "dataset/engine/datasetops/concat_op.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/util/status.h" - -namespace mindspore { -namespace dataset { -// This is a one-time global initializer that needs to be called at the -// start of any minddata applications. 
-extern Status GlobalInit(); -} // namespace dataset -} // namespace mindspore - -#endif // DATASET_CORE_CLIENT_H_ diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/CMakeLists.txt b/mindspore/ccsrc/dataset/engine/datasetops/source/CMakeLists.txt deleted file mode 100644 index b78ddcd87b..0000000000 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/CMakeLists.txt +++ /dev/null @@ -1,19 +0,0 @@ -add_subdirectory(sampler) -file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") -set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) -add_library(engine-datasetops-source OBJECT - generator_op.cc - io_block.cc - mindrecord_op.cc - tf_reader_op.cc - image_folder_op.cc - mnist_op.cc - voc_op.cc - coco_op.cc - manifest_op.cc - cifar_op.cc - random_data_op.cc - celeba_op.cc - text_file_op.cc - clue_op.cc - ) \ No newline at end of file diff --git a/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.cc b/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.cc deleted file mode 100644 index 04e00d878d..0000000000 --- a/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.cc +++ /dev/null @@ -1,78 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include "dataset/kernels/image/bounding_box_augment_op.h" -#include "dataset/kernels/image/resize_op.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/core/cv_tensor.h" - -namespace mindspore { -namespace dataset { -const float BoundingBoxAugmentOp::kDefRatio = 0.3; - -BoundingBoxAugmentOp::BoundingBoxAugmentOp(std::shared_ptr transform, float ratio) - : ratio_(ratio), transform_(std::move(transform)) { - rnd_.seed(GetSeed()); -} - -Status BoundingBoxAugmentOp::Compute(const TensorRow &input, TensorRow *output) { - IO_CHECK_VECTOR(input, output); - BOUNDING_BOX_CHECK(input); // check if bounding boxes are valid - uint32_t num_of_boxes = input[1]->shape()[0]; - uint32_t num_to_aug = num_of_boxes * ratio_; // cast to int - std::vector boxes(num_of_boxes); - std::vector selected_boxes; - for (uint32_t i = 0; i < num_of_boxes; i++) boxes[i] = i; - // sample bboxes according to ratio picked by user - std::sample(boxes.begin(), boxes.end(), std::back_inserter(selected_boxes), num_to_aug, rnd_); - std::shared_ptr crop_out; - std::shared_ptr res_out; - std::shared_ptr input_restore = CVTensor::AsCVTensor(input[0]); - - for (uint32_t i = 0; i < num_to_aug; i++) { - uint32_t min_x = 0; - uint32_t min_y = 0; - uint32_t b_w = 0; - uint32_t b_h = 0; - // get the required items - input[1]->GetItemAt(&min_x, {selected_boxes[i], 0}); - input[1]->GetItemAt(&min_y, {selected_boxes[i], 1}); - input[1]->GetItemAt(&b_w, {selected_boxes[i], 2}); - input[1]->GetItemAt(&b_h, {selected_boxes[i], 3}); - Crop(input_restore, &crop_out, min_x, min_y, b_w, b_h); - // transform the cropped bbox region - transform_->Compute(crop_out, &res_out); - // place the transformed region back in the restored input - std::shared_ptr res_img = CVTensor::AsCVTensor(res_out); - // check if transformed crop is out of bounds of the box - if (res_img->mat().cols > b_w || res_img->mat().rows > b_h || res_img->mat().cols < b_w || - res_img->mat().rows < b_h) { - // 
if so, resize to fit in the box - std::shared_ptr resize_op = std::make_shared(b_h, b_w); - resize_op->Compute(std::static_pointer_cast(res_img), &res_out); - res_img = CVTensor::AsCVTensor(res_out); - } - res_img->mat().copyTo(input_restore->mat()(cv::Rect(min_x, min_y, res_img->mat().cols, res_img->mat().rows))); - } - (*output).push_back(std::move(std::static_pointer_cast(input_restore))); - (*output).push_back(input[1]); - return Status::OK(); -} - -} // namespace dataset -} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/text/kernels/jieba_tokenizer_op.cc b/mindspore/ccsrc/dataset/text/kernels/jieba_tokenizer_op.cc deleted file mode 100644 index de1d915fbb..0000000000 --- a/mindspore/ccsrc/dataset/text/kernels/jieba_tokenizer_op.cc +++ /dev/null @@ -1,66 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "dataset/text/kernels/jieba_tokenizer_op.h" - -#include -#include -#include -#include "dataset/util/path.h" - -namespace mindspore { -namespace dataset { - -JiebaTokenizerOp::JiebaTokenizerOp(const std::string &hmm_path, const std::string &dict_path, JiebaMode mode) - : jieba_mode_(mode), hmm_model_path_(hmm_path), mp_dict_path_(dict_path) { - jieba_parser_ = std::make_unique(mp_dict_path_, hmm_model_path_, ""); -} - -Status JiebaTokenizerOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { - IO_CHECK(input, output); - RETURN_UNEXPECTED_IF_NULL(jieba_parser_); - - if (input->Rank() != 0 || input->type() != DataType::DE_STRING) { - RETURN_STATUS_UNEXPECTED("the input tensor should be scalar string tensor"); - } - - std::string_view sentence_v; - RETURN_IF_NOT_OK(input->GetItemAt(&sentence_v, {})); - std::string sentence{sentence_v}; - std::vector words; - if (sentence == "") { - words.push_back(""); - } else { - if (jieba_mode_ == JiebaMode::kMp) { - jieba_parser_->CutSmall(sentence, words, MAX_WORD_LENGTH); - } else if (jieba_mode_ == JiebaMode::kHmm) { - jieba_parser_->CutHMM(sentence, words); - } else { // Mix - jieba_parser_->Cut(sentence, words, true); - } - } - *output = std::make_shared(words, TensorShape({(dsize_t)words.size()})); - return Status::OK(); -} - -Status JiebaTokenizerOp::AddWord(const std::string &word, int freq) { - RETURN_UNEXPECTED_IF_NULL(jieba_parser_); - if (jieba_parser_->InsertUserWord(word, freq, "") == false) { - return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "add word error"); - } - return Status::OK(); -} -} // namespace dataset -} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/text/kernels/unicode_char_tokenizer_op.cc b/mindspore/ccsrc/dataset/text/kernels/unicode_char_tokenizer_op.cc deleted file mode 100644 index 063bf21630..0000000000 --- a/mindspore/ccsrc/dataset/text/kernels/unicode_char_tokenizer_op.cc +++ /dev/null @@ -1,53 +0,0 @@ -/** - * Copyright 2020 Huawei 
Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "dataset/text/kernels/unicode_char_tokenizer_op.h" -#include -#include -#include -#include - -#include "cppjieba/Unicode.hpp" - -using cppjieba::DecodeRunesInString; -using cppjieba::RuneStrArray; - -namespace mindspore { -namespace dataset { - -Status UnicodeCharTokenizerOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { - IO_CHECK(input, output); - if (input->Rank() != 0 || input->type() != DataType::DE_STRING) { - RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor"); - } - std::string_view str; - RETURN_IF_NOT_OK(input->GetItemAt(&str, {})); - - RuneStrArray runes; - if (!DecodeRunesInString(str.data(), str.size(), runes)) { - RETURN_STATUS_UNEXPECTED("Decode utf8 string failed."); - } - std::vector splits(runes.size()); - for (size_t i = 0; i < runes.size(); i++) { - splits[i] = str.substr(runes[i].offset, runes[i].len); - } - if (splits.empty()) { - splits.emplace_back(""); - } - *output = std::make_shared(splits, TensorShape({(dsize_t)splits.size()})); - return Status::OK(); -} -} // namespace dataset -} // namespace mindspore diff --git a/mindspore/ccsrc/debug/CMakeLists.txt b/mindspore/ccsrc/debug/CMakeLists.txt index ba0c5e07ac..37ffcceeaf 100644 --- a/mindspore/ccsrc/debug/CMakeLists.txt +++ b/mindspore/ccsrc/debug/CMakeLists.txt @@ -19,6 +19,15 @@ if (ENABLE_DEBUGGER) ) endif (ENABLE_DEBUGGER) +if (ENABLE_D) + list(APPEND 
_DEBUG_SRC_LIST + "${CMAKE_CURRENT_SOURCE_DIR}/common.cc" + ) + if (ENABLE_DATA_DUMP) + list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/data_dump_parser.cc") + endif(ENABLE_DATA_DUMP) +endif() + if (ENABLE_DUMP_E2E) list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/e2e_dump.cc") endif (ENABLE_DUMP_E2E) diff --git a/mindspore/ccsrc/debug/anf_ir_dump.cc b/mindspore/ccsrc/debug/anf_ir_dump.cc index fc32e0fb5f..42d372cefb 100644 --- a/mindspore/ccsrc/debug/anf_ir_dump.cc +++ b/mindspore/ccsrc/debug/anf_ir_dump.cc @@ -24,9 +24,9 @@ #include "ir/primitive.h" #include "ir/func_graph.h" -#include "device/kernel_info.h" +#include "runtime/device/kernel_info.h" #include "utils/graph_utils.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { const std::string ToShortString(const TypeId &typeId) { @@ -128,7 +128,7 @@ void DumpKernelInfo(const CNodePtr &node, const std::shared_ptr return; } auto kernel_info = node->kernel_info(); - if (kernel_info == nullptr || kernel_info->select_kernel_build_info() == nullptr) { + if (kernel_info == nullptr || !kernel_info->has_build_info()) { return; } @@ -179,7 +179,7 @@ void DumpParams(const FuncGraphPtr &graph, std::ostringstream &buffer, OrderedMa // print parameters' type and shape PrintNodeOutputType(buffer, p); auto kernel_info = p->kernel_info(); - if (kernel_info != nullptr && kernel_info->select_kernel_build_info() != nullptr) { + if (kernel_info != nullptr && kernel_info->has_build_info()) { buffer << " : "; auto type = AnfAlgo::GetOutputDeviceDataType(p, 0); auto format = AnfAlgo::GetOutputFormat(p, 0); diff --git a/mindspore/ccsrc/debug/anf_ir_utils.cc b/mindspore/ccsrc/debug/anf_ir_utils.cc index c797b8efea..273a6f6458 100644 --- a/mindspore/ccsrc/debug/anf_ir_utils.cc +++ b/mindspore/ccsrc/debug/anf_ir_utils.cc @@ -26,19 +26,19 @@ #include "utils/graph_utils.h" #include "utils/symbolic.h" #include "ir/meta_func_graph.h" -#include 
"ir/param_value_py.h" +#include "ir/param_value.h" #include "ir/tensor_py.h" -#include "pipeline/parse/python_adapter.h" -#include "pipeline/parse/resolve.h" -#include "operator/composite/composite.h" -#include "operator/composite/map.h" +#include "pipeline/jit/parse/python_adapter.h" +#include "pipeline/jit/parse/resolve.h" +#include "frontend/operator/composite/composite.h" +#include "frontend/operator/composite/map.h" #include "utils/ordered_map.h" #include "utils/ordered_set.h" #include "utils/utils.h" #include "debug/trace.h" #include "debug/label.h" #include "utils/context/ms_context.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" using mindspore::tensor::TensorPy; @@ -485,8 +485,8 @@ void AnfExporter::OutputParameters(std::ofstream &ofs, const std::vectorhas_default()) { - auto param_value = std::dynamic_pointer_cast(param_ptr->default_param()); - ofs << " = @" << DumpObject(param_value->value(), "D"); + auto param_value = param_ptr->default_param(); + ofs << " = @" << DumpObject(py::cast(param_value), "D"); } // output comment @@ -1667,7 +1667,7 @@ class IrParser { // load parameter default value from serialized file py::object default_obj = LoadObject(lexer_.GetTokenText()); - auto param_value_new = std::make_shared(default_obj); + auto param_value_new = py::cast(default_obj); param->set_default_param(param_value_new); tok = lexer_.GetNextToken(); diff --git a/mindspore/ccsrc/debug/anf_ir_utils.h b/mindspore/ccsrc/debug/anf_ir_utils.h index 4503692eb9..ed5e3b8a5d 100644 --- a/mindspore/ccsrc/debug/anf_ir_utils.h +++ b/mindspore/ccsrc/debug/anf_ir_utils.h @@ -28,9 +28,9 @@ #include "ir/anf.h" #include "ir/func_graph.h" #include "ir/meta_func_graph.h" -#include "pipeline/parse/python_adapter.h" -#include "pipeline/parse/resolve.h" -#include "operator/composite/composite.h" +#include "pipeline/jit/parse/python_adapter.h" +#include "pipeline/jit/parse/resolve.h" +#include "frontend/operator/composite/composite.h" #include "utils/symbolic.h" 
#include "utils/ordered_map.h" #include "utils/ordered_set.h" diff --git a/mindspore/ccsrc/debug/common.cc b/mindspore/ccsrc/debug/common.cc new file mode 100644 index 0000000000..6caf7e2c39 --- /dev/null +++ b/mindspore/ccsrc/debug/common.cc @@ -0,0 +1,125 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "debug/common.h" + +#include +#include +#include "utils/system/env.h" +#include "utils/system/file_system.h" +#include "utils/log_adapter.h" +#include "utils/context/ms_context.h" + +namespace mindspore { +std::optional Common::GetRealPath(const std::string &input_path) { + std::string out_path; + auto path_split_pos = input_path.find_last_of('/'); + if (path_split_pos == std::string::npos) { + path_split_pos = input_path.find_last_of('\\'); + } + // get real path + char real_path[PATH_MAX] = {0}; + if (path_split_pos != std::string::npos) { + std::string prefix_path = input_path.substr(0, path_split_pos); + if (prefix_path.length() >= PATH_MAX) { + MS_LOG(ERROR) << "Prefix path is too longer!"; + return std::nullopt; + } + std::string last_path = input_path.substr(path_split_pos, input_path.length() - path_split_pos); + auto ret = CreateNotExistDirs(prefix_path); + if (!ret) { + MS_LOG(ERROR) << "CreateNotExistDirs Failed!"; + return std::nullopt; + } + + if (nullptr == realpath(prefix_path.c_str(), real_path)) { + MS_LOG(ERROR) << "dir " << prefix_path << " does not exit."; + return 
std::nullopt; + } + out_path = std::string(real_path) + last_path; + } + + if (path_split_pos == std::string::npos) { + if (input_path.length() >= PATH_MAX) { + MS_LOG(ERROR) << "Prefix path is too longer!"; + return std::nullopt; + } + if (nullptr == realpath(input_path.c_str(), real_path)) { + MS_LOG(ERROR) << "File " << input_path << " does not exit, it will be created."; + } + out_path = std::string(real_path); + } + return out_path; +} + +bool Common::CreateNotExistDirs(const std::string &path) { + std::shared_ptr fs = system::Env::GetFileSystem(); + MS_EXCEPTION_IF_NULL(fs); + char temp_path[PATH_MAX] = {0}; + if (path.length() > PATH_MAX) { + MS_LOG(ERROR) << "Path lens is max than " << PATH_MAX; + return false; + } + for (uint32_t i = 0; i < path.length(); i++) { + temp_path[i] = path[i]; + if (temp_path[i] == '\\' || temp_path[i] == '/') { + if (i != 0) { + char tmp_char = temp_path[i]; + temp_path[i] = '\0'; + std::string path_handle(temp_path); + if (!fs->FileExist(temp_path)) { + MS_LOG(INFO) << "Dir " << path_handle << " does not exit, creating..."; + if (!fs->CreateDir(temp_path)) { + MS_LOG(ERROR) << "Create " << path_handle << " dir error"; + return false; + } + } + temp_path[i] = tmp_char; + } + } + } + + if (!fs->FileExist(path)) { + MS_LOG(INFO) << "Dir " << path << " does not exit, creating..."; + if (!fs->CreateDir(path)) { + MS_LOG(ERROR) << "Create " << path << " dir error"; + return false; + } + } + return true; +} + +std::optional Common::GetConfigFile(const std::string &env) { + if (env.empty()) { + MS_LOG(EXCEPTION) << "Invalid env"; + } + auto config_path_str = std::getenv(env.c_str()); + if (config_path_str == nullptr) { + MS_LOG(ERROR) << "Please export env:" << env; + return {}; + } + MS_LOG(INFO) << "Async Dump Getenv env:" << env << "=" << config_path_str; + + std::string dump_config_file(config_path_str); + std::shared_ptr fs = system::Env::GetFileSystem(); + MS_EXCEPTION_IF_NULL(fs); + if (!fs->FileExist(dump_config_file)) { + 
MS_LOG(ERROR) << dump_config_file << " not exist."; + return {}; + } + return dump_config_file; +} +} // namespace mindspore diff --git a/mindspore/ccsrc/debug/common.h b/mindspore/ccsrc/debug/common.h new file mode 100644 index 0000000000..8d4a6cb467 --- /dev/null +++ b/mindspore/ccsrc/debug/common.h @@ -0,0 +1,36 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_DEBUG_COMMON_H_ +#define MINDSPORE_MINDSPORE_CCSRC_DEBUG_COMMON_H_ + +#include +#include +#include "utils/contract.h" + +namespace mindspore { +class Common { + public: + Common() = default; + ~Common() = default; + static std::optional GetRealPath(const std::string &input_path); + static std::optional GetConfigFile(const std::string &env); + + private: + static bool CreateNotExistDirs(const std::string &path); +}; +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_DEBUG_COMMON_H_ diff --git a/mindspore/ccsrc/debug/data_dump_parser.cc b/mindspore/ccsrc/debug/data_dump_parser.cc new file mode 100644 index 0000000000..259ec388d3 --- /dev/null +++ b/mindspore/ccsrc/debug/data_dump_parser.cc @@ -0,0 +1,152 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "debug/data_dump_parser.h" + +#include +#include "utils/context/ms_context.h" +#include "debug/common.h" + +constexpr auto kDataDumpConfigPtah = "DATA_DUMP_CONFIG_PATH"; +constexpr auto kEnableDataDump = "ENABLE_DATA_DUMP"; +constexpr auto kDataDumpPath = "DATA_DUMP_PATH"; +namespace mindspore { +void DataDumpParser::ResetParam() { + enable_ = false; + net_name_.clear(); + dump_mode_ = 0; + dump_step_ = 0; + kernel_set_.clear(); +} + +bool DataDumpParser::DumpEnabled() const { + auto enable_dump = std::getenv(kEnableDataDump); + if (!enable_dump) { + MS_LOG(WARNING) << "[DataDump] enable dump is null. Please export ENABLE_DATA_DUMP"; + return false; + } + + auto enabled = std::atoi(enable_dump); + if (enabled != 1) { + MS_LOG(WARNING) << "[DataDump] Please export ENABLE_DATA_DUMP=1"; + return false; + } + + auto context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context); + if (context->execution_mode() == kPynativeMode) { + MS_LOG(EXCEPTION) << "[DataDump] PyNative mode not support data dump"; + } + return true; +} + +std::optional DataDumpParser::GetDumpPath() const { + auto dump_path = std::getenv(kDataDumpPath); + if (!dump_path) { + MS_LOG(ERROR) << "[DataDump] dump path is null. 
Please export DATA_DUMP_PATH"; + return {}; + } + std::string dump_path_str(dump_path); + return dump_path_str; +} + +void DataDumpParser::ParseDumpConfig() { + std::lock_guard guard(lock_); + MS_LOG(INFO) << "[DataDump] parse start"; + if (!DumpEnabled()) { + MS_LOG(INFO) << "[DataDump] dump not enable"; + return; + } + + ResetParam(); + + auto dump_config_file = Common::GetConfigFile(kDataDumpConfigPtah); + if (!dump_config_file.has_value()) { + MS_LOG(EXCEPTION) << "[DataDump] Get config file failed"; + } + + std::ifstream json_file(dump_config_file.value()); + if (!json_file.is_open()) { + MS_LOG(EXCEPTION) << "[DataDump] " << dump_config_file.value() << " open failed."; + } + + nlohmann::json j; + json_file >> j; + if (j.find("DumpSettings") == j.end()) { + MS_LOG(EXCEPTION) << "[DataDump] DumpSettings is not exist."; + } + + nlohmann::json dump_settings = j.at("DumpSettings"); + // convert json to string + std::stringstream ss; + ss << dump_settings; + std::string cfg = ss.str(); + MS_LOG(INFO) << "[DataDump] Async dump settings Json: " << cfg; + if (!IsConfigExist(dump_settings)) { + MS_LOG(EXCEPTION) << "[DataDump] Async dump json invalid"; + } + + if (!ParseDumpSetting(dump_settings)) { + MS_LOG(EXCEPTION) << "[DataDump] Parse dump json failed"; + } +} + +bool DataDumpParser::NeedDump(const std::string &op_full_name) const { + if (!DumpEnabled()) { + return false; + } + if (dump_mode_ == 0) { + return true; + } + auto iter = kernel_set_.find(op_full_name); + return iter != kernel_set_.end(); +} + +bool DataDumpParser::IsConfigExist(const nlohmann::json &dump_settings) const { + if (dump_settings.find("mode") == dump_settings.end() || dump_settings.find("net_name") == dump_settings.end() || + dump_settings.find("iteration") == dump_settings.end() || dump_settings.find("kernels") == dump_settings.end()) { + MS_LOG(ERROR) << "[DataDump] DumpSettings keys are not exist."; + return false; + } + return true; +} + +bool DataDumpParser::ParseDumpSetting(const 
nlohmann::json &dump_settings) { + auto mode = dump_settings.at("mode"); + auto net_name = dump_settings.at("net_name"); + auto iteration = dump_settings.at("iteration"); + auto kernels = dump_settings.at("kernels"); + if (!(mode.is_number() && net_name.is_string() && iteration.is_number() && kernels.is_array())) { + MS_LOG(ERROR) << "[DataDump] Element's type in Dump config json is invalid."; + enable_ = false; + return false; + } + + enable_ = true; + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + dump_mode_ = mode; + net_name_ = net_name; + dump_step_ = iteration; + for (const auto &kernel : kernels) { + auto kernel_str = kernel.dump(); + kernel_str.erase(std::remove(kernel_str.begin(), kernel_str.end(), '\"'), kernel_str.end()); + MS_LOG(INFO) << "[DataDump] Need dump kernel:" << kernel_str; + kernel_set_.insert(kernel_str); + } + return true; +} +} // namespace mindspore diff --git a/mindspore/ccsrc/debug/data_dump_parser.h b/mindspore/ccsrc/debug/data_dump_parser.h new file mode 100644 index 0000000000..751c61dd1a --- /dev/null +++ b/mindspore/ccsrc/debug/data_dump_parser.h @@ -0,0 +1,61 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_ +#define MINDSPORE_MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_ + +#include +#include +#include +#include +#include "nlohmann/json.hpp" +#include "common/utils.h" + +namespace mindspore { +class DataDumpParser { + public: + static DataDumpParser &GetInstance() { + static DataDumpParser instance; + return instance; + } + void ParseDumpConfig(); + bool NeedDump(const std::string &op_full_name) const; + bool DumpEnabled() const; + std::optional GetDumpPath() const; + bool enable() const { return enable_; } + const std::string &net_name() const { return net_name_; } + uint32_t dump_mode() const { return dump_mode_; } + uint32_t dump_step() const { return dump_step_; } + const std::set &kernel_set() const { return kernel_set_; } + + private: + DataDumpParser() = default; + virtual ~DataDumpParser() = default; + DISABLE_COPY_AND_ASSIGN(DataDumpParser); + + void ResetParam(); + bool IsConfigExist(const nlohmann::json &dump_settings) const; + bool ParseDumpSetting(const nlohmann::json &dump_settings); + + std::mutex lock_; + bool enable_{false}; + std::string net_name_; + uint32_t dump_mode_{0}; + uint32_t dump_step_{0}; + std::set kernel_set_; +}; +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_ diff --git a/mindspore/ccsrc/debug/debug_services.cc b/mindspore/ccsrc/debug/debug_services.cc index cb883eef51..cc6c5c53ad 100644 --- a/mindspore/ccsrc/debug/debug_services.cc +++ b/mindspore/ccsrc/debug/debug_services.cc @@ -37,8 +37,8 @@ DebugServices &DebugServices::operator=(const DebugServices &other) { DebugServices::~DebugServices() { delete tensor_loader_; } -void DebugServices::add_watchpoint(unsigned int id, unsigned int watch_condition, - const std::vector> &check_node_list) { +void DebugServices::AddWatchpoint(unsigned int id, unsigned int watch_condition, + const std::vector> &check_node_list) { std::lock_guard lg(lock_); watchpoint_t watchpoint_item; 
@@ -57,14 +57,14 @@ void DebugServices::add_watchpoint(unsigned int id, unsigned int watch_condition watchpoint_table[id] = watchpoint_item; } -void DebugServices::remove_watchpoint(unsigned int id) { +void DebugServices::RemoveWatchpoint(unsigned int id) { std::lock_guard lg(lock_); watchpoint_table.erase(id); } -void DebugServices::check_watchpoints(std::vector *name, std::vector *slot, - std::vector *data_ptr, std::vector *data_size, - std::vector *condition, std::vector *wacthpoint_id) { +void DebugServices::CheckWatchpoints(std::vector *name, std::vector *slot, + std::vector *data_ptr, std::vector *data_size, + std::vector *condition, std::vector *wacthpoint_id) { std::lock_guard lg(lock_); std::vector> tensor_list = tensor_loader_->GetTensor(); @@ -171,9 +171,9 @@ void DebugServices::check_watchpoints(std::vector *name, std::vecto } } -void DebugServices::read_nodes_tensors(std::vector name, std::vector *ret_name, - std::vector *data_ptr, std::vector *data_size, - std::vector *dtype, std::vector> *shape) { +void DebugServices::ReadNodesTensors(std::vector name, std::vector *ret_name, + std::vector *data_ptr, std::vector *data_size, + std::vector *dtype, std::vector> *shape) { std::vector>> result_list; tensor_loader_->SearchTensors(name, &result_list); @@ -189,6 +189,28 @@ void DebugServices::read_nodes_tensors(std::vector name, std::vecto } } -TensorLoader *DebugServices::get_tensor_loader() const { return tensor_loader_; } +bool DebugServices::IsWatchPoint(std::string kernel_name, + std::unordered_map watchpoint_table) { + bool ret = false; + for (auto w_table_item : watchpoint_table) { + auto check_node_list = std::get<1>(w_table_item).check_node_list; + for (auto check_node : check_node_list) { + std::string w_name = std::get<0>(check_node); + bool w_type = std::get<1>(check_node); + if ((w_type == true && + ((kernel_name.find(w_name) != string::npos && kernel_name.rfind(w_name, 0) == 0) || w_name == "*")) || + (w_type == false && kernel_name == w_name)) 
{ + ret = true; + return ret; + } + } + } + return ret; +} + +TensorLoader *DebugServices::tensor_loader() const { return tensor_loader_; } +std::unordered_map DebugServices::GetWatchpointTable() { + return watchpoint_table; +} } // namespace mindspore diff --git a/mindspore/ccsrc/debug/debug_services.h b/mindspore/ccsrc/debug/debug_services.h index b2fd41cd68..41400af1d5 100644 --- a/mindspore/ccsrc/debug/debug_services.h +++ b/mindspore/ccsrc/debug/debug_services.h @@ -37,22 +37,6 @@ class DebugServices { ~DebugServices(); - void add_watchpoint(unsigned int id, unsigned int watch_condition, - const std::vector> &check_node_list); - - void remove_watchpoint(unsigned int id); - - void check_watchpoints(std::vector *name, std::vector *slot, std::vector *data_ptr, - std::vector *data_size, std::vector *condition, - std::vector *wacthpoint_id); - - void read_nodes_tensors(std::vector name, std::vector *ret_name, - std::vector *data_ptr, std::vector *data_size, - std::vector *dtype, std::vector> *shape); - - TensorLoader *get_tensor_loader() const; - - private: typedef struct condition_no_param { bool enabled = false; } condition_no_param_t; @@ -84,6 +68,26 @@ class DebugServices { std::vector> check_node_list; } watchpoint_t; + void AddWatchpoint(unsigned int id, unsigned int watch_condition, + const std::vector> &check_node_list); + + void RemoveWatchpoint(unsigned int id); + + void CheckWatchpoints(std::vector *name, std::vector *slot, std::vector *data_ptr, + std::vector *data_size, std::vector *condition, + std::vector *wacthpoint_id); + + void ReadNodesTensors(std::vector name, std::vector *ret_name, + std::vector *data_ptr, std::vector *data_size, + std::vector *dtype, std::vector> *shape); + + bool IsWatchPoint(std::string kernel_name, std::unordered_map watchpoint_table); + + TensorLoader *tensor_loader() const; + + std::unordered_map GetWatchpointTable(); + + private: std::mutex lock_; std::unordered_map watchpoint_table; diff --git 
a/mindspore/ccsrc/debug/debugger/debug_graph.proto b/mindspore/ccsrc/debug/debugger/debug_graph.proto index 042360fac3..0930791ac0 100644 --- a/mindspore/ccsrc/debug/debugger/debug_graph.proto +++ b/mindspore/ccsrc/debug/debugger/debug_graph.proto @@ -313,4 +313,10 @@ message TensorProto { // If the tensor content transferring is finished. optional bool finished = 6; + + // The iteration of the tensor. Supported: "prev" or leave empty. + optional string iter = 7; + + // If the tensor name should be truncated. + optional bool truncate = 8; } \ No newline at end of file diff --git a/mindspore/ccsrc/debug/debugger/debugger.cc b/mindspore/ccsrc/debug/debugger/debugger.cc index ea147a929f..dd89e17e2d 100644 --- a/mindspore/ccsrc/debug/debugger/debugger.cc +++ b/mindspore/ccsrc/debug/debugger/debugger.cc @@ -19,8 +19,8 @@ #include #include #include "debug/debugger/debugger.h" -#include "pipeline/pipeline.h" -#include "session/anf_runtime_algorithm.h" +#include "pipeline/jit/pipeline.h" +#include "backend/session/anf_runtime_algorithm.h" using debugger::EventReply; using debugger::GraphProto; @@ -43,7 +43,8 @@ Debugger::Debugger() device_id_(0), num_step_(0), debugger_enabled_(false), - is_dataset_graph_(false) {} + is_dataset_graph_(false), + partial_memory_(false) {} void Debugger::Init(const uint32_t device_id) { // access lock for public method @@ -57,6 +58,7 @@ void Debugger::EnableDebugger() { // reset some of the class members num_step_ = 0; debugger_enabled_ = false; + partial_memory_ = false; grpc_client_ = nullptr; debug_services_ = nullptr; @@ -72,7 +74,8 @@ void Debugger::EnableDebugger() { MS_LOG(WARNING) << "Not enabling debugger. 
Set environment variable ENABLE_MS_DEBUGGER=1 to enable debugger."; return; } - // configure host + + // configure grpc host const char *env_host_str = std::getenv("MS_DEBUGGER_HOST"); std::string host; if (env_host_str != nullptr) { @@ -82,7 +85,7 @@ void Debugger::EnableDebugger() { MS_LOG(WARNING) << "Environment variable MS_DEBUGGER_HOST doesn't exist. Using default debugger host: localhost"; host = "localhost"; } - // configure port + // configure grpc port const char *env_port_str = std::getenv("MS_DEBUGGER_PORT"); std::string port; if (env_port_str != nullptr) { @@ -93,6 +96,27 @@ void Debugger::EnableDebugger() { port = "50051"; } + // configure partial memory reuse + const char *env_partial_mem_str = std::getenv("MS_DEBUGGER_PARTIAL_MEM"); + if (env_partial_mem_str != nullptr) { + MS_LOG(INFO) << "Getenv MS_DEBUGGER_PARTIAL_MEM: " << env_partial_mem_str; + if (std::strcmp(env_partial_mem_str, "1") == 0) { + partial_memory_ = true; + } + } + // switch memory reuse on or off + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + context_ptr->set_enable_mem_reuse(partial_memory_); + // print some message about memory reuse to user + if (partial_memory_) { + MS_LOG(WARNING) << "Partial Memory Reuse is enabled. Note: 1. Please only set watchpoints before running the first " + "step. 2. Tensor values are only available for nodes that are watched by any watchpoint."; + } else { + MS_LOG(WARNING) << "Memory Reuse is disabled. 
Set environment variable MS_DEBUGGER_PARTIAL_MEM=1 to reduce memory " + "usage for large models."; + } + // initialize grpc client grpc_client_ = std::make_unique(host, port); debug_services_ = std::make_unique(); @@ -106,6 +130,7 @@ void Debugger::Reset() { num_step_ = 0; debugger_enabled_ = false; is_dataset_graph_ = false; + partial_memory_ = false; graph_ptr_ = nullptr; grpc_client_ = nullptr; debug_services_ = nullptr; @@ -178,7 +203,7 @@ void Debugger::CheckDatasetGraph() { is_dataset_graph_ = false; } -GraphProto Debugger::GetGraphProto() { +GraphProto Debugger::GetGraphProto() const { // convert kernel graph to debugger modelproto ModelProto model = GetDebuggerFuncGraphProto(graph_ptr_); return model.graph(); @@ -261,12 +286,9 @@ void Debugger::CommandLoop() { MS_LOG(INFO) << "node name: " << node.node_name(); MS_LOG(INFO) << "node type: " << node.node_type(); } - WatchCondition recieved_condition = GetWatchcondition(reply); - MS_LOG(INFO) << "condition: " << recieved_condition.condition(); - int32_t id = GetWatchpointID(reply); - MS_LOG(INFO) << "id: " << id; - bool delete_ = GetWatchpointDelete(reply); - MS_LOG(INFO) << "delete: " << delete_; + MS_LOG(INFO) << "condition: " << GetWatchcondition(reply).condition(); + MS_LOG(INFO) << "id: " << GetWatchpointID(reply); + MS_LOG(INFO) << "delete: " << GetWatchpointDelete(reply); } MS_LOG(INFO) << "Setting watchpoint"; if (GetWatchpointDelete(reply)) { @@ -284,15 +306,20 @@ void Debugger::CommandLoop() { MS_LOG(INFO) << "tensor node name: " << tensor.node_name(); MS_LOG(INFO) << "tensor slot: " << tensor.slot(); MS_LOG(INFO) << "tensor finished: " << std::boolalpha << tensor.finished() << std::noboolalpha; + MS_LOG(INFO) << "tensor iter: " << tensor.iter(); + MS_LOG(INFO) << "tensor truncate: " << std::boolalpha << tensor.truncate() << std::noboolalpha; } } MS_LOG(INFO) << "Sending tensors"; std::list tensors = LoadTensors(GetTensors(reply)); { + // print view cmd reply for (auto tensor : tensors) { 
MS_LOG(INFO) << "tensor node name: " << tensor.node_name(); MS_LOG(INFO) << "tensor slot: " << tensor.slot(); MS_LOG(INFO) << "tensor finished: " << std::boolalpha << tensor.finished() << std::noboolalpha; + MS_LOG(INFO) << "tensor iter: " << tensor.iter(); + MS_LOG(INFO) << "tensor truncate: " << std::boolalpha << tensor.truncate() << std::noboolalpha; MS_LOG(INFO) << "tensor dims: "; for (auto dim : tensor.dims()) { MS_LOG(INFO) << dim << ","; @@ -309,81 +336,18 @@ void Debugger::CommandLoop() { } } -DebuggerCommand Debugger::GetCommand(const EventReply &reply) { - DebuggerCommand cmd = DebuggerCommand::kUnknownCMD; - switch (reply.cmd_case()) { - case debugger::EventReply::CmdCase::kExit: - cmd = DebuggerCommand::kExitCMD; - break; - case debugger::EventReply::CmdCase::kRunCmd: - cmd = DebuggerCommand::kRunCMD; - break; - case debugger::EventReply::CmdCase::kSetCmd: - cmd = DebuggerCommand::kSetCMD; - break; - case debugger::EventReply::CmdCase::kViewCmd: - cmd = DebuggerCommand::kViewCMD; - break; - default: - MS_LOG(ERROR) << "Error: UnknownCMD"; - break; - } - return cmd; -} - -ProtoVector Debugger::GetWatchnodes(const EventReply &reply) { - if (!reply.has_set_cmd()) { - MS_LOG(ERROR) << "Error: Not SetCMD, can not get WatchNodes. Returning default value: ProtoVector()."; - return ProtoVector(); - } - return reply.set_cmd().watch_nodes(); -} - -WatchCondition Debugger::GetWatchcondition(const EventReply &reply) { - if (!reply.has_set_cmd() || !reply.set_cmd().has_watch_condition()) { - MS_LOG(ERROR) << "Error: Can not get WatchCondition from command. Returning default value: WatchCondition()."; - return WatchCondition(); - } - return reply.set_cmd().watch_condition(); -} - -int32_t Debugger::GetWatchpointID(const EventReply &reply) { - if (!reply.has_set_cmd()) { - MS_LOG(ERROR) << "Error: Not SetCMD, can not get Watchpoint ID. 
Returning default value: 0."; - return 0; - } - return reply.set_cmd().id(); -} - -bool Debugger::GetWatchpointDelete(const EventReply &reply) { - if (!reply.has_set_cmd()) { - MS_LOG(ERROR) << "Error: Not SetCMD, can not get Watchpoint delete flag. Returning default value: false."; - return false; - } - return reply.set_cmd().delete_(); -} - -ProtoVector Debugger::GetTensors(const EventReply &reply) { - if (!reply.has_view_cmd()) { - MS_LOG(ERROR) << "Error: Not ViewCMD, can not get Tensors. Returning default value: ProtoVector()."; - return ProtoVector(); - } - return reply.view_cmd().tensors(); -} - void Debugger::SetWatchpoint(const ProtoVector &nodes, const WatchCondition &condition, const int32_t id) { std::vector> check_node_list; std::transform(nodes.begin(), nodes.end(), std::back_inserter(check_node_list), [](WatchNode node) -> std::tuple { return make_tuple(node.node_name(), node.node_type() == "scope"); }); - - debug_services_->add_watchpoint(id, condition.condition(), check_node_list); + debug_services_->AddWatchpoint(id, condition.condition(), check_node_list); } -void Debugger::RemoveWatchpoint(const int32_t id) { debug_services_->remove_watchpoint(id); } +void Debugger::RemoveWatchpoint(const int32_t id) { debug_services_->RemoveWatchpoint(id); } -std::list Debugger::LoadTensors(const ProtoVector &tensors) { +std::list Debugger::LoadTensors(const ProtoVector &tensors) const { std::vector name; std::vector ret_name; std::vector data_ptr; @@ -391,38 +355,42 @@ std::list Debugger::LoadTensors(const ProtoVector &ten std::vector dtype; std::vector> shape; - std::transform(tensors.begin(), tensors.end(), std::back_inserter(name), - [](TensorProto tensor) -> std::string { return tensor.node_name() + ":" + tensor.slot(); }); + std::transform(tensors.begin(), tensors.end(), std::back_inserter(name), GetTensorFullName); - debug_services_->read_nodes_tensors(name, &ret_name, &data_ptr, &data_size, &dtype, &shape); + // ret_name will contain tensor names that 
are found in TensorLoader + // items in ret_name will be in the same order with tensors if found + debug_services_->ReadNodesTensors(name, &ret_name, &data_ptr, &data_size, &dtype, &shape); std::list tensor_list; unsigned int result_index = 0; - TensorProto tensor_item; - for (auto tensor : tensors) { + TensorProto tensor_item; tensor_item.set_node_name(tensor.node_name()); tensor_item.set_slot(tensor.slot()); + tensor_item.set_iter(tensor.iter()); + tensor_item.set_truncate(tensor.truncate()); + tensor_item.clear_tensor_content(); + tensor_item.clear_data_type(); + tensor_item.clear_dims(); + // always set finished to true before big tensor splitting is supported tensor_item.set_finished(true); // return empty tensor if didn't find the requested tensor - if (result_index >= ret_name.size() || ret_name[result_index] != tensor.node_name() + ":" + tensor.slot()) { + if (result_index >= ret_name.size() || ret_name[result_index] != GetTensorFullName(tensor)) { tensor_list.push_back(tensor_item); continue; } tensor_item.set_tensor_content(data_ptr[result_index], data_size[result_index]); tensor_item.set_data_type(GetDebuggerNumberDataType(dtype[result_index])); - tensor_item.clear_dims(); for (auto &elem : shape[result_index]) { tensor_item.add_dims(elem); } + // add tensor to result list and increment result_index to check next item in ret_name tensor_list.push_back(tensor_item); - result_index++; } - return tensor_list; } @@ -432,7 +400,7 @@ void Debugger::Exit() { std::exit(EXIT_FAILURE); } -std::list Debugger::CheckWatchpoints() { +std::list Debugger::CheckWatchpoints() const { std::vector name; std::vector slot; std::vector data_ptr; @@ -440,33 +408,24 @@ std::list Debugger::CheckWatchpoints() { std::vector condition; std::vector watchpoint_id; - debug_services_->check_watchpoints(&name, &slot, &data_ptr, &data_size, &condition, &watchpoint_id); - - std::list points; - + debug_services_->CheckWatchpoints(&name, &slot, &data_ptr, &data_size, &condition, 
&watchpoint_id); + std::list hits; for (unsigned int i = 0; i < name.size(); i++) { - TensorProto *tensor_item; - tensor_item = new TensorProto(); + WatchpointHit hit; + hit.set_id(watchpoint_id[i]); + + // here TensorProto act as a tensor indicator, not sending tensor content + TensorProto *tensor_item = hit.mutable_tensor(); tensor_item->set_node_name(name[i]); tensor_item->set_slot(slot[i]); - tensor_item->set_tensor_content(data_ptr[i], data_size[i]); - - // finished in TensorProto will always be true before we implement big tensor splitting tensor_item->set_finished(true); - WatchCondition *condition_item; - condition_item = new WatchCondition(); + WatchCondition *condition_item = hit.mutable_watch_condition(); condition_item->set_condition(debugger::WatchCondition_Condition(condition[i])); - WatchpointHit point; - point.set_allocated_tensor(tensor_item); - point.set_allocated_watch_condition(condition_item); - point.set_id(watchpoint_id[i]); - - points.push_back(point); + hits.push_back(hit); } - - return points; + return hits; } void Debugger::SendWatchpointsAndSuspend(const std::list &points) { @@ -481,8 +440,83 @@ void Debugger::SendWatchpointsAndSuspend(const std::list &points) CommandLoop(); } -DebugServices *Debugger::get_debug_services() { return debug_services_.get(); } +DebugServices *Debugger::debug_services() const { return debug_services_.get(); } + +bool Debugger::debugger_enabled() const { return debugger_enabled_; } + +DebuggerCommand GetCommand(const EventReply &reply) { + DebuggerCommand cmd = DebuggerCommand::kUnknownCMD; + switch (reply.cmd_case()) { + case debugger::EventReply::CmdCase::kExit: + cmd = DebuggerCommand::kExitCMD; + break; + case debugger::EventReply::CmdCase::kRunCmd: + cmd = DebuggerCommand::kRunCMD; + break; + case debugger::EventReply::CmdCase::kSetCmd: + cmd = DebuggerCommand::kSetCMD; + break; + case debugger::EventReply::CmdCase::kViewCmd: + cmd = DebuggerCommand::kViewCMD; + break; + default: + MS_LOG(ERROR) << 
"Error: UnknownCMD"; + break; + } + return cmd; +} + +ProtoVector GetWatchnodes(const EventReply &reply) { + if (!reply.has_set_cmd()) { + MS_LOG(ERROR) << "Error: Not SetCMD, can not get WatchNodes. Returning default value: ProtoVector()."; + return ProtoVector(); + } + return reply.set_cmd().watch_nodes(); +} + +WatchCondition GetWatchcondition(const EventReply &reply) { + if (!reply.has_set_cmd() || !reply.set_cmd().has_watch_condition()) { + MS_LOG(ERROR) << "Error: Can not get WatchCondition from command. Returning default value: WatchCondition()."; + return WatchCondition(); + } + return reply.set_cmd().watch_condition(); +} + +int32_t GetWatchpointID(const EventReply &reply) { + if (!reply.has_set_cmd()) { + MS_LOG(ERROR) << "Error: Not SetCMD, can not get Watchpoint ID. Returning default value: 0."; + return 0; + } + return reply.set_cmd().id(); +} + +bool GetWatchpointDelete(const EventReply &reply) { + if (!reply.has_set_cmd()) { + MS_LOG(ERROR) << "Error: Not SetCMD, can not get Watchpoint delete flag. Returning default value: false."; + return false; + } + return reply.set_cmd().delete_(); +} + +ProtoVector GetTensors(const EventReply &reply) { + if (!reply.has_view_cmd()) { + MS_LOG(ERROR) << "Error: Not ViewCMD, can not get Tensors. Returning default value: ProtoVector()."; + return ProtoVector(); + } + return reply.view_cmd().tensors(); +} + +std::string GetTensorFullName(const TensorProto &tensor) { + string node_name = tensor.node_name(); + if (tensor.truncate()) { + // scopes in node name are seperated by '/' + // use the name without scope if truncate is true + std::size_t found = node_name.find_last_of("/"); + node_name = node_name.substr(found + 1); + } + return node_name + ":" + tensor.slot() + (tensor.iter() == "" ? 
"" : ":" + tensor.iter()); +} -bool Debugger::debugger_enabled() { return debugger_enabled_; } +bool Debugger::partial_memory() { return partial_memory_; } } // namespace mindspore diff --git a/mindspore/ccsrc/debug/debugger/debugger.h b/mindspore/ccsrc/debug/debugger/debugger.h index 6ce7d03625..5a3965d7cc 100644 --- a/mindspore/ccsrc/debug/debugger/debugger.h +++ b/mindspore/ccsrc/debug/debugger/debugger.h @@ -19,7 +19,7 @@ #include #include #include -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" #include "debug/debugger/grpc_client.h" #include "debug/debug_services.h" @@ -72,9 +72,11 @@ class Debugger : public std::enable_shared_from_this { // suspend the execution after a debug_op void PostDebugOp(); - DebugServices *get_debug_services(); + DebugServices *debug_services() const; - bool debugger_enabled(); + bool debugger_enabled() const; + + bool partial_memory(); private: // private constructor for singleton @@ -92,7 +94,7 @@ class Debugger : public std::enable_shared_from_this { void CheckDatasetGraph(); // serialize graph and get proto - GraphProto GetGraphProto(); + GraphProto GetGraphProto() const; // send graph and enter command wait loop void SendGraphAndSuspend(const GraphProto &graph_proto); @@ -102,16 +104,6 @@ class Debugger : public std::enable_shared_from_this { // break if RunCMD void CommandLoop(); - // process reply and command type - DebuggerCommand GetCommand(const EventReply &reply); - - // parse other data out of EventReply - ProtoVector GetWatchnodes(const EventReply &reply); - WatchCondition GetWatchcondition(const EventReply &reply); - int32_t GetWatchpointID(const EventReply &reply); - bool GetWatchpointDelete(const EventReply &reply); - ProtoVector GetTensors(const EventReply &reply); - // set what nodes and conditions to watch void SetWatchpoint(const ProtoVector &nodes, const WatchCondition &condition, const int32_t id); @@ -119,14 +111,14 @@ class Debugger : public std::enable_shared_from_this { void 
RemoveWatchpoint(const int32_t id); // load tensor for view command - std::list LoadTensors(const ProtoVector &tensors); + std::list LoadTensors(const ProtoVector &tensors) const; // terminate training process void Exit(); // analyze tensors and check watchpoint conditions // return names of tensors and what condition they hit - std::list CheckWatchpoints(); + std::list CheckWatchpoints() const; // send watchpoints that hit and enter command wait loop void SendWatchpointsAndSuspend(const std::list &points); @@ -139,6 +131,7 @@ class Debugger : public std::enable_shared_from_this { int32_t num_step_; bool debugger_enabled_; bool is_dataset_graph_; + bool partial_memory_; std::mutex access_lock_; // singleton @@ -155,5 +148,18 @@ ModelProto GetDebuggerFuncGraphProto(const FuncGraphPtr &func_graph); // for getting proto DataType from Type of Tensor DataType GetDebuggerNumberDataType(const TypePtr &type); +// process reply and command type +DebuggerCommand GetCommand(const EventReply &reply); + +// parse other data out of EventReply +ProtoVector GetWatchnodes(const EventReply &reply); +WatchCondition GetWatchcondition(const EventReply &reply); +int32_t GetWatchpointID(const EventReply &reply); +bool GetWatchpointDelete(const EventReply &reply); +ProtoVector GetTensors(const EventReply &reply); + +// get the full name of a tensor, which is the name used in TensorLoader +std::string GetTensorFullName(const TensorProto &tensor); + } // namespace mindspore #endif // MINDSPORE_CCSRC_DEBUG_DEBUGGER_DEBUGGER_H_ diff --git a/mindspore/ccsrc/debug/draw.cc b/mindspore/ccsrc/debug/draw.cc index 573452eac0..ff8132fb28 100644 --- a/mindspore/ccsrc/debug/draw.cc +++ b/mindspore/ccsrc/debug/draw.cc @@ -25,11 +25,11 @@ #include "pybind11/pybind11.h" #include "ir/meta_func_graph.h" -#include "ir/param_value_py.h" +#include "ir/param_value.h" #include "ir/primitive.h" #include "utils/graph_utils.h" #include "utils/utils.h" -#include "operator/composite/composite.h" +#include 
"frontend/operator/composite/composite.h" #include "ir/tensor.h" namespace py = pybind11; @@ -321,18 +321,9 @@ void BaseDigraph::FuncGraphParameters(const FuncGraphPtr &key) { buffer_ << parameter->ToString(); auto param = parameter->cast(); if (param->has_default()) { - auto param_value = std::dynamic_pointer_cast(param->default_param()); - auto py_p = param_value->value(); - if (py::hasattr(py_p, "default_input")) { - py_p = py_p.attr("default_input"); - std::vector shape; - if (py::hasattr(py_p, PYTHON_TENSOR_FLAG)) { - auto m_tensor = py_p.cast>(); - shape = m_tensor->shape(); - } else if (py::hasattr(py_p, PYTHON_META_TENSOR_FLAG)) { - auto m_tensor = py_p.cast>(); - shape = m_tensor->shape(); - } + auto tensor = param->default_param()->value(); + if (tensor) { + auto &shape = tensor->shape(); std::ostringstream shape_str; std::copy(shape.begin(), shape.end(), std::ostream_iterator(shape_str, ",")); buffer_ << "[" << shape_str.str() << "]"; diff --git a/mindspore/ccsrc/debug/draw.h b/mindspore/ccsrc/debug/draw.h index 7804c6e94a..cb670fe0f6 100644 --- a/mindspore/ccsrc/debug/draw.h +++ b/mindspore/ccsrc/debug/draw.h @@ -22,7 +22,7 @@ #include #include "ir/anf.h" #include "utils/any.h" -#include "pipeline/parse/resolve.h" +#include "pipeline/jit/parse/resolve.h" namespace mindspore { namespace draw { diff --git a/mindspore/ccsrc/debug/dump_proto.cc b/mindspore/ccsrc/debug/dump_proto.cc index 99440537c7..35cdfafe26 100644 --- a/mindspore/ccsrc/debug/dump_proto.cc +++ b/mindspore/ccsrc/debug/dump_proto.cc @@ -453,6 +453,7 @@ void ProtoExporter::ExportCNode(const FuncGraphPtr &func_graph, const CNodePtr & GetOpNodeTypeAndAttrs(func_graph, op, node_proto); node_proto->set_name(std::to_string(apply_idx)); node_proto->set_scope(node->scope()->name()); + node_proto->set_full_name(node->fullname_with_scope()); // process OP inputs for (size_t i = 1; i < inputs.size(); ++i) { diff --git a/mindspore/ccsrc/debug/e2e_dump.cc b/mindspore/ccsrc/debug/e2e_dump.cc index 
78a331fc27..9037a6d00b 100644 --- a/mindspore/ccsrc/debug/e2e_dump.cc +++ b/mindspore/ccsrc/debug/e2e_dump.cc @@ -17,12 +17,14 @@ #include #include #include +#include #include #include "utils/log_adapter.h" #include "utils/system/file_system.h" #include "utils/system/env.h" #include "utils/convert_utils.h" #include "utils/context/ms_context.h" +#include "debug/common.h" using json = nlohmann::json; @@ -158,100 +160,19 @@ bool Dump::DumpToFile(const std::string &filename, const void *data, size_t len) return false; } - std::string realpath; - bool ret = GetRealPath(filename, &realpath); - if (!ret) { + auto realpath = Common::GetRealPath(filename); + if (!realpath.has_value()) { MS_LOG(ERROR) << "Get real path failed."; return false; } std::ofstream fd; - fd.open(realpath, std::ios::binary | std::ios::out); + fd.open(realpath.value(), std::ios::binary | std::ios::out); if (!fd.is_open()) { - MS_LOG(ERROR) << "Open file " << realpath << " fail."; + MS_LOG(ERROR) << "Open file " << realpath.value() << " fail."; return false; } (void)fd.write(reinterpret_cast(data), SizeToLong(len)); fd.close(); return true; } - -bool Dump::GetRealPath(const std::string &inpath, std::string *outpath) { - MS_EXCEPTION_IF_NULL(outpath); - auto path_split_pos = inpath.find_last_of('/'); - if (path_split_pos == std::string::npos) { - path_split_pos = inpath.find_last_of('\\'); - } - // get real path - char real_path[PATH_MAX] = {0}; - if (path_split_pos != std::string::npos) { - std::string prefix_path = inpath.substr(0, path_split_pos); - if (prefix_path.length() >= PATH_MAX) { - MS_LOG(ERROR) << "Prefix path is too longer!"; - return false; - } - std::string last_path = inpath.substr(path_split_pos, inpath.length() - path_split_pos); - auto ret = CreateNotExistDirs(prefix_path); - if (ret == false) { - MS_LOG(ERROR) << "CreateNotExistDirs Failed!"; - return false; - } - - if (nullptr == realpath(prefix_path.c_str(), real_path)) { - MS_LOG(ERROR) << "dir " << prefix_path << " does not 
exit."; - return false; - } - *outpath = std::string(real_path) + last_path; - } - - if (path_split_pos == std::string::npos) { - if (inpath.length() >= PATH_MAX) { - MS_LOG(ERROR) << "Prefix path is too longer!"; - return false; - } - if (nullptr == realpath(inpath.c_str(), real_path)) { - MS_LOG(ERROR) << "File " << inpath << " does not exit, it will be created."; - } - *outpath = std::string(real_path); - } - - return true; -} - -bool Dump::CreateNotExistDirs(const std::string &path) { - std::shared_ptr fs = system::Env::GetFileSystem(); - MS_EXCEPTION_IF_NULL(fs); - char temp_path[PATH_MAX] = {0}; - if (path.length() > PATH_MAX) { - MS_LOG(ERROR) << "Path lens is max than " << PATH_MAX; - return false; - } - for (uint32_t i = 0; i < path.length(); i++) { - temp_path[i] = path[i]; - if (temp_path[i] == '\\' || temp_path[i] == '/') { - if (i != 0) { - char tmp_char = temp_path[i]; - temp_path[i] = '\0'; - std::string path_handle(temp_path); - if (!fs->FileExist(temp_path)) { - MS_LOG(INFO) << "Dir " << path_handle << " does not exit, creating..."; - if (!fs->CreateDir(temp_path)) { - MS_LOG(ERROR) << "Create " << path_handle << " dir error"; - return false; - } - } - temp_path[i] = tmp_char; - } - } - } - - if (!fs->FileExist(path)) { - MS_LOG(INFO) << "Dir " << path << " does not exit, creating..."; - if (!fs->CreateDir(path)) { - MS_LOG(ERROR) << "Create " << path << " dir error"; - return false; - } - } - - return true; -} } // namespace mindspore diff --git a/mindspore/ccsrc/debug/e2e_dump.h b/mindspore/ccsrc/debug/e2e_dump.h index 4c3e8308da..acde1626cb 100644 --- a/mindspore/ccsrc/debug/e2e_dump.h +++ b/mindspore/ccsrc/debug/e2e_dump.h @@ -59,10 +59,6 @@ class Dump { uint32_t cur_iter_; std::vector dump_kernels_; - static bool GetRealPath(const std::string &inpath, std::string *outpath); - - static bool CreateNotExistDirs(const std::string &path); - private: bool ParseDumpConfig(const std::string &dump_config_file); bool IsConfigExist(const nlohmann::json 
&dumpSettings); diff --git a/mindspore/ccsrc/debug/info.h b/mindspore/ccsrc/debug/info.h index c09c6031b3..39475a4606 100644 --- a/mindspore/ccsrc/debug/info.h +++ b/mindspore/ccsrc/debug/info.h @@ -24,7 +24,7 @@ #include #include -#include "ir/base.h" +#include "base/base.h" #include "debug/trace_info.h" namespace mindspore { diff --git a/mindspore/ccsrc/debug/tensor_data.h b/mindspore/ccsrc/debug/tensor_data.h index 9704d69089..00af203208 100644 --- a/mindspore/ccsrc/debug/tensor_data.h +++ b/mindspore/ccsrc/debug/tensor_data.h @@ -51,25 +51,13 @@ class TensorData { int GetExecutionOrder() { return this->execution_order; } - int SetExecutionOrder(int execution_order) { - this->execution_order = execution_order; - return true; - } + void SetExecutionOrder(int execution_order) { this->execution_order = execution_order; } - int SetName(const std::string &name) { - this->name = name; - return true; - } + void SetName(const std::string &name) { this->name = name; } - bool SetTensor(mindspore::tensor::TensorPtr out_tensor) { - this->tensor_ptr = out_tensor; - return true; - } + void SetTensor(mindspore::tensor::TensorPtr out_tensor) { this->tensor_ptr = out_tensor; } - bool SetSlot(size_t slot) { - this->slot = slot; - return true; - } + void SetSlot(size_t slot) { this->slot = slot; } }; } // namespace mindspore #endif // MINDSPORE_CCSRC_DEBUG_TENSOR_DATA_H_ diff --git a/mindspore/ccsrc/debug/tensor_load.h b/mindspore/ccsrc/debug/tensor_load.h index 6c3ea67a78..ae0e89aae2 100644 --- a/mindspore/ccsrc/debug/tensor_load.h +++ b/mindspore/ccsrc/debug/tensor_load.h @@ -19,17 +19,28 @@ #include #include #include +#include #include #include +#include #include "debug/tensor_data.h" namespace mindspore { class TensorLoader { public: TensorLoader() : iter_num(-1) {} - ~TensorLoader() {} + ~TensorLoader() { EmptyTensor(); } - bool LoadNewTensor(std::shared_ptr tensor) { + bool LoadNewTensor(std::shared_ptr tensor, bool keep_prev) { + std::lock_guard lg(lock_); + if (keep_prev) 
{ + // add prev step tensor into current step map with ":prev" suffix + auto handle = prev_tensor_list_map.extract(tensor->GetName()); + if (!handle.empty()) { + handle.key() = tensor->GetName() + ":prev"; + tensor_list_map.insert(std::move(handle)); + } + } tensor_list.push_back(tensor); tensor_list_map.insert({tensor->GetName(), tensor}); return true; @@ -52,18 +63,23 @@ class TensorLoader { } } - bool EmptyTensor() { - tensor_list_map.clear(); + void EmptyTensor() { + std::lock_guard lg(lock_); + prev_tensor_list_map.clear(); + tensor_list_map.swap(prev_tensor_list_map); tensor_list.clear(); - return true; } + void EmptyPrevTensor() { prev_tensor_list_map.clear(); } + void set_iter_num(uint32_t iter_num) { this->iter_num = iter_num; } private: std::vector> tensor_list; std::map> tensor_list_map; + std::map> prev_tensor_list_map; uint32_t iter_num; + std::mutex lock_; }; } // namespace mindspore #endif // MINDSPORE_CCSRC_DEBUG_TENSOR_LOAD_H_ diff --git a/mindspore/ccsrc/debug/trace.cc b/mindspore/ccsrc/debug/trace.cc index e12a7b1209..b8d3f0a7c7 100644 --- a/mindspore/ccsrc/debug/trace.cc +++ b/mindspore/ccsrc/debug/trace.cc @@ -29,10 +29,10 @@ #include "ir/meta_func_graph.h" #include "utils/graph_utils.h" -#include "operator/composite/composite.h" +#include "frontend/operator/composite/composite.h" #include "ir/tensor.h" #include "debug/anf_ir_utils.h" -#include "pipeline/static_analysis/evaluator.h" +#include "pipeline/jit/static_analysis/evaluator.h" namespace mindspore { // namespace to support debug trace infomation diff --git a/mindspore/ccsrc/debug/trace.h b/mindspore/ccsrc/debug/trace.h index 9583997e93..7cf45abe30 100644 --- a/mindspore/ccsrc/debug/trace.h +++ b/mindspore/ccsrc/debug/trace.h @@ -27,7 +27,7 @@ #include "debug/info.h" #include "ir/anf.h" #include "ir/func_graph.h" -#include "pipeline/static_analysis/static_analysis.h" +#include "pipeline/jit/static_analysis/static_analysis.h" #include "utils/any.h" namespace mindspore { diff --git 
a/mindspore/ccsrc/debug/trace_info.h b/mindspore/ccsrc/debug/trace_info.h index cf4f0c080a..62908cb449 100644 --- a/mindspore/ccsrc/debug/trace_info.h +++ b/mindspore/ccsrc/debug/trace_info.h @@ -24,7 +24,7 @@ #include #include -#include "ir/base.h" +#include "base/base.h" namespace mindspore { class TraceInfo; diff --git a/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc b/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc deleted file mode 100644 index 42c611c3af..0000000000 --- a/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc +++ /dev/null @@ -1,94 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include "device/ascend/ascend_memory_manager.h" -#include "device/ascend/ascend_memory_pool.h" -#include "utils/context/ms_context.h" -#include "runtime/mem.h" -namespace mindspore { -namespace device { -namespace ascend { -constexpr uint64_t kAscendDeviceMemGB = 26; -constexpr uint64_t kAscendMemPoolGB = 4; -constexpr uint64_t kMemSizeGB = 30; -constexpr uint64_t kMaxMemSizeGB = 30; -constexpr uint64_t kAscendDeviceMemSize = (kAscendDeviceMemGB << kMemSizeGB); -constexpr uint64_t kAscendMemPoolSize = (kAscendMemPoolGB << kMemSizeGB); - -void AscendMemoryManager::MallocDeviceMemory() { - auto context_mem = GetDeviceMemSizeFromContext(); - device_mem_size_ = context_mem == 0 ? 
kAscendDeviceMemSize : context_mem; - static_mem_offset_ = device_mem_size_; - auto ret = rtMalloc(reinterpret_cast(&device_mem_base_), static_mem_offset_, RT_MEMORY_HBM); - if (ret != RT_ERROR_NONE) { - MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << static_mem_offset_ << "] fail, ret[" << ret << "]"; - } - - if (context_mem == 0) { - device_mem_pool_size_ = kAscendMemPoolSize; - ret = rtMalloc(reinterpret_cast(&device_mem_pool_base_), device_mem_pool_size_, RT_MEMORY_HBM); - if (ret != RT_ERROR_NONE) { - MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << device_mem_pool_size_ << "] fail, ret[" << ret << "]"; - } - AscendMemoryPool::GetInstance().set_device_mem_pool_base(device_mem_pool_base_); - AscendMemoryPool::GetInstance().set_device_mem_pool_size(device_mem_pool_size_); - } -} - -uint64_t AscendMemoryManager::GetDeviceMemSizeFromContext() { - auto context = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(context); - auto variable_memory_max_size = context->variable_memory_max_size(); - if (variable_memory_max_size == "0") { - return 0; - } - MS_LOG(INFO) << "context variable_memory_max_size:" << variable_memory_max_size; - auto pos = variable_memory_max_size.find('*'); - if (pos == std::string::npos) { - MS_LOG(EXCEPTION) << "Invalid variable_memory_max_size"; - } - auto gb_str = variable_memory_max_size.substr(0, pos); - auto gb_var = std::stoull(gb_str); - MS_LOG(INFO) << "variable_memory_max_size(GB):" << gb_var; - if (gb_var > kMaxMemSizeGB || gb_var == 0) { - MS_LOG(EXCEPTION) << "Invalid allocate memory size:" << gb_var << " which should be in (0-30]GB"; - } - return gb_var << kMemSizeGB; -} - -void AscendMemoryManager::FreeDeviceMemory() { - if (device_mem_base_ != nullptr) { - auto ret = rtFree(device_mem_base_); - if (ret != RT_ERROR_NONE) { - MS_LOG(ERROR) << "rtFree mem size[" << device_mem_size_ << "] fail, ret[" << ret << "]"; - } - device_mem_base_ = nullptr; - } - if (device_mem_pool_base_ != nullptr) { - auto ret = 
rtFree(device_mem_pool_base_); - if (ret != RT_ERROR_NONE) { - MS_LOG(ERROR) << "rtFree mem size[" << device_mem_pool_size_ << "] fail, ret[" << ret << "]"; - } - device_mem_pool_base_ = nullptr; - } -} - -void *AscendMemoryManager::MallocMemFromMemPool(size_t size) { - return AscendMemoryPool::GetInstance().AllocTensorMem(size); -} -} // namespace ascend -} // namespace device -} // namespace mindspore diff --git a/mindspore/ccsrc/device/ascend/ascend_memory_pool.cc b/mindspore/ccsrc/device/ascend/ascend_memory_pool.cc deleted file mode 100644 index 69c6dca576..0000000000 --- a/mindspore/ccsrc/device/ascend/ascend_memory_pool.cc +++ /dev/null @@ -1,66 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "device/ascend/ascend_memory_pool.h" -#include "device/ascend/ascend_kernel_runtime.h" -#include "utils/log_adapter.h" - -namespace mindspore { -namespace device { -namespace ascend { -size_t AscendMemoryPool::AllocDeviceMem(size_t size, DeviceMemPtr *addr) { - if (has_malloc_) { - MS_LOG(EXCEPTION) << "Has alloc memory pool memory !"; - } - if (size == 0 || size > free_mem_size_) { - MS_LOG(EXCEPTION) << "Failed to alloc memory pool memory !"; - } - *addr = device_mem_pool_base_; - if (*addr == nullptr) { - MS_LOG(EXCEPTION) << "Device memory pool base is nullptr, failed to alloc memory pool memory!"; - } - has_malloc_ = true; - free_mem_size_ -= size; - return size; -} - -bool AscendMemoryPool::FreeDeviceMem(const DeviceMemPtr &addr) { - MS_EXCEPTION_IF_NULL(addr); - has_malloc_ = false; - free_mem_size_ = total_mem_size_; - return true; -} - -size_t AscendMemoryPool::AlignMemorySize(size_t size) const { - if (size == 0) { - return DYNAMIC_MEM_ALIGN_SIZE; - } - return ((size + DYNAMIC_MEM_ALIGN_SIZE + 31) / DYNAMIC_MEM_ALIGN_SIZE) * DYNAMIC_MEM_ALIGN_SIZE; -} - -size_t AscendMemoryPool::mem_alloc_unit_size() const { return free_mem_size_ - 512; } - -void AscendMemoryPool::set_device_mem_pool_base(uint8_t *device_mem_pool_base) { - MS_EXCEPTION_IF_NULL(device_mem_pool_base); - device_mem_pool_base_ = device_mem_pool_base; -} - -size_t AscendMemoryPool::free_mem_size() { return free_mem_size_; } - -size_t AscendMemoryPool::total_mem_size() { return total_mem_size_; } -} // namespace ascend -} // namespace device -} // namespace mindspore diff --git a/mindspore/ccsrc/device/gpu/distribution/mpi_wrapper.cc b/mindspore/ccsrc/device/gpu/distribution/mpi_wrapper.cc deleted file mode 100644 index 46b574c575..0000000000 --- a/mindspore/ccsrc/device/gpu/distribution/mpi_wrapper.cc +++ /dev/null @@ -1,87 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use 
this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "device/gpu/distribution/mpi_wrapper.h" - -#include -#include -#include "device/gpu/distribution/nccl_wrapper.h" - -namespace mindspore { -namespace device { -namespace gpu { -MPIWrapper::MPIWrapper() : rank_id_(0), rank_size_(0), local_rank_id_(0) { Init(); } - -MPIWrapper::~MPIWrapper() { - int finalized; - MPI_Finalized(&finalized); - if (finalized == 0) { - MPI_Finalize(); - } -} - -MPIWrapper &MPIWrapper::instance() { - static MPIWrapper instance; - return instance; -} - -int MPIWrapper::local_rank_id() const { return local_rank_id_; } - -void MPIWrapper::Init() { - int initialized; - CHECK_RET(MPI_Initialized(&initialized), MPI_SUCCESS, "Failed to check mpi initialization status."); - - if (initialized == 0) { - MPI_Init(nullptr, nullptr); - } - CHECK_RET(MPI_Comm_rank(MPI_COMM_WORLD, &rank_id_), MPI_SUCCESS, "Failed to init mpi rank id."); - CHECK_RET(MPI_Comm_size(MPI_COMM_WORLD, &rank_size_), MPI_SUCCESS, "Failed to init mpi rank size."); - NCCLWrapper::instance().set_rank(rank_id_, rank_size_); - AssignLocalRankId(); - - ncclUniqueId unique_id; - if (rank_id_ == 0) { - unique_id = NCCLWrapper::instance().nccl_unique_id(); - } - CHECK_RET(MPI_Bcast(reinterpret_cast(&unique_id), sizeof(unique_id), MPI_BYTE, 0, MPI_COMM_WORLD), - MPI_SUCCESS, "Failed to broadcast nccl unique id."); - NCCLWrapper::instance().set_nccl_unique_id(unique_id); - return; -} - -void MPIWrapper::AssignLocalRankId() { - char host_name[MAX_HOSTNAME_LEN] = {0}; - CHECK_RET(gethostname(host_name, 
MAX_HOSTNAME_LEN), 0, "Getting host name failed."); - size_t host_hash = std::hash()(host_name); - - const int kRankSize = rank_size_; - size_t all_host_hashs[kRankSize]; - all_host_hashs[rank_id_] = host_hash; - CHECK_RET(MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, all_host_hashs, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD), - MPI_SUCCESS, "MPI_Allgather host hashs failed."); - for (int global_rank = 0; global_rank < kRankSize; global_rank++) { - if (global_rank == rank_id_) { - break; - } - if (all_host_hashs[global_rank] == all_host_hashs[rank_id_]) { - local_rank_id_++; - } - } - return; -} -} // namespace gpu -} // namespace device -} // namespace mindspore diff --git a/mindspore/ccsrc/operator/CMakeLists.txt b/mindspore/ccsrc/frontend/operator/CMakeLists.txt similarity index 72% rename from mindspore/ccsrc/operator/CMakeLists.txt rename to mindspore/ccsrc/frontend/operator/CMakeLists.txt index 88bcf0e532..0b6dd77c69 100644 --- a/mindspore/ccsrc/operator/CMakeLists.txt +++ b/mindspore/ccsrc/frontend/operator/CMakeLists.txt @@ -1,3 +1,3 @@ file(GLOB_RECURSE _OPERATOR_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") set_property(SOURCE ${_OPERATOR_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_ANALYZER) -add_library(_mindspore_operator_obj OBJECT ${_OPERATOR_SRC_FILES}) +add_library(_mindspore_frontend_operator_obj OBJECT ${_OPERATOR_SRC_FILES}) diff --git a/mindspore/ccsrc/operator/cc_implementations.cc b/mindspore/ccsrc/frontend/operator/cc_implementations.cc similarity index 99% rename from mindspore/ccsrc/operator/cc_implementations.cc rename to mindspore/ccsrc/frontend/operator/cc_implementations.cc index 52b71f410f..3ec3455be7 100644 --- a/mindspore/ccsrc/operator/cc_implementations.cc +++ b/mindspore/ccsrc/frontend/operator/cc_implementations.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "operator/cc_implementations.h" +#include "frontend/operator/cc_implementations.h" #include #include #include diff --git a/mindspore/ccsrc/operator/cc_implementations.h b/mindspore/ccsrc/frontend/operator/cc_implementations.h similarity index 100% rename from mindspore/ccsrc/operator/cc_implementations.h rename to mindspore/ccsrc/frontend/operator/cc_implementations.h diff --git a/mindspore/ccsrc/operator/composite/composite.cc b/mindspore/ccsrc/frontend/operator/composite/composite.cc similarity index 99% rename from mindspore/ccsrc/operator/composite/composite.cc rename to mindspore/ccsrc/frontend/operator/composite/composite.cc index 75532b9fbd..7d2573e50a 100644 --- a/mindspore/ccsrc/operator/composite/composite.cc +++ b/mindspore/ccsrc/frontend/operator/composite/composite.cc @@ -17,19 +17,19 @@ * limitations under the License. */ -#include "operator/composite/composite.h" +#include "frontend/operator/composite/composite.h" #include #include #include #include "ir/anf.h" #include "ir/func_graph.h" -#include "pipeline/static_analysis/abstract_value.h" -#include "pipeline/static_analysis/abstract_function.h" -#include "pipeline/static_analysis/dshape.h" -#include "pipeline/static_analysis/param_validator.h" -#include "operator/cc_implementations.h" -#include "optimizer/opt.h" +#include "abstract/abstract_value.h" +#include "pipeline/jit/static_analysis/abstract_function.h" +#include "abstract/dshape.h" +#include "abstract/param_validator.h" +#include "frontend/operator/cc_implementations.h" +#include "frontend/optimizer/opt.h" #include "utils/symbolic.h" #include "pybind_api/api_register.h" #include "./common.h" diff --git a/mindspore/ccsrc/operator/composite/composite.h b/mindspore/ccsrc/frontend/operator/composite/composite.h similarity index 95% rename from mindspore/ccsrc/operator/composite/composite.h rename to mindspore/ccsrc/frontend/operator/composite/composite.h index 5944c81fb0..3821192dba 100644 --- 
a/mindspore/ccsrc/operator/composite/composite.h +++ b/mindspore/ccsrc/frontend/operator/composite/composite.h @@ -26,12 +26,12 @@ #include #include #include -#include "operator/composite/zip_operation.h" -#include "operator/composite/list_append_operation.h" -#include "operator/composite/do_signature.h" -#include "operator/composite/unpack_call.h" -#include "operator/composite/multitype_funcgraph.h" -#include "pipeline/static_analysis/static_analysis.h" +#include "frontend/operator/composite/zip_operation.h" +#include "frontend/operator/composite/list_append_operation.h" +#include "frontend/operator/composite/do_signature.h" +#include "frontend/operator/composite/unpack_call.h" +#include "frontend/operator/composite/multitype_funcgraph.h" +#include "pipeline/jit/static_analysis/static_analysis.h" #include "utils/misc.h" #include "utils/any.h" #include "ir/dtype.h" diff --git a/mindspore/ccsrc/operator/composite/do_signature.cc b/mindspore/ccsrc/frontend/operator/composite/do_signature.cc similarity index 89% rename from mindspore/ccsrc/operator/composite/do_signature.cc rename to mindspore/ccsrc/frontend/operator/composite/do_signature.cc index d9bcef3031..50be3c5b29 100644 --- a/mindspore/ccsrc/operator/composite/do_signature.cc +++ b/mindspore/ccsrc/frontend/operator/composite/do_signature.cc @@ -14,16 +14,16 @@ * limitations under the License. 
*/ -#include "operator/composite/do_signature.h" +#include "frontend/operator/composite/do_signature.h" #include #include -#include "pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" #include "ir/anf.h" -#include "pipeline/static_analysis/dshape.h" -#include "pipeline/static_analysis/param_validator.h" -#include "operator/cc_implementations.h" -#include "optimizer/opt.h" +#include "abstract/dshape.h" +#include "abstract/param_validator.h" +#include "frontend/operator/cc_implementations.h" +#include "frontend/optimizer/opt.h" #include "utils/symbolic.h" #include "./common.h" #include "pybind_api/api_register.h" @@ -31,12 +31,10 @@ namespace mindspore { // namespace to support composite operators definition namespace prim { -namespace { -using PatternListType = std::initializer_list; const std::map type_map = {{kNumberTypeBool, 1}, {kNumberTypeInt8, 2}, {kNumberTypeUInt8, 3}, {kNumberTypeInt16, 4}, {kNumberTypeInt32, 5}, {kNumberTypeInt64, 6}, {kNumberTypeFloat16, 7}, {kNumberTypeFloat32, 8}, {kNumberTypeFloat64, 9}}; - +namespace { const std::vector &GetSignature(const ValuePtr &function) { static const auto empty = std::vector(); if (function->isa() && function->cast()->has_signature()) { @@ -108,6 +106,8 @@ TypeId GetMaxTypeId(const abstract::AbstractBasePtrList &args_spec_list, std::ve TypeId max_type_id = kTypeUnknown; size_t max_type_number = 0; bool has_int8 = false; + bool has_scalar_int32 = false; + bool has_scalar_float32 = false; for (const auto &index : indices) { TypeId arg_type_id = kTypeUnknown; TypeId arg_type = kTypeUnknown; @@ -116,6 +116,11 @@ TypeId GetMaxTypeId(const abstract::AbstractBasePtrList &args_spec_list, std::ve continue; } if (arg_type != kObjectTypeTensorType) { + if (arg_type_id == kNumberTypeInt32) { + has_scalar_int32 = true; + } else if (arg_type_id == kNumberTypeFloat32) { + has_scalar_float32 = true; + } continue; } auto it = type_map.find(arg_type_id); @@ -137,6 +142,17 @@ TypeId GetMaxTypeId(const 
abstract::AbstractBasePtrList &args_spec_list, std::ve if (max_type_id == kNumberTypeUInt8 && has_int8 == true) { max_type_id = kNumberTypeInt16; } + // if bool is the max type, see if there is scalar input + // if so, it means that max is bool tensor, use scalar type instead. + // for example: Tensor([True, True]) * 2, expect result is Tensor([2, 2]) + if (max_type_id == kNumberTypeBool) { + if (has_scalar_int32) { + max_type_id = kNumberTypeInt32; + } + if (has_scalar_float32) { + max_type_id = kNumberTypeFloat32; + } + } return max_type_id; } @@ -225,11 +241,7 @@ void DoAutoCast(const std::string &func_name, const std::vector &sign if (it_name_map == type_name_map.end()) { continue; } - MS_LOG(EXCEPTION) << "In op '" << func_name << "', \n" - << "the type of writable argument is '" << it_map->second << "', " - << "but the largest type in the same SignatureEumDtype is '" << it_name_map->second - << "'. The writable arg type is not equal to the largest type, " - << "so can not cast automatically."; + RaiseExceptionForConvertRefDtype(func_name, it_map->second, it_name_map->second); } continue; } @@ -313,5 +325,14 @@ FuncGraphPtr DoSignatureMetaFuncGraph::GenerateFuncGraph(const AbstractBasePtrLi func_graph->set_flag(FUNC_GRAPH_FLAG_CORE, true); return func_graph; } + +void RaiseExceptionForConvertRefDtype(const std::string &func_name, const std::string &ref_type, + const std::string &target_type) { + MS_LOG(EXCEPTION) << "In op '" << func_name << "', \n" + << "the type of writable argument is '" << ref_type << "', " + << "but the largest type in the same SignatureEumDtype is '" << target_type + << "'. 
The writable arg type is not equal to the largest type, " + << "so can not cast automatically."; +} } // namespace prim } // namespace mindspore diff --git a/mindspore/ccsrc/operator/composite/do_signature.h b/mindspore/ccsrc/frontend/operator/composite/do_signature.h similarity index 88% rename from mindspore/ccsrc/operator/composite/do_signature.h rename to mindspore/ccsrc/frontend/operator/composite/do_signature.h index 3e1596d63f..9139be806a 100644 --- a/mindspore/ccsrc/operator/composite/do_signature.h +++ b/mindspore/ccsrc/frontend/operator/composite/do_signature.h @@ -25,7 +25,7 @@ #include #include -#include "pipeline/static_analysis/static_analysis.h" +#include "pipeline/jit/static_analysis/static_analysis.h" #include "utils/misc.h" #include "utils/any.h" #include "ir/dtype.h" @@ -56,6 +56,11 @@ class DoSignatureMetaFuncGraph : public MetaFuncGraph { }; using RWSignaturePtr = std::shared_ptr; +extern const std::map type_map; + +void RaiseExceptionForConvertRefDtype(const std::string &func_name, const std::string &ref_type, + const std::string &target_type); + AnfNodePtr GenerateCNode(const FuncGraphPtr &func_graph, const std::string &func_name, const ValuePtr &function, const AbstractBasePtrList &args_spec_list, const AnfNodePtrList &old_node_inputs); } // namespace prim diff --git a/mindspore/ccsrc/operator/composite/list_append_operation.cc b/mindspore/ccsrc/frontend/operator/composite/list_append_operation.cc similarity index 93% rename from mindspore/ccsrc/operator/composite/list_append_operation.cc rename to mindspore/ccsrc/frontend/operator/composite/list_append_operation.cc index 236a5b7062..3dfe2e23d0 100644 --- a/mindspore/ccsrc/operator/composite/list_append_operation.cc +++ b/mindspore/ccsrc/frontend/operator/composite/list_append_operation.cc @@ -14,14 +14,14 @@ * limitations under the License. 
*/ -#include "operator/composite/list_append_operation.h" +#include "frontend/operator/composite/list_append_operation.h" #include #include #include -#include "pipeline/static_analysis/param_validator.h" -#include "optimizer/opt.h" +#include "abstract/param_validator.h" +#include "frontend/optimizer/opt.h" #include "pybind_api/api_register.h" namespace mindspore { diff --git a/mindspore/ccsrc/operator/composite/list_append_operation.h b/mindspore/ccsrc/frontend/operator/composite/list_append_operation.h similarity index 100% rename from mindspore/ccsrc/operator/composite/list_append_operation.h rename to mindspore/ccsrc/frontend/operator/composite/list_append_operation.h diff --git a/mindspore/ccsrc/operator/composite/map.cc b/mindspore/ccsrc/frontend/operator/composite/map.cc similarity index 97% rename from mindspore/ccsrc/operator/composite/map.cc rename to mindspore/ccsrc/frontend/operator/composite/map.cc index 2149285323..a5f674187b 100644 --- a/mindspore/ccsrc/operator/composite/map.cc +++ b/mindspore/ccsrc/frontend/operator/composite/map.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "operator/composite/map.h" +#include "frontend/operator/composite/map.h" #include #include #include @@ -22,12 +22,12 @@ #include "ir/anf.h" #include "ir/func_graph.h" -#include "pipeline/static_analysis/abstract_value.h" -#include "pipeline/static_analysis/abstract_function.h" -#include "pipeline/static_analysis/dshape.h" +#include "abstract/abstract_value.h" +#include "pipeline/jit/static_analysis/abstract_function.h" +#include "abstract/dshape.h" #include "pybind_api/api_register.h" #include "debug/trace.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "./common.h" namespace mindspore { diff --git a/mindspore/ccsrc/operator/composite/map.h b/mindspore/ccsrc/frontend/operator/composite/map.h similarity index 98% rename from mindspore/ccsrc/operator/composite/map.h rename to mindspore/ccsrc/frontend/operator/composite/map.h index 02d374214a..428014f9c4 100644 --- a/mindspore/ccsrc/operator/composite/map.h +++ b/mindspore/ccsrc/frontend/operator/composite/map.h @@ -24,7 +24,7 @@ #include "ir/dtype.h" #include "ir/meta_func_graph.h" -#include "operator/composite/multitype_funcgraph.h" +#include "frontend/operator/composite/multitype_funcgraph.h" namespace mindspore { // namespace to support composite operators definition diff --git a/mindspore/ccsrc/operator/composite/multitype_funcgraph.cc b/mindspore/ccsrc/frontend/operator/composite/multitype_funcgraph.cc similarity index 76% rename from mindspore/ccsrc/operator/composite/multitype_funcgraph.cc rename to mindspore/ccsrc/frontend/operator/composite/multitype_funcgraph.cc index de6526f642..ba0d3d9ebb 100644 --- a/mindspore/ccsrc/operator/composite/multitype_funcgraph.cc +++ b/mindspore/ccsrc/frontend/operator/composite/multitype_funcgraph.cc @@ -17,19 +17,20 @@ * limitations under the License. 
*/ -#include "operator/composite/multitype_funcgraph.h" +#include "frontend/operator/composite/multitype_funcgraph.h" #include #include #include #include "ir/anf.h" #include "ir/func_graph.h" -#include "pipeline/static_analysis/abstract_value.h" -#include "pipeline/static_analysis/abstract_function.h" -#include "pipeline/static_analysis/dshape.h" -#include "pipeline/static_analysis/param_validator.h" -#include "operator/cc_implementations.h" -#include "optimizer/opt.h" +#include "abstract/abstract_value.h" +#include "pipeline/jit/static_analysis/abstract_function.h" +#include "abstract/dshape.h" +#include "abstract/param_validator.h" +#include "frontend/operator/cc_implementations.h" +#include "frontend/optimizer/opt.h" +#include "utils/context/ms_context.h" #include "utils/symbolic.h" #include "pybind_api/api_register.h" #include "./common.h" @@ -115,36 +116,43 @@ const py::function MultitypeFuncGraph::SignMatch(const TypePtrList &types) { } return item.second; } - // Try best match - py::function py_fn_subclass; - size_t subclass_match_cnt = 0; - for (auto &item : fn_cache_py_) { - TypePtrList sign = item.first; - if (sign.size() != types.size()) { - continue; + return py::none(); +} + +FuncGraphPtr GenerateStubFunc(const TypePtrList &types) { + auto context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context); + bool enable_sparse = context->enable_sparse(); + if (!enable_sparse) { + return nullptr; + } + + std::vector parameters; + ParameterPtr undetermined_param = nullptr; + auto stub = std::make_shared(); + for (size_t i = 0; i < types.size(); ++i) { + auto param = stub->add_parameter(); + parameters.push_back(param); + if (types[i]->type_id() == kObjectTypeUndeterminedType) { + undetermined_param = param; } - auto match = true; - for (size_t i = 0; i < sign.size(); ++i) { - if (!IsIdentidityOrSubclass(UnwrapRef(types[i]), sign[i]) && - !IsParentOrChildrenType(UnwrapRef(types[i]), sign[i])) { - match = false; - break; + } + if (undetermined_param != 
nullptr) { + std::vector inputs{NewValueNode(prim::kPrimMakeTuple)}; + for (size_t i = 0; i < types.size(); ++i) { + if (types[i]->type_id() == kObjectTypeFunction) { + std::vector call_prim{parameters[i], undetermined_param}; + inputs.push_back(stub->NewCNode(call_prim)); + } else { + inputs.push_back(parameters[i]); } } - if (!match) { - continue; - } - py_fn_subclass = item.second; - subclass_match_cnt++; - } - if (subclass_match_cnt > 1) { - MS_LOG(EXCEPTION) << "There are more than one prototypes for overload function match by subclass"; - } - if (subclass_match_cnt == 1) { - MS_LOG(DEBUG) << "Found one subclass match"; - return py_fn_subclass; + auto stub_output = stub->NewCNode(inputs); + stub->set_output(stub_output); + stub->set_stub(true); + return stub; } - return py::none(); + return nullptr; } FuncGraphPtr MultitypeFuncGraph::GenerateFromTypes(const TypePtrList &types) { @@ -159,6 +167,11 @@ FuncGraphPtr MultitypeFuncGraph::GenerateFromTypes(const TypePtrList &types) { MS_LOG(DEBUG) << "Find overload function " << buffer.str() << ", function: " << func_graph->ToString(); return func_graph; } + auto stub = GenerateStubFunc(types); + if (stub != nullptr) { + MS_LOG(DEBUG) << "GenerateStubFunc " << buffer.str() << ", function: " << stub->ToString(); + return stub; + } std::ostringstream oss; oss << "There are " << fn_cache_py_.size() << " prototypes for overload function `" << name_ << "`, corresponding location info:\n"; diff --git a/mindspore/ccsrc/operator/composite/multitype_funcgraph.h b/mindspore/ccsrc/frontend/operator/composite/multitype_funcgraph.h similarity index 97% rename from mindspore/ccsrc/operator/composite/multitype_funcgraph.h rename to mindspore/ccsrc/frontend/operator/composite/multitype_funcgraph.h index ababf21883..2139a0e9d1 100644 --- a/mindspore/ccsrc/operator/composite/multitype_funcgraph.h +++ b/mindspore/ccsrc/frontend/operator/composite/multitype_funcgraph.h @@ -26,7 +26,7 @@ #include #include #include -#include 
"pipeline/static_analysis/static_analysis.h" +#include "pipeline/jit/static_analysis/static_analysis.h" #include "utils/misc.h" #include "ir/dtype.h" #include "ir/meta_func_graph.h" diff --git a/mindspore/ccsrc/operator/composite/unpack_call.cc b/mindspore/ccsrc/frontend/operator/composite/unpack_call.cc similarity index 93% rename from mindspore/ccsrc/operator/composite/unpack_call.cc rename to mindspore/ccsrc/frontend/operator/composite/unpack_call.cc index 3993d41597..2c9e0b538f 100644 --- a/mindspore/ccsrc/operator/composite/unpack_call.cc +++ b/mindspore/ccsrc/frontend/operator/composite/unpack_call.cc @@ -14,17 +14,17 @@ * limitations under the License. */ -#include "operator/composite/unpack_call.h" +#include "frontend/operator/composite/unpack_call.h" #include #include #include "./common.h" -#include "pipeline/static_analysis/abstract_value.h" -#include "pipeline/static_analysis/dshape.h" -#include "pipeline/static_analysis/param_validator.h" -#include "operator/cc_implementations.h" +#include "abstract/abstract_value.h" +#include "abstract/dshape.h" +#include "abstract/param_validator.h" +#include "frontend/operator/cc_implementations.h" #include "ir/anf.h" -#include "optimizer/opt.h" +#include "frontend/optimizer/opt.h" #include "utils/symbolic.h" #include "pybind_api/api_register.h" diff --git a/mindspore/ccsrc/operator/composite/unpack_call.h b/mindspore/ccsrc/frontend/operator/composite/unpack_call.h similarity index 96% rename from mindspore/ccsrc/operator/composite/unpack_call.h rename to mindspore/ccsrc/frontend/operator/composite/unpack_call.h index 8c055a9386..79c2600f36 100644 --- a/mindspore/ccsrc/operator/composite/unpack_call.h +++ b/mindspore/ccsrc/frontend/operator/composite/unpack_call.h @@ -25,7 +25,7 @@ #include #include -#include "pipeline/static_analysis/static_analysis.h" +#include "pipeline/jit/static_analysis/static_analysis.h" #include "utils/misc.h" #include "utils/any.h" #include "ir/dtype.h" diff --git 
a/mindspore/ccsrc/operator/composite/zip_operation.cc b/mindspore/ccsrc/frontend/operator/composite/zip_operation.cc similarity index 94% rename from mindspore/ccsrc/operator/composite/zip_operation.cc rename to mindspore/ccsrc/frontend/operator/composite/zip_operation.cc index 38f2b51614..9e2b6d28b2 100644 --- a/mindspore/ccsrc/operator/composite/zip_operation.cc +++ b/mindspore/ccsrc/frontend/operator/composite/zip_operation.cc @@ -16,14 +16,14 @@ * limitations under the License. */ -#include "operator/composite/zip_operation.h" +#include "frontend/operator/composite/zip_operation.h" #include -#include "pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" #include "ir/anf.h" -#include "pipeline/static_analysis/dshape.h" -#include "operator/cc_implementations.h" -#include "optimizer/opt.h" +#include "abstract/dshape.h" +#include "frontend/operator/cc_implementations.h" +#include "frontend/optimizer/opt.h" #include "pybind_api/api_register.h" namespace mindspore { diff --git a/mindspore/ccsrc/operator/composite/zip_operation.h b/mindspore/ccsrc/frontend/operator/composite/zip_operation.h similarity index 97% rename from mindspore/ccsrc/operator/composite/zip_operation.h rename to mindspore/ccsrc/frontend/operator/composite/zip_operation.h index 1a3fa1f5fe..96697cb472 100644 --- a/mindspore/ccsrc/operator/composite/zip_operation.h +++ b/mindspore/ccsrc/frontend/operator/composite/zip_operation.h @@ -27,7 +27,7 @@ #include #include -#include "pipeline/static_analysis/static_analysis.h" +#include "pipeline/jit/static_analysis/static_analysis.h" #include "utils/misc.h" #include "utils/any.h" #include "ir/dtype.h" diff --git a/mindspore/ccsrc/operator/ops.cc b/mindspore/ccsrc/frontend/operator/ops.cc similarity index 99% rename from mindspore/ccsrc/operator/ops.cc rename to mindspore/ccsrc/frontend/operator/ops.cc index b682847ed7..5c7672ee3c 100755 --- a/mindspore/ccsrc/operator/ops.cc +++ b/mindspore/ccsrc/frontend/operator/ops.cc @@ -14,7 
+14,7 @@ * limitations under the License. */ -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include #include diff --git a/mindspore/ccsrc/operator/ops.h b/mindspore/ccsrc/frontend/operator/ops.h similarity index 97% rename from mindspore/ccsrc/operator/ops.h rename to mindspore/ccsrc/frontend/operator/ops.h index f778013896..0dea045a6e 100755 --- a/mindspore/ccsrc/operator/ops.h +++ b/mindspore/ccsrc/frontend/operator/ops.h @@ -21,7 +21,7 @@ #include #include #include "ir/anf.h" -#include "ir/primitive_base.h" +#include "ir/primitive.h" namespace mindspore { // namespace to support primitive operators @@ -294,6 +294,12 @@ extern const PrimitivePtr kPrimIndexedSlicesGetIndices; extern const PrimitivePtr kPrimIndexedSlicesGetDenseShape; extern const PrimitivePtr kPrimIsIndexedSlices; +// attribute 'unroll_flag' of primitive 'switch', when 'unroll_flag' is '0', 'switch' will not unroll +const char SWITCH_UNROLL_FLAG[] = "unroll_flag"; +// max loop count of for statement, when loop count is less then this value, the for loop will be unrolled, otherwise it +// will be sunk(i.e. not unrolled) +const int MAX_FOR_LOOP_COUNT = 600; + class DoSignaturePrimitive : public Primitive { public: explicit DoSignaturePrimitive(const std::string &name, const ValuePtr &function) diff --git a/mindspore/ccsrc/operator/ops_extends.cc b/mindspore/ccsrc/frontend/operator/ops_extends.cc similarity index 90% rename from mindspore/ccsrc/operator/ops_extends.cc rename to mindspore/ccsrc/frontend/operator/ops_extends.cc index d415b45adf..c406682c3e 100755 --- a/mindspore/ccsrc/operator/ops_extends.cc +++ b/mindspore/ccsrc/frontend/operator/ops_extends.cc @@ -14,11 +14,11 @@ * limitations under the License. 
*/ -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include #include -#include "pipeline/parse/python_adapter.h" -#include "pipeline/parse/data_converter.h" +#include "pipeline/jit/parse/python_adapter.h" +#include "pipeline/jit/parse/data_converter.h" namespace mindspore { // namespace to support primitive operators diff --git a/mindspore/ccsrc/operator/prim_arrays.cc b/mindspore/ccsrc/frontend/operator/prim_arrays.cc similarity index 97% rename from mindspore/ccsrc/operator/prim_arrays.cc rename to mindspore/ccsrc/frontend/operator/prim_arrays.cc index 237ca795eb..caaf1d1b2a 100644 --- a/mindspore/ccsrc/operator/prim_arrays.cc +++ b/mindspore/ccsrc/frontend/operator/prim_arrays.cc @@ -14,11 +14,11 @@ * limitations under the License. */ -#include "pipeline/static_analysis/prim.h" -#include "operator/ops.h" -#include "pipeline/static_analysis/utils.h" -#include "operator/cc_implementations.h" -#include "pipeline/static_analysis/param_validator.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "frontend/operator/ops.h" +#include "abstract/utils.h" +#include "frontend/operator/cc_implementations.h" +#include "abstract/param_validator.h" namespace mindspore { namespace abstract { diff --git a/mindspore/ccsrc/operator/prim_debug.cc b/mindspore/ccsrc/frontend/operator/prim_debug.cc similarity index 89% rename from mindspore/ccsrc/operator/prim_debug.cc rename to mindspore/ccsrc/frontend/operator/prim_debug.cc index 5e6cdcc318..718dadf5c1 100644 --- a/mindspore/ccsrc/operator/prim_debug.cc +++ b/mindspore/ccsrc/frontend/operator/prim_debug.cc @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#include "pipeline/static_analysis/param_validator.h" -#include "pipeline/static_analysis/prim.h" -#include "operator/ops.h" -#include "pipeline/static_analysis/utils.h" +#include "abstract/param_validator.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "frontend/operator/ops.h" +#include "abstract/utils.h" #include "utils/symbolic.h" namespace mindspore { diff --git a/mindspore/ccsrc/operator/prim_maths.cc b/mindspore/ccsrc/frontend/operator/prim_maths.cc similarity index 90% rename from mindspore/ccsrc/operator/prim_maths.cc rename to mindspore/ccsrc/frontend/operator/prim_maths.cc index 02b86603e7..e4543a3821 100644 --- a/mindspore/ccsrc/operator/prim_maths.cc +++ b/mindspore/ccsrc/frontend/operator/prim_maths.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "pipeline/static_analysis/prim.h" -#include "operator/ops.h" -#include "pipeline/static_analysis/utils.h" -#include "pipeline/static_analysis/param_validator.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "frontend/operator/ops.h" +#include "abstract/utils.h" +#include "abstract/param_validator.h" #include "common/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/operator/prim_nn.cc b/mindspore/ccsrc/frontend/operator/prim_nn.cc similarity index 99% rename from mindspore/ccsrc/operator/prim_nn.cc rename to mindspore/ccsrc/frontend/operator/prim_nn.cc index d9a0071757..96c86d815d 100644 --- a/mindspore/ccsrc/operator/prim_nn.cc +++ b/mindspore/ccsrc/frontend/operator/prim_nn.cc @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#include "pipeline/static_analysis/prim.h" -#include "operator/ops.h" -#include "pipeline/static_analysis/utils.h" -#include "pipeline/static_analysis/param_validator.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "frontend/operator/ops.h" +#include "abstract/utils.h" +#include "abstract/param_validator.h" namespace mindspore { namespace abstract { diff --git a/mindspore/ccsrc/operator/prim_others.cc b/mindspore/ccsrc/frontend/operator/prim_others.cc similarity index 77% rename from mindspore/ccsrc/operator/prim_others.cc rename to mindspore/ccsrc/frontend/operator/prim_others.cc index ff9ec712bb..530ad6a10c 100644 --- a/mindspore/ccsrc/operator/prim_others.cc +++ b/mindspore/ccsrc/frontend/operator/prim_others.cc @@ -19,12 +19,12 @@ #include "ir/dtype.h" #include "common/utils.h" -#include "operator/ops.h" -#include "pipeline/static_analysis/param_validator.h" -#include "pipeline/static_analysis/prim.h" -#include "pipeline/static_analysis/utils.h" -#include "utils/symbolic.h" +#include "frontend/operator/ops.h" +#include "abstract/param_validator.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "abstract/utils.h" #include "utils/context/ms_context.h" +#include "utils/symbolic.h" namespace mindspore { namespace abstract { @@ -56,79 +56,6 @@ AbstractBasePtr InferImplJ(const AnalysisEnginePtr &, const PrimitivePtr &primit return AbstractFunction::MakeAbstractFunction(jv); } -class UndeterminedShapeType { - public: - explicit UndeterminedShapeType(const std::string &env_str) { - // param_name indices_shape indices_type values_shape values_type dense_shape - // export UNDETERMINED_SPARSE_SHAPE_TYPES="sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1 - // 2:Float32:3 1 2" - std::vector fields; - string tmp; - std::stringstream input(env_str); - while (std::getline(input, tmp, ':')) { - fields.push_back(tmp); - } - if (fields.size() != fields_num) { - MS_LOG(EXCEPTION) << "Expect " << fields_num << " fields, but got " << 
fields.size(); - } - - param_name_ = fields[0]; - - indices_shape_ = GetShape(fields[1]); - indices_type_ = StringToType(fields[2]); - - values_shape_ = GetShape(fields[3]); - values_type_ = StringToType(fields[4]); - - auto dense_shape_vec = GetShape(fields[5]); - AbstractBasePtrList dense_shape_list; - (void)std::transform(dense_shape_vec.begin(), dense_shape_vec.end(), std::back_inserter(dense_shape_list), - [](const auto &elem) { return FromValue(elem, false); }); - dense_shape_ = dense_shape_list; - } - ~UndeterminedShapeType() = default; - const std::string ¶m_name() { return param_name_; } - const std::vector &indices_shape() { return indices_shape_; } - const TypePtr &indices_type() { return indices_type_; } - const std::vector &values_shape() { return values_shape_; } - const TypePtr &values_type() { return values_type_; } - const AbstractBasePtrList &dense_shape() { return dense_shape_; } - - private: - std::string param_name_; - std::vector indices_shape_; - TypePtr indices_type_; - std::vector values_shape_; - TypePtr values_type_; - AbstractBasePtrList dense_shape_; - static const size_t fields_num; - - std::vector GetShape(const std::string &shape_str); -}; -std::vector UndeterminedShapeType::GetShape(const std::string &shape_str) { - std::vector ret; - std::istringstream iss(shape_str); - int elem; - while (iss.good()) { - iss >> elem; - ret.emplace_back(elem); - } - return ret; -} -const size_t UndeterminedShapeType::fields_num = 6; - -std::unordered_map g_undetermined_configs; -void InitUndeterminedFromEnv(const std::string &sparse_shape_types) { - std::string tmp; - std::stringstream input(sparse_shape_types); - g_undetermined_configs.clear(); - while (std::getline(input, tmp, ';')) { - auto config = UndeterminedShapeType(tmp); - g_undetermined_configs.insert(std::make_pair(config.param_name(), config)); - MS_LOG(DEBUG) << "Undetermined config from env: " << tmp; - } -} - AbstractBasePtr InferImplEnvGetItem(const AnalysisEnginePtr &, const 
PrimitivePtr &primitive, const AbstractBasePtrList &args_spec_list) { MS_EXCEPTION_IF_NULL(primitive); @@ -142,45 +69,14 @@ AbstractBasePtr InferImplEnvGetItem(const AnalysisEnginePtr &, const PrimitivePt MS_LOG(EXCEPTION) << "EnvGetItem evaluator args[1] should be a SymbolicKeyInstance but: " << key->ToString(); } - if (!key->sparse_grad().empty()) { - // Will be fixed once undetermined type ready - if (g_undetermined_configs.empty()) { - auto sparse_shape_types = common::GetEnv("UNDETERMINED_SPARSE_SHAPE_TYPES"); - MS_LOG(INFO) << "Undetermind sparse shape:" << sparse_shape_types; - if (sparse_shape_types.empty()) { - sparse_shape_types = "sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1 2:Float32:3 1 2"; - } - InitUndeterminedFromEnv(sparse_shape_types); - } - - auto shape_types = g_undetermined_configs.find(key->sparse_grad()); - if (shape_types == g_undetermined_configs.end()) { - MS_LOG(EXCEPTION) << "Param " << key->ToString() - << " has sparse_grad, but shape/type is not configured in env UNDETERMINED_SPARSE_SHAPE_TYPES"; - } - MS_LOG(DEBUG) << "EnvGetItem is sparse_grad " << key->ToString(); - AbstractBasePtrList sparse_list; - // indices - auto indices_ele = std::make_shared(kAnyValue, shape_types->second.indices_type()); - auto indices = - std::make_shared(indices_ele, std::make_shared(shape_types->second.indices_shape())); - sparse_list.emplace_back(indices); - // values - auto dout_ele = std::make_shared(kAnyValue, shape_types->second.values_type()); - auto dout = std::make_shared(dout_ele, std::make_shared(shape_types->second.values_shape())); - sparse_list.emplace_back(dout); - // dense_shape - sparse_list.emplace_back(std::make_shared(shape_types->second.dense_shape())); - return std::make_shared(sparse_list); - } - auto context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context); - bool enable_sparse_flag = context->enable_sparse_flag(); - if (enable_sparse_flag && key->has_indexed_slices_grad() && dflt->isa()) { + bool 
enable_sparse = context->enable_sparse(); + if (enable_sparse && dflt->isa()) { auto dflt_tensor = dflt->cast(); return std::make_shared(dflt_tensor->element()->Clone(), dflt_tensor->shape()->Clone()); } + if (!key->GetValueTrack()->isa()) { return dflt; } @@ -242,10 +138,7 @@ AbstractBasePtr InferImplMakeRef(const AnalysisEnginePtr &, const PrimitivePtr & if (type->type_id() != kObjectTypeRefKey) { MS_LOG(EXCEPTION) << "First input of make_ref should be a RefKey but a " << type->ToString(); } - auto ret = std::make_shared(args_spec_list[0], args_spec_list[1], args_spec_list[2]); - ret->set_sparse_grad(args_spec_list[2]->sparse_grad()); - ret->set_has_indexed_slices_grad(args_spec_list[2]->has_indexed_slices_grad()); - return ret; + return std::make_shared(args_spec_list[0], args_spec_list[1], args_spec_list[2]); } AbstractBasePtr InferImplGetRefKey(const AnalysisEnginePtr &, const PrimitivePtr &, diff --git a/mindspore/ccsrc/operator/prim_statement.cc b/mindspore/ccsrc/frontend/operator/prim_statement.cc similarity index 96% rename from mindspore/ccsrc/operator/prim_statement.cc rename to mindspore/ccsrc/frontend/operator/prim_statement.cc index fc40e511e1..bb421bdf8a 100644 --- a/mindspore/ccsrc/operator/prim_statement.cc +++ b/mindspore/ccsrc/frontend/operator/prim_statement.cc @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#include "pipeline/static_analysis/param_validator.h" -#include "pipeline/static_analysis/prim.h" -#include "operator/ops.h" -#include "pipeline/static_analysis/utils.h" +#include "abstract/param_validator.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "frontend/operator/ops.h" +#include "abstract/utils.h" #include "utils/symbolic.h" namespace mindspore { @@ -95,7 +95,7 @@ AbstractBasePtr InferImplDot(const AnalysisEnginePtr &, const PrimitivePtr &prim return std::make_shared(input_x->element(), std::make_shared(param)); } -AbstractBasePtr InferImplSwitch(const AnalysisEnginePtr &, const PrimitivePtr &, +AbstractBasePtr InferImplSwitch(const AnalysisEnginePtr &, const PrimitivePtr &prim, const AbstractBasePtrList &args_spec_list) { // Inputs: condition, true branch, false branch if (args_spec_list.size() != 3) { @@ -108,6 +108,11 @@ AbstractBasePtr InferImplSwitch(const AnalysisEnginePtr &, const PrimitivePtr &, auto fb = args_spec_list[2]; MS_EXCEPTION_IF_NULL(cond); + auto unroll_flag = prim->GetAttr(prim::SWITCH_UNROLL_FLAG); + if (unroll_flag != nullptr && GetValue(unroll_flag) == 0) { + return tb->Join(fb); + } + ValuePtr v = cond->GetValueTrack(); MS_EXCEPTION_IF_NULL(v); // for tensor as condition, keeps both true and false branch. diff --git a/mindspore/ccsrc/frontend/operator/prim_structures.cc b/mindspore/ccsrc/frontend/operator/prim_structures.cc new file mode 100644 index 0000000000..b602b07a0c --- /dev/null +++ b/mindspore/ccsrc/frontend/operator/prim_structures.cc @@ -0,0 +1,712 @@ +/** + * This is the C++ adaptation and derivative work of Myia (https://github.com/mila-iqia/myia/). + * + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "pipeline/jit/static_analysis/prim.h" +#include "abstract/utils.h" +#include "abstract/param_validator.h" +#include "frontend/operator/ops.h" +#include "utils/convert_utils.h" +#include "ir/tensor_py.h" + +using mindspore::tensor::TensorPy; + +namespace mindspore { +namespace abstract { + +AbstractBasePtr InferImplStringEqual(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: two scalars whose value is a string. + const std::string op_name = primitive->name(); + CheckArgsSize(op_name, args_spec_list, 2); + AbstractScalarPtr scalar_x = CheckArg(op_name, args_spec_list, 0); + AbstractScalarPtr scalar_y = CheckArg(op_name, args_spec_list, 1); + + ValuePtr value_x = scalar_x->BuildValue(); + ValuePtr value_y = scalar_y->BuildValue(); + if (!value_x->isa() || !value_y->isa()) { + MS_LOG(EXCEPTION) << op_name << " requires 2 parameters are string, but got param0: " << value_x->ToString() + << ", param1: " << value_y->ToString(); + } + + bool ret = (value_x->cast()->value() == value_y->cast()->value()); + return std::make_shared(ret); +} + +AbstractBasePtr InferImplStringConcat(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: two scalars whose value is a string. 
+ const std::string op_name = primitive->name(); + CheckArgsSize(op_name, args_spec_list, 2); + AbstractScalarPtr scalar_x = CheckArg(op_name, args_spec_list, 0); + AbstractScalarPtr scalar_y = CheckArg(op_name, args_spec_list, 1); + + ValuePtr value_x = scalar_x->BuildValue(); + ValuePtr value_y = scalar_y->BuildValue(); + if (!value_x->isa() || !value_y->isa()) { + MS_LOG(EXCEPTION) << op_name << " requires 2 parameters are string, but got param0: " << value_x->ToString() + << ", param1: " << value_y->ToString(); + } + + std::string ret = (value_x->cast()->value() + value_y->cast()->value()); + return std::make_shared(ret); +} + +AbstractBasePtr InferImplMakeTuple(const AnalysisEnginePtr &, const PrimitivePtr &, + const AbstractBasePtrList &args_spec_list) { + return std::make_shared(args_spec_list); +} + +AbstractBasePtr InferImplMakeList(const AnalysisEnginePtr &, const PrimitivePtr &, + const AbstractBasePtrList &args_spec_list) { + return std::make_shared(args_spec_list); +} + +AbstractBasePtr InferImplMakeDict(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: two tuples. 
+ const std::string op_name = primitive->name(); + CheckArgsSize(op_name, args_spec_list, 2); + AbstractTuplePtr keys = CheckArg(op_name, args_spec_list, 0); + AbstractTuplePtr values = CheckArg(op_name, args_spec_list, 1); + + size_t keys_size = keys->size(); + if (values->size() != keys_size) { + MS_LOG(EXCEPTION) << op_name << " evaluator keys' size is not equal with values' size"; + } + + std::vector key_value; + AbstractScalarPtr key; + AbstractBasePtrList key_list = keys->elements(); + AbstractBasePtrList value_list = values->elements(); + for (size_t index = 0; index < keys_size; index++) { + key = CheckArg(op_name + "key", key_list, index); + ValuePtr keyPtr = key->BuildValue(); + MS_EXCEPTION_IF_NULL(keyPtr); + if (!keyPtr->isa()) { + MS_LOG(EXCEPTION) << op_name << " evaluator keys should be string, but got " << keyPtr->ToString(); + } + std::string key_string = GetValue(keyPtr); + key_value.emplace_back(key_string, value_list[index]); + } + return std::make_shared(key_value); +} + +AbstractBasePtr InferImplMakeKwarg(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: a string and an object of a subclass of AbstractBase. + const std::string op_name = primitive->name(); + CheckArgsSize(op_name, args_spec_list, 2); + AbstractScalarPtr key = CheckArg(op_name, args_spec_list, 0); + + ValuePtr keyPtr = key->BuildValue(); + if (!keyPtr->isa()) { + MS_LOG(EXCEPTION) << op_name << " evaluator key should be string, but got " << keyPtr->ToString(); + } + std::string key_string = GetValue(keyPtr); + return std::make_shared(key_string, args_spec_list[1]); +} + +AbstractBasePtr InferImplExtractKwarg(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: a string and a keyword. 
+ const std::string op_name = primitive->name(); + CheckArgsSize(op_name, args_spec_list, 2); + AbstractScalarPtr key = CheckArg(op_name, args_spec_list, 0); + AbstractKeywordArgPtr kwarg = CheckArg(op_name, args_spec_list, 1); + + ValuePtr key_value = key->BuildValue(); + if (!key_value->isa()) { + MS_LOG(EXCEPTION) << op_name << " evaluator key should be string, but got " << key_value->ToString(); + } + std::string key_input = GetValue(key_value); + std::string key_actual = kwarg->get_key(); + if (key_actual != key_input) { + MS_LOG(EXCEPTION) << op_name << " evaluator input key should be same as AbstractKeywordArg' key, but input is " + << key_input << ", AbstractKeywordArg' key is " << key_actual; + } + return kwarg->get_arg(); +} + +AbstractBasePtr InferImplMakeSlice(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: three scalars whose value is an int32 number. + CheckArgsSize(primitive->name(), args_spec_list, 3); + size_t args_size = args_spec_list.size(); + for (size_t index = 0; index < args_size; index++) { + MS_EXCEPTION_IF_NULL(args_spec_list[index]); + if (!args_spec_list[index]->isa() && !args_spec_list[index]->isa()) { + MS_LOG(EXCEPTION) << "MakeSlice eval " << index << " parameter is neither AbstractScalar nor AbstractNone."; + } + if (args_spec_list[index]->isa() && + !dyn_cast(args_spec_list[index])->BuildValue()->isa()) { + MS_LOG(EXCEPTION) << "MakeSlice eval " << index << " parameter is an AbstractScalar, but is not an int32 number."; + } + } + // Slice: start, end, step + return std::make_shared(args_spec_list[0], args_spec_list[1], args_spec_list[2]); +} + +// Eval the return type of make_record +AbstractBasePtr InferImplMakeRecord(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: at lease two objects of a subclass of AbstractBase. 
+ if (args_spec_list.size() < 2) { + MS_LOG(EXCEPTION) << "Typeof evaluator requires more than 1 parameter, while the input size is " + << args_spec_list.size() << "."; + } + + // args_spec_list[0] maybe AbstractScalarPtr or AbstractTypePtr + MS_EXCEPTION_IF_NULL(args_spec_list[0]); + TypePtr type = args_spec_list[0]->GetTypeTrack(); + MS_EXCEPTION_IF_NULL(type); + if (type->type_id() != kMetaTypeTypeType) { + MS_LOG(EXCEPTION) << "Can not make type(" << type->ToString() << ")not TypeType"; + } + + ValuePtr value_track = args_spec_list[0]->GetValueTrack(); + MS_EXCEPTION_IF_NULL(value_track); + TypePtr type_ptr = value_track->cast(); + if (type_ptr == nullptr) { + MS_LOG(EXCEPTION) << "Value type error, not Me type:" << value_track->ToString(); + } + + auto cls = dyn_cast(type_ptr); + MS_EXCEPTION_IF_NULL(cls); + ClassAttrVector attributes = cls->GetAttributes(); + CheckArgsSize(primitive->name(), args_spec_list, attributes.size() + 1); + + std::vector abs_attributes; + for (size_t i = 0; i < attributes.size(); i++) { + AbstractAttribute elem(attributes[i].first, args_spec_list[i + 1]); + abs_attributes.push_back(elem); + } + + return std::make_shared(cls->tag(), abs_attributes, cls->methods()); +} + +template +AbstractBasePtr InferTupleOrListGetItem(const std::string &op_name, const AbstractBasePtrList &args_spec_list) { + // Inputs: a tuple or list and a scalar whose value is an int32 number. 
+ CheckArgsSize(op_name, args_spec_list, 2); + auto queue = CheckArg(op_name, args_spec_list, 0); + AbstractScalarPtr index = CheckArg(op_name, args_spec_list, 1); + + ValuePtr index_value = index->BuildValue(); + if (!index_value->isa()) { + // when index_value is an AnyValue and args_spec_list[0] is a scalar, try to return the type of the first element + // and continue + if (dyn_cast(queue->elements()[0]) != nullptr) { + return std::make_shared(queue->elements()[0]->BuildType()); + } + MS_EXCEPTION(IndexError) << op_name << " evaluator index should be an int32 number, but got " + << index_value->ToString(); + } + int idx_v = GetValue(index_value); + std::size_t nelems = queue->elements().size(); + if (idx_v >= SizeToInt(nelems) || idx_v < -SizeToInt(nelems)) { + MS_EXCEPTION(IndexError) << op_name << " evaluator index should be in range[-" << SizeToInt(nelems) << ", " + << SizeToInt(nelems) << "), but got " << idx_v << "."; + } + + std::size_t uidx_v = 0; + if (idx_v >= 0) { + uidx_v = IntToSize(idx_v); + } else { + uidx_v = IntToSize(idx_v + SizeToInt(nelems)); + } + return queue->elements()[uidx_v]; +} + +template +AbstractBasePtr InferTupleOrListSetItem(const std::string &op_name, const AbstractBasePtrList &args_spec_list) { + // Inputs: a tuple or list, a scalar whose value is an int32 number and an object of a subclass of AbstractBase. 
+ CheckArgsSize(op_name, args_spec_list, 3); + auto queue = CheckArg(op_name, args_spec_list, 0); + AbstractScalarPtr index = CheckArg(op_name, args_spec_list, 1); + + ValuePtr index_value = index->BuildValue(); + if (!index_value->isa()) { + MS_EXCEPTION(IndexError) << op_name << " evaluator index should be an int32 number, but got " + << index_value->ToString(); + } + int idx_v = GetValue(index_value); + if (idx_v < 0) { + MS_EXCEPTION(IndexError) << "The index of " << typeid(T).name() << " should be positive number, but got " << idx_v + << "."; + } + + size_t uidx_v = IntToSize(idx_v); + AbstractBasePtrList elements = queue->elements(); + std::size_t nelems = elements.size(); + if (uidx_v >= nelems) { + MS_EXCEPTION(IndexError) << op_name << " evaluator the index: " << uidx_v << " to set out of range: " << nelems - 1 + << "."; + } + elements[uidx_v] = args_spec_list[2]; + return std::make_shared(elements); +} + +AbstractBasePtr InferImplTupleGetItem(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + return InferTupleOrListGetItem(primitive->name(), args_spec_list); +} + +AbstractBasePtr InferImplListGetItem(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + return InferTupleOrListGetItem(primitive->name(), args_spec_list); +} + +AbstractBasePtr InferImplTupleSetItem(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + return InferTupleOrListSetItem(primitive->name(), args_spec_list); +} + +AbstractBasePtr InferImplListSetItem(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + return InferTupleOrListSetItem(primitive->name(), args_spec_list); +} + +AbstractBasePtr InferImplDictGetItem(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: a dict and a scalar whose value is a string. 
+ const std::string op_name = primitive->name(); + CheckArgsSize(op_name, args_spec_list, 2); + AbstractDictionaryPtr dict = CheckArg(op_name, args_spec_list, 0); + AbstractScalarPtr key = CheckArg(op_name, args_spec_list, 1); + + ValuePtr key_value = key->BuildValue(); + if (!key_value->isa()) { + MS_LOG(EXCEPTION) << op_name << " evaluator key should be string, but got " << key_value->ToString(); + } + auto key_str = GetValue(key_value); + std::vector dict_elems = dict->elements(); + auto it = std::find_if(dict_elems.begin(), dict_elems.end(), + [key_str](const AbstractAttribute &item) { return item.first == key_str; }); + + if (it == dict_elems.end()) { + MS_LOG(EXCEPTION) << "The key " << key_str << " does not exist in the dict:" << args_spec_list[0]->ToString(); + } + return it->second; +} + +AbstractBasePtr InferImplDictSetItem(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: a dict and a scalar whose value is a string and an object of a subclass of AbstractBase. 
+ const std::string op_name = primitive->name(); + CheckArgsSize(op_name, args_spec_list, 3); + AbstractDictionaryPtr dict = CheckArg(op_name, args_spec_list, 0); + AbstractScalarPtr key = CheckArg(op_name, args_spec_list, 1); + + ValuePtr key_value = key->BuildValue(); + if (!key_value->isa()) { + MS_LOG(EXCEPTION) << op_name << " evaluator key should be string, but got " << key_value->ToString(); + } + std::string key_str = GetValue(key_value); + std::vector dict_elems = dict->elements(); + auto it = std::find_if(dict_elems.begin(), dict_elems.end(), + [key_str](AbstractAttribute &item) { return item.first == key_str; }); + + MS_EXCEPTION_IF_NULL(args_spec_list[2]); + auto new_ele = std::make_pair(key_str, args_spec_list[2]); + if (it != dict_elems.end()) { + int index = it - dict_elems.begin(); + dict_elems[IntToSize(index)] = new_ele; + } else { + dict_elems.push_back(new_ele); + } + return std::make_shared(dict_elems); +} + +AbstractBasePtr InferImplListAppend(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: a list and an object of a subclass of AbstractBase. + const std::string op_name = primitive->name(); + CheckArgsSize(op_name, args_spec_list, 2); + AbstractListPtr list = CheckArg(op_name, args_spec_list, 0); + (void)AbstractJoin(list->elements()); + return list; +} + +template +AbstractBasePtr InferTupleOrListOrDictLen(const std::string &op_name, const AbstractBasePtrList &args_spec_list) { + // Inputs: a tuple or list or dict. 
+ CheckArgsSize(op_name, args_spec_list, 1); + auto arg = CheckArg(op_name, args_spec_list, 0); + return std::make_shared(SizeToInt(arg->size())); +} + +AbstractBasePtr InferImplTupleLen(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + return InferTupleOrListOrDictLen(primitive->name(), args_spec_list); +} + +AbstractBasePtr InferImplListLen(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + return InferTupleOrListOrDictLen(primitive->name(), args_spec_list); +} + +AbstractBasePtr InferImplDictLen(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + return InferTupleOrListOrDictLen(primitive->name(), args_spec_list); +} + +AbstractBasePtr InferImplArrayLen(const AnalysisEnginePtr &, const PrimitivePtr &, + const AbstractBasePtrList &args_spec_list) { + return std::make_shared(kAnyValue, kInt32); +} + +AbstractBasePtr InferImplListMap(const AnalysisEnginePtr &engine, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: fn, list1, list2, ... + MS_EXCEPTION_IF_NULL(engine); + if (args_spec_list.size() <= 1) { + MS_LOG(EXCEPTION) << "List_map requires at least 1 list. while the input size is " << args_spec_list.size() << "."; + } + AbstractFunctionPtr fn = CheckArg(primitive->name(), args_spec_list, 0); + // check args from 1. 
+ CheckArgsSpec(AbstractBasePtrList(args_spec_list.begin() + 1, args_spec_list.end())); + + AbstractBasePtrList subargs; + for (std::size_t i = 1; i < args_spec_list.size(); i++) { + AbstractListPtr l_ptr = dyn_cast(args_spec_list[i]); + if (l_ptr == nullptr) { + MS_LOG(EXCEPTION) << "Argument[" << i << "] of list_map should be a list."; + } + subargs.push_back(AbstractJoin(l_ptr->elements())); + } + EvalResultPtr engin_exc = engine->Execute(fn, subargs); + AbstractBasePtrList result; + for (std::size_t i = 1; i < args_spec_list.size(); i++) { + result.push_back(engin_exc->abstract()); + } + return std::make_shared(result); +} + +AbstractBasePtr InferImplListReduce(const AnalysisEnginePtr &engine, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: a fn, a list and an object of a subclass of a AbstractBase. + MS_EXCEPTION_IF_NULL(engine); + const std::string op_name = primitive->name(); + CheckArgsSize(op_name, args_spec_list, 3); + AbstractFunctionPtr fn = CheckArg(op_name, args_spec_list, 0); + AbstractListPtr lst = CheckArg(op_name, args_spec_list, 1); + AbstractBasePtr dflt = args_spec_list[2]; + + AbstractBasePtr list_type = AbstractJoin(lst->elements()); + auto result1 = engine->Execute(fn, lst->elements()); + auto result2 = engine->Execute(fn, {dflt, list_type}); + MS_EXCEPTION_IF_NULL(result1->abstract()); + MS_EXCEPTION_IF_NULL(result2->abstract()); + return result1->abstract()->Join(result2->abstract()); +} + +AbstractBasePtr InferImplTupleReversed(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: a tuple + const std::string op_name = primitive->name(); + CheckArgsSize(op_name, args_spec_list, 1); + AbstractTuplePtr input = CheckArg(op_name, args_spec_list, 0); + + auto tuple_elements = input->elements(); + AbstractBasePtrList elem_list; + (void)std::transform(tuple_elements.rbegin(), tuple_elements.rend(), std::back_inserter(elem_list), + [](const 
AbstractBasePtr &elem) { return elem->Clone(); }); + return std::make_shared(elem_list); +} + +AbstractBasePtr DoInferReduceShape(const AbstractTuplePtr &x_shape, const ValuePtr &x_shp_value, + const ValueTuplePtr &axis_value_ptr, const PrimitivePtr &primitive) { + size_t x_rank = x_shape->size(); + std::set axis_set; + auto axis_data = axis_value_ptr->value(); + if (axis_data.empty()) { + int size = 1; + AbstractBasePtrList values(x_rank, std::make_shared(size)); + return std::make_shared(values); + } + + for (auto &elem : axis_data) { + int e_value = CheckAxis(primitive->name(), elem, -SizeToInt(x_rank), SizeToInt(x_rank) - 1); + (void)axis_set.insert(e_value); + } + + auto x_shp_data = x_shp_value->cast()->value(); + if (x_shp_data.size() < x_rank) { + MS_LOG(EXCEPTION) << "x_shape_data.size() " << x_shp_data.size() << " less than x_shape.size() " << x_rank; + } + AbstractBasePtrList values; + for (size_t i = 0; i < x_rank; i++) { + if (axis_set.count(SizeToInt(i)) || axis_set.count(SizeToInt(i) - SizeToInt(x_rank))) { + auto axis_v = MakeValue(1); + values.push_back(std::make_shared(axis_v, axis_v->type())); + } else { + int dim_value = x_shp_data[i]->cast()->value(); + auto dim = MakeValue(dim_value); + values.push_back(std::make_shared(dim, dim->type())); + } + } + + return std::make_shared(values); +} + +AbstractBasePtr InferImplReduceShape(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: x_shape, axis + const std::string op_name = primitive->name(); + CheckArgsSize(op_name, args_spec_list, 2); + AbstractTuplePtr shape_x = CheckArg(op_name, args_spec_list, 0); + MS_EXCEPTION_IF_NULL(args_spec_list[1]); + + auto x_shp_value = shape_x->BuildValue(); + if (x_shp_value->isa()) { + MS_LOG(EXCEPTION) << op_name + << " evaluator shape's data field can't be anything: " << args_spec_list[1]->ToString(); + } + + // Axis can be scalar, tuple or None + AbstractTuplePtr axis = nullptr; + if 
(args_spec_list[1]->isa()) { + MS_LOG(DEBUG) << op_name << " evaluator second parameter is scalar"; + AbstractBasePtrList axis_list = {dyn_cast(args_spec_list[1])}; + axis = std::make_shared(axis_list); + } else if (args_spec_list[1]->isa()) { + MS_LOG(DEBUG) << op_name << " evaluator second parameter is tuple"; + axis = args_spec_list[1]->cast(); + } else { + MS_LOG(EXCEPTION) << op_name << " evaluator second parameter should be a scalar or tuple, but got " + << args_spec_list[1]->ToString(); + } + + auto axis_value = axis->BuildValue(); + if (axis_value->isa()) { + MS_LOG(EXCEPTION) << op_name + << " evaluator shape's data field can't be anything: " << args_spec_list[1]->ToString(); + } + auto axis_value_ptr = axis_value->cast(); + MS_EXCEPTION_IF_NULL(axis_value_ptr); + + return DoInferReduceShape(shape_x, x_shp_value, axis_value_ptr, primitive); +} + +AbstractBasePtr InferImplTupleDiv(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: two tuples. 
+ const std::string op_name = primitive->name(); + CheckArgsSize(op_name, args_spec_list, 2); + AbstractTuplePtr shape_x = CheckArg(op_name, args_spec_list, 0); + AbstractTuplePtr div_shp = CheckArg(op_name, args_spec_list, 1); + MS_LOG(INFO) << "DivShape input:" << shape_x->ToString() << ", div:" << div_shp->ToString(); + + auto div_shp_value = div_shp->BuildValue(); + if (div_shp_value->isa()) { + MS_LOG(EXCEPTION) << "shape's data field can't be anythin: " << args_spec_list[0]->ToString(); + } + + auto shpx_value = shape_x->BuildValue(); + if (shpx_value->isa()) { + MS_LOG(EXCEPTION) << "shape's data field can't be anythin: " << args_spec_list[1]->ToString(); + } + + if (div_shp->size() != shape_x->size()) { + MS_LOG(EXCEPTION) << "tileshape elems shape must the same div_shp: " << div_shp->size() + << ", shapex: " << shape_x->size() << "."; + } + + auto shpx_data = shpx_value->cast()->value(); + auto div_shp_data = div_shp_value->cast()->value(); + AbstractBasePtrList values; + + for (size_t i = 0; i < div_shp_data.size(); i++) { + if (div_shp_data[i]->cast() == nullptr) { + MS_LOG(EXCEPTION) << "div_shp_shape data should be an int32 number, but it's " << args_spec_list[1]->ToString(); + } + int shapex_value = GetValue(shpx_data[i]); + int div_value = GetValue(div_shp_data[i]); + MS_LOG(DEBUG) << "div_shp_shape data shapex_value :" << shapex_value << " div_value: " << div_value; + if (div_value == 0) { + MS_LOG(EXCEPTION) << "error: division value should not be 0!"; + } + if ((shapex_value % div_value) != 0) { + MS_LOG(EXCEPTION) << "div_shp_shape data shapex must div int:" << shapex_value << " div_value: " << div_value; + } + + int result = shapex_value / div_value; + auto result_v = MakeValue(result); + values.push_back(std::make_shared(result_v, result_v->type())); + } + + return std::make_shared(values); +} + +AbstractBasePtr InferImplTuple2Array(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // 
Inputs: a tuple + const std::string op_name = primitive->name(); + CheckArgsSize(op_name, args_spec_list, 1); + AbstractTuplePtr input = CheckArg(op_name, args_spec_list, 0); + + py::tuple data_tuple = ValuePtrToPyData(input->BuildValue()); + py::array data = py::array(data_tuple); + auto tensor = TensorPy::MakeTensor(data); + auto ret = tensor->ToAbstract(); + ret->set_value(tensor); + MS_LOG(DEBUG) << "Tuple2arry result AbstractTensor: " << ret->ToString(); + return ret; +} + +AbstractBasePtr InferImplShapeMul(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: a tuple + // example: tuple = (1, 2, 3), shape_mul(tuple) = 1*2*3 = 6 + const std::string op_name = primitive->name(); + CheckArgsSize(op_name, args_spec_list, 1); + AbstractTuplePtr shape_x = CheckArg(op_name, args_spec_list, 0); + + auto shpx_value = shape_x->BuildValue(); + if (shpx_value->isa()) { + MS_LOG(EXCEPTION) << "shape's data field can't be anythin: " << shape_x->ToString(); + } + + auto shpx_data = shpx_value->cast()->value(); + + int result = 1; + for (size_t i = 0; i < shpx_data.size(); i++) { + int value = GetValue(shpx_data[i]); + result = IntMulWithOverflowCheck(result, value); + } + + auto result_v = MakeValue(result); + MS_LOG(DEBUG) << "shape mul result:" << result_v->ToString(); + return std::make_shared(result_v, result_v->type()); +} + +template +AbstractBasePtr InferImplTupleOrListEqual(const std::string &op_name, const AbstractBasePtrList &args_spec_list) { + // Inputs: two tuples or two lists. 
+ CheckArgsSize(op_name, args_spec_list, 2); + auto input_x = CheckArg(op_name, args_spec_list, 0); + auto input_y = CheckArg(op_name, args_spec_list, 1); + + ValuePtr x_value = input_x->BuildValue(); + ValuePtr y_value = input_y->BuildValue(); + return std::make_shared(*x_value == *y_value); +} + +AbstractBasePtr InferImplTupleEqual(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + return InferImplTupleOrListEqual(primitive->name(), args_spec_list); +} + +AbstractBasePtr InferImplListEqual(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + return InferImplTupleOrListEqual(primitive->name(), args_spec_list); +} + +struct SlideInfo { + int start; + int step; + int stop; +}; + +void CalcSlidePara(const AbstractBasePtrList &args_spec_list, SlideInfo *slide) { + int arg1 = 0; + int arg2 = 0; + if (!args_spec_list.empty()) { + MS_EXCEPTION_IF_NULL(args_spec_list[0]); + auto arg_value = args_spec_list[0]->BuildValue(); + if (!arg_value->isa()) { + MS_LOG(EXCEPTION) << "Only supported input an int32 number."; + } + arg1 = GetValue(arg_value); + } + + if (args_spec_list.size() >= 2) { + MS_EXCEPTION_IF_NULL(args_spec_list[1]); + auto arg_value = args_spec_list[1]->BuildValue(); + if (!arg_value->isa()) { + MS_LOG(EXCEPTION) << "Only supported input an int32 number."; + } + arg2 = GetValue(arg_value); + } + + if (args_spec_list.size() == 3) { + MS_EXCEPTION_IF_NULL(args_spec_list[2]); + auto arg_value = args_spec_list[2]->BuildValue(); + if (!arg_value->isa()) { + MS_LOG(EXCEPTION) << "Only supported input an int32 number."; + } + slide->step = GetValue(arg_value); + slide->start = arg1; + slide->stop = arg2; + } + + if (args_spec_list.size() == 2) { + slide->start = arg1; + slide->stop = arg2; + } + + if (args_spec_list.size() == 1) { + slide->stop = arg1; + } +} + +AbstractBasePtr InferImplMakeRange(const AnalysisEnginePtr &, const PrimitivePtr &, + const 
AbstractBasePtrList &args_spec_list) { + if (args_spec_list.empty()) { + MS_LOG(EXCEPTION) << "Cannot make range from empty input."; + } + + if (args_spec_list.size() > 3) { + MS_LOG(EXCEPTION) << "Error args size of make range operational."; + } + + SlideInfo slide = {0, 1, 0}; + CalcSlidePara(args_spec_list, &slide); + + if (slide.step == 0) { + MS_LOG(EXCEPTION) << "Error, step value is 0."; + } + + AbstractBasePtrList args; + if (slide.start <= slide.stop) { + if (slide.step <= 0) { + MS_LOG(EXCEPTION) << "Error slice[" << slide.start << ", " << slide.stop << ", " << slide.step << "]"; + } + for (int i = slide.start; i < slide.stop; i += slide.step) { + args.push_back(abstract::FromValue(i)); + } + } else { + if (slide.step >= 0) { + MS_LOG(EXCEPTION) << "Error slice[" << slide.start << ", " << slide.stop << ", " << slide.step << "]"; + } + for (int i = slide.start; i > slide.stop; i += slide.step) { + args.push_back(abstract::FromValue(i)); + } + } + + return std::make_shared(args); +} + +AbstractBasePtr InferImplStopGradient(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: a tensor + CheckArgsSize(primitive->name(), args_spec_list, 1); + return args_spec_list[0]->Clone(); +} +} // namespace abstract +} // namespace mindspore diff --git a/mindspore/ccsrc/operator/prim_to_function.cc b/mindspore/ccsrc/frontend/operator/prim_to_function.cc similarity index 98% rename from mindspore/ccsrc/operator/prim_to_function.cc rename to mindspore/ccsrc/frontend/operator/prim_to_function.cc index 733cdbdb73..7b9592e80e 100644 --- a/mindspore/ccsrc/operator/prim_to_function.cc +++ b/mindspore/ccsrc/frontend/operator/prim_to_function.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "operator/prim_to_function.h" +#include "frontend/operator/prim_to_function.h" #include #include #include diff --git a/mindspore/ccsrc/operator/prim_to_function.h b/mindspore/ccsrc/frontend/operator/prim_to_function.h similarity index 100% rename from mindspore/ccsrc/operator/prim_to_function.h rename to mindspore/ccsrc/frontend/operator/prim_to_function.h diff --git a/mindspore/ccsrc/optimizer/CMakeLists.txt b/mindspore/ccsrc/frontend/optimizer/CMakeLists.txt similarity index 71% rename from mindspore/ccsrc/optimizer/CMakeLists.txt rename to mindspore/ccsrc/frontend/optimizer/CMakeLists.txt index 44af01735a..14fda83052 100644 --- a/mindspore/ccsrc/optimizer/CMakeLists.txt +++ b/mindspore/ccsrc/frontend/optimizer/CMakeLists.txt @@ -1,3 +1,3 @@ file(GLOB_RECURSE _OPTIMIZER_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") set_property(SOURCE ${_OPTIMIZER_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_OPTIMIZER) -add_library(_mindspore_optimizer_obj OBJECT ${_OPTIMIZER_SRC_FILES}) +add_library(_mindspore_frontend_optimizer_obj OBJECT ${_OPTIMIZER_SRC_FILES}) diff --git a/mindspore/ccsrc/optimizer/ad/adjoint.cc b/mindspore/ccsrc/frontend/optimizer/ad/adjoint.cc similarity index 97% rename from mindspore/ccsrc/optimizer/ad/adjoint.cc rename to mindspore/ccsrc/frontend/optimizer/ad/adjoint.cc index ed89aba20e..60ccf28df4 100644 --- a/mindspore/ccsrc/optimizer/ad/adjoint.cc +++ b/mindspore/ccsrc/frontend/optimizer/ad/adjoint.cc @@ -14,13 +14,13 @@ * limitations under the License. 
*/ -#include "optimizer/ad/adjoint.h" +#include "frontend/optimizer/ad/adjoint.h" #include #include #include "ir/anf.h" -#include "optimizer/ad/dfunctor.h" +#include "frontend/optimizer/ad/dfunctor.h" namespace mindspore { namespace ad { diff --git a/mindspore/ccsrc/optimizer/ad/adjoint.h b/mindspore/ccsrc/frontend/optimizer/ad/adjoint.h similarity index 97% rename from mindspore/ccsrc/optimizer/ad/adjoint.h rename to mindspore/ccsrc/frontend/optimizer/ad/adjoint.h index b2dae8e66f..37986e6810 100644 --- a/mindspore/ccsrc/optimizer/ad/adjoint.h +++ b/mindspore/ccsrc/frontend/optimizer/ad/adjoint.h @@ -22,7 +22,7 @@ #include #include "ir/anf.h" -#include "optimizer/opt.h" +#include "frontend/optimizer/opt.h" namespace mindspore { namespace ad { diff --git a/mindspore/ccsrc/optimizer/ad/dfunctor.cc b/mindspore/ccsrc/frontend/optimizer/ad/dfunctor.cc similarity index 96% rename from mindspore/ccsrc/optimizer/ad/dfunctor.cc rename to mindspore/ccsrc/frontend/optimizer/ad/dfunctor.cc index f9c056a84e..b314b22f81 100644 --- a/mindspore/ccsrc/optimizer/ad/dfunctor.cc +++ b/mindspore/ccsrc/frontend/optimizer/ad/dfunctor.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "optimizer/ad/dfunctor.h" +#include "frontend/optimizer/ad/dfunctor.h" #include #include @@ -25,12 +25,12 @@ #include "debug/info.h" #include "ir/func_graph_cloner.h" #include "ir/manager.h" -#include "pipeline/resource.h" -#include "pipeline/parse/parse.h" -#include "optimizer/ad/adjoint.h" -#include "optimizer/opt.h" -#include "operator/ops.h" -#include "operator/composite/composite.h" +#include "pipeline/jit/resource.h" +#include "pipeline/jit/parse/parse.h" +#include "frontend/optimizer/ad/adjoint.h" +#include "frontend/optimizer/opt.h" +#include "frontend/operator/ops.h" +#include "frontend/operator/composite/composite.h" #include "utils/symbolic.h" #include "utils/context/ms_context.h" #include "./common.h" @@ -99,14 +99,14 @@ void DFunctor::BackPropagateFv(const AnfNodePtr &fv, const AnfNodePtr &din) { fv_adjoint = anfnode_to_adjoin_indirect_fv_.find(fv); } } - auto key = tape_->NewCNode({NewValueNode(prim::kPrimEmbed), fv_adjoint->second->k()}); - fv_adjoint->second->RegisterKUser(key, 1); + auto node = tape_->NewCNode({NewValueNode(prim::kPrimEmbed), fv_adjoint->second->k()}); + fv_adjoint->second->RegisterKUser(node, 1); auto default_val = tape_->NewCNode({NewValueNode(prim::GetPythonOps("zeros_like")), fv_adjoint->second->k()}); fv_adjoint->second->RegisterKUser(default_val, 1); - auto dfv = tape_->NewCNode({NewValueNode(prim::kPrimEnvGetItem), din, key, default_val}); + auto dfv = tape_->NewCNode({NewValueNode(prim::kPrimEnvGetItem), din, node, default_val}); MS_LOG(DEBUG) << "BackPropagateFv find adjoint in anfnode_to_adjoin_ or anfnode_to_adjoin_indirect_fv_ fv " << fv->func_graph()->ToString() << " " << fv->ToString() << "."; - MS_LOG(DEBUG) << "BackPropagateFv get item from " << din->ToString() << " key " << key->ToString() << "."; + MS_LOG(DEBUG) << "BackPropagateFv get item from " << din->ToString() << " key " << node->ToString() << "."; fv_adjoint->second->AccumulateDout(dfv); } @@ -279,13 +279,13 @@ AnfNodePtr 
DFunctor::AttachFvDoutToTape(const AnfNodePtr &grad_fv) { if (fv_adjoint == anfnode_to_adjoin_.end()) { MS_LOG(EXCEPTION) << "AttachFvDoutToTape fv adjoint does not exist " << fv->ToString() << "."; } - auto key = tape_->NewCNode({NewValueNode(prim::kPrimEmbed), fv_adjoint->second->k()}); - fv_adjoint->second->RegisterKUser(key, 1); + auto node = tape_->NewCNode({NewValueNode(prim::kPrimEmbed), fv_adjoint->second->k()}); + fv_adjoint->second->RegisterKUser(node, 1); auto sens = fv_adjoint->second->dout(); new_grad_fv = tape_->NewCNode({ NewValueNode(prim::kPrimEnvSetItem), new_grad_fv, - key, + node, sens, }); fv_adjoint->second->RegisterDoutUser(new_grad_fv->cast(), 3); @@ -301,13 +301,13 @@ AnfNodePtr DFunctor::AttachIndirectFvDoutToTape(const AnfNodePtr &grad_fv) { for (auto &fv_adjoint : anfnode_to_adjoin_indirect_fv_) { MS_LOG(DEBUG) << "AttachIndirectFvDoutToTape backprop indirect fv " << fv_adjoint.first->ToString() << " " << primal_graph_->ToString() << "."; - auto key = tape_->NewCNode({NewValueNode(prim::kPrimEmbed), fv_adjoint.second->k()}); - fv_adjoint.second->RegisterKUser(key, 1); + auto node = tape_->NewCNode({NewValueNode(prim::kPrimEmbed), fv_adjoint.second->k()}); + fv_adjoint.second->RegisterKUser(node, 1); auto sens = fv_adjoint.second->dout(); new_grad_fv = tape_->NewCNode({ NewValueNode(prim::kPrimEnvSetItem), new_grad_fv, - key, + node, sens, }); fv_adjoint.second->RegisterDoutUser(new_grad_fv->cast(), 3); diff --git a/mindspore/ccsrc/optimizer/ad/dfunctor.h b/mindspore/ccsrc/frontend/optimizer/ad/dfunctor.h similarity index 93% rename from mindspore/ccsrc/optimizer/ad/dfunctor.h rename to mindspore/ccsrc/frontend/optimizer/ad/dfunctor.h index 4fa9cf6bb5..9ee93334e8 100644 --- a/mindspore/ccsrc/optimizer/ad/dfunctor.h +++ b/mindspore/ccsrc/frontend/optimizer/ad/dfunctor.h @@ -28,36 +28,18 @@ #include "ir/anf.h" #include "ir/meta_func_graph.h" #include "ir/func_graph_cloner.h" -#include "pipeline/resource.h" -#include "optimizer/ad/adjoint.h" 
-#include "operator/ops.h" +#include "pipeline/jit/resource.h" +#include "frontend/optimizer/ad/adjoint.h" +#include "frontend/operator/ops.h" #include "debug/trace.h" namespace mindspore { namespace ad { struct PrimitiveTotalEqual { bool operator()(PrimitivePtr const &t1, PrimitivePtr const &t2) const { - if (t1->name() != t2->name()) { - return false; - } - - auto const &attrs1 = t1->attrs(); - auto const &attrs2 = t2->attrs(); - if (attrs1.size() != attrs2.size()) { - return false; - } - - for (auto &attr1 : attrs1) { - if (!t2->HasAttr(attr1.first)) { - return false; - } - - if (!(*(attr1.second) == *(t2->GetAttr(attr1.first)))) { - return false; - } - } - - return true; + MS_EXCEPTION_IF_NULL(t1); + MS_EXCEPTION_IF_NULL(t2); + return *t1 == *t2; } }; diff --git a/mindspore/ccsrc/optimizer/ad/grad.cc b/mindspore/ccsrc/frontend/optimizer/ad/grad.cc similarity index 96% rename from mindspore/ccsrc/optimizer/ad/grad.cc rename to mindspore/ccsrc/frontend/optimizer/ad/grad.cc index d141dc6eea..ef2d7d400a 100644 --- a/mindspore/ccsrc/optimizer/ad/grad.cc +++ b/mindspore/ccsrc/frontend/optimizer/ad/grad.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "optimizer/ad/grad.h" -#include "optimizer/ad/dfunctor.h" +#include "frontend/optimizer/ad/grad.h" +#include "frontend/optimizer/ad/dfunctor.h" #include "ir/func_graph_cloner.h" #include "utils/context/ms_context.h" #include "utils/symbolic.h" diff --git a/mindspore/ccsrc/optimizer/ad/grad.h b/mindspore/ccsrc/frontend/optimizer/ad/grad.h similarity index 97% rename from mindspore/ccsrc/optimizer/ad/grad.h rename to mindspore/ccsrc/frontend/optimizer/ad/grad.h index a878aa9df7..ee9ab79ffb 100644 --- a/mindspore/ccsrc/optimizer/ad/grad.h +++ b/mindspore/ccsrc/frontend/optimizer/ad/grad.h @@ -22,7 +22,7 @@ #include "ir/anf.h" #include "ir/meta_func_graph.h" -#include "pipeline/resource.h" +#include "pipeline/jit/resource.h" namespace mindspore { namespace ad { diff --git a/mindspore/ccsrc/optimizer/ad/kprim.cc b/mindspore/ccsrc/frontend/optimizer/ad/kprim.cc similarity index 96% rename from mindspore/ccsrc/optimizer/ad/kprim.cc rename to mindspore/ccsrc/frontend/optimizer/ad/kprim.cc index 4141fb5413..5ca2ca6c43 100644 --- a/mindspore/ccsrc/optimizer/ad/kprim.cc +++ b/mindspore/ccsrc/frontend/optimizer/ad/kprim.cc @@ -20,16 +20,16 @@ #include #include #include "ir/anf.h" -#include "ir/primitive.h" +#include "ir/primitive_py.h" #include "ir/meta_func_graph.h" #include "ir/func_graph_cloner.h" #include "ir/manager.h" -#include "pipeline/resource.h" -#include "pipeline/parse/parse.h" -#include "optimizer/ad/dfunctor.h" -#include "optimizer/opt.h" -#include "operator/ops.h" -#include "operator/composite/composite.h" +#include "pipeline/jit/resource.h" +#include "pipeline/jit/parse/parse.h" +#include "frontend/optimizer/ad/dfunctor.h" +#include "frontend/optimizer/opt.h" +#include "frontend/operator/ops.h" +#include "frontend/operator/composite/composite.h" #include "utils/symbolic.h" #include "utils/primitive_utils.h" #include "utils/context/ms_context.h" @@ -232,10 +232,7 @@ FuncGraphPtr KPrim::BpropCut(const ValueNodePtr &value_node, const pipeline::Res 
std::vector outputs; auto bprop_cut = std::make_shared("bprop_cut", py::object()); - if (!prim->is_base()) { - PrimitivePyPtr prim_py = dyn_cast(prim); - bprop_cut->set_hook(prim_py->hook()); - } + bprop_cut->CopyHookFunction(prim); auto cell_id = GetValue(prim->GetAttr("cell_id")); if (cell_id != "") { diff --git a/mindspore/ccsrc/optimizer/clean.cc b/mindspore/ccsrc/frontend/optimizer/clean.cc similarity index 96% rename from mindspore/ccsrc/optimizer/clean.cc rename to mindspore/ccsrc/frontend/optimizer/clean.cc index bb52273568..e35760ceaf 100644 --- a/mindspore/ccsrc/optimizer/clean.cc +++ b/mindspore/ccsrc/frontend/optimizer/clean.cc @@ -16,7 +16,7 @@ * limitations under the License. */ -#include "optimizer/clean.h" +#include "frontend/optimizer/clean.h" #include #include #include @@ -24,7 +24,7 @@ #include #include "./common.h" #include "debug/trace.h" -#include "operator/composite/composite.h" +#include "frontend/operator/composite/composite.h" namespace mindspore { /* namespace to support opt */ @@ -43,26 +43,28 @@ static AbstractBasePtr Reabs(const AbstractBasePtr &t) { return nullptr; } - AbstractBasePtr res = t; if (t->isa()) { auto abs_class = dyn_cast(t); AbstractBasePtrList baselist; auto attributes = abs_class->attributes(); (void)std::transform(attributes.begin(), attributes.end(), std::back_inserter(baselist), [](const AbstractAttribute &item) { return item.second; }); - res = std::make_shared(baselist); - } else if (t->isa()) { + return std::make_shared(baselist); + } + if (t->isa()) { auto abs_dict = dyn_cast(t); AbstractBasePtrList baselist; auto elements = abs_dict->elements(); (void)std::transform(elements.begin(), elements.end(), std::back_inserter(baselist), [](const AbstractAttribute &item) { return item.second; }); - res = std::make_shared(baselist); - } else if (t->isa()) { - auto abs_dict = dyn_cast(t); - res = std::make_shared(abs_dict->elements()); + return std::make_shared(baselist); + } + if (t->isa()) { + auto abs_list = 
dyn_cast(t); + return std::make_shared(abs_list->elements()); } - return res; + + return nullptr; } AnfNodePtr ConvertGetAttrToTupleGetItem(const CNodePtr &node) { @@ -376,7 +378,12 @@ bool SimplifyDataStructures(const FuncGraphPtr &root, const FuncGraphManagerPtr for (auto &node : manager->all_nodes()) { auto ret = Reabs(node->abstract()); - node->set_abstract(ret); + if (ret) { + MS_LOG(DEBUG) << "Replace " << node->DebugString() << "'s abstract " << node->abstract()->ToString() << " with " + << ret->ToString(); + node->set_abstract(ret); + changed = true; + } } return changed; } diff --git a/mindspore/ccsrc/optimizer/clean.h b/mindspore/ccsrc/frontend/optimizer/clean.h similarity index 94% rename from mindspore/ccsrc/optimizer/clean.h rename to mindspore/ccsrc/frontend/optimizer/clean.h index 0130ecfb32..54faabaa63 100644 --- a/mindspore/ccsrc/optimizer/clean.h +++ b/mindspore/ccsrc/frontend/optimizer/clean.h @@ -21,10 +21,10 @@ #include #include "ir/anf.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "utils/any.h" #include "ir/manager.h" -#include "pipeline/static_analysis/dshape.h" +#include "abstract/dshape.h" namespace mindspore { /* namespace to support opt */ diff --git a/mindspore/ccsrc/optimizer/control_depend.cc b/mindspore/ccsrc/frontend/optimizer/control_depend.cc similarity index 98% rename from mindspore/ccsrc/optimizer/control_depend.cc rename to mindspore/ccsrc/frontend/optimizer/control_depend.cc index 0b5c85b1e0..8cc9bdb7f4 100644 --- a/mindspore/ccsrc/optimizer/control_depend.cc +++ b/mindspore/ccsrc/frontend/optimizer/control_depend.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "optimizer/control_depend.h" +#include "frontend/optimizer/control_depend.h" #include #include @@ -22,7 +22,7 @@ #include #include -#include "optimizer/optimizer.h" +#include "frontend/optimizer/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/control_depend.h b/mindspore/ccsrc/frontend/optimizer/control_depend.h similarity index 100% rename from mindspore/ccsrc/optimizer/control_depend.h rename to mindspore/ccsrc/frontend/optimizer/control_depend.h diff --git a/mindspore/ccsrc/optimizer/cse.cc b/mindspore/ccsrc/frontend/optimizer/cse.cc similarity index 99% rename from mindspore/ccsrc/optimizer/cse.cc rename to mindspore/ccsrc/frontend/optimizer/cse.cc index 0b675cca72..4d968d6d74 100644 --- a/mindspore/ccsrc/optimizer/cse.cc +++ b/mindspore/ccsrc/frontend/optimizer/cse.cc @@ -16,7 +16,7 @@ * limitations under the License. */ -#include "optimizer/cse.h" +#include "frontend/optimizer/cse.h" #include #include #include diff --git a/mindspore/ccsrc/optimizer/cse.h b/mindspore/ccsrc/frontend/optimizer/cse.h similarity index 97% rename from mindspore/ccsrc/optimizer/cse.h rename to mindspore/ccsrc/frontend/optimizer/cse.h index 57163cc5c9..140f592715 100644 --- a/mindspore/ccsrc/optimizer/cse.h +++ b/mindspore/ccsrc/frontend/optimizer/cse.h @@ -24,7 +24,7 @@ #include #include "ir/anf.h" #include "ir/manager.h" -#include "optimizer/optimizer.h" +#include "frontend/optimizer/optimizer.h" namespace mindspore { /* namespace to support opt */ diff --git a/mindspore/ccsrc/optimizer/graph_kernel_reuse.cc b/mindspore/ccsrc/frontend/optimizer/graph_kernel_reuse.cc similarity index 98% rename from mindspore/ccsrc/optimizer/graph_kernel_reuse.cc rename to mindspore/ccsrc/frontend/optimizer/graph_kernel_reuse.cc index dc20ad925e..c157777040 100644 --- a/mindspore/ccsrc/optimizer/graph_kernel_reuse.cc +++ b/mindspore/ccsrc/frontend/optimizer/graph_kernel_reuse.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "optimizer/graph_kernel_reuse.h" +#include "frontend/optimizer/graph_kernel_reuse.h" #include #include #include diff --git a/mindspore/ccsrc/optimizer/graph_kernel_reuse.h b/mindspore/ccsrc/frontend/optimizer/graph_kernel_reuse.h similarity index 93% rename from mindspore/ccsrc/optimizer/graph_kernel_reuse.h rename to mindspore/ccsrc/frontend/optimizer/graph_kernel_reuse.h index ed5cc93d18..a79ef3ce6d 100644 --- a/mindspore/ccsrc/optimizer/graph_kernel_reuse.h +++ b/mindspore/ccsrc/frontend/optimizer/graph_kernel_reuse.h @@ -17,12 +17,11 @@ #ifndef MINDSPORE_CCSRC_OPTIMIZER_GRAPH_KERNEL_OP_REUSE_H #define MINDSPORE_CCSRC_OPTIMIZER_GRAPH_KERNEL_OP_REUSE_H -#include #include #include #include - -#include "optimizer/optimizer.h" +#include "mindspore/ccsrc/backend/session/anf_runtime_algorithm.h" +#include "frontend/optimizer/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass.cc b/mindspore/ccsrc/frontend/optimizer/irpass.cc similarity index 85% rename from mindspore/ccsrc/optimizer/irpass.cc rename to mindspore/ccsrc/frontend/optimizer/irpass.cc index 166151751f..efc3795a4c 100644 --- a/mindspore/ccsrc/optimizer/irpass.cc +++ b/mindspore/ccsrc/frontend/optimizer/irpass.cc @@ -16,33 +16,33 @@ #include -#include "optimizer/irpass.h" -#include "optimizer/irpass/arithmetic_simplify.h" -#include "optimizer/irpass/branch_culling.h" -#include "optimizer/irpass/cast_eliminate.h" -#include "optimizer/irpass/convert.h" -#include "optimizer/irpass/env_item_eliminate.h" -#include "optimizer/irpass/grad_var_prepare.h" -#include "optimizer/irpass/gradient_eliminate.h" -#include "optimizer/irpass/inline.h" -#include "optimizer/irpass/incorporate_call.h" -#include "optimizer/irpass/incorporate_getitem.h" -#include "optimizer/irpass/item_tuple_eliminate.h" -#include "optimizer/irpass/mark_interface_fusion.h" -#include "optimizer/irpass/merge_addn.h" -#include "optimizer/irpass/minmax_grad.h" -#include 
"optimizer/irpass/param_replace.h" -#include "optimizer/irpass/partial_eliminate.h" -#include "optimizer/irpass/reduce_eliminate.h" -#include "optimizer/irpass/ref_eliminate.h" -#include "optimizer/irpass/reshape_eliminate.h" -#include "optimizer/irpass/special_op_eliminate.h" -#include "optimizer/irpass/specialize_transform.h" -#include "optimizer/irpass/symbol_resolver.h" -#include "optimizer/irpass/tile_eliminate.h" -#include "optimizer/irpass/transpose_eliminate.h" -#include "optimizer/opt.h" -#include "optimizer/irpass/indexed_slices_eliminate.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/irpass/arithmetic_simplify.h" +#include "frontend/optimizer/irpass/branch_culling.h" +#include "frontend/optimizer/irpass/cast_eliminate.h" +#include "frontend/optimizer/irpass/convert.h" +#include "frontend/optimizer/irpass/env_item_eliminate.h" +#include "frontend/optimizer/irpass/grad_var_prepare.h" +#include "frontend/optimizer/irpass/gradient_eliminate.h" +#include "frontend/optimizer/irpass/inline.h" +#include "frontend/optimizer/irpass/incorporate_call.h" +#include "frontend/optimizer/irpass/incorporate_getitem.h" +#include "frontend/optimizer/irpass/item_tuple_eliminate.h" +#include "frontend/optimizer/irpass/mark_interface_fusion.h" +#include "frontend/optimizer/irpass/merge_addn.h" +#include "frontend/optimizer/irpass/minmax_grad.h" +#include "frontend/optimizer/irpass/param_replace.h" +#include "frontend/optimizer/irpass/partial_eliminate.h" +#include "frontend/optimizer/irpass/reduce_eliminate.h" +#include "frontend/optimizer/irpass/ref_eliminate.h" +#include "frontend/optimizer/irpass/reshape_eliminate.h" +#include "frontend/optimizer/irpass/special_op_eliminate.h" +#include "frontend/optimizer/irpass/specialize_transform.h" +#include "frontend/optimizer/irpass/symbol_resolver.h" +#include "frontend/optimizer/irpass/tile_eliminate.h" +#include "frontend/optimizer/irpass/transpose_eliminate.h" +#include "frontend/optimizer/opt.h" 
+#include "frontend/optimizer/irpass/indexed_slices_eliminate.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass.h b/mindspore/ccsrc/frontend/optimizer/irpass.h similarity index 98% rename from mindspore/ccsrc/optimizer/irpass.h rename to mindspore/ccsrc/frontend/optimizer/irpass.h index 782eae6124..4af8c0789d 100644 --- a/mindspore/ccsrc/optimizer/irpass.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass.h @@ -19,8 +19,8 @@ #include -#include "optimizer/optimizer.h" -#include "optimizer/opt.h" +#include "frontend/optimizer/optimizer.h" +#include "frontend/optimizer/opt.h" #include "ir/visitor.h" namespace mindspore { diff --git a/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.cc b/mindspore/ccsrc/frontend/optimizer/irpass/arithmetic_simplify.cc similarity index 98% rename from mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.cc rename to mindspore/ccsrc/frontend/optimizer/irpass/arithmetic_simplify.cc index b111a6b67a..83f7fae582 100644 --- a/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.cc +++ b/mindspore/ccsrc/frontend/optimizer/irpass/arithmetic_simplify.cc @@ -19,13 +19,13 @@ #include #include -#include "optimizer/irpass/arithmetic_simplify.h" +#include "frontend/optimizer/irpass/arithmetic_simplify.h" #include "ir/optimizer_caller.h" #include "ir/visitor.h" -#include "operator/ops.h" -#include "optimizer/irpass.h" -#include "optimizer/irpass/prim_eliminate.h" -#include "optimizer/optimizer.h" +#include "frontend/operator/ops.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/irpass/prim_eliminate.h" +#include "frontend/optimizer/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.h b/mindspore/ccsrc/frontend/optimizer/irpass/arithmetic_simplify.h similarity index 98% rename from mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.h rename to mindspore/ccsrc/frontend/optimizer/irpass/arithmetic_simplify.h index 
f4bdb0d655..3088231396 100644 --- a/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/arithmetic_simplify.h @@ -23,10 +23,10 @@ #include "ir/optimizer_caller.h" #include "ir/visitor.h" -#include "operator/ops.h" -#include "optimizer/irpass.h" -#include "optimizer/irpass/prim_eliminate.h" -#include "optimizer/optimizer.h" +#include "frontend/operator/ops.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/irpass/prim_eliminate.h" +#include "frontend/optimizer/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/branch_culling.cc b/mindspore/ccsrc/frontend/optimizer/irpass/branch_culling.cc similarity index 99% rename from mindspore/ccsrc/optimizer/irpass/branch_culling.cc rename to mindspore/ccsrc/frontend/optimizer/irpass/branch_culling.cc index 726f4a28b0..dc580f6b63 100644 --- a/mindspore/ccsrc/optimizer/irpass/branch_culling.cc +++ b/mindspore/ccsrc/frontend/optimizer/irpass/branch_culling.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "optimizer/irpass/branch_culling.h" +#include "frontend/optimizer/irpass/branch_culling.h" #include #include @@ -22,7 +22,7 @@ #include "ir/func_graph.h" #include "ir/func_graph_cloner.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/branch_culling.h b/mindspore/ccsrc/frontend/optimizer/irpass/branch_culling.h similarity index 98% rename from mindspore/ccsrc/optimizer/irpass/branch_culling.h rename to mindspore/ccsrc/frontend/optimizer/irpass/branch_culling.h index 2b5b30bdbf..b3f3fe4733 100644 --- a/mindspore/ccsrc/optimizer/irpass/branch_culling.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/branch_culling.h @@ -24,8 +24,8 @@ #include "ir/func_graph_cloner.h" #include "ir/optimizer_caller.h" #include "ir/pattern_matcher.h" -#include "operator/ops.h" -#include "optimizer/irpass.h" +#include "frontend/operator/ops.h" +#include "frontend/optimizer/irpass.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/cast_eliminate.cc b/mindspore/ccsrc/frontend/optimizer/irpass/cast_eliminate.cc similarity index 90% rename from mindspore/ccsrc/optimizer/irpass/cast_eliminate.cc rename to mindspore/ccsrc/frontend/optimizer/irpass/cast_eliminate.cc index a497f3d5bd..ddb84806e1 100644 --- a/mindspore/ccsrc/optimizer/irpass/cast_eliminate.cc +++ b/mindspore/ccsrc/frontend/optimizer/irpass/cast_eliminate.cc @@ -14,14 +14,14 @@ * limitations under the License. 
*/ -#include "optimizer/irpass/cast_eliminate.h" -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/optimizer/irpass/cast_eliminate.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" #include "ir/visitor.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "ir/func_graph.h" -#include "pipeline/parse/data_converter.h" -#include "pipeline/parse/python_adapter.h" +#include "pipeline/jit/parse/data_converter.h" +#include "pipeline/jit/parse/python_adapter.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/cast_eliminate.h b/mindspore/ccsrc/frontend/optimizer/irpass/cast_eliminate.h similarity index 96% rename from mindspore/ccsrc/optimizer/irpass/cast_eliminate.h rename to mindspore/ccsrc/frontend/optimizer/irpass/cast_eliminate.h index d98d0b677b..d5222d4310 100644 --- a/mindspore/ccsrc/optimizer/irpass/cast_eliminate.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/cast_eliminate.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_OPTIMIZER_IRPASS_CAST_ELIMINATE_H_ #include "ir/visitor.h" -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/convert.h b/mindspore/ccsrc/frontend/optimizer/irpass/convert.h similarity index 94% rename from mindspore/ccsrc/optimizer/irpass/convert.h rename to mindspore/ccsrc/frontend/optimizer/irpass/convert.h index 3049bafb1e..d887874203 100644 --- a/mindspore/ccsrc/optimizer/irpass/convert.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/convert.h @@ -19,11 +19,11 @@ #include -#include "optimizer/optimizer.h" -#include "optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" #include "ir/visitor.h" #include "ir/func_graph.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace 
mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/env_item_eliminate.h b/mindspore/ccsrc/frontend/optimizer/irpass/env_item_eliminate.h similarity index 99% rename from mindspore/ccsrc/optimizer/irpass/env_item_eliminate.h rename to mindspore/ccsrc/frontend/optimizer/irpass/env_item_eliminate.h index 3f100dcaec..14fd8743ff 100644 --- a/mindspore/ccsrc/optimizer/irpass/env_item_eliminate.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/env_item_eliminate.h @@ -27,9 +27,9 @@ #include "ir/func_graph_cloner.h" #include "ir/optimizer_caller.h" #include "ir/visitor.h" -#include "operator/ops.h" -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/operator/ops.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" #include "utils/symbolic.h" namespace mindspore { diff --git a/mindspore/ccsrc/optimizer/irpass/grad_var_prepare.cc b/mindspore/ccsrc/frontend/optimizer/irpass/grad_var_prepare.cc similarity index 95% rename from mindspore/ccsrc/optimizer/irpass/grad_var_prepare.cc rename to mindspore/ccsrc/frontend/optimizer/irpass/grad_var_prepare.cc index 317d67e792..44c1b62fa5 100644 --- a/mindspore/ccsrc/optimizer/irpass/grad_var_prepare.cc +++ b/mindspore/ccsrc/frontend/optimizer/irpass/grad_var_prepare.cc @@ -14,16 +14,16 @@ * limitations under the License. 
*/ -#include "optimizer/irpass/grad_var_prepare.h" +#include "frontend/optimizer/irpass/grad_var_prepare.h" #include #include #include #include -#include "operator/composite/composite.h" -#include "operator/ops.h" -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/operator/composite/composite.h" +#include "frontend/operator/ops.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" #include "ir/visitor.h" #include "ir/func_graph.h" #include "ir/func_graph_cloner.h" diff --git a/mindspore/ccsrc/optimizer/irpass/grad_var_prepare.h b/mindspore/ccsrc/frontend/optimizer/irpass/grad_var_prepare.h similarity index 90% rename from mindspore/ccsrc/optimizer/irpass/grad_var_prepare.h rename to mindspore/ccsrc/frontend/optimizer/irpass/grad_var_prepare.h index 9713017d12..f6992a87c6 100644 --- a/mindspore/ccsrc/optimizer/irpass/grad_var_prepare.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/grad_var_prepare.h @@ -22,10 +22,10 @@ #include #include -#include "operator/composite/composite.h" -#include "operator/ops.h" -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/operator/composite/composite.h" +#include "frontend/operator/ops.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" #include "ir/visitor.h" #include "ir/func_graph.h" #include "ir/func_graph_cloner.h" diff --git a/mindspore/ccsrc/optimizer/irpass/gradient_eliminate.cc b/mindspore/ccsrc/frontend/optimizer/irpass/gradient_eliminate.cc similarity index 97% rename from mindspore/ccsrc/optimizer/irpass/gradient_eliminate.cc rename to mindspore/ccsrc/frontend/optimizer/irpass/gradient_eliminate.cc index 3347fa9dc0..0d98cffa37 100644 --- a/mindspore/ccsrc/optimizer/irpass/gradient_eliminate.cc +++ b/mindspore/ccsrc/frontend/optimizer/irpass/gradient_eliminate.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "optimizer/irpass/gradient_eliminate.h" +#include "frontend/optimizer/irpass/gradient_eliminate.h" #include diff --git a/mindspore/ccsrc/optimizer/irpass/gradient_eliminate.h b/mindspore/ccsrc/frontend/optimizer/irpass/gradient_eliminate.h similarity index 91% rename from mindspore/ccsrc/optimizer/irpass/gradient_eliminate.h rename to mindspore/ccsrc/frontend/optimizer/irpass/gradient_eliminate.h index 671d9bde49..82312d9e37 100644 --- a/mindspore/ccsrc/optimizer/irpass/gradient_eliminate.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/gradient_eliminate.h @@ -21,12 +21,12 @@ #include #include -#include "optimizer/optimizer.h" -#include "optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" #include "ir/visitor.h" #include "common/utils.h" -#include "operator/ops.h" -#include "optimizer/ad/grad.h" +#include "frontend/operator/ops.h" +#include "frontend/optimizer/ad/grad.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/incorporate_call.h b/mindspore/ccsrc/frontend/optimizer/irpass/incorporate_call.h similarity index 97% rename from mindspore/ccsrc/optimizer/irpass/incorporate_call.h rename to mindspore/ccsrc/frontend/optimizer/irpass/incorporate_call.h index 5842b7bfd6..2f6404458f 100644 --- a/mindspore/ccsrc/optimizer/irpass/incorporate_call.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/incorporate_call.h @@ -22,12 +22,12 @@ #include #include -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" #include "ir/visitor.h" #include "ir/func_graph.h" #include "ir/func_graph_cloner.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/incorporate_getitem.h b/mindspore/ccsrc/frontend/optimizer/irpass/incorporate_getitem.h similarity index 99% rename from 
mindspore/ccsrc/optimizer/irpass/incorporate_getitem.h rename to mindspore/ccsrc/frontend/optimizer/irpass/incorporate_getitem.h index b6c8fb0e18..828e205e4f 100644 --- a/mindspore/ccsrc/optimizer/irpass/incorporate_getitem.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/incorporate_getitem.h @@ -27,9 +27,9 @@ #include "ir/func_graph_cloner.h" #include "ir/optimizer_caller.h" #include "ir/visitor.h" -#include "operator/ops.h" -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/operator/ops.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/indexed_slices_eliminate.h b/mindspore/ccsrc/frontend/optimizer/irpass/indexed_slices_eliminate.h similarity index 94% rename from mindspore/ccsrc/optimizer/irpass/indexed_slices_eliminate.h rename to mindspore/ccsrc/frontend/optimizer/irpass/indexed_slices_eliminate.h index 630d567549..dfe345fe01 100644 --- a/mindspore/ccsrc/optimizer/irpass/indexed_slices_eliminate.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/indexed_slices_eliminate.h @@ -20,10 +20,10 @@ #include #include -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" #include "ir/visitor.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/inline.h b/mindspore/ccsrc/frontend/optimizer/irpass/inline.h similarity index 95% rename from mindspore/ccsrc/optimizer/irpass/inline.h rename to mindspore/ccsrc/frontend/optimizer/irpass/inline.h index 64f192347c..8cafb268b4 100644 --- a/mindspore/ccsrc/optimizer/irpass/inline.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/inline.h @@ -21,12 +21,12 @@ #include #include -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" +#include 
"frontend/optimizer/optimizer.h" #include "ir/visitor.h" #include "ir/func_graph.h" #include "ir/func_graph_cloner.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { @@ -39,7 +39,7 @@ class ReplaceApplicator : public AnfVisitor { } auto fg = GetValueNode(node); - if (fg->has_flag(FUNC_GRAPH_FLAG_DEFER_INLINE)) { + if (fg->has_flag(FUNC_GRAPH_FLAG_DEFER_INLINE) || fg->stub()) { return nullptr; } @@ -110,7 +110,7 @@ class InlinerBase : public AnfVisitor { // G auto fg = GetValueNode(inputs[0]); - if (fg->has_flag(FUNC_GRAPH_FLAG_DEFER_INLINE)) { + if (fg->has_flag(FUNC_GRAPH_FLAG_DEFER_INLINE) || fg->stub()) { return nullptr; } // Do not inline GraphKernel to Cell. diff --git a/mindspore/ccsrc/optimizer/irpass/item_tuple_eliminate.h b/mindspore/ccsrc/frontend/optimizer/irpass/item_tuple_eliminate.h similarity index 98% rename from mindspore/ccsrc/optimizer/irpass/item_tuple_eliminate.h rename to mindspore/ccsrc/frontend/optimizer/irpass/item_tuple_eliminate.h index 202951a254..acd6844ee7 100644 --- a/mindspore/ccsrc/optimizer/irpass/item_tuple_eliminate.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/item_tuple_eliminate.h @@ -23,9 +23,9 @@ #include "ir/optimizer_caller.h" #include "ir/visitor.h" -#include "operator/ops.h" -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/operator/ops.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/mark_interface_fusion.h b/mindspore/ccsrc/frontend/optimizer/irpass/mark_interface_fusion.h similarity index 92% rename from mindspore/ccsrc/optimizer/irpass/mark_interface_fusion.h rename to mindspore/ccsrc/frontend/optimizer/irpass/mark_interface_fusion.h index 6f2bcc187f..8d3839bd9e 100644 --- a/mindspore/ccsrc/optimizer/irpass/mark_interface_fusion.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/mark_interface_fusion.h @@ 
-21,13 +21,13 @@ #include #include -#include "session/anf_runtime_algorithm.h" -#include "optimizer/optimizer.h" -#include "optimizer/irpass.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" #include "ir/visitor.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "utils/graph_utils.h" -#include "operator/composite/composite.h" +#include "frontend/operator/composite/composite.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/merge_addn.h b/mindspore/ccsrc/frontend/optimizer/irpass/merge_addn.h similarity index 98% rename from mindspore/ccsrc/optimizer/irpass/merge_addn.h rename to mindspore/ccsrc/frontend/optimizer/irpass/merge_addn.h index e1e4b8878b..a3cf6e2231 100644 --- a/mindspore/ccsrc/optimizer/irpass/merge_addn.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/merge_addn.h @@ -21,10 +21,10 @@ #include #include -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" #include "ir/visitor.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/minmax_grad.h b/mindspore/ccsrc/frontend/optimizer/irpass/minmax_grad.h similarity index 96% rename from mindspore/ccsrc/optimizer/irpass/minmax_grad.h rename to mindspore/ccsrc/frontend/optimizer/irpass/minmax_grad.h index a426a9fb9b..658a287234 100644 --- a/mindspore/ccsrc/optimizer/irpass/minmax_grad.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/minmax_grad.h @@ -20,10 +20,10 @@ #include #include -#include "optimizer/optimizer.h" -#include "optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" #include "ir/visitor.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git 
a/mindspore/ccsrc/optimizer/irpass/param_replace.h b/mindspore/ccsrc/frontend/optimizer/irpass/param_replace.h similarity index 92% rename from mindspore/ccsrc/optimizer/irpass/param_replace.h rename to mindspore/ccsrc/frontend/optimizer/irpass/param_replace.h index c0c4c832d7..999376e528 100644 --- a/mindspore/ccsrc/optimizer/irpass/param_replace.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/param_replace.h @@ -19,11 +19,11 @@ #include -#include "optimizer/optimizer.h" -#include "optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" #include "ir/visitor.h" -#include "operator/ops.h" -#include "pipeline/parse/parse.h" +#include "frontend/operator/ops.h" +#include "pipeline/jit/parse/parse.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/partial_eliminate.h b/mindspore/ccsrc/frontend/optimizer/irpass/partial_eliminate.h similarity index 95% rename from mindspore/ccsrc/optimizer/irpass/partial_eliminate.h rename to mindspore/ccsrc/frontend/optimizer/irpass/partial_eliminate.h index bc8ef9d8f3..32fc5abc7d 100644 --- a/mindspore/ccsrc/optimizer/irpass/partial_eliminate.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/partial_eliminate.h @@ -21,10 +21,10 @@ #include #include -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" #include "ir/visitor.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/prim_eliminate.h b/mindspore/ccsrc/frontend/optimizer/irpass/prim_eliminate.h similarity index 94% rename from mindspore/ccsrc/optimizer/irpass/prim_eliminate.h rename to mindspore/ccsrc/frontend/optimizer/irpass/prim_eliminate.h index 725c30a6b9..d8c96825c9 100644 --- a/mindspore/ccsrc/optimizer/irpass/prim_eliminate.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/prim_eliminate.h @@ -17,8 
+17,8 @@ #ifndef MINDSPORE_CCSRC_OPTIMIZER_IRPASS_PRIM_ELIMINATE_H_ #define MINDSPORE_CCSRC_OPTIMIZER_IRPASS_PRIM_ELIMINATE_H_ -#include "optimizer/optimizer.h" -#include "optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" #include "ir/visitor.h" namespace mindspore { diff --git a/mindspore/ccsrc/optimizer/irpass/reduce_eliminate.h b/mindspore/ccsrc/frontend/optimizer/irpass/reduce_eliminate.h similarity index 96% rename from mindspore/ccsrc/optimizer/irpass/reduce_eliminate.h rename to mindspore/ccsrc/frontend/optimizer/irpass/reduce_eliminate.h index d2e1d15f91..78b7d3f4f1 100644 --- a/mindspore/ccsrc/optimizer/irpass/reduce_eliminate.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/reduce_eliminate.h @@ -21,11 +21,11 @@ #include #include -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" #include "ir/visitor.h" -#include "operator/ops.h" -#include "pipeline/static_analysis/dshape.h" +#include "frontend/operator/ops.h" +#include "abstract/dshape.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/frontend/optimizer/irpass/ref_eliminate.h b/mindspore/ccsrc/frontend/optimizer/irpass/ref_eliminate.h new file mode 100644 index 0000000000..86eb4e761d --- /dev/null +++ b/mindspore/ccsrc/frontend/optimizer/irpass/ref_eliminate.h @@ -0,0 +1,94 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_OPTIMIZER_IRPASS_REF_ELIMINATE_H_ +#define MINDSPORE_CCSRC_OPTIMIZER_IRPASS_REF_ELIMINATE_H_ + +#include + +#include "ir/pattern_matcher.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" + +namespace mindspore { +namespace opt { +namespace irpass { +// {prim::kPrimMakeRef, X, Y, Z} -> Y +class MakeRefEliminater : public OptimizerCaller { + public: + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { + PatternNode x, y, z; + MATCH_REPLACE(node, PPrimitive(prim::kPrimMakeRef, x, y, z), y); + return nullptr; + } +}; + +// {prim::kPrimGetRefValue, Parameter} -> Parameter +// {prim::kPrimGetRefOrigin, Parameter} -> Parameter +class GetRefParamEliminater : public OptimizerCaller { + public: + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { + PatternNode x; + MATCH_REPLACE_IF(node, PPrimitive(prim::kPrimGetRefValue, x), x, x.CheckFunc(IsParam, node)); + MATCH_REPLACE_IF(node, PPrimitive(prim::kPrimGetRefOrigin, x), x, x.CheckFunc(IsParam, node)); + return nullptr; + } +}; + +// {prim::kPrimGetRefKey, {prim::kPrimMakeRef, X, Y, Z}} -> X +// {prim::kPrimGetRefValue, {prim::kPrimMakeRef, X, Y, Z}} -> Y +// {prim::kPrimGetRefOrigin, {prim::kPrimMakeRef, X, Y, Z}} -> Z +class GetMakeRefEliminater : public OptimizerCaller { + public: + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { + PatternNode x, y, z; + MATCH_REPLACE(node, PPrimitive(prim::kPrimGetRefKey, PPrimitive(prim::kPrimMakeRef, x, y, z)), x); + MATCH_REPLACE(node, PPrimitive(prim::kPrimGetRefValue, PPrimitive(prim::kPrimMakeRef, x, y, z)), y); + MATCH_REPLACE(node, PPrimitive(prim::kPrimGetRefOrigin, PPrimitive(prim::kPrimMakeRef, x, y, z)), z); + + return nullptr; + } +}; + +// IsValueNode +class ReplaceRefkeyByParam : public OptimizerCaller { + public: + 
AnfNodePtr operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) override { + auto RefKeyLambda = [&node, &optimizer]() -> AnfNodePtr { + auto refkey = GetValueNode(node); + auto resource = std::dynamic_pointer_cast(optimizer->resource()); + MS_EXCEPTION_IF_NULL(resource); + + auto top_graph = resource->func_graph(); + MS_EXCEPTION_IF_NULL(top_graph); + + for (const auto &tnode : top_graph->parameters()) { + auto para = tnode->cast(); + if (para != nullptr && para->name() == refkey->tag()) { + return para; + } + } + return nullptr; + }; + PatternNode x; + MATCH_REPLACE_LAMBDA_IF(node, x, RefKeyLambda, x.CheckFunc(IsValueNode, node)); + return nullptr; + } +}; +} // namespace irpass +} // namespace opt +} // namespace mindspore +#endif // MINDSPORE_CCSRC_OPTIMIZER_IRPASS_REF_ELIMINATE_H_ diff --git a/mindspore/ccsrc/optimizer/irpass/reshape_eliminate.h b/mindspore/ccsrc/frontend/optimizer/irpass/reshape_eliminate.h similarity index 96% rename from mindspore/ccsrc/optimizer/irpass/reshape_eliminate.h rename to mindspore/ccsrc/frontend/optimizer/irpass/reshape_eliminate.h index cafc8b796c..27d4bdad3d 100644 --- a/mindspore/ccsrc/optimizer/irpass/reshape_eliminate.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/reshape_eliminate.h @@ -22,10 +22,10 @@ #include "ir/func_graph.h" #include "ir/optimizer_caller.h" #include "ir/visitor.h" -#include "operator/ops.h" -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" -#include "pipeline/static_analysis/dshape.h" +#include "frontend/operator/ops.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" +#include "abstract/dshape.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/special_op_eliminate.h b/mindspore/ccsrc/frontend/optimizer/irpass/special_op_eliminate.h similarity index 97% rename from mindspore/ccsrc/optimizer/irpass/special_op_eliminate.h rename to mindspore/ccsrc/frontend/optimizer/irpass/special_op_eliminate.h index 
b6a4e1c852..01efa85e8d 100644 --- a/mindspore/ccsrc/optimizer/irpass/special_op_eliminate.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/special_op_eliminate.h @@ -25,10 +25,10 @@ #include "ir/optimizer_caller.h" #include "ir/pattern_matcher.h" #include "ir/visitor.h" -#include "operator/ops.h" -#include "optimizer/irpass.h" -#include "optimizer/irpass/prim_eliminate.h" -#include "optimizer/optimizer.h" +#include "frontend/operator/ops.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/irpass/prim_eliminate.h" +#include "frontend/optimizer/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/specialize_transform.h b/mindspore/ccsrc/frontend/optimizer/irpass/specialize_transform.h similarity index 98% rename from mindspore/ccsrc/optimizer/irpass/specialize_transform.h rename to mindspore/ccsrc/frontend/optimizer/irpass/specialize_transform.h index 3db9e7bd51..d8a15f6d83 100644 --- a/mindspore/ccsrc/optimizer/irpass/specialize_transform.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/specialize_transform.h @@ -24,13 +24,13 @@ #include #include -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" #include "ir/visitor.h" #include "ir/manager.h" #include "ir/func_graph.h" #include "ir/func_graph_cloner.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/symbol_resolver.h b/mindspore/ccsrc/frontend/optimizer/irpass/symbol_resolver.h similarity index 92% rename from mindspore/ccsrc/optimizer/irpass/symbol_resolver.h rename to mindspore/ccsrc/frontend/optimizer/irpass/symbol_resolver.h index 7b35cf5451..de9e533550 100644 --- a/mindspore/ccsrc/optimizer/irpass/symbol_resolver.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/symbol_resolver.h @@ -20,12 +20,12 @@ #include #include -#include "optimizer/optimizer.h" 
-#include "optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" #include "ir/visitor.h" -#include "operator/ops.h" -#include "pipeline/parse/data_converter.h" -#include "pipeline/parse/python_adapter.h" +#include "frontend/operator/ops.h" +#include "pipeline/jit/parse/data_converter.h" +#include "pipeline/jit/parse/python_adapter.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/tile_eliminate.h b/mindspore/ccsrc/frontend/optimizer/irpass/tile_eliminate.h similarity index 94% rename from mindspore/ccsrc/optimizer/irpass/tile_eliminate.h rename to mindspore/ccsrc/frontend/optimizer/irpass/tile_eliminate.h index 86ac5bab73..f561e04c10 100644 --- a/mindspore/ccsrc/optimizer/irpass/tile_eliminate.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/tile_eliminate.h @@ -20,10 +20,10 @@ #include #include -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" #include "ir/visitor.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/transpose_eliminate.h b/mindspore/ccsrc/frontend/optimizer/irpass/transpose_eliminate.h similarity index 94% rename from mindspore/ccsrc/optimizer/irpass/transpose_eliminate.h rename to mindspore/ccsrc/frontend/optimizer/irpass/transpose_eliminate.h index de196ea619..70b8898462 100644 --- a/mindspore/ccsrc/optimizer/irpass/transpose_eliminate.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/transpose_eliminate.h @@ -20,10 +20,10 @@ #include #include -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" #include "ir/visitor.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/opt.cc 
b/mindspore/ccsrc/frontend/optimizer/opt.cc similarity index 95% rename from mindspore/ccsrc/optimizer/opt.cc rename to mindspore/ccsrc/frontend/optimizer/opt.cc index 462d08ad3c..44917106fa 100644 --- a/mindspore/ccsrc/optimizer/opt.cc +++ b/mindspore/ccsrc/frontend/optimizer/opt.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "optimizer/opt.h" +#include "frontend/optimizer/opt.h" #include #include @@ -24,7 +24,7 @@ #include "ir/anf.h" #include "ir/manager.h" -#include "optimizer/optimizer.h" +#include "frontend/optimizer/optimizer.h" #include "utils/log_adapter.h" #include "utils/ordered_set.h" @@ -84,13 +84,8 @@ AnfNodePtr Substitution::operator()(const OptimizerPtr &optimizer, const AnfNode } #endif if (optimizer != nullptr && optimizer->is_watch_renormalize() && result != nullptr) { - if (renorm_action_ == FORCE_RENORM) { - optimizer->add_node_to_renormalize(result); - } else { - // renorm_action_ is CHECK_RENORM - if (result->abstract() == nullptr) { - optimizer->add_node_to_renormalize(result); - } + if ((renorm_action_ == FORCE_RENORM) || (result->abstract() == nullptr)) { + optimizer->set_is_untyped_generated(); } } diff --git a/mindspore/ccsrc/optimizer/opt.h b/mindspore/ccsrc/frontend/optimizer/opt.h similarity index 98% rename from mindspore/ccsrc/optimizer/opt.h rename to mindspore/ccsrc/frontend/optimizer/opt.h index 6601d969d2..f440cc71dc 100644 --- a/mindspore/ccsrc/optimizer/opt.h +++ b/mindspore/ccsrc/frontend/optimizer/opt.h @@ -24,7 +24,7 @@ #include "ir/anf.h" #include "ir/func_graph.h" #include "ir/optimizer_caller.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { /* namespace to support opt */ diff --git a/mindspore/ccsrc/optimizer/optimizer.h b/mindspore/ccsrc/frontend/optimizer/optimizer.h similarity index 92% rename from mindspore/ccsrc/optimizer/optimizer.h rename to mindspore/ccsrc/frontend/optimizer/optimizer.h index dc423ed314..a1f11e74d0 100644 --- 
a/mindspore/ccsrc/optimizer/optimizer.h +++ b/mindspore/ccsrc/frontend/optimizer/optimizer.h @@ -31,9 +31,9 @@ #include "debug/anf_ir_dump.h" #include "debug/anf_ir_utils.h" #include "debug/trace.h" -#include "optimizer/opt.h" -#include "pipeline/resource.h" -#include "pipeline/action.h" +#include "frontend/optimizer/opt.h" +#include "pipeline/jit/resource.h" +#include "pipeline/jit/action.h" #include "utils/context/ms_context.h" namespace mindspore { @@ -89,12 +89,18 @@ using OptPassGroupMap = std::vector>; class Optimizer : public std::enable_shared_from_this { public: Optimizer(const std::string &name, const pipeline::ResourceBasePtr &resource_ptr) - : name_(name), resource_(resource_ptr), run_only_once_(false), is_watch_renormalize_(false), is_enable_(true) {} + : name_(name), + resource_(resource_ptr), + run_only_once_(false), + is_watch_renormalize_(false), + is_enable_(true), + is_untyped_generated_(false) {} virtual ~Optimizer() = default; void Init(const OptPassGroupMap &passes, bool run_only_once) { run_only_once_ = run_only_once; is_watch_renormalize_ = false; + is_untyped_generated_ = false; is_on_debug_ = IS_OUTPUT_ON(mindspore::DEBUG); for (auto &iter : passes) { @@ -154,14 +160,14 @@ class Optimizer : public std::enable_shared_from_this { // So generate the args_spec from parameters. 
abstract::AbstractBasePtrList maybe_new_args_spec; if (is_watch_renormalize_) { - if (untyped_nodes_.size() > 0) { + if (is_untyped_generated_) { std::transform(func_graph->parameters().begin(), func_graph->parameters().end(), std::back_inserter(maybe_new_args_spec), [](AnfNodePtr param) -> AbstractBasePtr { return param->abstract(); }); func_graph = pipeline::Renormalize(resource_ptr, func_graph, maybe_new_args_spec); - clear_untyped_nodes(); + clear_is_untyped_generated(); } else { - MS_LOG(INFO) << "Optimizer::step: Skipping Renormalize because untyped_nodes_ is empty."; + MS_LOG(INFO) << "Optimizer::step: Skipping Renormalize because is_untyped_generated_ is False."; } } else { std::transform(func_graph->parameters().begin(), func_graph->parameters().end(), @@ -206,13 +212,8 @@ class Optimizer : public std::enable_shared_from_this { const std::string name() const { return name_; } - void add_node_to_renormalize(AnfNodePtr anode) { - if (std::find(untyped_nodes_.begin(), untyped_nodes_.end(), anode) == untyped_nodes_.end()) { - untyped_nodes_.push_back(anode); - } - } - - void clear_untyped_nodes() { untyped_nodes_.clear(); } + void set_is_untyped_generated() { is_untyped_generated_ = true; } + void clear_is_untyped_generated() { is_untyped_generated_ = false; } void enable_watch_renormalize() { is_watch_renormalize_ = true; } void disable_watch_renormalize() { is_watch_renormalize_ = false; } @@ -232,9 +233,9 @@ class Optimizer : public std::enable_shared_from_this { std::vector passes_; std::vector pass_names_; bool run_only_once_; - std::vector untyped_nodes_; bool is_watch_renormalize_; bool is_enable_; + bool is_untyped_generated_; }; } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/optimizer/pass_group.cc b/mindspore/ccsrc/frontend/optimizer/pass_group.cc similarity index 97% rename from mindspore/ccsrc/optimizer/pass_group.cc rename to mindspore/ccsrc/frontend/optimizer/pass_group.cc index 2d1ab07f7d..3619396215 100644 --- 
a/mindspore/ccsrc/optimizer/pass_group.cc +++ b/mindspore/ccsrc/frontend/optimizer/pass_group.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "optimizer/pass_group.h" +#include "frontend/optimizer/pass_group.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/pass_group.h b/mindspore/ccsrc/frontend/optimizer/pass_group.h similarity index 98% rename from mindspore/ccsrc/optimizer/pass_group.h rename to mindspore/ccsrc/frontend/optimizer/pass_group.h index 895f5a4128..08fa8018d6 100644 --- a/mindspore/ccsrc/optimizer/pass_group.h +++ b/mindspore/ccsrc/frontend/optimizer/pass_group.h @@ -21,7 +21,7 @@ #include #include -#include "optimizer/py_pass.h" +#include "frontend/optimizer/py_pass.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/py_pass.cc b/mindspore/ccsrc/frontend/optimizer/py_pass.cc similarity index 97% rename from mindspore/ccsrc/optimizer/py_pass.cc rename to mindspore/ccsrc/frontend/optimizer/py_pass.cc index 8ce348b22e..c1bf40fcbb 100644 --- a/mindspore/ccsrc/optimizer/py_pass.cc +++ b/mindspore/ccsrc/frontend/optimizer/py_pass.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "optimizer/py_pass.h" +#include "frontend/optimizer/py_pass.h" #include #include #include @@ -22,8 +22,8 @@ #include "ir/func_graph.h" #include "ir/manager.h" -#include "pipeline/parse/parse_base.h" -#include "pipeline/resource.h" +#include "pipeline/jit/parse/parse_base.h" +#include "pipeline/jit/resource.h" namespace mindspore { namespace opt { @@ -54,6 +54,7 @@ void ResolveFuncGraph_(const FuncGraphPtr &fg) { auto manager = Manage(fg, false); parse::python_adapter::set_use_signature_in_resolve(false); parse::ResolveAll(manager); + parse::python_adapter::set_use_signature_in_resolve(true); } bool Match(const AnfNodePtr &pattern, const AnfNodePtr &node, const NodeEquivPtr &equiv_ptr) { diff --git a/mindspore/ccsrc/optimizer/py_pass.h b/mindspore/ccsrc/frontend/optimizer/py_pass.h similarity index 100% rename from mindspore/ccsrc/optimizer/py_pass.h rename to mindspore/ccsrc/frontend/optimizer/py_pass.h diff --git a/mindspore/ccsrc/optimizer/py_pass_manager.cc b/mindspore/ccsrc/frontend/optimizer/py_pass_manager.cc similarity index 96% rename from mindspore/ccsrc/optimizer/py_pass_manager.cc rename to mindspore/ccsrc/frontend/optimizer/py_pass_manager.cc index 1c36e93c9a..86d7067d1c 100644 --- a/mindspore/ccsrc/optimizer/py_pass_manager.cc +++ b/mindspore/ccsrc/frontend/optimizer/py_pass_manager.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "optimizer/py_pass_manager.h" +#include "frontend/optimizer/py_pass_manager.h" #include #include @@ -21,7 +21,7 @@ #include #include "ir/manager.h" -#include "optimizer/pass_group.h" +#include "frontend/optimizer/pass_group.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/py_pass_manager.h b/mindspore/ccsrc/frontend/optimizer/py_pass_manager.h similarity index 92% rename from mindspore/ccsrc/optimizer/py_pass_manager.h rename to mindspore/ccsrc/frontend/optimizer/py_pass_manager.h index eaeefce213..84868862a7 100644 --- a/mindspore/ccsrc/optimizer/py_pass_manager.h +++ b/mindspore/ccsrc/frontend/optimizer/py_pass_manager.h @@ -23,13 +23,13 @@ #include "ir/anf.h" #include "ir/func_graph.h" -#include "ir/primitive.h" +#include "ir/primitive_py.h" #include "utils/graph_utils.h" #include "common/utils.h" -#include "pipeline/parse/resolve.h" -#include "optimizer/py_pass.h" -#include "optimizer/pass_group.h" +#include "pipeline/jit/parse/resolve.h" +#include "frontend/optimizer/py_pass.h" +#include "frontend/optimizer/pass_group.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/parallel/CMakeLists.txt b/mindspore/ccsrc/frontend/parallel/CMakeLists.txt similarity index 63% rename from mindspore/ccsrc/parallel/CMakeLists.txt rename to mindspore/ccsrc/frontend/parallel/CMakeLists.txt index 940b1ed1d8..d2a099cf41 100644 --- a/mindspore/ccsrc/parallel/CMakeLists.txt +++ b/mindspore/ccsrc/frontend/parallel/CMakeLists.txt @@ -1,7 +1,8 @@ file(GLOB_RECURSE _PARALLEL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") +list(REMOVE_ITEM _PARALLEL_SRC_FILES "ps/util.cc" "ps/scheduler.cc" "ps/optimizer_info.cc" "ps/optimizer_info_builder.cc") if (ENABLE_DUMP_PROTO) list(REMOVE_ITEM _PARALLEL_SRC_FILES "parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc") endif () set_property(SOURCE ${_PARALLEL_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PARALLEL) 
-add_library(_mindspore_parallel_obj OBJECT ${_PARALLEL_SRC_FILES}) +add_library(_mindspore_frontend_parallel_obj OBJECT ${_PARALLEL_SRC_FILES}) diff --git a/mindspore/ccsrc/parallel/allreduce_fusion/allreduce_fusion.cc b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_fusion.cc similarity index 98% rename from mindspore/ccsrc/parallel/allreduce_fusion/allreduce_fusion.cc rename to mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_fusion.cc index 30173e533c..70ae5a7d20 100644 --- a/mindspore/ccsrc/parallel/allreduce_fusion/allreduce_fusion.cc +++ b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_fusion.cc @@ -14,16 +14,16 @@ * limitations under the License. */ -#include "parallel/allreduce_fusion/allreduce_fusion.h" +#include "frontend/parallel/allreduce_fusion/allreduce_fusion.h" #include #include #include #include #include "ir/func_graph.h" -#include "parallel/costmodel_context.h" -#include "parallel/graph_util/node_info.h" -#include "parallel/status.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/costmodel_context.h" +#include "frontend/parallel/graph_util/node_info.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/step_parallel.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/allreduce_fusion/allreduce_fusion.h b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_fusion.h similarity index 96% rename from mindspore/ccsrc/parallel/allreduce_fusion/allreduce_fusion.h rename to mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_fusion.h index 43a9935095..7383c477a6 100644 --- a/mindspore/ccsrc/parallel/allreduce_fusion/allreduce_fusion.h +++ b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_fusion.h @@ -20,8 +20,8 @@ #include #include #include "ir/anf.h" -#include "parallel/allreduce_fusion/allreduce_graph.h" -#include "parallel/status.h" +#include "frontend/parallel/allreduce_fusion/allreduce_graph.h" 
+#include "frontend/parallel/status.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/allreduce_fusion/allreduce_graph.cc b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_graph.cc similarity index 98% rename from mindspore/ccsrc/parallel/allreduce_fusion/allreduce_graph.cc rename to mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_graph.cc index 2a98a38add..ca47b0fa97 100644 --- a/mindspore/ccsrc/parallel/allreduce_fusion/allreduce_graph.cc +++ b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_graph.cc @@ -14,11 +14,11 @@ * limitations under the License. */ -#include "parallel/allreduce_fusion/allreduce_graph.h" +#include "frontend/parallel/allreduce_fusion/allreduce_graph.h" #include #include #include "ir/anf.h" -#include "parallel/allreduce_fusion/allreduce_node.h" +#include "frontend/parallel/allreduce_fusion/allreduce_node.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/allreduce_fusion/allreduce_graph.h b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_graph.h similarity index 97% rename from mindspore/ccsrc/parallel/allreduce_fusion/allreduce_graph.h rename to mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_graph.h index b2084b735c..a47039f070 100644 --- a/mindspore/ccsrc/parallel/allreduce_fusion/allreduce_graph.h +++ b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_graph.h @@ -24,8 +24,8 @@ #include #include #include "ir/anf.h" -#include "parallel/allreduce_fusion/allreduce_node.h" -#include "parallel/status.h" +#include "frontend/parallel/allreduce_fusion/allreduce_node.h" +#include "frontend/parallel/status.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/allreduce_fusion/allreduce_node.cc b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_node.cc similarity index 96% rename from mindspore/ccsrc/parallel/allreduce_fusion/allreduce_node.cc rename to 
mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_node.cc index 113d4ec59b..1c478887df 100644 --- a/mindspore/ccsrc/parallel/allreduce_fusion/allreduce_node.cc +++ b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_node.cc @@ -14,9 +14,9 @@ * limitations under the License. */ -#include "parallel/allreduce_fusion/allreduce_node.h" +#include "frontend/parallel/allreduce_fusion/allreduce_node.h" #include -#include "parallel/tensor_layout/tensor_layout.h" +#include "frontend/parallel/tensor_layout/tensor_layout.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/allreduce_fusion/allreduce_node.h b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_node.h similarity index 98% rename from mindspore/ccsrc/parallel/allreduce_fusion/allreduce_node.h rename to mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_node.h index db1c4e3f2e..6538381f27 100644 --- a/mindspore/ccsrc/parallel/allreduce_fusion/allreduce_node.h +++ b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_node.h @@ -22,7 +22,7 @@ #include #include #include "ir/anf.h" -#include "parallel/status.h" +#include "frontend/parallel/status.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.cc b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/step_allreduce_fusion.cc similarity index 90% rename from mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.cc rename to mindspore/ccsrc/frontend/parallel/allreduce_fusion/step_allreduce_fusion.cc index 999c4a85a9..b669fa7782 100644 --- a/mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.cc +++ b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/step_allreduce_fusion.cc @@ -14,14 +14,14 @@ * limitations under the License. 
*/ -#include "parallel/allreduce_fusion/step_allreduce_fusion.h" +#include "frontend/parallel/allreduce_fusion/step_allreduce_fusion.h" #include #include -#include "optimizer/optimizer.h" -#include "parallel/allreduce_fusion/allreduce_fusion.h" -#include "parallel/context.h" -#include "parallel/graph_util/graph_info.h" -#include "parallel/status.h" +#include "frontend/optimizer/optimizer.h" +#include "frontend/parallel/allreduce_fusion/allreduce_fusion.h" +#include "frontend/parallel/context.h" +#include "frontend/parallel/graph_util/graph_info.h" +#include "frontend/parallel/status.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.h b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/step_allreduce_fusion.h similarity index 96% rename from mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.h rename to mindspore/ccsrc/frontend/parallel/allreduce_fusion/step_allreduce_fusion.h index 2343a7a2fe..2612e71984 100644 --- a/mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.h +++ b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/step_allreduce_fusion.h @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_PARALLEL_ALLREDUCE_FUSION_STEP_ALLREDUCE_FUSION_H_ #define MINDSPORE_CCSRC_PARALLEL_ALLREDUCE_FUSION_STEP_ALLREDUCE_FUSION_H_ -#include "optimizer/optimizer.h" +#include "frontend/optimizer/optimizer.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/auto_parallel/costmodel.cc b/mindspore/ccsrc/frontend/parallel/auto_parallel/costmodel.cc similarity index 97% rename from mindspore/ccsrc/parallel/auto_parallel/costmodel.cc rename to mindspore/ccsrc/frontend/parallel/auto_parallel/costmodel.cc index 65e9acf714..531a5cd7f6 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/costmodel.cc +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/costmodel.cc @@ -14,11 +14,11 @@ * limitations under the License. 
*/ -#include "parallel/auto_parallel/costmodel.h" +#include "frontend/parallel/auto_parallel/costmodel.h" #include #include #include -#include "parallel/auto_parallel/graph_costmodel.h" +#include "frontend/parallel/auto_parallel/graph_costmodel.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/auto_parallel/costmodel.h b/mindspore/ccsrc/frontend/parallel/auto_parallel/costmodel.h similarity index 99% rename from mindspore/ccsrc/parallel/auto_parallel/costmodel.h rename to mindspore/ccsrc/frontend/parallel/auto_parallel/costmodel.h index 8b92e18cd8..cc4508681b 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/costmodel.h +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/costmodel.h @@ -22,8 +22,8 @@ #include #include #include -#include "parallel/strategy.h" -#include "parallel/tensor_layout/tensor_info.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/tensor_layout/tensor_info.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/auto_parallel/dp_algo_costmodel.cc b/mindspore/ccsrc/frontend/parallel/auto_parallel/dp_algo_costmodel.cc similarity index 99% rename from mindspore/ccsrc/parallel/auto_parallel/dp_algo_costmodel.cc rename to mindspore/ccsrc/frontend/parallel/auto_parallel/dp_algo_costmodel.cc index 72451fab57..9408596111 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/dp_algo_costmodel.cc +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/dp_algo_costmodel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "parallel/auto_parallel/dp_algo_costmodel.h" +#include "frontend/parallel/auto_parallel/dp_algo_costmodel.h" #include #include diff --git a/mindspore/ccsrc/parallel/auto_parallel/dp_algo_costmodel.h b/mindspore/ccsrc/frontend/parallel/auto_parallel/dp_algo_costmodel.h similarity index 98% rename from mindspore/ccsrc/parallel/auto_parallel/dp_algo_costmodel.h rename to mindspore/ccsrc/frontend/parallel/auto_parallel/dp_algo_costmodel.h index e3fbfba5a7..812f375f0b 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/dp_algo_costmodel.h +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/dp_algo_costmodel.h @@ -21,8 +21,8 @@ #include #include #include "ir/value.h" -#include "parallel/auto_parallel/edge_costmodel.h" -#include "parallel/auto_parallel/graph_costmodel.h" +#include "frontend/parallel/auto_parallel/edge_costmodel.h" +#include "frontend/parallel/auto_parallel/graph_costmodel.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.cc b/mindspore/ccsrc/frontend/parallel/auto_parallel/edge_costmodel.cc similarity index 98% rename from mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.cc rename to mindspore/ccsrc/frontend/parallel/auto_parallel/edge_costmodel.cc index 60256a3ae3..e3f1de7207 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.cc +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/edge_costmodel.cc @@ -14,15 +14,15 @@ * limitations under the License. 
*/ -#include "parallel/auto_parallel/edge_costmodel.h" +#include "frontend/parallel/auto_parallel/edge_costmodel.h" #include #include #include #include -#include "parallel/auto_parallel/costmodel.h" -#include "parallel/auto_parallel/graph_costmodel.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/parallel/auto_parallel/costmodel.h" +#include "frontend/parallel/auto_parallel/graph_costmodel.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.h b/mindspore/ccsrc/frontend/parallel/auto_parallel/edge_costmodel.h similarity index 97% rename from mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.h rename to mindspore/ccsrc/frontend/parallel/auto_parallel/edge_costmodel.h index 2a5ed3b2a4..3fffd1b86d 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.h +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/edge_costmodel.h @@ -23,10 +23,10 @@ #include #include #include "common/utils.h" -#include "parallel/auto_parallel/costmodel.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/tensor_layout/tensor_info.h" -#include "parallel/tensor_layout/tensor_layout.h" +#include "frontend/parallel/auto_parallel/costmodel.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/tensor_layout/tensor_info.h" +#include "frontend/parallel/tensor_layout/tensor_layout.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.cc b/mindspore/ccsrc/frontend/parallel/auto_parallel/graph_costmodel.cc similarity index 99% rename from mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.cc rename to mindspore/ccsrc/frontend/parallel/auto_parallel/graph_costmodel.cc index 05be097e6a..1c1fc3a700 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.cc +++ 
b/mindspore/ccsrc/frontend/parallel/auto_parallel/graph_costmodel.cc @@ -21,9 +21,9 @@ #include #include -#include "parallel/auto_parallel/graph_costmodel.h" -#include "parallel/ops_info/reshape_info.h" -#include "parallel/step_auto_parallel.h" +#include "frontend/parallel/auto_parallel/graph_costmodel.h" +#include "frontend/parallel/ops_info/reshape_info.h" +#include "frontend/parallel/step_auto_parallel.h" namespace mindspore { namespace parallel { @@ -41,7 +41,6 @@ bool FULLY_USE_DEVICES = DEFAULT_FULLY_USE_DEVICES; bool ELEMENTWISE_OP_STRA_FOLLOW = DEFAULT_ELEMENTWISE_OP_STRA_FOLLOW; bool MULTI_SUBGRAPHS = DEFAULT_IS_MULTI_SUBGRAPHS; int32_t RUN_PHASE = DEFAULT_RUN_PHASE; -constexpr char RESHAPEINFO[] = "ReshapeInfo"; void CostGraph::SetDeviceMemoryAndCostParameter() { MS_EXCEPTION_IF_NULL(CostModelContext::GetInstance()); diff --git a/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.h b/mindspore/ccsrc/frontend/parallel/auto_parallel/graph_costmodel.h similarity index 97% rename from mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.h rename to mindspore/ccsrc/frontend/parallel/auto_parallel/graph_costmodel.h index 3b8b389d81..87f13e3383 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.h +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/graph_costmodel.h @@ -22,12 +22,12 @@ #include #include #include -#include "../../common.h" +#include "mindspore/ccsrc/common.h" #include "common/utils.h" -#include "parallel/auto_parallel/edge_costmodel.h" -#include "parallel/costmodel_context.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/ops_info/tmp_identity_info.h" +#include "frontend/parallel/auto_parallel/edge_costmodel.h" +#include "frontend/parallel/costmodel_context.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/ops_info/tmp_identity_info.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.cc 
b/mindspore/ccsrc/frontend/parallel/auto_parallel/operator_costmodel.cc similarity index 99% rename from mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.cc rename to mindspore/ccsrc/frontend/parallel/auto_parallel/operator_costmodel.cc index 8ebfdb7d13..aaf3fdff3c 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.cc +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/operator_costmodel.cc @@ -14,12 +14,12 @@ * limitations under the License. */ -#include "parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" #include #include -#include "parallel/device_matrix.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.h b/mindspore/ccsrc/frontend/parallel/auto_parallel/operator_costmodel.h similarity index 99% rename from mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.h rename to mindspore/ccsrc/frontend/parallel/auto_parallel/operator_costmodel.h index a08a4dbb13..dda597bd1f 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.h +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/operator_costmodel.h @@ -19,8 +19,8 @@ #include #include -#include "parallel/device_manager.h" -#include "parallel/tensor_layout/tensor_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/tensor_layout/tensor_info.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.cc b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_cost.cc similarity index 99% rename from mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.cc rename to mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_cost.cc index 9fb79ceee4..0a7e6c59d4 100644 --- 
a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.cc +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_cost.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "parallel/auto_parallel/rec_core/rec_cost.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_cost.h" #include #include diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.h b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_cost.h similarity index 98% rename from mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.h rename to mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_cost.h index fb4fc27164..563bf4598a 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.h +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_cost.h @@ -23,8 +23,8 @@ #include #include -#include "parallel/auto_parallel/rec_core/rec_graph.h" -#include "parallel/auto_parallel/rec_core/rec_strategy.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_graph.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.cc b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.cc similarity index 99% rename from mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.cc rename to mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.cc index 9de71231c0..68b776155a 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.cc +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.cc @@ -14,17 +14,17 @@ * limitations under the License. 
*/ -#include "parallel/auto_parallel/rec_core/rec_generate_strategy.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.h" #include #include #include #include "ir/value.h" -#include "parallel/auto_parallel/rec_core/rec_parse_graph.h" -#include "parallel/auto_parallel/rec_core/rec_partition.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_parse_graph.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_partition.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { @@ -168,12 +168,11 @@ std::vector> PrepareGatherV2(const std::vector s) { std::vector> strategies; - int32_t axis = 0; auto axis_input = GetValue(ops[iter_ops]->input_value().at(2)); if (axis_input < 0) { axis_input += SizeToInt(ops[iter_ops]->inputs_tensor_info()[0].shape().size()); } - axis = axis_input; + int32_t axis = axis_input; if (axis >= SizeToInt(s.size())) { MS_LOG(EXCEPTION) << "Failure: GatherV2' axis out of range."; } diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.h b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.h similarity index 98% rename from mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.h rename to mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.h index e82efe6798..9acd05e0a9 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.h +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.h @@ -22,8 +22,8 @@ #include #include -#include "parallel/auto_parallel/rec_core/rec_graph.h" -#include "parallel/ops_info/operator_info.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_graph.h" +#include "frontend/parallel/ops_info/operator_info.h" namespace mindspore { namespace parallel { diff --git 
a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_graph.h b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_graph.h similarity index 94% rename from mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_graph.h rename to mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_graph.h index 9007218d15..15b8220016 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_graph.h +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_graph.h @@ -21,8 +21,8 @@ #include #include -#include "parallel/auto_parallel/rec_core/rec_strategy.h" -#include "parallel/auto_parallel/rec_core/rec_tensor.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_strategy.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_tensor.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.cc b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_parse_graph.cc similarity index 91% rename from mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.cc rename to mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_parse_graph.cc index c0412e9108..a393c825df 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.cc +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_parse_graph.cc @@ -14,18 +14,17 @@ * limitations under the License. 
*/ -#include "parallel/auto_parallel/rec_core/rec_parse_graph.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_parse_graph.h" #include #include #include #include -#include #include "ir/value.h" -#include "parallel/auto_parallel/rec_core/rec_graph.h" -#include "parallel/auto_parallel/rec_core/rec_tensor.h" -#include "parallel/ops_info/operator_info.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_graph.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_tensor.h" +#include "frontend/parallel/ops_info/operator_info.h" namespace mindspore { namespace parallel { @@ -215,23 +214,16 @@ std::shared_ptr EliminateGraph(const std::shared_ptr &graph, const std::shared_ptr>> &eli_list, const std::shared_ptr> &index_list) { MS_EXCEPTION_IF_NULL(graph); - static const std::set elementwise_type = { - OperatorType::kRecReLU, OperatorType::kRecLog, OperatorType::kRecExp, OperatorType::kRecAdd, - OperatorType::kRecElmWiseOp, OperatorType::kRecBiasAdd, OperatorType::kRecSub, OperatorType::kRecMul, - OperatorType::kRecDiv, OperatorType::kRecSqueeze, OperatorType::kRecReduce, OperatorType::kRecCast, - OperatorType::kRecReshape, OperatorType::kRecGatherV2, OperatorType::kRecArgWithValue}; for (size_t node_index = 0; node_index < (size_t)graph->nodes.size(); node_index++) { auto type = graph->nodes[node_index].apply.op_type; - if (elementwise_type.find(type) != elementwise_type.end()) { + if (ElementWiseOpType.find(type) != ElementWiseOpType.end()) { Eliminate_Aux(node_index, graph, eli_list); } } - index_list->reserve(graph->nodes.size()); for (size_t i = 0; i < (size_t)graph->nodes.size(); i++) { index_list->push_back(i); } - for (size_t i = 0; i < (size_t)eli_list->size(); i++) { if (eli_list->at(i)[0] >= index_list->size()) { MS_LOG(EXCEPTION) << "Failure: Operators' elements out of range."; @@ -241,13 +233,11 @@ std::shared_ptr EliminateGraph(const std::shared_ptr &graph, index_list->at(j)--; } } - std::shared_ptr new_graph(new Graph); for (size_t i = 0; i 
< graph->nodes.size(); i++) { if (index_list->at(i) > SIZE_MAX / 2) { continue; } - new_graph->nodes.push_back(graph->nodes[i]); auto *node_in = &new_graph->nodes[index_list->at(i)].node_in; for (size_t j = node_in->size(); j > 0; j--) { diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.h b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_parse_graph.h similarity index 90% rename from mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.h rename to mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_parse_graph.h index 66fc82b8ce..4d0c02f5fe 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.h +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_parse_graph.h @@ -22,12 +22,19 @@ #include #include #include +#include -#include "parallel/auto_parallel/rec_core/rec_graph.h" -#include "parallel/ops_info/operator_info.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_graph.h" +#include "frontend/parallel/ops_info/operator_info.h" namespace mindspore { namespace parallel { +static const std::set ElementWiseOpType = { + OperatorType::kRecReLU, OperatorType::kRecLog, OperatorType::kRecExp, OperatorType::kRecAdd, + OperatorType::kRecElmWiseOp, OperatorType::kRecBiasAdd, OperatorType::kRecSub, OperatorType::kRecMul, + OperatorType::kRecDiv, OperatorType::kRecSqueeze, OperatorType::kRecReduce, OperatorType::kRecCast, + OperatorType::kRecReshape, OperatorType::kRecGatherV2, OperatorType::kRecArgWithValue}; + const std::map DictOpType{ {MATMUL, OperatorType::kRecMatMul}, {CONV2D, OperatorType::kRecConvolution}, diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.cc b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_partition.cc similarity index 99% rename from mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.cc rename to mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_partition.cc index d5200f54d8..97d230a49f 
100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.cc +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_partition.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "parallel/auto_parallel/rec_core/rec_partition.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_partition.h" #include #include @@ -25,7 +25,7 @@ #include #include "ir/anf.h" -#include "parallel/status.h" +#include "frontend/parallel/status.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.h b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_partition.h similarity index 87% rename from mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.h rename to mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_partition.h index c98f3317f8..528163e4d3 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.h +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_partition.h @@ -25,10 +25,10 @@ #include #include -#include "parallel/auto_parallel/rec_core/rec_cost.h" -#include "parallel/auto_parallel/rec_core/rec_graph.h" -#include "parallel/auto_parallel/rec_core/rec_strategy.h" -#include "parallel/status.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_cost.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_graph.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_strategy.h" +#include "frontend/parallel/status.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_strategy.h b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_strategy.h similarity index 100% rename from mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_strategy.h rename to mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_strategy.h diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_tensor.h 
b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_tensor.h similarity index 94% rename from mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_tensor.h rename to mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_tensor.h index 51ffca4023..315c52c867 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_tensor.h +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_tensor.h @@ -17,7 +17,7 @@ #ifndef PARALLEL_AUTO_PARALLEL_REC_TENSOR_H_ #define PARALLEL_AUTO_PARALLEL_REC_TENSOR_H_ -#include "parallel/auto_parallel/rec_core/rec_strategy.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/context.cc b/mindspore/ccsrc/frontend/parallel/context.cc similarity index 98% rename from mindspore/ccsrc/parallel/context.cc rename to mindspore/ccsrc/frontend/parallel/context.cc index 062d814aa0..7164660be0 100644 --- a/mindspore/ccsrc/parallel/context.cc +++ b/mindspore/ccsrc/frontend/parallel/context.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "parallel/context.h" +#include "frontend/parallel/context.h" #include #include @@ -25,7 +25,7 @@ #include #include "common/utils.h" -#include "parallel/device_manager.h" +#include "frontend/parallel/device_manager.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/context.h b/mindspore/ccsrc/frontend/parallel/context.h similarity index 97% rename from mindspore/ccsrc/parallel/context.h rename to mindspore/ccsrc/frontend/parallel/context.h index 6a503ca7ed..1bb40d5c29 100644 --- a/mindspore/ccsrc/parallel/context.h +++ b/mindspore/ccsrc/frontend/parallel/context.h @@ -23,13 +23,13 @@ #include #include -#include "parallel/ops_info/ops_utils.h" -#include "parallel/status.h" +#include "frontend/parallel/ops_info/ops_utils.h" +#include "frontend/parallel/status.h" #include "utils/convert_utils.h" #include "ir/anf.h" #include "ir/func_graph.h" #include "debug/info.h" -#include "pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/costmodel_context.cc b/mindspore/ccsrc/frontend/parallel/costmodel_context.cc similarity index 96% rename from mindspore/ccsrc/parallel/costmodel_context.cc rename to mindspore/ccsrc/frontend/parallel/costmodel_context.cc index 92aff29557..67d087eabd 100644 --- a/mindspore/ccsrc/parallel/costmodel_context.cc +++ b/mindspore/ccsrc/frontend/parallel/costmodel_context.cc @@ -14,12 +14,12 @@ * limitations under the License. 
*/ -#include "parallel/costmodel_context.h" +#include "frontend/parallel/costmodel_context.h" #include -#include "parallel/allreduce_fusion/allreduce_fusion.h" -#include "parallel/auto_parallel/graph_costmodel.h" +#include "frontend/parallel/allreduce_fusion/allreduce_fusion.h" +#include "frontend/parallel/auto_parallel/graph_costmodel.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/costmodel_context.h b/mindspore/ccsrc/frontend/parallel/costmodel_context.h similarity index 100% rename from mindspore/ccsrc/parallel/costmodel_context.h rename to mindspore/ccsrc/frontend/parallel/costmodel_context.h diff --git a/mindspore/ccsrc/parallel/device.h b/mindspore/ccsrc/frontend/parallel/device.h similarity index 97% rename from mindspore/ccsrc/parallel/device.h rename to mindspore/ccsrc/frontend/parallel/device.h index 8c3174ae55..c9633623d2 100644 --- a/mindspore/ccsrc/parallel/device.h +++ b/mindspore/ccsrc/frontend/parallel/device.h @@ -21,7 +21,7 @@ #include #include -#include "parallel/status.h" +#include "frontend/parallel/status.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/device_manager.cc b/mindspore/ccsrc/frontend/parallel/device_manager.cc similarity index 99% rename from mindspore/ccsrc/parallel/device_manager.cc rename to mindspore/ccsrc/frontend/parallel/device_manager.cc index 45628bec65..d3657afdb8 100644 --- a/mindspore/ccsrc/parallel/device_manager.cc +++ b/mindspore/ccsrc/frontend/parallel/device_manager.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "parallel/device_manager.h" +#include "frontend/parallel/device_manager.h" #include #include @@ -23,7 +23,7 @@ #include #include -#include "parallel/step_parallel.h" +#include "frontend/parallel/step_parallel.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/device_manager.h b/mindspore/ccsrc/frontend/parallel/device_manager.h similarity index 95% rename from mindspore/ccsrc/parallel/device_manager.h rename to mindspore/ccsrc/frontend/parallel/device_manager.h index 3afafe6a9c..654acd9dff 100644 --- a/mindspore/ccsrc/parallel/device_manager.h +++ b/mindspore/ccsrc/frontend/parallel/device_manager.h @@ -26,11 +26,11 @@ #include #include "common/utils.h" -#include "parallel/device.h" -#include "parallel/device_matrix.h" -#include "parallel/group_manager.h" -#include "parallel/status.h" -#include "parallel/strategy.h" +#include "frontend/parallel/device.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/group_manager.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/strategy.h" #include "utils/convert_utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/device_matrix.cc b/mindspore/ccsrc/frontend/parallel/device_matrix.cc similarity index 97% rename from mindspore/ccsrc/parallel/device_matrix.cc rename to mindspore/ccsrc/frontend/parallel/device_matrix.cc index 3c9467a223..9cc85d9701 100644 --- a/mindspore/ccsrc/parallel/device_matrix.cc +++ b/mindspore/ccsrc/frontend/parallel/device_matrix.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "parallel/device_matrix.h" +#include "frontend/parallel/device_matrix.h" #include #include @@ -23,8 +23,8 @@ #include #include -#include "parallel/ops_info/operator_info.h" -#include "parallel/status.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/status.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/device_matrix.h b/mindspore/ccsrc/frontend/parallel/device_matrix.h similarity index 97% rename from mindspore/ccsrc/parallel/device_matrix.h rename to mindspore/ccsrc/frontend/parallel/device_matrix.h index 295bf33836..f1e7acec39 100644 --- a/mindspore/ccsrc/parallel/device_matrix.h +++ b/mindspore/ccsrc/frontend/parallel/device_matrix.h @@ -21,7 +21,7 @@ #include #include -#include "parallel/status.h" +#include "frontend/parallel/status.h" #include "utils/convert_utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/dynamic_creator.h b/mindspore/ccsrc/frontend/parallel/dynamic_creator.h similarity index 96% rename from mindspore/ccsrc/parallel/dynamic_creator.h rename to mindspore/ccsrc/frontend/parallel/dynamic_creator.h index f8e1d62d0a..3ba40fade9 100644 --- a/mindspore/ccsrc/parallel/dynamic_creator.h +++ b/mindspore/ccsrc/frontend/parallel/dynamic_creator.h @@ -22,8 +22,8 @@ #include #include -#include "parallel/ops_info/ops_info_head_files.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/ops_info/ops_info_head_files.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { @@ -132,6 +132,7 @@ REGISTER(SqueezeInfo); REGISTER(SigmoidCrossEntropyWithLogitsInfo); REGISTER(SquareInfo); REGISTER(GatherV2PInfo); +REGISTER(EmbeddingLookupInfo); } // namespace parallel } // namespace mindspore diff --git a/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.cc b/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.cc new file mode 100644 index 0000000000..30c25e5f26 --- /dev/null +++ 
b/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.cc @@ -0,0 +1,175 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "frontend/parallel/graph_util/generate_graph.h" + +#include +#include +#include +#include + +using mindspore::tensor::Tensor; + +namespace mindspore { +namespace parallel { +std::string GetOpPythonPath(const OperatorName &op_name) { + // almost all ops are defined in two main paths + const std::string ops_module = OP_PATH; + const std::string inner_ops_module = INNER_OP_PATH; + py::module mod = py::module::import(common::SafeCStr(ops_module)); + py::module inner_mod = py::module::import(common::SafeCStr(inner_ops_module)); + if (!py::hasattr(mod, common::SafeCStr(op_name))) { + if (!py::hasattr(inner_mod, common::SafeCStr(op_name))) { + MS_LOG(EXCEPTION) << ops_module << " or " << inner_ops_module << " don't have op:" << op_name; + } + return inner_ops_module; + } + return ops_module; +} + +ValuePtr CreatOpInstance(const OperatorAttrs &attrs, const OperatorName &op_name, const std::string &instance_name) { + std::string op_path = GetOpPythonPath(op_name); + py::module mod = py::module::import(common::SafeCStr(op_path)); + if (!py::hasattr(mod, common::SafeCStr(op_name))) { + MS_LOG(ERROR) << "Failure: op_path:" << op_path << " don't have attr " << op_name; + return nullptr; + } + std::vector arg_list; + (void)std::transform(attrs.begin(), attrs.end(), 
std::back_inserter(arg_list), + [](const Attr &attr) { return ValuePtrToPyData(attr.second); }); + py::object obj = + parse::python_adapter::CallPyFn(GET_OP_FUNCTION_PATH, GET_OP_FUNCTION, op_name, op_path, instance_name, arg_list); + ValuePtr op_instance = nullptr; + bool succ = parse::ConvertData(obj, &op_instance); + if (!succ) { + MS_LOG(ERROR) << "Failure:get Python op " << op_path << " from " << op_name << " fail"; + return nullptr; + } + return op_instance; +} + +AnfNodePtr ValuePtrToAnfNodePtr(const ValuePtr &value_ptr) { + auto value_node = NewValueNode(value_ptr); + MS_EXCEPTION_IF_NULL(value_node); + return value_node->cast(); +} + +static std::unordered_map int_tensor_map = {}; +AnfNodePtr CreateInt32Tensor(int32_t value) { + auto it = int_tensor_map.find(value); + if (it != int_tensor_map.end()) { + return it->second; + } + mindspore::tensor::TensorPtr tensor_ptr = std::make_shared(py::int_(value), kInt32); + ValuePtr value_ptr = MakeValue(tensor_ptr); + auto anf_node_ptr = ValuePtrToAnfNodePtr(value_ptr); + int_tensor_map[value] = anf_node_ptr; + return anf_node_ptr; +} + +AnfNodePtr CreatTypeInt(int32_t value) { + ValuePtr value_ptr = MakeValue(std::make_shared(value)); + return ValuePtrToAnfNodePtr(value_ptr); +} + +AnfNodePtr CreatInt32Imm(int32_t value) { + ValuePtr value_ptr = MakeValue(std::make_shared(value)); + return ValuePtrToAnfNodePtr(value_ptr); +} + +std::string GetInstanceNameByCNode(const CNodePtr &cnode) { + PrimitivePtr prim = GetValueNode(cnode->input(0)); + if (!prim) { + MS_LOG(EXCEPTION) << "The first input of the cnode is not a PrimitivePtr."; + } + std::string instance_name = prim->instance_name(); + return HashInstanceName(instance_name); +} + +std::string HashInstanceName(const std::string &name) { + auto using_hash_name = common::GetEnv(USING_HASH_NAME); + std::string instance_name; + if ((using_hash_name.empty()) || (using_hash_name == "on")) { + instance_name = HashName(name); + } else { + instance_name = name; + } + 
return instance_name; +} + +Status GenerateGraph::Init(const CNodePtr &cnode) { + if (!cnode) { + MS_LOG(ERROR) << "Init:cnode is nullptr"; + return FAILED; + } + cnode_ = cnode; + func_graph_ = cnode->func_graph(); + if (!func_graph_) { + MS_LOG(ERROR) << "Init:func_graph_ is nullptr"; + return FAILED; + } + manager_ = func_graph_->manager(); + if (!manager_) { + MS_LOG(ERROR) << "Init:manager_ is nullptr"; + return FAILED; + } + scope_ = cnode_->scope(); + if (!scope_) { + MS_LOG(ERROR) << "Init:scope_ is nullptr"; + return FAILED; + } + virtual_input_node_ = std::make_shared(nullptr); + virtual_input_node_->set_scope(scope_); + instance_name_base_ = GetInstanceNameByCNode(cnode_); + name_idx_ = 0; + return SUCCESS; +} + +AnfNodePtr GenerateGraph::PushBack(const std::vector &inputs) { + CNodePtr cnode = func_graph_->NewCNode(inputs); // using NewCNode to creat anfnode + MS_EXCEPTION_IF_NULL(cnode); + cnode->set_scope(scope_); + if (inputs.size() < 2) { + MS_LOG(EXCEPTION) << "inputs.size() must be more than 1"; + } + (void)manager_->Replace(inputs.at(1), cnode); // using Replace function to insert cnode after inputs[0] + auto new_anf_node_ptr = cnode->cast(); + MS_EXCEPTION_IF_NULL(new_anf_node_ptr); + return new_anf_node_ptr; +} + +AnfNodePtr GenerateGraph::NewOpInst(const OperatorName &op_name, const OperatorAttrs &attrs) { + name_idx_++; + ValuePtr pyop_instance = CreatOpInstance(attrs, op_name, instance_name_base_ + op_name + std::to_string(name_idx_)); + if (pyop_instance == nullptr) { + MS_LOG(EXCEPTION) << "Failure:" << op_name << " CreatOpInstance failed"; + } + auto value_node = NewValueNode(pyop_instance); + return value_node->cast(); +} + +AnfNodePtr GenerateGraph::NewOpInst(const OperatorName &op_name) { + name_idx_++; + OperatorAttrs attrs; + ValuePtr pyop_instance = CreatOpInstance(attrs, op_name, instance_name_base_ + std::to_string(name_idx_)); + if (pyop_instance == nullptr) { + MS_LOG(EXCEPTION) << "Failure:" << op_name << " CreatOpInstance 
failed"; + } + auto value_node = NewValueNode(pyop_instance); + return value_node->cast(); +} +} // namespace parallel +} // namespace mindspore diff --git a/mindspore/ccsrc/parallel/graph_util/generate_graph.h b/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.h similarity index 93% rename from mindspore/ccsrc/parallel/graph_util/generate_graph.h rename to mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.h index 71227a6e7b..b3ef54a22e 100644 --- a/mindspore/ccsrc/parallel/graph_util/generate_graph.h +++ b/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.h @@ -25,9 +25,9 @@ #include #include "./common.h" -#include "optimizer/opt.h" -#include "parallel/strategy.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/optimizer/opt.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/graph_util/get_parallel_info.cc b/mindspore/ccsrc/frontend/parallel/graph_util/get_parallel_info.cc similarity index 92% rename from mindspore/ccsrc/parallel/graph_util/get_parallel_info.cc rename to mindspore/ccsrc/frontend/parallel/graph_util/get_parallel_info.cc index 32cd106d8e..21298697f4 100644 --- a/mindspore/ccsrc/parallel/graph_util/get_parallel_info.cc +++ b/mindspore/ccsrc/frontend/parallel/graph_util/get_parallel_info.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "parallel/graph_util/get_parallel_info.h" +#include "frontend/parallel/graph_util/get_parallel_info.h" #include #include @@ -23,10 +23,10 @@ #include "common/utils.h" #include "ir/func_graph.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/graph_util/graph_info.h" -#include "parallel/strategy.h" -#include "parallel/tensor_layout/tensor_layout.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/graph_util/graph_info.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/tensor_layout/tensor_layout.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/graph_util/get_parallel_info.h b/mindspore/ccsrc/frontend/parallel/graph_util/get_parallel_info.h similarity index 100% rename from mindspore/ccsrc/parallel/graph_util/get_parallel_info.h rename to mindspore/ccsrc/frontend/parallel/graph_util/get_parallel_info.h diff --git a/mindspore/ccsrc/parallel/graph_util/graph_info.cc b/mindspore/ccsrc/frontend/parallel/graph_util/graph_info.cc similarity index 97% rename from mindspore/ccsrc/parallel/graph_util/graph_info.cc rename to mindspore/ccsrc/frontend/parallel/graph_util/graph_info.cc index 175413c0fd..45a88c3a23 100644 --- a/mindspore/ccsrc/parallel/graph_util/graph_info.cc +++ b/mindspore/ccsrc/frontend/parallel/graph_util/graph_info.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "parallel/graph_util/graph_info.h" +#include "frontend/parallel/graph_util/graph_info.h" #include "debug/anf_ir_dump.h" #include "debug/anf_ir_utils.h" #include "debug/draw.h" diff --git a/mindspore/ccsrc/parallel/graph_util/graph_info.h b/mindspore/ccsrc/frontend/parallel/graph_util/graph_info.h similarity index 100% rename from mindspore/ccsrc/parallel/graph_util/graph_info.h rename to mindspore/ccsrc/frontend/parallel/graph_util/graph_info.h diff --git a/mindspore/ccsrc/parallel/graph_util/node_info.cc b/mindspore/ccsrc/frontend/parallel/graph_util/node_info.cc similarity index 78% rename from mindspore/ccsrc/parallel/graph_util/node_info.cc rename to mindspore/ccsrc/frontend/parallel/graph_util/node_info.cc index 7298b06832..e50df2818b 100644 --- a/mindspore/ccsrc/parallel/graph_util/node_info.cc +++ b/mindspore/ccsrc/frontend/parallel/graph_util/node_info.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include "parallel/graph_util/node_info.h" +#include "frontend/parallel/graph_util/node_info.h" #include #include "ir/anf.h" -#include "ir/param_value_py.h" -#include "pipeline/parse/python_adapter.h" +#include "ir/param_value.h" +#include "pipeline/jit/parse/python_adapter.h" namespace mindspore { namespace parallel { @@ -38,8 +38,7 @@ bool ParameterRequireGrad(const AnfNodePtr &node_ptr) { if (!para_ptr->has_default()) { return false; } - auto param_value = std::dynamic_pointer_cast(para_ptr->default_param()); - return py::cast(parse::python_adapter::GetPyObjAttr(param_value->value(), "requires_grad")); + return para_ptr->default_param()->requires_grad(); } } // namespace parallel } // namespace mindspore diff --git a/mindspore/ccsrc/parallel/graph_util/node_info.h b/mindspore/ccsrc/frontend/parallel/graph_util/node_info.h similarity index 97% rename from mindspore/ccsrc/parallel/graph_util/node_info.h rename to mindspore/ccsrc/frontend/parallel/graph_util/node_info.h index bda268e582..6037c466cd 100644 --- 
a/mindspore/ccsrc/parallel/graph_util/node_info.h +++ b/mindspore/ccsrc/frontend/parallel/graph_util/node_info.h @@ -18,7 +18,7 @@ #define MINDSPORE_CCSRC_PARALLEL_GRAPH_UTIL_NODE_INFO_H_ #include -#include "ir/base.h" +#include "base/base.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/group_manager.cc b/mindspore/ccsrc/frontend/parallel/group_manager.cc similarity index 97% rename from mindspore/ccsrc/parallel/group_manager.cc rename to mindspore/ccsrc/frontend/parallel/group_manager.cc index 1562cbc140..8929af7b0b 100644 --- a/mindspore/ccsrc/parallel/group_manager.cc +++ b/mindspore/ccsrc/frontend/parallel/group_manager.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include "parallel/group_manager.h" +#include "frontend/parallel/group_manager.h" #include #include -#include "parallel/device_manager.h" -#include "parallel/ops_info/ops_utils.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/ops_info/ops_utils.h" #include "utils/comm_manager.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/group_manager.h b/mindspore/ccsrc/frontend/parallel/group_manager.h similarity index 96% rename from mindspore/ccsrc/parallel/group_manager.h rename to mindspore/ccsrc/frontend/parallel/group_manager.h index f763d483cc..b9cf9663b0 100644 --- a/mindspore/ccsrc/parallel/group_manager.h +++ b/mindspore/ccsrc/frontend/parallel/group_manager.h @@ -22,8 +22,8 @@ #include #include -#include "parallel/device.h" -#include "parallel/status.h" +#include "frontend/parallel/device.h" +#include "frontend/parallel/status.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/node_check.cc b/mindspore/ccsrc/frontend/parallel/node_check.cc similarity index 97% rename from mindspore/ccsrc/parallel/node_check.cc rename to mindspore/ccsrc/frontend/parallel/node_check.cc index 6b920f82ec..de29417a4d 100644 --- a/mindspore/ccsrc/parallel/node_check.cc +++ 
b/mindspore/ccsrc/frontend/parallel/node_check.cc @@ -14,12 +14,12 @@ * limitations under the License. */ -#include "parallel/node_check.h" +#include "frontend/parallel/node_check.h" #include #include -#include "parallel/ops_info/ops_utils.h" +#include "frontend/parallel/ops_info/ops_utils.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/node_check.h b/mindspore/ccsrc/frontend/parallel/node_check.h similarity index 100% rename from mindspore/ccsrc/parallel/node_check.h rename to mindspore/ccsrc/frontend/parallel/node_check.h diff --git a/mindspore/ccsrc/parallel/ops_info/activation_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/activation_info.cc similarity index 99% rename from mindspore/ccsrc/parallel/ops_info/activation_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/activation_info.cc index 6bc33677a6..35cac1480c 100644 --- a/mindspore/ccsrc/parallel/ops_info/activation_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/activation_info.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/activation_info.h" +#include "frontend/parallel/ops_info/activation_info.h" #include #include @@ -22,9 +22,9 @@ #include #include "ir/value.h" -#include "parallel/auto_parallel/costmodel.h" -#include "parallel/device_matrix.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/costmodel.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/activation_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/activation_info.h similarity index 98% rename from mindspore/ccsrc/parallel/ops_info/activation_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/activation_info.h index cd66bf8e8b..a74707efbe 100644 --- a/mindspore/ccsrc/parallel/ops_info/activation_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/activation_info.h @@ -23,9 +23,9 @@ #include #include -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/arithmetic_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/arithmetic_info.cc similarity index 98% rename from mindspore/ccsrc/parallel/ops_info/arithmetic_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/arithmetic_info.cc index 02c26ea965..1dd9c899ca 100644 --- a/mindspore/ccsrc/parallel/ops_info/arithmetic_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/arithmetic_info.cc @@ -14,16 +14,16 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/arithmetic_info.h" +#include "frontend/parallel/ops_info/arithmetic_info.h" #include #include #include #include -#include "parallel/device_matrix.h" -#include "parallel/strategy.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/arithmetic_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/arithmetic_info.h similarity index 97% rename from mindspore/ccsrc/parallel/ops_info/arithmetic_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/arithmetic_info.h index 27caacc30c..1d347e4ec1 100644 --- a/mindspore/ccsrc/parallel/ops_info/arithmetic_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/arithmetic_info.h @@ -23,9 +23,9 @@ #include #include "ir/value.h" -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/batch_parallel_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/batch_parallel_info.cc similarity index 97% rename from mindspore/ccsrc/parallel/ops_info/batch_parallel_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/batch_parallel_info.cc index dac3b0a675..64aceb90f6 100644 --- a/mindspore/ccsrc/parallel/ops_info/batch_parallel_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/batch_parallel_info.cc @@ -14,16 +14,16 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/batch_parallel_info.h" +#include "frontend/parallel/ops_info/batch_parallel_info.h" #include #include #include #include "ir/value.h" -#include "parallel/device_manager.h" -#include "parallel/device_matrix.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/batch_parallel_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/batch_parallel_info.h similarity index 96% rename from mindspore/ccsrc/parallel/ops_info/batch_parallel_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/batch_parallel_info.h index db6cb206d5..0ba30c385a 100644 --- a/mindspore/ccsrc/parallel/ops_info/batch_parallel_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/batch_parallel_info.h @@ -22,8 +22,8 @@ #include #include #include "ir/value.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/bias_add_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/bias_add_info.cc similarity index 97% rename from mindspore/ccsrc/parallel/ops_info/bias_add_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/bias_add_info.cc index 005edaf7c7..e8b3afba16 100644 --- a/mindspore/ccsrc/parallel/ops_info/bias_add_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/bias_add_info.cc @@ -14,16 +14,16 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/bias_add_info.h" +#include "frontend/parallel/ops_info/bias_add_info.h" #include #include #include #include -#include "parallel/device_matrix.h" -#include "parallel/strategy.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/bias_add_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/bias_add_info.h similarity index 92% rename from mindspore/ccsrc/parallel/ops_info/bias_add_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/bias_add_info.h index 37f555a258..3ede65a3ba 100644 --- a/mindspore/ccsrc/parallel/ops_info/bias_add_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/bias_add_info.h @@ -24,9 +24,9 @@ #include #include "ir/value.h" -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/comparison_function_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/comparison_function_info.h similarity index 93% rename from mindspore/ccsrc/parallel/ops_info/comparison_function_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/comparison_function_info.h index 8dd2976b04..2829889846 100644 --- a/mindspore/ccsrc/parallel/ops_info/comparison_function_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/comparison_function_info.h @@ -22,9 +22,9 @@ #include #include #include "ir/value.h" -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/ops_info/arithmetic_info.h" -#include "parallel/strategy.h" +#include 
"frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/ops_info/arithmetic_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/dropout_do_mask_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/dropout_do_mask_info.cc similarity index 97% rename from mindspore/ccsrc/parallel/ops_info/dropout_do_mask_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/dropout_do_mask_info.cc index e88868c772..3b411ccb0e 100644 --- a/mindspore/ccsrc/parallel/ops_info/dropout_do_mask_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/dropout_do_mask_info.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "parallel/ops_info/dropout_do_mask_info.h" +#include "frontend/parallel/ops_info/dropout_do_mask_info.h" #include #include @@ -22,11 +22,11 @@ #include #include "ir/value.h" -#include "pipeline/resource.h" -#include "parallel/auto_parallel/costmodel.h" -#include "parallel/device_matrix.h" -#include "parallel/strategy.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "pipeline/jit/resource.h" +#include "frontend/parallel/auto_parallel/costmodel.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/dropout_do_mask_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/dropout_do_mask_info.h similarity index 93% rename from mindspore/ccsrc/parallel/ops_info/dropout_do_mask_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/dropout_do_mask_info.h index c51a0a9513..ea7d590071 100644 --- a/mindspore/ccsrc/parallel/ops_info/dropout_do_mask_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/dropout_do_mask_info.h @@ -23,9 +23,9 @@ #include #include "ir/value.h" -#include "parallel/auto_parallel/operator_costmodel.h" 
-#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/elementary_function_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/elementary_function_info.h similarity index 93% rename from mindspore/ccsrc/parallel/ops_info/elementary_function_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/elementary_function_info.h index 2172c5cd89..e25da9e743 100644 --- a/mindspore/ccsrc/parallel/ops_info/elementary_function_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/elementary_function_info.h @@ -21,9 +21,9 @@ #include #include #include "ir/value.h" -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/ops_info/activation_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/ops_info/activation_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/gather_v2_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/gather_v2_info.cc similarity index 98% rename from mindspore/ccsrc/parallel/ops_info/gather_v2_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/gather_v2_info.cc index 078be08128..4e6e947f68 100644 --- a/mindspore/ccsrc/parallel/ops_info/gather_v2_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/gather_v2_info.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/gather_v2_info.h" +#include "frontend/parallel/ops_info/gather_v2_info.h" #include #include @@ -22,10 +22,10 @@ #include "ir/tensor.h" #include "ir/value.h" -#include "parallel/auto_parallel/costmodel.h" -#include "parallel/device_matrix.h" -#include "parallel/graph_util/generate_graph.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/costmodel.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/graph_util/generate_graph.h" +#include "frontend/parallel/strategy.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/ops_info/gather_v2_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/gather_v2_info.h similarity index 94% rename from mindspore/ccsrc/parallel/ops_info/gather_v2_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/gather_v2_info.h index f7aeb6a0d9..b3dc0fab87 100644 --- a/mindspore/ccsrc/parallel/ops_info/gather_v2_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/gather_v2_info.h @@ -23,9 +23,9 @@ #include #include "ir/value.h" -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/gather_v2_p_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/gather_v2_p_info.cc similarity index 77% rename from mindspore/ccsrc/parallel/ops_info/gather_v2_p_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/gather_v2_p_info.cc index 9fb8df0883..eb3c9900f8 100644 --- a/mindspore/ccsrc/parallel/ops_info/gather_v2_p_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/gather_v2_p_info.cc @@ -14,37 +14,39 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/gather_v2_p_info.h" +#include "frontend/parallel/ops_info/gather_v2_p_info.h" #include #include #include #include +#include -#include "parallel/device_matrix.h" -#include "parallel/graph_util/generate_graph.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/graph_util/generate_graph.h" namespace mindspore { namespace parallel { Status GatherV2PInfo::GetAttrs() { - // get axis, the third input is the axis, is a ValueNode - if (input_value_.at(2) == nullptr) { - MS_LOG(ERROR) << name_ << ": the third input value is nullptr, is not a ValueNode!"; - return FAILED; - } - auto axis = GetValue(input_value_.at(2)); - // if axis is negative then convert it to positive - auto params_shape = inputs_shape_.at(0); - if (params_shape.size() == 0) { - MS_LOG(ERROR) << name_ << ": params can not be a scalar!"; - return FAILED; - } - if (axis < 0) { - axis += SizeToInt(inputs_shape_[0].size()); + // get axis, the third input is the axis, is a ValueNode, embeddinglookup doesn't have axis. 
+ if (target_ != CPU) { + if (input_value_.at(2) == nullptr) { + MS_LOG(ERROR) << name_ << ": the third input value is nullptr, is not a ValueNode!"; + return FAILED; + } + auto axis = GetValue(input_value_.at(2)); + // if axis is negative then convert it to positive + auto params_shape = inputs_shape_.at(0); + if (params_shape.size() == 0) { + MS_LOG(ERROR) << name_ << ": params can not be a scalar!"; + return FAILED; + } + if (axis < 0) { + axis += SizeToInt(inputs_shape_[0].size()); + } + axis_ = axis; } - axis_ = axis; - // get target auto target_iter = attrs_.find(TARGET); if (target_iter != attrs_.end()) { MS_EXCEPTION_IF_NULL(target_iter->second); @@ -52,13 +54,54 @@ Status GatherV2PInfo::GetAttrs() { target_ = target_iter->second->cast()->value(); } else { MS_LOG(ERROR) << name_ << " : The value of target is not a string."; - return FAILED; + } + } + auto manual_split_iter = attrs_.find("manual_split"); + if (manual_split_iter != attrs_.end()) { + param_split_shapes_.clear(); + manual_split_ = true; + auto var = manual_split_iter->second->cast(); + MS_LOG(DEBUG) << "Extract manual split strategy " << manual_split_iter->second->ToString(); + + if (var->size() > 0) { + std::vector elements = var->value(); + for (auto &ele : elements) { + if (ele->isa()) { + auto value_tuple = ele->cast(); + std::vector value_vector = value_tuple->value(); + if (value_vector.size() != 2) { + MS_LOG(ERROR) << "Failure: Size of manual_split element must be 2."; + return FAILED; + } + param_split_shapes_.push_back(static_cast(GetValue(value_vector[0]))); + index_offsets_.push_back(static_cast(GetValue(value_vector[1]))); + } else { + MS_LOG(ERROR) << "Failure: Manual split strategy's format is wrong! 
Need ValueSequeue"; + return FAILED; + } + } + + if (param_split_shapes_.empty()) { + MS_LOG(ERROR) << "Failed to extract param split strategy."; + return FAILED; + } } } - // target=CPU, axis must be 0 - if (target_ == "CPU" && axis_ != 0) { - MS_LOG(ERROR) << name_ << ": target is CPU, axis must be 0, but got " << axis_; + return SUCCESS; +} + +Status GatherV2PInfo::CheckManualSplit() { + auto param_shape = inputs_shape_.at(0); + int32_t split_shape_sum = std::accumulate(param_split_shapes_.begin(), param_split_shapes_.end(), 0, + [](int32_t s, int32_t shape) { return s + shape; }); + if (split_shape_sum < param_shape.at(0)) { + MS_LOG(ERROR) << "Failure: Sum of splited shapes should not be smaller than param_shape."; + return FAILED; + } + + if (std::any_of(index_offsets_.begin(), index_offsets_.end(), [](const int32_t &offset) { return offset < 0; })) { + MS_LOG(ERROR) << "Failure: Index offset must not less than 0."; return FAILED; } @@ -103,6 +146,14 @@ Status GatherV2PInfo::CheckStrategy(const StrategyPtr &strategy) { return FAILED; } + if (manual_split_) { + if (CheckManualSplit() != SUCCESS) { + return FAILED; + } + // when using manual_split, no need to check belowings. 
+ return SUCCESS; + } + // axis != 0, param_shape(0)%(param_strategy(0)*param_strategy(axis)) must be 0 if (axis_ != 0 && param_shape.at(0) % (param_strategy.at(0) * param_strategy.at(IntToSize(axis_))) != 0) { MS_LOG(DEBUG) << name_ << ": index_shape(0) can't be divided by (param_strategy(0)*param_strategy(axis))."; @@ -130,6 +181,11 @@ Status GatherV2PInfo::CheckStrategy(const StrategyPtr &strategy) { } Status GatherV2PInfo::InferMirrorOps() { + // There is no mirror operators for manual split + if (manual_split_) { + return SUCCESS; + } + mirror_ops_.clear(); Shape input_a_tensor_map = inputs_tensor_map_.at(0); std::vector input_a_group; @@ -160,6 +216,13 @@ Status GatherV2PInfo::InferDevMatrixShape() { // infer input dev_matrix_shape auto param_strategy = strategy_->GetInputDim().at(0); auto index_strategy = strategy_->GetInputDim().at(1); + + if (manual_split_) { + dev_matrix_shape_ = param_strategy; + out_dev_matrix_shape_ = dev_matrix_shape_; + return SUCCESS; + } + dev_matrix_shape_ = param_strategy; // param_strategy(axis)!=1, @@ -195,6 +258,12 @@ Status GatherV2PInfo::InferDevMatrixShape() { } Status GatherV2PInfo::InferTensorMap() { + if (manual_split_) { + inputs_tensor_map_.push_back({1, 0}); + inputs_tensor_map_.push_back({-1, 1}); + outputs_tensor_map_.push_back({-1, 1, 0}); + return SUCCESS; + } // infer input tensor map // param_strategy(axis) != 1 size_t param_size = inputs_shape_.at(0).size(); @@ -261,8 +330,13 @@ Status GatherV2PInfo::InferTensorInfo() { Shape input_shape = inputs_shape_.at(0); Shape input_index_shape = inputs_shape_.at(1); Shape output_shape = outputs_shape_.at(0); + int32_t rank = g_device_manager->global_rank(); // infer tensor layout TensorLayout input_tensor_layout, input_index_layout, output_tensor_layout; + if (manual_split_) { + input_shape[0] = param_split_shapes_[rank / dev_matrix_shape_[1]]; + input_shape[0] = input_shape[0] * dev_matrix_shape_[0]; + } if ((input_tensor_layout.InitFromVector(dev_matrix_shape_, 
inputs_tensor_map_.at(0), input_shape) != SUCCESS) || (input_index_layout.InitFromVector(dev_matrix_shape_, inputs_tensor_map_.at(1), input_index_shape) != SUCCESS) || (output_tensor_layout.InitFromVector(out_dev_matrix_shape_, outputs_tensor_map_.at(0), output_shape) != @@ -274,6 +348,9 @@ Status GatherV2PInfo::InferTensorInfo() { TensorInfo input_index_info(input_index_layout); TensorInfo output_tensor_info(output_tensor_layout); + Shape slice_shape = input_tensor_info.slice_shape(); + MS_LOG(DEBUG) << "The fake slice shape is: " << ShapeToString(slice_shape); + inputs_tensor_info_.push_back(input_tensor_info); inputs_tensor_info_.push_back(input_index_info); outputs_tensor_info_.push_back(output_tensor_info); @@ -312,6 +389,19 @@ Status GatherV2PInfo::InferBias() { return FAILED; } +Status GatherV2PInfo::InferOffset() { + CheckGlobalDeviceManager(); + size_t rank = g_device_manager->global_rank(); + if (rank < index_offsets_.size()) { + index_offset_ = index_offsets_.at(rank); + MS_LOG(DEBUG) << name_ << ": Device rank " << rank << ", Index Offset: " << index_offset_; + return SUCCESS; + } + + MS_LOG(ERROR) << name_ << ": Get index offset failed, index offset size is" << index_offsets_.size(); + return FAILED; +} + Status GatherV2PInfo::InferGroup() { auto param_strategy = strategy_->GetInputDim().at(0); size_t dim = IntToSize(axis_); @@ -348,12 +438,9 @@ std::vector GetRankFromGroup(const Group &group) { Status GatherV2PInfo::InferForwardCommunication() { forward_op_.clear(); - if (target_ != CPU) { - return SUCCESS; - } auto param_strategy = strategy_->GetInputDim().at(0); - // don't split axis, no need forward communication - if (param_strategy.at(IntToSize(axis_)) == 1) { + // don't split axis or target is not CPU, no need forward communication + if (target_ != CPU || param_strategy.at(IntToSize(axis_)) == 1) { return SUCCESS; } // split axis @@ -362,38 +449,13 @@ Status GatherV2PInfo::InferForwardCommunication() { MS_LOG(ERROR) << name_ << ": Infer Group 
failed."; return FAILED; } - auto group_size = group_.GetDevNum(); Attr attr_group; - if (host_reduce_scatter_) { - // group size <= 8 - std::vector rank_list; - if (group_size <= 8) { - reduce_scatter_flag_ = false; - operator_name = HOST_REDUCE_SCATTER; - rank_list = GetRankFromGroup(group_); - attr_group = std::make_pair(GROUP, MakeValue(rank_list)); - } else { - // group size > 8, don't support host reduce_scatter - reduce_scatter_flag_ = true; - split_num_ = SizeToInt(group_size / 8); - CheckGlobalDeviceManager(); - operator_name = REDUCE_SCATTER; - int32_t rank = g_device_manager->global_rank(); - size_t repeat = group_size / 8; - for (size_t i = 0; i < repeat; ++i) { - rank_list.push_back(rank + SizeToInt(i * 8)); - } - Group g = g_device_manager->CreateGroup(rank_list); - attr_group = std::make_pair(GROUP, MakeValue(g.name())); - } - } else { - operator_name = REDUCE_SCATTER; - if (InferGroup() != SUCCESS) { - MS_LOG(ERROR) << name_ << ": Infer Group failed."; - return FAILED; - } - attr_group = std::make_pair(GROUP, MakeValue(group_.name())); + operator_name = REDUCE_SCATTER; + if (InferGroup() != SUCCESS) { + MS_LOG(ERROR) << name_ << ": Infer Group failed."; + return FAILED; } + attr_group = std::make_pair(GROUP, MakeValue(group_.name())); Attr attr_op = std::make_pair(OP, MakeValue(REDUCE_OP_SUM)); OperatorAttrs attrs = {attr_op, attr_group}; OperatorParams params; @@ -410,6 +472,19 @@ Status GatherV2PInfo::ComputeReplaceGraph(const CNodePtr &cnode) { MS_LOG(ERROR) << "GenerateGraph Init failed"; return FAILED; } + if (manual_split_) { + if (InferOffset() != SUCCESS) { + MS_LOG(ERROR) << name_ << ": Infer Bias failed."; + return FAILED; + } + auto sub = gen_g.PushBack({gen_g.NewOpInst(SUB), gen_g.virtual_input_node(), CreateInt32Tensor(index_offset_)}); + auto gather_v2 = + gen_g.PushBack({gen_g.NewOpInst(replace_op_name_), gen_g.virtual_input_node(), sub, CreatInt32Imm(axis_)}); + std::vector> input_nodes = {std::make_pair(sub, 2), 
std::make_pair(gather_v2, 1)}; + replace_graph_ = std::make_shared>, AnfNodePtr>>( + std::make_pair(input_nodes, gather_v2)); + return SUCCESS; + } if (InferBias() != SUCCESS) { MS_LOG(ERROR) << name_ << ": Infer Bias failed."; return FAILED; @@ -444,6 +519,14 @@ Status GatherV2PInfo::ComputeReplaceGraph(const CNodePtr &cnode) { } ReplaceGraphPtr GatherV2PInfo::replace_graph(const CNodePtr &cnode) { + if (manual_split_) { + if (ComputeReplaceGraph(cnode) != SUCCESS) { + MS_LOG(ERROR) << name_ << ": ComputeReplaceGraph failed."; + return nullptr; + } + return replace_graph_; + } + auto param_strategy = strategy_->GetInputDim().at(0); // target_ == CPU, no need to raplace graph if (target_ == CPU) { @@ -464,10 +547,7 @@ Status GatherV2PInfo::ComputeReplaceOp() { OperatorName op_name = EMBEDDING_LOOKUP; OperatorAttrs attrs; Attr param_offset = std::make_pair("offset", MakeValue(bias_)); - Attr param_flag = std::make_pair("reduce_scatter_flag", MakeValue(reduce_scatter_flag_)); - Attr param_split_num = std::make_pair("split_num", MakeValue(split_num_)); - OperatorParams params = {std::make_pair(param_offset, 3), std::make_pair(param_flag, 4), - std::make_pair(param_split_num, 5)}; + OperatorParams params = {std::make_pair(param_offset, 3)}; OperatorArgs args = std::make_pair(attrs, params); Operator op = std::make_pair(op_name, args); replace_op_.push_back(op); diff --git a/mindspore/ccsrc/parallel/ops_info/gather_v2_p_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/gather_v2_p_info.h similarity index 78% rename from mindspore/ccsrc/parallel/ops_info/gather_v2_p_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/gather_v2_p_info.h index 83868606d1..eb26c616d0 100644 --- a/mindspore/ccsrc/parallel/ops_info/gather_v2_p_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/gather_v2_p_info.h @@ -23,9 +23,9 @@ #include #include "ir/value.h" -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/ops_info/operator_info.h" -#include 
"parallel/strategy.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { @@ -36,6 +36,7 @@ class GatherV2PInfo : public OperatorInfo { : OperatorInfo(name, inputs_shape, outputs_shape, attrs, std::make_shared()), axis_(0), bias_(0), + index_offset_(0), slice_size_(0) {} ~GatherV2PInfo() override = default; Status Init(const StrategyPtr &strategy) override; @@ -57,20 +58,23 @@ class GatherV2PInfo : public OperatorInfo { private: Status ComputeReplaceGraph(const CNodePtr &cnode); + Status CheckManualSplit(); Status ComputeReplaceOp(); Status InferBias(); + Status InferOffset(); Status InferGroup(); int32_t axis_; - std::string target_; + std::string target_ = DEVICE; std::string replace_op_name_ = GATHERV2; int32_t bias_; + int32_t index_offset_; int32_t slice_size_; Shape out_dev_matrix_shape_; Group group_; - bool reduce_scatter_flag_ = false; - int32_t split_num_ = 1; - bool host_reduce_scatter_ = false; + bool manual_split_ = false; + std::vector param_split_shapes_; + std::vector index_offsets_; }; class SparseGatherV2Info : public GatherV2PInfo { @@ -83,6 +87,14 @@ class SparseGatherV2Info : public GatherV2PInfo { private: std::string replace_op_name_ = SPARSE_GATHERV2; }; + +class EmbeddingLookupInfo : public GatherV2PInfo { + public: + EmbeddingLookupInfo(const std::string &name, const Shapes &inputs_shape, const Shapes &outputs_shape, + const PrimitiveAttrs &attrs) + : GatherV2PInfo(name, inputs_shape, outputs_shape, attrs) {} + ~EmbeddingLookupInfo() override = default; +}; } // namespace parallel } // namespace mindspore #endif // MINDSPORE_CCSRC_PARALLEL_OPS_INFO_GATHER_V2_P_INFO_H_ diff --git a/mindspore/ccsrc/parallel/ops_info/get_next_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/get_next_info.cc similarity index 97% rename from mindspore/ccsrc/parallel/ops_info/get_next_info.cc rename to 
mindspore/ccsrc/frontend/parallel/ops_info/get_next_info.cc index 0fb49364f0..3606732156 100644 --- a/mindspore/ccsrc/parallel/ops_info/get_next_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/get_next_info.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "parallel/ops_info/get_next_info.h" +#include "frontend/parallel/ops_info/get_next_info.h" #include #include @@ -22,10 +22,10 @@ #include #include "ir/value.h" -#include "parallel/device_matrix.h" -#include "parallel/strategy.h" -#include "parallel/context.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/context.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/get_next_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/get_next_info.h similarity index 93% rename from mindspore/ccsrc/parallel/ops_info/get_next_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/get_next_info.h index ba209910b7..36e7a0fcb3 100644 --- a/mindspore/ccsrc/parallel/ops_info/get_next_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/get_next_info.h @@ -22,9 +22,9 @@ #include #include -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/l2_normalize_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/l2_normalize_info.cc similarity index 94% rename from mindspore/ccsrc/parallel/ops_info/l2_normalize_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/l2_normalize_info.cc index 8716997d9f..126fdcf84e 100644 --- 
a/mindspore/ccsrc/parallel/ops_info/l2_normalize_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/l2_normalize_info.cc @@ -14,16 +14,16 @@ * limitations under the License. */ -#include "parallel/ops_info/l2_normalize_info.h" +#include "frontend/parallel/ops_info/l2_normalize_info.h" #include #include #include #include -#include "parallel/device_matrix.h" -#include "parallel/strategy.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/l2_normalize_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/l2_normalize_info.h similarity index 90% rename from mindspore/ccsrc/parallel/ops_info/l2_normalize_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/l2_normalize_info.h index ca063d01d8..c74dde4b4b 100644 --- a/mindspore/ccsrc/parallel/ops_info/l2_normalize_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/l2_normalize_info.h @@ -23,9 +23,9 @@ #include #include "ir/value.h" -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/ops_info/activation_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/ops_info/activation_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/layer_norm_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/layer_norm_info.cc similarity index 98% rename from mindspore/ccsrc/parallel/ops_info/layer_norm_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/layer_norm_info.cc index 5bdd24090f..62d7c6d61e 100644 --- a/mindspore/ccsrc/parallel/ops_info/layer_norm_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/layer_norm_info.cc @@ -14,11 +14,11 @@ * limitations under the 
License. */ -#include "parallel/ops_info/layer_norm_info.h" +#include "frontend/parallel/ops_info/layer_norm_info.h" #include #include -#include "parallel/device_matrix.h" -#include "parallel/strategy.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/layer_norm_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/layer_norm_info.h similarity index 94% rename from mindspore/ccsrc/parallel/ops_info/layer_norm_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/layer_norm_info.h index 50117b8185..9ee11bb215 100644 --- a/mindspore/ccsrc/parallel/ops_info/layer_norm_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/layer_norm_info.h @@ -22,9 +22,9 @@ #include #include #include "ir/value.h" -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/loss_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/loss_info.cc similarity index 97% rename from mindspore/ccsrc/parallel/ops_info/loss_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/loss_info.cc index 0ba325c0cd..889f204fb0 100644 --- a/mindspore/ccsrc/parallel/ops_info/loss_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/loss_info.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/loss_info.h" +#include "frontend/parallel/ops_info/loss_info.h" #include #include @@ -22,9 +22,9 @@ #include #include "ir/value.h" -#include "parallel/device_matrix.h" -#include "parallel/strategy.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/loss_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/loss_info.h similarity index 94% rename from mindspore/ccsrc/parallel/ops_info/loss_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/loss_info.h index 2679c2d62b..7e5478bedf 100644 --- a/mindspore/ccsrc/parallel/ops_info/loss_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/loss_info.h @@ -23,9 +23,9 @@ #include #include "ir/value.h" -#include "parallel/ops_info/activation_info.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/ops_info/activation_info.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/matmul_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/matmul_info.cc similarity index 98% rename from mindspore/ccsrc/parallel/ops_info/matmul_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/matmul_info.cc index 7d1ab8dc0f..60a3d60b39 100644 --- a/mindspore/ccsrc/parallel/ops_info/matmul_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/matmul_info.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/matmul_info.h" +#include "frontend/parallel/ops_info/matmul_info.h" #include #include @@ -24,10 +24,10 @@ #include #include "ir/value.h" -#include "parallel/auto_parallel/graph_costmodel.h" -#include "parallel/device_manager.h" -#include "parallel/device_matrix.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/parallel/auto_parallel/graph_costmodel.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/matmul_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/matmul_info.h similarity index 95% rename from mindspore/ccsrc/parallel/ops_info/matmul_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/matmul_info.h index cb3e54a048..d4e144c2b6 100644 --- a/mindspore/ccsrc/parallel/ops_info/matmul_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/matmul_info.h @@ -24,9 +24,9 @@ #include "common/utils.h" #include "ir/value.h" -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/onehot_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/onehot_info.cc similarity index 97% rename from mindspore/ccsrc/parallel/ops_info/onehot_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/onehot_info.cc index ea2d045104..15acb085f5 100644 --- a/mindspore/ccsrc/parallel/ops_info/onehot_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/onehot_info.cc @@ -14,17 +14,17 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/onehot_info.h" +#include "frontend/parallel/ops_info/onehot_info.h" #include #include #include #include "ir/value.h" -#include "parallel/auto_parallel/costmodel.h" -#include "parallel/device_matrix.h" -#include "parallel/graph_util/generate_graph.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/costmodel.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/graph_util/generate_graph.h" +#include "frontend/parallel/strategy.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/ops_info/onehot_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/onehot_info.h similarity index 93% rename from mindspore/ccsrc/parallel/ops_info/onehot_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/onehot_info.h index 3c8a64f954..dfd7e6cbaf 100644 --- a/mindspore/ccsrc/parallel/ops_info/onehot_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/onehot_info.h @@ -23,9 +23,9 @@ #include #include "ir/value.h" -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/operator_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc similarity index 99% rename from mindspore/ccsrc/parallel/ops_info/operator_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc index f9b294898c..3dd47b1de6 100644 --- a/mindspore/ccsrc/parallel/ops_info/operator_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/operator_info.h" +#include "frontend/parallel/ops_info/operator_info.h" #include #include @@ -27,9 +27,9 @@ #include "ir/dtype.h" #include "ir/tensor.h" #include "ir/value.h" -#include "parallel/auto_parallel/edge_costmodel.h" -#include "parallel/auto_parallel/graph_costmodel.h" -#include "parallel/context.h" +#include "frontend/parallel/auto_parallel/edge_costmodel.h" +#include "frontend/parallel/auto_parallel/graph_costmodel.h" +#include "frontend/parallel/context.h" #include "utils/context/ms_context.h" #include "utils/log_adapter.h" diff --git a/mindspore/ccsrc/parallel/ops_info/operator_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/operator_info.h similarity index 97% rename from mindspore/ccsrc/parallel/ops_info/operator_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/operator_info.h index 21041c3e94..8641c47491 100644 --- a/mindspore/ccsrc/parallel/ops_info/operator_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/operator_info.h @@ -26,15 +26,15 @@ #include #include "common/utils.h" -#include "ir/base.h" -#include "parallel/auto_parallel/costmodel.h" -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/device_manager.h" -#include "parallel/device_matrix.h" -#include "parallel/group_manager.h" -#include "parallel/ops_info/ops_utils.h" -#include "parallel/strategy.h" -#include "parallel/tensor_layout/tensor_info.h" +#include "base/base.h" +#include "frontend/parallel/auto_parallel/costmodel.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/group_manager.h" +#include "frontend/parallel/ops_info/ops_utils.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/tensor_layout/tensor_info.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/ops_info_head_files.h 
b/mindspore/ccsrc/frontend/parallel/ops_info/ops_info_head_files.h new file mode 100644 index 0000000000..bc732ed234 --- /dev/null +++ b/mindspore/ccsrc/frontend/parallel/ops_info/ops_info_head_files.h @@ -0,0 +1,41 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_PARALLEL_OPS_INFO_OPS_INFO_HEAD_FILES_H_ +#define MINDSPORE_CCSRC_PARALLEL_OPS_INFO_OPS_INFO_HEAD_FILES_H_ + +#include "frontend/parallel/ops_info/activation_info.h" +#include "frontend/parallel/ops_info/arithmetic_info.h" +#include "frontend/parallel/ops_info/batch_parallel_info.h" +#include "frontend/parallel/ops_info/bias_add_info.h" +#include "frontend/parallel/ops_info/comparison_function_info.h" +#include "frontend/parallel/ops_info/dropout_do_mask_info.h" +#include "frontend/parallel/ops_info/elementary_function_info.h" +#include "frontend/parallel/ops_info/gather_v2_info.h" +#include "frontend/parallel/ops_info/get_next_info.h" +#include "frontend/parallel/ops_info/l2_normalize_info.h" +#include "frontend/parallel/ops_info/layer_norm_info.h" +#include "frontend/parallel/ops_info/loss_info.h" +#include "frontend/parallel/ops_info/matmul_info.h" +#include "frontend/parallel/ops_info/onehot_info.h" +#include "frontend/parallel/ops_info/prelu_info.h" +#include "frontend/parallel/ops_info/reduce_method_info.h" +#include "frontend/parallel/ops_info/reshape_info.h" +#include 
"frontend/parallel/ops_info/transpose_info.h" +#include "frontend/parallel/ops_info/virtual_dataset_info.h" +#include "frontend/parallel/ops_info/gather_v2_p_info.h" + +#endif // MINDSPORE_CCSRC_PARALLEL_OPS_INFO_HEAD_FILES_H_ diff --git a/mindspore/ccsrc/parallel/ops_info/ops_utils.h b/mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h similarity index 99% rename from mindspore/ccsrc/parallel/ops_info/ops_utils.h rename to mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h index 9cb3c7040a..79dfb56693 100644 --- a/mindspore/ccsrc/parallel/ops_info/ops_utils.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h @@ -65,6 +65,7 @@ constexpr char STEP_PARALLEL_END[] = "step_parallel_end"; constexpr char STEP_AUTO_PARALLEL_BEGIN[] = "step_auto_parallel_begin.dot"; constexpr char REQUIRES_GRAD[] = "requires_grad"; constexpr char PARAM_NAME[] = "name"; +constexpr char RESHAPEINFO[] = "ReshapeInfo"; constexpr char RELU_TYPE[] = "relu"; constexpr char RELU6_TYPE[] = "relu6"; @@ -131,6 +132,7 @@ constexpr char REDISTRIBUTION_OP[] = "redistribution_op"; constexpr char DARA_PARALLEL[] = "data_parallel"; constexpr char FORWARD_REDUCE_SCATTER[] = "forward_reduce_scatter"; constexpr char OPTIMIZER_SUB_STRING[] = "optimizer"; +constexpr char DEVICE[] = "Device"; // Operator constexpr char VIRTUAL_DIV[] = "_VirtualDiv"; diff --git a/mindspore/ccsrc/parallel/ops_info/prelu_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/prelu_info.cc similarity index 97% rename from mindspore/ccsrc/parallel/ops_info/prelu_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/prelu_info.cc index 14483e97a1..57b35b69f7 100644 --- a/mindspore/ccsrc/parallel/ops_info/prelu_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/prelu_info.cc @@ -14,15 +14,15 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/prelu_info.h" +#include "frontend/parallel/ops_info/prelu_info.h" #include #include #include -#include "parallel/device_manager.h" -#include "parallel/device_matrix.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/step_parallel.h" #include "utils/convert_utils.h" #include "utils/log_adapter.h" diff --git a/mindspore/ccsrc/parallel/ops_info/prelu_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/prelu_info.h similarity index 95% rename from mindspore/ccsrc/parallel/ops_info/prelu_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/prelu_info.h index 28e149fad7..e6e5e23bac 100644 --- a/mindspore/ccsrc/parallel/ops_info/prelu_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/prelu_info.h @@ -23,8 +23,8 @@ #include #include "ir/value.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/reduce_method_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/reduce_method_info.cc similarity index 98% rename from mindspore/ccsrc/parallel/ops_info/reduce_method_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/reduce_method_info.cc index 7304666a77..0488dceeca 100644 --- a/mindspore/ccsrc/parallel/ops_info/reduce_method_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/reduce_method_info.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/reduce_method_info.h" +#include "frontend/parallel/ops_info/reduce_method_info.h" #include #include @@ -22,9 +22,9 @@ #include #include "ir/value.h" -#include "parallel/device_manager.h" -#include "parallel/device_matrix.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/ops_info/reduce_method_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/reduce_method_info.h similarity index 96% rename from mindspore/ccsrc/parallel/ops_info/reduce_method_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/reduce_method_info.h index 796c7e457b..ed9ab0721d 100644 --- a/mindspore/ccsrc/parallel/ops_info/reduce_method_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/reduce_method_info.h @@ -24,9 +24,9 @@ #include "ir/tensor.h" #include "ir/value.h" -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/ops_info/activation_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/ops_info/activation_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/reshape_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/reshape_info.cc similarity index 98% rename from mindspore/ccsrc/parallel/ops_info/reshape_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/reshape_info.cc index 57e1a76d0a..fb62c1d02c 100644 --- a/mindspore/ccsrc/parallel/ops_info/reshape_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/reshape_info.cc @@ -14,15 +14,15 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/reshape_info.h" +#include "frontend/parallel/ops_info/reshape_info.h" #include #include -#include "parallel/device_manager.h" -#include "parallel/device_matrix.h" -#include "parallel/step_parallel.h" -#include "parallel/auto_parallel/graph_costmodel.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/step_parallel.h" +#include "frontend/parallel/auto_parallel/graph_costmodel.h" #include "utils/convert_utils.h" #include "utils/log_adapter.h" diff --git a/mindspore/ccsrc/parallel/ops_info/reshape_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/reshape_info.h similarity index 97% rename from mindspore/ccsrc/parallel/ops_info/reshape_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/reshape_info.h index 77a1f8e7f1..2463b440f8 100644 --- a/mindspore/ccsrc/parallel/ops_info/reshape_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/reshape_info.h @@ -24,8 +24,8 @@ #include #include -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/tmp_identity_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/tmp_identity_info.cc similarity index 98% rename from mindspore/ccsrc/parallel/ops_info/tmp_identity_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/tmp_identity_info.cc index 772a4f83f6..ed6eaa89f1 100644 --- a/mindspore/ccsrc/parallel/ops_info/tmp_identity_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/tmp_identity_info.cc @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/tmp_identity_info.h" +#include "frontend/parallel/ops_info/tmp_identity_info.h" #include #include diff --git a/mindspore/ccsrc/parallel/ops_info/tmp_identity_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/tmp_identity_info.h similarity index 93% rename from mindspore/ccsrc/parallel/ops_info/tmp_identity_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/tmp_identity_info.h index f7895d0511..7f73f81180 100644 --- a/mindspore/ccsrc/parallel/ops_info/tmp_identity_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/tmp_identity_info.h @@ -21,9 +21,9 @@ #include #include -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/transpose_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/transpose_info.cc similarity index 97% rename from mindspore/ccsrc/parallel/ops_info/transpose_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/transpose_info.cc index 49bbae0cb4..b6bb875abc 100644 --- a/mindspore/ccsrc/parallel/ops_info/transpose_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/transpose_info.cc @@ -14,14 +14,14 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/transpose_info.h" +#include "frontend/parallel/ops_info/transpose_info.h" #include #include -#include "parallel/device_manager.h" -#include "parallel/device_matrix.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/step_parallel.h" #include "utils/convert_utils.h" #include "utils/log_adapter.h" diff --git a/mindspore/ccsrc/parallel/ops_info/transpose_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/transpose_info.h similarity index 95% rename from mindspore/ccsrc/parallel/ops_info/transpose_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/transpose_info.h index 50b76bde65..d3b62dc234 100644 --- a/mindspore/ccsrc/parallel/ops_info/transpose_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/transpose_info.h @@ -23,8 +23,8 @@ #include #include "ir/value.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/virtual_dataset_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/virtual_dataset_info.cc similarity index 96% rename from mindspore/ccsrc/parallel/ops_info/virtual_dataset_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/virtual_dataset_info.cc index ce8b04d802..3b89d7c84c 100644 --- a/mindspore/ccsrc/parallel/ops_info/virtual_dataset_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/virtual_dataset_info.cc @@ -14,16 +14,16 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/virtual_dataset_info.h" +#include "frontend/parallel/ops_info/virtual_dataset_info.h" #include #include #include -#include "parallel/device_manager.h" -#include "parallel/device_matrix.h" -#include "parallel/step_parallel.h" -#include "parallel/context.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/step_parallel.h" +#include "frontend/parallel/context.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/ops_info/virtual_dataset_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/virtual_dataset_info.h similarity index 95% rename from mindspore/ccsrc/parallel/ops_info/virtual_dataset_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/virtual_dataset_info.h index 312ac7a6a4..fe54954be0 100644 --- a/mindspore/ccsrc/parallel/ops_info/virtual_dataset_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/virtual_dataset_info.h @@ -23,8 +23,8 @@ #include #include "ir/value.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/frontend/parallel/ps/common.h b/mindspore/ccsrc/frontend/parallel/ps/common.h new file mode 100644 index 0000000000..5e136c816f --- /dev/null +++ b/mindspore/ccsrc/frontend/parallel/ps/common.h @@ -0,0 +1,87 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_COMMON_H_ +#define MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_COMMON_H_ + +#include +#include +#include +#include "ps/ps.h" + +namespace mindspore { +namespace parallel { +namespace ps { +constexpr char kEnvCommType[] = "MS_COMM_TYPE"; +constexpr char kEnvInterface[] = "MS_INTERFACE"; +constexpr char kEnvPServerNum[] = "MS_SERVER_NUM"; +constexpr char kEnvWorkerNum[] = "MS_WORKER_NUM"; +constexpr char kEnvSchedulerHost[] = "MS_SCHED_HOST"; +constexpr char kEnvSchedulerPort[] = "MS_SCHED_PORT"; + +constexpr char kEnvRole[] = "MS_ROLE"; +constexpr char kEnvRoleOfPServer[] = "MS_PSERVER"; +constexpr char kEnvRoleOfWorker[] = "MS_WORKER"; +constexpr char kEnvRoleOfScheduler[] = "MS_SCHED"; + +constexpr char kDmlcCommType[] = "DMLC_PS_VAN_TYPE"; +constexpr char kDmlcInterface[] = "DMLC_INTERFACE"; +constexpr char kDmlcPServerNum[] = "DMLC_NUM_SERVER"; +constexpr char kDmlcWorkerNum[] = "DMLC_NUM_WORKER"; +constexpr char kDmlcRole[] = "DMLC_ROLE"; +constexpr char kDmlcSchedulerHost[] = "DMLC_PS_ROOT_URI"; +constexpr char kDmlcSchedulerPort[] = "DMLC_PS_ROOT_PORT"; + +constexpr char kCommTypeOfIBVerbs[] = "ibverbs"; +constexpr char kCommTypeOfTCP[] = "zmq"; +constexpr char kRoleOfPServer[] = "server"; +constexpr char kRoleOfWorker[] = "worker"; +constexpr char kRoleOfScheduler[] = "scheduler"; + +constexpr char kLearningRate[] = "learning_rate"; +constexpr char kMomentum[] = "momentum"; + +constexpr char kApplyMomentum[] = "ApplyMomentum"; +constexpr char kSparseAdam[] = "Adam"; +constexpr char 
kSparseFtrl[] = "Ftrl"; + +constexpr int kInitWeightsCmd = 10; +constexpr int kInitWeightToOptimIdCmd = 11; +constexpr int kInitOptimInputsShapeCmd = 12; +constexpr int kInitEmbeddingsCmd = 20; +constexpr int kEmbeddingLookupCmd = 30; + +constexpr size_t kInvalidKey = UINT64_MAX; + +using Key = ::ps::Key; +using Keys = ::ps::SArray; +using Values = ::ps::SArray; +using ValuesPtr = std::shared_ptr; +using Weight = ::ps::SArray; +using Grad = ::ps::SArray; +using LookupIds = ::ps::SArray; +using Lengths = ::ps::SArray; +using WeightPtr = std::shared_ptr; +using GradPtr = std::shared_ptr; +// using EmbeddingTable = std::unordered_map; +// using EmbeddingTable = ::ps::SArray; +// using EmbeddingTablePtr = std::shared_ptr; +using InputsShape = std::vector>>; +using InputsShapePtr = std::shared_ptr>>>; +} // namespace ps +} // namespace parallel +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_COMMON_H_ diff --git a/mindspore/ccsrc/frontend/parallel/ps/optimizer_info.cc b/mindspore/ccsrc/frontend/parallel/ps/optimizer_info.cc new file mode 100644 index 0000000000..e16c713e3c --- /dev/null +++ b/mindspore/ccsrc/frontend/parallel/ps/optimizer_info.cc @@ -0,0 +1,184 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "frontend/parallel/ps/optimizer_info.h" +#include + +namespace mindspore { +namespace parallel { +namespace ps { +void OptimizerInfo::AddWorkspace(const AddressPtr &workspace) { workspaces_.push_back(workspace); } + +const std::vector &OptimizerInfo::inputs() { return inputs_; } + +const std::vector &OptimizerInfo::workspaces() { return workspaces_; } + +const std::vector &OptimizerInfo::outputs() { return outputs_; } + +bool OptimizerInfo::IsSparse() const { return false; } + +size_t OptimizerInfo::grad_index() { return 0; } + +size_t OptimizerInfo::indices_index() { return 0; } + +void OptimizerInfo::UpdateWeight(const WeightPtr &weight) { + AddressPtr weight_addr = std::make_shared(); + weight_addr->addr = weight->data(); + weight_addr->size = weight->size(); + inputs_[0] = weight_addr; +} + +void DenseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) { + float *accum_grad_data = reinterpret_cast(gradient()->addr); + size_t size = gradient()->size / sizeof(float); + size_t grad_index = this->grad_index(); + size_t grad_offset = 0; + for (size_t i = 0; i < grad_index; i++) { + grad_offset += lengths[i]; + } + float *grad_data = values.data() + grad_offset; + CHECK_EQ(size, static_cast(lengths[grad_index])); + + for (size_t i = 0; i < size; i++) { + accum_grad_data[i] += grad_data[i]; + } +} + +void SparseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) { + // Append grad data to the end + float *accum_grad_data = reinterpret_cast(gradient()->addr); + + size_t grad_index = this->grad_index(); + size_t grad_offset = 0; + for (size_t i = 0; i < grad_index; i++) { + grad_offset += lengths[i]; + } + float *incr_grad_data = values.data() + grad_offset; + size_t incr_grad_size = lengths[grad_index] * sizeof(float); + + auto ret = memcpy_s(accum_grad_data + grads_offset_, incr_grad_size, incr_grad_data, incr_grad_size); + if (ret != 0) { + MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")"; + } + 
grads_offset_ += incr_grad_size; + gradient()->size += incr_grad_size; + + // Append indice data to the end + int *accum_indices_data = reinterpret_cast(indices()->addr); + + size_t indices_index = this->indices_index(); + size_t indice_offset = 0; + for (size_t i = 0; i < indices_index; i++) { + indice_offset += lengths[i]; + } + int *incr_indice_data = reinterpret_cast(values.data() + indice_offset); + size_t incr_indice_size = lengths[indices_index] * sizeof(float); + + auto ret2 = memcpy_s(accum_indices_data + indices_offset_, incr_indice_size, incr_indice_data, incr_indice_size); + if (ret2 != 0) { + MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret2 << ")"; + } + indices_offset_ += incr_indice_size; + indices()->size += incr_indice_size; +} + +void SparseOptimInfo::Reset() { + auto &gradient = this->gradient(); + gradient->size = 0; + auto &indices = this->indices(); + indices->size = 0; + grads_offset_ = 0; + indices_offset_ = 0; +} + +MomentumOptimInfo::MomentumOptimInfo(const AddressPtr &weight, const AddressPtr &accumulate, + const AddressPtr &learning_rate, const AddressPtr &gradient, + const AddressPtr &momentum) { + inputs_.push_back(weight); + inputs_.push_back(accumulate); + inputs_.push_back(learning_rate); + inputs_.push_back(gradient); + inputs_.push_back(momentum); +} + +const AddressPtr &MomentumOptimInfo::gradient() { return inputs_[3]; } + +const AddressPtr &MomentumOptimInfo::indices() { return inputs_[3]; } + +SparseAdamOptimInfo::SparseAdamOptimInfo(const AddressPtr &weight, const AddressPtr &m, const AddressPtr &v, + const AddressPtr &beta1_power, const AddressPtr &beta2_power, + const AddressPtr &learning_rate, const AddressPtr &beta1, + const AddressPtr &beta2, const AddressPtr &epsilon, const AddressPtr &grad, + const AddressPtr &indices, size_t grads_offset, size_t indices_offset) { + inputs_.push_back(weight); + inputs_.push_back(m); + inputs_.push_back(v); + inputs_.push_back(beta1_power); + inputs_.push_back(beta2_power); + 
inputs_.push_back(learning_rate); + inputs_.push_back(beta1); + inputs_.push_back(beta2); + inputs_.push_back(epsilon); + inputs_.push_back(grad); + inputs_.push_back(indices); + grads_offset_ = grads_offset; + indices_offset_ = indices_offset; +} + +void SparseAdamOptimInfo::Update(const Values &values, const Lengths &lens) { + void *data_ptr = values.data(); + AddressPtr beta1_power = inputs_[3]; + size_t size = values.size() * sizeof(float); + auto ret = memcpy_s(beta1_power->addr, size, data_ptr, size); + if (ret != 0) { + MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")"; + } +} + +const AddressPtr &SparseAdamOptimInfo::gradient() { return inputs_[9]; } + +const AddressPtr &SparseAdamOptimInfo::indices() { return inputs_[10]; } + +bool SparseAdamOptimInfo::IsSparse() const { return true; } + +size_t SparseAdamOptimInfo::grad_index() { return 6; } + +size_t SparseAdamOptimInfo::indices_index() { return 7; } + +SparseFtrlOptimInfo::SparseFtrlOptimInfo(const AddressPtr &weight, const AddressPtr &accum, const AddressPtr &linear, + const AddressPtr &grad, const AddressPtr &indices, size_t grads_offset, + size_t indices_offset) { + inputs_.push_back(weight); + inputs_.push_back(accum); + inputs_.push_back(linear); + inputs_.push_back(grad); + inputs_.push_back(indices); + grads_offset_ = grads_offset; + indices_offset_ = indices_offset; +} + +const AddressPtr &SparseFtrlOptimInfo::gradient() { return inputs_[3]; } + +const AddressPtr &SparseFtrlOptimInfo::indices() { return inputs_[4]; } + +bool SparseFtrlOptimInfo::IsSparse() const { return true; } + +size_t SparseFtrlOptimInfo::grad_index() { return 0; } + +size_t SparseFtrlOptimInfo::indices_index() { return 1; } +} // namespace ps +} // namespace parallel +} // namespace mindspore diff --git a/mindspore/ccsrc/frontend/parallel/ps/optimizer_info.h b/mindspore/ccsrc/frontend/parallel/ps/optimizer_info.h new file mode 100644 index 0000000000..bb9a64acdb --- /dev/null +++ 
b/mindspore/ccsrc/frontend/parallel/ps/optimizer_info.h @@ -0,0 +1,117 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_OPTIMIZER_INFO_H_ +#define MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_OPTIMIZER_INFO_H_ + +#include +#include "backend/kernel_compiler/kernel.h" +#include "frontend/parallel/ps/common.h" + +namespace mindspore { +namespace parallel { +namespace ps { +using mindspore::kernel::AddressPtr; +class OptimizerInfo { + public: + OptimizerInfo() = default; + virtual ~OptimizerInfo() = default; + + virtual void Update(const Values &values, const Lengths &lengths) {} + virtual void UpdateWeight(const WeightPtr &weight); + virtual void Accumulate(const Values &values, const Lengths &lengths) = 0; + virtual void Reset() {} + void AddWorkspace(const AddressPtr &workspace); + + virtual const AddressPtr &gradient() = 0; + virtual const AddressPtr &indices() = 0; + const std::vector &inputs(); + const std::vector &workspaces(); + const std::vector &outputs(); + + virtual bool IsSparse() const; + virtual size_t grad_index(); + virtual size_t indices_index(); + + protected: + std::vector inputs_; + std::vector workspaces_; + std::vector outputs_; +}; + +class DenseOptimInfo : public OptimizerInfo { + public: + DenseOptimInfo() = default; + ~DenseOptimInfo() override = default; + + void Accumulate(const Values &values, const Lengths &lens) override; +}; + 
+class SparseOptimInfo : public OptimizerInfo { + public: + SparseOptimInfo() = default; + ~SparseOptimInfo() override = default; + + void Accumulate(const Values &values, const Lengths &lens) override; + void Reset() override; + + protected: + size_t grads_offset_{0}; + size_t indices_offset_{0}; +}; + +class MomentumOptimInfo : public DenseOptimInfo { + public: + MomentumOptimInfo(const AddressPtr &weight, const AddressPtr &accumulate, const AddressPtr &learning_rate, + const AddressPtr &gradient, const AddressPtr &momentum); + ~MomentumOptimInfo() override = default; + + const AddressPtr &gradient(); + const AddressPtr &indices(); +}; + +class SparseAdamOptimInfo : public SparseOptimInfo { + public: + SparseAdamOptimInfo(const AddressPtr &weight, const AddressPtr &m, const AddressPtr &v, const AddressPtr &beta1_power, + const AddressPtr &beta2_power, const AddressPtr &learning_rate, const AddressPtr &beta1, + const AddressPtr &beta2, const AddressPtr &epsilon, const AddressPtr &grad, + const AddressPtr &indices, size_t grads_offset, size_t indices_offset); + ~SparseAdamOptimInfo() override = default; + + void Update(const Values &values, const Lengths &lens) override; + const AddressPtr &gradient(); + const AddressPtr &indices(); + bool IsSparse() const override; + size_t grad_index() override; + size_t indices_index() override; +}; + +class SparseFtrlOptimInfo : public SparseOptimInfo { + public: + SparseFtrlOptimInfo(const AddressPtr &weight, const AddressPtr &accum, const AddressPtr &linear, + const AddressPtr &grad, const AddressPtr &indices, size_t grads_offset, size_t indices_offset); + ~SparseFtrlOptimInfo() override = default; + + const AddressPtr &gradient(); + const AddressPtr &indices(); + bool IsSparse() const override; + size_t grad_index() override; + size_t indices_index() override; +}; +} // namespace ps +} // namespace parallel +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_OPTIMIZER_INFO_H_ diff --git 
a/mindspore/ccsrc/frontend/parallel/ps/optimizer_info_builder.cc b/mindspore/ccsrc/frontend/parallel/ps/optimizer_info_builder.cc new file mode 100644 index 0000000000..159a50793e --- /dev/null +++ b/mindspore/ccsrc/frontend/parallel/ps/optimizer_info_builder.cc @@ -0,0 +1,184 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "frontend/parallel/ps/optimizer_info_builder.h" +#include +#include +#include + +namespace mindspore { +namespace parallel { +namespace ps { +OptimizerInfo *OptimizerInfoBuilder::Build(const std::shared_ptr &pserver_kernel, + const WeightPtr &weight, const Keys &keys, const Values &values, + const Lengths &lens, const InputsShapePtr &inputs_shape, size_t worker_num) { + OptimizerInfo *optim_info = BuildInputs(weight, keys, values, lens, inputs_shape, worker_num); + std::vector ws_sizes = pserver_kernel->workspace_sizes(); + BuildWorkspaces(optim_info, ws_sizes, worker_num); + BuildOutputs(optim_info, worker_num); + return optim_info; +} + +void OptimizerInfoBuilder::BuildWorkspaces(OptimizerInfo *info, const std::vector &ws_sizes, + size_t worker_num) { + for (size_t i = 0; i < ws_sizes.size(); i++) { + size_t size = ws_sizes[i]; + AddressPtr workspace = std::make_shared(); + workspace->addr = new float[size]; + workspace->size = size; + info->AddWorkspace(workspace); + } +} + +OptimizerInfo *MomentumOptimInfoBuilder::BuildInputs(const WeightPtr &weight, const Keys 
&keys, const Values &values, + const Lengths &lens, const InputsShapePtr &inputs_shape, + size_t worker_num) { + AddressPtr weight_addr = std::make_shared(); + weight_addr->addr = weight->data(); + weight_addr->size = weight->size(); + void *data_ptr = values.data(); + AddressPtr accumulate = std::make_shared(); + accumulate->addr = new float[weight->size()]; + accumulate->size = weight->size(); + AddressPtr learning_rate = std::make_shared(); + learning_rate->addr = data_ptr; + learning_rate->size = lens[0]; + AddressPtr gradient = std::make_shared(); + gradient->addr = reinterpret_cast(learning_rate->addr) + lens[0]; + gradient->size = lens[1]; + AddressPtr momentum = std::make_shared(); + momentum->addr = reinterpret_cast(gradient->addr) + lens[1]; + momentum->size = lens[2]; + + return new MomentumOptimInfo(weight_addr, accumulate, learning_rate, gradient, momentum); +} + +OptimizerInfo *SparseAdamOptimInfoBuilder::BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values, + const Lengths &lens, const InputsShapePtr &inputs_shape, + size_t worker_num) { + AddressPtr weight_addr = std::make_shared(); + weight_addr->addr = weight->data(); + weight_addr->size = weight->size(); + AddressPtr m = std::make_shared(); + m->addr = new float[weight->size()]; + m->size = weight->size() * sizeof(float); + AddressPtr v = std::make_shared(); + v->addr = new float[weight->size()]; + v->size = weight->size() * sizeof(float); + + void *data_ptr = values.data(); + void *copy_data_ptr = new float[values.size()]; + auto ret = memcpy_s(copy_data_ptr, values.size() * sizeof(float), data_ptr, values.size() * sizeof(float)); + if (ret != 0) { + MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")"; + } + + AddressPtr beta1_power = std::make_shared(); + beta1_power->addr = copy_data_ptr; + beta1_power->size = lens[0] * sizeof(float); + AddressPtr beta2_power = std::make_shared(); + beta2_power->addr = reinterpret_cast(beta1_power->addr) + lens[0]; + 
beta2_power->size = lens[1] * sizeof(float); + + AddressPtr learning_rate = std::make_shared(); + learning_rate->addr = reinterpret_cast(beta2_power->addr) + lens[1]; + learning_rate->size = lens[2] * sizeof(float); + + AddressPtr beta1 = std::make_shared(); + beta1->addr = reinterpret_cast(learning_rate->addr) + lens[2]; + beta1->size = lens[3] * sizeof(float); + + AddressPtr beta2 = std::make_shared(); + beta2->addr = reinterpret_cast(beta1->addr) + lens[3]; + beta2->size = lens[4] * sizeof(float); + + AddressPtr epsilon = std::make_shared(); + epsilon->addr = reinterpret_cast(beta2->addr) + lens[4]; + epsilon->size = lens[5] * sizeof(float); + + const std::shared_ptr> &grad_shape = (*inputs_shape)[9]; + size_t total_grad_size = + std::accumulate((*grad_shape).begin(), (*grad_shape).end(), sizeof(float), std::multiplies()); + AddressPtr grad = std::make_shared(); + grad->addr = new float[total_grad_size * worker_num]; + auto ret2 = memcpy_s(grad->addr, lens[6] * sizeof(float), reinterpret_cast(epsilon->addr) + lens[5], + lens[6] * sizeof(float)); + if (ret2 != 0) { + MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret2 << ")"; + } + grad->size = lens[6] * sizeof(float); + + const std::shared_ptr> &indices_shape = (*inputs_shape)[10]; + size_t total_indice_size = + std::accumulate((*indices_shape).begin(), (*indices_shape).end(), sizeof(float), std::multiplies()); + AddressPtr indices = std::make_shared(); + indices->addr = new float[total_indice_size * worker_num]; + auto ret3 = memcpy_s(indices->addr, lens[7] * sizeof(float), + reinterpret_cast(epsilon->addr) + lens[5] + lens[6], lens[7] * sizeof(float)); + if (ret3 != 0) { + MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret3 << ")"; + } + indices->size = lens[7] * sizeof(float); + + return new SparseAdamOptimInfo(weight_addr, m, v, beta1_power, beta2_power, learning_rate, beta1, beta2, epsilon, + grad, indices, total_grad_size, total_indice_size); +} + +OptimizerInfo 
*SparseFtrlOptimInfoBuilder::BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values, + const Lengths &lens, const InputsShapePtr &inputs_shape, + size_t worker_num) { + AddressPtr weight_addr = std::make_shared(); + weight_addr->addr = weight->data(); + weight_addr->size = weight->size(); + AddressPtr accum = std::make_shared(); + accum->addr = new float[weight->size()]; + accum->size = weight->size() * sizeof(float); + for (size_t i = 0; i < weight->size(); i++) { + float *tmp = reinterpret_cast(accum->addr); + tmp[i] = 1.0; + } + AddressPtr linear = std::make_shared(); + linear->addr = new float[weight->size()]; + memcpy_s(linear->addr, weight->size() * sizeof(float), 0x00, weight->size() * sizeof(float)); + linear->size = weight->size() * sizeof(float); + + const std::shared_ptr> &grad_shape = (*inputs_shape)[3]; + size_t total_grad_size = std::accumulate((*grad_shape).begin(), (*grad_shape).end(), 1, std::multiplies()); + AddressPtr grad = std::make_shared(); + grad->addr = new float[total_grad_size * worker_num]; + auto ret = memcpy_s(grad->addr, lens[0] * sizeof(float), values.data(), lens[0] * sizeof(float)); + if (ret != 0) { + MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")"; + } + grad->size = lens[0] * sizeof(float); + + const std::shared_ptr> &indices_shape = (*inputs_shape)[4]; + size_t total_indice_size = + std::accumulate((*indices_shape).begin(), (*indices_shape).end(), 1, std::multiplies()); + AddressPtr indices = std::make_shared(); + indices->addr = new float[total_indice_size * worker_num]; + auto ret2 = memcpy_s(indices->addr, lens[1] * sizeof(float), reinterpret_cast(values.data()) + lens[0], + lens[1] * sizeof(float)); + if (ret2 != 0) { + MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret2 << ")"; + } + indices->size = lens[1] * sizeof(float); + + return new SparseFtrlOptimInfo(weight_addr, accum, linear, grad, indices, total_grad_size, total_indice_size); +} +} // namespace ps +} // namespace parallel 
+} // namespace mindspore diff --git a/mindspore/ccsrc/frontend/parallel/ps/optimizer_info_builder.h b/mindspore/ccsrc/frontend/parallel/ps/optimizer_info_builder.h new file mode 100644 index 0000000000..c5aae32921 --- /dev/null +++ b/mindspore/ccsrc/frontend/parallel/ps/optimizer_info_builder.h @@ -0,0 +1,66 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_OPTIMIZER_INFO_BUILDER_H_ + +#include +#include +#include "backend/kernel_compiler/kernel.h" +#include "backend/kernel_compiler/ps/pserver_kernel.h" +#include "frontend/parallel/ps/optimizer_info.h" + +namespace mindspore { +namespace parallel { +namespace ps { +using mindspore::kernel::KernelMod; +using mindspore::kernel::ps::PServerKernel; +class OptimizerInfoBuilder { + public: + OptimizerInfoBuilder() = default; + virtual ~OptimizerInfoBuilder() = default; + + OptimizerInfo *Build(const std::shared_ptr &pserver_kernel, const WeightPtr &weight, const Keys &keys, + const Values &values, const Lengths &lens, const InputsShapePtr &inputs_shape, + size_t worker_num); + + virtual OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values, + const Lengths &lens, const InputsShapePtr &inputs_shape, size_t worker_num) = 0; + + virtual void BuildWorkspaces(OptimizerInfo *info, const std::vector &ws_sizes, size_t worker_num); + virtual void BuildOutputs(OptimizerInfo 
*info, size_t worker_num) {} +}; + +class MomentumOptimInfoBuilder : public OptimizerInfoBuilder { + public: + OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values, const Lengths &lens, + const InputsShapePtr &inputs_shape, size_t worker_num) override; +}; + +class SparseAdamOptimInfoBuilder : public OptimizerInfoBuilder { + public: + OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values, const Lengths &lens, + const InputsShapePtr &inputs_shpae, size_t worker_num) override; +}; + +class SparseFtrlOptimInfoBuilder : public OptimizerInfoBuilder { + public: + OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values, const Lengths &lens, + const InputsShapePtr &inputs_shpae, size_t worker_num) override; +}; +} // namespace ps +} // namespace parallel +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_OPTIMIZER_INFO_BUILDER_H_ diff --git a/mindspore/ccsrc/frontend/parallel/ps/parameter_server.h b/mindspore/ccsrc/frontend/parallel/ps/parameter_server.h new file mode 100755 index 0000000000..1afb4c9fa6 --- /dev/null +++ b/mindspore/ccsrc/frontend/parallel/ps/parameter_server.h @@ -0,0 +1,559 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_PARAMETER_SERVER_H_ +#define MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_PARAMETER_SERVER_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ir/func_graph.h" +#include "backend/session/session_basic.h" +#include "backend/session/kernel_graph.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/session/session_factory.h" +#include "frontend/parallel/ps/common.h" +#include "frontend/parallel/ps/optimizer_info.h" +#include "frontend/parallel/ps/optimizer_info_builder.h" +#include "frontend/parallel/ps/util.h" +#include "runtime/device/cpu/kernel_select_cpu.h" +#include "utils/context/ms_context.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/kernel_compiler/ps/pserver_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/ps/sparse_apply_adam_ps_kernel.h" +#include "backend/kernel_compiler/ps/sparse_apply_ftrl_ps_kernel.h" +#include "backend/kernel_compiler/ps/apply_momentum_ps_kernel.h" +#include "backend/kernel_compiler/ps/embedding_look_up_ps_kernel.h" + +namespace mindspore { +namespace parallel { +namespace ps { +using mindspore::kernel::ps::PServerKernel; +template +class ParameterServer { + public: + static ParameterServer &GetInstance() { + static ParameterServer instance; + return instance; + } + + void Run(const FuncGraphPtr &func_graph); + + private: + ParameterServer() + : pserver_num_(0), + worker_num_(0), + rank_id_(0), + grad_accum_count_(0), + ps_(new ::ps::KVServer(0)), + handler_(nullptr), + func_graph_(nullptr), + kernel_graph_(nullptr), + sess_(nullptr), + thread_(nullptr) {} + ~ParameterServer() = default; + ParameterServer(const ParameterServer &) = delete; + ParameterServer &operator=(const ParameterServer &) = delete; + + struct ServerHandler { + explicit ServerHandler(ParameterServer *ps) : ps_(ps) {} + void operator()(const 
::ps::KVMeta &req_meta, const ::ps::KVPairs &req_data, ::ps::KVServer *server); + void HandlePushReq(const ::ps::KVMeta &req_meta, const ::ps::KVPairs &req_data); + void HandlePullReq(const ::ps::KVMeta &req_meta, const ::ps::KVPairs &req_data, ::ps::KVPairs *res); + void HandleInitWeights(const ::ps::KVPairs &req_data); + void HandleInitWeightToOptimId(const ::ps::KVPairs &req_data); + void HandleInitInputsShape(const ::ps::KVPairs &req_data); + void HandleInitEmbeddings(const ::ps::KVPairs &req_data); + void HandleEmbeddingLookup(const ::ps::KVMeta &req_meta, const ::ps::KVPairs &req_data, ::ps::KVPairs *res); + ParameterServer *ps_; + }; + + bool Init(const FuncGraphPtr &func_graph); + void InitOptimInfoBuilders(); + void InitWeightKeyToOptims(const Key &key, const int &optim_id); + void InitOptimInputsShape(const Keys &keys, const Values &values, const Lengths &lengths); + void InitWeight(const Key &key, const WeightPtr &weight); + void InitGrad(const Key &key, const GradPtr &grad); + void InitEmbeddingTable(const Key &key, + const std::shared_ptr>>> &shapes); + void UpdateWeights(); + void AccumGrad(const Keys &key, const Values &values, const Lengths &lengths); + WeightPtr weight(const Key &key); + void DoEmbeddingLookup(Key key, const LookupIds &lookup_ids, ::ps::KVPairs *res); + int SumOfShapes(const std::vector &shapes) const; + size_t PreComputeCapacity(const Keys &keys, const Lengths &lens); + bool ReadyForUpdateWeights(); + bool ReadyForAccumGrads(); + void ResetGradAccumCount(); + + size_t pserver_num_; + size_t worker_num_; + size_t rank_id_; + size_t grad_accum_count_; + std::unique_ptr<::ps::KVServer> ps_; + std::unique_ptr handler_; + FuncGraphPtr func_graph_; + std::shared_ptr kernel_graph_; + std::shared_ptr sess_; + + std::unordered_map> optimizers_; + std::unordered_map optim_inputs_shape_; + std::unordered_map> optim_infos_; + std::unordered_map> optim_info_builders_; + std::unordered_map weight_key_to_optims_; + std::unordered_map weights_; + 
std::unordered_map grads_; + std::unordered_map grads_accum_counter_; + // std::unordered_map embeddings_; + std::unordered_map> embedding_lookup_ops_; + std::unordered_map embedding_row_lens_; + + T learning_rate_; + T momentum_; + + std::mutex mutex_; + std::condition_variable apply_grads_cv_; + std::condition_variable accum_grads_cv_; + + std::unique_ptr thread_; + + friend struct ServerHandler; +}; + +class FuncGraph; +template +void ParameterServer::ServerHandler::operator()(const ::ps::KVMeta &req_meta, const ::ps::KVPairs &req_data, + ::ps::KVServer *server) { + ::ps::KVPairs res; + if (req_meta.cmd == kInitWeightsCmd) { + MS_LOG(ERROR) << "handle init weights cmd" << std::endl; + HandleInitWeights(req_data); + } else if (req_meta.cmd == kInitWeightToOptimIdCmd) { + MS_LOG(ERROR) << "handle init weight optim id mapping cmd" << std::endl; + HandleInitWeightToOptimId(req_data); + } else if (req_meta.cmd == kInitOptimInputsShapeCmd) { + MS_LOG(ERROR) << "handle init inputs shape cmd" << std::endl; + HandleInitInputsShape(req_data); + } else if (req_meta.cmd == kInitEmbeddingsCmd) { + MS_LOG(ERROR) << "handle init embedding cmd" << std::endl; + HandleInitEmbeddings(req_data); + } else if (req_meta.cmd == kEmbeddingLookupCmd) { + MS_LOG(ERROR) << "handle embedding lookup cmd" << std::endl; + HandleEmbeddingLookup(req_meta, req_data, &res); + } else if (req_meta.push) { + MS_LOG(ERROR) << "handle push req cmd" << std::endl; + HandlePushReq(req_meta, req_data); + } else { + MS_LOG(ERROR) << "handle pull req cmd" << std::endl; + HandlePullReq(req_meta, req_data, &res); + } + server->Response(req_meta, res); +} + +template +void ParameterServer::ServerHandler::HandlePushReq(const ::ps::KVMeta &req_meta, const ::ps::KVPairs &req_data) { + ps_->AccumGrad(req_data.keys, req_data.vals, req_data.lens); +} + +template +void ParameterServer::ServerHandler::HandlePullReq(const ::ps::KVMeta &req_meta, const ::ps::KVPairs &req_data, + ::ps::KVPairs *res) { + res->keys = 
req_data.keys; + ::ps::Key key = req_data.keys[0]; + res->vals = *(ps_->weight(key)); +} + +template +void ParameterServer::ServerHandler::HandleInitWeights(const ::ps::KVPairs &req_data) { + size_t key_num = req_data.keys.size(); + T *data_ptr = req_data.vals.data(); + size_t pos = 0; + for (size_t i = 0; i < key_num; i++) { + Key key = req_data.keys[i]; + size_t data_len = req_data.lens.size() != key_num ? req_data.vals.size() / key_num : req_data.lens[i]; + + WeightPtr weight_ptr = std::make_shared<::ps::SArray>(); + weight_ptr->CopyFrom(data_ptr + pos, data_len); + ps_->InitWeight(key, weight_ptr); + + GradPtr grad_ptr = std::make_shared<::ps::SArray>(data_len, 0); + ps_->InitGrad(key, grad_ptr); + pos += data_len; + } +} + +template +void ParameterServer::ServerHandler::HandleInitWeightToOptimId(const ::ps::KVPairs &req_data) { + size_t key_num = req_data.keys.size(); + for (size_t i = 0; i < key_num; i++) { + Key key = req_data.keys[i]; + T val = req_data.vals[i]; + ps_->InitWeightKeyToOptims(key, val); + } +} + +template +void ParameterServer::ServerHandler::HandleInitInputsShape(const ::ps::KVPairs &req_data) { + ps_->InitOptimInputsShape(req_data.keys, req_data.vals, req_data.lens); +} + +template +void ParameterServer::ServerHandler::HandleInitEmbeddings(const ::ps::KVPairs &req_data) { + std::shared_ptr>>> shapes = + std::make_shared>>>(); + std::shared_ptr> input_shape = std::make_shared>(); + std::shared_ptr> indices_shape = std::make_shared>(); + std::shared_ptr> output_shape = std::make_shared>(); + shapes->push_back(input_shape); + shapes->push_back(indices_shape); + shapes->push_back(output_shape); + + const Key &key = req_data.keys[0]; + const Lengths &lens = req_data.lens; + size_t index = 0; + for (int i = 0; i < lens[0]; i++) { + input_shape->push_back(static_cast(req_data.vals[index++])); + } + for (int j = 0; j < lens[1]; j++) { + indices_shape->push_back(static_cast(req_data.vals[index++])); + } + for (int k = 0; k < lens[2]; k++) { + 
 output_shape->push_back(static_cast(req_data.vals[index++])); + } + ps_->InitEmbeddingTable(key, shapes); +} + +template +void ParameterServer::ServerHandler::HandleEmbeddingLookup(const ::ps::KVMeta &req_meta, + const ::ps::KVPairs &req_data, ::ps::KVPairs *res) { + const Key &key = req_data.keys[0]; + ps_->DoEmbeddingLookup(key, req_data.vals, res); + for (size_t i = 0; i < req_data.vals.size(); i++) { + res->keys.push_back(req_data.vals[i]); + } +} + +template +bool ParameterServer::Init(const FuncGraphPtr &func_graph) { + const char *server_num = getenv(kEnvPServerNum); + const char *worker_num = getenv(kEnvWorkerNum); + if (server_num != nullptr) { + pserver_num_ = *server_num - '0'; + } + if (worker_num != nullptr) { + worker_num_ = *worker_num - '0'; + } + func_graph_ = func_graph; + rank_id_ = ::ps::MyRank(); + handler_.reset(new ServerHandler(this)); + + InitOptimInfoBuilders(); + + ps_->set_request_handle(*handler_); + thread_.reset(new std::thread(&ParameterServer::UpdateWeights, this)); + return true; +} + +template +void ParameterServer::InitOptimInfoBuilders() { + std::shared_ptr momentum_info_builder = std::make_shared(); + std::shared_ptr sparse_adam_info_builder = std::make_shared(); + std::shared_ptr sparse_ftrl_info_builder = std::make_shared(); + optim_info_builders_[kApplyMomentum] = momentum_info_builder; + optim_info_builders_[kSparseAdam] = sparse_adam_info_builder; + optim_info_builders_[kSparseFtrl] = sparse_ftrl_info_builder; +} + +template +void ParameterServer::InitWeightKeyToOptims(const Key &key, const int &optim_id) { + if (weight_key_to_optims_.count(key) > 0 || Util::optimizer_name(optim_id) == "") { + return; + } + weight_key_to_optims_[key] = Util::optimizer_name(optim_id); +} + +template +void ParameterServer::InitOptimInputsShape(const Keys &keys, const Values &values, const Lengths &lengths) { + InputsShapePtr inputs_shape = std::make_shared(); + int val_idx = 0; + const Key &key = keys[0]; + + if (optim_inputs_shape_.count(key) 
== 0) { + optim_inputs_shape_[key] = inputs_shape; + } + for (size_t i = 0; i < keys.size(); i++) { + auto shape = std::make_shared>(); + inputs_shape->push_back(shape); + + int len = lengths[i]; + for (int j = 0; j < len; j++) { + shape->push_back(values[val_idx++]); + } + } + if (weight_key_to_optims_.count(key) > 0) { + const std::string &optim_name = weight_key_to_optims_[key]; + if (optimizers_.count(optim_name) == 0 && optim_inputs_shape_.count(key) > 0) { + if (optim_name == kSparseAdam) { + std::shared_ptr optimizer = + std::make_shared(rank_id_, pserver_num_); + optimizer->InitKernel(optim_inputs_shape_[key]); + optimizers_[optim_name] = optimizer; + } else if (optim_name == kApplyMomentum) { + std::shared_ptr optimizer = + std::make_shared(rank_id_, pserver_num_); + optimizer->InitKernel(optim_inputs_shape_[key]); + optimizers_[optim_name] = optimizer; + } else if (optim_name == kSparseFtrl) { + std::shared_ptr optimizer = + std::make_shared(rank_id_, pserver_num_); + optimizer->InitKernel(optim_inputs_shape_[key]); + optimizers_[optim_name] = optimizer; + } + } + } +} + +template +void ParameterServer::InitWeight(const Key &key, const WeightPtr &weight) { + if (weights_.count(key) == 0) { + weights_[key] = weight; + } +} + +template +void ParameterServer::InitGrad(const Key &key, const GradPtr &grad) { + if (grads_.count(key) == 0) { + grads_[key] = grad; + grads_accum_counter_[key] = 0; + } +} + +template +void ParameterServer::InitEmbeddingTable( + const Key &key, const std::shared_ptr>>> &shapes) { + // Init embedding lookup kernel + std::shared_ptr lookup = std::make_shared(rank_id_, pserver_num_); + lookup->InitKernel(shapes); + embedding_lookup_ops_[key] = lookup; + + // Init embedding weight + const std::vector &input_shapes = lookup->input_sizes(); + size_t total_dims = 1; + for (auto shape : input_shapes) { + total_dims *= shape; + } + WeightPtr embedding = std::make_shared(total_dims, 0.01); + weights_[key] = embedding; + + 
grads_accum_counter_[key] = 0; +} + +template +void ParameterServer::UpdateWeights() { + while (true) { + std::unique_lock lock(mutex_); + apply_grads_cv_.wait(lock, [this] { return this->ReadyForUpdateWeights(); }); + + for (auto iter = weights_.begin(); iter != weights_.end(); iter++) { + Key key = iter->first; + WeightPtr weight_ptr = iter->second; + + std::shared_ptr optimizer = nullptr; + if (weight_key_to_optims_.count(key) > 0) { + const std::string &optim_name = weight_key_to_optims_[key]; + optimizer = optimizers_[optim_name]; + } + MS_EXCEPTION_IF_NULL(optimizer); + + std::shared_ptr optim_info = optim_infos_[key]; + if (optim_info == nullptr) { + continue; + } + const WeightPtr &weight = weights_[key]; + optim_info->UpdateWeight(weight); + const std::vector &inputs = optim_info->inputs(); + const std::vector &workspaces = optim_info->workspaces(); + const std::vector &outputs = optim_info->outputs(); + + optimizer->Execute(inputs, workspaces, outputs); + optim_info->Reset(); + } + ResetGradAccumCount(); + accum_grads_cv_.notify_all(); + } +} + +template +void ParameterServer::AccumGrad(const Keys &keys, const Values &values, const Lengths &lengths) { + std::unique_lock lock(mutex_); + accum_grads_cv_.wait(lock, [this] { return this->ReadyForAccumGrads(); }); + + const Key &key = keys[0]; + std::shared_ptr optim_info = optim_infos_[key]; + + // Create or update the optimizer info + if (optim_info == nullptr) { + const std::shared_ptr &builder = optim_info_builders_[weight_key_to_optims_[key]]; + std::shared_ptr pserver_kernel = optimizers_[weight_key_to_optims_[key]]; + if (pserver_kernel == nullptr) { + MS_LOG(EXCEPTION) << "no optimizer found for key " << key << " optim name " << weight_key_to_optims_[key]; + } + MS_EXCEPTION_IF_NULL(pserver_kernel); + OptimizerInfo *optim = + builder->Build(pserver_kernel, weights_[key], keys, values, lengths, optim_inputs_shape_[key], worker_num_); + optim_info.reset(optim); + optim_infos_[key] = optim_info; + } else 
{ + optim_info->Update(values, lengths); + } + MS_EXCEPTION_IF_NULL(optim_info); + + optim_info->Accumulate(values, lengths); + + grads_accum_counter_[key] += 1; + if (grads_accum_counter_[key] == worker_num_) { + grad_accum_count_++; + } + if (ReadyForUpdateWeights()) { + apply_grads_cv_.notify_one(); + } +} + +template +WeightPtr ParameterServer::weight(const Key &key) { + std::unique_lock lock(mutex_); + + if (weights_.count(key) == 0) { + MS_LOG(ERROR) << "Invalid weight key " << key; + return nullptr; + } + WeightPtr weight_ptr = weights_[key]; + WeightPtr copy_weight_ptr = std::make_shared<::ps::SArray>(weight_ptr->size(), 0); + copy_weight_ptr->CopyFrom(weight_ptr->data(), weight_ptr->size()); + return copy_weight_ptr; +} + +template +void ParameterServer::DoEmbeddingLookup(Key key, const LookupIds &lookup_ids, ::ps::KVPairs *res) { + std::unique_lock lock(mutex_); + if (weights_.count(key) == 0) { + MS_LOG(ERROR) << "Invalid embedding table key " << key; + return; + } + if (embedding_lookup_ops_.count(key) == 0) { + MS_LOG(ERROR) << "Invalid embedding lookup op key " << key; + return; + } + WeightPtr table_ptr = weights_[key]; + std::shared_ptr table_lookup_op = embedding_lookup_ops_[key]; + + // Update shapes of lookup operator + std::shared_ptr>>> shapes = + std::make_shared>>>(); + std::shared_ptr> indices_shape = std::make_shared>(); + indices_shape->emplace_back(lookup_ids.size()); + shapes->push_back(indices_shape); + table_lookup_op->ReInit(shapes); + + const std::vector output_shapes = table_lookup_op->output_sizes(); + std::vector inputs; + AddressPtr embedding_table = std::make_shared(); + AddressPtr indices = std::make_shared(); + inputs.push_back(embedding_table); + inputs.push_back(indices); + embedding_table->addr = table_ptr->data(); + embedding_table->size = table_ptr->size() * sizeof(T); + indices->addr = lookup_ids.data(); + indices->size = lookup_ids.size() * sizeof(T); + + std::vector workspaces; + std::vector outputs; + AddressPtr 
 output = std::make_shared(); + std::shared_ptr addr = std::make_shared(output_shapes[0] / sizeof(T), 0); + + output->addr = addr->data(); + output->size = output_shapes[0]; + outputs.push_back(output); + + table_lookup_op->Execute(inputs, workspaces, outputs); + res->vals = *addr; + res->lens.push_back(res->vals.size()); +} + +template +int ParameterServer::SumOfShapes(const std::vector &shapes) const { + int sum = 1; + for (auto shape : shapes) { + sum *= shape; + } + return sum; +} + +template +size_t ParameterServer::PreComputeCapacity(const Keys &keys, const Lengths &lens) { + size_t capacity = 0; + for (size_t i = 0; i < keys.size(); i++) { + Key key = keys[i]; + if (embedding_row_lens_.count(key) > 0) { + capacity += embedding_row_lens_[key] * lens[i]; + } else { + MS_LOG(ERROR) << "Invalid embedding lookup id " << key; + } + } + return capacity; +} + +template +inline bool ParameterServer::ReadyForUpdateWeights() { + return grads_accum_counter_.size() > 0 && grad_accum_count_ == grads_accum_counter_.size(); +} + +template +inline bool ParameterServer::ReadyForAccumGrads() { + return grad_accum_count_ < weights_.size(); +} + +template +inline void ParameterServer::ResetGradAccumCount() { + grad_accum_count_ = 0; + for (auto iter = grads_accum_counter_.begin(); iter != grads_accum_counter_.end(); iter++) { + grads_accum_counter_[iter->first] = 0; + } +} + +template +void ParameterServer::Run(const FuncGraphPtr &func_graph) { + ::ps::Start(0); + if (!::ps::IsServer()) { + std::cout << "This is not the Server" << std::endl; + return; + } + Init(func_graph); + thread_->join(); +} +} // namespace ps +} // namespace parallel +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_PARAMETER_SERVER_H_ diff --git a/mindspore/ccsrc/ir/primitive_base_extends.cc b/mindspore/ccsrc/frontend/parallel/ps/scheduler.cc old mode 100644 new mode 100755 similarity index 68% rename from mindspore/ccsrc/ir/primitive_base_extends.cc rename to 
mindspore/ccsrc/frontend/parallel/ps/scheduler.cc index 64bdafa4d1..274b7259b0 --- a/mindspore/ccsrc/ir/primitive_base_extends.cc +++ b/mindspore/ccsrc/frontend/parallel/ps/scheduler.cc @@ -1,25 +1,32 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/primitive_base.h" -#include "pipeline/static_analysis/abstract_function.h" - -namespace mindspore { -abstract::AbstractBasePtr Primitive::ToPrimAbstract(const AnfNodePtr &anf_node) { - auto prim_func = std::make_shared(shared_from_base(), anf_node); - return prim_func; -} -} // namespace mindspore +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "frontend/parallel/ps/scheduler.h" +#include +#include "ps/ps.h" + +namespace mindspore { +namespace parallel { +namespace ps { +void Scheduler::Run() { + ::ps::Start(0); + while (true) { + sleep(1); + } +} +} // namespace ps +} // namespace parallel +} // namespace mindspore diff --git a/mindspore/ccsrc/frontend/parallel/ps/scheduler.h b/mindspore/ccsrc/frontend/parallel/ps/scheduler.h new file mode 100755 index 0000000000..e656bcfd22 --- /dev/null +++ b/mindspore/ccsrc/frontend/parallel/ps/scheduler.h @@ -0,0 +1,40 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_SCHEDULER_H_ +#define MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_SCHEDULER_H_ +namespace mindspore { +namespace parallel { +namespace ps { +class Scheduler { + public: + static Scheduler &GetInstance() { + static Scheduler instance; + return instance; + } + + void Run(); + + private: + Scheduler() = default; + ~Scheduler() = default; + Scheduler(const Scheduler &) = delete; + Scheduler &operator=(const Scheduler &) = delete; +}; +} // namespace ps +} // namespace parallel +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_SCHEDULER_H_ diff --git a/mindspore/ccsrc/frontend/parallel/ps/util.cc b/mindspore/ccsrc/frontend/parallel/ps/util.cc new file mode 100644 index 0000000000..fc63e88901 --- /dev/null +++ b/mindspore/ccsrc/frontend/parallel/ps/util.cc @@ -0,0 +1,128 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "frontend/parallel/ps/util.h" +#include +#include "frontend/parallel/ps/common.h" +#include "common/utils.h" + +namespace mindspore { +namespace parallel { +namespace ps { +std::unordered_map Util::optimizer_to_ids{ + {kApplyMomentum, 0}, + {kSparseAdam, 1}, + {kSparseFtrl, 2}, +}; + +std::unordered_map Util::id_to_optimizers{ + {0, kApplyMomentum}, + {1, kSparseAdam}, + {2, kSparseFtrl}, +}; +bool Util::IsParamServerMode() { return IsRoleOfWorker() || IsRoleOfPServer() || IsRoleOfScheduler(); } + +bool Util::IsRoleOfWorker() { + auto role = common::GetEnv(kEnvRole); + if (strcmp(role.c_str(), kEnvRoleOfWorker) == 0) { + return true; + } else { + return false; + } +} + +bool Util::IsRoleOfPServer() { + auto role = common::GetEnv(kEnvRole); + if (strcmp(role.c_str(), kEnvRoleOfPServer) == 0) { + return true; + } else { + return false; + } +} + +bool Util::IsRoleOfScheduler() { + auto role = common::GetEnv(kEnvRole); + if (strcmp(role.c_str(), kEnvRoleOfScheduler) == 0) { + return true; + } else { + return false; + } +} + +void Util::SetInternalEnvVar() { + if (IsParamServerMode()) { + auto comm_type = common::GetEnv(kEnvCommType); + if (comm_type.size() > 0) { + (void)common::SetEnv(kDmlcCommType, comm_type.c_str()); + } + auto interface = common::GetEnv(kEnvInterface); + if (interface.size() > 0) { + (void)common::SetEnv(kDmlcInterface, interface.c_str()); + } + auto server_num = common::GetEnv(kEnvPServerNum); + if (server_num.size() > 0) { + (void)common::SetEnv(kDmlcPServerNum, server_num.c_str()); + } + auto worker_num = common::GetEnv(kEnvWorkerNum); + if (worker_num.size() > 0) { + (void)common::SetEnv(kDmlcWorkerNum, worker_num.c_str()); + } + if (IsRoleOfScheduler()) { + (void)common::SetEnv(kDmlcRole, kRoleOfScheduler); + } else if (IsRoleOfPServer()) { + (void)common::SetEnv(kDmlcRole, kRoleOfPServer); + } else if (IsRoleOfWorker()) { + (void)common::SetEnv(kDmlcRole, kRoleOfWorker); + } + auto scheduler_host = 
common::GetEnv(kEnvSchedulerHost); + if (scheduler_host.size() > 0) { + (void)common::SetEnv(kDmlcSchedulerHost, scheduler_host.c_str()); + } + auto scheduler_port = common::GetEnv(kEnvSchedulerPort); + if (scheduler_port.size() > 0) { + (void)common::SetEnv(kDmlcSchedulerPort, scheduler_port.c_str()); + } + } +} + +int Util::optimizer_id(std::string name) { + if (optimizer_to_ids.count(name) > 0) { + return optimizer_to_ids[name]; + } + return -1; +} + +std::string Util::optimizer_name(int id) { + if (id_to_optimizers.count(id) > 0) { + return id_to_optimizers[id]; + } + return ""; +} + +bool Util::is_optimizer(std::string name) { return optimizer_to_ids.count(name) > 0; } + +int Util::LocalShard(int first_dim, int rank_id, int server_num) { + int shard_size = std::round((static_cast(first_dim)) / server_num); + int remain_size = first_dim % server_num; + if (remain_size == 0 || rank_id < server_num - 1) { + return shard_size; + } else { + return first_dim - (shard_size * (server_num - 1)); + } +} +} // namespace ps +} // namespace parallel +} // namespace mindspore diff --git a/mindspore/ccsrc/frontend/parallel/ps/util.h b/mindspore/ccsrc/frontend/parallel/ps/util.h new file mode 100644 index 0000000000..8947ad36de --- /dev/null +++ b/mindspore/ccsrc/frontend/parallel/ps/util.h @@ -0,0 +1,47 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_UTIL_H_ +#define MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_UTIL_H_ + +#include +#include +#include +#include "backend/session/anf_runtime_algorithm.h" + +namespace mindspore { +namespace parallel { +namespace ps { +class Util { + public: + static bool IsParamServerMode(); + static bool IsRoleOfWorker(); + static bool IsRoleOfPServer(); + static bool IsRoleOfScheduler(); + static void SetInternalEnvVar(); + static int optimizer_id(std::string name); + static std::string optimizer_name(int id); + static bool is_optimizer(std::string name); + static int LocalShard(int first_dim, int rank_id, int server_num); + + private: + static std::unordered_map optimizer_to_ids; + static std::unordered_map id_to_optimizers; +}; +} // namespace ps +} // namespace parallel +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_UTIL_H_ diff --git a/mindspore/ccsrc/frontend/parallel/ps/worker.h b/mindspore/ccsrc/frontend/parallel/ps/worker.h new file mode 100644 index 0000000000..9ecbc28fc5 --- /dev/null +++ b/mindspore/ccsrc/frontend/parallel/ps/worker.h @@ -0,0 +1,259 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_WORKER_H_ +#define MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_WORKER_H_ + +#include +#include +#include +#include +#include +#include "ps/ps.h" +#include "utils/log_adapter.h" +#include "frontend/parallel/ps/util.h" +#include "frontend/parallel/ps/common.h" +#include "frontend/parallel/ps/worker_proxy.h" + +namespace mindspore { +namespace parallel { +namespace ps { +template +class Worker { + public: + static Worker &GetInstance() { + static Worker instance; + return instance; + } + + void Run(); + void Push(const std::vector &keys, std::vector addrs, const std::vector &sizes); + void Pull(const size_t key, void *dev_addr, const size_t size); + size_t SetParamKey(const std::string ¶m_name); + void SetKeyOptimId(size_t key, const std::string &optimizer_name); + void SetOptimInputShapes(size_t key, const std::vector &shape); + void AddEmbeddingTable(const ::ps::Key &key, const size_t &row_count); + void InitPSEmbeddingTable(const std::vector &keys, std::vector shapes, const std::vector &sizes); + void InitPSParamAndOptim(const std::string ¶m_name, void *param_data, size_t param_size); + void DoPSEmbeddingLookup(const ::ps::SArray<::ps::Key> &keys, const ::ps::SArray &lookup_ids, + const ::ps::SArray &lens, ::ps::SArray *lookup_result, int cmd); + + private: + Worker() : kv_worker_(nullptr), running_(false), key_cnt_(0) {} + ~Worker() { ::ps::Finalize(0, true); } + Worker(const Worker &) = delete; + Worker &operator=(const Worker &) = delete; + + bool IsKeyInit(const size_t key); + size_t GetParamKey(const std::string ¶m_name); + void InitPSOptimId(const size_t param_key); + void InitPSOptimInputShapes(const size_t key); + void InitPSParamData(const std::vector &keys, void *origin_addr, size_t size); + static void EmbeddingLookupIdSlicer(const ::ps::KVPairs &send, const std::vector<::ps::Range> &ranges, + std::vector>> *sliced) {} + + std::shared_ptr> kv_worker_; + bool running_; + size_t key_cnt_; + std::map 
 param_to_key_; + std::map init_keys_; + std::map key_to_optimId_; + std::map>> key_to_optim_shapes_; +}; + +template +void Worker::Run() { + if (running_) { + MS_LOG(INFO) << "Worker is already running."; + return; + } + + ::ps::Start(0); + if (!::ps::IsWorker()) { + MS_LOG(EXCEPTION) << "The role is not worker."; + } + kv_worker_ = std::make_shared>(0, 0, 1); + running_ = true; +} + +template +void Worker::Push(const std::vector &keys, std::vector addrs, const std::vector &sizes) { + size_t total_size = 0; + for (auto size : sizes) { + total_size += size; + } + ::ps::SArray total_buffer(total_size, 0); + size_t offset = 0; + for (size_t i = 0; i < sizes.size(); i++) { + memcpy(total_buffer.data() + offset / sizeof(T), addrs[i], sizes[i] * sizeof(T)); + offset += sizes[i] * sizeof(T); + } + kv_worker_->PushData(::ps::SArray<::ps::Key>(keys), total_buffer, ::ps::SArray(sizes)); +} + +template +void Worker::Pull(const size_t key, void *dev_addr, const size_t size) { + ::ps::SArray variables(size / sizeof(T), 0); + kv_worker_->Wait(kv_worker_->ZPull({key}, &variables)); + memcpy(dev_addr, variables.data(), size); +} + +template +void Worker::DoPSEmbeddingLookup(const ::ps::SArray<::ps::Key> &keys, const ::ps::SArray &lookup_ids, + const ::ps::SArray &lens, ::ps::SArray *lookup_result, int cmd) { + kv_worker_->EmbeddingLookup(keys, lookup_ids, lens, lookup_result, cmd); +} + +template +void Worker::InitPSParamData(const std::vector &keys, void *origin_addr, size_t size) { + ::ps::SArray addr(reinterpret_cast(origin_addr), size / sizeof(T)); + ::ps::SArray<::ps::Key> key(keys); + ::ps::SArray lens; + lens.push_back(addr.size()); + kv_worker_->Wait(kv_worker_->ZPush(key, addr, lens, kInitWeightsCmd)); + init_keys_[key[0]] = true; +} + +template +void Worker::SetOptimInputShapes(size_t key, const std::vector &shape) { + if (key_to_optim_shapes_.find(key) == key_to_optim_shapes_.end()) { + key_to_optim_shapes_[key] = {shape}; + } else { + 
key_to_optim_shapes_[key].push_back(shape); + } +} + +template +void Worker::InitPSOptimInputShapes(const size_t key) { + ::ps::SArray<::ps::Key> keys; + ::ps::SArray shape_len; + ::ps::SArray all_shape; + std::vector> shapes = key_to_optim_shapes_[key]; + for (auto shape : shapes) { + keys.push_back(key); + if (shape.size() == 0) { + shape_len.push_back(1); + all_shape.push_back(1); + } else { + shape_len.push_back(SizeToInt(shape.size())); + for (auto dim : shape) { + all_shape.push_back(static_cast(dim)); + } + } + } + MS_LOG(ERROR) << "keys:" << keys; + MS_LOG(ERROR) << "shape_len:" << shape_len; + MS_LOG(ERROR) << "all_shape:" << all_shape; + if (!init_keys_[key]) { + init_keys_[key] = true; + } + kv_worker_->PushData(keys, all_shape, shape_len, kInitOptimInputsShapeCmd); +} + +template +bool Worker::IsKeyInit(const size_t key) { + if (init_keys_.find(key) == init_keys_.end() || !init_keys_[key]) { + return false; + } + return true; +} + +template +size_t Worker::SetParamKey(const std::string ¶m_name) { + size_t key = UINT64_MAX; + if (param_to_key_.count(param_name)) { + key = param_to_key_[param_name]; + MS_LOG(INFO) << param_name << " key is already set: key value is " << key; + } else { + key = key_cnt_++; + param_to_key_[param_name] = key; + MS_LOG(INFO) << "Set key " << key << " for parameter " << param_name; + } + return key; +} + +template +size_t Worker::GetParamKey(const std::string ¶m_name) { + size_t key = kInvalidKey; + if (param_to_key_.find(param_name) != param_to_key_.end()) { + key = param_to_key_[param_name]; + MS_LOG(ERROR) << "Get key of parameter " << param_name << " key is " << key; + } + return key; +} + +template +void Worker::SetKeyOptimId(size_t key, const std::string &optimizer_name) { + key_to_optimId_[key] = Util::optimizer_id(optimizer_name); +} + +template +void Worker::InitPSOptimId(const size_t param_key) { + if (key_to_optimId_.count(param_key) == 0) { + MS_LOG(EXCEPTION) << "Can't find optimizer id of parameter key " << 
param_key; + } + int optim_id = key_to_optimId_[param_key]; + + ::ps::SArray<::ps::Key> keys = {param_key}; + ::ps::SArray optim_id_vals = {static_cast(optim_id)}; + ::ps::SArray optim_id_lens = {optim_id_vals.size()}; + kv_worker_->PushData(keys, optim_id_vals, optim_id_lens, kInitWeightToOptimIdCmd); +} + +template +void Worker::InitPSEmbeddingTable(const std::vector &keys, std::vector shapes, + const std::vector &sizes) { + bool has_init = IsKeyInit(keys[0]); + if (has_init) { + MS_LOG(DEBUG) << "The key embedding table of key " << keys[0] << " is initialized."; + return; + } + ::ps::SArray shapes_val; + for (auto dim : shapes) { + shapes_val.push_back(static_cast(dim)); + } + kv_worker_->Wait(kv_worker_->InitEmbeddingTable(::ps::SArray<::ps::Key>(keys), shapes_val, ::ps::SArray(sizes))); +} + +template +// Initialize parameters and optimizer kernels of Parameter Server. +void Worker::InitPSParamAndOptim(const std::string ¶m_name, void *param_data, size_t param_size) { + size_t param_key = GetParamKey(param_name); + if (param_key == kInvalidKey) { + MS_LOG(INFO) << "Parameter " << param_name << " has no key assigned."; + return; + } + bool init = IsKeyInit(param_key); + if (!init) { + MS_LOG(INFO) << "Init paramter and optimizer in parameter server side for " << param_name; + // No need to push embedding table data to Parameter Server. 
+ if (param_name.find("embedding_table") == std::string::npos && param_name.find("wide_w") == std::string::npos) { + InitPSParamData({param_key}, param_data, param_size); + } + InitPSOptimId(param_key); + InitPSOptimInputShapes(param_key); + } +} + +template +void Worker::AddEmbeddingTable(const ::ps::Key &key, const size_t &row_count) { + kv_worker_->AddEmbeddingTable(key, row_count); +} + +} // namespace ps +} // namespace parallel +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_WORKER_H_ diff --git a/mindspore/ccsrc/frontend/parallel/ps/worker_proxy.h b/mindspore/ccsrc/frontend/parallel/ps/worker_proxy.h new file mode 100644 index 0000000000..a0f58d39a4 --- /dev/null +++ b/mindspore/ccsrc/frontend/parallel/ps/worker_proxy.h @@ -0,0 +1,311 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_WORKER_PROXY_H_ +#define MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_WORKER_PROXY_H_ + +#include +#include +#include +#include +#include +#include "ps/ps.h" +#include "frontend/parallel/ps/util.h" + +namespace mindspore { +namespace parallel { +namespace ps { +template +class WorkerProxy : public ::ps::KVWorker { + public: + using Worker = ::ps::KVWorker; + using Callback = std::function; + using SlicedKVs = std::vector>>; + using Slicer = + std::function &send, const std::vector<::ps::Range> &ranges, SlicedKVs *sliced)>; + using ::ps::SimpleApp::obj_; + explicit WorkerProxy(int app_id, int customer_id, int lookup_customer_id) : Worker(app_id, customer_id) { + using _1 = std::placeholders::_1; + using _2 = std::placeholders::_2; + using _3 = std::placeholders::_3; + lookup_customer_ = std::unique_ptr<::ps::Customer>( + new ::ps::Customer(app_id, lookup_customer_id, std::bind(&WorkerProxy::ProcessLookupResult, this, _1))); + lookup_slicer_ = std::bind(&WorkerProxy::LookupIdSlicer, this, _1, _2, _3); + init_embedding_slicer_ = std::bind(&WorkerProxy::EmbeddingTableInitSlicer, this, _1, _2, _3); + push_slicer_ = std::bind(&WorkerProxy::PushSlicer, this, _1, _2, _3); + broadcast_slicer_ = std::bind(&WorkerProxy::BroadcastSlicer, this, _1, _2, _3); + } + ~WorkerProxy() override = default; + + void AddEmbeddingTable(const ::ps::Key &key, const size_t &row_count); + void EmbeddingLookup(const ::ps::SArray<::ps::Key> &keys, const ::ps::SArray &lookup_ids, + const ::ps::SArray &lens, ::ps::SArray *outs, int cmd = 0, const Callback &cb = nullptr, + int priority = 0); + int InitEmbeddingTable(const ::ps::SArray<::ps::Key> &keys, const ::ps::SArray &vals, + const ::ps::SArray &lens = {}, const Callback &cb = nullptr, int priority = 0); + void PushData(const ::ps::SArray<::ps::Key> &keys, const ::ps::SArray &vals, const ::ps::SArray &lens = {}, + int cmd = 0, int priority = 0); + + private: + template + int AddLookupCB(const 
::ps::SArray<::ps::Key> &keys, const ::ps::SArray &lookup_ids, C *vals, int cmd, + const Callback &cb); + void LookupIdSlicer(const ::ps::KVPairs &send, const std::vector<::ps::Range> &, + std::vector>> *sliced); + void EmbeddingTableInitSlicer(const ::ps::KVPairs &send, const std::vector<::ps::Range> &, + std::vector>> *sliced); + void PushSlicer(const ::ps::KVPairs &send, const std::vector<::ps::Range> &, + std::vector>> *sliced); + void BroadcastSlicer(const ::ps::KVPairs &send, const std::vector<::ps::Range> &, + std::vector>> *sliced); + void ProcessLookupResult(const ::ps::Message &msg); + void Send(::ps::Customer *customer, int timestamp, bool push, bool pull, int cmd, const ::ps::KVPairs &kvs, + const Slicer &slicer); + + std::unique_ptr<::ps::Customer> lookup_customer_; + std::unordered_map<::ps::Key, std::shared_ptr>> embedding_table_ranges_; + std::unordered_map>> lookup_results_; + std::mutex mutex_; + Slicer lookup_slicer_; + Slicer init_embedding_slicer_; + Slicer push_slicer_; + Slicer broadcast_slicer_; + std::unordered_map lookup_callbacks_; +}; + +template +void WorkerProxy::AddEmbeddingTable(const ::ps::Key &key, const size_t &row_count) { + uint64_t begin = 0; + uint64_t end = 0; + int server_num = ::ps::NumServers(); + for (int i = 0; i < server_num; i++) { + int local_row_cnt = Util::LocalShard(row_count, i, server_num); + if (i == 0) { + end = local_row_cnt - 1; + } else { + begin = end + 1; + end += local_row_cnt; + } + ::ps::Range range(begin, end); + if (embedding_table_ranges_.count(key) == 0) { + embedding_table_ranges_[key] = std::make_shared>(); + } + embedding_table_ranges_[key]->push_back(range); + } +} + +template +void WorkerProxy::EmbeddingLookup(const ::ps::SArray<::ps::Key> &keys, const ::ps::SArray &lookup_ids, + const ::ps::SArray &lens, ::ps::SArray *outs, int cmd, const Callback &cb, + int priority) { + int ts = AddLookupCB(keys, lookup_ids, outs, cmd, cb); + ::ps::KVPairs kvs; + kvs.keys = keys; + kvs.vals = lookup_ids; + 
kvs.lens = lens; + kvs.priority = priority; + Send(lookup_customer_.get(), ts, true, true, cmd, kvs, broadcast_slicer_); + lookup_customer_->WaitRequest(ts); +} + +template +int WorkerProxy::InitEmbeddingTable(const ::ps::SArray<::ps::Key> &keys, const ::ps::SArray &vals, + const ::ps::SArray &lens, const Callback &cb, int priority) { + int ts = obj_->NewRequest(::ps::kServerGroup); + ::ps::KVPairs kvs; + kvs.keys = keys; + kvs.vals = vals; + kvs.lens = lens; + kvs.priority = priority; + Send(obj_, ts, true, false, kInitEmbeddingsCmd, kvs, init_embedding_slicer_); + return ts; +} + +template +void WorkerProxy::PushData(const ::ps::SArray<::ps::Key> &keys, const ::ps::SArray &vals, + const ::ps::SArray &lens, int cmd, int priority) { + int ts = obj_->NewRequest(::ps::kServerGroup); + ::ps::KVPairs kvs; + kvs.keys = keys; + kvs.vals = vals; + kvs.lens = lens; + kvs.priority = priority; + Send(obj_, ts, true, false, cmd, kvs, push_slicer_); + obj_->WaitRequest(ts); +} + +template +template +int WorkerProxy::AddLookupCB(const ::ps::SArray<::ps::Key> &keys, const ::ps::SArray &lookup_ids, + C *lookup_result, int cmd, const Callback &cb) { + int ts = lookup_customer_->NewRequest(::ps::kServerGroup); + const auto &callback = [this, ts, keys, lookup_ids, lookup_result, cb]() mutable { + mutex_.lock(); + auto &kvs = lookup_results_[ts]; + mutex_.unlock(); + + size_t total_len = 0; + const auto &s = kvs[0]; + for (size_t i = 0; i < s.lens.size(); i++) { + total_len += s.lens[i]; + } + lookup_result->resize(total_len, 0); + T *result_addr = lookup_result->data(); + + for (const auto &s : kvs) { + size_t offset = 0; + for (size_t i = 0; i < s.vals.size(); i++) { + result_addr[offset++] += s.vals[i]; + } + } + + mutex_.lock(); + lookup_results_.erase(ts); + mutex_.unlock(); + if (cb) cb(); + }; + lookup_callbacks_[ts] = callback; + return ts; +} + +template +void WorkerProxy::LookupIdSlicer(const ::ps::KVPairs &send, const std::vector<::ps::Range> &, + std::vector>> *sliced) { 
+ int *data = send.lens.data(); + size_t size = send.lens.size(); + std::vector lookup_ids(data, data + size); + std::sort(lookup_ids.begin(), lookup_ids.end()); + + const Key &key = send.keys[0]; + const std::vector<::ps::Range> &ranges = *(embedding_table_ranges_[key]); + sliced->resize(ranges.size()); + + size_t index = 0; + for (size_t i = 0; i < ranges.size(); i++) { + const ::ps::Range &range = ranges[i]; + const auto &begin = range.begin(); + const auto &end = range.end(); + auto &kvs = sliced->at(i).second; + + auto lookup_id = static_cast(lookup_ids[index]); + while (lookup_id >= begin && lookup_id <= end) { + kvs.vals.push_back(lookup_id); + if (++index >= lookup_ids.size()) { + break; + } + lookup_id = static_cast(lookup_ids[index]); + } + kvs.keys.push_back(key); + kvs.lens.push_back(kvs.vals.size()); + + if (kvs.vals.size() == 0) { + sliced->at(i).first = false; + } else { + sliced->at(i).first = true; + } + } +} + +template +void WorkerProxy::EmbeddingTableInitSlicer(const ::ps::KVPairs &send, const std::vector<::ps::Range> &, + std::vector>> *sliced) { + const Key &key = send.keys[0]; + const std::vector<::ps::Range> &ranges = *(embedding_table_ranges_[key]); + sliced->resize(ranges.size()); + for (size_t i = 0; i < ranges.size(); i++) { + sliced->at(i).first = true; + sliced->at(i).second = send; + } +} + +template +void WorkerProxy::PushSlicer(const ::ps::KVPairs &send, const std::vector<::ps::Range> &, + std::vector>> *sliced) { + auto server_num = ::ps::Postoffice::Get()->num_servers(); + sliced->resize(server_num); + for (int i = 0; i < server_num; i++) { + sliced->at(i).first = true; + sliced->at(i).second = send; + } +} + +template +void WorkerProxy::BroadcastSlicer(const ::ps::KVPairs &send, const std::vector<::ps::Range> &, + std::vector>> *sliced) { + auto server_num = ::ps::Postoffice::Get()->num_servers(); + sliced->resize(server_num); + for (int i = 0; i < server_num; i++) { + sliced->at(i).first = true; + sliced->at(i).second = send; + 
} +} + +template +void WorkerProxy::ProcessLookupResult(const ::ps::Message &msg) { + int ts = msg.meta.timestamp; + if (msg.meta.pull) { + CHECK_GE(msg.data.size(), (size_t)2); + ::ps::KVPairs kvs; + kvs.keys = msg.data[0]; + kvs.vals = msg.data[1]; + if (msg.data.size() > (size_t)2) { + kvs.lens = msg.data[2]; + } + mutex_.lock(); + lookup_results_[ts].push_back(kvs); + mutex_.unlock(); + } + if (lookup_customer_->NumResponse(ts) == ::ps::Postoffice::Get()->num_servers() - 1) { + const auto &cb = lookup_callbacks_[ts]; + cb(); + lookup_callbacks_.erase(ts); + } +} + +template +void WorkerProxy::Send(::ps::Customer *customer, int timestamp, bool push, bool pull, int cmd, + const ::ps::KVPairs &kvs, const Slicer &slicer) { + SlicedKVs sliced; + slicer(kvs, ::ps::Postoffice::Get()->GetServerKeyRanges(), &sliced); + + for (size_t i = 0; i < sliced.size(); i++) { + const auto &s = sliced[i]; + if (!s.first) continue; + ::ps::Message msg; + msg.meta.app_id = customer->app_id(); + msg.meta.customer_id = customer->customer_id(); + msg.meta.request = true; + msg.meta.push = push; + msg.meta.pull = pull; + msg.meta.head = cmd; + msg.meta.timestamp = timestamp; + msg.meta.recver = ::ps::Postoffice::Get()->ServerRankToID(i); + msg.meta.priority = kvs.priority; + const auto &kvs = s.second; + if (kvs.keys.size()) { + msg.AddData(kvs.keys); + msg.AddData(kvs.vals); + if (kvs.lens.size()) { + msg.AddData(kvs.lens); + } + } + ::ps::Postoffice::Get()->van()->Send(msg); + } +} +} // namespace ps +} // namespace parallel +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_WORKER_PROXY_H_ diff --git a/mindspore/ccsrc/parallel/status.h b/mindspore/ccsrc/frontend/parallel/status.h similarity index 100% rename from mindspore/ccsrc/parallel/status.h rename to mindspore/ccsrc/frontend/parallel/status.h diff --git a/mindspore/ccsrc/parallel/step_auto_parallel.cc b/mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc similarity index 97% rename from 
mindspore/ccsrc/parallel/step_auto_parallel.cc rename to mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc index 894177df8d..8d54eb454a 100644 --- a/mindspore/ccsrc/parallel/step_auto_parallel.cc +++ b/mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "parallel/step_auto_parallel.h" +#include "frontend/parallel/step_auto_parallel.h" #include #include @@ -28,23 +28,23 @@ #include #include "ir/anf.h" -#include "ir/param_value_py.h" +#include "ir/param_value.h" #include "ir/tensor.h" -#include "optimizer/opt.h" -#include "optimizer/optimizer.h" -#include "parallel/auto_parallel/dp_algo_costmodel.h" -#include "parallel/auto_parallel/edge_costmodel.h" -#include "parallel/auto_parallel/graph_costmodel.h" -#include "parallel/auto_parallel/rec_core/rec_generate_strategy.h" -#include "parallel/auto_parallel/rec_core/rec_parse_graph.h" -#include "parallel/auto_parallel/rec_core/rec_partition.h" -#include "parallel/context.h" -#include "parallel/ops_info/tmp_identity_info.h" -#include "parallel/ops_info/reshape_info.h" -#include "parallel/step_parallel.h" -#include "parallel/strategy_checkpoint/parallel_strategy_checkpoint.h" -#include "pipeline/parse/python_adapter.h" -#include "pipeline/pipeline.h" +#include "frontend/optimizer/opt.h" +#include "frontend/optimizer/optimizer.h" +#include "frontend/parallel/auto_parallel/dp_algo_costmodel.h" +#include "frontend/parallel/auto_parallel/edge_costmodel.h" +#include "frontend/parallel/auto_parallel/graph_costmodel.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_parse_graph.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_partition.h" +#include "frontend/parallel/context.h" +#include "frontend/parallel/ops_info/tmp_identity_info.h" +#include "frontend/parallel/ops_info/reshape_info.h" +#include "frontend/parallel/step_parallel.h" +#include 
"frontend/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h" +#include "pipeline/jit/parse/python_adapter.h" +#include "pipeline/jit/pipeline.h" namespace mindspore { namespace parallel { @@ -123,9 +123,8 @@ std::vector ExtractInputParameterByNode(const CNodePtr &node) { if (input->isa()) { auto input_parameter = input->cast(); if (input_parameter->has_default()) { - auto param_value = std::dynamic_pointer_cast(input_parameter->default_param()); - bool require_grad = py::cast(parse::python_adapter::GetPyObjAttr(param_value->value(), "requires_grad")); - is_parameter.push_back(require_grad); + bool requires_grad = input_parameter->default_param()->requires_grad(); + is_parameter.push_back(requires_grad); } else { is_parameter.push_back(false); } @@ -799,9 +798,8 @@ void AugmentCostGraph(const std::vector &all_nodes) { auto casted_target_parameter = target_parameter->cast(); MS_EXCEPTION_IF_NULL(casted_target_parameter); if (casted_target_parameter->has_default()) { - auto param_value = std::dynamic_pointer_cast(casted_target_parameter->default_param()); - bool require_grad = py::cast(parse::python_adapter::GetPyObjAttr(param_value->value(), "requires_grad")); - is_parameter.push_back(require_grad); + bool requires_grad = casted_target_parameter->default_param()->requires_grad(); + is_parameter.push_back(requires_grad); } else { is_parameter.push_back(false); } diff --git a/mindspore/ccsrc/parallel/step_auto_parallel.h b/mindspore/ccsrc/frontend/parallel/step_auto_parallel.h similarity index 95% rename from mindspore/ccsrc/parallel/step_auto_parallel.h rename to mindspore/ccsrc/frontend/parallel/step_auto_parallel.h index c923e5770f..f87d49b736 100644 --- a/mindspore/ccsrc/parallel/step_auto_parallel.h +++ b/mindspore/ccsrc/frontend/parallel/step_auto_parallel.h @@ -22,9 +22,9 @@ #include #include #include "ir/anf.h" -#include "optimizer/opt.h" -#include "parallel/status.h" -#include "pipeline/pipeline.h" +#include "frontend/optimizer/opt.h" +#include 
"frontend/parallel/status.h" +#include "pipeline/jit/pipeline.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/step_parallel.cc b/mindspore/ccsrc/frontend/parallel/step_parallel.cc similarity index 97% rename from mindspore/ccsrc/parallel/step_parallel.cc rename to mindspore/ccsrc/frontend/parallel/step_parallel.cc index 7d1200b190..6b9cfd9d37 100644 --- a/mindspore/ccsrc/parallel/step_parallel.cc +++ b/mindspore/ccsrc/frontend/parallel/step_parallel.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "parallel/step_parallel.h" +#include "frontend/parallel/step_parallel.h" #include #include @@ -28,22 +28,22 @@ #include #include "ir/tensor.h" -#include "ir/param_value_py.h" -#include "operator/ops.h" -#include "optimizer/optimizer.h" -#include "parallel/auto_parallel/graph_costmodel.h" -#include "parallel/context.h" -#include "parallel/device_manager.h" -#include "parallel/dynamic_creator.h" -#include "parallel/graph_util/generate_graph.h" -#include "parallel/graph_util/graph_info.h" -#include "parallel/graph_util/node_info.h" -#include "parallel/node_check.h" -#include "parallel/ops_info/matmul_info.h" -#include "parallel/strategy_checkpoint/parallel_strategy_checkpoint.h" +#include "ir/param_value.h" +#include "frontend/operator/ops.h" +#include "frontend/optimizer/optimizer.h" +#include "frontend/parallel/auto_parallel/graph_costmodel.h" +#include "frontend/parallel/context.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/dynamic_creator.h" +#include "frontend/parallel/graph_util/generate_graph.h" +#include "frontend/parallel/graph_util/graph_info.h" +#include "frontend/parallel/graph_util/node_info.h" +#include "frontend/parallel/node_check.h" +#include "frontend/parallel/ops_info/matmul_info.h" +#include "frontend/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h" #include "utils/comm_manager.h" #include "utils/symbolic.h" -#include "pipeline/static_analysis/prim.h" 
+#include "pipeline/jit/static_analysis/prim.h" using mindspore::tensor::Tensor; @@ -536,7 +536,7 @@ std::vector ReplaceOpInput(const Operator &replace_op, const std::st } std::vector replace_input = {NewValueNode(pyop_instance), node->input(1)}; auto prim = GetValueNode(node->input(0)); - if (prim->name() == GATHERV2 || prim->name() == SPARSE_GATHERV2) { + if (prim->name() == EMBEDDING_LOOKUP) { replace_input = {NewValueNode(pyop_instance), node->input(1), node->input(2)}; } if (!params.empty()) { @@ -611,6 +611,12 @@ void StepReplaceOp(OperatorVector replace_op, const CNodePtr &node) { ScopePtr scope = node->scope(); MS_EXCEPTION_IF_NULL(scope); replace_node->set_scope(scope); + PrimitivePtr prim = GetValueNode(replace_node->input(0)); + if (prim->name() == EMBEDDING_LOOKUP) { + auto attrs = prim->attrs(); + attrs[TARGET] = MakeValue(CPU); + (void)prim->SetAttrs(attrs); + } if (index == replace_op.size() - 1) { (void)replace_node->set_operator_info(node->operator_info()); } @@ -1298,9 +1304,7 @@ bool ParameterIsCloned(const FuncGraphPtr &root, const AnfNodePtr ¶meter_nod return false; } - auto param_value = std::dynamic_pointer_cast(cloned_parameter->default_param()); - py::object clone_info = parse::python_adapter::GetPyObjAttr(param_value->value(), CLONE_INFO); - bool cloned = py::cast(parse::python_adapter::GetPyObjAttr(clone_info, CLONED)); + bool cloned = cloned_parameter->default_param()->cloned(); if (!cloned) { return false; } @@ -1321,9 +1325,7 @@ void SetClonedTensorShapeForOptimizer(const FuncGraphPtr &root) { } // get the cloned index - auto param_value = std::dynamic_pointer_cast(cloned_parameter->default_param()); - py::object cloned_info = parse::python_adapter::GetPyObjAttr(param_value->value(), CLONE_INFO); - int32_t cloned_index = py::cast(parse::python_adapter::GetPyObjAttr(cloned_info, CLONED_INDEX)); + int32_t cloned_index = cloned_parameter->default_param()->cloned_index(); // find the be cloned parameter bool found_be_cloned_parameter = 
false; @@ -1337,21 +1339,17 @@ void SetClonedTensorShapeForOptimizer(const FuncGraphPtr &root) { continue; } - auto param_value_cloned = std::dynamic_pointer_cast(be_cloned_parameter->default_param()); - py::object be_cloned_info = parse::python_adapter::GetPyObjAttr(param_value_cloned->value(), CLONE_INFO); - if (!py::cast(parse::python_adapter::GetPyObjAttr(be_cloned_info, BE_CLONED))) { + const auto ¶m_value_cloned = be_cloned_parameter->default_param(); + if (!param_value_cloned->be_cloned()) { continue; } // get the be cloned index - py::list be_cloned_index = parse::python_adapter::GetPyObjAttr(be_cloned_info, BE_CLONED_INDEX); - for (auto &index : be_cloned_index) { - if (cloned_index == py::cast(index)) { - found_be_cloned_parameter = true; - cloned_from_parameter = be_cloned_parameter; - cloned_from_node = be_cloned_parameter_node; - break; - } + auto &be_cloned_index = param_value_cloned->be_cloned_index(); + if (std::find(be_cloned_index.begin(), be_cloned_index.end(), cloned_index) != be_cloned_index.end()) { + found_be_cloned_parameter = true; + cloned_from_parameter = be_cloned_parameter; + cloned_from_node = be_cloned_parameter_node; } } @@ -1375,7 +1373,6 @@ void SetClonedTensorShapeForOptimizer(const FuncGraphPtr &root) { std::string env = common::GetEnv("SLICE_ENV"); if (!env.empty()) { MS_LOG(INFO) << "Slice tensors shape will be configured from env:" << env; - abstract::InitUndeterminedFromEnv(env); } } @@ -2090,9 +2087,9 @@ std::string NodeParameterName(const CNodePtr &node) { if (input->isa()) { auto input_parameter = input->cast(); if (input_parameter->has_default()) { - auto param_value = std::dynamic_pointer_cast(input_parameter->default_param()); - if (py::cast(parse::python_adapter::GetPyObjAttr(param_value->value(), REQUIRES_GRAD))) { - return py::cast(parse::python_adapter::GetPyObjAttr(param_value->value(), PARAM_NAME)); + const auto ¶m_value = input_parameter->default_param(); + if (param_value->requires_grad()) { + return 
param_value->name(); } } } @@ -2120,6 +2117,9 @@ void CheckpointStrategy(const FuncGraphPtr &func_graph) { MS_EXCEPTION_IF_NULL(prim); OperatorInfoPtr operator_info = cnode->operator_info(); if (operator_info) { + if (operator_info->name().find(RESHAPEINFO) != std::string::npos) { + continue; + } StrategyPtr strategyPtr = operator_info->strategy(); MS_EXCEPTION_IF_NULL(node->scope()); stra_map[param_name] = strategyPtr; diff --git a/mindspore/ccsrc/parallel/step_parallel.h b/mindspore/ccsrc/frontend/parallel/step_parallel.h similarity index 97% rename from mindspore/ccsrc/parallel/step_parallel.h rename to mindspore/ccsrc/frontend/parallel/step_parallel.h index 308473dcd7..f9fe67ea6b 100644 --- a/mindspore/ccsrc/parallel/step_parallel.h +++ b/mindspore/ccsrc/frontend/parallel/step_parallel.h @@ -27,9 +27,9 @@ #include #include "./common.h" -#include "optimizer/opt.h" -#include "parallel/strategy.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/optimizer/opt.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" using OperatorInfoPtr = std::shared_ptr; diff --git a/mindspore/ccsrc/parallel/strategy.h b/mindspore/ccsrc/frontend/parallel/strategy.h similarity index 98% rename from mindspore/ccsrc/parallel/strategy.h rename to mindspore/ccsrc/frontend/parallel/strategy.h index bc62dd5308..ca01164a6a 100644 --- a/mindspore/ccsrc/parallel/strategy.h +++ b/mindspore/ccsrc/frontend/parallel/strategy.h @@ -23,7 +23,7 @@ #include #include -#include "parallel/status.h" +#include "frontend/parallel/status.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc b/mindspore/ccsrc/frontend/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc similarity index 97% rename from mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc rename to 
mindspore/ccsrc/frontend/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc index de10f4beb4..bf7c4e29ab 100644 --- a/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc +++ b/mindspore/ccsrc/frontend/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "parallel/strategy_checkpoint/parallel_strategy_checkpoint.h" +#include "frontend/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h" #include #include @@ -93,6 +93,7 @@ Status StrategyCheckpoint::Save(const StrategyMap &strategy_map) { parallel_strategy_item->set_node_name(node_stra.first); straspb::ParallelStrategys *parallel_strategys = parallel_strategy_item->mutable_parallel_strategys(); MS_EXCEPTION_IF_NULL(parallel_strategys); + MS_EXCEPTION_IF_NULL(node_stra.second); parallel_strategys->set_stage(IntToUint(node_stra.second->GetInputStage())); for (auto &dims : node_stra.second->GetInputDim()) { straspb::ParallelStrategy *parallel_strategy = parallel_strategys->add_parallel_strategy(); diff --git a/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h b/mindspore/ccsrc/frontend/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h similarity index 93% rename from mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h rename to mindspore/ccsrc/frontend/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h index a758a9e7bb..67cbb92ee2 100644 --- a/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h +++ b/mindspore/ccsrc/frontend/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h @@ -19,9 +19,9 @@ #include #include -#include "parallel/ops_info/ops_utils.h" -#include "parallel/strategy.h" -#include "parallel/context.h" +#include "frontend/parallel/ops_info/ops_utils.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/context.h" namespace mindspore { namespace parallel { diff --git 
a/mindspore/ccsrc/parallel/tensor_layout/arrangement.cc b/mindspore/ccsrc/frontend/parallel/tensor_layout/arrangement.cc similarity index 98% rename from mindspore/ccsrc/parallel/tensor_layout/arrangement.cc rename to mindspore/ccsrc/frontend/parallel/tensor_layout/arrangement.cc index 235ab00302..cff3d53a88 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/arrangement.cc +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/arrangement.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include "parallel/tensor_layout/arrangement.h" +#include "frontend/parallel/tensor_layout/arrangement.h" #include #include #include #include "common/utils.h" -#include "parallel/status.h" -#include "parallel/tensor_layout/shape_util.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/tensor_layout/shape_util.h" #include "utils/convert_utils.h" #include "utils/log_adapter.h" diff --git a/mindspore/ccsrc/parallel/tensor_layout/arrangement.h b/mindspore/ccsrc/frontend/parallel/tensor_layout/arrangement.h similarity index 95% rename from mindspore/ccsrc/parallel/tensor_layout/arrangement.h rename to mindspore/ccsrc/frontend/parallel/tensor_layout/arrangement.h index ca71b05c91..ab807fb20a 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/arrangement.h +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/arrangement.h @@ -23,8 +23,8 @@ #include #include #include -#include "parallel/status.h" -#include "parallel/tensor_layout/array.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/tensor_layout/array.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/tensor_layout/array.cc b/mindspore/ccsrc/frontend/parallel/tensor_layout/array.cc similarity index 95% rename from mindspore/ccsrc/parallel/tensor_layout/array.cc rename to mindspore/ccsrc/frontend/parallel/tensor_layout/array.cc index ef358e7cde..4e1f467793 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/array.cc +++ 
b/mindspore/ccsrc/frontend/parallel/tensor_layout/array.cc @@ -14,9 +14,9 @@ * limitations under the License. */ -#include "parallel/tensor_layout/array.h" +#include "frontend/parallel/tensor_layout/array.h" #include -#include "parallel/status.h" +#include "frontend/parallel/status.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/tensor_layout/array.h b/mindspore/ccsrc/frontend/parallel/tensor_layout/array.h similarity index 97% rename from mindspore/ccsrc/parallel/tensor_layout/array.h rename to mindspore/ccsrc/frontend/parallel/tensor_layout/array.h index 5aa3bdb138..13b3982a18 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/array.h +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/array.h @@ -22,7 +22,7 @@ #include #include #include -#include "parallel/status.h" +#include "frontend/parallel/status.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/tensor_layout/construct_operator.cc b/mindspore/ccsrc/frontend/parallel/tensor_layout/construct_operator.cc similarity index 99% rename from mindspore/ccsrc/parallel/tensor_layout/construct_operator.cc rename to mindspore/ccsrc/frontend/parallel/tensor_layout/construct_operator.cc index b5ca5ed60a..9395d3df89 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/construct_operator.cc +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/construct_operator.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "parallel/tensor_layout/construct_operator.h" +#include "frontend/parallel/tensor_layout/construct_operator.h" #include #include diff --git a/mindspore/ccsrc/parallel/tensor_layout/construct_operator.h b/mindspore/ccsrc/frontend/parallel/tensor_layout/construct_operator.h similarity index 95% rename from mindspore/ccsrc/parallel/tensor_layout/construct_operator.h rename to mindspore/ccsrc/frontend/parallel/tensor_layout/construct_operator.h index 1a69638fb6..b06d70af36 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/construct_operator.h +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/construct_operator.h @@ -22,8 +22,8 @@ #include #include "ir/value.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/status.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/status.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/tensor_layout/layout_transfer.cc b/mindspore/ccsrc/frontend/parallel/tensor_layout/layout_transfer.cc similarity index 92% rename from mindspore/ccsrc/parallel/tensor_layout/layout_transfer.cc rename to mindspore/ccsrc/frontend/parallel/tensor_layout/layout_transfer.cc index 84c0580ba8..d5d34a484f 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/layout_transfer.cc +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/layout_transfer.cc @@ -14,9 +14,9 @@ * limitations under the License. 
*/ -#include "parallel/tensor_layout/layout_transfer.h" +#include "frontend/parallel/tensor_layout/layout_transfer.h" #include "common/utils.h" -#include "parallel/status.h" +#include "frontend/parallel/status.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/tensor_layout/layout_transfer.h b/mindspore/ccsrc/frontend/parallel/tensor_layout/layout_transfer.h similarity index 93% rename from mindspore/ccsrc/parallel/tensor_layout/layout_transfer.h rename to mindspore/ccsrc/frontend/parallel/tensor_layout/layout_transfer.h index c4da4b728f..01c56fc7cf 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/layout_transfer.h +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/layout_transfer.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_PARALLEL_TENSOR_LAYOUT_LAYOUT_TRANSFER_H_ #include -#include "parallel/status.h" -#include "parallel/tensor_layout/tensor_layout.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/tensor_layout/tensor_layout.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/tensor_layout/map.cc b/mindspore/ccsrc/frontend/parallel/tensor_layout/map.cc similarity index 97% rename from mindspore/ccsrc/parallel/tensor_layout/map.cc rename to mindspore/ccsrc/frontend/parallel/tensor_layout/map.cc index 669920fc44..184f0c7530 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/map.cc +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/map.cc @@ -14,13 +14,13 @@ * limitations under the License. 
*/ -#include "parallel/tensor_layout/map.h" +#include "frontend/parallel/tensor_layout/map.h" #include #include #include #include "common/utils.h" -#include "parallel/status.h" -#include "parallel/tensor_layout/shape_util.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/tensor_layout/shape_util.h" #include "utils/convert_utils.h" #include "utils/log_adapter.h" diff --git a/mindspore/ccsrc/parallel/tensor_layout/map.h b/mindspore/ccsrc/frontend/parallel/tensor_layout/map.h similarity index 91% rename from mindspore/ccsrc/parallel/tensor_layout/map.h rename to mindspore/ccsrc/frontend/parallel/tensor_layout/map.h index 8c8bba2775..3d299d4b90 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/map.h +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/map.h @@ -22,9 +22,9 @@ #include #include #include -#include "parallel/status.h" -#include "parallel/tensor_layout/arrangement.h" -#include "parallel/tensor_layout/array.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/tensor_layout/arrangement.h" +#include "frontend/parallel/tensor_layout/array.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/tensor_layout/redistribution_layout_transfer.cc b/mindspore/ccsrc/frontend/parallel/tensor_layout/redistribution_layout_transfer.cc similarity index 91% rename from mindspore/ccsrc/parallel/tensor_layout/redistribution_layout_transfer.cc rename to mindspore/ccsrc/frontend/parallel/tensor_layout/redistribution_layout_transfer.cc index 7ed07ac02e..a5a488d807 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/redistribution_layout_transfer.cc +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/redistribution_layout_transfer.cc @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#include "parallel/tensor_layout/redistribution_layout_transfer.h" -#include "parallel/status.h" -#include "parallel/tensor_layout/reshape_layout_transfer.h" -#include "parallel/tensor_layout/shape_util.h" +#include "frontend/parallel/tensor_layout/redistribution_layout_transfer.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/tensor_layout/reshape_layout_transfer.h" +#include "frontend/parallel/tensor_layout/shape_util.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/tensor_layout/redistribution_layout_transfer.h b/mindspore/ccsrc/frontend/parallel/tensor_layout/redistribution_layout_transfer.h similarity index 88% rename from mindspore/ccsrc/parallel/tensor_layout/redistribution_layout_transfer.h rename to mindspore/ccsrc/frontend/parallel/tensor_layout/redistribution_layout_transfer.h index 7b57f46dd6..0347b6423a 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/redistribution_layout_transfer.h +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/redistribution_layout_transfer.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_PARALLEL_TENSOR_LAYOUT_REDISTRIBUTION_LAYOUT_TRANSFER_H_ #include -#include "parallel/status.h" -#include "parallel/tensor_layout/layout_transfer.h" -#include "parallel/tensor_layout/reshape_layout_transfer.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/tensor_layout/layout_transfer.h" +#include "frontend/parallel/tensor_layout/reshape_layout_transfer.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/tensor_layout/redistribution_operator_infer.cc b/mindspore/ccsrc/frontend/parallel/tensor_layout/redistribution_operator_infer.cc similarity index 98% rename from mindspore/ccsrc/parallel/tensor_layout/redistribution_operator_infer.cc rename to mindspore/ccsrc/frontend/parallel/tensor_layout/redistribution_operator_infer.cc index 946620ec4c..6ac24418b7 100644 --- 
a/mindspore/ccsrc/parallel/tensor_layout/redistribution_operator_infer.cc +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/redistribution_operator_infer.cc @@ -14,11 +14,11 @@ * limitations under the License. */ -#include "parallel/tensor_layout/redistribution_operator_infer.h" +#include "frontend/parallel/tensor_layout/redistribution_operator_infer.h" #include -#include "parallel/device_manager.h" +#include "frontend/parallel/device_manager.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/tensor_layout/redistribution_operator_infer.h b/mindspore/ccsrc/frontend/parallel/tensor_layout/redistribution_operator_infer.h similarity index 95% rename from mindspore/ccsrc/parallel/tensor_layout/redistribution_operator_infer.h rename to mindspore/ccsrc/frontend/parallel/tensor_layout/redistribution_operator_infer.h index 37a8ac3d9e..66cdb3f925 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/redistribution_operator_infer.h +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/redistribution_operator_infer.h @@ -23,8 +23,8 @@ #include #include -#include "parallel/tensor_layout/construct_operator.h" -#include "parallel/tensor_layout/redistribution_layout_transfer.h" +#include "frontend/parallel/tensor_layout/construct_operator.h" +#include "frontend/parallel/tensor_layout/redistribution_layout_transfer.h" #include "utils/convert_utils.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/tensor_layout/reshape_layout_transfer.cc b/mindspore/ccsrc/frontend/parallel/tensor_layout/reshape_layout_transfer.cc similarity index 96% rename from mindspore/ccsrc/parallel/tensor_layout/reshape_layout_transfer.cc rename to mindspore/ccsrc/frontend/parallel/tensor_layout/reshape_layout_transfer.cc index 4c66befd78..98f7cf78fa 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/reshape_layout_transfer.cc +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/reshape_layout_transfer.cc @@ -14,9 +14,9 @@ * limitations 
under the License. */ -#include "parallel/tensor_layout/reshape_layout_transfer.h" -#include "parallel/status.h" -#include "parallel/tensor_layout/shape_util.h" +#include "frontend/parallel/tensor_layout/reshape_layout_transfer.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/tensor_layout/shape_util.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/tensor_layout/reshape_layout_transfer.h b/mindspore/ccsrc/frontend/parallel/tensor_layout/reshape_layout_transfer.h similarity index 95% rename from mindspore/ccsrc/parallel/tensor_layout/reshape_layout_transfer.h rename to mindspore/ccsrc/frontend/parallel/tensor_layout/reshape_layout_transfer.h index ed62cb59da..f9ebe9e32b 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/reshape_layout_transfer.h +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/reshape_layout_transfer.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_PARALLEL_TENSOR_LAYOUT_RESHAPE_LAYOUT_TRANSFER_H_ #include -#include "parallel/status.h" -#include "parallel/tensor_layout/layout_transfer.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/tensor_layout/layout_transfer.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/tensor_layout/shape_util.cc b/mindspore/ccsrc/frontend/parallel/tensor_layout/shape_util.cc similarity index 98% rename from mindspore/ccsrc/parallel/tensor_layout/shape_util.cc rename to mindspore/ccsrc/frontend/parallel/tensor_layout/shape_util.cc index e8f208708c..83282d16b3 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/shape_util.cc +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/shape_util.cc @@ -14,9 +14,9 @@ * limitations under the License. 
*/ -#include "parallel/tensor_layout/shape_util.h" +#include "frontend/parallel/tensor_layout/shape_util.h" #include -#include "parallel/status.h" +#include "frontend/parallel/status.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/tensor_layout/shape_util.h b/mindspore/ccsrc/frontend/parallel/tensor_layout/shape_util.h similarity index 99% rename from mindspore/ccsrc/parallel/tensor_layout/shape_util.h rename to mindspore/ccsrc/frontend/parallel/tensor_layout/shape_util.h index 2ec21f3881..49dd39ffd6 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/shape_util.h +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/shape_util.h @@ -23,7 +23,7 @@ #include #include -#include "parallel/status.h" +#include "frontend/parallel/status.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/tensor_layout/tensor_info.h b/mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_info.h similarity index 94% rename from mindspore/ccsrc/parallel/tensor_layout/tensor_info.h rename to mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_info.h index 0eee736cea..fc78b1f59c 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/tensor_info.h +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_info.h @@ -22,9 +22,9 @@ #include #include -#include "parallel/device_matrix.h" -#include "parallel/status.h" -#include "parallel/tensor_layout/tensor_layout.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/tensor_layout/tensor_layout.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/tensor_layout/tensor_layout.cc b/mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_layout.cc similarity index 98% rename from mindspore/ccsrc/parallel/tensor_layout/tensor_layout.cc rename to mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_layout.cc index f3498065f2..b9c6cc78de 100644 --- 
a/mindspore/ccsrc/parallel/tensor_layout/tensor_layout.cc +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_layout.cc @@ -14,15 +14,15 @@ * limitations under the License. */ -#include "parallel/tensor_layout/tensor_layout.h" +#include "frontend/parallel/tensor_layout/tensor_layout.h" #include #include #include "common/utils.h" #include "ir/value.h" -#include "parallel/device_matrix.h" -#include "parallel/status.h" -#include "parallel/tensor_layout/array.h" -#include "parallel/tensor_layout/shape_util.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/tensor_layout/array.h" +#include "frontend/parallel/tensor_layout/shape_util.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/tensor_layout/tensor_layout.h b/mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_layout.h similarity index 94% rename from mindspore/ccsrc/parallel/tensor_layout/tensor_layout.h rename to mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_layout.h index f51ed4e3e0..a9fdc9610c 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/tensor_layout.h +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_layout.h @@ -22,10 +22,10 @@ #include #include #include -#include "parallel/device_manager.h" -#include "parallel/status.h" -#include "parallel/tensor_layout/arrangement.h" -#include "parallel/tensor_layout/map.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/tensor_layout/arrangement.h" +#include "frontend/parallel/tensor_layout/map.h" #include "utils/convert_utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.cc b/mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc similarity index 98% rename from mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.cc rename to 
mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc index 7824c21f3d..43bb330787 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.cc +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" #include #include #include #include "common/utils.h" -#include "parallel/status.h" -#include "parallel/tensor_layout/shape_util.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/tensor_layout/shape_util.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.h b/mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.h similarity index 91% rename from mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.h rename to mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.h index d1f46108bb..df4bd1570f 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.h +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.h @@ -25,11 +25,11 @@ #include #include "ir/value.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/status.h" -#include "parallel/tensor_layout/construct_operator.h" -#include "parallel/tensor_layout/redistribution_operator_infer.h" -#include "parallel/tensor_layout/tensor_layout.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/tensor_layout/construct_operator.h" +#include "frontend/parallel/tensor_layout/redistribution_operator_infer.h" +#include "frontend/parallel/tensor_layout/tensor_layout.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/gvar/typeid_manager.cc b/mindspore/ccsrc/gvar/typeid_manager.cc index f40052411a..bc74f3a0df 100644 --- 
a/mindspore/ccsrc/gvar/typeid_manager.cc +++ b/mindspore/ccsrc/gvar/typeid_manager.cc @@ -20,7 +20,7 @@ #include #include -#include "ir/base.h" +#include "base/base.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/CMakeLists.txt similarity index 86% rename from mindspore/ccsrc/dataset/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/CMakeLists.txt index 9238be93f2..df9729c4ee 100644 --- a/mindspore/ccsrc/dataset/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/CMakeLists.txt @@ -34,11 +34,12 @@ endif () ########### Set up the include directories ########################### include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc) -include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/device/ascend/platform) +include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/runtime/device/ascend/platform) include_directories(${CMAKE_BINARY_DIR}) # for protobuf generated .h -include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/mindrecord/include) +include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/mindrecord/include) +include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include) ###################################################################### ####################### Flags ######################################## @@ -46,6 +47,8 @@ include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/mindrecord/include) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,-rpath,$ORIGIN:$ORIGIN/lib") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=default") +ms_build_flatbuffers("engine/cache/de_tensor.fbs" ${CMAKE_CURRENT_SOURCE_DIR} generated_engine_files ${CMAKE_BINARY_DIR}) + ################## Include sub-modules ############################### add_subdirectory(util) add_subdirectory(core) @@ -54,7 +57,7 @@ add_subdirectory(engine) add_subdirectory(api) add_subdirectory(text) ###################################################################### -add_dependencies(core 
utils) +add_dependencies(utils core) add_dependencies(kernels-image core) add_dependencies(kernels-data core) add_dependencies(kernels core) @@ -67,7 +70,10 @@ add_dependencies(engine-gnn core) add_dependencies(engine core) add_dependencies(text core) add_dependencies(text-kernels core) -add_dependencies(APItoPython core) +add_dependencies(cpp-API core) +if (ENABLE_PYTHON) + add_dependencies(APItoPython core) +endif() if (ENABLE_TDTQUE) add_dependencies(engine-tdt core) endif () @@ -78,24 +84,34 @@ set(submodules $ $ $ - $ + $ $ $ $ $ $ $ + $ + $ $ $ $ ) +if (ENABLE_PYTHON) + set(submodules + ${submodules} + $) +endif() + if (ENABLE_TDTQUE) add_library(_c_dataengine SHARED ${submodules} $) else () add_library(_c_dataengine SHARED ${submodules}) endif () +add_dependencies(_c_dataengine generated_engine_files) + set_target_properties(_c_dataengine PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}" SUFFIX "${PYTHON_MODULE_EXTENSION}" @@ -126,7 +142,7 @@ endif () add_dependencies(_c_dataengine _c_mindrecord) if (${CMAKE_SYSTEM_NAME} MATCHES "Windows") - set(MINDRECORD_LINK_OBJECT ${CMAKE_BINARY_DIR}/mindspore/ccsrc/mindrecord/CMakeFiles/_c_mindrecord.dir/objects.a) + set(MINDRECORD_LINK_OBJECT ${CMAKE_BINARY_DIR}/mindspore/ccsrc/minddata/mindrecord/CMakeFiles/_c_mindrecord.dir/objects.a) target_link_libraries(_c_dataengine PRIVATE _c_mindrecord ${MINDRECORD_LINK_OBJECT} mindspore::sqlite) else() target_link_libraries(_c_dataengine PRIVATE _c_mindrecord) diff --git a/mindspore/ccsrc/minddata/dataset/api/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/api/CMakeLists.txt new file mode 100644 index 0000000000..ae0b9cc28e --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/api/CMakeLists.txt @@ -0,0 +1,16 @@ +file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") +set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) +if (ENABLE_PYTHON) + add_library(APItoPython OBJECT + de_pipeline.cc + 
python_bindings.cc + ) + target_include_directories(APItoPython PRIVATE ${pybind11_INCLUDE_DIRS}) +endif() + +add_library(cpp-API OBJECT + datasets.cc + iterator.cc + transforms.cc + samplers.cc + ) diff --git a/mindspore/ccsrc/minddata/dataset/api/datasets.cc b/mindspore/ccsrc/minddata/dataset/api/datasets.cc new file mode 100644 index 0000000000..3072a62dc9 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/api/datasets.cc @@ -0,0 +1,446 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/include/transforms.h" +#include "minddata/dataset/include/samplers.h" +#include "minddata/dataset/engine/dataset_iterator.h" +#include "minddata/dataset/engine/datasetops/source/image_folder_op.h" +#include "minddata/dataset/engine/datasetops/source/mnist_op.h" +#include "minddata/dataset/engine/datasetops/source/cifar_op.h" +#include "minddata/dataset/engine/datasetops/batch_op.h" +#include "minddata/dataset/engine/datasetops/map_op.h" +#include "minddata/dataset/engine/datasetops/repeat_op.h" +#include "minddata/dataset/engine/datasetops/shuffle_op.h" +#include "minddata/dataset/engine/datasetops/project_op.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h" + +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/util/random.h" + +namespace mindspore { +namespace dataset { +namespace api { + +#define RETURN_NULL_IF_ERROR(_s) \ + do { \ + Status __rc = (_s); \ + if (__rc.IsError()) { \ + return nullptr; \ + } \ + } while (false) + +// Function to create the iterator, which will build and launch the execution tree. 
+std::shared_ptr Dataset::CreateIterator() { + std::shared_ptr iter; + try { + iter = std::make_shared(); + Status rc = iter->BuildAndLaunchTree(shared_from_this()); + if (rc.IsError()) { + MS_LOG(ERROR) << "CreateIterator failed."; + return nullptr; + } + + return iter; + } catch (const std::exception &err) { + MS_LOG(ERROR) << "CreateIterator: Iterator exception caught: " << err.what(); + return nullptr; + } + + return iter; +} + +// Constructor +Dataset::Dataset() { + // Fetch some default value from config manager + std::shared_ptr cfg = GlobalContext::config_manager(); + num_workers_ = cfg->num_parallel_workers(); + rows_per_buffer_ = cfg->rows_per_buffer(); + connector_que_size_ = cfg->op_connector_size(); +} + +// Function to create a ImageFolderDataset. +std::shared_ptr ImageFolder(std::string dataset_dir, bool decode, + std::shared_ptr sampler, std::set extensions, + std::map class_indexing) { + // This arg is exist in ImageFolderOp, but not externalized (in Python API). The default value is false. + bool recursive = false; + + // Create logical representation of ImageFolderDataset. + auto ds = std::make_shared(dataset_dir, decode, sampler, recursive, extensions, class_indexing); + + // Call derived class validation method. + return ds->ValidateParams() ? ds : nullptr; +} + +// Function to create a MnistDataset. +std::shared_ptr Mnist(std::string dataset_dir, std::shared_ptr sampler) { + auto ds = std::make_shared(dataset_dir, sampler); + + // Call derived class validation method. + return ds->ValidateParams() ? ds : nullptr; +} + +// Function to create a Cifar10Dataset. +std::shared_ptr Cifar10(const std::string &dataset_dir, int32_t num_samples, + std::shared_ptr sampler) { + auto ds = std::make_shared(dataset_dir, num_samples, sampler); + + // Call derived class validation method. + return ds->ValidateParams() ? 
ds : nullptr; +} + +// Function to create a Batch dataset +std::shared_ptr Dataset::Batch(int32_t batch_size, bool drop_remainder) { + // Default values + std::vector cols_to_map = {}; + std::map>> pad_map; + bool pad = false; + auto ds = std::make_shared(batch_size, drop_remainder, pad, cols_to_map, pad_map); + + if (!ds->ValidateParams()) { + return nullptr; + } + + ds->children.push_back(shared_from_this()); + + return ds; +} + +// Function to create Repeat dataset. +std::shared_ptr Dataset::Repeat(int32_t count) { + // Workaround for repeat == 1, do not inject repeat. + if (count == 1) { + return shared_from_this(); + } + + auto ds = std::make_shared(count); + + if (!ds->ValidateParams()) { + return nullptr; + } + + ds->children.push_back(shared_from_this()); + + return ds; +} + +// Function to create a Map dataset. +std::shared_ptr Dataset::Map(std::vector> operations, + std::vector input_columns, + std::vector output_columns, + const std::vector &project_columns) { + auto ds = std::make_shared(operations, input_columns, output_columns, project_columns); + + if (!ds->ValidateParams()) { + return nullptr; + } + + ds->children.push_back(shared_from_this()); + + return ds; +} + +// Function to create a ShuffleOp +std::shared_ptr Dataset::Shuffle(int32_t shuffle_size) { + // Pass in reshuffle_each_epoch with true + auto ds = std::make_shared(shuffle_size, true); + + if (!ds->ValidateParams()) { + return nullptr; + } + + ds->children.push_back(shared_from_this()); + + return ds; +} + +// Function to create a ProjectDataset. +std::shared_ptr Dataset::Project(const std::vector &columns) { + auto ds = std::make_shared(columns); + // Call derived class validation method. + if (!ds->ValidateParams()) { + return nullptr; + } + + ds->children.push_back(shared_from_this()); + + return ds; +} + +// Helper function to create default RandomSampler. +std::shared_ptr CreateDefaultSampler() { + int32_t num_samples = 0; // 0 means to sample all ids. 
+ bool replacement = false; + return std::make_shared(replacement, num_samples); +} + +/* ####################################### Derived Dataset classes ################################# */ + +ImageFolderDataset::ImageFolderDataset(std::string dataset_dir, bool decode, std::shared_ptr sampler, + bool recursive, std::set extensions, + std::map class_indexing) + : dataset_dir_(dataset_dir), + decode_(decode), + sampler_(sampler), + recursive_(recursive), + class_indexing_(class_indexing), + exts_(extensions) {} + +bool ImageFolderDataset::ValidateParams() { + if (dataset_dir_.empty()) { + MS_LOG(ERROR) << "No dataset path is specified."; + return false; + } + + return true; +} + +std::shared_ptr>> ImageFolderDataset::Build() { + // A vector containing shared pointer to the Dataset Ops that this object will create + std::vector> node_ops; + + // If user does not specify Sampler, create a default sampler, i.e., RandomSampler. + if (sampler_ == nullptr) { + sampler_ = CreateDefaultSampler(); + } + + // Do internal Schema generation. + // This arg is exist in ImageFolderOp, but not externalized (in Python API). 
+ std::unique_ptr schema = std::make_unique(); + TensorShape scalar = TensorShape::CreateScalar(); + RETURN_NULL_IF_ERROR( + schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); + RETURN_NULL_IF_ERROR( + schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_INT32), TensorImpl::kFlexible, 0, &scalar))); + node_ops.push_back(std::make_shared(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_, + recursive_, decode_, exts_, class_indexing_, std::move(schema), + std::move(sampler_->Build()))); + return std::make_shared>>(node_ops); +} + +MnistDataset::MnistDataset(std::string dataset_dir, std::shared_ptr sampler) + : dataset_dir_(dataset_dir), sampler_(sampler) {} + +bool MnistDataset::ValidateParams() { + if (dataset_dir_.empty()) { + MS_LOG(ERROR) << "No dataset path is specified."; + return false; + } + + return true; +} + +std::shared_ptr>> MnistDataset::Build() { + // A vector containing shared pointer to the Dataset Ops that this object will create + std::vector> node_ops; + + // If user does not specify Sampler, create a default sampler, i.e., RandomSampler. + if (sampler_ == nullptr) { + sampler_ = CreateDefaultSampler(); + } + + // Do internal Schema generation. 
+ auto schema = std::make_unique(); + RETURN_NULL_IF_ERROR(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1))); + TensorShape scalar = TensorShape::CreateScalar(); + RETURN_NULL_IF_ERROR( + schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &scalar))); + + node_ops.push_back(std::make_shared(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_, + std::move(schema), std::move(sampler_->Build()))); + return std::make_shared>>(node_ops); +} + +BatchDataset::BatchDataset(int32_t batch_size, bool drop_remainder, bool pad, std::vector cols_to_map, + std::map>> pad_map) + : batch_size_(batch_size), + drop_remainder_(drop_remainder), + pad_(pad), + cols_to_map_(cols_to_map), + pad_map_(pad_map) {} + +std::shared_ptr>> BatchDataset::Build() { + // A vector containing shared pointer to the Dataset Ops that this object will create + std::vector> node_ops; + +#ifdef ENABLE_PYTHON + py::function noop; + node_ops.push_back(std::make_shared(batch_size_, drop_remainder_, pad_, connector_que_size_, num_workers_, + cols_to_map_, noop, noop, pad_map_)); +#else + node_ops.push_back(std::make_shared(batch_size_, drop_remainder_, pad_, connector_que_size_, num_workers_, + cols_to_map_, pad_map_)); +#endif + return std::make_shared>>(node_ops); +} + +bool BatchDataset::ValidateParams() { + if (batch_size_ <= 0) { + return false; + } + + return true; +} + +RepeatDataset::RepeatDataset(uint32_t count) : repeat_count_(count) {} + +std::shared_ptr>> RepeatDataset::Build() { + // A vector containing shared pointer to the Dataset Ops that this object will create + std::vector> node_ops; + + node_ops.push_back(std::make_shared(repeat_count_)); + return std::make_shared>>(node_ops); +} + +bool RepeatDataset::ValidateParams() { + if (repeat_count_ <= 0) { + return false; + } + + return true; +} +MapDataset::MapDataset(std::vector> operations, std::vector input_columns, + std::vector 
output_columns, const std::vector &project_columns) + : operations_(operations), + input_columns_(input_columns), + output_columns_(output_columns), + project_columns_(project_columns) {} + +std::shared_ptr>> MapDataset::Build() { + // A vector containing shared pointer to the Dataset Ops that this object will create + std::vector> node_ops; + + // Currently default is true, and this is not exposed to user. + bool perf_mode = true; + + std::vector> tensor_ops; + + // Build tensorOp from tensorOperation vector + // This is to ensure each iterator hold its own copy of the tensorOp objects. + (void)std::transform( + operations_.begin(), operations_.end(), std::back_inserter(tensor_ops), + [](std::shared_ptr operation) -> std::shared_ptr { return operation->Build(); }); + + // This parameter will be removed with next rebase + std::vector col_orders; + auto map_op = + std::make_shared(input_columns_, output_columns_, tensor_ops, num_workers_, connector_que_size_, perf_mode); + if (!project_columns_.empty()) { + auto project_op = std::make_shared(project_columns_); + node_ops.push_back(project_op); + } + + node_ops.push_back(map_op); + return std::make_shared>>(node_ops); +} + +bool MapDataset::ValidateParams() { + if (operations_.empty()) { + return false; + } + + return true; +} + +// Constructor for ShuffleDataset +ShuffleDataset::ShuffleDataset(int32_t shuffle_size, bool reset_every_epoch) + : shuffle_size_(shuffle_size), shuffle_seed_(GetSeed()), reset_every_epoch_(reset_every_epoch) {} + +// Function to build the ShuffleOp +std::shared_ptr>> ShuffleDataset::Build() { + // A vector containing shared pointer to the Dataset Ops that this object will create + std::vector> node_ops; + + node_ops.push_back(std::make_shared(shuffle_size_, shuffle_seed_, connector_que_size_, reset_every_epoch_, + rows_per_buffer_)); + return std::make_shared>>(node_ops); +} + +// Function to validate the parameters for ShuffleDataset +bool ShuffleDataset::ValidateParams() { + if 
(shuffle_size_ <= 1) { + MS_LOG(ERROR) << "ShuffleDataset: Invalid input, shuffle_size: " << shuffle_size_; + return false; + } + + return true; +} + +// Constructor for Cifar10Dataset +Cifar10Dataset::Cifar10Dataset(const std::string &dataset_dir, int32_t num_samples, std::shared_ptr sampler) + : dataset_dir_(dataset_dir), num_samples_(num_samples), sampler_(sampler) {} + +bool Cifar10Dataset::ValidateParams() { + if (dataset_dir_.empty()) { + MS_LOG(ERROR) << "No dataset path is specified."; + return false; + } + if (num_samples_ < 0) { + MS_LOG(ERROR) << "Number of samples cannot be negative"; + return false; + } + return true; +} + +// Function to build CifarOp +std::shared_ptr>> Cifar10Dataset::Build() { + // A vector containing shared pointer to the Dataset Ops that this object will create + std::vector> node_ops; + + // If user does not specify Sampler, create a default sampler based on the shuffle variable. + if (sampler_ == nullptr) { + sampler_ = CreateDefaultSampler(); + } + + // Do internal Schema generation. 
+ auto schema = std::make_unique(); + RETURN_NULL_IF_ERROR(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1))); + TensorShape scalar = TensorShape::CreateScalar(); + RETURN_NULL_IF_ERROR( + schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &scalar))); + + node_ops.push_back(std::make_shared(CifarOp::CifarType::kCifar10, num_workers_, rows_per_buffer_, + dataset_dir_, connector_que_size_, std::move(schema), + std::move(sampler_->Build()))); + return std::make_shared>>(node_ops); +} + +// Function to build ProjectOp +ProjectDataset::ProjectDataset(const std::vector &columns) : columns_(columns) {} + +bool ProjectDataset::ValidateParams() { + if (columns_.empty()) { + MS_LOG(ERROR) << "No columns are specified."; + return false; + } + return true; +} + +std::shared_ptr>> ProjectDataset::Build() { + // A vector containing shared pointer to the Dataset Ops that this object will create + std::vector> node_ops; + + node_ops.push_back(std::make_shared(columns_)); + return std::make_shared>>(node_ops); +} + +} // namespace api +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/api/de_pipeline.cc b/mindspore/ccsrc/minddata/dataset/api/de_pipeline.cc similarity index 86% rename from mindspore/ccsrc/dataset/api/de_pipeline.cc rename to mindspore/ccsrc/minddata/dataset/api/de_pipeline.cc index 78fcdb7dd4..2a6166f868 100644 --- a/mindspore/ccsrc/dataset/api/de_pipeline.cc +++ b/mindspore/ccsrc/minddata/dataset/api/de_pipeline.cc @@ -13,34 +13,37 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/api/de_pipeline.h" +#include "minddata/dataset/api/de_pipeline.h" #include #include #include #include "common/utils.h" -#include "dataset/core/tensor.h" -#include "dataset/engine/dataset_iterator.h" -#include "dataset/engine/datasetops/bucket_batch_by_length_op.h" -#include "dataset/engine/datasetops/filter_op.h" -#include "dataset/engine/datasetops/source/celeba_op.h" -#include "dataset/engine/datasetops/source/cifar_op.h" -#include "dataset/engine/datasetops/source/clue_op.h" -#include "dataset/engine/datasetops/source/coco_op.h" -#include "dataset/engine/datasetops/source/image_folder_op.h" -#include "dataset/engine/datasetops/source/manifest_op.h" -#include "dataset/engine/datasetops/source/mnist_op.h" -#include "dataset/engine/datasetops/source/random_data_op.h" -#include "dataset/engine/datasetops/source/text_file_op.h" -#include "dataset/engine/datasetops/source/voc_op.h" -#include "dataset/kernels/py_func_op.h" -#include "dataset/util/random.h" -#include "dataset/util/status.h" -#include "mindrecord/include/shard_category.h" -#include "mindrecord/include/shard_distributed_sample.h" -#include "mindrecord/include/shard_sample.h" -#include "mindrecord/include/shard_shuffle.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/cache/cache_client.h" +#include "minddata/dataset/engine/dataset_iterator.h" +#include "minddata/dataset/engine/datasetops/bucket_batch_by_length_op.h" +#include "minddata/dataset/engine/datasetops/cache_op.h" +#include "minddata/dataset/engine/datasetops/filter_op.h" +#include "minddata/dataset/engine/datasetops/source/celeba_op.h" +#include "minddata/dataset/engine/datasetops/source/cifar_op.h" +#include "minddata/dataset/engine/datasetops/source/clue_op.h" +#include "minddata/dataset/engine/datasetops/source/coco_op.h" +#include "minddata/dataset/engine/datasetops/source/image_folder_op.h" +#include "minddata/dataset/engine/datasetops/source/manifest_op.h" +#include 
"minddata/dataset/engine/datasetops/source/mnist_op.h" +#include "minddata/dataset/engine/datasetops/source/random_data_op.h" +#include "minddata/dataset/engine/datasetops/source/text_file_op.h" +#include "minddata/dataset/engine/datasetops/source/voc_op.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/kernels/py_func_op.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/util/status.h" +#include "minddata/mindrecord/include/shard_category.h" +#include "minddata/mindrecord/include/shard_distributed_sample.h" +#include "minddata/mindrecord/include/shard_sample.h" +#include "minddata/mindrecord/include/shard_shuffle.h" #include "pybind11/stl.h" #include "utils/log_adapter.h" @@ -441,6 +444,8 @@ Status DEPipeline::ParseMapOp(const py::dict &args, std::shared_ptr * MapOp::Builder map_builder; std::vector> tensor_op_list; std::vector project_columns; + std::shared_ptr cache_client = nullptr; + int num_workers = 0; if (args["operations"].is_none()) RETURN_STATUS_UNEXPECTED("Error: 'operations' is not set. 
\n"); @@ -456,7 +461,8 @@ Status DEPipeline::ParseMapOp(const py::dict &args, std::shared_ptr * } else if (key == "columns_order") { project_columns = ToStringVector(value); } else if (key == "num_parallel_workers") { - (void)map_builder.SetNumWorkers(ToInt(value)); + num_workers = ToInt(value); + (void)map_builder.SetNumWorkers(num_workers); } else if (key == "prefetch_size") { (void)map_builder.SetOpConnectorSize(ToInt(value)); } else if (key == "operations") { @@ -477,6 +483,8 @@ Status DEPipeline::ParseMapOp(const py::dict &args, std::shared_ptr * } if (tensor_op_list.empty()) RETURN_STATUS_UNEXPECTED("Error: tensor_op is invalid or not set."); (void)map_builder.SetTensorFuncs(std::move(tensor_op_list)); + } else if (key == "cache") { + cache_client = value.cast>(); } else { RETURN_STATUS_UNEXPECTED("Error: Unhandled key: " + key); } @@ -499,6 +507,15 @@ Status DEPipeline::ParseMapOp(const py::dict &args, std::shared_ptr * *bottom = map_op; } + // Additionally, add a cache if required. 
This will go over top of the project op if one + // was created, otherwise it goes over top of the map op + if (cache_client) { + std::shared_ptr cache_op = nullptr; + RETURN_IF_NOT_OK(AddCacheOp(cache_client, num_workers, *top, &cache_op)); + *top = cache_op; + *bottom = map_op; + } + return Status::OK(); } @@ -809,6 +826,9 @@ Status DEPipeline::ParseTFReaderOp(const py::dict &args, std::shared_ptr *bottom) { // Required arguments std::vector files_list; + std::shared_ptr cache_client = nullptr; + std::shared_ptr sampler = nullptr; + int num_workers = 0; std::shared_ptr builder = std::make_shared(); if (!args["dataset_files"].is_none()) { files_list = ToStringVector(args["dataset_files"]); @@ -828,7 +848,8 @@ Status DEPipeline::ParseTFReaderOp(const py::dict &args, std::shared_ptrSetNumWorkers(ToInt(value)); + num_workers = ToInt(value); + (void)builder->SetNumWorkers(num_workers); } else if (key == "columns_list") { columns_to_load = ToStringVector(value); (void)builder->SetColumnsToLoad(columns_to_load); @@ -848,6 +869,11 @@ Status DEPipeline::ParseTFReaderOp(const py::dict &args, std::shared_ptrSetDeviceId(ToInt(value)); } else if (key == "shard_equal_rows") { (void)builder->SetShardEqualRows(ToBool(value)); + } else if (key == "cache") { + cache_client = value.cast>(); + } else if (key == "sampler") { + auto create = py::reinterpret_borrow(value).attr("create"); + sampler = create().cast>(); } } } @@ -860,12 +886,27 @@ Status DEPipeline::ParseTFReaderOp(const py::dict &args, std::shared_ptrSetDataSchema(std::move(schema)); } + + // If the user gave a sampler, but they did not ask for a cache, then by itself this is not allowed + // because TFReaderOp is a non-mappable dataset that does not support sampling. + // However, if a cache operator is injected at some other place higher in the tree, that cache can + // inherit this sampler from the leaf, providing sampling support from the caching layer. 
+ // That is why we save the sampler here in a leaf node that does not use sampling. + if (sampler) { + (void)builder->SetSampler(std::move(sampler)); + } else if (cache_client) { + int64_t num_samples = 0; + int64_t start_index = 0; + sampler = std::make_shared(num_samples, start_index); + (void)builder->SetSampler(std::move(sampler)); + } + std::shared_ptr tf_op; RETURN_IF_NOT_OK(builder->Build(&tf_op)); RETURN_IF_NOT_OK(tree_->AssociateNode(tf_op)); *top = tf_op; - if (shuffle_required) { + if (!cache_client && shuffle_required) { const boolean estimate = true; const int64_t workers = 8; std::shared_ptr shuffle_op = nullptr; @@ -882,6 +923,15 @@ Status DEPipeline::ParseTFReaderOp(const py::dict &args, std::shared_ptr cache_op = nullptr; + RETURN_IF_NOT_OK(AddCacheOp(cache_client, num_workers, tf_op, &cache_op)); + *top = cache_op; + *bottom = tf_op; + } + return Status::OK(); } @@ -906,6 +956,8 @@ Status DEPipeline::ParseImageFolderOp(const py::dict &args, std::shared_ptr cache_client = nullptr; std::shared_ptr builder = std::make_shared(); (void)builder->SetImageFolderDir(ToString(args["dataset_dir"])); @@ -915,7 +967,8 @@ Status DEPipeline::ParseImageFolderOp(const py::dict &args, std::shared_ptrSetNumWorkers(ToInt(value)); + num_workers = ToInt(value); + (void)builder->SetNumWorkers(num_workers); } else if (key == "sampler") { auto create = py::reinterpret_borrow(value).attr("create"); std::shared_ptr sampler = create().cast>(); @@ -926,12 +979,27 @@ Status DEPipeline::ParseImageFolderOp(const py::dict &args, std::shared_ptrSetClassIndex(ToStringMap(value)); } else if (key == "decode") { (void)builder->SetDecode(ToBool(value)); + } else if (key == "cache") { + cache_client = value.cast>(); } } } - std::shared_ptr op; - RETURN_IF_NOT_OK(builder->Build(&op)); - *top = op; + std::shared_ptr if_op; + RETURN_IF_NOT_OK(builder->Build(&if_op)); + RETURN_IF_NOT_OK(tree_->AssociateNode(if_op)); + *top = if_op; + + // Additionally, add a cache if required. 
+ // Note that this cache op is only acting as a place holder for the caching position + // within the tree. Later, a pre-pass will execute a tree transform to set up the actual + // caching logic in the tree. + if (cache_client) { + std::shared_ptr cache_op = nullptr; + RETURN_IF_NOT_OK(AddCacheOp(cache_client, num_workers, if_op, &cache_op)); + *top = cache_op; + *bottom = if_op; + } + return Status::OK(); } @@ -1130,9 +1198,12 @@ Status DEPipeline::ParseRandomDataOp(const py::dict &args, std::shared_ptr *bottom) { // Required arguments RandomDataOp::Builder builder; + std::shared_ptr cache_client = nullptr; + std::shared_ptr sampler = nullptr; + int num_workers = 0; - if (args["num_samples"].is_none()) { - std::string err_msg = "Error: num_samples is a required argument"; + if (args["total_rows"].is_none()) { + std::string err_msg = "Error: total_rows is a required argument"; RETURN_STATUS_UNEXPECTED(err_msg); } std::vector columns_to_load; @@ -1141,16 +1212,23 @@ Status DEPipeline::ParseRandomDataOp(const py::dict &args, std::shared_ptr>(); + } else if (key == "sampler") { + auto create = py::reinterpret_borrow(value).attr("create"); + sampler = create().cast>(); + } } } if (schema_exists) { @@ -1162,9 +1240,34 @@ Status DEPipeline::ParseRandomDataOp(const py::dict &args, std::shared_ptr op; - RETURN_IF_NOT_OK(builder.Build(&op)); - *top = op; + + // If the user gave a sampler, but they did not ask for a cache, then by itself this is not allowed + // because RandomDataOp is a non-mappable dataset that does not support sampling. + // However, if a cache operator is injected at some other place higher in the tree, that cache can + // inherit this sampler from the leaf, providing sampling support from the caching layer. + // That is why we save the sampler here in a leaf node that does not use sampling. 
+ if (sampler) { + (void)builder.SetSampler(std::move(sampler)); + } else if (cache_client) { + int64_t num_samples = 0; + int64_t start_index = 0; + sampler = std::make_shared(num_samples, start_index); + (void)builder.SetSampler(std::move(sampler)); + } + + std::shared_ptr random_op = nullptr; + RETURN_IF_NOT_OK(builder.Build(&random_op)); + RETURN_IF_NOT_OK(tree_->AssociateNode(random_op)); + *top = random_op; + + // Add a cache op over this op if required and update the output subtree (top/bottom) + if (cache_client) { + std::shared_ptr cache_op = nullptr; + RETURN_IF_NOT_OK(AddCacheOp(cache_client, num_workers, random_op, &cache_op)); + *top = cache_op; + *bottom = random_op; + } + return Status::OK(); } @@ -1425,6 +1528,31 @@ Status DEPipeline::ParseClueOp(const py::dict &args, std::shared_ptr return Status::OK(); } +// Helper function to inject the cache operator over top of the current operation being built. +Status DEPipeline::AddCacheOp(std::shared_ptr cache_client, int num_workers, + std::shared_ptr input_op, std::shared_ptr *cache_op) { + std::shared_ptr new_cache_op = nullptr; + CacheOp::Builder cache_builder; + // use the same number of workers as the leaf. We need some optimization here, the user does not + // give the cache op number of workers directly. + if (num_workers != 0) { + (void)cache_builder.SetNumWorkers(num_workers); + } + (void)cache_builder.SetClient(cache_client); + RETURN_IF_NOT_OK(cache_builder.Build(&new_cache_op)); + RETURN_IF_NOT_OK(tree_->AssociateNode(new_cache_op)); + RETURN_IF_NOT_OK(new_cache_op->AddChild(input_op)); + // We have now created: + // + // CacheOp + // | + // input_op + // + *cache_op = new_cache_op; + + return Status::OK(); +} + // Helper function to inject a shuffle operator over top of the current operation being built. 
Status DEPipeline::AddShuffleOp(int64_t shuffle_size, std::shared_ptr input_op, std::shared_ptr *shuffle_op) { diff --git a/mindspore/ccsrc/dataset/api/de_pipeline.h b/mindspore/ccsrc/minddata/dataset/api/de_pipeline.h similarity index 90% rename from mindspore/ccsrc/dataset/api/de_pipeline.h rename to mindspore/ccsrc/minddata/dataset/api/de_pipeline.h index 7cfc73307c..755e827ef2 100644 --- a/mindspore/ccsrc/dataset/api/de_pipeline.h +++ b/mindspore/ccsrc/minddata/dataset/api/de_pipeline.h @@ -23,9 +23,9 @@ #include #include #include -#include "dataset/core/client.h" // DE client -#include "dataset/engine/dataset_iterator.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/client.h" // DE client +#include "minddata/dataset/engine/dataset_iterator.h" +#include "minddata/dataset/util/status.h" #include "pybind11/numpy.h" #include "pybind11/pybind11.h" #include "pybind11/stl.h" @@ -35,6 +35,8 @@ namespace mindspore { namespace dataset { using DsOpPtr = std::shared_ptr; +class CacheClient; + // enum for the dataset operator names enum OpName { kShuffle, @@ -181,6 +183,16 @@ class DEPipeline { static Status ParsePadInfo(py::handle value, PadInfo *pad_info); + /// \brief Helper function to inject a cache operator over top of the current operation being built. + /// \param[in] cache_client The client to use for caching + /// \param[in] num_workers The number of workers to use in the cache op + /// \param[in] input_op The operator to build the cache on top of + /// \param[out] cache_op The top node of the created subtree (subtree contains two nodes). In this case it will be + /// the cache operator + /// \return Status return code + Status AddCacheOp(std::shared_ptr cache_client, int num_workers, std::shared_ptr input_op, + std::shared_ptr *cache_op); + /// \brief Helper function to inject a shuffle operator over top of the current operation being built. 
/// \param[in] shuffle_size The size to use in the shuffle buffer /// \param[in] input_op The operator to build shuffle on top of diff --git a/mindspore/ccsrc/minddata/dataset/api/iterator.cc b/mindspore/ccsrc/minddata/dataset/api/iterator.cc new file mode 100644 index 0000000000..068bcfaa04 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/api/iterator.cc @@ -0,0 +1,101 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "minddata/dataset/include/iterator.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/include/datasets.h" + +namespace mindspore { +namespace dataset { +namespace api { + +// Get the next row from the data pipeline. +void Iterator::GetNextRow(TensorMap *row) { + Status rc = iterator_->GetNextAsMap(row); + if (rc.IsError()) { + MS_LOG(ERROR) << "GetNextRow: Failed to get next row."; + row->clear(); + } +} + +// Shut down the data pipeline. +void Iterator::Stop() { + // Releasing the iterator_ unique_ptre. This should trigger the destructor of iterator_. + iterator_.reset(); + + // Release ownership of tree_ shared pointer. This will decrement the ref count. + tree_.reset(); +} + +// Function to build and launch the execution tree. 
+Status Iterator::BuildAndLaunchTree(std::shared_ptr ds) { + // One time init + Status rc; + rc = GlobalInit(); + RETURN_IF_NOT_OK(rc); + + // Instantiate the execution tree + tree_ = std::make_shared(); + + // Iterative BFS converting Dataset tree into runtime Execution tree. + std::queue, std::shared_ptr>> q; + + if (ds != nullptr) { + // Convert the current root node. + auto root_op = ds->Build()->front(); + RETURN_UNEXPECTED_IF_NULL(root_op); + + RETURN_IF_NOT_OK(tree_->AssociateNode(root_op)); + + q.push(std::make_pair(ds, root_op)); + + // Traverse down to the children and convert them to the corresponding DatasetOps (i.e. execution tree nodes) + while (!q.empty()) { + auto node_pair = q.front(); + q.pop(); + // Iterate through all the direct children of the first element in our BFS queue + for (auto child : node_pair.first->children) { + auto child_ops = child->Build(); + RETURN_UNEXPECTED_IF_NULL(child_ops); + auto node_op = node_pair.second; + // Iterate through all the DatasetOps returned by calling Build on the last Dataset object, associate them + // with the execution tree and add the child and parent relationship between the nodes + // Note that some Dataset objects might return more than one DatasetOps + // e.g. MapDataset will return MapOp and ProjectOp if project_columns is set for MapDataset + for (auto child_op : *child_ops) { + RETURN_IF_NOT_OK(tree_->AssociateNode(child_op)); + RETURN_IF_NOT_OK(node_op->AddChild(child_op)); + node_op = child_op; + } + // Add the child and the last element of the returned DatasetOps (which is now the leaf node in our current + // execution tree) to the BFS queue + q.push(std::make_pair(child, child_ops->back())); + } + } + RETURN_IF_NOT_OK(tree_->AssignRoot(root_op)); + } + + // Launch the execution tree. 
+ RETURN_IF_NOT_OK(tree_->Prepare()); + RETURN_IF_NOT_OK(tree_->Launch()); + iterator_ = std::make_unique(tree_); + RETURN_UNEXPECTED_IF_NULL(iterator_); + + return rc; +} + +} // namespace api +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/api/python_bindings.cc b/mindspore/ccsrc/minddata/dataset/api/python_bindings.cc similarity index 83% rename from mindspore/ccsrc/dataset/api/python_bindings.cc rename to mindspore/ccsrc/minddata/dataset/api/python_bindings.cc index ed3f993fb8..145291ec3b 100644 --- a/mindspore/ccsrc/dataset/api/python_bindings.cc +++ b/mindspore/ccsrc/minddata/dataset/api/python_bindings.cc @@ -15,91 +15,92 @@ */ #include -#include "dataset/api/de_pipeline.h" -#include "dataset/engine/datasetops/source/cifar_op.h" -#include "dataset/engine/datasetops/source/clue_op.h" -#include "dataset/engine/datasetops/source/coco_op.h" -#include "dataset/engine/datasetops/source/image_folder_op.h" -#include "dataset/engine/datasetops/source/io_block.h" -#include "dataset/engine/datasetops/source/manifest_op.h" -#include "dataset/engine/datasetops/source/mindrecord_op.h" -#include "dataset/engine/datasetops/source/mnist_op.h" -#include "dataset/engine/datasetops/source/random_data_op.h" -#include "dataset/engine/datasetops/source/sampler/distributed_sampler.h" -#include "dataset/engine/datasetops/source/sampler/pk_sampler.h" -#include "dataset/engine/datasetops/source/sampler/python_sampler.h" -#include "dataset/engine/datasetops/source/sampler/random_sampler.h" -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" -#include "dataset/engine/datasetops/source/sampler/subset_random_sampler.h" -#include "dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" -#include "dataset/engine/datasetops/source/text_file_op.h" -#include "dataset/engine/datasetops/source/tf_reader_op.h" -#include "dataset/engine/datasetops/source/voc_op.h" -#include "dataset/engine/gnn/graph.h" -#include 
"dataset/engine/jagged_connector.h" -#include "dataset/kernels/data/concatenate_op.h" -#include "dataset/kernels/data/duplicate_op.h" -#include "dataset/kernels/data/fill_op.h" -#include "dataset/kernels/data/mask_op.h" -#include "dataset/kernels/data/one_hot_op.h" -#include "dataset/kernels/data/pad_end_op.h" -#include "dataset/kernels/data/slice_op.h" -#include "dataset/kernels/data/to_float16_op.h" -#include "dataset/kernels/data/type_cast_op.h" -#include "dataset/kernels/image/bounding_box_augment_op.h" -#include "dataset/kernels/image/center_crop_op.h" -#include "dataset/kernels/image/cut_out_op.h" -#include "dataset/kernels/image/decode_op.h" -#include "dataset/kernels/image/hwc_to_chw_op.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/kernels/image/normalize_op.h" -#include "dataset/kernels/image/pad_op.h" -#include "dataset/kernels/image/random_color_adjust_op.h" -#include "dataset/kernels/image/random_crop_and_resize_op.h" -#include "dataset/kernels/image/random_crop_and_resize_with_bbox_op.h" -#include "dataset/kernels/image/random_crop_decode_resize_op.h" -#include "dataset/kernels/image/random_crop_op.h" -#include "dataset/kernels/image/random_crop_with_bbox_op.h" -#include "dataset/kernels/image/random_horizontal_flip_bbox_op.h" -#include "dataset/kernels/image/random_horizontal_flip_op.h" -#include "dataset/kernels/image/random_resize_op.h" -#include "dataset/kernels/image/random_resize_with_bbox_op.h" -#include "dataset/kernels/image/random_rotation_op.h" -#include "dataset/kernels/image/random_vertical_flip_op.h" -#include "dataset/kernels/image/random_vertical_flip_with_bbox_op.h" -#include "dataset/kernels/image/rescale_op.h" -#include "dataset/kernels/image/resize_bilinear_op.h" -#include "dataset/kernels/image/resize_op.h" -#include "dataset/kernels/image/resize_with_bbox_op.h" -#include "dataset/kernels/image/uniform_aug_op.h" -#include "dataset/kernels/no_op.h" -#include "dataset/text/kernels/jieba_tokenizer_op.h" 
-#include "dataset/text/kernels/lookup_op.h" -#include "dataset/text/kernels/ngram_op.h" -#include "dataset/text/kernels/to_number_op.h" -#include "dataset/text/kernels/unicode_char_tokenizer_op.h" -#include "dataset/text/kernels/wordpiece_tokenizer_op.h" -#include "dataset/text/vocab.h" -#include "dataset/util/random.h" -#include "mindrecord/include/shard_distributed_sample.h" -#include "mindrecord/include/shard_operator.h" -#include "mindrecord/include/shard_pk_sample.h" -#include "mindrecord/include/shard_sample.h" -#include "mindrecord/include/shard_sequential_sample.h" -#include "mindspore/ccsrc/dataset/text/kernels/truncate_sequence_pair_op.h" +#include "minddata/dataset/api/de_pipeline.h" +#include "minddata/dataset/engine/datasetops/source/cifar_op.h" +#include "minddata/dataset/engine/datasetops/source/clue_op.h" +#include "minddata/dataset/engine/datasetops/source/coco_op.h" +#include "minddata/dataset/engine/datasetops/source/image_folder_op.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/engine/datasetops/source/manifest_op.h" +#include "minddata/dataset/engine/datasetops/source/mindrecord_op.h" +#include "minddata/dataset/engine/datasetops/source/mnist_op.h" +#include "minddata/dataset/engine/datasetops/source/random_data_op.h" +#include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/pk_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/python_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/text_file_op.h" +#include 
"minddata/dataset/engine/datasetops/source/tf_reader_op.h" +#include "minddata/dataset/engine/datasetops/source/voc_op.h" +#include "minddata/dataset/engine/cache/cache_client.h" +#include "minddata/dataset/engine/gnn/graph.h" +#include "minddata/dataset/engine/jagged_connector.h" +#include "minddata/dataset/kernels/data/concatenate_op.h" +#include "minddata/dataset/kernels/data/duplicate_op.h" +#include "minddata/dataset/kernels/data/fill_op.h" +#include "minddata/dataset/kernels/data/mask_op.h" +#include "minddata/dataset/kernels/data/one_hot_op.h" +#include "minddata/dataset/kernels/data/pad_end_op.h" +#include "minddata/dataset/kernels/data/slice_op.h" +#include "minddata/dataset/kernels/data/to_float16_op.h" +#include "minddata/dataset/kernels/data/type_cast_op.h" +#include "minddata/dataset/kernels/image/bounding_box_augment_op.h" +#include "minddata/dataset/kernels/image/center_crop_op.h" +#include "minddata/dataset/kernels/image/cut_out_op.h" +#include "minddata/dataset/kernels/image/decode_op.h" +#include "minddata/dataset/kernels/image/hwc_to_chw_op.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/kernels/image/normalize_op.h" +#include "minddata/dataset/kernels/image/pad_op.h" +#include "minddata/dataset/kernels/image/random_color_adjust_op.h" +#include "minddata/dataset/kernels/image/random_crop_and_resize_op.h" +#include "minddata/dataset/kernels/image/random_crop_and_resize_with_bbox_op.h" +#include "minddata/dataset/kernels/image/random_crop_decode_resize_op.h" +#include "minddata/dataset/kernels/image/random_crop_op.h" +#include "minddata/dataset/kernels/image/random_crop_with_bbox_op.h" +#include "minddata/dataset/kernels/image/random_horizontal_flip_with_bbox_op.h" +#include "minddata/dataset/kernels/image/random_horizontal_flip_op.h" +#include "minddata/dataset/kernels/image/random_resize_op.h" +#include "minddata/dataset/kernels/image/random_resize_with_bbox_op.h" +#include 
"minddata/dataset/kernels/image/random_rotation_op.h" +#include "minddata/dataset/kernels/image/random_vertical_flip_op.h" +#include "minddata/dataset/kernels/image/random_vertical_flip_with_bbox_op.h" +#include "minddata/dataset/kernels/image/rescale_op.h" +#include "minddata/dataset/kernels/image/resize_bilinear_op.h" +#include "minddata/dataset/kernels/image/resize_op.h" +#include "minddata/dataset/kernels/image/resize_with_bbox_op.h" +#include "minddata/dataset/kernels/image/uniform_aug_op.h" +#include "minddata/dataset/kernels/no_op.h" +#include "minddata/dataset/text/kernels/jieba_tokenizer_op.h" +#include "minddata/dataset/text/kernels/lookup_op.h" +#include "minddata/dataset/text/kernels/ngram_op.h" +#include "minddata/dataset/text/kernels/to_number_op.h" +#include "minddata/dataset/text/kernels/unicode_char_tokenizer_op.h" +#include "minddata/dataset/text/kernels/wordpiece_tokenizer_op.h" +#include "minddata/dataset/text/vocab.h" +#include "minddata/dataset/util/random.h" +#include "minddata/mindrecord/include/shard_distributed_sample.h" +#include "minddata/mindrecord/include/shard_operator.h" +#include "minddata/mindrecord/include/shard_pk_sample.h" +#include "minddata/mindrecord/include/shard_sample.h" +#include "minddata/mindrecord/include/shard_sequential_sample.h" +#include "mindspore/ccsrc/minddata/dataset/text/kernels/truncate_sequence_pair_op.h" #include "pybind11/pybind11.h" #include "pybind11/stl.h" #include "pybind11/stl_bind.h" #ifdef ENABLE_ICU4C -#include "dataset/text/kernels/basic_tokenizer_op.h" -#include "dataset/text/kernels/bert_tokenizer_op.h" -#include "dataset/text/kernels/case_fold_op.h" -#include "dataset/text/kernels/normalize_utf8_op.h" -#include "dataset/text/kernels/regex_replace_op.h" -#include "dataset/text/kernels/regex_tokenizer_op.h" -#include "dataset/text/kernels/unicode_script_tokenizer_op.h" -#include "dataset/text/kernels/whitespace_tokenizer_op.h" +#include "minddata/dataset/text/kernels/basic_tokenizer_op.h" 
+#include "minddata/dataset/text/kernels/bert_tokenizer_op.h" +#include "minddata/dataset/text/kernels/case_fold_op.h" +#include "minddata/dataset/text/kernels/normalize_utf8_op.h" +#include "minddata/dataset/text/kernels/regex_replace_op.h" +#include "minddata/dataset/text/kernels/regex_tokenizer_op.h" +#include "minddata/dataset/text/kernels/unicode_script_tokenizer_op.h" +#include "minddata/dataset/text/kernels/whitespace_tokenizer_op.h" #endif namespace py = pybind11; @@ -297,7 +298,7 @@ void bindTensor(py::module *m) { })) .def_buffer([](Tensor &tensor) { py::buffer_info info; - THROW_IF_ERROR(Tensor::GetBufferInfo(tensor, &info)); + THROW_IF_ERROR(Tensor::GetBufferInfo(&tensor, &info)); return info; }) .def("__str__", &Tensor::ToString) @@ -311,7 +312,7 @@ void bindTensor(py::module *m) { return res; } py::buffer_info info; - THROW_IF_ERROR(Tensor::GetBufferInfo(tensor, &info)); + THROW_IF_ERROR(Tensor::GetBufferInfo(&tensor, &info)); return py::array(pybind11::dtype(info), info.shape, info.strides, info.ptr, t); }); @@ -601,39 +602,57 @@ void bindTensorOps4(py::module *m) { void bindTokenizerOps(py::module *m) { (void)py::class_>(*m, "JiebaTokenizerOp", "") - .def(py::init(), py::arg("hmm_path"), py::arg("mp_path"), - py::arg("mode") = JiebaMode::kMix) + .def(py::init(), py::arg("hmm_path"), + py::arg("mp_path"), py::arg("mode") = JiebaMode::kMix, + py::arg("with_offsets") = JiebaTokenizerOp::kDefWithOffsets) .def("add_word", [](JiebaTokenizerOp &self, const std::string word, int freq) { THROW_IF_ERROR(self.AddWord(word, freq)); }); (void)py::class_>( *m, "UnicodeCharTokenizerOp", "Tokenize a scalar tensor of UTF-8 string to Unicode characters.") - .def(py::init<>()); + .def(py::init(), py::arg("with_offsets") = UnicodeCharTokenizerOp::kDefWithOffsets); (void)py::class_>(*m, "LookupOp", - "Tensor operation to LookUp each word") - .def(py::init, WordIdType>(), py::arg("vocab"), py::arg("unknown")) - .def(py::init>(), py::arg("vocab")); - (void)py::class_>(*m, 
"NgramOp", "TensorOp performs ngram mapping") + "Tensor operation to LookUp each word.") + .def(py::init([](std::shared_ptr vocab, const py::object &py_word) { + if (vocab == nullptr) { + THROW_IF_ERROR(Status(StatusCode::kUnexpectedError, "vocab object type is incorrect or null.")); + } + if (py_word.is_none()) { + return std::make_shared(vocab, Vocab::kNoTokenExists); + } + std::string word = py::reinterpret_borrow(py_word); + WordIdType default_id = vocab->Lookup(word); + if (default_id == Vocab::kNoTokenExists) { + THROW_IF_ERROR( + Status(StatusCode::kUnexpectedError, "default unknown token:" + word + " doesn't exist in vocab.")); + } + return std::make_shared(vocab, default_id); + })); + (void)py::class_>(*m, "NgramOp", "TensorOp performs ngram mapping.") .def(py::init &, int32_t, int32_t, const std::string &, const std::string &, const std::string &>(), py::arg("ngrams"), py::arg("l_pad_len"), py::arg("r_pad_len"), py::arg("l_pad_token"), py::arg("r_pad_token"), py::arg("separator")); (void)py::class_>( *m, "WordpieceTokenizerOp", "Tokenize scalar token or 1-D tokens to subword tokens.") - .def(py::init &, const std::string &, const int &, const std::string &>(), - py::arg("vocab"), py::arg("suffix_indicator") = std::string(WordpieceTokenizerOp::kDefSuffixIndicator), - py::arg("max_bytes_per_token") = WordpieceTokenizerOp::kDefMaxBytesPerToken, - py::arg("unknown_token") = std::string(WordpieceTokenizerOp::kDefUnknownToken)); + .def( + py::init &, const std::string &, const int &, const std::string &, const bool &>(), + py::arg("vocab"), py::arg("suffix_indicator") = std::string(WordpieceTokenizerOp::kDefSuffixIndicator), + py::arg("max_bytes_per_token") = WordpieceTokenizerOp::kDefMaxBytesPerToken, + py::arg("unknown_token") = std::string(WordpieceTokenizerOp::kDefUnknownToken), + py::arg("with_offsets") = WordpieceTokenizerOp::kDefWithOffsets); } void bindDependIcuTokenizerOps(py::module *m) { #ifdef ENABLE_ICU4C (void)py::class_>( *m, 
"WhitespaceTokenizerOp", "Tokenize a scalar tensor of UTF-8 string on ICU defined whitespaces.") - .def(py::init<>()); + .def(py::init(), py::arg("with_offsets") = WhitespaceTokenizerOp::kDefWithOffsets); (void)py::class_>( *m, "UnicodeScriptTokenizerOp", "Tokenize a scalar tensor of UTF-8 string on Unicode script boundaries.") .def(py::init<>()) - .def(py::init(), py::arg("keep_whitespace") = UnicodeScriptTokenizerOp::kDefKeepWhitespace); + .def(py::init(), + py::arg("keep_whitespace") = UnicodeScriptTokenizerOp::kDefKeepWhitespace, + py::arg("with_offsets") = UnicodeScriptTokenizerOp::kDefWithOffsets); (void)py::class_>( *m, "CaseFoldOp", "Apply case fold operation on utf-8 string tensor") .def(py::init<>()); @@ -647,24 +666,28 @@ void bindDependIcuTokenizerOps(py::module *m) { py::arg("replace_all")); (void)py::class_>( *m, "RegexTokenizerOp", "Tokenize a scalar tensor of UTF-8 string by regex expression pattern.") - .def(py::init(), py::arg("delim_pattern"), py::arg("keep_delim_pattern")); + .def(py::init(), py::arg("delim_pattern"), + py::arg("keep_delim_pattern"), py::arg("with_offsets") = RegexTokenizerOp::kDefWithOffsets); (void)py::class_>( *m, "BasicTokenizerOp", "Tokenize a scalar tensor of UTF-8 string by specific rules.") - .def(py::init(), py::arg("lower_case") = BasicTokenizerOp::kDefLowerCase, + .def(py::init(), + py::arg("lower_case") = BasicTokenizerOp::kDefLowerCase, py::arg("keep_whitespace") = BasicTokenizerOp::kDefKeepWhitespace, py::arg("normalization_form") = BasicTokenizerOp::kDefNormalizationForm, - py::arg("preserve_unused_token") = BasicTokenizerOp::kDefPreserveUnusedToken); + py::arg("preserve_unused_token") = BasicTokenizerOp::kDefPreserveUnusedToken, + py::arg("with_offsets") = BasicTokenizerOp::kDefWithOffsets); (void)py::class_>(*m, "BertTokenizerOp", "Tokenizer used for Bert text process.") - .def(py::init &, const std::string &, const int &, const std::string &, bool, bool, - NormalizeForm, bool>(), + .def(py::init &, const 
std::string &, const int &, const std::string &, const bool &, + const bool &, const NormalizeForm &, const bool &, const bool &>(), py::arg("vocab"), py::arg("suffix_indicator") = std::string(WordpieceTokenizerOp::kDefSuffixIndicator), py::arg("max_bytes_per_token") = WordpieceTokenizerOp::kDefMaxBytesPerToken, py::arg("unknown_token") = std::string(WordpieceTokenizerOp::kDefUnknownToken), py::arg("lower_case") = BasicTokenizerOp::kDefLowerCase, py::arg("keep_whitespace") = BasicTokenizerOp::kDefKeepWhitespace, py::arg("normalization_form") = BasicTokenizerOp::kDefNormalizationForm, - py::arg("preserve_unused_token") = BasicTokenizerOp::kDefPreserveUnusedToken); + py::arg("preserve_unused_token") = BasicTokenizerOp::kDefPreserveUnusedToken, + py::arg("with_offsets") = WordpieceTokenizerOp::kDefWithOffsets); #endif } @@ -746,6 +769,11 @@ void bindInfoObjects(py::module *m) { .def("get_batch_num", &BatchOp::CBatchInfo::get_batch_num); } +void bindCacheClient(py::module *m) { + (void)py::class_>(*m, "CacheClient") + .def(py::init()); +} + void bindVocabObjects(py::module *m) { (void)py::class_>(*m, "Vocab") .def(py::init<>()) @@ -820,6 +848,12 @@ void bindGraphData(py::module *m) { THROW_IF_ERROR(g.GetNodeFeature(node_list, feature_types, &out)); return out.getRow(); }) + .def("get_edge_feature", + [](gnn::Graph &g, std::shared_ptr edge_list, std::vector feature_types) { + TensorRow out; + THROW_IF_ERROR(g.GetEdgeFeature(edge_list, feature_types, &out)); + return out.getRow(); + }) .def("graph_info", [](gnn::Graph &g) { py::dict out; @@ -911,6 +945,7 @@ PYBIND11_MODULE(_c_dataengine, m) { bindSamplerOps(&m); bindDatasetOps(&m); bindInfoObjects(&m); + bindCacheClient(&m); bindVocabObjects(&m); bindGraphData(&m); bindDependIcuTokenizerOps(&m); diff --git a/mindspore/ccsrc/minddata/dataset/api/samplers.cc b/mindspore/ccsrc/minddata/dataset/api/samplers.cc new file mode 100644 index 0000000000..91421f0ff8 --- /dev/null +++ 
b/mindspore/ccsrc/minddata/dataset/api/samplers.cc @@ -0,0 +1,224 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "minddata/dataset/include/samplers.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/pk_sampler.h" + +namespace mindspore { +namespace dataset { +namespace api { + +SamplerObj::SamplerObj() {} + +/// Function to create a Distributed Sampler. +std::shared_ptr DistributedSampler(int64_t num_shards, int64_t shard_id, bool shuffle, + int64_t num_samples, uint32_t seed) { + auto sampler = std::make_shared(num_shards, shard_id, shuffle, num_samples, seed); + // Input validation + if (!sampler->ValidateParams()) { + return nullptr; + } + return sampler; +} + +/// Function to create a PK Sampler. 
+std::shared_ptr PKSampler(int64_t num_val, bool shuffle, int64_t num_samples) { + auto sampler = std::make_shared(num_val, shuffle, num_samples); + // Input validation + if (!sampler->ValidateParams()) { + return nullptr; + } + return sampler; +} + +/// Function to create a Random Sampler. +std::shared_ptr RandomSampler(bool replacement, int64_t num_samples) { + auto sampler = std::make_shared(replacement, num_samples); + // Input validation + if (!sampler->ValidateParams()) { + return nullptr; + } + return sampler; +} + +/// Function to create a Sequential Sampler. +std::shared_ptr SequentialSampler(int64_t start_index, int64_t num_samples) { + auto sampler = std::make_shared(start_index, num_samples); + // Input validation + if (!sampler->ValidateParams()) { + return nullptr; + } + return sampler; +} + +/// Function to create a Subset Random Sampler. +std::shared_ptr SubsetRandomSampler(const std::vector &indices, int64_t num_samples) { + auto sampler = std::make_shared(indices, num_samples); + // Input validation + if (!sampler->ValidateParams()) { + return nullptr; + } + return sampler; +} + +/// Function to create a Weighted Random Sampler. 
+std::shared_ptr WeightedRandomSampler(const std::vector &weights, int64_t num_samples, + bool replacement) { + auto sampler = std::make_shared(weights, num_samples, replacement); + // Input validation + if (!sampler->ValidateParams()) { + return nullptr; + } + return sampler; +} + +/* ####################################### Derived Sampler classes ################################# */ + +// DistributedSampler +DistributedSamplerObj::DistributedSamplerObj(int64_t num_shards, int64_t shard_id, bool shuffle, int64_t num_samples, + uint32_t seed) + : num_shards_(num_shards), shard_id_(shard_id), shuffle_(shuffle), num_samples_(num_samples), seed_(seed) {} + +bool DistributedSamplerObj::ValidateParams() { + if (num_shards_ <= 0) { + MS_LOG(ERROR) << "DistributedSampler: invalid num_shards: " << num_shards_; + return false; + } + + if (shard_id_ < 0 || shard_id_ >= num_shards_) { + MS_LOG(ERROR) << "DistributedSampler: invalid input, shard_id: " << shard_id_ << ", num_shards: " << num_shards_; + return false; + } + + if (num_samples_ < 0) { + MS_LOG(ERROR) << "DistributedSampler: invalid num_samples: " << num_samples_; + return false; + } + + return true; +} + +std::shared_ptr DistributedSamplerObj::Build() { + return std::make_shared(num_samples_, num_shards_, shard_id_, shuffle_, seed_); +} + +// PKSampler +PKSamplerObj::PKSamplerObj(int64_t num_val, bool shuffle, int64_t num_samples) + : num_val_(num_val), shuffle_(shuffle), num_samples_(num_samples) {} + +bool PKSamplerObj::ValidateParams() { + if (num_val_ <= 0) { + MS_LOG(ERROR) << "PKSampler: invalid num_val: " << num_val_; + return false; + } + + if (num_samples_ < 0) { + MS_LOG(ERROR) << "PKSampler: invalid num_samples: " << num_samples_; + return false; + } + return true; +} + +std::shared_ptr PKSamplerObj::Build() { + return std::make_shared(num_samples_, num_val_, shuffle_); +} + +// RandomSampler +RandomSamplerObj::RandomSamplerObj(bool replacement, int64_t num_samples) + : replacement_(replacement), 
num_samples_(num_samples) {} + +bool RandomSamplerObj::ValidateParams() { + if (num_samples_ < 0) { + MS_LOG(ERROR) << "RandomSampler: invalid num_samples: " << num_samples_; + return false; + } + return true; +} + +std::shared_ptr RandomSamplerObj::Build() { + bool reshuffle_each_epoch = true; + auto sampler = std::make_shared(num_samples_, replacement_, reshuffle_each_epoch); + return sampler; +} + +// SequentialSampler +SequentialSamplerObj::SequentialSamplerObj(int64_t start_index, int64_t num_samples) + : start_index_(start_index), num_samples_(num_samples) {} + +bool SequentialSamplerObj::ValidateParams() { + if (num_samples_ < 0) { + MS_LOG(ERROR) << "SequentialSampler: invalid num_samples: " << num_samples_; + return false; + } + + if (start_index_ < 0) { + MS_LOG(ERROR) << "SequentialSampler: invalid start_index: " << start_index_; + return false; + } + + return true; +} + +std::shared_ptr SequentialSamplerObj::Build() { + auto sampler = std::make_shared(num_samples_, start_index_); + return sampler; +} + +// SubsetRandomSampler +SubsetRandomSamplerObj::SubsetRandomSamplerObj(const std::vector &indices, int64_t num_samples) + : indices_(indices), num_samples_(num_samples) {} + +bool SubsetRandomSamplerObj::ValidateParams() { + if (num_samples_ < 0) { + MS_LOG(ERROR) << "SubsetRandomSampler: invalid num_samples: " << num_samples_; + return false; + } + + return true; +} + +std::shared_ptr SubsetRandomSamplerObj::Build() { + auto sampler = std::make_shared(num_samples_, indices_); + return sampler; +} + +// WeightedRandomSampler +WeightedRandomSamplerObj::WeightedRandomSamplerObj(const std::vector &weights, int64_t num_samples, + bool replacement) + : weights_(weights), num_samples_(num_samples), replacement_(replacement) {} + +bool WeightedRandomSamplerObj::ValidateParams() { + if (num_samples_ < 0) { + MS_LOG(ERROR) << "WeightedRandomSampler: invalid num_samples: " << num_samples_; + return false; + } + return true; +} + +std::shared_ptr 
WeightedRandomSamplerObj::Build() { + auto sampler = std::make_shared(num_samples_, weights_, replacement_); + return sampler; +} + +} // namespace api +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/api/transforms.cc b/mindspore/ccsrc/minddata/dataset/api/transforms.cc new file mode 100644 index 0000000000..59a25ef9f5 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/api/transforms.cc @@ -0,0 +1,491 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "minddata/dataset/include/transforms.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/kernels/image/normalize_op.h" +#include "minddata/dataset/kernels/image/decode_op.h" +#include "minddata/dataset/kernels/image/resize_op.h" +#include "minddata/dataset/kernels/image/random_crop_op.h" +#include "minddata/dataset/kernels/image/center_crop_op.h" +#include "minddata/dataset/kernels/image/uniform_aug_op.h" +#include "minddata/dataset/kernels/image/random_horizontal_flip_op.h" +#include "minddata/dataset/kernels/image/random_vertical_flip_op.h" +#include "minddata/dataset/kernels/image/random_rotation_op.h" +#include "minddata/dataset/kernels/image/cut_out_op.h" +#include "minddata/dataset/kernels/image/random_color_adjust_op.h" +#include "minddata/dataset/kernels/image/pad_op.h" + +namespace mindspore { +namespace dataset { +namespace api { + +TensorOperation::TensorOperation() {} + +// Transform operations for computer vision. +namespace vision { + +// Function to create NormalizeOperation. +std::shared_ptr Normalize(std::vector mean, std::vector std) { + auto op = std::make_shared(mean, std); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create DecodeOperation. +std::shared_ptr Decode(bool rgb) { + auto op = std::make_shared(rgb); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create ResizeOperation. +std::shared_ptr Resize(std::vector size, InterpolationMode interpolation) { + auto op = std::make_shared(size, interpolation); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create RandomCropOperation. 
+std::shared_ptr RandomCrop(std::vector size, std::vector padding, + bool pad_if_needed, std::vector fill_value) { + auto op = std::make_shared(size, padding, pad_if_needed, fill_value); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create CenterCropOperation. +std::shared_ptr CenterCrop(std::vector size) { + auto op = std::make_shared(size); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create UniformAugOperation. +std::shared_ptr UniformAugment(std::vector> operations, + int32_t num_ops) { + auto op = std::make_shared(operations, num_ops); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create RandomHorizontalFlipOperation. +std::shared_ptr RandomHorizontalFlip(float prob) { + auto op = std::make_shared(prob); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create RandomVerticalFlipOperation. +std::shared_ptr RandomVerticalFlip(float prob) { + auto op = std::make_shared(prob); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create RandomRotationOperation. +std::shared_ptr RandomRotation(std::vector degrees, InterpolationMode resample, + bool expand, std::vector center, + std::vector fill_value) { + auto op = std::make_shared(degrees, resample, expand, center, fill_value); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create PadOperation. +std::shared_ptr Pad(std::vector padding, std::vector fill_value, + BorderType padding_mode) { + auto op = std::make_shared(padding, fill_value, padding_mode); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create CutOutOp. 
+std::shared_ptr CutOut(int32_t length, int32_t num_patches) { + auto op = std::make_shared(length, num_patches); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create RandomColorAdjustOperation. +std::shared_ptr RandomColorAdjust(std::vector brightness, + std::vector contrast, + std::vector saturation, std::vector hue) { + auto op = std::make_shared(brightness, contrast, saturation, hue); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +/* ####################################### Derived TensorOperation classes ################################# */ + +// NormalizeOperation +NormalizeOperation::NormalizeOperation(std::vector mean, std::vector std) : mean_(mean), std_(std) {} + +bool NormalizeOperation::ValidateParams() { + if (mean_.size() != 3) { + MS_LOG(ERROR) << "Normalize: mean vector has incorrect size: " << mean_.size(); + return false; + } + + if (std_.size() != 3) { + MS_LOG(ERROR) << "Normalize: std vector has incorrect size: " << std_.size(); + return false; + } + + return true; +} + +std::shared_ptr NormalizeOperation::Build() { + return std::make_shared(mean_[0], mean_[1], mean_[2], std_[0], std_[1], std_[2]); +} + +// DecodeOperation +DecodeOperation::DecodeOperation(bool rgb) : rgb_(rgb) {} + +bool DecodeOperation::ValidateParams() { return true; } + +std::shared_ptr DecodeOperation::Build() { return std::make_shared(rgb_); } + +// ResizeOperation +ResizeOperation::ResizeOperation(std::vector size, InterpolationMode interpolation) + : size_(size), interpolation_(interpolation) {} + +bool ResizeOperation::ValidateParams() { + if (size_.empty() || size_.size() > 2) { + MS_LOG(ERROR) << "Resize: size vector has incorrect size: " << size_.size(); + return false; + } + return true; +} + +std::shared_ptr ResizeOperation::Build() { + int32_t height = size_[0]; + int32_t width = 0; + + // User specified the width value. 
+ if (size_.size() == 2) { + width = size_[1]; + } + + return std::make_shared(height, width, interpolation_); +} + +// RandomCropOperation +RandomCropOperation::RandomCropOperation(std::vector size, std::vector padding, bool pad_if_needed, + std::vector fill_value) + : size_(size), padding_(padding), pad_if_needed_(pad_if_needed), fill_value_(fill_value) {} + +bool RandomCropOperation::ValidateParams() { + if (size_.empty() || size_.size() > 2) { + MS_LOG(ERROR) << "RandomCrop: size vector has incorrect size: " << size_.size(); + return false; + } + + if (padding_.empty() || padding_.size() != 4) { + MS_LOG(ERROR) << "RandomCrop: padding vector has incorrect size: padding.size()"; + return false; + } + + if (fill_value_.empty() || fill_value_.size() != 3) { + MS_LOG(ERROR) << "RandomCrop: fill_value vector has incorrect size: fill_value.size()"; + return false; + } + return true; +} + +std::shared_ptr RandomCropOperation::Build() { + int32_t crop_height = size_[0]; + int32_t crop_width = 0; + + int32_t pad_top = padding_[0]; + int32_t pad_bottom = padding_[1]; + int32_t pad_left = padding_[2]; + int32_t pad_right = padding_[3]; + + uint8_t fill_r = fill_value_[0]; + uint8_t fill_g = fill_value_[1]; + uint8_t fill_b = fill_value_[2]; + + // User has specified the crop_width value. + if (size_.size() == 2) { + crop_width = size_[1]; + } + + auto tensor_op = std::make_shared(crop_height, crop_width, pad_top, pad_bottom, pad_left, pad_right, + BorderType::kConstant, pad_if_needed_, fill_r, fill_g, fill_b); + return tensor_op; +} + +// CenterCropOperation +CenterCropOperation::CenterCropOperation(std::vector size) : size_(size) {} + +bool CenterCropOperation::ValidateParams() { + if (size_.empty() || size_.size() > 2) { + MS_LOG(ERROR) << "CenterCrop: size vector has incorrect size."; + return false; + } + return true; +} + +std::shared_ptr CenterCropOperation::Build() { + int32_t crop_height = size_[0]; + int32_t crop_width = 0; + + // User has specified crop_width. 
+ if (size_.size() == 2) { + crop_width = size_[1]; + } + + std::shared_ptr tensor_op = std::make_shared(crop_height, crop_width); + return tensor_op; +} + +// UniformAugOperation +UniformAugOperation::UniformAugOperation(std::vector> operations, int32_t num_ops) + : operations_(operations), num_ops_(num_ops) {} + +bool UniformAugOperation::ValidateParams() { return true; } + +std::shared_ptr UniformAugOperation::Build() { + std::vector> tensor_ops; + (void)std::transform(operations_.begin(), operations_.end(), std::back_inserter(tensor_ops), + [](std::shared_ptr op) -> std::shared_ptr { return op->Build(); }); + std::shared_ptr tensor_op = std::make_shared(tensor_ops, num_ops_); + return tensor_op; +} + +// RandomHorizontalFlipOperation +RandomHorizontalFlipOperation::RandomHorizontalFlipOperation(float probability) : probability_(probability) {} + +bool RandomHorizontalFlipOperation::ValidateParams() { return true; } + +std::shared_ptr RandomHorizontalFlipOperation::Build() { + std::shared_ptr tensor_op = std::make_shared(probability_); + return tensor_op; +} + +// RandomVerticalFlipOperation +RandomVerticalFlipOperation::RandomVerticalFlipOperation(float probability) : probability_(probability) {} + +bool RandomVerticalFlipOperation::ValidateParams() { return true; } + +std::shared_ptr RandomVerticalFlipOperation::Build() { + std::shared_ptr tensor_op = std::make_shared(probability_); + return tensor_op; +} + +// Function to create RandomRotationOperation. 
+RandomRotationOperation::RandomRotationOperation(std::vector degrees, InterpolationMode interpolation_mode, + bool expand, std::vector center, + std::vector fill_value) + : degrees_(degrees), + interpolation_mode_(interpolation_mode), + expand_(expand), + center_(center), + fill_value_(fill_value) {} + +bool RandomRotationOperation::ValidateParams() { + if (degrees_.empty() || degrees_.size() != 2) { + MS_LOG(ERROR) << "RandomRotation: degrees vector has incorrect size: degrees.size()"; + return false; + } + if (center_.empty() || center_.size() != 2) { + MS_LOG(ERROR) << "RandomRotation: center vector has incorrect size: center.size()"; + return false; + } + if (fill_value_.empty() || fill_value_.size() != 3) { + MS_LOG(ERROR) << "RandomRotation: fill_value vector has incorrect size: fill_value.size()"; + return false; + } + return true; +} + +std::shared_ptr RandomRotationOperation::Build() { + std::shared_ptr tensor_op = + std::make_shared(degrees_[0], degrees_[1], center_[0], center_[1], interpolation_mode_, expand_, + fill_value_[0], fill_value_[1], fill_value_[2]); + return tensor_op; +} + +// PadOperation +PadOperation::PadOperation(std::vector padding, std::vector fill_value, BorderType padding_mode) + : padding_(padding), fill_value_(fill_value), padding_mode_(padding_mode) {} + +bool PadOperation::ValidateParams() { + if (padding_.empty() || padding_.size() == 3 || padding_.size() > 4) { + MS_LOG(ERROR) << "Pad: padding vector has incorrect size: padding.size()"; + return false; + } + + if (fill_value_.empty() || (fill_value_.size() != 1 && fill_value_.size() != 3)) { + MS_LOG(ERROR) << "Pad: fill_value vector has incorrect size: fill_value.size()"; + return false; + } + return true; +} + +std::shared_ptr PadOperation::Build() { + int32_t pad_top, pad_bottom, pad_left, pad_right; + switch (padding_.size()) { + case 1: + pad_left = padding_[0]; + pad_top = padding_[0]; + pad_right = padding_[0]; + pad_bottom = padding_[0]; + break; + case 2: + pad_left = 
padding_[0]; + pad_top = padding_[1]; + pad_right = padding_[0]; + pad_bottom = padding_[1]; + break; + default: + pad_left = padding_[0]; + pad_top = padding_[1]; + pad_right = padding_[2]; + pad_bottom = padding_[3]; + } + uint8_t fill_r, fill_g, fill_b; + + fill_r = fill_value_[0]; + fill_g = fill_value_[0]; + fill_b = fill_value_[0]; + + if (fill_value_.size() == 3) { + fill_r = fill_value_[0]; + fill_g = fill_value_[1]; + fill_b = fill_value_[2]; + } + + std::shared_ptr tensor_op = + std::make_shared(pad_top, pad_bottom, pad_left, pad_right, padding_mode_, fill_r, fill_g, fill_b); + return tensor_op; +} + +// CutOutOperation +CutOutOperation::CutOutOperation(int32_t length, int32_t num_patches) : length_(length), num_patches_(num_patches) {} + +bool CutOutOperation::ValidateParams() { + if (length_ < 0) { + MS_LOG(ERROR) << "CutOut: length cannot be negative"; + return false; + } + if (num_patches_ < 0) { + MS_LOG(ERROR) << "CutOut: number of patches cannot be negative"; + return false; + } + return true; +} + +std::shared_ptr CutOutOperation::Build() { + std::shared_ptr tensor_op = std::make_shared(length_, length_, num_patches_, false, 0, 0, 0); + return tensor_op; +} + +// RandomColorAdjustOperation. +RandomColorAdjustOperation::RandomColorAdjustOperation(std::vector brightness, std::vector contrast, + std::vector saturation, std::vector hue) + : brightness_(brightness), contrast_(contrast), saturation_(saturation), hue_(hue) {} + +bool RandomColorAdjustOperation::ValidateParams() { + // Do some input validation. 
+ if (brightness_.empty() || brightness_.size() > 2) { + MS_LOG(ERROR) << "RandomColorAdjust: brightness must be a vector of one or two values"; + return false; + } + if (contrast_.empty() || contrast_.size() > 2) { + MS_LOG(ERROR) << "RandomColorAdjust: contrast must be a vector of one or two values"; + return false; + } + if (saturation_.empty() || saturation_.size() > 2) { + MS_LOG(ERROR) << "RandomColorAdjust: saturation must be a vector of one or two values"; + return false; + } + if (hue_.empty() || hue_.size() > 2) { + MS_LOG(ERROR) << "RandomColorAdjust: hue must be a vector of one or two values"; + return false; + } + return true; +} + +std::shared_ptr RandomColorAdjustOperation::Build() { + float brightness_lb, brightness_ub, contrast_lb, contrast_ub, saturation_lb, saturation_ub, hue_lb, hue_ub; + + brightness_lb = brightness_[0]; + brightness_ub = brightness_[0]; + + if (brightness_.size() == 2) brightness_ub = brightness_[1]; + + contrast_lb = contrast_[0]; + contrast_ub = contrast_[0]; + + if (contrast_.size() == 2) contrast_ub = contrast_[1]; + + saturation_lb = saturation_[0]; + saturation_ub = saturation_[0]; + + if (saturation_.size() == 2) saturation_ub = saturation_[1]; + + hue_lb = hue_[0]; + hue_ub = hue_[0]; + + if (hue_.size() == 2) hue_ub = hue_[1]; + + std::shared_ptr tensor_op = std::make_shared( + brightness_lb, brightness_ub, contrast_lb, contrast_ub, saturation_lb, saturation_ub, hue_lb, hue_ub); + return tensor_op; +} + +} // namespace vision +} // namespace api +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/core/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/core/CMakeLists.txt similarity index 70% rename from mindspore/ccsrc/dataset/core/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/core/CMakeLists.txt index 27b9f0e13b..bfe6e67563 100644 --- a/mindspore/ccsrc/dataset/core/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/core/CMakeLists.txt @@ -1,10 +1,6 @@ 
-ms_protobuf_generate(EXAMPLE_SRCS EXAMPLE_HDRS example.proto) -ms_protobuf_generate(FEATURE_SRCS FEATURE_HDRS feature.proto) file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) -add_library(core OBJECT - ${EXAMPLE_SRCS} - ${FEATURE_SRCS} +set(DATASET_CORE_SRC_FILES client.cc config_manager.cc cv_tensor.cc @@ -13,6 +9,13 @@ add_library(core OBJECT tensor.cc tensor_row.cc tensor_shape.cc - ) +) + +ms_protobuf_generate(EXAMPLE_SRCS EXAMPLE_HDRS example.proto) +ms_protobuf_generate(FEATURE_SRCS FEATURE_HDRS feature.proto) +add_library(core OBJECT ${DATASET_CORE_SRC_FILES} ${EXAMPLE_SRCS} ${FEATURE_SRCS}) add_dependencies(core mindspore::protobuf) -target_include_directories(core PRIVATE ${pybind11_INCLUDE_DIRS}) + +if (ENABLE_PYTHON) + target_include_directories(core PRIVATE ${pybind11_INCLUDE_DIRS}) +endif() diff --git a/mindspore/ccsrc/dataset/core/client.cc b/mindspore/ccsrc/minddata/dataset/core/client.cc similarity index 80% rename from mindspore/ccsrc/dataset/core/client.cc rename to mindspore/ccsrc/minddata/dataset/core/client.cc index 6247ddae7d..e3fd844e66 100644 --- a/mindspore/ccsrc/dataset/core/client.cc +++ b/mindspore/ccsrc/minddata/dataset/core/client.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/core/client.h" -#include "dataset/core/config_manager.h" -#include "dataset/core/global_context.h" -#include "dataset/util/services.h" -#include "dataset/util/sig_handler.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/util/services.h" +#include "minddata/dataset/util/sig_handler.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/minddata/dataset/core/client.h b/mindspore/ccsrc/minddata/dataset/core/client.h new file mode 100644 index 0000000000..78b298e616 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/core/client.h @@ -0,0 +1,61 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef DATASET_CORE_CLIENT_H_ +#define DATASET_CORE_CLIENT_H_ + +// client.h +// Include file for DE client functions + +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/data_type.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/dataset_iterator.h" +#include "minddata/dataset/engine/datasetops/source/mindrecord_op.h" +#include "minddata/dataset/engine/datasetops/source/tf_reader_op.h" + +#ifdef ENABLE_PYTHON +#include "minddata/dataset/engine/datasetops/barrier_op.h" +#include "minddata/dataset/engine/datasetops/filter_op.h" +#include "minddata/dataset/engine/datasetops/source/generator_op.h" +#include "minddata/dataset/engine/datasetops/build_vocab_op.h" +#endif + +#include "minddata/dataset/engine/datasetops/batch_op.h" +#include "minddata/dataset/engine/datasetops/dataset_op.h" +#include "minddata/dataset/engine/datasetops/device_queue_op.h" +#include "minddata/dataset/engine/datasetops/map_op.h" +#include "minddata/dataset/engine/datasetops/project_op.h" +#include "minddata/dataset/engine/datasetops/rename_op.h" +#include "minddata/dataset/engine/datasetops/repeat_op.h" +#include "minddata/dataset/engine/datasetops/skip_op.h" +#include "minddata/dataset/engine/datasetops/shuffle_op.h" +#include "minddata/dataset/engine/datasetops/take_op.h" +#include "minddata/dataset/engine/datasetops/zip_op.h" +#include "minddata/dataset/engine/datasetops/concat_op.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/util/status.h" + +namespace mindspore { +namespace dataset { +// This is a one-time global initializer that needs to be called at the +// start of any minddata applications. 
+extern Status GlobalInit(); +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_CORE_CLIENT_H_ diff --git a/mindspore/ccsrc/dataset/core/config_manager.cc b/mindspore/ccsrc/minddata/dataset/core/config_manager.cc similarity index 97% rename from mindspore/ccsrc/dataset/core/config_manager.cc rename to mindspore/ccsrc/minddata/dataset/core/config_manager.cc index 9291a8f832..e1fc7f29ba 100644 --- a/mindspore/ccsrc/dataset/core/config_manager.cc +++ b/mindspore/ccsrc/minddata/dataset/core/config_manager.cc @@ -13,13 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/core/config_manager.h" +#include "minddata/dataset/core/config_manager.h" #include #include #include -#include "dataset/util/system_pool.h" +#include "minddata/dataset/util/system_pool.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/core/config_manager.h b/mindspore/ccsrc/minddata/dataset/core/config_manager.h similarity index 97% rename from mindspore/ccsrc/dataset/core/config_manager.h rename to mindspore/ccsrc/minddata/dataset/core/config_manager.h index 807591daa1..a8e1907c41 100644 --- a/mindspore/ccsrc/dataset/core/config_manager.h +++ b/mindspore/ccsrc/minddata/dataset/core/config_manager.h @@ -22,9 +22,9 @@ #include -#include "dataset/core/constants.h" -#include "dataset/util/path.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/status.h" // Config settings for the client-side // example config file: diff --git a/mindspore/ccsrc/dataset/core/constants.h b/mindspore/ccsrc/minddata/dataset/core/constants.h similarity index 89% rename from mindspore/ccsrc/dataset/core/constants.h rename to mindspore/ccsrc/minddata/dataset/core/constants.h index 34d2f2583c..c85ef52bf5 100644 --- a/mindspore/ccsrc/dataset/core/constants.h +++ 
b/mindspore/ccsrc/minddata/dataset/core/constants.h @@ -32,6 +32,12 @@ enum class DatasetType { kUnknown, kArrow, kTf }; // Possible flavours of Tensor implementations enum class TensorImpl { kNone, kFlexible, kCv, kNP }; +// Possible values for Border types +enum class BorderType { kConstant = 0, kEdge = 1, kReflect = 2, kSymmetric = 3 }; + +// Possible interpolation modes +enum class InterpolationMode { kLinear = 0, kNearestNeighbour = 1, kCubic = 2, kArea = 3 }; + // convenience functions for 32bit int bitmask inline bool BitTest(uint32_t bits, uint32_t bitMask) { return (bits & bitMask) == bitMask; } diff --git a/mindspore/ccsrc/dataset/core/cv_tensor.cc b/mindspore/ccsrc/minddata/dataset/core/cv_tensor.cc similarity index 96% rename from mindspore/ccsrc/dataset/core/cv_tensor.cc rename to mindspore/ccsrc/minddata/dataset/core/cv_tensor.cc index 16921e8b2d..5af748b5de 100644 --- a/mindspore/ccsrc/dataset/core/cv_tensor.cc +++ b/mindspore/ccsrc/minddata/dataset/core/cv_tensor.cc @@ -13,13 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/core/cv_tensor.h" +#include "minddata/dataset/core/cv_tensor.h" #include #include -#include "dataset/core/constants.h" -#include "dataset/core/tensor.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/tensor.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/core/cv_tensor.h b/mindspore/ccsrc/minddata/dataset/core/cv_tensor.h similarity index 96% rename from mindspore/ccsrc/dataset/core/cv_tensor.h rename to mindspore/ccsrc/minddata/dataset/core/cv_tensor.h index 8c136f5f3c..a614418be6 100644 --- a/mindspore/ccsrc/dataset/core/cv_tensor.h +++ b/mindspore/ccsrc/minddata/dataset/core/cv_tensor.h @@ -24,9 +24,9 @@ #include "./securec.h" -#include "dataset/core/constants.h" -#include "dataset/core/data_type.h" -#include "dataset/core/tensor.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/data_type.h" +#include "minddata/dataset/core/tensor.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/core/data_type.cc b/mindspore/ccsrc/minddata/dataset/core/data_type.cc similarity index 96% rename from mindspore/ccsrc/dataset/core/data_type.cc rename to mindspore/ccsrc/minddata/dataset/core/data_type.cc index bb10fae52f..b5641e3105 100644 --- a/mindspore/ccsrc/dataset/core/data_type.cc +++ b/mindspore/ccsrc/minddata/dataset/core/data_type.cc @@ -13,12 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/core/data_type.h" +#include "minddata/dataset/core/data_type.h" +#ifdef ENABLE_PYTHON +#include "minddata/dataset/core/pybind_support.h" +#endif #include "utils/log_adapter.h" -#include "dataset/core/pybind_support.h" - namespace mindspore { namespace dataset { @@ -29,12 +30,14 @@ uint8_t DataType::SizeInBytes() const { return 0; } +#ifdef ENABLE_PYTHON py::dtype DataType::AsNumpyType() const { if (type_ < DataType::NUM_OF_TYPES) return py::dtype(kTypeInfo[type_].pybindType_); else return py::dtype("unknown"); } +#endif uint8_t DataType::AsCVType() const { uint8_t res = kCVInvalidType; @@ -112,6 +115,7 @@ std::string DataType::ToString() const { return "unknown"; } +#ifdef ENABLE_PYTHON DataType DataType::FromNpArray(const py::array &arr) { if (py::isinstance>(arr)) { return DataType(DataType::DE_BOOL); @@ -156,6 +160,7 @@ std::string DataType::GetPybindFormat() const { } return res; } +#endif } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/core/data_type.h b/mindspore/ccsrc/minddata/dataset/core/data_type.h similarity index 83% rename from mindspore/ccsrc/dataset/core/data_type.h rename to mindspore/ccsrc/minddata/dataset/core/data_type.h index a487f3300e..db4834cae2 100644 --- a/mindspore/ccsrc/dataset/core/data_type.h +++ b/mindspore/ccsrc/minddata/dataset/core/data_type.h @@ -19,14 +19,16 @@ #include #include - +#ifdef ENABLE_PYTHON #include "pybind11/numpy.h" #include "pybind11/pybind11.h" - -#include "dataset/core/constants.h" -#include "dataset/core/pybind_support.h" - +#include "minddata/dataset/core/pybind_support.h" namespace py = pybind11; +#else +#include "Eigen/Core" +using float16 = Eigen::half; +#endif +#include "minddata/dataset/core/constants.h" namespace mindspore { namespace dataset { @@ -59,6 +61,7 @@ class DataType { const uint8_t cvType_; // OpenCv matching type }; +#ifdef ENABLE_PYTHON static inline const TypeInfo kTypeInfo[] = { // name, sizeInBytes, pybindTypem formatDescriptor, 
openCV {"unknown", 0, "object", "", kCVInvalidType}, // DE_UNKNOWN @@ -76,19 +79,38 @@ class DataType { {"float64", 8, "double", py::format_descriptor::format(), CV_64F}, // DE_FLOAT64 {"string", 0, "bytes", "S", kCVInvalidType} // DE_STRING }; +#else + static inline const TypeInfo kTypeInfo[] = { + // name, sizeInBytes, pybindTypem formatDescriptor, openCV + {"unknown", 0, "object", "", kCVInvalidType}, // DE_UNKNOWN + {"bool", 1, "bool", "", CV_8U}, // DE_BOOL + {"int8", 1, "int8", "", CV_8S}, // DE_INT8 + {"uint8", 1, "uint8", "", CV_8U}, // DE_UINT8 + {"int16", 2, "int16", "", CV_16S}, // DE_INT16 + {"uint16", 2, "uint16", "", CV_16U}, // DE_UINT16 + {"int32", 4, "int32", "", CV_32S}, // DE_INT32 + {"uint32", 4, "uint32", "", kCVInvalidType}, // DE_UINT32 + {"int64", 8, "int64", "", kCVInvalidType}, // DE_INT64 + {"uint64", 8, "uint64", "", kCVInvalidType}, // DE_UINT64 + {"float16", 2, "float16", "", CV_16F}, // DE_FLOAT16 + {"float32", 4, "float32", "", CV_32F}, // DE_FLOAT32 + {"float64", 8, "double", "", CV_64F}, // DE_FLOAT64 + {"string", 0, "bytes", "", kCVInvalidType} // DE_STRING + }; +#endif // No arg constructor to create an unknown shape DataType() : type_(DE_UNKNOWN) {} // Create a type from a given string - // @param type_str + /// \param type_str explicit DataType(const std::string &type_str); // Default destructor ~DataType() = default; // Create a type from a given enum - // @param d + /// \param d constexpr explicit DataType(Type d) : type_(d) {} constexpr bool operator==(const DataType a) const { return type_ == a.type_; } @@ -100,49 +122,49 @@ class DataType { constexpr bool operator!=(const Type a) const { return type_ != a; } // Disable this usage `if(d)` where d is of type DataType - // @return + /// \return operator bool() = delete; // To be used in Switch/case - // @return + /// \return operator Type() const { return type_; } // The number of bytes needed to store one value of this type - // @return + /// \return uint8_t SizeInBytes() 
const; // Convert from DataType to OpenCV type - // @return + /// \return uint8_t AsCVType() const; // Convert from OpenCV type to DataType - // @param cv_type - // @return + /// \param cv_type + /// \return static DataType FromCVType(int cv_type); // Returns a string representation of the type - // @return + /// \return std::string ToString() const; // returns true if the template type is the same as the Tensor type_ - // @tparam T - // @return true or false + /// \tparam T + /// \return true or false template bool IsCompatible() const { return type_ == FromCType(); } // returns true if the template type is the same as the Tensor type_ - // @tparam T - // @return true or false + /// \tparam T + /// \return true or false template bool IsLooselyCompatible() const; // << Stream output operator overload - // @notes This allows you to print the info using stream operators - // @param out - reference to the output stream being overloaded - // @param rO - reference to the DataType to display - // @return - the output stream must be returned + /// \notes This allows you to print the info using stream operators + /// \param out - reference to the output stream being overloaded + /// \param rO - reference to the DataType to display + /// \return - the output stream must be returned friend std::ostream &operator<<(std::ostream &out, const DataType &so) { out << so.ToString(); return out; @@ -151,22 +173,24 @@ class DataType { template static DataType FromCType(); +#ifdef ENABLE_PYTHON // Convert from DataType to Pybind type - // @return + /// \return py::dtype AsNumpyType() const; // Convert from NP type to DataType - // @param type - // @return + /// \param type + /// \return static DataType FromNpType(const py::dtype &type); // Convert from NP array to DataType - // @param py array - // @return + /// \param py array + /// \return static DataType FromNpArray(const py::array &arr); +#endif // Get the buffer string format of the current type. Used in pybind buffer protocol. 
- // @return + /// \return std::string GetPybindFormat() const; bool IsSignedInt() const { diff --git a/mindspore/ccsrc/dataset/core/example.proto b/mindspore/ccsrc/minddata/dataset/core/example.proto similarity index 100% rename from mindspore/ccsrc/dataset/core/example.proto rename to mindspore/ccsrc/minddata/dataset/core/example.proto diff --git a/mindspore/ccsrc/dataset/core/feature.proto b/mindspore/ccsrc/minddata/dataset/core/feature.proto similarity index 100% rename from mindspore/ccsrc/dataset/core/feature.proto rename to mindspore/ccsrc/minddata/dataset/core/feature.proto diff --git a/mindspore/ccsrc/dataset/core/global_context.cc b/mindspore/ccsrc/minddata/dataset/core/global_context.cc similarity index 86% rename from mindspore/ccsrc/dataset/core/global_context.cc rename to mindspore/ccsrc/minddata/dataset/core/global_context.cc index 3de8e0fcd8..eb76382ab2 100644 --- a/mindspore/ccsrc/dataset/core/global_context.cc +++ b/mindspore/ccsrc/minddata/dataset/core/global_context.cc @@ -13,17 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/core/global_context.h" +#include "minddata/dataset/core/global_context.h" #include #include -#include "dataset/core/config_manager.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/core/tensor.h" -#include "dataset/util/allocator.h" -#include "dataset/util/circular_pool.h" -#include "dataset/util/system_pool.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/util/allocator.h" +#include "minddata/dataset/util/circular_pool.h" +#include "minddata/dataset/util/system_pool.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/core/global_context.h b/mindspore/ccsrc/minddata/dataset/core/global_context.h similarity index 96% rename from mindspore/ccsrc/dataset/core/global_context.h rename to mindspore/ccsrc/minddata/dataset/core/global_context.h index ee0cbfbbe0..fe0847f639 100644 --- a/mindspore/ccsrc/dataset/core/global_context.h +++ b/mindspore/ccsrc/minddata/dataset/core/global_context.h @@ -19,9 +19,9 @@ #include #include -#include "dataset/core/constants.h" -#include "dataset/util/allocator.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/util/allocator.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/core/pybind_support.h b/mindspore/ccsrc/minddata/dataset/core/pybind_support.h similarity index 100% rename from mindspore/ccsrc/dataset/core/pybind_support.h rename to mindspore/ccsrc/minddata/dataset/core/pybind_support.h diff --git a/mindspore/ccsrc/dataset/core/tensor.cc b/mindspore/ccsrc/minddata/dataset/core/tensor.cc similarity index 96% rename from mindspore/ccsrc/dataset/core/tensor.cc rename to mindspore/ccsrc/minddata/dataset/core/tensor.cc index 8de3425c5b..842615f9e1 100644 --- a/mindspore/ccsrc/dataset/core/tensor.cc +++ 
b/mindspore/ccsrc/minddata/dataset/core/tensor.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/core/tensor.h" +#include "minddata/dataset/core/tensor.h" #include #include @@ -25,13 +25,15 @@ #include #include "common/utils.h" -#include "dataset/core/constants.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/core/global_context.h" -#include "dataset/core/pybind_support.h" -#include "dataset/core/tensor_shape.h" - +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/core/global_context.h" +#ifdef ENABLE_PYTHON +#include "minddata/dataset/core/pybind_support.h" namespace py = pybind11; +#endif +#include "minddata/dataset/core/tensor_shape.h" + namespace mindspore { namespace dataset { // Helper macros for printing tensor elements @@ -155,6 +157,7 @@ Tensor::Tensor(const std::vector &strings, const TensorShape &shape MS_ASSERT(num_bytes == 0); if (shape.known()) Tensor::Reshape(shape); } + Tensor::Tensor(const dataengine::BytesList &bytes_list, const TensorShape &shape) : Tensor(TensorShape({static_cast(bytes_list.value_size())}), DataType(DataType::DE_STRING)) { // total bytes needed = offset array + strings @@ -194,6 +197,7 @@ Tensor::Tensor(const dataengine::BytesList &bytes_list, const TensorShape &shape MS_ASSERT(num_bytes == 0); if (shape.known()) Tensor::Reshape(shape); } + Status Tensor::CreateTensor(std::shared_ptr *ptr, TensorImpl tensor_impl, const TensorShape &shape, DataType type, const unsigned char *data) { if (!shape.known()) { @@ -223,6 +227,7 @@ Status Tensor::CreateTensor(std::shared_ptr *ptr, TensorImpl tensor_impl return Status::OK(); // returns base-class shared_ptr } +#ifdef ENABLE_PYTHON Status Tensor::CreateTensorFromNumpyString(std::shared_ptr *ptr, py::array arr) { std::vector shape; for (dsize_t i = 0; i < arr.ndim(); i++) { @@ -297,6 +302,7 @@ Status 
Tensor::CreateTensor(std::shared_ptr *ptr, py::array arr) { return Status::OK(); // returns base-class shared_ptr } +#endif Status Tensor::CreateTensor(std::shared_ptr *ptr, const std::vector &strings, const TensorShape &shape) { @@ -513,6 +519,15 @@ const unsigned char *Tensor::GetBuffer() const { return data_; } +// check for empty +bool Tensor::HasData() const { + if (data_ == nullptr) { + return true; + } else { + return false; + } +} + unsigned char *Tensor::GetMutableBuffer() { if (!shape_.known() || type_ == DataType::DE_UNKNOWN) { return nullptr; @@ -689,21 +704,24 @@ std::vector Tensor::Strides() { return strides; } -Status Tensor::GetBufferInfo(Tensor &t, py::buffer_info *out) { - CHECK_FAIL_RETURN_UNEXPECTED(t.type().IsNumeric(), "Cannot use GetBufferInfo on tensor of strings."); +#ifdef ENABLE_PYTHON +Status Tensor::GetBufferInfo(Tensor *t, py::buffer_info *out) { + RETURN_UNEXPECTED_IF_NULL(t); + CHECK_FAIL_RETURN_UNEXPECTED(t->type().IsNumeric(), "Cannot use GetBufferInfo on tensor of strings."); - std::string format_desc = t.type().GetPybindFormat(); + std::string format_desc = t->type().GetPybindFormat(); if (format_desc.empty()) { RETURN_STATUS_UNEXPECTED("Cannot convert DE type tp pybind format"); } - *out = py::buffer_info(t.GetMutableBuffer(), /* Pointer to buffer */ - t.type().SizeInBytes(), /* Size of one scalar */ - format_desc, /* Python struct-style format descriptor */ - t.Rank(), /* Number of dimensions */ - t.shape().AsVector(), /* Buffer dimensions */ - t.Strides()); + *out = py::buffer_info(t->GetMutableBuffer(), /* Pointer to buffer */ + t->type().SizeInBytes(), /* Size of one scalar */ + format_desc, /* Python struct-style format descriptor */ + t->Rank(), /* Number of dimensions */ + t->shape().AsVector(), /* Buffer dimensions */ + t->Strides()); return Status::OK(); } +#endif template Status Tensor::GetItemAt(T *o, const std::vector &index) const { @@ -743,6 +761,8 @@ Status Tensor::GetItemAt(std::string_view *o, const std::vector 
&index) o->swap(sv); return Status::OK(); } + +#ifdef ENABLE_PYTHON // return data as numpy, should return status Status Tensor::GetDataAsNumpy(py::array *data) { RETURN_UNEXPECTED_IF_NULL(data_); @@ -806,6 +826,7 @@ Status Tensor::GetDataAsNumpyStrings(py::array *data) { data_allocator_->deallocate(reinterpret_cast(tmp_data)); return Status::OK(); } +#endif void Tensor::Squeeze() { shape_ = shape_.Squeeze(); } diff --git a/mindspore/ccsrc/dataset/core/tensor.h b/mindspore/ccsrc/minddata/dataset/core/tensor.h similarity index 96% rename from mindspore/ccsrc/dataset/core/tensor.h rename to mindspore/ccsrc/minddata/dataset/core/tensor.h index 9fed0bbc97..b0b173e9c3 100644 --- a/mindspore/ccsrc/dataset/core/tensor.h +++ b/mindspore/ccsrc/minddata/dataset/core/tensor.h @@ -26,20 +26,27 @@ #undef HAVE_STDDEF_H #undef HAVE_STDLIB_H #endif + +#ifdef ENABLE_PYTHON #include "pybind11/numpy.h" #include "pybind11/pybind11.h" #include "pybind11/stl.h" -#include "dataset/core/constants.h" -#include "dataset/core/data_type.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/util/allocator.h" -#include "dataset/util/status.h" +#endif + +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/data_type.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/util/status.h" #include "proto/example.pb.h" +#ifdef ENABLE_PYTHON namespace py = pybind11; +#endif namespace mindspore { namespace dataset { class Tensor; +template +class Allocator; using CharAllocPtr = std::unique_ptr>; using TensorAllocPtr = std::shared_ptr>; // An allocator shared_ptr for Tensors @@ -114,16 +121,17 @@ class Tensor { static Status CreateTensor(std::shared_ptr *, TensorImpl tensor_impl, const TensorShape &shape, DataType type, const unsigned char *data = nullptr); - /// Create a copy of the input tensor - /// \param out [out] output tensor to be generated - /// \param in [in] orginal tensor to be copied - /// \return Status + // Create a copy of the 
input tensor + // @param out [out] output tensor to be generated + // @param in [in] orginal tensor to be copied + // @return Status static Status CreateTensor(std::shared_ptr *out, const std::shared_ptr &in) { const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator(); *out = std::allocate_shared(*alloc, in->shape(), in->type(), in->GetBuffer(), in->SizeInBytes()); return Status::OK(); } +#ifdef ENABLE_PYTHON // A static factory method to create a Tensor from a given py::array. // @param ptr output argument to hold the created Tensor // @param arr py::array @@ -132,6 +140,7 @@ class Tensor { // Helper function to create a tensor from Numpy of strings static Status CreateTensorFromNumpyString(std::shared_ptr *ptr, py::array arr); +#endif // A static factory method to create a Tensor from a given list of strings. // @param ptr output argument to hold the created Tensor @@ -170,6 +179,7 @@ class Tensor { static Status CreateTensor(std::shared_ptr *ptr, const T &item) { return CreateTensor(ptr, {item}, TensorShape::CreateScalar()); } + // Create tensor from protobuf bytelist with uint8 or int8 types static Status CreateTensor(std::shared_ptr *ptr, const dataengine::BytesList &bytes_list, const TensorShape &shape, const DataType &type, dsize_t pad_size); @@ -277,6 +287,10 @@ class Tensor { // @return const TensorShape &shape() const { return shape_; } + /// Check if tensor has data + /// \return bool - true if tensor is empty + bool HasData() const; + // Reshape the tensor. The given shape should have the same number of elements in the Tensor // @param shape virtual Status Reshape(const TensorShape &shape); @@ -342,12 +356,12 @@ class Tensor { virtual void Squeeze(); - /// Calculates the strides of the Tensor - /// Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte) - /// The strides will be {6,2,1}. - /// Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 byte) - /// The strides will be {24,8,4}. 
- /// @return vector of integers + // Calculates the strides of the Tensor + // Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte) + // The strides will be {6,2,1}. + // Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 byte) + // The strides will be {24,8,4}. + // @return vector of integers std::vector Strides(); std::string ToString() { @@ -372,6 +386,7 @@ class Tensor { // Slice string tensors Status SliceString(std::shared_ptr *out, const std::vector &indices); +#ifdef ENABLE_PYTHON // Constructs numpy array from input tensor // @param data this data is the location of python data // @return Status code @@ -379,7 +394,8 @@ class Tensor { Status GetDataAsNumpyStrings(py::array *data); - static Status GetBufferInfo(Tensor &t, py::buffer_info *out); + static Status GetBufferInfo(Tensor *t, py::buffer_info *out); +#endif // Concatenate based on given tensor, can fill in current tensor with a smaller one, unlike InsertTensor Status Concatenate(const std::vector &index, const std::shared_ptr &input); @@ -566,7 +582,7 @@ class Tensor { // Return a TensorIterator that points to the start of the Tensor. 
// It's the user responsibility to use the correct type that matches the Tensor type - // @tparam T The type of values in the Tensor + // @param T The type of values in the Tensor // @return TensorIterator template TensorIterator begin() { diff --git a/mindspore/ccsrc/dataset/core/tensor_row.cc b/mindspore/ccsrc/minddata/dataset/core/tensor_row.cc similarity index 97% rename from mindspore/ccsrc/dataset/core/tensor_row.cc rename to mindspore/ccsrc/minddata/dataset/core/tensor_row.cc index 882f6728bf..5d75730a4c 100644 --- a/mindspore/ccsrc/dataset/core/tensor_row.cc +++ b/mindspore/ccsrc/minddata/dataset/core/tensor_row.cc @@ -16,9 +16,8 @@ #include -#include "dataset/core/tensor_row.h" +#include "minddata/dataset/core/tensor_row.h" -namespace py = pybind11; namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/core/tensor_row.h b/mindspore/ccsrc/minddata/dataset/core/tensor_row.h similarity index 98% rename from mindspore/ccsrc/dataset/core/tensor_row.h rename to mindspore/ccsrc/minddata/dataset/core/tensor_row.h index 49bc61657c..e8f066c87b 100644 --- a/mindspore/ccsrc/dataset/core/tensor_row.h +++ b/mindspore/ccsrc/minddata/dataset/core/tensor_row.h @@ -21,7 +21,7 @@ #include #include -#include "dataset/core/tensor.h" +#include "minddata/dataset/core/tensor.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/core/tensor_shape.cc b/mindspore/ccsrc/minddata/dataset/core/tensor_shape.cc similarity index 97% rename from mindspore/ccsrc/dataset/core/tensor_shape.cc rename to mindspore/ccsrc/minddata/dataset/core/tensor_shape.cc index a0d6b9cd8d..ff40062d37 100644 --- a/mindspore/ccsrc/dataset/core/tensor_shape.cc +++ b/mindspore/ccsrc/minddata/dataset/core/tensor_shape.cc @@ -15,13 +15,13 @@ */ #define MAX_INTEGER_DTYPE 9223372036854775807 -#include "dataset/core/tensor_shape.h" +#include "minddata/dataset/core/tensor_shape.h" #include #include "common/utils.h" #include "utils/log_adapter.h" -#include 
"dataset/core/constants.h" +#include "minddata/dataset/core/constants.h" namespace mindspore { namespace dataset { @@ -77,6 +77,7 @@ TensorShape::TensorShape(const TensorShape &shape) known_ = shape.known_; // override with the input shape in case of unknown-rank tensor shape. } +#ifdef ENABLE_PYTHON TensorShape::TensorShape(py::list l) : raw_shape_(*GlobalContext::Instance()->int_allocator()), strides_(*GlobalContext::Instance()->int_allocator()) { std::vector list_c; @@ -89,6 +90,7 @@ TensorShape::TensorShape(py::list l) } AddListToShape(list_c); } +#endif TensorShape::TensorShape(cv::MatSize cv_size, uint32_t type) : raw_shape_(*GlobalContext::Instance()->int_allocator()), strides_(*GlobalContext::Instance()->int_allocator()) { @@ -197,6 +199,7 @@ TensorShape TensorShape::AppendDim(dsize_t dim) const { return TensorShape(vec); } +#ifdef ENABLE_PYTHON py::list TensorShape::AsPyList() { py::list list; for (auto i : raw_shape_) { @@ -204,6 +207,7 @@ py::list TensorShape::AsPyList() { } return list; } +#endif TensorShape TensorShape::Squeeze() const { std::vector new_shape; diff --git a/mindspore/ccsrc/dataset/core/tensor_shape.h b/mindspore/ccsrc/minddata/dataset/core/tensor_shape.h similarity index 57% rename from mindspore/ccsrc/dataset/core/tensor_shape.h rename to mindspore/ccsrc/minddata/dataset/core/tensor_shape.h index c83e43cd7d..4944f9e32c 100644 --- a/mindspore/ccsrc/dataset/core/tensor_shape.h +++ b/mindspore/ccsrc/minddata/dataset/core/tensor_shape.h @@ -24,13 +24,16 @@ #include +#ifdef ENABLE_PYTHON #include "pybind11/pybind11.h" +namespace py = pybind11; +#endif -#include "dataset/core/constants.h" -#include "dataset/core/global_context.h" -#include "dataset/util/allocator.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/util/allocator.h" -namespace py = pybind11; namespace mindspore { namespace dataset { // Class that represents a 
shape of a Tensor. A shape can be: @@ -43,7 +46,8 @@ namespace dataset { // -# one or more dim is unknown --> not empty vector --> where di is unknown\n // Example: <3,?> (the 1st dim is unknown)\n // <2,?,?,?> (all dims but the 0th dim are unknown) -// TensorShape supports any dim > 0 and < 2^31-1 + +/// \brief TensorShape supports any dim > 0 and < 2^31-1 class TensorShape { public: static constexpr dsize_t kDimUnknown = -1; // constant for an unknown dimension @@ -51,57 +55,59 @@ class TensorShape { // Force the compiler to not create a no-arg constructor TensorShape() = delete; - // Create a Shape from an initialization list (e.g., TensorShape s = {2,2}). - // If one of the dims is set to DIM_UNKNOWN, the shape will flagged as unKnown - // @param list + /// \brief Create a Shape from an initialization list (e.g., TensorShape s = {2,2}). + /// If one of the dims is set to DIM_UNKNOWN, the shape will flagged as unKnown + /// \param[in] list explicit TensorShape(const std::initializer_list &list); - // Create a Shape from a vector (e.g., TensorShape s = std::vector({2,2}) ). - // If one of the dims is set to DIM_UNKNOWN, the shape will flagged as unKnown - // @param list + /// \brief Create a Shape from a vector (e.g., TensorShape s = std::vector({2,2}) ). 
+ /// If one of the dims is set to DIM_UNKNOWN, the shape will flagged as unKnown + /// \param[in] list explicit TensorShape(const std::vector &list); - // Copy constructor - // @param shape + /// \brief Copy constructor + /// \param[in] shape TensorShape(const TensorShape &shape); - // construct a TensorShape via a python list - // @param py::list l - a list object from python +#ifdef ENABLE_PYTHON + /// \brief construct a TensorShape via a python list + /// \param[in] py::list l - a list object from python explicit TensorShape(py::list l); +#endif ~TensorShape() = default; - // Create a scalar Shape (i.e., empty shape with mKnown = true) - // @return TensorShape + /// \brief Create a scalar Shape (i.e., empty shape with mKnown = true) + /// \return TensorShape static TensorShape CreateScalar() { return TensorShape({}); } - // Create a shape with an unknown rank. - // @return TensorShape + /// \brief Create a shape with an unknown rank. + /// \return TensorShape static TensorShape CreateUnknownRankShape(); - // Create a shape with a known rank . - // @return TensorShape + /// \brief Create a shape with a known rank . + /// \return TensorShape static TensorShape CreateUnknownShapeWithRank(dsize_t rank); - // Insert a new dim into a copy of the current shape. - // @param dim to be added - // @param axis the index where dim should be added - // @return New modified shape + /// \brief Insert a new dim into a copy of the current shape. + /// \param[in] dim to be added + /// \param[in] axis the index where dim should be added + /// \return New modified shape TensorShape InsertDim(dsize_t axis, dsize_t dim) const; - // Insert new dim at index 0. For example, <2,4> --> PrependDim(4) --> <4,2,4> - // @param dim - // @return + /// \brief Insert new dim at index 0. For example, <2,4> --> PrependDim(4) --> <4,2,4> + /// \param[in] dim + /// \return TensorShape PrependDim(dsize_t dim) const; - // Insert a new dim at the end of the shape. 
For example, <2,4> --> AppendDim(4) --> <2,4,4> - // @param dim - // @return + /// \brief Insert a new dim at the end of the shape. For example, <2,4> --> AppendDim(4) --> <2,4,4> + /// \param[in] dim + /// \return TensorShape AppendDim(dsize_t dim) const; - // Create a shape based on OpenCV shape and type - // @param cv_size - // @param type int that represent the type in OpenCV, example CV_8U, CV_64S + /// \brief Create a shape based on OpenCV shape and type + /// \param[in] cv_size + /// \param[in] type int that represent the type in OpenCV, example CV_8U, CV_64S TensorShape(cv::MatSize cv_size, uint32_t type); dsize_t Size() const { return raw_shape_.size(); } @@ -123,47 +129,50 @@ class TensorShape { return raw_shape_[index]; } - // Return the Shape as a vector - // @return + /// \brief Return the Shape as a vector + /// \return std::vector AsVector() const; - // Returns the class info as a string - // @return + /// \brief Returns the class info as a string + /// \return std::string ToString() const { std::stringstream ss; ss << *this; return ss.str(); } - // Actual print function used by operator<< - // @param out output string stream + /// \brief Actual print function used by operator<< + /// \param out output string stream void Print(std::ostream &out) const; - // << Stream output operator overload - // @notes This allows you to print the info using stream operators - // @param out - reference to the output stream being overloaded - // @param rO - reference to the TensorShape to display - // @return - the output stream must be returned + /// \brief << Stream output operator overload + /// This allows you to print the info using stream operators + /// \param[in] out - reference to the output stream being overloaded + /// \param[in] rO - reference to the TensorShape to display + /// \return - the output stream must be returned friend std::ostream &operator<<(std::ostream &out, const TensorShape &so) { so.Print(out); return out; } +#ifdef ENABLE_PYTHON 
py::list AsPyList(); +#endif - // Checks if the given index is a valid index for this tensor. - // For example: Tensor<3,4> Index<1,1> is valid. But Index<4,1> or <1> are not. - // @param index - // @return bool + /// \brief Checks if the given index is a valid index for this tensor. + /// For example: Tensor<3,4> Index<1,1> is valid. But Index<4,1> or <1> are not. + /// \param[in] index + /// \return bool bool IsValidIndex(const std::vector &index) const; TensorShape Squeeze() const; std::vector Strides() const; - // Returns the location of the item assuming row major memory layout. - // @param index - // @return + /// \brief Returns the location of the item assuming row major memory layout. + /// \param[in] index + /// \param[out] flat_index + /// \return Status ToFlatIndex(const std::vector &index, dsize_t *flat_index) const; private: @@ -174,11 +183,11 @@ class TensorShape { // Vector to keep the strides of the shape. The size is rank+1 std::vector strides_; - // Internal utility function to iterate over a list, check if the dim is valid and then insert it into the shape. - // @tparam T list - // @param list Iterable list - // @return true if the shape is valid and no overflow would be generated when counting the number of elements. - // False otherwise. + /// \brief Internal utility function to iterate over a list, + /// check if the dim is valid and then insert it into the shape. + /// \param[in] list Iterable list + /// \return true if the shape is valid and no overflow would be generated when counting the number of elements. + /// False otherwise. 
template void AddListToShape(const T &list); }; diff --git a/mindspore/ccsrc/dataset/engine/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/engine/CMakeLists.txt similarity index 74% rename from mindspore/ccsrc/dataset/engine/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/engine/CMakeLists.txt index 66f95d0926..e3ead16d05 100644 --- a/mindspore/ccsrc/dataset/engine/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/engine/CMakeLists.txt @@ -2,6 +2,7 @@ add_subdirectory(datasetops) add_subdirectory(opt) add_subdirectory(gnn) add_subdirectory(perf) +add_subdirectory(cache) if (ENABLE_TDTQUE) add_subdirectory(tdt) endif () @@ -17,7 +18,9 @@ add_library(engine OBJECT target_include_directories(engine PRIVATE ${pybind11_INCLUDE_DIRS}) if (ENABLE_TDTQUE) - add_dependencies(engine engine-datasetops engine-datasetops-source engine-tdt engine-opt engine-gnn engine-perf) -else() - add_dependencies(engine engine-datasetops engine-datasetops-source engine-opt engine-gnn engine-perf) + add_dependencies(engine engine-datasetops engine-datasetops-source engine-tdt engine-opt engine-gnn engine-perf + engine-cache-client engine-cache-server) +else () + add_dependencies(engine engine-datasetops engine-datasetops-source engine-opt engine-gnn engine-perf + engine-cache-client engine-cache-server) endif () diff --git a/mindspore/ccsrc/dataset/api/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/engine/cache/CMakeLists.txt similarity index 54% rename from mindspore/ccsrc/dataset/api/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/engine/cache/CMakeLists.txt index 194aeed457..5e7ebea176 100644 --- a/mindspore/ccsrc/dataset/api/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/engine/cache/CMakeLists.txt @@ -1,7 +1,8 @@ file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) -add_library(APItoPython OBJECT - 
de_pipeline.cc - python_bindings.cc - ) -target_include_directories(APItoPython PRIVATE ${pybind11_INCLUDE_DIRS}) +add_library(engine-cache-client OBJECT + cache_client.cc + cache_request.cc) +add_library(engine-cache-server OBJECT + cache_service.cc + cache_server.cc) diff --git a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_client.cc b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_client.cc new file mode 100644 index 0000000000..04746131bb --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_client.cc @@ -0,0 +1,208 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include "minddata/dataset/engine/cache/cache_client.h" +#include "minddata/dataset/engine/cache/cache_request.h" +#include "minddata/dataset/util/bit.h" + +namespace mindspore { +namespace dataset { + +// Constructor +CacheClient::CacheClient(uint32_t session_id, uint64_t cache_mem_sz, bool spill) + : server_connection_id_(0), session_id_(session_id), cache_crc_(0), cache_mem_sz_(cache_mem_sz), spill_(spill) {} + +// print method for display cache details +void CacheClient::Print(std::ostream &out) const { + out << " Session id: " << session_id_ << "\n Cache crc: " << cache_crc_ + << "\n Server cache id: " << server_connection_id_ << "\n Cache mem size: " << cache_mem_sz_ + << "\n Spilling: " << std::boolalpha << spill_; +} + +Status CacheClient::WriteRow(const TensorRow &row, row_id_type *row_id_from_server) const { + CacheRowRequest rq(server_connection_id_, cookie()); + RETURN_IF_NOT_OK(rq.SerializeCacheRowRequest(row)); + RETURN_IF_NOT_OK(CacheServer::GetInstance().PushRequest(&rq)); + RETURN_IF_NOT_OK(rq.Wait()); + if (row_id_from_server != nullptr) { + *row_id_from_server = rq.GetRowIdAfterCache(); + } + return Status::OK(); +} + +Status CacheClient::WriteBuffer(std::unique_ptr &&in) const { + std::unique_ptr db_ptr = std::move(in); + auto num_rows = db_ptr->NumRows(); + std::vector all_rows; + if (num_rows > 0) { + all_rows.reserve(num_rows); + // Break down the DataBuffer into TensorRow. We will send the requests async + // and then do a final wait. + MemGuard rq_arr; + RETURN_IF_NOT_OK(rq_arr.allocate(num_rows, server_connection_id_, cookie())); + CacheServer &cs = CacheServer::GetInstance(); + for (auto i = 0; i < num_rows; ++i) { + TensorRow row; + auto rq = rq_arr[i]; + RETURN_IF_NOT_OK(db_ptr->PopRow(&row)); + RETURN_IF_NOT_OK(rq->SerializeCacheRowRequest(row)); + RETURN_IF_NOT_OK(cs.PushRequest(rq)); + // We can't let row go out of scope. Otherwise it will free all the tensor memory. + // So park it in the vector. 
When this function go out of scope, its memory + // will be freed. + all_rows.push_back(std::move(row)); + } + // Now we wait for the requests to be done. + for (auto i = 0; i < num_rows; ++i) { + auto rq = rq_arr[i]; + RETURN_IF_NOT_OK(rq->Wait()); + } + } + return Status::OK(); +} + +Status CacheClient::GetRows(const std::vector &row_id, TensorTable *out) const { + RETURN_UNEXPECTED_IF_NULL(out); + BatchFetchRequest rq(server_connection_id_, row_id); + RETURN_IF_NOT_OK(CacheServer::GetInstance().PushRequest(&rq)); + RETURN_IF_NOT_OK(rq.Wait()); + RETURN_IF_NOT_OK(rq.RestoreRows(out)); + return Status::OK(); +} + +Status CacheClient::CreateCache(uint32_t tree_crc, bool generate_id) { + UniqueLock lck(&mux_); + // To create a cache, we identify ourself at the client by: + // - the shared session id + // - a crc for the tree nodes from the cache downward + // Pack these 2 into a single 64 bit request id + // + // Consider this example: + // tree1: tfreader --> map(decode) --> cache (session id = 1, crc = 123) --> batch + // tree2: cifar10 --> map(rotate) --> cache (session id = 1, crc = 456) --> batch + // These are different trees in a single session, but the user wants to share the cache. + // This is not allowed because the data of these caches are different. + // + // Consider this example: + // tree1: tfreader --> map(decode) --> cache (session id = 1, crc = 123) --> batch + // tree2: tfreader --> map(decode) --> cache (session id = 1, crc = 123) --> map(rotate) --> batch + // These are different trees in the same session, but the cached data is the same, so it is okay + // to allow the sharing of this cache between these pipelines. + + // The CRC is computed by the tree prepare phase and passed to this function when creating the cache. + // If we already have a server_connection_id_, then it means this same cache client has already been used + // to create a cache and some other tree is trying to use the same cache. 
+ // That is allowed, however the crc better match! + if (server_connection_id_) { + if (cache_crc_ != tree_crc) { + RETURN_STATUS_UNEXPECTED("Attempt to re-use a cache for a different tree!"); + } + // Check the state of the server. For non-mappable case where there is a build phase and a fetch phase, we should + // skip the build phase. + lck.Unlock(); // GetStat will grab the mutex again. So unlock it to prevent deadlock. + CacheClient::ServiceStat stat{}; + RETURN_IF_NOT_OK(GetStat(&stat)); + if (stat.cache_service_state == static_cast(CacheService::State::kFetchPhase)) { + return Status(StatusCode::kDuplicateKey, __LINE__, __FILE__, "Not an error and we should bypass the build phase"); + } + } else { + cache_crc_ = tree_crc; // It's really a new cache we're creating so save our crc in the client + // Combine the session and crc. This will form our client cache identifier. + connection_id_type connection_identification = (static_cast(session_id_) << 32) | cache_crc_; + // Now execute the cache create request using this identifier and other configs + BaseRequest::CreateCacheFlag createFlag = BaseRequest::CreateCacheFlag::kNone; + if (spill_) { + createFlag |= BaseRequest::CreateCacheFlag::kSpillToDisk; + } + if (generate_id) { + createFlag |= BaseRequest::CreateCacheFlag::kGenerateRowId; + } + CreationCacheRequest rq(connection_identification, cache_mem_sz_, createFlag); + RETURN_IF_NOT_OK(CacheServer::GetInstance().PushRequest(&rq)); + Status rc = rq.Wait(); + if (rc.IsOk() || rc.get_code() == StatusCode::kDuplicateKey) { + server_connection_id_ = rq.GetServerConnectionId(); + if (rc.IsOk()) { + // The 1st guy creating the cache will get a cookie back. + // But this object may be shared among pipelines and we don't want + // overwrite it. + cookie_ = rq.cookie(); + } + } + // We are not resetting the Duplicate key return code. We are passing it back to the CacheOp. This will tell the + // CacheOp to bypass the build phase. 
+ return rc; + } + return Status::OK(); +} + +Status CacheClient::PurgeCache() { + UniqueLock lck(&mux_); + PurgeCacheRequest rq(server_connection_id_); + RETURN_IF_NOT_OK(CacheServer::GetInstance().PushRequest(&rq)); + return rq.Wait(); +} + +Status CacheClient::DestroyCache() { + UniqueLock lck(&mux_); + DestroyCacheRequest rq(server_connection_id_); + RETURN_IF_NOT_OK(CacheServer::GetInstance().PushRequest(&rq)); + return rq.Wait(); +} + +Status CacheClient::GetStat(ServiceStat *stat) { + SharedLock lck(&mux_); + RETURN_UNEXPECTED_IF_NULL(stat); + GetStatRequest rq(server_connection_id_); + RETURN_IF_NOT_OK(CacheServer::GetInstance().PushRequest(&rq)); + RETURN_IF_NOT_OK(rq.Wait()); + stat->num_disk_cached = rq.GetNumDiskCached(); + stat->num_mem_cached = rq.GetNumMemCached(); + stat->min_row_id = rq.GetMinRowId(); + stat->max_row_id = rq.GetMaxRowId(); + stat->cache_service_state = rq.GetState(); + return Status::OK(); +} + +Status CacheClient::CacheSchema(const std::unordered_map &map) { + SharedLock lck(&mux_); + CacheSchemaRequest rq(server_connection_id_); + RETURN_IF_NOT_OK(rq.SerializeCacheSchemaRequest(map)); + RETURN_IF_NOT_OK(CacheServer::GetInstance().PushRequest(&rq)); + RETURN_IF_NOT_OK(rq.Wait()); + return Status::OK(); +} + +Status CacheClient::FetchSchema(std::unordered_map *map) { + SharedLock lck(&mux_); + RETURN_UNEXPECTED_IF_NULL(map); + FetchSchemaRequest rq(server_connection_id_); + RETURN_IF_NOT_OK(CacheServer::GetInstance().PushRequest(&rq)); + RETURN_IF_NOT_OK(rq.Wait()); + *map = rq.GetColumnMap(); + return Status::OK(); +} + +Status CacheClient::BuildPhaseDone() const { + SharedLock lck(&mux_); + BuildPhaseDoneRequest rq(server_connection_id_, cookie()); + RETURN_IF_NOT_OK(CacheServer::GetInstance().PushRequest(&rq)); + RETURN_IF_NOT_OK(rq.Wait()); + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_client.h 
b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_client.h new file mode 100644 index 0000000000..f25db87578 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_client.h @@ -0,0 +1,141 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef DATASET_ENGINE_CACHE_CLIENT_H_ +#define DATASET_ENGINE_CACHE_CLIENT_H_ + +#include +#include +#include +#include +#include +#include + +#include "./de_tensor_generated.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/cache/cache_server.h" +#include "minddata/dataset/util/lock.h" + +namespace mindspore { +namespace dataset { +/// \brief A CacheClient is a bridge between a DatasetOp and a CacheServer. All communications are through +/// a CacheClient. Typical tasks including like creating a cache service, cache a data buffer, restore a previously +/// rows, etc. +class CacheClient { + public: + /// \brief Constructor + /// \param session_id A user assigned session id for the current pipeline + /// \param cache_mem_sz Size of the memory set aside for the row caching. 
0 for unlimited + /// \param spill Spill to disk if out of memory + CacheClient(uint32_t session_id, uint64_t cache_mem_sz, bool spill); + + /// \brief Destructor + ~CacheClient() = default; + + /// \brief Getter function for returning the current session id + /// \return session id + uint64_t session_id() const { return session_id_; } + + /// \brief Send a TensorRow to the cache server + /// \param[in] row + /// \param[out] row_id_from_server Optional. The row id assigned by the server for non-mappable dataset + /// \return return code + Status WriteRow(const TensorRow &row, row_id_type *row_id_from_server = nullptr) const; + + /// \brief Send a DataBuffer to the cache server + /// \param in Unique pointer of the DataBuffer to be cached + /// \return return code + Status WriteBuffer(std::unique_ptr &&in) const; + + /// \brief Fetch a list of rows from the cache server. An empty TensorRow will be returned if there is + /// any cache miss + /// \param row_id A vector of row id's + /// \param out A TensorTable of TensorRows. + /// \return return code + Status GetRows(const std::vector &row_id, TensorTable *out) const; + + /// \brief Create a cache. + /// \param tree_crc A crc that was generated during tree prepare phase + /// \param generate_id Let the cache service generate row id + /// \return Status object + Status CreateCache(uint32_t tree_crc, bool generate_id); + + /// \brief Purge a cache. Cache can be reused after reset. + /// \return Status object + Status PurgeCache(); + + /// \brief Destroy a cache. Like Purge but the cache is deleted and can't be reused. + /// \return Status object + Status DestroyCache(); + + /// \brief Get the statistics from a cache. 
+ /// \param[in/out] Pointer to a pre-allocated ServiceStat object + /// \return Status object + struct ServiceStat { + int64_t num_mem_cached; + int64_t num_disk_cached; + row_id_type min_row_id; + row_id_type max_row_id; + int8_t cache_service_state; + }; + Status GetStat(ServiceStat *); + + /// \brief Cache the schema at the cache server + /// \param map The unordered map of the schema + /// \return Status object + Status CacheSchema(const std::unordered_map &map); + + /// \brief Fetch the schema from the cache server + /// \param map Pointer to pre-allocated map object + /// \return Status object. + Status FetchSchema(std::unordered_map *map); + + /// \brief Change the state from build phase to read phase. Applicable to non-mappable dataset only. Only the cache + /// client that holds cookie can be allowed to make this request + /// \return Status object + Status BuildPhaseDone() const; + + /// \brief A print method typically used for debugging + /// \param out The output stream to write output to + void Print(std::ostream &out) const; + + /// \brief Stream output operator overload + /// \return the output stream must be returned + friend std::ostream &operator<<(std::ostream &out, const CacheClient &cc) { + cc.Print(out); + return out; + } + + /// \brief Every cache server has a cookie which uniquely identifies the CacheClient that creates it. + /// \return Cookie + std::string cookie() const { return cookie_; } + + private: + mutable RWLock mux_; + uint64_t cache_mem_sz_; + bool spill_; + // The session_id_ and cache_crc_ work together to uniquely identify this particular cache and allow + // sharing of the cache. + uint32_t session_id_; + uint32_t cache_crc_; + // The server_connection_id_ is the actual id we use for operations after the cache is built + connection_id_type server_connection_id_; + // Some magic cookie returned from the cache server. 
+ std::string cookie_; +}; +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_ENGINE_CACHE_CLIENT_H_ diff --git a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_request.cc b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_request.cc new file mode 100644 index 0000000000..3b7fc057a2 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_request.cc @@ -0,0 +1,223 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ +#include "minddata/dataset/engine/cache/cache_request.h" + +namespace mindspore { +namespace dataset { + +Status CacheRowRequest::SerializeCacheRowRequest(const TensorRow &row) { + buffers_.reserve(row.size() + 1); + RETURN_IF_NOT_OK(SerializeTensorRowHeader(row)); + buffers_.push_back(fbb_->GetBufferPointer()); + for (const auto &ts : row) { + buffers_.push_back(ts->GetBuffer()); + } + return Status::OK(); +} + +Status CacheRowRequest::SerializeTensorRowHeader(const TensorRow &row) { + try { + fbb_ = std::make_shared(); + std::vector> v; + std::vector tensor_sz; + v.reserve(row.size()); + tensor_sz.reserve(row.size()); + // We will go through each column in the row. 
+ for (const std::shared_ptr &ts_ptr : row) { + flatbuffers::Offset ts_off; + RETURN_IF_NOT_OK(SerializeOneTensorMeta(ts_ptr, &ts_off)); + v.push_back(ts_off); + tensor_sz.push_back(ts_ptr->SizeInBytes()); + } + auto column_off = fbb_->CreateVector(v); + auto data_sz_off = fbb_->CreateVector(tensor_sz); + TensorRowHeaderMsgBuilder row_builder(*fbb_); + row_builder.add_column(column_off); + row_builder.add_data_sz(data_sz_off); + // Pass the row_id even if it may not be known. + row_builder.add_row_id(row.getId()); + row_builder.add_size_of_this(-1); // fill in later after we call Finish. + auto out = row_builder.Finish(); + fbb_->Finish(out); + // Now go back to fill in size_of_this in the flat buffer. + auto msg = GetMutableTensorRowHeaderMsg(fbb_->GetBufferPointer()); + auto success = msg->mutate_size_of_this(fbb_->GetSize()); + if (!success) { + RETURN_STATUS_UNEXPECTED("Unable to set size_of_this"); + } + return Status::OK(); + } catch (const std::bad_alloc &e) { + return Status(StatusCode::kOutOfMemory, __LINE__, __FILE__); + } +} + +Status CacheRowRequest::SerializeOneTensorMeta(const std::shared_ptr &ts_ptr, + flatbuffers::Offset *out_off) { + RETURN_UNEXPECTED_IF_NULL(out_off); + const Tensor *ts = ts_ptr.get(); + auto shape_off = fbb_->CreateVector(ts->shape().AsVector()); + const auto ptr = ts->GetBuffer(); + if (ptr == nullptr) { + RETURN_STATUS_UNEXPECTED("Tensor buffer is null"); + } + auto src = ts->type().value(); + TensorType dest; +#define CASE(t) \ + case DataType::t: \ + dest = TensorType::TensorType_##t; \ + break + // Map the type to fill in the flat buffer. + switch (src) { + CASE(DE_BOOL); + CASE(DE_INT8); + CASE(DE_UINT8); + CASE(DE_INT16); + CASE(DE_UINT16); + CASE(DE_INT32); + CASE(DE_UINT32); + CASE(DE_INT64); + CASE(DE_UINT64); + CASE(DE_FLOAT16); + CASE(DE_FLOAT32); + CASE(DE_FLOAT64); + CASE(DE_STRING); + default: + MS_LOG(ERROR) << "Unknown tensor. 
Dumping content:\n" << *ts; + RETURN_STATUS_UNEXPECTED("Unknown type"); + } +#undef CASE + + TensorMetaMsgBuilder ts_builder(*fbb_); + ts_builder.add_dims(shape_off); + ts_builder.add_type(dest); + auto ts_off = ts_builder.Finish(); + *out_off = ts_off; + return Status::OK(); +} + +Status BatchFetchRequest::RestoreOneTensor(const TensorMetaMsg *col_ts, const ReadableSlice &data, + std::shared_ptr *out) { + RETURN_UNEXPECTED_IF_NULL(col_ts); + auto shape_in = col_ts->dims(); + auto type_in = col_ts->type(); + std::vector v; + v.reserve(shape_in->size()); + v.assign(shape_in->begin(), shape_in->end()); + TensorShape shape(v); + DataType::Type dest = DataType::DE_UNKNOWN; +#define CASE(t) \ + case TensorType_##t: \ + dest = DataType::Type::t; \ + break + + switch (type_in) { + CASE(DE_BOOL); + CASE(DE_INT8); + CASE(DE_UINT8); + CASE(DE_INT16); + CASE(DE_UINT16); + CASE(DE_INT32); + CASE(DE_UINT32); + CASE(DE_INT64); + CASE(DE_UINT64); + CASE(DE_FLOAT16); + CASE(DE_FLOAT32); + CASE(DE_FLOAT64); + CASE(DE_STRING); + } +#undef CASE + + DataType type(dest); + std::shared_ptr ts = + std::make_shared(shape, type, static_cast(data.GetPointer()), data.GetSize()); + // Next we restore the real data which can be embedded or stored separately. + if (ts->SizeInBytes() != data.GetSize()) { + MS_LOG(ERROR) << "Unexpected length. Read " << data.GetSize() << ". Expected " << ts->SizeInBytes() << ".\n" + << "Dumping tensor\n" + << *ts << "\n"; + RETURN_STATUS_UNEXPECTED("Length mismatch. 
See log file for details."); + } + *out = std::move(ts); + return Status::OK(); +} + +Status BatchFetchRequest::RestoreRows(TensorTable *out) { + RETURN_UNEXPECTED_IF_NULL(out); + auto num_elements = row_id_.size(); + auto *offset_array = reinterpret_cast(mem_.GetPointer()); + TensorTable tbl; + tbl.reserve(num_elements); + ReadableSlice all(mem_.GetPointer(), mem_.GetSizeInBytes()); + for (auto i = 0; i < num_elements; ++i) { + auto len = offset_array[i + 1] - offset_array[i]; + TensorRow row; + row.setId(row_id_.at(i)); + if (len > 0) { + ReadableSlice row_data(all, offset_array[i], len); + // Next we de-serialize flat buffer to get back each column + auto msg = GetTensorRowHeaderMsg(row_data.GetPointer()); + auto msg_sz = msg->size_of_this(); + // Start of the tensor data + auto ts_offset = msg_sz; + row.reserve(msg->column()->size()); + for (auto k = 0; k < msg->column()->size(); ++k) { + auto col_ts = msg->column()->Get(k); + std::shared_ptr ts; + ReadableSlice data(row_data, ts_offset, msg->data_sz()->Get(k)); + RETURN_IF_NOT_OK(RestoreOneTensor(col_ts, data, &ts)); + row.push_back(ts); + ts_offset += data.GetSize(); + } + } + tbl.push_back(std::move(row)); + } + *out = std::move(tbl); + return Status::OK(); +} + +Status CacheSchemaRequest::SerializeCacheSchemaRequest(const std::unordered_map &map) { + try { + fbb_ = std::make_shared(); + std::vector> v; + v.reserve(map.size()); + for (auto &column : map) { + auto c = CreateColumnNameMsg(*fbb_, fbb_->CreateString(column.first), column.second); + v.push_back(c); + } + auto v_off = fbb_->CreateVector(v); + auto final_off = CreateSchemaMsg(*fbb_, v_off); + fbb_->Finish(final_off); + buf_ = fbb_->GetBufferPointer(); + len_of_buf_ = fbb_->GetSize(); + return Status::OK(); + } catch (const std::bad_alloc &e) { + return Status(StatusCode::kOutOfMemory, __LINE__, __FILE__); + } +} + +std::unordered_map FetchSchemaRequest::GetColumnMap() { + if (column_name_id_map_.empty()) { + auto *map_msg = 
flatbuffers::GetRoot(mem_.GetPointer()); + auto v = map_msg->column(); + for (auto i = 0; i < v->size(); ++i) { + auto col = map_msg->column()->Get(i); + column_name_id_map_.emplace(col->name()->str(), col->id()); + } + } + return column_name_id_map_; +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_request.h b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_request.h new file mode 100644 index 0000000000..3d0edc6dd8 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_request.h @@ -0,0 +1,225 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ +#ifndef DATASET_ENGINE_CACHE_REQ_H_ +#define DATASET_ENGINE_CACHE_REQ_H_ + +#include +#include +#include +#include +#include +#include + +#include "./de_tensor_generated.h" +#include "minddata/dataset/core/tensor_row.h" +#include "minddata/dataset/util/slice.h" +#include "minddata/dataset/util/wait_post.h" + +namespace mindspore { +namespace dataset { +/// \brief CacheClient communicates with CacheServer using Requests. +class BaseRequest { + public: + // Request types + enum class RequestType : int16_t { + kCacheRow = 0, + kBatchFetchRows = 1, + kCreateCache = 2, + kPurgeCache = 3, + kDestroyCache = 4, + kGetStat = 5, + kCacheSchema = 6, + kFetchSchema = 7, + kBuildPhaseDone = 8, + // Add new request before it. 
+ kRequestUnknown = 32767 + }; + // For kCreateCache + enum class CreateCacheFlag : uint32_t { kNone = 0, kSpillToDisk = 1, kGenerateRowId = 1u << 1L }; + friend class CacheServer; + /// \brief Base class of a cache server request + /// \param connection_id A combination of session id and crc that uniquely identifies a connection. + /// \param type Type of the request + explicit BaseRequest(connection_id_type connection_id, RequestType type) + : type_(type), connection_id_(connection_id) {} + virtual ~BaseRequest() = default; + /// \brief Wait for the completion of a request + /// \return Status returned from the cache server + Status Wait() { + RETURN_IF_NOT_OK(wp_.Wait()); + return rc_; + } + + /// \brief Getter function of the current connection id + /// \return Connection id + connection_id_type GetServerConnectionId() const { return connection_id_; } + + private: + RequestType type_; + connection_id_type connection_id_; + Status rc_; + WaitPost wp_; +}; +/// \brief Request to cache a single TensorRow +class CacheRowRequest : public BaseRequest { + public: + friend class CacheServer; + explicit CacheRowRequest(connection_id_type connection_id, const std::string &cookie) + : BaseRequest(connection_id, RequestType::kCacheRow), row_id_from_server_(-1), cookie_(cookie) {} + ~CacheRowRequest() = default; + + /// \brief Serialize a TensorRow for streaming to the cache server + /// \param row TensorRow + /// \return Status object + Status SerializeCacheRowRequest(const TensorRow &row); + /// \brief Return the row id assigned to this row for non-mappable dataset + /// \return row id of the cached row + row_id_type GetRowIdAfterCache() { return row_id_from_server_; } + + private: + std::shared_ptr fbb_; + row_id_type row_id_from_server_; + std::vector buffers_; + std::string cookie_; + + /// \brief Private function to serialize one TensorRow + /// \param row TensorRow + /// \return Status object + Status SerializeTensorRowHeader(const TensorRow &row); + /// \brief 
Private function to serialize one Tensor + /// \param ts_ptr Tensor + /// \return Status object + Status SerializeOneTensorMeta(const std::shared_ptr &ts_ptr, flatbuffers::Offset *out_off); +}; +/// \brief Request to fetch rows in batch +class BatchFetchRequest : public BaseRequest { + public: + friend class CacheServer; + friend class CacheService; + BatchFetchRequest(connection_id_type connection_id, const std::vector &row_id) + : BaseRequest(connection_id, RequestType::kBatchFetchRows), row_id_(row_id) {} + Status RestoreRows(TensorTable *out); + + private: + std::vector row_id_; + MemGuard mem_; + Status RestoreOneTensor(const TensorMetaMsg *col_ts, const ReadableSlice &data, std::shared_ptr *out); +}; +/// \brief Request to create a cache for the current connection +class CreationCacheRequest : public BaseRequest { + public: + friend class CacheServer; + /// \brief Constructor + /// \param connection_id + /// \param cache_mem_sz Maximum memory assigned for this connection. 0 means unlimited + /// \param flag Attributes of the cache. + explicit CreationCacheRequest(connection_id_type connection_id, uint64_t cache_mem_sz, + CreateCacheFlag flag = CreateCacheFlag::kNone) + : BaseRequest(connection_id, RequestType::kCreateCache), cache_mem_sz(cache_mem_sz), flag_(flag) {} + + std::string cookie() const { return cookie_; } + + private: + uint64_t cache_mem_sz; + CreateCacheFlag flag_; + std::string cookie_; +}; +/// \brief Request to purge a cache. 
+class PurgeCacheRequest : public BaseRequest { + public: + friend class CacheServer; + explicit PurgeCacheRequest(connection_id_type connection_id) : BaseRequest(connection_id, RequestType::kPurgeCache) {} +}; +/// \brief Request to destroy a cache +class DestroyCacheRequest : public BaseRequest { + public: + friend class CacheServer; + explicit DestroyCacheRequest(connection_id_type connection_id) + : BaseRequest(connection_id, RequestType::kDestroyCache) {} +}; +/// \brief Obtain the statistics of the current connection +class GetStatRequest : public BaseRequest { + public: + friend class CacheServer; + friend class CacheService; + explicit GetStatRequest(connection_id_type connection_id) : BaseRequest(connection_id, RequestType::kGetStat) {} + row_id_type GetMinRowId() const { + auto *msg = flatbuffers::GetRoot(mem_.GetPointer()); + return msg->min_row_id(); + } + row_id_type GetMaxRowId() const { + auto *msg = flatbuffers::GetRoot(mem_.GetPointer()); + return msg->max_row_id(); + } + int64_t GetNumMemCached() const { + auto *msg = flatbuffers::GetRoot(mem_.GetPointer()); + return msg->num_mem_cached(); + } + int64_t GetNumDiskCached() const { + auto *msg = flatbuffers::GetRoot(mem_.GetPointer()); + return msg->num_disk_cached(); + } + uint8_t GetState() const { + auto *msg = flatbuffers::GetRoot(mem_.GetPointer()); + return msg->state(); + } + + private: + MemGuard mem_; +}; +/// \brief Request to cache a schema +class CacheSchemaRequest : public BaseRequest { + public: + friend class CacheServer; + explicit CacheSchemaRequest(connection_id_type connection_id) + : BaseRequest(connection_id, RequestType::kCacheSchema), buf_(nullptr), len_of_buf_(0) {} + ~CacheSchemaRequest() = default; + + Status SerializeCacheSchemaRequest(const std::unordered_map &map); + const void *GetBuffer() const { return buf_; } + + private: + std::shared_ptr fbb_; + const void *buf_; + int64_t len_of_buf_; +}; +/// \brief Request to fetch a schema +class FetchSchemaRequest : public 
BaseRequest { + public: + friend class CacheServer; + explicit FetchSchemaRequest(connection_id_type connection_id) + : BaseRequest(connection_id, RequestType::kFetchSchema) {} + ~FetchSchemaRequest() = default; + + std::unordered_map GetColumnMap(); + + private: + MemGuard mem_; + std::unordered_map column_name_id_map_; +}; +/// \brief Request to change a cache from build phase to read phase. Applies to non-mappable cache only. +class BuildPhaseDoneRequest : public BaseRequest { + public: + friend class CacheServer; + BuildPhaseDoneRequest(connection_id_type connection_id, const std::string &cookie) + : BaseRequest(connection_id, RequestType::kBuildPhaseDone), cookie_(cookie) {} + + private: + std::string cookie_; +}; +} // namespace dataset +} // namespace mindspore +#endif // DATASET_ENGINE_CACHE_SERVICE_H_ diff --git a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_server.cc b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_server.cc new file mode 100644 index 0000000000..c9fb6ecab1 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_server.cc @@ -0,0 +1,252 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ +#include "minddata/dataset/engine/cache/cache_server.h" +#include "minddata/dataset/engine/cache/cache_service.h" +#include "minddata/dataset/engine/cache/cache_request.h" +#include "minddata/dataset/util/bit.h" + +namespace mindspore { +namespace dataset { +Status CacheServer::DoServiceStart() { + if (!top_.empty()) { + Path spill(top_); + RETURN_IF_NOT_OK(spill.CreateDirectories()); + MS_LOG(INFO) << "CacheServer will use disk folder: " << top_; + } + RETURN_IF_NOT_OK(vg_.ServiceStart()); + cache_q_ = std::make_shared>(1024); + RETURN_IF_NOT_OK(cache_q_->Register(&vg_)); + auto f = std::bind(&CacheServer::ServerRequest, this); + // Spawn a few threads to serve the request. + for (auto i = 0; i < num_workers_; ++i) { + RETURN_IF_NOT_OK(vg_.CreateAsyncTask("Cache server", f)); + } + return Status::OK(); +} + +Status CacheServer::DoServiceStop() { + Status rc; + Status rc2; + // First stop all the threads. + RETURN_IF_NOT_OK(vg_.ServiceStop()); + // Clean up all the caches if any.
+ UniqueLock lck(&rwLock_); + auto it = all_caches_.begin(); + while (it != all_caches_.end()) { + auto cs = std::move(it->second); + rc2 = cs->ServiceStop(); + if (rc2.IsError()) { + rc = rc2; + } + ++it; + } + return rc; +} + +CacheService *CacheServer::GetService(connection_id_type id) const { + SharedLock lck(&rwLock_); + auto it = all_caches_.find(id); + if (it != all_caches_.end()) { + return it->second.get(); + } + return nullptr; +} + +Status CacheServer::CreateService(connection_id_type connection_id, uint64_t cache_mem_sz, + BaseRequest::CreateCacheFlag flag, std::string *out_cookie) { + // We can't do spilling unless this server is setup with a spill path in the first place + bool spill = (flag & BaseRequest::CreateCacheFlag::kSpillToDisk) == BaseRequest::CreateCacheFlag::kSpillToDisk; + bool generate_id = + (flag & BaseRequest::CreateCacheFlag::kGenerateRowId) == BaseRequest::CreateCacheFlag::kGenerateRowId; + if (spill && top_.empty()) { + RETURN_STATUS_UNEXPECTED("Server is not set up with spill support."); + } + RETURN_UNEXPECTED_IF_NULL(out_cookie); + *out_cookie = ""; + // Before creating the cache, first check if this is a request for a shared usage of an existing cache + // If two CreateService come in with identical connection_id, we need to serialize the create. + // The first create will be successful and be given a special cookie. + UniqueLock lck(&rwLock_); + auto end = all_caches_.end(); + auto it = all_caches_.find(connection_id); + if (it == end) { + std::unique_ptr cs; + try { + cs = std::make_unique(cache_mem_sz, spill ? 
top_ : "", generate_id); + RETURN_IF_NOT_OK(cs->ServiceStart()); + *out_cookie = cs->cookie(); + all_caches_.emplace(connection_id, std::move(cs)); + } catch (const std::bad_alloc &e) { + return Status(StatusCode::kOutOfMemory); + } + } else { + MS_LOG(INFO) << "Duplicate request for " + std::to_string(connection_id) + " to create cache service"; + // We can return OK but we will return a duplicate key so user can act accordingly to either ignore it or + // treat it as OK. + return Status(StatusCode::kDuplicateKey); + } + return Status::OK(); +} + +/// This is the main loop the cache server thread(s) are running. +/// Each thread will pop a request and save the result in the same request. +/// The sender will wait on the wait post in the request. Once the request +/// is fulfilled, the server thread will do a post signalling the request +/// is processed. +/// \return +Status CacheServer::ServerRequest() { + TaskManager::FindMe()->Post(); + // Loop forever until we are interrupted. + while (true) { + BaseRequest *base_rq = nullptr; + RETURN_IF_NOT_OK(cache_q_->PopFront(&base_rq)); + auto cs = GetService(base_rq->connection_id_); + // Except for creating a new session, we expect cs is not null.
+ switch (base_rq->type_) { + case BaseRequest::RequestType::kCacheRow: { + if (cs == nullptr) { + std::string errMsg = "Cache id " + std::to_string(base_rq->connection_id_) + " not found"; + base_rq->rc_ = Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, errMsg); + } else { + auto *rq = reinterpret_cast(base_rq); + // Only if the cookie matches, we can accept insert into this cache that has a build phase + if (!cs->HasBuildPhase() || rq->cookie_ == cs->cookie()) { + rq->rc_ = cs->CacheRow(rq->buffers_, &rq->row_id_from_server_); + } else { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Cookie mismatch"); + } + } + break; + } + case BaseRequest::RequestType::kBatchFetchRows: { + if (cs == nullptr) { + std::string errMsg = "Cache id " + std::to_string(base_rq->connection_id_) + " not found"; + base_rq->rc_ = Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, errMsg); + } else { + auto *rq = reinterpret_cast(base_rq); + rq->rc_ = cs->BatchFetch(rq->row_id_, &rq->mem_); + } + break; + } + case BaseRequest::RequestType::kCreateCache: { + // If the cache is already created we still need to run the creation so that we do sanity checks on the + // client id and return the cache id back to the user. + auto *rq = reinterpret_cast(base_rq); + rq->rc_ = CreateService(rq->connection_id_, rq->cache_mem_sz, rq->flag_, &rq->cookie_); + break; + } + case BaseRequest::RequestType::kPurgeCache: { + if (cs != nullptr) { + base_rq->rc_ = cs->Purge(); + } else { + // it is already purged. Ignore it. + base_rq->rc_ = Status::OK(); + } + break; + } + case BaseRequest::RequestType::kDestroyCache: { + if (cs != nullptr) { + // We need a strong lock to protect the map. + connection_id_type id = base_rq->connection_id_; + UniqueLock lck(&rwLock_); + // std::map will invoke the destructor of CacheService. So we don't need to do anything here. + auto n = all_caches_.erase(id); + if (n == 0) { + // It has been destroyed by another duplicate request.
+ MS_LOG(INFO) << "Duplicate request for " + std::to_string(id) + " to create cache service"; + } + base_rq->rc_ = Status::OK(); + } else { + // it is already destroyed. Ignore it. + base_rq->rc_ = Status::OK(); + } + break; + } + case BaseRequest::RequestType::kGetStat: { + if (cs == nullptr) { + std::string errMsg = "Session " + std::to_string(base_rq->connection_id_) + " not found"; + base_rq->rc_ = Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, errMsg); + } else { + auto *rq = reinterpret_cast(base_rq); + CacheService::ServiceStat svc_stat; + rq->rc_ = cs->GetStat(&svc_stat); + if (rq->rc_.IsOk()) { + flatbuffers::FlatBufferBuilder fbb; + ServiceStatMsgBuilder bld(fbb); + bld.add_num_disk_cached(svc_stat.stat_.num_disk_cached); + bld.add_num_mem_cached(svc_stat.stat_.num_mem_cached); + bld.add_max_row_id(svc_stat.max_); + bld.add_min_row_id(svc_stat.min_); + bld.add_state(svc_stat.state_); + auto offset = bld.Finish(); + fbb.Finish(offset); + rq->rc_ = rq->mem_.allocate(fbb.GetSize()); + if (rq->rc_.IsOk()) { + WritableSlice dest(rq->mem_.GetMutablePointer(), fbb.GetSize()); + ReadableSlice src(fbb.GetBufferPointer(), fbb.GetSize()); + RETURN_IF_NOT_OK(WritableSlice::Copy(&dest, src)); + } + } + } + break; + } + case BaseRequest::RequestType::kCacheSchema: { + if (cs == nullptr) { + std::string errMsg = "Session " + std::to_string(base_rq->connection_id_) + " not found"; + base_rq->rc_ = Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, errMsg); + } else { + auto *rq = reinterpret_cast(base_rq); + rq->rc_ = cs->CacheSchema(rq->buf_, rq->len_of_buf_); + } + break; + } + case BaseRequest::RequestType::kFetchSchema: { + if (cs == nullptr) { + std::string errMsg = "Session " + std::to_string(base_rq->connection_id_) + " not found"; + base_rq->rc_ = Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, errMsg); + } else { + auto *rq = reinterpret_cast(base_rq); + rq->rc_ = cs->FetchSchema(&rq->mem_); + } + break; + } + case 
BaseRequest::RequestType::kBuildPhaseDone: { + if (cs == nullptr) { + std::string errMsg = "Session " + std::to_string(base_rq->connection_id_) + " not found"; + base_rq->rc_ = Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, errMsg); + } else { + auto *rq = reinterpret_cast(base_rq); + // We can only allow to switch phase if the cookie matches. + if (rq->cookie_ == cs->cookie()) { + rq->rc_ = cs->BuildPhaseDone(); + } else { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Cookie mismatch"); + } + } + break; + } + default: + base_rq->rc_ = Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Unknown request type"); + } + // Notify it is done, and move on to the next request. + base_rq->wp_.Set(); + } + return Status::OK(); +} +CacheServer::CacheServer(const std::string &spill_path, int32_t num_workers) + : top_(spill_path), num_workers_(num_workers) {} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_server.h b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_server.h new file mode 100644 index 0000000000..13b68c4389 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_server.h @@ -0,0 +1,98 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+*/ + +#ifndef DATASET_ENGINE_CACHE_SERVER_H_ +#define DATASET_ENGINE_CACHE_SERVER_H_ + +#include +#include +#include +#include +#include +#include +#include +#include "minddata/dataset/engine/cache/cache_service.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/util/arena.h" +#include "minddata/dataset/util/cache_pool.h" +#include "minddata/dataset/util/lock.h" +#include "minddata/dataset/util/service.h" +#include "minddata/dataset/util/services.h" +#include "minddata/dataset/util/system_pool.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/util/task_manager.h" + +namespace mindspore { +namespace dataset { +class BaseRequest; +/// \brief A server which provides CacheService services. +class CacheServer : public Service { + public: + friend class Services; + using cache_index = std::map>; + + CacheServer(const CacheServer &) = delete; + CacheServer &operator=(const CacheServer &) = delete; + CacheServer(CacheServer &&) = delete; + CacheServer &operator=(CacheServer &) = delete; + static CacheServer &GetInstance() noexcept { return Services::getCacheServer(); } + Status DoServiceStart() override; + Status DoServiceStop() override; + ~CacheServer() { (void)ServiceStop(); } + + /// \brief For the current demonstration, a cache client contacts cache server using a Queue. + /// \param rq + /// \return Status object + Status PushRequest(BaseRequest *rq) { + RETURN_UNEXPECTED_IF_NULL(rq); + RETURN_IF_NOT_OK(cache_q_->Add(rq)); + return Status::OK(); + } + + private: + mutable RWLock rwLock_; + std::string top_; + cache_index all_caches_; + std::shared_ptr> cache_q_; + TaskGroup vg_; + int32_t num_workers_; + + /// \brief Constructor + /// \param spill_path Top directory for spilling buffers to. + /// \param num_workers Number of threads for handling requests. + explicit CacheServer(const std::string &spill_path, int32_t num_workers = 3); + + /// \brief Locate a cache service from connection id. 
+ /// \return Pointer to cache service. Null if not found + CacheService *GetService(connection_id_type id) const; + + /// \brief Create a cache service. We allow multiple clients to create the same cache service. + /// Subsequent duplicate requests are ignored. The first cache client to create the service will be given + /// a special unique cookie. + /// \param[in] connection_id This is from a Cache client. + /// \param[in] cache_mem_sz + /// \param[in] flag + /// \param[out] out_cookie Only the first cache client will be given a special cookie to identify the creator + /// \return Status object + Status CreateService(connection_id_type connection_id, uint64_t cache_mem_sz, BaseRequest::CreateCacheFlag flag, + std::string *out_cookie); + + /// \brief Entry point for all server threads. + Status ServerRequest(); +}; +} // namespace dataset +} // namespace mindspore +#endif // DATASET_ENGINE_CACHE_SERVER_H_ diff --git a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_service.cc b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_service.cc new file mode 100644 index 0000000000..4e1208d173 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_service.cc @@ -0,0 +1,265 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+*/ +#include "minddata/dataset/engine/cache/cache_service.h" +#include "minddata/dataset/util/slice.h" + +namespace mindspore { +namespace dataset { +CacheService::CacheService(uint64_t mem_sz, const std::string &root, bool generate_id) + : root_(root), + cache_mem_sz_(mem_sz), + cp_(nullptr), + map_(nullptr), + next_id_(0), + generate_id_(generate_id), + schema_key_(-1), + st_(generate_id ? State::kBuildPhase : State::kNone) {} +CacheService::~CacheService() { (void)ServiceStop(); } +bool CacheService::UseArena() { + // If fixed size, use Arena instead of the pool from global context. + return (cache_mem_sz_ > 0); +} +Status CacheService::DoServiceStart() { + std::shared_ptr mp_; + if (UseArena()) { + // Create a fixed size arena based on the parameter. + std::shared_ptr arena; + RETURN_IF_NOT_OK(Arena::CreateArena(&arena, cache_mem_sz_)); + mp_ = std::move(arena); + } else { + // Unlimited size. Simply use a system pool. Another choice is CircularPool. + mp_ = std::make_shared(); + } + // Put together a CachePool for backing up the Tensor + cp_ = std::make_shared(CachePool::value_allocator(mp_), root_); + RETURN_IF_NOT_OK(cp_->ServiceStart()); + // Set up the B+ tree as well. But use the system pool instead. + map_ = std::make_shared(); + // Assign a name to this cache. Used for exclusive connection. But we can just use CachePool's name. + cookie_ = cp_->MyName(); + return Status::OK(); +} +Status CacheService::DoServiceStop() { + if (cp_ != nullptr) { + RETURN_IF_NOT_OK(cp_->ServiceStop()); + } + return Status::OK(); +} +Status CacheService::CacheRow(const std::vector &buf, row_id_type *row_id_generated) { + SharedLock rw(&rw_lock_); + RETURN_UNEXPECTED_IF_NULL(row_id_generated); + if (st_ == State::kFetchPhase) { + // For this kind of cache service, once we are done with the build phase into fetch phase, we can't + // allow other to cache more rows. 
+ RETURN_STATUS_UNEXPECTED("Can't accept cache request in fetch phase"); + } + try { + // The first buffer is a flatbuffer which describes the rest of the buffers follow + auto fb = buf.front(); + RETURN_UNEXPECTED_IF_NULL(fb); + auto msg = GetTensorRowHeaderMsg(fb); + // If the server side is designed to ignore incoming row id, we generate row id. + if (generate_id_) { + *row_id_generated = GetNextRowId(); + // Some debug information on how many rows we have generated so far. + if ((*row_id_generated) % 1000 == 0) { + MS_LOG(DEBUG) << "Number of rows cached: " << *row_id_generated; + } + } else { + if (msg->row_id() < 0) { + std::string errMsg = "Expect positive row id: " + std::to_string(msg->row_id()); + RETURN_STATUS_UNEXPECTED(errMsg); + } + *row_id_generated = msg->row_id(); + } + auto size_of_this = msg->size_of_this(); + auto column_hdr = msg->column(); + // Number of tensor buffer should match the number of columns plus one. + if (buf.size() != column_hdr->size() + 1) { + std::string errMsg = "Column count does not match. Expect " + std::to_string(column_hdr->size() + 1) + + " but get " + std::to_string(buf.size()); + RETURN_STATUS_UNEXPECTED(errMsg); + } + // Next we store in either memory or on disk. Low level code will consolidate everything in one piece. + std::vector all_data; + all_data.reserve(column_hdr->size() + 1); + all_data.emplace_back(fb, size_of_this); + for (auto i = 0; i < column_hdr->size(); ++i) { + all_data.emplace_back(buf.at(i + 1), msg->data_sz()->Get(i)); + } + // Now we cache the flat buffer. 
+ CachePool::key_type key; + RETURN_IF_NOT_OK(cp_->Insert(all_data, &key)); + Status rc = map_->DoInsert(*row_id_generated, key); + if (rc == Status(StatusCode::kDuplicateKey)) { + MS_LOG(DEBUG) << "Ignoring duplicate key."; + } else { + RETURN_IF_NOT_OK(rc); + } + return Status::OK(); + } catch (const std::exception &e) { + RETURN_STATUS_UNEXPECTED(e.what()); + } +} +std::ostream &operator<<(std::ostream &out, const CacheService &cs) { + // Then show any custom derived-internal stuff + out << "\nCache memory size: " << cs.cache_mem_sz_; + out << "\nSpill path: "; + if (cs.root_.empty()) { + out << "None"; + } else { + out << cs.GetSpillPath(); + } + return out; +} +Path CacheService::GetSpillPath() const { return cp_->GetSpillPath(); } +Status CacheService::Purge() { + // First we must lock exclusively. No one else can cache/restore anything. + UniqueLock rw(&rw_lock_); + RETURN_IF_NOT_OK(cp_->ServiceStop()); + auto new_map = std::make_shared(); + map_.reset(); + map_ = std::move(new_map); + next_id_ = 0; + RETURN_IF_NOT_OK(cp_->ServiceStart()); + return Status::OK(); +} +Status CacheService::GetStat(CacheService::ServiceStat *out) { + SharedLock rw(&rw_lock_); + RETURN_UNEXPECTED_IF_NULL(out); + if (st_ == State::kNone || st_ == State::kFetchPhase) { + out->stat_ = cp_->GetStat(); + out->state_ = static_cast(st_); + auto it = map_->begin(); + if (it != map_->end()) { + out->min_ = it.key(); + auto end_it = map_->end(); + --end_it; + out->max_ = end_it.key(); + } + } else { + out->state_ = static_cast(st_); + } + return Status::OK(); +} +Status CacheService::BatchFetch(const std::vector &v, MemGuard *out) const { + RETURN_UNEXPECTED_IF_NULL(out); + SharedLock rw(&rw_lock_); + if (st_ == State::kBuildPhase) { + // For this kind of cache service, we can't fetch yet until we are done with caching all the rows. 
+ RETURN_STATUS_UNEXPECTED("Can't accept cache request in fetch phase"); + } + const auto num_elements = v.size(); + int64_t mem_sz = (num_elements + 1) * sizeof(int64_t); + int64_t data_offset = mem_sz; + std::vector sz_v; + std::vector keys; + sz_v.reserve(num_elements); + keys.reserve(num_elements); + for (auto row_id : v) { + auto r = map_->Search(row_id); + if (r.second) { + auto &it = r.first; + CachePool::key_type key = it.value(); + auto sz = cp_->GetSize(key); + if (sz == 0) { + std::string errMsg = "Key not found: "; + errMsg += std::to_string(key); + RETURN_STATUS_UNEXPECTED(errMsg); + } + keys.push_back(key); + sz_v.push_back(sz); + mem_sz += sz; + } else { + keys.push_back(-1); + sz_v.push_back(0); + } + } + MemGuard mem; + RETURN_IF_NOT_OK(mem.allocate(mem_sz)); + auto *offset_array = reinterpret_cast(mem.GetMutablePointer()); + offset_array[0] = data_offset; + WritableSlice all(mem.GetMutablePointer(), mem.GetSizeInBytes()); + for (auto i = 0; i < num_elements; ++i) { + auto sz = sz_v.at(i); + offset_array[i + 1] = offset_array[i] + sz; + if (sz > 0) { + WritableSlice row_data(all, offset_array[i], sz); + auto key = keys.at(i); + size_t bytesRead = 0; + RETURN_IF_NOT_OK(cp_->Read(key, &row_data, &bytesRead)); + if (bytesRead != sz) { + MS_LOG(ERROR) << "Unexpected length. Read " << bytesRead << ". Expected " << sz << "." + << " Internal key: " << key << "\n"; + RETURN_STATUS_UNEXPECTED("Length mismatch. See log file for details."); + } + } + } + *out = std::move(mem); + return Status::OK(); +} +Status CacheService::CacheSchema(const void *buf, int64_t len) { + SharedLock rw(&rw_lock_); + if (st_ == State::kFetchPhase) { + // For this kind of cache service, once we are done with the build phase into fetch phase, we can't + // allow other to cache more rows. + RETURN_STATUS_UNEXPECTED("Can't accept cache request in fetch phase"); + } + // This is a special request and we need to remember where we store it. 
+ // In case we are calling the same function from multiple threads, only + // the first one is considered. Rest is ignored. + CachePool::key_type cur_key = schema_key_; + CachePool::key_type key; + if (cur_key < 0) { + RETURN_IF_NOT_OK(cp_->Insert({ReadableSlice(buf, len)}, &key)); + auto result = std::atomic_compare_exchange_strong(&schema_key_, &cur_key, key); + MS_LOG(DEBUG) << "Caching Schema. Result = " << result; + } else { + MS_LOG(DEBUG) << "Caching Schema already done"; + } + return Status::OK(); +} +Status CacheService::FetchSchema(MemGuard *out) const { + SharedLock rw(&rw_lock_); + if (st_ == State::kBuildPhase) { + // For this kind of cache service, we can't fetch yet until we are done with caching all the rows. + RETURN_STATUS_UNEXPECTED("Can't accept cache request in fetch phase"); + } + RETURN_UNEXPECTED_IF_NULL(out); + MemGuard mem; + if (schema_key_ >= 0) { + auto len = cp_->GetSize(schema_key_); + RETURN_IF_NOT_OK(mem.allocate(len)); + auto slice = WritableSlice(mem.GetMutablePointer(), len); + RETURN_IF_NOT_OK(cp_->Read(schema_key_, &slice)); + *out = std::move(mem); + } else { + return Status(StatusCode::kFileNotExist, __LINE__, __FILE__, "No schema has been cached"); + } + return Status::OK(); +} +Status CacheService::BuildPhaseDone() { + if (HasBuildPhase()) { + // Exclusive lock to switch phase + UniqueLock rw(&rw_lock_); + st_ = State::kFetchPhase; + return Status::OK(); + } else { + RETURN_STATUS_UNEXPECTED("Not a cache that has a build phase"); + } +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_service.h b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_service.h new file mode 100644 index 0000000000..bf324e82e3 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_service.h @@ -0,0 +1,143 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file 
except in compliance with the License. + * You may obtain a copy of the License at + + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +#ifndef DATASET_ENGINE_CACHE_SERVICE_H_ +#define DATASET_ENGINE_CACHE_SERVICE_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include "./de_tensor_generated.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/cache/cache_request.h" +#include "minddata/dataset/util/arena.h" +#include "minddata/dataset/util/btree.h" +#include "minddata/dataset/util/cache_pool.h" +#include "minddata/dataset/util/service.h" +#include "minddata/dataset/util/services.h" +#include "minddata/dataset/util/system_pool.h" + +namespace mindspore { +namespace dataset { +struct CacheStat; +/// \brief A cache service for storing/fetching buffers to in memory cache and may spill to disk the cache service is +/// created to support spilling +class CacheService : public Service { + public: + friend class CacheServer; + using row_map = BPlusTree; + + enum class State : uint8_t { kNone = 0, kBuildPhase, kFetchPhase }; + + /// \brief Constructor + /// \param mem_sz Memory size to be set aside for the in memory cache. 0 means unlimited + /// \param root Spill path. Empty string means no spilling + /// \param generate_id If the cache service should generate row id for buffer that is cached. + /// For non-mappable dataset, this should be set to true. + CacheService(uint64_t mem_sz, const std::string &root, bool generate_id); + ~CacheService(); + + /// \brief For fixed size memory, we will create an Arena. 
+ /// \return false if unlimited memory. + bool UseArena(); + + Status DoServiceStart() override; + Status DoServiceStop() override; + + /// \brief Main function to cache a row which is in form a series of buffers. + /// The first buffer is a Google flatbuffer which describes the rest of the buffers followed. + /// \param[in] buf Vector of buffer + /// \param[out] row_id_generated The row id assigned to this row if any + /// \return Status object + Status CacheRow(const std::vector &buf, row_id_type *row_id_generated); + /// \brief Main function to fetch rows in batch. The output is a contiguous memory which will be decoded + /// by the CacheClient. Cache miss is not an error, and will be coded in the output to mark an empty row. + /// \param[in] v A vector of row id. + /// \param[out] out A contiguous memory buffer that holds the requested rows. + /// \return Status object + Status BatchFetch(const std::vector &v, MemGuard *out) const; + + /// \brief Getter function + /// \return Spilling path + Path GetSpillPath() const; + /// \brief A structure returned from the cache server for statistics request. + class ServiceStat { + public: + using state_type = std::underlying_type::type; + ServiceStat() : min_(0), max_(0), state_(0) {} + CachePool::CacheStat stat_{}; + row_id_type min_; + row_id_type max_; + state_type state_; + }; + /// \brief Statistics for the current service + /// \param[in/out] A pointer to a pre-allocated ServiceStat structure + /// \return Status Object + Status GetStat(ServiceStat *); + /// \brief Cache schema + /// \param buf A Google Flatbuffer that contains the schema + /// \param len size of the buffer + /// \return Status object + Status CacheSchema(const void *buf, int64_t len); + /// \brief Fetch schema + /// \param out A contiguous memory that contains the serialized form of schema. 
+ /// \return Status object + Status FetchSchema(MemGuard *out) const; + /// \brief Purge the content of a cache + /// \return Status object + Status Purge(); + /// \brief Overload the << operator to print a cache service + /// \param out std::ostream + /// \param cs A cache service + /// \return std::ostream + friend std::ostream &operator<<(std::ostream &out, const CacheService &cs); + /// \brief Every cache service has a cookie. If the cookie of a CacheClient matches this cookie, this CacheClient + /// is the creator + /// \return Cookie + std::string cookie() const { return cookie_; } + /// \brief If this cache service generates row id for buffer cached, it is divided into two phases, a build phase and + /// a read phase. + /// \return True if has two phases. + bool HasBuildPhase() const { return generate_id_; } + /// \brief Change from write phase to read phase. Only the creator of this service is allowed to make this call. + /// \return Status object + Status BuildPhaseDone(); + + private: + mutable RWLock rw_lock_; + std::string root_; + uint64_t cache_mem_sz_; + std::shared_ptr cp_; + std::shared_ptr map_; + std::atomic next_id_; + bool generate_id_; + std::atomic schema_key_; + std::string cookie_; + State st_; + + /// \brief Private function to generate a row id + /// \return Row id assigned. + row_id_type GetNextRowId() { return next_id_.fetch_add(1); } +}; +} // namespace dataset +} // namespace mindspore +#endif // DATASET_ENGINE_CACHE_SERVICE_H_ diff --git a/mindspore/ccsrc/minddata/dataset/engine/cache/de_tensor.fbs b/mindspore/ccsrc/minddata/dataset/engine/cache/de_tensor.fbs new file mode 100644 index 0000000000..de26069f23 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/cache/de_tensor.fbs @@ -0,0 +1,81 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +namespace mindspore.dataset; + +/// Type of a Tensor +enum TensorType : byte { + DE_UNKNOWN = 0, + DE_BOOL = 1, + DE_INT8 = 2, + DE_UINT8 = 3, + DE_INT16 = 4, + DE_UINT16 = 5, + DE_INT32 = 6, + DE_UINT32 = 7, + DE_INT64 = 8, + DE_UINT64 = 9, + DE_FLOAT16 = 10, + DE_FLOAT32 = 11, + DE_FLOAT64 = 12, + DE_STRING = 13 +} + +/// The meta information of a Tensor +/// \note Only the type and shape are considered meta information. Tensor data is excluded. +table TensorMetaMsg { + dims:[int64] (required); + type:TensorType; +} + +/// This is the first buffer that is sent to a Cache server when a TensorRow is serialized. +/// \param row_id is the row id of the TensorRow. 
+/// \param column The meta information of each Tensor in the row +/// \param size of this serialized buffer +/// \param size of each tensor data buffer that follows +table TensorRowHeaderMsg { + row_id:int64; + column:[TensorMetaMsg] (required); + size_of_this:int64; + data_sz:[int64] (required); +} + +root_type TensorRowHeaderMsg; + +/// A row of row id's +table TensorRowIds { + row_id:[int64] (required); +} + +/// Statistics returned from each cache service +/// \note It must match CacheService::ServiceStat +table ServiceStatMsg { + num_mem_cached:int64; + num_disk_cached:int64; + min_row_id:int64; + max_row_id:int64; + state:int8; +} + +/// Column description of each column in a schema +table ColumnNameMsg { + name:string; + id:int32; +} + +/// Serialized form of a schema +table SchemaMsg { + column:[ColumnNameMsg]; +} diff --git a/mindspore/ccsrc/dataset/engine/connector.h b/mindspore/ccsrc/minddata/dataset/engine/connector.h similarity index 97% rename from mindspore/ccsrc/dataset/engine/connector.h rename to mindspore/ccsrc/minddata/dataset/engine/connector.h index bd66172be5..a91d8e68e9 100644 --- a/mindspore/ccsrc/dataset/engine/connector.h +++ b/mindspore/ccsrc/minddata/dataset/engine/connector.h @@ -20,10 +20,10 @@ #include #include #include -#include "dataset/util/task_manager.h" -#include "dataset/util/queue.h" -#include "dataset/util/services.h" -#include "dataset/util/cond_var.h" +#include "minddata/dataset/util/task_manager.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/util/services.h" +#include "minddata/dataset/util/cond_var.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/data_buffer.cc b/mindspore/ccsrc/minddata/dataset/engine/data_buffer.cc similarity index 82% rename from mindspore/ccsrc/dataset/engine/data_buffer.cc rename to mindspore/ccsrc/minddata/dataset/engine/data_buffer.cc index 32a70c259f..b36aae6837 100644 --- a/mindspore/ccsrc/dataset/engine/data_buffer.cc +++ 
b/mindspore/ccsrc/minddata/dataset/engine/data_buffer.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/data_buffer.h" -#include "dataset/util/allocator.h" -#include "dataset/core/global_context.h" -#include "dataset/core/tensor.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/util/allocator.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/core/tensor.h" namespace mindspore { namespace dataset { @@ -24,10 +24,8 @@ namespace dataset { // Description: This is the main constructor that is used for making a buffer DataBuffer::DataBuffer(int32_t id, BufferFlags flags) : buffer_id_(id), tensor_table_(nullptr), buffer_flags_(flags) {} -// Name: print() -// Description: A function that prints info about the DataBuffer (base class version) -void DataBuffer::Print(std::ostream &out, // In: The output stream to print to - bool show_all) const { // In: T/F if it should show everything +// A method for debug printing of the buffer +void DataBuffer::Print(std::ostream &out, bool show_all) const { out << "bufferId: " << buffer_id_ << "\nflags: " << std::hex << buffer_flags_ << std::dec << "\n"; // If the column counts are set then it means that data has been set into @@ -46,11 +44,6 @@ void DataBuffer::Print(std::ostream &out, // In: The output stream to print } } -Status DataBuffer::Load() { - std::string err_msg = "Base class load called, but it does not have an implementation!"; - RETURN_STATUS_UNEXPECTED(err_msg); -} - // Remove me!! 
Callers should fetch rows via pop Status DataBuffer::GetTensor(std::shared_ptr *ptr, int32_t row_id, int32_t col_id) const { if (row_id < tensor_table_->size() && col_id < tensor_table_->at(row_id).size()) { @@ -92,8 +85,5 @@ Status DataBuffer::SliceOff(int64_t number_of_rows) { return Status::OK(); } - -// Destructor -DataBuffer::~DataBuffer() {} } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/data_buffer.h b/mindspore/ccsrc/minddata/dataset/engine/data_buffer.h similarity index 77% rename from mindspore/ccsrc/dataset/engine/data_buffer.h rename to mindspore/ccsrc/minddata/dataset/engine/data_buffer.h index 2ab0783519..5fcb4c21a5 100644 --- a/mindspore/ccsrc/dataset/engine/data_buffer.h +++ b/mindspore/ccsrc/minddata/dataset/engine/data_buffer.h @@ -21,19 +21,17 @@ #include #include #include -#include "dataset/util/allocator.h" -#include "dataset/util/status.h" -#include "dataset/core/constants.h" -#include "dataset/core/tensor.h" -#include "dataset/core/tensor_row.h" +#include "minddata/dataset/util/allocator.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/tensor_row.h" namespace mindspore { namespace dataset { -// The DataBuffer class is a base class that will represent the data for n values based -// on a unique row id for each row of data. -// There can be different types of DataBuffers to abstract over how the data is stored -// in memory and acquired from storage. -// Each buffer holds a range of consecutive row id's. +/// \brief The DataBuffer class is a container of tensor data and is the unit of transmission between +/// connectors of dataset operators. Inside the buffer, tensors are organized into a table-like format +/// where n TensorRows may consist of m tensors (columns). 
class DataBuffer { public: // Buffer flags @@ -47,13 +45,13 @@ class DataBuffer { // Description: This is the main constructor that is used for making a buffer DataBuffer(int32_t id, BufferFlags flags); - // Destructor - virtual ~DataBuffer(); + /// \brief default destructor + ~DataBuffer() = default; - // Name: print() - // Description: A function that prints info about the DataBuffer (base class version) - virtual void Print(std::ostream &out, // In: The output stream to print to - bool show_all) const; // In: T/F if it should show everything + /// \brief A method for debug printing of the buffer + /// \param[inout] out The stream to write to + /// \param[in] show_all A boolean to toggle between details and summary printing + void Print(std::ostream &out, bool show_all) const; // Provide stream operator for displaying it friend std::ostream &operator<<(std::ostream &out, const DataBuffer &cb) { @@ -61,10 +59,6 @@ class DataBuffer { return out; } - // Name: load() - // Description: populates the DataBuffer with data based on it's id - virtual Status Load(); - // Convenience getter functions for flag checking bool eof() const { return (static_cast(buffer_flags_) & static_cast(kDeBFlagEOF)); } diff --git a/mindspore/ccsrc/dataset/engine/data_schema.cc b/mindspore/ccsrc/minddata/dataset/engine/data_schema.cc similarity index 99% rename from mindspore/ccsrc/dataset/engine/data_schema.cc rename to mindspore/ccsrc/minddata/dataset/engine/data_schema.cc index 6c5f882bed..50d910251d 100644 --- a/mindspore/ccsrc/dataset/engine/data_schema.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/data_schema.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/data_schema.h" +#include "minddata/dataset/engine/data_schema.h" #include #include @@ -24,8 +24,8 @@ #include #include "common/utils.h" -#include "dataset/util/status.h" -#include "dataset/core/tensor_shape.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/core/tensor_shape.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/engine/data_schema.h b/mindspore/ccsrc/minddata/dataset/engine/data_schema.h similarity index 98% rename from mindspore/ccsrc/dataset/engine/data_schema.h rename to mindspore/ccsrc/minddata/dataset/engine/data_schema.h index ce61b8952d..96f6f2b118 100644 --- a/mindspore/ccsrc/dataset/engine/data_schema.h +++ b/mindspore/ccsrc/minddata/dataset/engine/data_schema.h @@ -23,10 +23,10 @@ #include #include #include -#include "dataset/core/constants.h" -#include "dataset/core/data_type.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/data_type.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/dataset_iterator.cc b/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.cc similarity index 96% rename from mindspore/ccsrc/dataset/engine/dataset_iterator.cc rename to mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.cc index be333741b1..f75ca5d097 100644 --- a/mindspore/ccsrc/dataset/engine/dataset_iterator.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.cc @@ -13,16 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/dataset_iterator.h" +#include "minddata/dataset/engine/dataset_iterator.h" #include #include -#include "dataset/core/data_type.h" -#include "dataset/core/tensor.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/util/status.h" -#include "dataset/engine/datasetops/dataset_op.h" +#include "minddata/dataset/core/data_type.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/engine/datasetops/dataset_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/dataset_iterator.h b/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.h similarity index 96% rename from mindspore/ccsrc/dataset/engine/dataset_iterator.h rename to mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.h index 4e40e77c74..253d1604e2 100644 --- a/mindspore/ccsrc/dataset/engine/dataset_iterator.h +++ b/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.h @@ -20,11 +20,11 @@ #include #include #include -#include "dataset/util/status.h" -#include "dataset/core/tensor.h" -#include "dataset/engine/datasetops/dataset_op.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/engine/perf/dataset_iterator_tracing.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/datasetops/dataset_op.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/perf/dataset_iterator_tracing.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/engine/datasetops/CMakeLists.txt similarity index 51% rename from 
mindspore/ccsrc/dataset/engine/datasetops/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/CMakeLists.txt index ed57421030..a2cd6dc07a 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/CMakeLists.txt @@ -2,13 +2,12 @@ add_subdirectory(source) file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) -add_library(engine-datasetops OBJECT + +set(DATASET_ENGINE_DATASETOPS_SRC_FILES dataset_op.cc parallel_op.cc pipeline_op.cc - barrier_op.cc batch_op.cc - bucket_batch_by_length_op.cc device_queue_op.cc map_op.cc project_op.cc @@ -19,7 +18,21 @@ add_library(engine-datasetops OBJECT shuffle_op.cc zip_op.cc concat_op.cc - filter_op.cc - build_vocab_op.cc + cache_base_op.cc + cache_lookup_op.cc + cache_op.cc + cache_merge_op.cc ) +if (ENABLE_PYTHON) + set(DATASET_ENGINE_DATASETOPS_SRC_FILES + ${DATASET_ENGINE_DATASETOPS_SRC_FILES} + bucket_batch_by_length_op.cc + barrier_op.cc + filter_op.cc + build_vocab_op.cc + ) +endif() + +add_library(engine-datasetops OBJECT ${DATASET_ENGINE_DATASETOPS_SRC_FILES}) + diff --git a/mindspore/ccsrc/dataset/engine/datasetops/barrier_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.cc similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/barrier_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.cc index 6fc276a75e..51ea232e68 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/barrier_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.cc @@ -13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/barrier_op.h" +#include "minddata/dataset/engine/datasetops/barrier_op.h" #include #include -#include "dataset/core/constants.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/db_connector.h" -#include "dataset/core/config_manager.h" -#include "dataset/core/global_context.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/global_context.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/barrier_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.h similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/barrier_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.h index 379b8f146b..a3ac843272 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/barrier_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.h @@ -20,10 +20,10 @@ #include #include #include -#include "dataset/core/tensor.h" -#include "dataset/engine/dataset_iterator.h" -#include "dataset/engine/datasetops/pipeline_op.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/dataset_iterator.h" +#include "minddata/dataset/engine/datasetops/pipeline_op.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.cc similarity index 93% rename from mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.cc index 8bfa8c287c..844d054307 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc +++ 
b/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.cc @@ -13,17 +13,19 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/datasetops/batch_op.h" +#include "minddata/dataset/engine/datasetops/batch_op.h" #include #include #include "common/utils.h" -#include "dataset/core/pybind_support.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/opt/pass.h" -#include "dataset/kernels/data/data_utils.h" +#ifdef ENABLE_PYTHON +#include "minddata/dataset/core/pybind_support.h" +#endif +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/opt/pass.h" +#include "minddata/dataset/kernels/data/data_utils.h" using float16 = Eigen::half; @@ -38,9 +40,14 @@ BatchOp::Builder::Builder(int32_t batch_size) : builder_drop_(false), builder_pa Status BatchOp::Builder::Build(std::shared_ptr *ptr) { RETURN_IF_NOT_OK(SanityCheck()); +#ifdef ENABLE_PYTHON *ptr = std::make_shared(builder_batch_size_, builder_drop_, builder_pad_, builder_op_connector_size_, builder_num_workers_, builder_cols_to_map_, builder_batch_size_func_, builder_batch_map_func_, builder_pad_map_); +#else + *ptr = std::make_shared(builder_batch_size_, builder_drop_, builder_pad_, builder_op_connector_size_, + builder_num_workers_, builder_cols_to_map_, builder_pad_map_); +#endif return Status::OK(); } @@ -52,6 +59,7 @@ Status BatchOp::Builder::SanityCheck() { return err.empty() ? 
Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, common::SafeCStr(err)); } +#ifdef ENABLE_PYTHON BatchOp::BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers, const std::vector &cols_to_map, py::function batch_size_func, py::function batch_map_func, PadInfo pad_map) @@ -65,6 +73,18 @@ BatchOp::BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, pad_info_(pad_map) { worker_queues_.Init(num_workers, op_queue_size); } +#else +BatchOp::BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers, + const std::vector &cols_to_map, PadInfo pad_map) + : ParallelOp(num_workers, op_queue_size), + start_batch_size_(batch_size), + drop_(drop), + pad_(pad), + pyfunc_column_names_(cols_to_map), + pad_info_(pad_map) { + worker_queues_.Init(num_workers, op_queue_size); +} +#endif Status BatchOp::operator()() { Status rc = LaunchThreadsAndInitOp(); @@ -206,7 +226,9 @@ Status BatchOp::WorkerEntry(int32_t workerId) { Status BatchOp::MakeBatchedBuffer(std::pair, CBatchInfo> table_pair, std::unique_ptr *db) { RETURN_UNEXPECTED_IF_NULL(table_pair.first); - if (!pyfunc_column_names_.empty()) RETURN_IF_NOT_OK(MapColumns(&table_pair)); // pass it through pyfunc +#ifdef ENABLE_PYTHON + if (!pyfunc_column_names_.empty()) RETURN_IF_NOT_OK(MapColumns(&table_pair)); // pass it through pyfunc +#endif if (pad_) RETURN_IF_NOT_OK(PadColumns(&table_pair.first, pad_info_, column_name_id_map_)); // do padding if needed (*db) = std::make_unique(table_pair.second.batch_num_, DataBuffer::kDeBFlagNone); std::unique_ptr dest_table = std::make_unique(); @@ -229,6 +251,7 @@ Status BatchOp::EoeReceived(int32_t) { return Status::OK(); } +#ifdef ENABLE_PYTHON Status BatchOp::MapColumns(std::pair, CBatchInfo> *table_pair) { TensorBatchTable input_table; input_table.reserve(pyfunc_column_names_.size()); @@ -259,16 +282,22 @@ Status BatchOp::MapColumns(std::pair, CBatchInfo> } return Status::OK(); } 
+#endif Status BatchOp::GetBatchSize(int32_t *batch_size, CBatchInfo info) { +#ifdef ENABLE_PYTHON if (batch_size_func_ != nullptr) { RETURN_IF_NOT_OK(InvokeBatchSizeFunc(batch_size, info)); } else { (*batch_size) = start_batch_size_; } +#else + (*batch_size) = start_batch_size_; +#endif return Status::OK(); } +#ifdef ENABLE_PYTHON Status BatchOp::InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info) { { // Acquire Python GIL @@ -336,6 +365,7 @@ Status BatchOp::InvokeBatchMapFunc(TensorBatchTable *input, TensorBatchTable *ou } return Status(StatusCode::kOK); } +#endif Status BatchOp::PadColumns(std::unique_ptr *table, const PadInfo &pad_info, const std::unordered_map &column_name_id_map) { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/batch_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.h similarity index 95% rename from mindspore/ccsrc/dataset/engine/datasetops/batch_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.h index 28df5e7e81..0c042433f7 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/batch_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.h @@ -26,11 +26,11 @@ #include #include -#include "dataset/core/config_manager.h" -#include "dataset/core/tensor.h" -#include "dataset/engine/dataset_iterator.h" -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/dataset_iterator.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -89,6 +89,7 @@ class BatchOp : public ParallelOp { return *this; } +#ifdef ENABLE_PYTHON // set columns to perform map on // @param const std::vector & cols_to_map - name of columns to perform map on // @return Builder & reference to builder class object @@ -104,6 +105,7 @@ class BatchOp : public 
ParallelOp { builder_batch_size_func_ = batch_size_func; return *this; } +#endif // @param std::shared_ptr *ptr pointer to shared_ptr, actual return arg // @return Status - The error code return @@ -121,8 +123,10 @@ class BatchOp : public ParallelOp { int32_t builder_op_connector_size_; std::vector builder_cols_to_map_; PadInfo builder_pad_map_; +#ifdef ENABLE_PYTHON py::function builder_batch_size_func_; py::function builder_batch_map_func_; +#endif }; enum batchCtrl : int8_t { kNoCtrl = 0, kEOE = 1, kEOF = 2, kQuit = 3 }; @@ -144,6 +148,7 @@ class BatchOp : public ParallelOp { const int64_t get_epoch_num() const { return epoch_num_; } }; +#ifdef ENABLE_PYTHON // BatchOp constructor // @param int32_t batch_size // @param bool drop @@ -152,6 +157,10 @@ class BatchOp : public ParallelOp { // @param int32_t num_workers BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers, const std::vector &, py::function batch_size_func, py::function batch_map_func, PadInfo pad_map); +#else + BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers, + const std::vector &, PadInfo pad_map); +#endif // BatchOp destructor ~BatchOp() {} @@ -219,10 +228,13 @@ class BatchOp : public ParallelOp { // @return Status - The error code return Status MakeBatchedBuffer(std::pair, CBatchInfo> table_pair, std::unique_ptr *db); + +#ifdef ENABLE_PYTHON // Function that calls pyfunc to perform map on batch // @param (std::pair, batch_stats> *table_pair - contains un-batched tensor // @return Status - The error code return Status MapColumns(std::pair, CBatchInfo> *table_pair); +#endif // @param const PadInfo &pad_info pad info to unpack // @param const std::unordered_map& column_name_id_map - column names to index mapping @@ -247,6 +259,7 @@ class BatchOp : public ParallelOp { // @return Status - The error code return Status LaunchThreadsAndInitOp(); +#ifdef ENABLE_PYTHON // Invoke batch size function with current BatchInfo to 
generate batch size. // @return Status - The error code return Status InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info); @@ -254,6 +267,7 @@ class BatchOp : public ParallelOp { // Invoke batch map function with current BatchInfo to generate tensors to batch. // @return Status - The error code return Status InvokeBatchMapFunc(TensorTable *input, TensorTable *output, CBatchInfo info); +#endif int32_t start_batch_size_; bool drop_; // bool for whether to drop remainder or not @@ -262,8 +276,10 @@ class BatchOp : public ParallelOp { PadInfo pad_info_; // column names to perform padding on std::unique_ptr child_iterator_; // child iterator for fetching TensorRows 1 by 1 QueueList, CBatchInfo>> worker_queues_; // internal queue for syncing worker +#ifdef ENABLE_PYTHON py::function batch_size_func_; // Function pointer of batch size function py::function batch_map_func_; // Function pointer of per batch map function +#endif }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/datasetops/bucket_batch_by_length_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/bucket_batch_by_length_op.cc similarity index 94% rename from mindspore/ccsrc/dataset/engine/datasetops/bucket_batch_by_length_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/bucket_batch_by_length_op.cc index 5e143b700f..138bb7980b 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/bucket_batch_by_length_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/bucket_batch_by_length_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/bucket_batch_by_length_op.h" +#include "minddata/dataset/engine/datasetops/bucket_batch_by_length_op.h" #include #include @@ -24,14 +24,14 @@ #include "pybind11/numpy.h" #include "pybind11/pybind11.h" #include "pybind11/stl.h" -#include "dataset/core/pybind_support.h" -#include "dataset/core/config_manager.h" -#include "dataset/core/tensor.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/engine/dataset_iterator.h" -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/engine/opt/pass.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/pybind_support.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/engine/dataset_iterator.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/engine/opt/pass.h" +#include "minddata/dataset/util/status.h" namespace py = pybind11; namespace mindspore { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/bucket_batch_by_length_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/bucket_batch_by_length_op.h similarity index 94% rename from mindspore/ccsrc/dataset/engine/datasetops/bucket_batch_by_length_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/bucket_batch_by_length_op.h index bf0bcb0e78..332ff4bb22 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/bucket_batch_by_length_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/bucket_batch_by_length_op.h @@ -22,12 +22,12 @@ #include #include -#include "dataset/core/config_manager.h" -#include "dataset/core/tensor.h" -#include "dataset/engine/dataset_iterator.h" -#include "dataset/engine/datasetops/batch_op.h" -#include "dataset/engine/datasetops/pipeline_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/tensor.h" +#include 
"minddata/dataset/engine/dataset_iterator.h" +#include "minddata/dataset/engine/datasetops/batch_op.h" +#include "minddata/dataset/engine/datasetops/pipeline_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/build_vocab_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.cc similarity index 98% rename from mindspore/ccsrc/dataset/engine/datasetops/build_vocab_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.cc index ceb5058593..8ed51ebbb6 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/build_vocab_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.cc @@ -14,14 +14,14 @@ * limitations under the License. */ -#include "dataset/engine/datasetops/build_vocab_op.h" +#include "minddata/dataset/engine/datasetops/build_vocab_op.h" #include #include #include #include #include -#include "dataset/core/config_manager.h" +#include "minddata/dataset/core/config_manager.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/build_vocab_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.h similarity index 95% rename from mindspore/ccsrc/dataset/engine/datasetops/build_vocab_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.h index bf358c48c6..42ea0deb5c 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/build_vocab_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.h @@ -22,12 +22,12 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/engine/dataset_iterator.h" -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/text/vocab.h" -#include "dataset/util/queue.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/dataset_iterator.h" +#include 
"minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/text/vocab.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_base_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_base_op.cc new file mode 100644 index 0000000000..1b0890686f --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_base_op.cc @@ -0,0 +1,185 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "minddata/dataset/engine/datasetops/cache_base_op.h" +#include +#include +#include "minddata/dataset/engine/execution_tree.h" + +namespace mindspore { +namespace dataset { +// A print method typically used for debugging +void CacheBase::Print(std::ostream &out, bool show_all) const { + // Always show the id and name as first line regardless if this summary or detailed print + out << "(" << std::setw(2) << operator_id_ << ") <" << Name() << ">:"; + if (!show_all) { + // Call the super class for displaying any common 1-liner info + ParallelOp::Print(out, show_all); + out << "\n"; + } else { + // Call the super class for displaying any common detailed info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal stuff + out << "\nCache client:\n" << *cache_client_ << "\n\n"; + } +} +// Overrides base class reset method. When an operator does a reset, it cleans up any state +// info from it's previous execution and then initializes itself so that it can be executed +// again. +Status CacheBase::Reset() { + if (sampler_ != nullptr) { + RETURN_IF_NOT_OK(sampler_->ResetSampler()); + } + // Wake up the workers to get them going again in a new epoch + MS_LOG(DEBUG) << Name() << " resetting."; + epoch_sync_.Set(); + return Status::OK(); +} +CacheBase::CacheBase(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf, + std::shared_ptr cache_client, std::shared_ptr sampler) + : ParallelOp(num_workers, op_connector_size, sampler), + cache_client_(cache_client), + rows_per_buffer_(rows_per_buf), + // We can cause deadlock if this internal Connector size is too small. 
+ keys_miss_(num_workers_, 1, connector_capacity_) { + io_block_queues_.Init(num_workers, op_connector_size); +} +// Common function to fetch samples from the sampler and send them using the io_block_queues to +// the parallel workers +Status CacheBase::FetchSamplesToWorkers() { + int64_t buf_cnt = 0; + int64_t wait_cnt = 0; + do { + epoch_sync_.Clear(); + std::vector keys; + int64_t row_cnt = 0; + keys.reserve(rows_per_buffer_); + std::unique_ptr sampler_buffer; + RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); + while (!sampler_buffer->eoe()) { + TensorRow sample_row; + RETURN_IF_NOT_OK(sampler_buffer->PopRow(&sample_row)); + std::shared_ptr sample_ids = sample_row[0]; + for (auto itr = sample_ids->begin(); itr != sample_ids->end(); itr++) { + keys.push_back(*itr); + ++row_cnt; + if (row_cnt % rows_per_buffer_ == 0) { + auto blk = std::make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)); + RETURN_IF_NOT_OK(io_block_queues_[buf_cnt++ % num_workers_]->Add(std::move(blk))); + keys.clear(); + } + } + RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); + } + if (!keys.empty()) { + auto blk = std::make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)); + RETURN_IF_NOT_OK(io_block_queues_[buf_cnt++ % num_workers_]->Add(std::move(blk))); + } + // send the eoe + RETURN_IF_NOT_OK( + io_block_queues_[(buf_cnt++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); + // If repeat but the not last repeat, wait for reset. + if (BitTest(op_ctrl_flags_, kDeOpRepeated) && !BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { + MS_LOG(DEBUG) << Name() << " Waiting for reset. Count " << ++wait_cnt << " Buffer sent " << buf_cnt; + RETURN_IF_NOT_OK(epoch_sync_.Wait()); + } else { + // We can break out from the loop. + break; + } + } while (true); + // Flow the eof before exit + RETURN_IF_NOT_OK( + io_block_queues_[(buf_cnt++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEof))); + // Ask all the workers to quit. 
+ for (int32_t i = 0; i < num_workers_; i++) { + RETURN_IF_NOT_OK( + io_block_queues_[i]->Add(std::make_unique(std::vector(), IOBlock::kDeIoBlockNone))); + } + return Status::OK(); +} +Status CacheBase::FetchFromCache(int32_t worker_id) { + int64_t buffer_id = worker_id; + std::unique_ptr blk; + do { + RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&blk)); + if (blk->eof()) { + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique(0, DataBuffer::kDeBFlagEOF))); + } else if (blk->eoe()) { + if (AllowCacheMiss()) { + // This code path is for CacheLookupOp acting as a sampler. If we get a eoe from + // a sampler, send a eoe to physical leaf op as well. + std::vector eoe; + eoe.push_back(eoe_row_id); + RETURN_IF_NOT_OK(keys_miss_.Push(worker_id, eoe)); + } + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique(0, DataBuffer::kDeBFlagEOE))); + } else { + std::vector keys; + RETURN_IF_NOT_OK(blk->GetKeys(&keys)); + if (keys.empty()) { + // empty key is a quit signal for workers + break; + } + std::unique_ptr db = std::make_unique(buffer_id, DataBuffer::kDeBFlagNone); + std::unique_ptr que = std::make_unique(); + TensorTable ttbl; + RETURN_IF_NOT_OK(cache_client_->GetRows(keys, &ttbl)); + auto row_it = ttbl.begin(); + std::vector cache_miss; + cache_miss.reserve(keys.size()); + for (auto row_id : keys) { + auto &row = *row_it; + if (row.empty()) { + if (AllowCacheMiss()) { + cache_miss.push_back(row_id); + } else { + std::string errMsg = "Row id " + std::to_string(row_id) + " not found."; + RETURN_STATUS_UNEXPECTED(errMsg); + } + } + que->push_back(std::move(row)); + ++row_it; + } + db->set_tensor_table(std::move(que)); + if (AllowCacheMiss()) { + // Because of the way connector works, we push unconditionally even cache_miss can be empty. 
+ RETURN_IF_NOT_OK(keys_miss_.Push(worker_id, cache_miss)); + } + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db))); + buffer_id += num_workers_; + } + } while (true); + return Status::OK(); +} +Status CacheBase::RegisterResources() { + RETURN_IF_NOT_OK(epoch_sync_.Register(tree_->AllTasks())); + RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks())); + return Status::OK(); +} +CacheBase::~CacheBase() {} +Status CacheBase::UpdateColumnMapFromCache() { + Status rc; + // Get the schema from the server. It may not be there yet. So tolerate the error. + if (column_name_id_map_.empty()) { + rc = cache_client_->FetchSchema(&column_name_id_map_); + if (rc == Status(StatusCode::kFileNotExist)) { + MS_LOG(DEBUG) << "Schema not in the server yet."; + rc = Status::OK(); + } + } + return rc; +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_base_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_base_op.h new file mode 100644 index 0000000000..fb3e999b76 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_base_op.h @@ -0,0 +1,108 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef DATASET_ENGINE_DATASETOPS_CACHE_BASE_OP_H_ +#define DATASET_ENGINE_DATASETOPS_CACHE_BASE_OP_H_ + +#include +#include +#include +#include +#include "minddata/dataset/engine/cache/cache_client.h" +#include "minddata/dataset/engine/cache/cache_service.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/engine/datasetops/repeat_op.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/util/wait_post.h" +#include "minddata/dataset/engine/datasetops/cache_base_op.h" +namespace mindspore { +namespace dataset { +/// \brief This is the base class for CacheOp and CacheLookupOp which share many similarities. +/// \see CacheOp +/// \see CacheLookupOp +class CacheBase : public ParallelOp { + public: + /// \brief Base class constructor + /// \param num_workers Number of parallel workers + /// \param op_connector_size Connector size + /// \param rows_per_buf Number of rows per buffer + /// \param cache_client CacheClient for communication to the CacheServer + /// \param sampler Sampler which is mandatory + CacheBase(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf, + std::shared_ptr cache_client, std::shared_ptr sampler); + /// \brief Destructor + ~CacheBase(); + + /// \brief Overrides base class reset method. When an operator does a reset, it cleans up any state + /// info from it's previous execution and then initializes itself so that it can be executed + /// again. 
+ /// \return Status - The error code return + Status Reset() override; + + /// \brief A print method typically used for debugging + /// \param out The output stream to write output to + /// \param show_all A bool to control if you want to show all info or just a summary + void Print(std::ostream &out, bool show_all) const override; + + /// \brief << Stream output operator overload + /// \notes This allows you to write the debug print info using stream operators + /// \param out reference to the output stream being overloaded + /// \param mo reference to the CacheOp to display + /// \return the output stream must be returned + friend std::ostream &operator<<(std::ostream &out, const CacheBase &mo) { + mo.Print(out, false); + return out; + } + + /// \brief Getter for the cache client + /// \return shared ptr to the cache client + std::shared_ptr cache_client() { return cache_client_; } + /// \brief Setter for the cache client + void SetCacheClient(std::shared_ptr cache_client) { cache_client_ = std::move(cache_client); } + /// \brief Derived class must implement this method if a cache miss is treated as error + virtual bool AllowCacheMiss() = 0; + + protected: + constexpr static int32_t eoe_row_id = -1; + std::shared_ptr cache_client_; + WaitPost epoch_sync_; + int32_t rows_per_buffer_; + Connector> keys_miss_; + + /// \brief Common function to register resources for interrupt + /// \note Derived should override this function for extra resources to be registered + virtual Status RegisterResources(); + /// \brief This function is called by main thread to send samples to the worker thread. 
+ /// \note It is a non-virtual function + /// \return Status object + Status FetchSamplesToWorkers(); + /// \brief This function is called by each worker to fetch rows from the cache server for a given set of + /// sample row id's + /// \return Status object + Status FetchFromCache(int32_t worker_id); + /// \brief Get the column map from cache server + Status UpdateColumnMapFromCache(); + + private: + constexpr static int32_t connector_capacity_ = 1024; + QueueList> io_block_queues_; +}; +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_ENGINE_DATASETOPS_CACHE_BASE_OP_H_ diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_lookup_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_lookup_op.cc new file mode 100644 index 0000000000..0a9b7544ba --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_lookup_op.cc @@ -0,0 +1,130 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "minddata/dataset/engine/datasetops/cache_lookup_op.h" +#include "minddata/dataset/engine/opt/pass.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "utils/log_adapter.h" +#include "utils/system/crc32c.h" + +namespace mindspore { +namespace dataset { +// Builder constructor. 
Creates the builder object. +CacheLookupOp::Builder::Builder() : build_cache_client_(nullptr), build_sampler_(nullptr) { + std::shared_ptr cfg = GlobalContext::config_manager(); + build_num_workers_ = cfg->num_parallel_workers(); + rows_per_buffer_ = cfg->rows_per_buffer(); + build_op_connector_size_ = cfg->op_connector_size(); +} + +// Check if the required parameters are set by the builder. +Status CacheLookupOp::Builder::SanityCheck() const { + if (build_cache_client_ == nullptr) { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "CacheLookupOp requires a CacheClient"); + } + // Make sure the cache client has a valid session + if (!build_cache_client_->session_id()) { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, + "Cache client for CacheLookupOp is missing session id"); + } + return Status::OK(); +} + +// The builder "build" method creates the final object and does some init on it +Status CacheLookupOp::Builder::Build(std::shared_ptr *ptr) { + RETURN_IF_NOT_OK(SanityCheck()); + *ptr = std::make_shared(build_num_workers_, build_op_connector_size_, rows_per_buffer_, + build_cache_client_, build_sampler_); + return Status::OK(); +} +Status CacheLookupOp::operator()() { + if (!sampler_) { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, + "CacheLookupOp requires a sampler before it can be executed!"); + } + RETURN_IF_NOT_OK(RegisterResources()); + // Kick off the workers + RETURN_IF_NOT_OK( + tree_->LaunchWorkers(num_workers_, std::bind(&CacheLookupOp::WorkerEntry, this, std::placeholders::_1))); + // required task group sync after launching workers + TaskManager::FindMe()->Post(); + // We have to wait until the leaf op has handshake with us. 
+ RETURN_IF_NOT_OK(leaf_op_wp_.Wait()); + RETURN_IF_NOT_OK(FetchSamplesToWorkers()); + return Status::OK(); +} +Status CacheLookupOp::WorkerEntry(int32_t worker_id) { + TaskManager::FindMe()->Post(); + RETURN_IF_NOT_OK(FetchFromCache(worker_id)); + return Status::OK(); +} +Status CacheLookupOp::ResetSampler() { return Status::OK(); } +Status CacheLookupOp::HandshakeRandomAccessOp(const RandomAccessOp *op) { + // We act like a sampler and as a dataset op. During handshake with leaf op, + // We must wait until the leaf op has indexed everything. + RETURN_IF_NOT_OK(sampler_->HandshakeRandomAccessOp(op)); + // Now we notify the main thread handshake has finished. + leaf_op_wp_.Set(); + return Status::OK(); +} +Status CacheLookupOp::InitSampler() { return Sampler::InitSampler(); } +void CacheLookupOp::Print(std::ostream &out, bool show_all) const { CacheBase::Print(out, show_all); } +Status CacheLookupOp::GetNextSample(std::unique_ptr *out_buffer) { + std::vector cache_miss; + RETURN_IF_NOT_OK(keys_miss_.Pop(0, &cache_miss)); + // Ignore the case we have no cache miss, we can't return empty samples. 
+ while (cache_miss.empty()) { + RETURN_IF_NOT_OK(keys_miss_.Pop(0, &cache_miss)); + } + // Special code for eoe + if (cache_miss.at(0) == eoe_row_id) { + *out_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOE); + } else { + std::shared_ptr sample_ts; + RETURN_IF_NOT_OK(CreateSamplerTensor(&sample_ts, cache_miss.size())); + (*out_buffer) = std::make_unique(0, DataBuffer::kDeBFlagNone); + auto idPtr = sample_ts->begin(); + for (auto i = 0; i < cache_miss.size(); ++i) { + *idPtr = cache_miss.at(i); + ++idPtr; + } + TensorRow row; + row.push_back(sample_ts); + (*out_buffer)->set_tensor_table(std::make_unique(1, row)); + } + return Status::OK(); +} +Status CacheLookupOp::RegisterResources() { + RETURN_IF_NOT_OK(CacheBase::RegisterResources()); + RETURN_IF_NOT_OK(leaf_op_wp_.Register(tree_->AllTasks())); + return Status::OK(); +} +Status CacheLookupOp::ComputeColMap() { + // We don't know the column map at this point unless we contact the cache server + // to fetch the schema but the cache server may not have it at this point either. + // So we will just return OK and let MergeOp (our parent) to handle it. + return Status::OK(); +} + +// Visitor accept method for NodePass +Status CacheLookupOp::Accept(NodePass *p, bool *modified) { + // Downcast shared pointer then call visitor + return p->RunOnNode(shared_from_base(), modified); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_lookup_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_lookup_op.h new file mode 100644 index 0000000000..46a58c5d02 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_lookup_op.h @@ -0,0 +1,122 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef DATASET_ENGINE_DATASETOPS_CACHE_LOOKUP_OP_H_ +#define DATASET_ENGINE_DATASETOPS_CACHE_LOOKUP_OP_H_ + +#include +#include +#include +#include +#include +#include "minddata/dataset/engine/datasetops/cache_base_op.h" + +namespace mindspore { +namespace dataset { +/// \brief provides a memory/disk cache that acts as a save-point within a mappable dataset. +/// \note For non-mappable dataset, please see CacheOp +/// \see CacheOp +class CacheLookupOp : public CacheBase, public Sampler { + public: + class Builder { + public: + /// \brief Builder constructor. Creates the builder object. + /// \note No default args + Builder(); + + /// Default destructor + ~Builder() = default; + + /// Setter method. + /// \treturn Builder setter method returns reference to the builder. + Builder &SetNumWorkers(int32_t num_workers) { + build_num_workers_ = num_workers; + return *this; + } + + /// Setter method. + /// \return Builder setter method returns reference to the builder. + Builder &SetOpConnectorSize(int32_t connector_size) { + build_op_connector_size_ = connector_size; + return *this; + } + + /// Setter method. + /// \return Builder setter method returns reference to the builder. + Builder &SetClient(std::shared_ptr cache_client) { + build_cache_client_ = cache_client; + return *this; + } + + /// \brief Setter method. + /// \return Builder setter method returns reference to the builder. 
+ Builder &SetSampler(std::shared_ptr sampler) { + build_sampler_ = std::move(sampler); + return *this; + } + + /// \brief The builder "build" method creates the final object and does some init on it. + /// \param ptr The shared_ptr to the new CacheLookupOp object + /// \return Status + Status Build(std::shared_ptr *ptr); + + private: + int32_t build_num_workers_; + int32_t rows_per_buffer_; + int32_t build_op_connector_size_; + std::shared_ptr build_cache_client_; + std::shared_ptr build_sampler_; + + // Check if the required parameters are set by the builder. + // \return Status The error code return + Status SanityCheck() const; + }; + /// \brief Constructor + /// \note It takes the same argument as the base class. + /// \see CacheBase + CacheLookupOp(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf, + std::shared_ptr cache_client, std::shared_ptr sampler) + : CacheBase(num_workers, op_connector_size, rows_per_buf, cache_client, sampler), Sampler(*(sampler.get())) {} + ~CacheLookupOp() = default; + // As a parallel op, we override these two functions + Status operator()() override; + Status WorkerEntry(int32_t worker_id) override; + // As a sampler, we override the following functions + Status ResetSampler() override; + Status HandshakeRandomAccessOp(const RandomAccessOp *op) override; + Status InitSampler() override; + Status GetNextSample(std::unique_ptr *out_buffer) override; + void Print(std::ostream &out, bool show_all) const override; + bool AllowCacheMiss() override { return true; } + std::string Name() const override { return "CacheLookupOp"; } + + /// \brief Base-class override for NodePass visitor acceptor + /// \param[in] p The node to visit + /// \param[out] modified Indicator if the node was modified + /// \return Status of the node visit + Status Accept(NodePass *p, bool *modified) override; + + protected: + Status ComputeColMap() override; + + private: + WaitPost leaf_op_wp_; + + Status RegisterResources() override; +}; +} // 
namespace dataset +} // namespace mindspore + +#endif // DATASET_ENGINE_DATASETOPS_CACHE_LOOKUP_OP_H_ diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_merge_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_merge_op.cc new file mode 100644 index 0000000000..75579dc3a6 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_merge_op.cc @@ -0,0 +1,302 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "minddata/dataset/engine/datasetops/cache_merge_op.h" + +#include +#include +#include +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/opt/pass.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/util/task_manager.h" + +namespace mindspore { +namespace dataset { +CacheMergeOp::~CacheMergeOp() = default; +void CacheMergeOp::Print(std::ostream &out, bool show_all) + const { // Always show the id and name as first line regardless if this is summary or detailed print + out << "(" << std::setw(2) << operator_id_ << ") :"; + if (!show_all) { + // Call the super class for displaying any common 1-liner info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal 1-liner info for this op + out << "\n"; + } else { + // Call the super class for displaying any common detailed info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal stuff + out << "\n\n"; + } +} +CacheMergeOp::CacheMergeOp(int32_t numWorkers, int32_t opConnectorSize, int32_t numCleaners, + std::shared_ptr cache_client, const std::shared_ptr &sampler) + : ParallelOp(numWorkers, opConnectorSize, sampler), num_cleaners_(numCleaners), cache_client_(cache_client) {} +Status CacheMergeOp::operator()() { + // A queue of row id to let cleaner send cache miss rows to the cache server + // We don't want a small queue as this will block the parallel op workers. + // A row id is 8 byte integer. So bigger size doesn't consume a lot of memory. 
+ static const int32_t queue_sz = 512; + io_que_ = std::make_unique>(queue_sz); + RETURN_IF_NOT_OK(io_que_->Register(tree_->AllTasks())); + RETURN_IF_NOT_OK( + tree_->LaunchWorkers(num_workers_, std::bind(&CacheMergeOp::WorkerEntry, this, std::placeholders::_1))); + RETURN_IF_NOT_OK( + tree_->LaunchWorkers(num_workers_, std::bind(&CacheMergeOp::CacheMissWorkerEntry, this, std::placeholders::_1))); + // One dedicated thread to move TensorRow from the pool to the cache server + for (auto i = 0; i < num_cleaners_; ++i) { + RETURN_IF_NOT_OK(tree_->AllTasks()->CreateAsyncTask("Cleaner", std::bind(&CacheMergeOp::Cleaner, this))); + } + TaskManager::FindMe()->Post(); + return Status::OK(); +} +// Each parallel worker will pop from the CacheHit stream. If there is a missing TensorRow, we will wait +// until it shows up in the pool. +Status CacheMergeOp::WorkerEntry(int32_t worker_id) { + TaskManager::FindMe()->Post(); + std::shared_ptr cache_hit_stream = child_[kCacheHitChildIdx]; + std::unique_ptr db_ptr; + RETURN_IF_NOT_OK(cache_hit_stream->GetNextBuffer(&db_ptr, worker_id)); + while (!db_ptr->eof()) { + if (db_ptr->eoe()) { + RETURN_IF_NOT_OK(EoeReceived(worker_id)); + db_ptr.reset(); + RETURN_IF_NOT_OK(cache_hit_stream->GetNextBuffer(&db_ptr, worker_id)); + } else { + // See if there is any missing row + auto tbl = std::make_unique(); + while (db_ptr->NumRows() > 0) { + TensorRow row; + RETURN_IF_NOT_OK(db_ptr->PopRow(&row)); + if (row.empty()) { + auto row_id = row.getId(); + TensorRowRequest *rq = nullptr; + RETURN_IF_NOT_OK(GetRq(row_id, &rq)); + // Block until the row shows up in the pool. 
+ RETURN_IF_NOT_OK(rq->Wait(&row)); + } + tbl->push_back(std::move(row)); + } + db_ptr->set_tensor_table(std::move(tbl)); + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db_ptr))); + RETURN_IF_NOT_OK(cache_hit_stream->GetNextBuffer(&db_ptr, worker_id)); + } + } + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db_ptr))); + return Status::OK(); +} +Status CacheMergeOp::CacheMissWorkerEntry(int32_t workerId) { + TaskManager::FindMe()->Post(); + // We will simply pop TensorRow from the stream and insert them into the pool and + // wake up any worker that is awaiting on the missing TensorRow. + // If we see an eoe, ignore it. For eof, we exit. + std::shared_ptr cache_missing_stream = child_[kCacheMissChildIdx]; + // Before we start, cache the schema at the server. Pick one of the workers + // do it. The schema should have been done at prepare time. + if (workerId == 0) { + RETURN_IF_NOT_OK(cache_client_->CacheSchema(column_name_id_map())); + } + std::unique_ptr db_ptr; + RETURN_IF_NOT_OK(cache_missing_stream->GetNextBuffer(&db_ptr, workerId)); + while (!db_ptr->eof()) { + if (db_ptr->eoe()) { + // Ignore it. + MS_LOG(DEBUG) << "Ignore eoe"; + } else { + while (db_ptr->NumRows() > 0) { + TensorRow row; + RETURN_IF_NOT_OK(db_ptr->PopRow(&row)); + row_id_type row_id = row.getId(); + if (row_id < 0) { + std::string errMsg = "Expect positive row id: " + std::to_string(row_id); + RETURN_STATUS_UNEXPECTED(errMsg); + } + TensorRowRequest *rq = nullptr; + RETURN_IF_NOT_OK(GetRq(row_id, &rq)); + rq->WakeUpAny(std::move(row)); + // Let the cleaner to flush out this row (async) to the cache server. 
+ RETURN_IF_NOT_OK(io_que_->EmplaceBack(row_id)); + } + } + RETURN_IF_NOT_OK(cache_missing_stream->GetNextBuffer(&db_ptr, workerId)); + } + return Status::OK(); +} +Status CacheMergeOp::Cleaner() { + TaskManager::FindMe()->Post(); + while (true) { + row_id_type row_id; + RETURN_IF_NOT_OK(io_que_->PopFront(&row_id)); + if (row_id < 0) { + break; + } + TensorRowRequest *rq = nullptr; + RETURN_IF_NOT_OK(GetRq(row_id, &rq)); + if (rq->GetState() == TensorRowRequest::State::kClean) { + // If already flushed, move on to the next one. + continue; + } + TensorRow row; + RETURN_IF_NOT_OK(rq->Release(&row)); + CHECK_FAIL_RETURN_UNEXPECTED(!row.empty(), "Programming error."); + Status rc = cache_client_->WriteRow(row); + // Bad rc should not bring down the pipeline + if (rc.IsError()) { + MS_LOG(WARNING) << "Cache not successful." << rc.ToString(); + } + rq->SetState(TensorRowRequest::State::kClean); + } + return Status::OK(); +} + +Status CacheMergeOp::GetRq(row_id_type row_id, CacheMergeOp::TensorRowRequest **out) { + RETURN_UNEXPECTED_IF_NULL(out); + std::unique_lock lck(mux_); + auto it = cache_miss_map_.find(row_id); + if (it != cache_miss_map_.end()) { + *out = it->second.GetMutablePointer(); + } else { + // We will create a new one. 
+ auto alloc = Services::GetAllocator(); + auto r = cache_miss_map_.emplace(row_id, MemGuard>(alloc)); + if (r.second) { + auto &mem = r.first->second; + RETURN_IF_NOT_OK(mem.allocate(1, row_id)); + *out = mem.GetMutablePointer(); + } else { + RETURN_STATUS_UNEXPECTED("Map insert fail."); + } + } + return Status::OK(); +} +Status CacheMergeOp::PrepareNodePostAction() { // Run any common code from super class first before adding our own + // specific logic + CHECK_FAIL_RETURN_UNEXPECTED(child_.size() == 2, "Incorrect number of children"); + RETURN_IF_NOT_OK(ParallelOp::PrepareNodePostAction()); + // Get the computed check sum from all ops in the cache miss class + uint32_t cache_crc = DatasetOp::GenerateCRC(child_[kCacheMissChildIdx]); + // This is a mappable cache op so the id's need to be generated. + // Construct the cache + const bool generate_ids = false; + Status rc = cache_client_->CreateCache(cache_crc, generate_ids); + if (rc.get_code() == StatusCode::kDuplicateKey) { + // We are told the cache has been created already. + MS_LOG(INFO) << "Cache created already"; + rc = Status::OK(); + } + RETURN_IF_NOT_OK(rc); + return Status::OK(); +} +Status CacheMergeOp::ComputeColMap() { + CHECK_FAIL_RETURN_UNEXPECTED(child_[kCacheMissChildIdx] != nullptr, "Cache miss stream empty"); + if (column_name_id_map().empty()) { + column_name_id_map_ = child_[kCacheMissChildIdx]->column_name_id_map(); + } + CHECK_FAIL_RETURN_UNEXPECTED(!column_name_id_map().empty(), "No column map detected"); + return Status::OK(); +} +Status CacheMergeOp::TensorRowRequest::Wait(TensorRow *out) { + RETURN_UNEXPECTED_IF_NULL(out); + // Block until the missing row is in the pool. 
+ RETURN_IF_NOT_OK(use_count_.P()); + std::unique_lock lck(dq_mux_); + CHECK_FAIL_RETURN_UNEXPECTED(!row_.empty(), "Programming error"); + *out = std::move(row_.front()); + row_.pop_front(); + return Status::OK(); +} +void CacheMergeOp::TensorRowRequest::WakeUpAny(TensorRow &&row) { + std::unique_lock lck(dq_mux_); + // Technically number of this row shows up in the cache miss stream is equal to the number + // of P() call. However the cleaner wants it too. So we need an extra copy. + if (GetState() == State::kEmpty) { + // We will do a deep copy + for (auto &ts : row) { + auto out_ts = std::make_shared(ts->shape(), ts->type(), ts->GetBuffer(), ts->SizeInBytes()); + cleaner_copy_.push_back(out_ts); + } + cleaner_copy_.setId(row.getId()); + // Change the state to dirty + SetState(State::kDirty); + } + row_.push_back(std::move(row)); + // Bump up the use count by 1. This wake up any parallel worker which is waiting + // for this row. + use_count_.V(); +} +Status CacheMergeOp::TensorRowRequest::Release(TensorRow *out) { + RETURN_UNEXPECTED_IF_NULL(out); + // We are not holding any mutex here because the cleaner isn't really touching the deque row_. + // In case we have multiple cleaners and they all see the copy, only one of them will + // get it. + auto expected = State::kDirty; + if (st_.compare_exchange_strong(expected, State::kClean)) { + *out = std::move(cleaner_copy_); + } + return Status::OK(); +} +// Builder constructor. Creates the builder object. +CacheMergeOp::Builder::Builder() : build_cache_client_(nullptr), build_sampler_(nullptr) { + std::shared_ptr cfg = GlobalContext::config_manager(); + build_num_workers_ = cfg->num_parallel_workers(); + build_op_connector_size_ = cfg->op_connector_size(); + build_num_cleaners_ = 1; +} + +// Check if the required parameters are set by the builder. 
+Status CacheMergeOp::Builder::SanityCheck() const { + if (build_cache_client_ == nullptr) { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "CacheMergeOp requires a CacheClient"); + } + // Make sure the cache client has a valid session + if (!build_cache_client_->session_id()) { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, + "Cache client for CacheMergeOp is missing session id"); + } + return Status::OK(); +} + +// The builder "build" method creates the final object and does some init on it +Status CacheMergeOp::Builder::Build(std::shared_ptr *ptr) { + RETURN_IF_NOT_OK(SanityCheck()); + *ptr = std::make_shared(build_num_workers_, build_op_connector_size_, build_num_cleaners_, + build_cache_client_, build_sampler_); + return Status::OK(); +} + +// Pre-Visitor accept method for NodePass +Status CacheMergeOp::PreAccept(NodePass *p, bool *modified) { + // Downcast shared pointer then call the pre-visitation + return p->PreRunOnNode(shared_from_base(), modified); +} + +// Visitor accept method for NodePass +Status CacheMergeOp::Accept(NodePass *p, bool *modified) { + // Downcast shared pointer then call visitor + return p->RunOnNode(shared_from_base(), modified); +} + +Status CacheMergeOp::EoeReceived(int32_t worker_id) { + // If we are in a repeat path, send the eoe up. + // Otherwise ignore it. 
+ if (BitTest(op_ctrl_flags_, kDeOpRepeated)) { + return DatasetOp::EoeReceived(worker_id); + } + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_merge_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_merge_op.h new file mode 100644 index 0000000000..df37465fc4 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_merge_op.h @@ -0,0 +1,196 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef DATASET_ENGINE_DATASETOPS_CACHE_MERGE_OP_H_ +#define DATASET_ENGINE_DATASETOPS_CACHE_MERGE_OP_H_ + +#include +#include +#include +#include +#include +#include +#include +#include "minddata/dataset/core/tensor_row.h" +#include "minddata/dataset/engine/cache/cache_client.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/engine/dataset_iterator.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/util/semaphore.h" + +namespace mindspore { +namespace dataset { +/// \brief Provides method to merge two streams (one from CacheLookup and one from cache miss stream) into one single +/// stream +class CacheMergeOp : public ParallelOp { + public: + // Some handshake structures among the main thread, cleaner threads and parallel op threads. 
+ class TensorRowRequest { + public: + enum class State : uint8_t { + kEmpty = 0, // No row in the deque + kDirty = 1, // Cleaner hasn't flushed it to the cache server yet. + kClean = 2 // The row has been flushed already. + }; + explicit TensorRowRequest(row_id_type id) : st_(State::kEmpty), use_count_(0) {} + ~TensorRowRequest() = default; + State GetState() const { return st_; } + void SetState(State newState) { st_ = newState; } + Status Wait(TensorRow *out); + void WakeUpAny(TensorRow &&row); + Status Release(TensorRow *out); + + private: + std::mutex dq_mux_; + std::atomic st_; + Semaphore use_count_; + std::deque row_; + TensorRow cleaner_copy_; + }; + + constexpr static int kCacheHitChildIdx = 0; // Cache hit stream + constexpr static int kCacheMissChildIdx = 1; // Cache miss stream + + /// \brief The nested builder class inside of the CacheMergeOp is used to help manage all of + /// the arguments for constructing it. Use the builder by setting each argument + /// with the provided set methods, and then finally call the build method to execute + /// the actual construction. + class Builder { + public: + /// Builder constructor. Creates the builder object. + /// \note No default args + Builder(); + + /// Default destructor + ~Builder() = default; + + /// Setter method. + /// \return Builder setter method returns reference to the builder. + Builder &SetNumWorkers(int32_t num_workers) { + build_num_workers_ = num_workers; + return *this; + } + + /// Setter method. + /// \return Builder setter method returns reference to the builder. + Builder &SetOpConnectorSize(int32_t connector_size) { + build_op_connector_size_ = connector_size; + return *this; + } + + /// Setter method. + /// \return Builder setter method returns reference to the builder. 
+ Builder &SetClient(std::shared_ptr cache_client) { + build_cache_client_ = cache_client; + return *this; + } + + /// \brief Setter method + /// \param sampler + /// \return Builder setter method returns reference to the builder. + Builder &SetSampler(std::shared_ptr sampler) { + build_sampler_ = std::move(sampler); + return *this; + } + + /// \brief Setter method + /// \param num_cleaners + /// \return Builder setter method returns reference to the builder. + Builder &SetNumCleaner(int32_t num_cleaners) { + build_num_cleaners_ = num_cleaners; + return *this; + } + + /// The builder "build" method creates the final object and does some init on it. + /// \param ptr The shared_ptr to the new CacheMergeOp object + /// \return Status + Status Build(std::shared_ptr *ptr); + + private: + int32_t build_num_workers_; + int32_t build_op_connector_size_; + int32_t build_num_cleaners_; + std::shared_ptr build_cache_client_; + std::shared_ptr build_sampler_; + + /// Check if the required parameters are set by the builder. 
+ /// \return Status The error code return + Status SanityCheck() const; + }; + + /// \brief Constructor + /// \param numWorkers Number of parallel workers as a derived class of ParallelOp + /// \param opConnector Size Connector size as a derived class of ParallelOp + /// \param numCleaners Number of cleaners to move cache miss rows into the cache server + /// \param cache_client CacheClient to commmunicate with the Cache server + /// \param sampler as a derived class of ParallelOp + CacheMergeOp(int32_t numWorkers, int32_t opConnectorSize, int32_t numCleaners, + std::shared_ptr cache_client, const std::shared_ptr &sampler); + ~CacheMergeOp(); + void Print(std::ostream &out, bool show_all) const override; + friend std::ostream &operator<<(std::ostream &out, const CacheMergeOp &mo) { + mo.Print(out, false); + return out; + } + /// \brief Master thread responsible to spawn all the necessary worker threads for the two streams and + /// the threads for the cleaners. + /// \return + Status operator()() override; + /// \brief Entry function for worker thread that fetch rows from CacheLookupOp + /// \param workerId + /// \return Status object + Status WorkerEntry(int32_t workerId) override; + Status PrepareNodePostAction() override; + /// \brief Entry function for worker thread that fetch rows from the cache miss stream + /// \param workerId + /// \return Status object + Status CacheMissWorkerEntry(int32_t workerId); + Status GetRq(row_id_type row_id, TensorRowRequest **); + + /// \brief Base-class override for NodePass pre-visit acceptor + /// \param[in] p The node to visit + /// \param[out] modified Indicator if the node was modified + /// \return Status of the node visit + Status PreAccept(NodePass *p, bool *modified) override; + + /// \brief Base-class override for NodePass visitor acceptor + /// \param[in] p The node to visit + /// \param[out] modified Indicator if the node was modified + /// \return Status of the node visit + Status Accept(NodePass *p, bool 
*modified) override; + + /// \brief Base-class override for eoe handling + /// \param worker_id + /// \return Status object + Status EoeReceived(int32_t worker_id) override; + + protected: + Status ComputeColMap() override; + + private: + std::mutex mux_; + std::map>> cache_miss_map_; + std::unique_ptr> io_que_; + std::shared_ptr cache_client_; + int32_t num_cleaners_; + + /// \brief These are the entry functions for the cleaner threads. Each cleaner is responsible for + /// moving cache miss TensorRow into the CacheServer. + /// \return Status object + Status Cleaner(); +}; +} // namespace dataset +} // namespace mindspore +#endif // DATASET_ENGINE_DATASETOPS_CACHE_MERGE_OP_H_ diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_op.cc new file mode 100644 index 0000000000..143c45b2dc --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_op.cc @@ -0,0 +1,219 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "minddata/dataset/engine/datasetops/cache_op.h" + +#include +#include +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/datasetops/repeat_op.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/opt/pass.h" +#include "minddata/dataset/util/task_manager.h" +#include "utils/log_adapter.h" + +namespace mindspore { +namespace dataset { +// Builder constructor. Creates the builder object. +CacheOp::Builder::Builder() : build_cache_client_(nullptr), build_sampler_(nullptr) { + std::shared_ptr cfg = GlobalContext::config_manager(); + build_num_workers_ = cfg->num_parallel_workers(); + rows_per_buffer_ = cfg->rows_per_buffer(); + build_op_connector_size_ = cfg->op_connector_size(); +} + +// Check if the required parameters are set by the builder. +Status CacheOp::Builder::SanityCheck() const { + if (build_cache_client_ == nullptr) { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "CacheOp requires a CacheClient"); + } + // Make sure the cache client has a valid session + if (!build_cache_client_->session_id()) { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Cache client for CacheOp is missing session id"); + } + return Status::OK(); +} + +// The builder "build" method creates the final object and does some init on it +Status CacheOp::Builder::Build(std::shared_ptr *ptr) { + RETURN_IF_NOT_OK(SanityCheck()); + *ptr = std::make_shared(build_num_workers_, build_op_connector_size_, rows_per_buffer_, build_cache_client_, + build_sampler_); + RETURN_IF_NOT_OK((*ptr)->InitCache()); + + return Status::OK(); +} + +// Constructor of CacheOp +CacheOp::CacheOp(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf, + std::shared_ptr cache_client, std::shared_ptr sampler) + : CacheBase(num_workers, 
op_connector_size, rows_per_buf, cache_client, sampler), + num_guys_in_(0), + phase_(Phase::kBuildPhase) {} + +// Destructor +CacheOp::~CacheOp() = default; + +// Private function for cache setup/init work just after construction +Status CacheOp::InitCache() { return Status::OK(); } + +// This class functor will provide the master loop that drives the logic for performing the work +Status CacheOp::operator()() { + if (!sampler_) { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, + "CacheOp requires a sampler before it can be executed!"); + } + RETURN_IF_NOT_OK(RegisterResources()); + // Kick off the workers + RETURN_IF_NOT_OK(tree_->LaunchWorkers(num_workers_, std::bind(&CacheOp::WorkerEntry, this, std::placeholders::_1))); + // required task group sync after launching workers + TaskManager::FindMe()->Post(); + // Wait for the workers to finish caching the rows. + RETURN_IF_NOT_OK(WaitForCachingAllRows()); + RETURN_IF_NOT_OK(FetchSamplesToWorkers()); + return Status::OK(); +} +Status CacheOp::CacheAllRows(int32_t worker_id) { + // If the current phase is to fill the cache, do it then. + if (phase_ == Phase::kBuildPhase) { + // We will take the chance to cache the schema at the server. + // Just do it once and pick one worker to do it. + if (worker_id == 0) { + RETURN_IF_NOT_OK(cache_client_->CacheSchema(column_name_id_map())); + } + MS_LOG(INFO) << "CacheOp first epoch SAVE mode started. Worker: " << worker_id; + // SAVE mode loop + std::unique_ptr db_ptr; + RETURN_IF_NOT_OK(this->GetNextInput(&db_ptr, worker_id, 0)); + while (!db_ptr->eof()) { + if (!db_ptr->eoe()) { + RETURN_IF_NOT_OK(cache_client_->WriteBuffer(std::move(db_ptr))); + } else { + // In a repeat-over-cache scenario, any of the "real" leaf operators below us have been set up + // as non-repeating leaf ops. As such, they only do one epoch and then quit. Since we got the + // the eoe to indicate the end of the epoch, we should next expect to get the eof. 
+ // Drain this eof so that we don't leave it sitting there on a connector that we'll never fetch + // from again. + RETURN_IF_NOT_OK(this->GetNextInput(&db_ptr, worker_id, 0)); + if (!db_ptr->eof()) { + RETURN_STATUS_UNEXPECTED("Cache op expects to get an eof after eoe from child."); + } + } + RETURN_IF_NOT_OK(this->GetNextInput(&db_ptr, worker_id, 0)); + } + } + // Let the main guy know we are done. + auto last_guy_in = num_guys_in_.fetch_add(1); + if ((last_guy_in + 1) == num_workers_) { + rows_cache_done_.Set(); + } else { + // Let's do a sync up here. + RETURN_IF_NOT_OK(rows_cache_done_.Wait()); + } + return Status::OK(); +} +Status CacheOp::WaitForCachingAllRows() { + // Wait for the workers to finish caching the rows. + RETURN_IF_NOT_OK(rows_cache_done_.Wait()); + // Move from build phase to fetch phase if we are the one to fill the cache + if (phase_ == Phase::kBuildPhase) { + RETURN_IF_NOT_OK(cache_client_->BuildPhaseDone()); + // Move to the next phase + phase_ = Phase::kFetchPhase; + } + // Get statistics from the server, and if we are not the one to create the cache, + // wait until the state changed from build phase to fetch phase. + CacheClient::ServiceStat stat{}; + bool BuildPhaseDone = true; + do { + RETURN_IF_NOT_OK(cache_client_->GetStat(&stat)); + BuildPhaseDone = stat.cache_service_state == static_cast(CacheService::State::kFetchPhase); + if (!BuildPhaseDone) { + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } + } while (!BuildPhaseDone); + const row_id_type min_key = stat.min_row_id; + const row_id_type max_key = stat.max_row_id; + num_rows_ = max_key - min_key + 1; + MS_LOG(INFO) << "Number of rows cached: " << num_rows_; + MS_LOG(INFO) << "Number of rows cached in memory : " << stat.num_mem_cached; + MS_LOG(INFO) << "Number of rows spilled to disk : " << stat.num_disk_cached; + // Now all rows are cached and we have done a sync point check up. Next phase is + // to pick up fetch input from sampler and pass up to the caller. 
+ RETURN_IF_NOT_OK(sampler_->HandshakeRandomAccessOp(this)); + return Status::OK(); +} +Status CacheOp::WorkerEntry(int32_t worker_id) { + TaskManager::FindMe()->Post(); + RETURN_IF_NOT_OK(CacheAllRows(worker_id)); + RETURN_IF_NOT_OK(FetchFromCache(worker_id)); + return Status::OK(); +} +Status CacheOp::RegisterResources() { + RETURN_IF_NOT_OK(CacheBase::RegisterResources()); + RETURN_IF_NOT_OK(rows_cache_done_.Register(tree_->AllTasks())); + RETURN_IF_NOT_OK(keys_miss_.Register(tree_->AllTasks())); + return Status::OK(); +} + +// Base-class override for setting specific CacheOp configurations. This code will be called +// during the execution tree prepare phase BEFORE traversing down to child operators. +uint32_t CacheOp::PrepareFlags() const { return ExecutionTree::kDePrepCache; } +// Base-class override for special eoe handler. +// CacheOp must override this because it shall not perform default handling of eoe. Instead +// the CacheOp manages actions related to the end of the epoch. +Status CacheOp::EoeReceived(int32_t worker_id) { + state_ = OpState::kDeOpIdle; + return Status::OK(); +} +// Base-class override for handling cases when an eof is received. +Status CacheOp::EofReceived(int32_t worker_id) { + // eofReceived is overloaded because we want to manually handle this eof. + // Specifically, the default behaviour is to pack it and flow it up to the next connection. + // In this case, we want a no-op behaviour so that we can perform correct action. 
+ return Status::OK(); +} + +// Pre-Visitor accept method for NodePass +Status CacheOp::PreAccept(NodePass *p, bool *modified) { + // Downcast shared pointer then call the pre-visitation + return p->PreRunOnNode(shared_from_base(), modified); +} + +// Visitor accept method for NodePass +Status CacheOp::Accept(NodePass *p, bool *modified) { + // Downcast shared pointer then call visitor + return p->RunOnNode(shared_from_base(), modified); +} + +// A public wrapper for creating the cache through the client +Status CacheOp::CreateCache(uint32_t cache_crc) { + // This is a non-mappable cache op so the id's need to be generated. + // Construct the cache + const bool generate_ids = true; + Status rc = cache_client_->CreateCache(cache_crc, generate_ids); + if (rc.get_code() == StatusCode::kDuplicateKey) { + // We are told the cache has been created already. So we skip the build phase. + phase_ = Phase::kFetchPhase; + rc = Status::OK(); + } + RETURN_IF_NOT_OK(rc); + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_op.h new file mode 100644 index 0000000000..dd34d54973 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_op.h @@ -0,0 +1,168 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef DATASET_ENGINE_DATASETOPS_CACHE_OP_H_ +#define DATASET_ENGINE_DATASETOPS_CACHE_OP_H_ + +#include +#include +#include +#include +#include "minddata/dataset/engine/datasetops/cache_base_op.h" + +namespace mindspore { +namespace dataset { +/// \brief CacheOp provides a memory/disk cache that acts as a save-point within a non-mappable dataset. +/// \note For mappable dataset, please see CacheLookupOp. +/// \see CacheLookupOp +class CacheOp : public CacheBase, public RandomAccessOp { + public: + // This CacheOp is for non-mappable case where it is divided into two phases. + // The first phase is we cache all the rows from the child (and let the cache server + // assigns row id). No read access in the first phase. Once the cache is fully built, + // we switch to second phase and fetch requests from the sampler. + enum class Phase : uint8_t { kBuildPhase = 0, kFetchPhase = 1 }; + + /// \brief The nested builder class inside of the CacheOp is used to help manage all of + /// the arguments for constructing it. Use the builder by setting each argument + /// with the provided set methods, and then finally call the build method to execute + /// the actual construction. + class Builder { + public: + // Builder constructor. Creates the builder object. + // @note No default args + // @return This is a constructor. + Builder(); + + // Default destructor + ~Builder() = default; + + /// \brief Setter method. + /// \return Builder setter method returns reference to the builder. + Builder &SetNumWorkers(int32_t num_workers) { + build_num_workers_ = num_workers; + return *this; + } + + /// \brief Setter method. + /// \return Builder setter method returns reference to the builder. + Builder &SetOpConnectorSize(int32_t connector_size) { + build_op_connector_size_ = connector_size; + return *this; + } + + /// Setter method. + /// \return Builder setter method returns reference to the builder. 
+ Builder &SetClient(std::shared_ptr cache_client) { + build_cache_client_ = cache_client; + return *this; + } + + /// \brief Setter method + /// \param rows_per_buffer + /// \return Builder setter method returns reference to the builder. + Builder &SetRowsPerBuffer(int32_t rows_per_buffer) { + rows_per_buffer_ = rows_per_buffer; + return *this; + } + + /// \brief Setter method + /// \param sampler + /// \return Builder setter method returns reference to the builder. + Builder &SetSampler(std::shared_ptr sampler) { + build_sampler_ = std::move(sampler); + return *this; + } + + /// \brief The builder "build" method creates the final object and does some init on it. + /// \param ptr The shared_ptr to the new CacheOp object + /// \return Status + Status Build(std::shared_ptr *ptr); + + private: + int32_t build_num_workers_; + int32_t rows_per_buffer_; + int32_t build_op_connector_size_; + std::shared_ptr build_cache_client_; + std::shared_ptr build_sampler_; + + /// \brief Check if the required parameters are set by the builder. + /// \return Status The error code return + Status SanityCheck() const; + }; + + /// \brief Constructor of CacheOp + /// \note The builder class should be used to call it. + /// \param num_workers The number of worker threads. + /// \param op_connector_size The size of each queue in the connector. + CacheOp(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf, + std::shared_ptr cache_client, std::shared_ptr sampler); + + // Destructor + ~CacheOp(); + + /// \brief Base-class override for setting specific CacheOp configurations. This code will be called + /// during the execution tree prepare phase BEFORE traversing down to child operators. + uint32_t PrepareFlags() const override; + /// \brief Base-class override for special eoe handler. + /// CacheOp must override this because it shall not perform default handling of eoe. Instead + /// the CacheOp manages actions related to the end of the epoch. 
+ /// \return Status - The error code return + Status EoeReceived(int32_t worker_id) override; + /// \brief Base-class override for NodePass pre-visit acceptor + /// \param[in] p The node to visit + /// \param[out] modified Indicator if the node was modified + /// \return Status of the node visit + Status PreAccept(NodePass *p, bool *modified) override; + /// \brief Base-class override for NodePass visitor acceptor + /// \param[in] p The node to visit + /// \param[out] modified Indicator if the node was modified + /// \return Status of the node visit + Status Accept(NodePass *p, bool *modified) override; + /// \brief Base-class override for handling cases when an eof is received. + /// \param worker_id - The worker id + /// \return Status - The error code return + Status EofReceived(int32_t worker_id) override; + Status operator()() override; + Status WorkerEntry(int32_t worker_id) override; + /// \brief Base-class override for handling cases if we allow cache miss + bool AllowCacheMiss() override { return false; } + /// \brief Base-class override for the name of this operator + std::string Name() const override { return "CacheOp"; } + /// \brief A public wrapper for creating the cache through the client + /// \param[in] cache_crc The crc that identifies the cache + /// \see cache_pass.cc + /// \return Status return code + Status CreateCache(uint32_t cache_crc); + + private: + WaitPost rows_cache_done_; + std::atomic num_guys_in_; + Phase phase_; + /// \brief The main thread will wait until all the rows are cached and will start the handshake with the sampler. + /// \return Status object + Status WaitForCachingAllRows(); + /// \brief For non-mappable dataset, there is a build phase where we cache all the rows. 
+ /// \return Status object + Status CacheAllRows(int32_t worker_id); + Status RegisterResources() override; + /// \brief Private function for cache setup/init work just after construction + /// \return Status The error code return + Status InitCache(); +}; +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_ENGINE_DATASETOPS_CACHE_OP_H_ diff --git a/mindspore/ccsrc/dataset/engine/datasetops/concat_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/concat_op.cc similarity index 78% rename from mindspore/ccsrc/dataset/engine/datasetops/concat_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/concat_op.cc index 4bada31e7e..7acb68350b 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/concat_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/concat_op.cc @@ -17,11 +17,11 @@ #include #include "common/utils.h" -#include "dataset/core/config_manager.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/datasetops/concat_op.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/execution_tree.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/datasetops/concat_op.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/execution_tree.h" namespace mindspore { namespace dataset { @@ -61,46 +61,39 @@ void ConcatOp::Print(std::ostream &out, bool show_all) const { Status ConcatOp::operator()() { // The children_num_ parameter needs to be put here children_num_ = static_cast(child_.size()); - TaskManager::FindMe()->Post(); std::unique_ptr buf; - RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&buf)); - int eof_count = 0; - while (eof_count != children_num_) { + while (eof_count == 0) { for (int i = 0; i < children_num_; i++) { - // 1. Throw the eof buffer when meet it - if (buf->eof() || buf->eoe()) { - RETURN_IF_NOT_OK(child_[i]->GetNextBuffer(&buf)); + // 1. 
Read the first buffer + RETURN_IF_NOT_OK(child_[i]->GetNextBuffer(&buf)); + if (buf->eof()) { + eof_count++; + continue; } // 2. Do verification as for column name, column data type and rank of column data - RETURN_IF_NOT_OK(Verify(i, buf)); - + if (!buf->eoe()) { + RETURN_IF_NOT_OK(Verify(i, buf)); + } // 3. Put the data into output_connector while (!buf->eoe() && !buf->eof()) { RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(buf))); RETURN_IF_NOT_OK(child_[i]->GetNextBuffer(&buf)); } - - // 4. Throw the eoe buffer when meet it - if (buf->eoe() && (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat))) { - RETURN_IF_NOT_OK(child_[i]->GetNextBuffer(&buf)); - } - // 5. Add eoe buffer after get buffer from all child - if (i == (children_num_ - 1)) { - auto eoe_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOE); - RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eoe_buffer))); - } - if (buf->eof()) { - eof_count++; - } + } + // 4. Add eoe buffer after get buffer from all child + if (eof_count == 0) { + auto eoe_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOE); + RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eoe_buffer))); } } - // 6. Add eof buffer in the end manually + CHECK_FAIL_RETURN_UNEXPECTED(eof_count == children_num_, + "Something went wrong, eof count does not match the number of children."); + // 5. 
Add eof buffer in the end manually MS_LOG(DEBUG) << "Add the eof buffer manualy in the end."; auto eof_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOF); RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eof_buffer))); - return Status::OK(); } @@ -126,12 +119,6 @@ Status ConcatOp::Verify(int32_t id, const std::unique_ptr &buf) { return Status::OK(); } -Status ConcatOp::PrepareNodePostAction() { - RETURN_IF_NOT_OK(PipelineOp::PrepareNodePostAction()); - tree_->AddToEOEOpStack(shared_from_this()); - return Status::OK(); -} - // We need to overwrite the super class ComputeColMap here because the number of children is more than 1. Status ConcatOp::ComputeColMap() { if (column_name_id_map_.empty()) { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/concat_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/concat_op.h similarity index 90% rename from mindspore/ccsrc/dataset/engine/datasetops/concat_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/concat_op.h index 4bcfdbf6c6..3d3d9df71c 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/concat_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/concat_op.h @@ -20,7 +20,7 @@ #include #include #include -#include "dataset/engine/datasetops/pipeline_op.h" +#include "minddata/dataset/engine/datasetops/pipeline_op.h" namespace mindspore { namespace dataset { @@ -75,12 +75,6 @@ class ConcatOp : public PipelineOp { // @return Status - The error code return Status operator()() override; - // During tree prepare phase, operators may have specific post-operations to perform depending on - // their role. - // @notes Derived versions of this function should always call it's superclass version first - // before providing their own implementations. 
- Status PrepareNodePostAction() override; - // Op name getter // @return Name of the current Op std::string Name() const override { return "ConcatOp"; } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.cc similarity index 85% rename from mindspore/ccsrc/dataset/engine/datasetops/dataset_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.cc index 3e31f6c017..9254141308 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/datasetops/dataset_op.h" +#include "minddata/dataset/engine/datasetops/dataset_op.h" #include #include @@ -23,12 +23,12 @@ #include #include -#include "dataset/engine/execution_tree.h" -#include "dataset/engine/datasetops/device_queue_op.h" -#include "dataset/engine/datasetops/source/sampler/sampler.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/opt/pass.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/datasetops/device_queue_op.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/opt/pass.h" #include "utils/system/crc32c.h" #include "utils/log_adapter.h" @@ -153,16 +153,38 @@ Status DatasetOp::Remove() { } } + // Finally, clear "this" op's parent and child pointers since we have just + // disconnected it from the tree and invalidate it's fields. 
+ child_.clear(); + parent_.clear(); + operator_id_ = kInvalidOperatorId; + tree_ = nullptr; + return Status::OK(); } -// Getter function to get a shared pointer to our childAdds a operator to become our child. +// Getter function to get a shared pointer to our child std::shared_ptr DatasetOp::child(int32_t child_index) const { + std::shared_ptr return_op = nullptr; + if (child_.empty()) { + return return_op; + } MS_ASSERT(child_index < static_cast(child_.size())); // Return a shared pointer return child_[child_index]; } +// Getter function to get the parent pointer +void DatasetOp::Parent(DatasetOp **parent, int32_t parent_index) const { + if (parent_.empty()) { + // common case if this is a root node + *parent = nullptr; + } else { + MS_ASSERT(parent_index < static_cast(parent_.size())); + *parent = parent_[parent_index]; + } +} + // Creates the connector within this operator void DatasetOp::CreateConnector(int32_t num_producers, int32_t num_consumers) { MS_LOG(DEBUG) << "Creating connector in tree operator: " << operator_id_ << ". Producer: " << num_producers @@ -264,19 +286,11 @@ Status DatasetOp::EofReceived(int32_t worker_id) { // During tree prepare phase, operators may have specific pre-operations to perform depending on // their role. -Status DatasetOp::PrepareNodePreAction() { - if (BitTest(tree_->PrepareFlags(), ExecutionTree::kDePrepRepeat)) set_control_flag(kDeOpRepeated); - return Status::OK(); -} +Status DatasetOp::PrepareNodePreAction() { return Status::OK(); } + // During tree prepare phase, operators may have specific post-operations to perform depending on // their role. Status DatasetOp::PrepareNodePostAction() { - // If this op does not have any children and it is in a repeat path of the tree... - if (child_.empty() && BitTest(op_ctrl_flags_, kDeOpRepeated)) { - // push ourselves onto the eoe operator stack. Later, a repeat/epoch ctrl operator - // above us will consume them. 
- tree_->AddToEOEOpStack(shared_from_this()); - } // Creating Connector object for each op. // The consumer of the root node is assumed to be one thread. // If multiple threads are consuming from the root node, they will get the ordered data in round robin fashion. @@ -346,34 +360,13 @@ Status DatasetOp::Accept(NodePass *p, bool *modified) { return p->RunOnNode(shared_from_this(), modified); } -// A helper function with some common code that leaf nodes can use during -// prepare phase for checking if they need to assign a sampler to the cache. -Status DatasetOp::SaveSamplerForCache(bool random_access_op) { - // If we are a descendant under a cache op and we have a sampler, then save this sampler - // to a stack so that the cache can pick it up during it's processing above us. - if (sampler_) { - if (BitTest(tree_->PrepareFlags(), ExecutionTree::kDePrepCache)) { - // use move semantic to set our sampler_ to null after the move. This is okay because a sampler is - // useless to a random data op. It was only being used as a temporary holding until the cache can - // be created - tree_->AddToSamplerStack(sampler_); - MS_LOG(INFO) << "Preparing a leaf op: passing sampler up the tree for Cache handling."; - } else if (!random_access_op) { - // A sampler exists, but we are not in a caching tree and we are not a random access mappable leaf. - // This is an error because that type of leaf does not use sampling unless there's a cache to hook it into. - RETURN_STATUS_UNEXPECTED( - "Non-mappable leaf op has a sampler, but it only supports sampling if there is a cache after it in the tree"); - } - } - - if (!random_access_op) { - // Since we don't truly need the sampler for this non-mappable dataset and it's been saved for the cache - // we can remove it now from the base. 
- sampler_.reset(); - } - +// Getter for the sampler, and it also removes the sampler from the op +Status DatasetOp::FetchRemoveSampler(std::shared_ptr *sampler) { + *sampler = sampler_; // It's okay if it sampler_ points to nullptr + sampler_.reset(); // clear our member-copy of this pointer. We no longer have this sampler return Status::OK(); } + uint32_t DatasetOp::GenerateCRC(const std::shared_ptr &op) { std::stringstream ss; op->tree_->Print(ss, op); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.h similarity index 89% rename from mindspore/ccsrc/dataset/engine/datasetops/dataset_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.h index ab5cb90357..b4630c1652 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.h @@ -21,12 +21,13 @@ #include #include #include -#include "dataset/core/constants.h" -#include "dataset/engine/db_connector.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { + // Forward declare class ExecutionTree; @@ -45,10 +46,10 @@ class DatasetOp : public std::enable_shared_from_this { public: static constexpr int32_t kInvalidOperatorId = -1; - // Flags that control operator runtime behaviours + // Operator control flags enum OpControlFlags { kDeOpNone = 0, - kDeOpRepeated = 1, // Operator is a leaf node in a repeat path + kDeOpRepeated = 1, // Operator is a node in a repeat path kDeOpLastRepeat = 1 << 1 // We are in the last repeat loop }; @@ -71,17 +72,23 @@ class DatasetOp : public std::enable_shared_from_this { /// \param child - shared pointer to the child to remove. 
Status RemoveChild(std::shared_ptr child); - /// \brief Removes this node from the tree and connects it's parent/child together. + /// \brief Removes this node from the tree and connects it's parent/child together /// \return Status eerror code returned Status Remove(); /// \brief Getter function to get a shared pointer to our child - /// \param child_index - An operator can have n children. Indicates choose which child to return. + /// \param[in] child_index An operator can have n children. Indicates which child to return. + /// \return The shared pointer to the child. If there are no children, it returns null regardless of the given index std::shared_ptr child(int32_t child_index) const; - /// \brief Inserts a operator as the parent current op. - /// Inserted op will become the sole parent of the current op. - /// The existing parent of the current op will be transferred to the inserted op. + /// \brief Getter function to get the pointer to our parent + /// If there are no parents, it returns null regardless of the given index + /// \param[in] parent_index An operator can have n parents. Indicates which parent to return. + void Parent(DatasetOp **parent, int32_t parent_index) const; + + // Inserts a operator as the parent current op. + // Inserted op will become the sole parent of the current op. + // The existing parent of the current op will be transferred to the inserted op. 
Status InsertAsParent(std::shared_ptr to_add); /// \brief Creates the connector within this operator @@ -161,16 +168,6 @@ class DatasetOp : public std::enable_shared_from_this { /// \return Status - The error code return virtual Status Reset(); - /// \brief This calls the reset function on this subtree in pre-order - /// \return Status - The error code return - virtual Status ResetSubtree() { - RETURN_IF_NOT_OK(Reset()); - for (const auto &c : child_) { - RETURN_IF_NOT_OK(c->ResetSubtree()); - } - return Status::OK(); - } - /// \brief During tree prepare phase, operators may have specific pre-operations to perform depending on /// their role. /// \notes Derived versions of this function should always call it's superclass version first @@ -296,7 +293,12 @@ class DatasetOp : public std::enable_shared_from_this { /// \return Shared pointer to the sampler (may return nullptr) std::shared_ptr sampler() { return sampler_; } - /// Computes a CRC value for the operator + /// \brief Getter for the sampler, and it also removes the sampler from the op + /// \param[out] sampler A pointer to the output sampler that was removed + /// \return Status error code + Status FetchRemoveSampler(std::shared_ptr *sampler); + + // Computes a CRC value for the operator static uint32_t GenerateCRC(const std::shared_ptr &op); /// \brief A helper templated function for casting "this" pointer to shared_ptr @@ -307,17 +309,24 @@ class DatasetOp : public std::enable_shared_from_this { return std::static_pointer_cast(shared_from_this()); } - protected: - /// Adds a parent operator to this operator - /// \notes External callers do not have access to this function. - /// \param parent - The parent node to add - void AddParent(DatasetOp *parent); + /// \brief Setter for the sampler. Allows you to overwrite a previous sampler with a new one. 
+ void SetSampler(std::shared_ptr sampler) { sampler_ = sampler; } - /// Removes a parent operator from this operator - /// \notes External callers do not have access to this function. - /// \param parent - The parent node to remove + /// \brief Checks if this is a leaf node (0 children) + /// \return boolean returns true if it's a leaf + bool IsLeaf() { return (child_.empty()); } + + protected: + /// \brief Removes a parent operator from this operator + /// \notes External callers do not have access to this function + /// \param[in] parent The parent node to remove void RemoveParent(const DatasetOp *parent); + /// \brief Adds a parent operator to this operator + /// \notes External callers do not have access to this function + /// \param[in] parent The parent node to add + void AddParent(DatasetOp *parent); + /// Compute the current op's column map using its child's column map. /// Get called during the tree post-prepare phase in PrepareNodePostAction. /// This base implementation just inherits the map from child 0, and can only be used if the number of children is 1. @@ -325,12 +334,6 @@ class DatasetOp : public std::enable_shared_from_this { /// \return - Status virtual Status ComputeColMap(); - /// A helper function with some common code that leaf nodes can use during - /// pre/pare phase for checking if they need to assign a sampler to the cache. - /// \param random_access_op - indicate if this is a mappable random access leaf or not - /// \return - Status - Status SaveSamplerForCache(bool random_access_op); - std::vector> child_; // Child nodes std::vector parent_; // Parent nodes. 
No ownership std::shared_ptr sampler_; // Some leaf ops might have a sampler diff --git a/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.cc similarity index 95% rename from mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.cc index 0f1fefc0f0..4fe779246b 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.cc @@ -17,16 +17,16 @@ #include #include #include -#include "dataset/core/config_manager.h" -#include "dataset/core/global_context.h" -#include "dataset/engine/datasetops/device_queue_op.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/dataset_iterator.h" -#include "dataset/engine/opt/pass.h" -#include "dataset/engine/perf/profiling.h" -#include "dataset/engine/perf/device_queue_tracing.h" -#include "dataset/util/status.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/datasetops/device_queue_op.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/dataset_iterator.h" +#include "minddata/dataset/engine/opt/pass.h" +#include "minddata/dataset/engine/perf/profiling.h" +#include "minddata/dataset/engine/perf/device_queue_tracing.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/util/task_manager.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.h similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.h index a854004593..0fb4fb093d 100644 --- 
a/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.h @@ -20,15 +20,15 @@ #include #include -#include "dataset/engine/datasetops/pipeline_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/engine/datasetops/pipeline_op.h" +#include "minddata/dataset/util/status.h" #ifdef ENABLE_TDTQUE -#include "dataset/engine/tdt/tdt_plugin.h" +#include "minddata/dataset/engine/tdt/tdt_plugin.h" #endif #ifdef ENABLE_GPUQUE -#include "device/gpu/gpu_buffer_mgr.h" +#include "runtime/device/gpu/gpu_buffer_mgr.h" using mindspore::device::BlockQueueStatus_T; using mindspore::device::GpuBufferMgr; #endif diff --git a/mindspore/ccsrc/dataset/engine/datasetops/filter_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/filter_op.cc similarity index 95% rename from mindspore/ccsrc/dataset/engine/datasetops/filter_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/filter_op.cc index 81c93c6e1c..f32648a3df 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/filter_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/filter_op.cc @@ -13,24 +13,24 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/filter_op.h" +#include "minddata/dataset/engine/datasetops/filter_op.h" #include #include #include #include #include #include -#include "dataset/core/config_manager.h" -#include "dataset/core/constants.h" -#include "dataset/core/global_context.h" -#include "dataset/core/tensor.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/engine/opt/pass.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/opt/pass.h" +#include "minddata/dataset/kernels/tensor_op.h" #include "utils/log_adapter.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/util/task_manager.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/filter_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/filter_op.h similarity index 97% rename from mindspore/ccsrc/dataset/engine/datasetops/filter_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/filter_op.h index 36f70cb82f..fcc6e577df 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/filter_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/filter_op.h @@ -21,9 +21,9 @@ #include #include #include -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/queue.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/queue.h" namespace mindspore { namespace dataset { diff --git 
a/mindspore/ccsrc/dataset/engine/datasetops/map_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op.cc similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/map_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op.cc index 05a1ac7925..e5e70dbbdf 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/map_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op.cc @@ -13,24 +13,24 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/datasetops/map_op.h" +#include "minddata/dataset/engine/datasetops/map_op.h" #include #include #include #include #include -#include "dataset/core/config_manager.h" - -#include "dataset/core/constants.h" -#include "dataset/core/global_context.h" -#include "dataset/core/tensor.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/engine/opt/pass.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/config_manager.h" + +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/opt/pass.h" +#include "minddata/dataset/kernels/tensor_op.h" #include "utils/log_adapter.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/util/task_manager.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/map_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op.h similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/map_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op.h index 371d865196..b1cd58010f 100644 --- 
a/mindspore/ccsrc/dataset/engine/datasetops/map_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op.h @@ -21,9 +21,9 @@ #include #include #include -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/queue.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/queue.h" namespace mindspore { namespace dataset { @@ -181,6 +181,13 @@ class MapOp : public ParallelOp { // @return Name of the current Op std::string Name() const override { return "MapOp"; } + // List of tensor ops getter/setter + // @Return the vector of tensor ops by non-const reference + + auto &TFuncs() { return tfuncs_; } + + const auto &TFuncs() const { return tfuncs_; } + private: // Local queues where worker threads can pop from. // Popping directly from the Connector can block if the previous designated threads haven't pop. @@ -188,7 +195,7 @@ class MapOp : public ParallelOp { QueueList> local_queues_; // Static variables to be ready by worker threads, no modification and readonly - const std::vector> tfuncs_; + std::vector> tfuncs_; // Variable to store the column name that the tensorOps are consuming std::vector in_columns_; diff --git a/mindspore/ccsrc/dataset/engine/datasetops/parallel_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/parallel_op.cc similarity index 89% rename from mindspore/ccsrc/dataset/engine/datasetops/parallel_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/parallel_op.cc index 244861a6c8..abb827aea8 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/parallel_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/parallel_op.cc @@ -13,15 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" #include #include -#include "dataset/engine/datasetops/dataset_op.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/core/config_manager.h" -#include "dataset/engine/db_connector.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/engine/datasetops/dataset_op.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/util/task_manager.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/parallel_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/parallel_op.h similarity index 97% rename from mindspore/ccsrc/dataset/engine/datasetops/parallel_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/parallel_op.h index f59d4bfc53..da54ce1331 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/parallel_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/parallel_op.h @@ -18,9 +18,9 @@ #include #include -#include "dataset/core/constants.h" -#include "dataset/engine/datasetops/dataset_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/engine/datasetops/dataset_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/pipeline_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/pipeline_op.cc similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/pipeline_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/pipeline_op.cc index 1d017a4d3e..fff5ba19e7 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/pipeline_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/pipeline_op.cc @@ -13,7 +13,7 @@ * See the License for the 
specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/datasetops/pipeline_op.h" +#include "minddata/dataset/engine/datasetops/pipeline_op.h" #include #include diff --git a/mindspore/ccsrc/dataset/engine/datasetops/pipeline_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/pipeline_op.h similarity index 98% rename from mindspore/ccsrc/dataset/engine/datasetops/pipeline_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/pipeline_op.h index cb3c76813b..0538349f48 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/pipeline_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/pipeline_op.h @@ -18,7 +18,7 @@ #include #include -#include "dataset/engine/datasetops/dataset_op.h" +#include "minddata/dataset/engine/datasetops/dataset_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/project_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/project_op.cc similarity index 95% rename from mindspore/ccsrc/dataset/engine/datasetops/project_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/project_op.cc index 5ce4056024..e232a64164 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/project_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/project_op.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "dataset/engine/datasetops/project_op.h" +#include "minddata/dataset/engine/datasetops/project_op.h" #include #include #include @@ -22,10 +22,10 @@ #include #include #include -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/engine/opt/pass.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/opt/pass.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/project_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/project_op.h similarity index 98% rename from mindspore/ccsrc/dataset/engine/datasetops/project_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/project_op.h index 628c1342ba..c2f14d34b7 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/project_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/project_op.h @@ -20,7 +20,7 @@ #include #include -#include "dataset/engine/datasetops/pipeline_op.h" +#include "minddata/dataset/engine/datasetops/pipeline_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/rename_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.cc similarity index 95% rename from mindspore/ccsrc/dataset/engine/datasetops/rename_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.cc index 23cd29d295..d12660e6f9 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/rename_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.cc @@ -13,18 +13,18 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/rename_op.h" +#include "minddata/dataset/engine/datasetops/rename_op.h" #include #include #include #include -#include "dataset/core/config_manager.h" -#include "dataset/core/constants.h" -#include "dataset/core/global_context.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/opt/pass.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/opt/pass.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/rename_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.h similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/rename_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.h index e209c075d6..d846bb1b40 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/rename_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.h @@ -20,9 +20,9 @@ #include #include #include -#include "dataset/core/tensor.h" -#include "dataset/engine/datasetops/pipeline_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/datasetops/pipeline_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/repeat_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/repeat_op.cc similarity index 82% rename from mindspore/ccsrc/dataset/engine/datasetops/repeat_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/repeat_op.cc index 4999dddd02..6d3dc91ed3 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/repeat_op.cc +++ 
b/mindspore/ccsrc/minddata/dataset/engine/datasetops/repeat_op.cc @@ -17,11 +17,11 @@ #include #include -#include "dataset/engine/execution_tree.h" -#include "dataset/engine/datasetops/repeat_op.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/opt/pass.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/datasetops/repeat_op.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/opt/pass.h" #include "utils/log_adapter.h" @@ -77,26 +77,6 @@ void RepeatOp::Print(std::ostream &out, bool show_all) const { } } -// Base-class override for executing specific RepeatOp configurations. This code will be called -// during the execution tree prepare phase when it is visiting this operator. -Status RepeatOp::PrepareNodePostAction() { - // Run any common code from super class first before adding our own specific logic - RETURN_IF_NOT_OK(PipelineOp::PrepareNodePostAction()); - std::shared_ptr leaf_op = tree_->PopFromEOEOpStack(); - while (leaf_op != nullptr) { - // Track the leaf operators that are under this repeat op. - eoe_ops_.push_back(leaf_op); - leaf_op = tree_->PopFromEOEOpStack(); - } - // Push ourselves to the stack in case one of our ascendants is repeat too. - tree_->AddToEOEOpStack(shared_from_this()); - return Status::OK(); -} - -// Base-class override for setting specific RepeatOp configurations. This code will be called -// during the execution tree prepare phase BEFORE traversing down to child operators. -uint32_t RepeatOp::PrepareFlags() const { return ExecutionTree::kDePrepRepeat; } - // This function returns the buffer that is at the top of our output connector. The caller is // typically our parent node, when the parent is asking us to provide the next buffer of data. 
// Since RepeatOp is an inlined op, getting a buffer from us will simply bounce you to get @@ -130,7 +110,8 @@ Status RepeatOp::GetNextBuffer(std::unique_ptr *p_buffer, int32_t wo // Base-class override for handling cases when an eoe is received. Status RepeatOp::EoeReceived(int32_t worker_id) { repeat_count_++; - MS_LOG(DEBUG) << "Repeat operator end of epoch message received. Repeat count is now: " << repeat_count_ << "."; + MS_LOG(DEBUG) << "Repeat operator (" << operator_id_ + << ") end of epoch message received. Repeat count is now: " << repeat_count_ << "."; bool repeated = BitTest(op_ctrl_flags_, kDeOpRepeated); bool last_repeat = BitTest(op_ctrl_flags_, kDeOpLastRepeat); // If we've reached the requested repeat count, then flag the eoe nodes @@ -149,8 +130,12 @@ Status RepeatOp::EoeReceived(int32_t worker_id) { return Status::OK(); } - // base-class ResetSubtree - return (DatasetOp::ResetSubtree()); + // Invoke a reset against the eoe nodes only. + for (auto &eoe_op : eoe_ops_) { + RETURN_IF_NOT_OK(eoe_op->Reset()); + } + + return Status::OK(); } // Class functor operator () override. @@ -178,6 +163,18 @@ int32_t RepeatOp::num_consumers() const { } } +// Drive reset actions if needed +Status RepeatOp::Reset() { + // If there's nested repeats, an ascendant repeat may have ourself listed as an eoe op. + // In that case, we now have to bounce the reset down to our own eoe ops. + MS_LOG(DEBUG) << "Repeat operator (" << operator_id_ << ") reset."; + for (auto &eoe_op : eoe_ops_) { + RETURN_IF_NOT_OK(eoe_op->Reset()); + } + state_ = OpState::kDeOpRunning; + return Status::OK(); +} + int32_t RepeatOp::num_producers() const { if (child_.empty() || child_[0] == nullptr) { MS_LOG(DEBUG) << "Repeat operator, pointer to child node is null. 
Returning 0."; @@ -187,6 +184,12 @@ int32_t RepeatOp::num_producers() const { } } +// Pre-Visitor accept method for NodePass +Status RepeatOp::PreAccept(NodePass *p, bool *modified) { + // Downcast shared pointer then call the pre-visitation + return p->PreRunOnNode(shared_from_base(), modified); +} + // Visitor accept method for NodePass Status RepeatOp::Accept(NodePass *p, bool *modified) { // Downcast shared pointer then call visitor diff --git a/mindspore/ccsrc/dataset/engine/datasetops/repeat_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/repeat_op.h similarity index 85% rename from mindspore/ccsrc/dataset/engine/datasetops/repeat_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/repeat_op.h index bba85c3bb5..f5259de30e 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/repeat_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/repeat_op.h @@ -18,8 +18,9 @@ #include #include +#include #include -#include "dataset/engine/datasetops/pipeline_op.h" +#include "minddata/dataset/engine/datasetops/pipeline_op.h" namespace mindspore { namespace dataset { @@ -82,14 +83,6 @@ class RepeatOp : public PipelineOp { // @return Status - The error code return Status operator()() override; - // Base-class override for setting specific RepeatOp configurations. This code will be called - // during the execution tree prepare phase BEFORE traversing down to child operators. - uint32_t PrepareFlags() const override; - - // Base-class override for executing specific RepeatOp configurations. This code will be called - // during the execution tree post-prepare phase when it is visiting this operator. - Status PrepareNodePostAction() override; - // This function returns the buffer that is at the top of our output connector. The caller is // typically our parent node, when the parent is asking us to provide the next buffer of data. 
// Since RepeatOp is an inlined op, getting a buffer from us will simply bounce you to get @@ -110,6 +103,10 @@ class RepeatOp : public PipelineOp { // @param worker_id - The worker id Status EofReceived(int32_t worker_id) override; + /// \brief reset Op + /// \@return Status - The error code return + Status Reset() override; + // Base-class override. Return the number of workers in the first parent. // @param workerId - The worker id int32_t num_consumers() const override; @@ -118,16 +115,26 @@ class RepeatOp : public PipelineOp { // @param workerId - The worker id int32_t num_producers() const override; - // Base-class override for NodePass visitor acceptor. - // @param p - Pointer to the NodePass to be accepted. - // @param modified - Whether this node visit modified the pipeline. - // @return - Status of the node visit. + /// \brief Base-class override for NodePass pre-visit acceptor + /// \param[in] p The node to visit + /// \param[out] modified Indicator if the node was modified + /// \return Status of the node visit + Status PreAccept(NodePass *p, bool *modified) override; + + /// \brief Base-class override for NodePass visitor acceptor + /// \param[in] p The node to visit + /// \param[out] modified Indicator if the node was modified + /// \return Status of the node visit Status Accept(NodePass *p, bool *modified) override; // Op name getter // @return Name of the current Op std::string Name() const override { return "RepeatOp"; } + /// \brief Adds an operator to the repeat ops list of tracked leaf/eoe nodes + /// \param[in] eoe_op The input leaf/eoe operator to add to the list + void AddToEoeList(std::shared_ptr eoe_op) { eoe_ops_.push_back(std::move(eoe_op)); } + private: int32_t max_repeats_; // The number of repeats that the user requested int32_t repeat_count_; // A counter for the current number of executed repeats diff --git a/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.cc 
similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.cc index f86fcc602b..0eb5f29eaf 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.cc @@ -25,14 +25,14 @@ #include #include -#include "dataset/core/config_manager.h" -#include "dataset/engine/datasetops/shuffle_op.h" -#include "dataset/engine/dataset_iterator.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/opt/pass.h" -#include "dataset/util/random.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/engine/datasetops/shuffle_op.h" +#include "minddata/dataset/engine/dataset_iterator.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/opt/pass.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/util/status.h" #include "utils/log_adapter.h" diff --git a/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.h similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.h index 14b1e4511e..86bea7cc77 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.h @@ -24,11 +24,11 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/engine/dataset_iterator.h" -#include "dataset/engine/datasetops/pipeline_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/engine/dataset_iterator.h" +#include 
"minddata/dataset/engine/datasetops/pipeline_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/skip_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/skip_op.cc similarity index 93% rename from mindspore/ccsrc/dataset/engine/datasetops/skip_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/skip_op.cc index f6b0fe689c..2fe8cbeaa6 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/skip_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/skip_op.cc @@ -17,12 +17,12 @@ #include #include -#include "dataset/core/config_manager.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/datasetops/skip_op.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/engine/opt/pass.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/datasetops/skip_op.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/opt/pass.h" #include "utils/log_adapter.h" diff --git a/mindspore/ccsrc/dataset/engine/datasetops/skip_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/skip_op.h similarity index 98% rename from mindspore/ccsrc/dataset/engine/datasetops/skip_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/skip_op.h index 4cb658b2a7..a717d0efa4 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/skip_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/skip_op.h @@ -19,7 +19,7 @@ #include #include #include -#include "dataset/engine/datasetops/pipeline_op.h" +#include "minddata/dataset/engine/datasetops/pipeline_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/CMakeLists.txt 
b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/CMakeLists.txt new file mode 100644 index 0000000000..389e3f5af6 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/CMakeLists.txt @@ -0,0 +1,32 @@ +add_subdirectory(sampler) +file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") +set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) + +set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES + io_block.cc + image_folder_op.cc + mnist_op.cc + coco_op.cc + cifar_op.cc + random_data_op.cc + celeba_op.cc + text_file_op.cc + clue_op.cc + ) + +set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES + ${DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES} + mindrecord_op.cc + tf_reader_op.cc + ) + +if (ENABLE_PYTHON) + set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES + ${DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES} + generator_op.cc + voc_op.cc + manifest_op.cc + ) +endif() + +add_library(engine-datasetops-source OBJECT ${DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES}) \ No newline at end of file diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.cc similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.cc index c7a4269a39..9d7d5622a6 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.cc @@ -13,16 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/source/celeba_op.h" +#include "minddata/dataset/engine/datasetops/source/celeba_op.h" #include #include -#include "dataset/core/config_manager.h" -#include "dataset/util/path.h" -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" -#include "dataset/engine/data_schema.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/kernels/image/image_utils.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/opt/pass.h" +#include "minddata/dataset/kernels/image/image_utils.h" namespace mindspore { namespace dataset { @@ -408,6 +409,12 @@ Status CelebAOp::Reset() { return Status::OK(); } +// Visitor accept method for NodePass +Status CelebAOp::Accept(NodePass *p, bool *modified) { + // Downcast shared pointer then call visitor + return p->RunOnNode(shared_from_base(), modified); +} + Status CelebAOp::ComputeColMap() { // Set the column name map (base class field) if (column_name_id_map_.empty()) { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.h similarity index 92% rename from mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.h index a6fa495a14..ef183f8e65 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.h @@ -24,12 +24,12 @@ #include #include -#include "dataset/util/status.h" -#include "dataset/engine/data_schema.h" -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/engine/datasetops/source/sampler/sampler.h" -#include "dataset/util/queue.h" 
-#include "dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" #define CLOSE_FILE(attr_file, pairition_file) \ do { \ @@ -169,6 +169,12 @@ class CelebAOp : public ParallelOp, RandomAccessOp { // @return Status - The error code return Status AddIOBlock(std::unique_ptr *data_buffer); + /// \brief Base-class override for NodePass visitor acceptor + /// \param[in] p Pointer to the NodePass to be accepted + /// \param[out] modified Indicator if the node was changed at all + /// \return Status of the node visit + Status Accept(NodePass *p, bool *modified) override; + // Op name getter // @return Name of the current Op std::string Name() const { return "CelebAOp"; } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.cc similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.cc index 8dd615a8c1..06be682bfd 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/source/cifar_op.h" +#include "minddata/dataset/engine/datasetops/source/cifar_op.h" #include #include @@ -21,11 +21,12 @@ #include #include "common/utils.h" -#include "dataset/core/config_manager.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/execution_tree.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/opt/pass.h" namespace mindspore { namespace dataset { @@ -450,6 +451,12 @@ Status CifarOp::CountTotalRows(const std::string &dir, bool isCIFAR10, int64_t * } } +// Visitor accept method for NodePass +Status CifarOp::Accept(NodePass *p, bool *modified) { + // Downcast shared pointer then call visitor + return p->RunOnNode(shared_from_base(), modified); +} + Status CifarOp::ComputeColMap() { // set the column name map (base class field) if (column_name_id_map_.empty()) { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.h similarity index 89% rename from mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.h index 917b23db94..60169f32bf 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.h @@ -22,17 +22,17 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/data_schema.h" -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/engine/datasetops/source/io_block.h" -#include 
"dataset/engine/datasetops/source/sampler/sampler.h" -#include "dataset/util/path.h" -#include "dataset/util/queue.h" -#include "dataset/util/services.h" -#include "dataset/util/status.h" -#include "dataset/util/wait_post.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/util/services.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/util/wait_post.h" namespace mindspore { namespace dataset { @@ -155,6 +155,12 @@ class CifarOp : public ParallelOp, public RandomAccessOp { // @return static Status CountTotalRows(const std::string &dir, bool isCIFAR10, int64_t *count); + /// \brief Base-class override for NodePass visitor acceptor + /// \param[in] p Pointer to the NodePass to be accepted + /// \param[out] modified Indicator if the node was changed at all + /// \return Status of the node visit + Status Accept(NodePass *p, bool *modified) override; + // Op name getter // @return Name of the current Op std::string Name() const override { return "CifarOp"; } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/clue_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/clue_op.cc similarity index 98% rename from mindspore/ccsrc/dataset/engine/datasetops/source/clue_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/clue_op.cc index 9fceb6f333..958514583a 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/clue_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/clue_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the 
License. */ -#include "dataset/engine/datasetops/source/clue_op.h" +#include "minddata/dataset/engine/datasetops/source/clue_op.h" #include #include @@ -21,12 +21,12 @@ #include #include -#include "dataset/core/config_manager.h" -#include "dataset/util/task_manager.h" -#include "dataset/engine/jagged_connector.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/engine/datasetops/source/io_block.h" -#include "dataset/util/random.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/util/task_manager.h" +#include "minddata/dataset/engine/jagged_connector.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/util/random.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/clue_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/clue_op.h similarity index 98% rename from mindspore/ccsrc/dataset/engine/datasetops/source/clue_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/clue_op.h index 487ed0d47f..ab429561ec 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/clue_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/clue_op.h @@ -23,9 +23,9 @@ #include #include -#include "dataset/util/auto_index.h" -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/util/auto_index.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/coco_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.cc similarity index 97% rename from mindspore/ccsrc/dataset/engine/datasetops/source/coco_op.cc rename to 
mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.cc index 92f6794769..daef2f284b 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/coco_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.cc @@ -13,17 +13,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/datasetops/source/coco_op.h" +#include "minddata/dataset/engine/datasetops/source/coco_op.h" #include #include #include #include "common/utils.h" -#include "dataset/core/config_manager.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/execution_tree.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/opt/pass.h" namespace mindspore { namespace dataset { @@ -624,6 +625,12 @@ Status CocoOp::GetClassIndexing(const std::string &dir, const std::string &file, return Status::OK(); } +// Visitor accept method for NodePass +Status CocoOp::Accept(NodePass *p, bool *modified) { + // Downcast shared pointer then call visitor + return p->RunOnNode(shared_from_base<CocoOp>(), modified); +} + Status CocoOp::ComputeColMap() { // Set the column name map (base class field) if (column_name_id_map_.empty()) { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/coco_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.h similarity index 94% rename from mindspore/ccsrc/dataset/engine/datasetops/source/coco_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.h index 3791853798..31070c26f5 100644 --- 
a/mindspore/ccsrc/dataset/engine/datasetops/source/coco_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.h @@ -23,17 +23,17 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/data_schema.h" -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/engine/datasetops/source/io_block.h" -#include "dataset/engine/datasetops/source/sampler/sampler.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/path.h" -#include "dataset/util/queue.h" -#include "dataset/util/status.h" -#include "dataset/util/wait_post.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/util/wait_post.h" namespace mindspore { namespace dataset { @@ -200,6 +200,12 @@ class CocoOp : public ParallelOp, public RandomAccessOp { static Status GetClassIndexing(const std::string &dir, const std::string &task_type, const std::string &task_mode, std::vector>> *output_class_indexing); + /// \brief Base-class override for NodePass visitor acceptor + /// \param[in] p Pointer to the NodePass to be accepted + /// \param[out] modified Indicator if the node was changed at all + /// \return Status of the node visit + Status Accept(NodePass *p, bool *modified) override; + private: // Initialize Sampler, calls sampler->Init() within // @return Status - The error code return diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc 
b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/generator_op.cc similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/generator_op.cc index 36c221fc16..773dfc78b6 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/generator_op.cc @@ -13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/datasetops/source/generator_op.h" +#include "minddata/dataset/engine/datasetops/source/generator_op.h" #include -#include "dataset/core/global_context.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/util/task_manager.h" -#include "dataset/engine/opt/pass.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/util/task_manager.h" +#include "minddata/dataset/engine/opt/pass.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/generator_op.h similarity index 95% rename from mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/generator_op.h index 98dd2d70a1..d09bfc3d71 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/generator_op.h @@ -22,11 +22,11 @@ #include #include #include -#include "dataset/core/data_type.h" -#include "dataset/core/tensor.h" -#include "dataset/engine/data_schema.h" -#include 
"dataset/engine/datasetops/pipeline_op.h" -#include "dataset/util/wait_post.h" +#include "minddata/dataset/core/data_type.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/datasetops/pipeline_op.h" +#include "minddata/dataset/util/wait_post.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.cc similarity index 97% rename from mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.cc index 837eae1e3c..85839303db 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.cc @@ -13,16 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/source/image_folder_op.h" +#include "minddata/dataset/engine/datasetops/source/image_folder_op.h" #include #include #include "common/utils.h" -#include "dataset/core/config_manager.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/engine/opt/pass.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/opt/pass.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.h similarity index 94% rename from mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.h index 6629fd6092..153751d3c5 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.h @@ -25,18 +25,18 @@ #include #include #include -#include "dataset/core/tensor.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/data_schema.h" -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/engine/datasetops/source/io_block.h" -#include "dataset/engine/datasetops/source/sampler/sampler.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/path.h" -#include "dataset/util/queue.h" -#include "dataset/util/services.h" -#include "dataset/util/status.h" -#include "dataset/util/wait_post.h" +#include "minddata/dataset/core/tensor.h" +#include 
"minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/util/services.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/util/wait_post.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/io_block.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/io_block.cc similarity index 98% rename from mindspore/ccsrc/dataset/engine/datasetops/source/io_block.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/io_block.cc index 0963f1a67a..2b2542430b 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/io_block.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/io_block.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" #include #include diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/io_block.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/io_block.h similarity index 98% rename from mindspore/ccsrc/dataset/engine/datasetops/source/io_block.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/io_block.h index 87b417f027..df26aa1fc1 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/io_block.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/io_block.h @@ -19,8 +19,8 @@ #include #include -#include "dataset/util/auto_index.h" -#include "dataset/util/status.h" +#include "minddata/dataset/util/auto_index.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc index e65da8707b..0476baf56f 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/source/manifest_op.h" +#include "minddata/dataset/engine/datasetops/source/manifest_op.h" #include #include @@ -21,11 +21,12 @@ #include #include "common/utils.h" -#include "dataset/core/config_manager.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/execution_tree.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/opt/pass.h" namespace mindspore { namespace dataset { @@ -416,6 +417,12 @@ Status ManifestOp::GetClassIndexing(const std::string &file, const py::dict &dic return Status::OK(); } +// Visitor accept method for NodePass +Status ManifestOp::Accept(NodePass *p, bool *modified) { + // Downcast shared pointer then call visitor + return p->RunOnNode(shared_from_base(), modified); +} + Status ManifestOp::ComputeColMap() { // Set the column name map (base class field) if (column_name_id_map_.empty()) { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.h similarity index 90% rename from mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.h index c180ea581d..bac8f04c94 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.h @@ -22,17 +22,17 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/data_schema.h" -#include "dataset/engine/datasetops/parallel_op.h" -#include 
"dataset/engine/datasetops/source/io_block.h" -#include "dataset/engine/datasetops/source/sampler/sampler.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/queue.h" -#include "dataset/util/services.h" -#include "dataset/util/status.h" -#include "dataset/util/wait_post.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/util/services.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/util/wait_post.h" namespace mindspore { namespace dataset { @@ -172,6 +172,12 @@ class ManifestOp : public ParallelOp, public RandomAccessOp { static Status GetClassIndexing(const std::string &file, const py::dict &dict, const std::string &usage, std::map *output_class_indexing); + /// \brief Base-class override for NodePass visitor acceptor + /// \param[in] p Pointer to the NodePass to be accepted + /// \param[out] modified Indicator if the node was changed at all + /// \return Status of the node visit + Status Accept(NodePass *p, bool *modified) override; + // Op name getter // @return Name of the current Op std::string Name() const override { return "ManifestOp"; } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc similarity index 97% rename from mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc index 2b9d010ebb..cf1493eb78 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc +++ 
b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/datasetops/source/mindrecord_op.h" +#include "minddata/dataset/engine/datasetops/source/mindrecord_op.h" #include #include @@ -22,14 +22,14 @@ #include #include "common/utils.h" -#include "dataset/core/config_manager.h" -#include "dataset/core/constants.h" -#include "dataset/core/global_context.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/datasetops/dataset_op.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/engine/opt/pass.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/datasetops/dataset_op.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/opt/pass.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.h similarity index 95% rename from mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.h index af405a8f5b..367505b172 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.h @@ -27,16 +27,16 @@ #include #include -#include "dataset/engine/data_schema.h" -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/engine/datasetops/source/io_block.h" -#include "dataset/util/queue.h" -#include "dataset/util/status.h" -#include 
"mindrecord/include/shard_column.h" -#include "mindrecord/include/shard_error.h" -#include "mindrecord/include/shard_reader.h" -#include "mindrecord/include/common/shard_utils.h" -#include "dataset/util/wait_post.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/util/status.h" +#include "minddata/mindrecord/include/shard_column.h" +#include "minddata/mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/shard_reader.h" +#include "minddata/mindrecord/include/common/shard_utils.h" +#include "minddata/dataset/util/wait_post.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mnist_op.cc similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mnist_op.cc index e98f8ae8c1..11ad18865e 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mnist_op.cc @@ -13,16 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/source/mnist_op.h" +#include "minddata/dataset/engine/datasetops/source/mnist_op.h" #include #include #include "common/utils.h" -#include "dataset/core/config_manager.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/execution_tree.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/opt/pass.h" namespace mindspore { namespace dataset { @@ -428,6 +429,12 @@ Status MnistOp::CountTotalRows(const std::string &dir, int64_t *count) { return Status::OK(); } +// Visitor accept method for NodePass +Status MnistOp::Accept(NodePass *p, bool *modified) { + // Downcast shared pointer then call visitor + return p->RunOnNode(shared_from_base(), modified); +} + Status MnistOp::ComputeColMap() { // set the column name map (base class field) if (column_name_id_map_.empty()) { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mnist_op.h similarity index 91% rename from mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mnist_op.h index 9bd6276a11..039f6b112f 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mnist_op.h @@ -23,16 +23,16 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/data_schema.h" -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/engine/datasetops/source/io_block.h" -#include 
"dataset/engine/datasetops/source/sampler/sampler.h" -#include "dataset/util/path.h" -#include "dataset/util/queue.h" -#include "dataset/util/status.h" -#include "dataset/util/wait_post.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/util/wait_post.h" namespace mindspore { namespace dataset { @@ -152,6 +152,12 @@ class MnistOp : public ParallelOp, public RandomAccessOp { // @return static Status CountTotalRows(const std::string &dir, int64_t *count); + /// \brief Base-class override for NodePass visitor acceptor + /// \param[in] p Pointer to the NodePass to be accepted + /// \param[out] modified Indicator if the node was changed at all + /// \return Status of the node visit + Status Accept(NodePass *p, bool *modified) override; + // Op name getter // @return Name of the current Op std::string Name() const override { return "MnistOp"; } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.cc similarity index 95% rename from mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.cc index 3a865d8d69..46f3adfa62 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.cc @@ -14,14 +14,15 @@ * limitations under the License. 
*/ -#include "dataset/engine/datasetops/source/random_data_op.h" +#include "minddata/dataset/engine/datasetops/source/random_data_op.h" #include #include -#include "dataset/engine/execution_tree.h" -#include "dataset/core/config_manager.h" -#include "dataset/util/random.h" -#include "dataset/util/wait_post.h" -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/util/wait_post.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/opt/pass.h" namespace mindspore { namespace dataset { @@ -406,6 +407,12 @@ Status RandomDataOp::Reset() { return Status::OK(); } +// Visitor accept method for NodePass +Status RandomDataOp::Accept(NodePass *p, bool *modified) { + // Downcast shared pointer then call visitor + return p->RunOnNode(shared_from_base(), modified); +} + Status RandomDataOp::ComputeColMap() { // Extract the column name mapping from the schema and save it in the class. if (column_name_id_map_.empty()) { @@ -415,15 +422,5 @@ Status RandomDataOp::ComputeColMap() { } return Status::OK(); } - -// During tree prepare phase, operators may have specific post-operations to perform depending on -// their role. -Status RandomDataOp::PrepareNodePostAction() { - // Run common code from super class before adding RandomDataOp specific handling - RETURN_IF_NOT_OK(ParallelOp::PrepareNodePostAction()); - // Specific handling for this op, we need to do cache op work to assign the sampler to the cache. 
- RETURN_IF_NOT_OK(DatasetOp::SaveSamplerForCache(false)); - return Status::OK(); -} } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.h similarity index 94% rename from mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.h index b2af27dda3..c77695439d 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.h @@ -23,12 +23,12 @@ #include #include #include -#include "dataset/util/status.h" -#include "dataset/core/tensor.h" -#include "dataset/core/data_type.h" -#include "dataset/engine/data_schema.h" -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/util/wait_post.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/data_type.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/util/wait_post.h" namespace mindspore { namespace dataset { @@ -203,12 +203,6 @@ class RandomDataOp : public ParallelOp { // @return Name of the current Op std::string Name() const override { return "RandomDataOp"; } - // During tree prepare phase, operators may have specific post-operations to perform depending on - // their role. - // @notes Derived versions of this function should always call it's superclass version first - // before providing their own implementations. - Status PrepareNodePostAction() override; - private: /** * The entry point code for when workers are launched @@ -266,6 +260,12 @@ class RandomDataOp : public ParallelOp { return ++buffer_id_; } + // Base-class override for NodePass visitor acceptor. 
+ // @param p - Pointer to the NodePass to be accepted. + // @param modified - Whether this node visit modified the pipeline. + // @return - Status of the node visit. + Status Accept(NodePass *p, bool *modified) override; + // Private function for computing the assignment of the column name map. // @return - Status Status ComputeColMap() override; diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/CMakeLists.txt similarity index 51% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/CMakeLists.txt index 5209d9ba4a..1335d987e8 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/CMakeLists.txt @@ -1,12 +1,21 @@ file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) -add_library(engine-datasetops-source-sampler OBJECT + +set(DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES distributed_sampler.cc pk_sampler.cc - python_sampler.cc random_sampler.cc sampler.cc sequential_sampler.cc subset_random_sampler.cc weighted_random_sampler.cc ) + +if (ENABLE_PYTHON) + set(DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES + ${DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES} + python_sampler.cc + ) +endif() + +add_library(engine-datasetops-source-sampler OBJECT ${DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES}) diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.cc similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.cc rename to 
mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.cc index 9f4a9cf55c..2b5e7c67c8 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.cc @@ -13,13 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/datasetops/source/sampler/distributed_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h" #include #include -#include "dataset/engine/data_buffer.h" -#include "dataset/util/random.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/util/random.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h index 7083580c6c..76bcf052f9 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h @@ -21,7 +21,7 @@ #include #include -#include "dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/pk_sampler.cc similarity index 97% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/pk_sampler.cc 
index cd2cadb9ff..770c24c8c5 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/pk_sampler.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/datasetops/source/sampler/pk_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/pk_sampler.h" #include #include -#include "dataset/util/random.h" +#include "minddata/dataset/util/random.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/pk_sampler.h similarity index 97% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/pk_sampler.h index cde8a75b5b..aed61fa273 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/pk_sampler.h @@ -22,7 +22,7 @@ #include #include -#include "dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/python_sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/python_sampler.cc similarity index 98% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/python_sampler.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/python_sampler.cc index d204c55ce9..50c67bca6c 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/python_sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/python_sampler.cc @@ -13,7 +13,7 @@ * See the License for the 
specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/datasetops/source/sampler/python_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/python_sampler.h" #include diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/python_sampler.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/python_sampler.h similarity index 97% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/python_sampler.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/python_sampler.h index 7d653b2087..61716feb94 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/python_sampler.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/python_sampler.h @@ -19,7 +19,7 @@ #include #include -#include "dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/random_sampler.cc similarity index 97% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/random_sampler.cc index db0a96ea3a..998dee2a07 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/random_sampler.cc @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/source/sampler/random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h" #include #include #include -#include "dataset/util/random.h" +#include "minddata/dataset/util/random.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/random_sampler.h similarity index 97% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/random_sampler.h index b1c54eb98c..6e21b088b9 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/random_sampler.h @@ -20,7 +20,7 @@ #include #include -#include "dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.cc similarity index 98% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.cc index 1584166dc3..60d75d2eec 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" #include @@ -89,6 +89,7 @@ void Sampler::Print(std::ostream &out, bool show_all) const { } } +#ifdef ENABLE_PYTHON Status Sampler::GetAllIdsThenReset(py::array *data) { std::unique_ptr db; std::shared_ptr sample_ids; @@ -120,6 +121,7 @@ Status Sampler::GetAllIdsThenReset(py::array *data) { RETURN_IF_NOT_OK(ResetSampler()); return Status::OK(); } +#endif Status Sampler::SetNumSamples(int64_t num_samples) { CHECK_FAIL_RETURN_UNEXPECTED(num_samples >= 0, "num_samples is negative"); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.h similarity index 95% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.h index 34c3cb7935..4cae935a42 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.h @@ -22,10 +22,10 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/data_schema.h" -#include "dataset/engine/datasetops/dataset_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/datasetops/dataset_op.h" namespace mindspore { namespace dataset { @@ -74,8 +74,11 @@ class Sampler { // @return - The error code return virtual Status GetNextSample(std::unique_ptr *out_buffer) = 0; +// This function only called by python layer. Not needed by Android. 
+#ifdef ENABLE_PYTHON // return all ids in one epoch as a numpy array, then call reset Status GetAllIdsThenReset(py::array *data); +#endif // for next epoch of sampleIds // @return - The error code return @@ -155,5 +158,4 @@ class Sampler { }; } // namespace dataset } // namespace mindspore - #endif // DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SAMPLER_H_ diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.cc similarity index 97% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.cc index 28598da55f..1cc4ac831a 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" #include #include diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h similarity index 97% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h index 06f084fb7a..c6ccd0d1eb 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h @@ -19,7 +19,7 @@ #include #include -#include "dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc similarity index 95% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc index 08a623ed1b..db2078795e 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc @@ -13,16 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/source/sampler/subset_random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h" #include #include #include #include -#include "dataset/core/config_manager.h" -#include "dataset/core/global_context.h" -#include "dataset/util/random.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/util/random.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h similarity index 97% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h index ffc7cb17bc..fccc15e57b 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h @@ -20,7 +20,7 @@ #include #include -#include "dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc index 6bf3d2d85e..13863143c0 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc @@ -13,7 +13,7 @@ * See 
the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" #include #include @@ -21,8 +21,8 @@ #include #include -#include "dataset/core/global_context.h" -#include "dataset/util/random.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/util/random.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h similarity index 97% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h index 1fbe29ed80..b1a531abe9 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h @@ -21,7 +21,7 @@ #include #include -#include "dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.cc similarity index 97% rename from mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.cc index 818b5ab3f4..c1f5b13a94 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.cc @@ -22,13 +22,13 @@ #include #include "common/utils.h" -#include 
"dataset/engine/datasetops/source/text_file_op.h" -#include "dataset/core/config_manager.h" -#include "dataset/util/task_manager.h" -#include "dataset/util/wait_post.h" -#include "dataset/util/random.h" -#include "dataset/engine/datasetops/source/io_block.h" -#include "dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/datasetops/source/text_file_op.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/util/task_manager.h" +#include "minddata/dataset/util/wait_post.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/engine/execution_tree.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.h similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.h index 5b787d4dad..68c226ab80 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.h @@ -23,14 +23,14 @@ #include #include -#include "dataset/util/status.h" -#include "dataset/util/auto_index.h" -#include "dataset/engine/data_schema.h" -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/engine/datasetops/source/io_block.h" -#include "dataset/util/queue.h" -#include "dataset/util/wait_post.h" -#include "dataset/engine/jagged_connector.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/util/auto_index.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/util/wait_post.h" +#include 
"minddata/dataset/engine/jagged_connector.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.cc similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.cc index 48f13ff766..ae7907b5ce 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/datasetops/source/tf_reader_op.h" +#include "minddata/dataset/engine/datasetops/source/tf_reader_op.h" #include #include @@ -28,21 +28,21 @@ #include "proto/example.pb.h" #include "./securec.h" #include "common/utils.h" -#include "dataset/core/config_manager.h" -#include "dataset/core/global_context.h" -#include "dataset/engine/connector.h" -#include "dataset/engine/data_schema.h" -#include "dataset/engine/datasetops/source/io_block.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/engine/jagged_connector.h" -#include "dataset/engine/opt/pass.h" -#include "dataset/util/path.h" -#include "dataset/util/queue.h" -#include "dataset/util/random.h" -#include "dataset/util/status.h" -#include "dataset/util/task_manager.h" -#include "dataset/util/wait_post.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/connector.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/execution_tree.h" +#include 
"minddata/dataset/engine/jagged_connector.h" +#include "minddata/dataset/engine/opt/pass.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/util/task_manager.h" +#include "minddata/dataset/util/wait_post.h" #include "utils/system/crc32c.h" namespace mindspore { @@ -1019,31 +1019,28 @@ Status TFReaderOp::ComputeColMap() { return Status::OK(); } +// Brief If a cache has been added into the ascendant tree over this tf reader, then the cache will be executing +// a sampler for fetching the data. As such, any options in the tf reader need to be reset to its defaults so +// that this tf reader will produce the full set of data into the cache. +void TFReaderOp::MakeSimpleProducer() { + device_id_ = 0; + num_devices_ = 1; + total_rows_ = 0; + shuffle_files_ = false; + equal_rows_per_shard_ = false; +} + // During tree prepare phase, operators may have specific post-operations to perform depending on // their role. Status TFReaderOp::PrepareNodePostAction() { // Run common code from super class before adding TFReaderOp specific handling RETURN_IF_NOT_OK(ParallelOp::PrepareNodePostAction()); - // Specific handling for this op, we need to do cache op work so assign the sampler to the cache - // TF is a special case because it can support file-based sharding/shuffling, or, if there - // is a cache, then it can also do row-based sampler using the sampler on the cache. - // Thus, pass true for random access op flag when saving the sampler. This is a special case, - // since usually a non-mappable dataset would pass false here. 
- RETURN_IF_NOT_OK(DatasetOp::SaveSamplerForCache(true)); - // Now that the sampler has been saved for the cache, we need to adjust the TFReaderOp to turn it into // a simpler producer of all data (no shuffling or sharding or anything) - if (BitTest(tree_->PrepareFlags(), ExecutionTree::kDePrepCache)) { - device_id_ = 0; - num_devices_ = 1; - total_rows_ = 0; - shuffle_files_ = false; - equal_rows_per_shard_ = false; - sampler_.reset(); // Normally SaveSampler code did this for us, but we passed in true above (See comment) - } else { + if (!BitTest(tree_->PrepareFlags(), ExecutionTree::kDePrepCache)) { // This sanity check had been delayed until now in the prepare loop. - // If we are not in a cache path, then we can validate the the file-based sharding config. + // If we are not in a cache path, then we can validate the file-based sharding config. // If we are in a cache path, there is no file-based sharding so the check is not correct in that // situation. if (!equal_rows_per_shard_ && dataset_files_list_.size() < static_cast(num_devices_)) { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.h similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.h index 9226c4c6c5..c03f3957e9 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.h @@ -25,12 +25,12 @@ #include #include -#include "dataset/util/wait_post.h" -#include "dataset/util/auto_index.h" -#include "dataset/util/status.h" -#include "dataset/core/tensor.h" -#include "dataset/engine/data_schema.h" -#include "dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/util/wait_post.h" +#include "minddata/dataset/util/auto_index.h" +#include "minddata/dataset/util/status.h" +#include 
"minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" namespace dataengine { class Example; @@ -246,6 +246,11 @@ class TFReaderOp : public ParallelOp { // @return Vector of the input file names std::vector FileNames() { return dataset_files_list_; } + /// \Brief If a cache has been added into the ascendant tree over this tf reader, then the cache will be executing + /// a sampler for fetching the data. As such, any options in the tf reader need to be reset to its defaults so + /// that this tf reader will produce the full set of data into the cache. + void MakeSimpleProducer(); + // During tree prepare phase, operators may have specific post-operations to perform depending on // their role. // @notes Derived versions of this function should always call it's superclass version first diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc similarity index 92% rename from mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc index 958aa65b06..e90d423ef4 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc @@ -13,18 +13,19 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/source/voc_op.h" +#include "minddata/dataset/engine/datasetops/source/voc_op.h" #include #include #include #include "./tinyxml2.h" #include "common/utils.h" -#include "dataset/core/config_manager.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/execution_tree.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/opt/pass.h" using tinyxml2::XMLDocument; using tinyxml2::XMLElement; @@ -69,7 +70,7 @@ Status VOCOp::Builder::Build(std::shared_ptr *ptr) { RETURN_IF_NOT_OK(builder_schema_->AddColumn( ColDescriptor(std::string(kColumnImage), DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); RETURN_IF_NOT_OK(builder_schema_->AddColumn( - ColDescriptor(std::string(kColumnAnnotation), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); + ColDescriptor(std::string(kColumnAnnotation), DataType(DataType::DE_FLOAT32), TensorImpl::kFlexible, 1))); } *ptr = std::make_shared(builder_task_type_, builder_task_mode_, builder_dir_, builder_labels_to_read_, builder_num_workers_, builder_rows_per_buffer_, builder_op_connector_size_, @@ -308,30 +309,30 @@ Status VOCOp::ParseAnnotationBbox(const std::string &path) { } while (object != nullptr) { std::string label_name; - uint32_t xmin = 0, ymin = 0, xmax = 0, ymax = 0, truncated = 0, difficult = 0; + float xmin = 0.0, ymin = 0.0, xmax = 0.0, ymax = 0.0, truncated = 0.0, difficult = 0.0; XMLElement *name_node = object->FirstChildElement("name"); if (name_node != nullptr && name_node->GetText() != 0) label_name = name_node->GetText(); XMLElement *truncated_node = 
object->FirstChildElement("truncated"); - if (truncated_node != nullptr) truncated = truncated_node->UnsignedText(); + if (truncated_node != nullptr) truncated = truncated_node->FloatText(); XMLElement *difficult_node = object->FirstChildElement("difficult"); - if (difficult_node != nullptr) difficult = difficult_node->UnsignedText(); + if (difficult_node != nullptr) difficult = difficult_node->FloatText(); XMLElement *bbox_node = object->FirstChildElement("bndbox"); if (bbox_node != nullptr) { XMLElement *xmin_node = bbox_node->FirstChildElement("xmin"); - if (xmin_node != nullptr) xmin = xmin_node->UnsignedText(); + if (xmin_node != nullptr) xmin = xmin_node->FloatText(); XMLElement *ymin_node = bbox_node->FirstChildElement("ymin"); - if (ymin_node != nullptr) ymin = ymin_node->UnsignedText(); + if (ymin_node != nullptr) ymin = ymin_node->FloatText(); XMLElement *xmax_node = bbox_node->FirstChildElement("xmax"); - if (xmax_node != nullptr) xmax = xmax_node->UnsignedText(); + if (xmax_node != nullptr) xmax = xmax_node->FloatText(); XMLElement *ymax_node = bbox_node->FirstChildElement("ymax"); - if (ymax_node != nullptr) ymax = ymax_node->UnsignedText(); + if (ymax_node != nullptr) ymax = ymax_node->FloatText(); } else { RETURN_STATUS_UNEXPECTED("bndbox dismatch in " + path); } if (label_name != "" && (class_index_.empty() || class_index_.find(label_name) != class_index_.end()) && xmin > 0 && ymin > 0 && xmax > xmin && ymax > ymin) { - std::vector bbox_list = {xmin, ymin, xmax - xmin, ymax - ymin, truncated, difficult}; + std::vector bbox_list = {xmin, ymin, xmax - xmin, ymax - ymin, truncated, difficult}; bbox.emplace_back(std::make_pair(label_name, bbox_list)); label_index_[label_name] = 0; } @@ -376,17 +377,17 @@ Status VOCOp::ReadImageToTensor(const std::string &path, const ColDescriptor &co Status VOCOp::ReadAnnotationToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr *tensor) { Bbox bbox_info = label_map_[path]; - std::vector 
bbox_row; + std::vector bbox_row; dsize_t bbox_column_num = 0, bbox_num = 0; for (auto box : bbox_info) { if (label_index_.find(box.first) != label_index_.end()) { - std::vector bbox; + std::vector bbox; + bbox.insert(bbox.end(), box.second.begin(), box.second.end()); if (class_index_.find(box.first) != class_index_.end()) { - bbox.emplace_back(class_index_[box.first]); + bbox.push_back(static_cast(class_index_[box.first])); } else { - bbox.emplace_back(label_index_[box.first]); + bbox.push_back(static_cast(label_index_[box.first])); } - bbox.insert(bbox.end(), box.second.begin(), box.second.end()); bbox_row.insert(bbox_row.end(), bbox.begin(), bbox.end()); if (bbox_column_num == 0) { bbox_column_num = static_cast(bbox.size()); @@ -449,6 +450,11 @@ Status VOCOp::GetClassIndexing(const std::string &dir, const std::string &task_t return Status::OK(); } +// Visitor accept method for NodePass +Status VOCOp::Accept(NodePass *p, bool *modified) { + // Downcast shared pointer then call visitor + return p->RunOnNode(shared_from_base(), modified); +} Status VOCOp::ComputeColMap() { // Set the column name map (base class field) diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.h similarity index 91% rename from mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.h index 89875341ca..e0c46c7a94 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.h @@ -22,17 +22,17 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/data_schema.h" -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/engine/datasetops/source/io_block.h" -#include "dataset/engine/datasetops/source/sampler/sampler.h" -#include "dataset/kernels/image/image_utils.h" -#include 
"dataset/util/path.h" -#include "dataset/util/queue.h" -#include "dataset/util/status.h" -#include "dataset/util/wait_post.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/util/wait_post.h" namespace mindspore { namespace dataset { @@ -40,7 +40,7 @@ namespace dataset { template class Queue; -using Bbox = std::vector>>; +using Bbox = std::vector>>; class VOCOp : public ParallelOp, public RandomAccessOp { public: @@ -205,6 +205,12 @@ class VOCOp : public ParallelOp, public RandomAccessOp { static Status GetClassIndexing(const std::string &dir, const std::string &task_type, const std::string &task_mode, const py::dict &dict, std::map *output_class_indexing); + /// \brief Base-class override for NodePass visitor acceptor + /// \param[in] p Pointer to the NodePass to be accepted + /// \param[out] modified Indicator if the node was changed at all + /// \return Status of the node visit + Status Accept(NodePass *p, bool *modified) override; + // Op name getter // @return Name of the current Op std::string Name() const override { return "VOCOp"; } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/take_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/take_op.cc similarity index 91% rename from mindspore/ccsrc/dataset/engine/datasetops/take_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/take_op.cc index 8bc449cdc9..d1f07983f7 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/take_op.cc +++ 
b/mindspore/ccsrc/minddata/dataset/engine/datasetops/take_op.cc @@ -17,12 +17,12 @@ #include #include "common/utils.h" -#include "dataset/core/config_manager.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/datasetops/take_op.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/engine/opt/pass.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/datasetops/take_op.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/opt/pass.h" namespace mindspore { namespace dataset { @@ -127,12 +127,6 @@ Status TakeOp::FillBuffer(std::unique_ptr *buffer, std::unique_ptrAddToEOEOpStack(shared_from_this()); - return Status::OK(); -} - // Visitor accept method for NodePass Status TakeOp::Accept(NodePass *p, bool *modified) { // Downcast shared pointer then call visitor diff --git a/mindspore/ccsrc/dataset/engine/datasetops/take_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/take_op.h similarity index 90% rename from mindspore/ccsrc/dataset/engine/datasetops/take_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/take_op.h index 9619a4409d..7f3f821bd8 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/take_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/take_op.h @@ -19,7 +19,7 @@ #include #include #include -#include "dataset/engine/datasetops/pipeline_op.h" +#include "minddata/dataset/engine/datasetops/pipeline_op.h" namespace mindspore { namespace dataset { @@ -78,12 +78,6 @@ class TakeOp : public PipelineOp { // @return Status - The error code return Status operator()() override; - // During tree prepare phase, operators may have specific post-operations to perform depending on - // their role. 
- // @notes Derived versions of this function should always call it's superclass version first - // before providing their own implementations. - Status PrepareNodePostAction() override; - // Base-class override for NodePass visitor acceptor. // @param p - Pointer to the NodePass to be accepted. // @param modified - Whether this node visit modified the pipeline. diff --git a/mindspore/ccsrc/dataset/engine/datasetops/zip_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/zip_op.cc similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/zip_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/zip_op.cc index 70bce16a89..88019c30fc 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/zip_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/zip_op.cc @@ -13,15 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/datasetops/zip_op.h" +#include "minddata/dataset/engine/datasetops/zip_op.h" #include #include -#include "dataset/core/constants.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/opt/pass.h" -#include "dataset/core/config_manager.h" -#include "dataset/core/global_context.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/opt/pass.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/global_context.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/zip_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/zip_op.h similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/zip_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/zip_op.h index fad3c22eaa..c9466e26e2 100644 --- 
a/mindspore/ccsrc/dataset/engine/datasetops/zip_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/zip_op.h @@ -22,10 +22,10 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/engine/dataset_iterator.h" -#include "dataset/engine/datasetops/pipeline_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/dataset_iterator.h" +#include "minddata/dataset/engine/datasetops/pipeline_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/db_connector.h b/mindspore/ccsrc/minddata/dataset/engine/db_connector.h similarity index 96% rename from mindspore/ccsrc/dataset/engine/db_connector.h rename to mindspore/ccsrc/minddata/dataset/engine/db_connector.h index 54909f51ba..4a5c20bc12 100644 --- a/mindspore/ccsrc/dataset/engine/db_connector.h +++ b/mindspore/ccsrc/minddata/dataset/engine/db_connector.h @@ -18,9 +18,9 @@ #include #include -#include "dataset/engine/connector.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/core/constants.h" +#include "minddata/dataset/engine/connector.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/core/constants.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/execution_tree.cc b/mindspore/ccsrc/minddata/dataset/engine/execution_tree.cc similarity index 84% rename from mindspore/ccsrc/dataset/engine/execution_tree.cc rename to mindspore/ccsrc/minddata/dataset/engine/execution_tree.cc index 385722e257..55dec24e79 100644 --- a/mindspore/ccsrc/dataset/engine/execution_tree.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/execution_tree.cc @@ -13,16 +13,19 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/execution_tree.h" #include #include -#include "dataset/engine/datasetops/dataset_op.h" -#include "dataset/engine/datasetops/shuffle_op.h" -#include "dataset/util/task_manager.h" -#include "dataset/engine/opt/pass.h" -#include "dataset/engine/opt/pre/removal_pass.h" -#include "dataset/engine/perf/profiling.h" -#include "dataset/engine/perf/monitor.h" +#include "minddata/dataset/engine/datasetops/dataset_op.h" +#include "minddata/dataset/engine/datasetops/shuffle_op.h" +#include "minddata/dataset/util/task_manager.h" +#include "minddata/dataset/engine/opt/pass.h" +#include "minddata/dataset/engine/opt/pre/removal_pass.h" +#include "minddata/dataset/engine/opt/pre/cache_transform_pass.h" +#include "minddata/dataset/engine/opt/post/repeat_pass.h" +#include "mindspore/ccsrc/minddata/dataset/engine/opt/optional/tensor_op_fusion_pass.h" +#include "minddata/dataset/engine/perf/profiling.h" +#include "minddata/dataset/engine/perf/monitor.h" namespace mindspore { namespace dataset { @@ -33,6 +36,7 @@ ExecutionTree::ExecutionTree() : id_count_(0) { prepare_flags_ = kDePrepNone; perf_monitor_ = std::make_unique(this); profiling_manager_ = std::make_unique(this); + optimize_ = common::GetEnv("OPTIMIZE") == "true" ? 
true : false; } // Destructor @@ -200,8 +204,10 @@ Status ExecutionTree::Prepare() { // Pre optimization compulsory transformation RETURN_IF_NOT_OK(this->PrepareTreePreAction()); - // Optimization transformation - RETURN_IF_NOT_OK(this->Optimize()); + // If optional optimizations are enabled + if (optimize_) { + RETURN_IF_NOT_OK(this->Optimize()); + } // Post optimization compulsory transformation RETURN_IF_NOT_OK(this->PrepareTreePostAction()); @@ -215,25 +221,47 @@ Status ExecutionTree::PrepareTreePreAction() { bool modified = false; std::vector> pre_actions; // Construct pre actions - MS_LOG(INFO) << "Running pre pass"; - pre_actions.push_back(std::make_unique(RemovalPass())); + MS_LOG(INFO) << "Running pre pass loops."; + pre_actions.push_back(std::make_unique()); + pre_actions.push_back(std::make_unique()); // Apply pre action passes for (auto &pass : pre_actions) { RETURN_IF_NOT_OK(pass->Run(this, &modified)); } + MS_LOG(INFO) << "Pre passes complete."; return Status::OK(); } Status ExecutionTree::PrepareTreePostAction() { // The tree is ready to be prepared. 
tree_state_ = kDeTStatePrepare; + + bool modified = false; + std::vector> post_actions; + // Construct pre actions + MS_LOG(INFO) << "Running post pass loops."; + post_actions.push_back(std::make_unique()); + + // Apply post action passes + for (auto &pass : post_actions) { + RETURN_IF_NOT_OK(pass->Run(this, &modified)); + } + MS_LOG(INFO) << "Post passes complete."; + return Status::OK(); } Status ExecutionTree::Optimize() { - // auto pp = new PrinterPass(); - // bool modified = false; - // pp->Run(this, &modified); + // Vector of optimizations, currently only 1, add more as necessary + std::vector> optimizations; + optimizations.push_back(std::make_unique()); + // vector of flags for each optimization + std::vector modified(optimizations.size(), false); + for (auto i = 0; i < optimizations.size(); i++) { + auto m = false; + optimizations[i]->Run(this, &m); + modified[i] = m; + } return Status::OK(); } @@ -280,31 +308,5 @@ Status ExecutionTree::PrepareNode(const std::shared_ptr &dataset_op) return Status::OK(); } - -// Adds an operator to the eoe operator stack during prepare phase. -void ExecutionTree::AddToEOEOpStack(std::shared_ptr dataset_op) { eoe_stack_.push(dataset_op); } - -// Pops an operator from the eoe operator stack during prepare phase. -std::shared_ptr ExecutionTree::PopFromEOEOpStack() { - std::shared_ptr top_op = nullptr; - if (!eoe_stack_.empty()) { - top_op = eoe_stack_.top(); - eoe_stack_.pop(); - } - return top_op; -} - -// Adds a sampler to the sampler stack during prepare phase. -void ExecutionTree::AddToSamplerStack(std::shared_ptr sampler) { sampler_stack_.push(sampler); } - -// Pops an operator from the sampler stack during prepare phase. 
-std::shared_ptr ExecutionTree::PopFromSamplerStack() { - std::shared_ptr top_sampler = nullptr; - if (!sampler_stack_.empty()) { - top_sampler = sampler_stack_.top(); - sampler_stack_.pop(); - } - return top_sampler; -} } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/execution_tree.h b/mindspore/ccsrc/minddata/dataset/engine/execution_tree.h similarity index 88% rename from mindspore/ccsrc/dataset/engine/execution_tree.h rename to mindspore/ccsrc/minddata/dataset/engine/execution_tree.h index 5ebfa539ad..b62bf8e85d 100644 --- a/mindspore/ccsrc/dataset/engine/execution_tree.h +++ b/mindspore/ccsrc/minddata/dataset/engine/execution_tree.h @@ -21,9 +21,9 @@ #include #include #include -#include "dataset/engine/datasetops/dataset_op.h" -#include "dataset/util/status.h" -#include "mindspore/ccsrc/dataset/engine/perf/profiling.h" +#include "minddata/dataset/engine/datasetops/dataset_op.h" +#include "minddata/dataset/util/status.h" +#include "mindspore/ccsrc/minddata/dataset/engine/perf/profiling.h" namespace mindspore { namespace dataset { @@ -87,6 +87,8 @@ class ExecutionTree { // @return Shared pointer to the current operator std::shared_ptr get() { return nodes_[ind_]; } + bool operator==(const Iterator &rhs) { return nodes_[ind_] == rhs.nodes_[rhs.ind_]; } + bool operator!=(const Iterator &rhs) { return nodes_[ind_] != rhs.nodes_[rhs.ind_]; } int32_t NumNodes() { return nodes_.size(); } @@ -200,24 +202,6 @@ class ExecutionTree { // @return Status - The error code return Status PrepareNode(const std::shared_ptr &dataset_op); - /// Adds an operator to the eoe operator stack during prepare phase. - /// \param op - The dataset op to work add to eoe stack - /// \return Status - The error code return - void AddToEOEOpStack(std::shared_ptr dataset_op); - - /// Pops an operator from the eoe operator stack during prepare phase. 
- /// \return shared_ptr to the popped operator - std::shared_ptr PopFromEOEOpStack(); - - /// Adds a sampler to the sampler stack during prepare phase. - /// \param samplerop - The dataset op to work add to eoe stack - /// \return Status - The error code return - void AddToSamplerStack(std::shared_ptr sampler); - - /// Pops an operator from the sampler stack during prepare phase. - /// \return shared_ptr to the popped operator - std::shared_ptr PopFromSamplerStack(); - // Return the pointer to the TaskGroup // @return raw pointer to the TaskGroup TaskGroup *AllTasks() const { return tg_.get(); } @@ -232,6 +216,21 @@ class ExecutionTree { // Getter for profiling manager, no ownership ProfilingManager *GetProfilingManager() { return profiling_manager_.get(); } + // Set optional optimization if tree has not been prepared yet + Status SetOptimize(bool value) { + if (tree_state_ != kDeTStateInit && tree_state_ != kDeTStateBuilding) { + std::string optimize = (optimize_ == true) ? "true" : "false"; + std::string msg = "Tree has already been prepared with OPTIMIZE set to " + optimize; + RETURN_STATUS_UNEXPECTED(msg); + } else { + optimize_ = value; + return Status::OK(); + } + } + + // Optional optimizations status + bool OptimizationEnabled() const { return optimize_; } + private: // A helper functions for doing the recursive printing // @param dataset_op - The dataset op to print @@ -248,9 +247,10 @@ class ExecutionTree { TreeState tree_state_; // Tracking the current tree state std::unique_ptr perf_monitor_; // Performance Monitor std::unique_ptr profiling_manager_; // Profiling manager - std::stack> eoe_stack_; // A stack used during prepare phase - std::stack> sampler_stack_; // A stack used during prepare phase + bool optimize_; // Flag to enable optional optimizations }; + +inline bool operator==(const ExecutionTree::Iterator &lhs, const ExecutionTree::Iterator &rhs) { return lhs == rhs; } } // namespace dataset } // namespace mindspore diff --git 
a/mindspore/ccsrc/dataset/engine/gnn/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/engine/gnn/CMakeLists.txt similarity index 100% rename from mindspore/ccsrc/dataset/engine/gnn/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/engine/gnn/CMakeLists.txt diff --git a/mindspore/ccsrc/dataset/engine/gnn/edge.h b/mindspore/ccsrc/minddata/dataset/engine/gnn/edge.h similarity index 95% rename from mindspore/ccsrc/dataset/engine/gnn/edge.h rename to mindspore/ccsrc/minddata/dataset/engine/gnn/edge.h index 47314d97c2..c62c088bab 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/edge.h +++ b/mindspore/ccsrc/minddata/dataset/engine/gnn/edge.h @@ -20,9 +20,9 @@ #include #include -#include "dataset/util/status.h" -#include "dataset/engine/gnn/feature.h" -#include "dataset/engine/gnn/node.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/engine/gnn/feature.h" +#include "minddata/dataset/engine/gnn/node.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/gnn/feature.cc b/mindspore/ccsrc/minddata/dataset/engine/gnn/feature.cc similarity index 94% rename from mindspore/ccsrc/dataset/engine/gnn/feature.cc rename to mindspore/ccsrc/minddata/dataset/engine/gnn/feature.cc index e457947821..dba4a6fa60 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/feature.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/gnn/feature.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/gnn/feature.h" +#include "minddata/dataset/engine/gnn/feature.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/gnn/feature.h b/mindspore/ccsrc/minddata/dataset/engine/gnn/feature.h similarity index 94% rename from mindspore/ccsrc/dataset/engine/gnn/feature.h rename to mindspore/ccsrc/minddata/dataset/engine/gnn/feature.h index 7ce5967fbd..0d7eba1009 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/feature.h +++ b/mindspore/ccsrc/minddata/dataset/engine/gnn/feature.h @@ -18,8 +18,8 @@ #include -#include "dataset/core/tensor.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/gnn/graph.cc b/mindspore/ccsrc/minddata/dataset/engine/gnn/graph.cc similarity index 82% rename from mindspore/ccsrc/dataset/engine/gnn/graph.cc rename to mindspore/ccsrc/minddata/dataset/engine/gnn/graph.cc index a143bd4e38..9083eb4c4b 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/graph.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/gnn/graph.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/gnn/graph.h" +#include "minddata/dataset/engine/gnn/graph.h" #include #include @@ -21,8 +21,8 @@ #include #include -#include "dataset/core/tensor_shape.h" -#include "dataset/util/random.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/util/random.h" namespace mindspore { namespace dataset { @@ -125,13 +125,8 @@ Status Graph::GetNodesFromEdges(const std::vector &edge_list, std::s Status Graph::GetAllNeighbors(const std::vector &node_list, NodeType neighbor_type, std::shared_ptr *out) { - if (node_list.empty()) { - RETURN_STATUS_UNEXPECTED("Input node_list is empty."); - } - if (node_type_map_.find(neighbor_type) == node_type_map_.end()) { - std::string err_msg = "Invalid neighbor type:" + std::to_string(neighbor_type); - RETURN_STATUS_UNEXPECTED(err_msg); - } + CHECK_FAIL_RETURN_UNEXPECTED(!node_list.empty(), "Input node_list is empty."); + RETURN_IF_NOT_OK(CheckNeighborType(neighbor_type)); std::vector> neighbors; size_t max_neighbor_num = 0; @@ -161,6 +156,14 @@ Status Graph::CheckSamplesNum(NodeIdType samples_num) { return Status::OK(); } +Status Graph::CheckNeighborType(NodeType neighbor_type) { + if (node_type_map_.find(neighbor_type) == node_type_map_.end()) { + std::string err_msg = "Invalid neighbor type:" + std::to_string(neighbor_type); + RETURN_STATUS_UNEXPECTED(err_msg); + } + return Status::OK(); +} + Status Graph::GetSampledNeighbors(const std::vector &node_list, const std::vector &neighbor_nums, const std::vector &neighbor_types, std::shared_ptr *out) { @@ -171,10 +174,7 @@ Status Graph::GetSampledNeighbors(const std::vector &node_list, RETURN_IF_NOT_OK(CheckSamplesNum(num)); } for (const auto &type : neighbor_types) { - if (node_type_map_.find(type) == node_type_map_.end()) { - std::string err_msg = "Invalid neighbor type:" + std::to_string(type); - RETURN_STATUS_UNEXPECTED(err_msg); - } + RETURN_IF_NOT_OK(CheckNeighborType(type)); } std::vector> neighbors_vec(node_list.size()); for (size_t 
node_idx = 0; node_idx < node_list.size(); ++node_idx) { @@ -228,44 +228,36 @@ Status Graph::GetNegSampledNeighbors(const std::vector &node_list, N NodeType neg_neighbor_type, std::shared_ptr *out) { CHECK_FAIL_RETURN_UNEXPECTED(!node_list.empty(), "Input node_list is empty."); RETURN_IF_NOT_OK(CheckSamplesNum(samples_num)); - if (node_type_map_.find(neg_neighbor_type) == node_type_map_.end()) { - std::string err_msg = "Invalid neighbor type:" + std::to_string(neg_neighbor_type); - RETURN_STATUS_UNEXPECTED(err_msg); - } + RETURN_IF_NOT_OK(CheckNeighborType(neg_neighbor_type)); - std::vector> neighbors_vec; - neighbors_vec.resize(node_list.size()); + std::vector> neg_neighbors_vec; + neg_neighbors_vec.resize(node_list.size()); for (size_t node_idx = 0; node_idx < node_list.size(); ++node_idx) { std::shared_ptr node; RETURN_IF_NOT_OK(GetNodeByNodeId(node_list[node_idx], &node)); std::vector neighbors; RETURN_IF_NOT_OK(node->GetAllNeighbors(neg_neighbor_type, &neighbors)); - std::unordered_set exclude_node; + std::unordered_set exclude_nodes; std::transform(neighbors.begin(), neighbors.end(), - std::insert_iterator>(exclude_node, exclude_node.begin()), + std::insert_iterator>(exclude_nodes, exclude_nodes.begin()), [](const NodeIdType node) { return node; }); - auto itr = node_type_map_.find(neg_neighbor_type); - if (itr == node_type_map_.end()) { - std::string err_msg = "Invalid node type:" + std::to_string(neg_neighbor_type); - RETURN_STATUS_UNEXPECTED(err_msg); + const std::vector &all_nodes = node_type_map_[neg_neighbor_type]; + neg_neighbors_vec[node_idx].emplace_back(node->id()); + if (all_nodes.size() > exclude_nodes.size()) { + while (neg_neighbors_vec[node_idx].size() < samples_num + 1) { + RETURN_IF_NOT_OK(NegativeSample(all_nodes, exclude_nodes, samples_num - neg_neighbors_vec[node_idx].size(), + &neg_neighbors_vec[node_idx])); + } } else { - neighbors_vec[node_idx].emplace_back(node->id()); - if (itr->second.size() > exclude_node.size()) { - while 
(neighbors_vec[node_idx].size() < samples_num + 1) { - RETURN_IF_NOT_OK(NegativeSample(itr->second, exclude_node, samples_num - neighbors_vec[node_idx].size(), - &neighbors_vec[node_idx])); - } - } else { - MS_LOG(DEBUG) << "There are no negative neighbors. node_id:" << node->id() - << " neg_neighbor_type:" << neg_neighbor_type; - // If there are no negative neighbors, they are filled with kDefaultNodeId - for (int32_t i = 0; i < samples_num; ++i) { - neighbors_vec[node_idx].emplace_back(kDefaultNodeId); - } + MS_LOG(DEBUG) << "There are no negative neighbors. node_id:" << node->id() + << " neg_neighbor_type:" << neg_neighbor_type; + // If there are no negative neighbors, they are filled with kDefaultNodeId + for (int32_t i = 0; i < samples_num; ++i) { + neg_neighbors_vec[node_idx].emplace_back(kDefaultNodeId); } } } - RETURN_IF_NOT_OK(CreateTensorByVector(neighbors_vec, DataType(DataType::DE_INT32), out)); + RETURN_IF_NOT_OK(CreateTensorByVector(neg_neighbors_vec, DataType(DataType::DE_INT32), out)); return Status::OK(); } @@ -280,8 +272,19 @@ Status Graph::RandomWalk(const std::vector &node_list, const std::ve } Status Graph::GetNodeDefaultFeature(FeatureType feature_type, std::shared_ptr *out_feature) { - auto itr = default_feature_map_.find(feature_type); - if (itr == default_feature_map_.end()) { + auto itr = default_node_feature_map_.find(feature_type); + if (itr == default_node_feature_map_.end()) { + std::string err_msg = "Invalid feature type:" + std::to_string(feature_type); + RETURN_STATUS_UNEXPECTED(err_msg); + } else { + *out_feature = itr->second; + } + return Status::OK(); +} + +Status Graph::GetEdgeDefaultFeature(FeatureType feature_type, std::shared_ptr *out_feature) { + auto itr = default_edge_feature_map_.find(feature_type); + if (itr == default_edge_feature_map_.end()) { std::string err_msg = "Invalid feature type:" + std::to_string(feature_type); RETURN_STATUS_UNEXPECTED(err_msg); } else { @@ -295,7 +298,7 @@ Status Graph::GetNodeFeature(const 
std::shared_ptr &nodes, const std::ve if (!nodes || nodes->Size() == 0) { RETURN_STATUS_UNEXPECTED("Input nodes is empty"); } - CHECK_FAIL_RETURN_UNEXPECTED(!feature_types.empty(), "Inpude feature_types is empty"); + CHECK_FAIL_RETURN_UNEXPECTED(!feature_types.empty(), "Input feature_types is empty"); TensorRow tensors; for (const auto &f_type : feature_types) { std::shared_ptr default_feature; @@ -340,6 +343,45 @@ Status Graph::GetNodeFeature(const std::shared_ptr &nodes, const std::ve Status Graph::GetEdgeFeature(const std::shared_ptr &edges, const std::vector &feature_types, TensorRow *out) { + if (!edges || edges->Size() == 0) { + RETURN_STATUS_UNEXPECTED("Input edges is empty"); + } + CHECK_FAIL_RETURN_UNEXPECTED(!feature_types.empty(), "Input feature_types is empty"); + TensorRow tensors; + for (const auto &f_type : feature_types) { + std::shared_ptr default_feature; + // If no feature can be obtained, fill in the default value + RETURN_IF_NOT_OK(GetEdgeDefaultFeature(f_type, &default_feature)); + + TensorShape shape(default_feature->Value()->shape()); + auto shape_vec = edges->shape().AsVector(); + dsize_t size = std::accumulate(shape_vec.begin(), shape_vec.end(), 1, std::multiplies()); + shape = shape.PrependDim(size); + std::shared_ptr fea_tensor; + RETURN_IF_NOT_OK( + Tensor::CreateTensor(&fea_tensor, TensorImpl::kFlexible, shape, default_feature->Value()->type(), nullptr)); + + dsize_t index = 0; + for (auto edge_itr = edges->begin(); edge_itr != edges->end(); ++edge_itr) { + std::shared_ptr edge; + RETURN_IF_NOT_OK(GetEdgeByEdgeId(*edge_itr, &edge)); + std::shared_ptr feature; + if (!edge->GetFeatures(f_type, &feature).IsOk()) { + feature = default_feature; + } + RETURN_IF_NOT_OK(fea_tensor->InsertTensor({index}, feature->Value())); + index++; + } + + TensorShape reshape(edges->shape()); + for (auto s : default_feature->Value()->shape().AsVector()) { + reshape = reshape.AppendDim(s); + } + RETURN_IF_NOT_OK(fea_tensor->Reshape(reshape)); + 
fea_tensor->Squeeze(); + tensors.push_back(fea_tensor); + } + *out = std::move(tensors); return Status::OK(); } @@ -387,6 +429,7 @@ Status Graph::GetMetaInfo(MetaInfo *meta_info) { return Status::OK(); } +#ifdef ENABLE_PYTHON Status Graph::GraphInfo(py::dict *out) { MetaInfo meta_info; RETURN_IF_NOT_OK(GetMetaInfo(&meta_info)); @@ -398,6 +441,7 @@ Status Graph::GraphInfo(py::dict *out) { (*out)["edge_feature_type"] = py::cast(meta_info.edge_feature_type); return Status::OK(); } +#endif Status Graph::LoadNodeAndEdge() { GraphLoader gl(dataset_file_, num_workers_); @@ -405,7 +449,8 @@ Status Graph::LoadNodeAndEdge() { RETURN_IF_NOT_OK(gl.InitAndLoad()); // get all maps RETURN_IF_NOT_OK(gl.GetNodesAndEdges(&node_id_map_, &edge_id_map_, &node_type_map_, &edge_type_map_, - &node_feature_map_, &edge_feature_map_, &default_feature_map_)); + &node_feature_map_, &edge_feature_map_, &default_node_feature_map_, + &default_edge_feature_map_)); return Status::OK(); } @@ -420,18 +465,33 @@ Status Graph::GetNodeByNodeId(NodeIdType id, std::shared_ptr *node) { return Status::OK(); } +Status Graph::GetEdgeByEdgeId(EdgeIdType id, std::shared_ptr *edge) { + auto itr = edge_id_map_.find(id); + if (itr == edge_id_map_.end()) { + std::string err_msg = "Invalid edge id:" + std::to_string(id); + RETURN_STATUS_UNEXPECTED(err_msg); + } else { + *edge = itr->second; + } + return Status::OK(); +} + Graph::RandomWalkBase::RandomWalkBase(Graph *graph) : graph_(graph), step_home_param_(1.0), step_away_param_(1.0), default_node_(-1), num_walks_(1), num_workers_(1) {} Status Graph::RandomWalkBase::Build(const std::vector &node_list, const std::vector &meta_path, float step_home_param, float step_away_param, const NodeIdType default_node, int32_t num_walks, int32_t num_workers) { + CHECK_FAIL_RETURN_UNEXPECTED(!node_list.empty(), "Input node_list is empty."); node_list_ = node_list; if (meta_path.empty() || meta_path.size() > kMaxNumWalks) { std::string err_msg = "Failed, meta path required between 
1 and " + std::to_string(kMaxNumWalks) + ". The size of input path is " + std::to_string(meta_path.size()); RETURN_STATUS_UNEXPECTED(err_msg); } + for (const auto &type : meta_path) { + RETURN_IF_NOT_OK(graph_->CheckNeighborType(type)); + } meta_path_ = meta_path; if (step_home_param < kGnnEpsilon || step_away_param < kGnnEpsilon) { std::string err_msg = "Failed, step_home_param and step_away_param required greater than " + @@ -439,6 +499,18 @@ Status Graph::RandomWalkBase::Build(const std::vector &node_list, co ", step_away_param: " + std::to_string(step_away_param); RETURN_STATUS_UNEXPECTED(err_msg); } + if (default_node < -1) { + std::string err_msg = "Failed, default_node required to be greater or equal to -1."; + RETURN_STATUS_UNEXPECTED(err_msg); + } + if (num_walks <= 0) { + std::string err_msg = "Failed, num_walks parameter required to be greater than 0"; + RETURN_STATUS_UNEXPECTED(err_msg); + } + if (num_workers <= 0) { + std::string err_msg = "Failed, num_workers parameter required to be greater than 0"; + RETURN_STATUS_UNEXPECTED(err_msg); + } step_home_param_ = step_home_param; step_away_param_ = step_away_param; default_node_ = default_node; @@ -488,15 +560,10 @@ Status Graph::RandomWalkBase::Node2vecWalk(const NodeIdType &start_node, std::ve } Status Graph::RandomWalkBase::SimulateWalk(std::vector> *walks) { - // Repeatedly simulate random walks from each node - std::vector permutation(node_list_.size()); - std::iota(permutation.begin(), permutation.end(), 0); for (int32_t i = 0; i < num_walks_; i++) { - unsigned seed = std::chrono::system_clock::now().time_since_epoch().count(); - std::shuffle(permutation.begin(), permutation.end(), std::default_random_engine(seed)); - for (const auto &i_perm : permutation) { + for (const auto &node : node_list_) { std::vector walk; - RETURN_IF_NOT_OK(Node2vecWalk(node_list_[i_perm], &walk)); + RETURN_IF_NOT_OK(Node2vecWalk(node, &walk)); walks->push_back(walk); } } diff --git 
a/mindspore/ccsrc/dataset/engine/gnn/graph.h b/mindspore/ccsrc/minddata/dataset/engine/gnn/graph.h similarity index 90% rename from mindspore/ccsrc/dataset/engine/gnn/graph.h rename to mindspore/ccsrc/minddata/dataset/engine/gnn/graph.h index 344a6c6bf2..76930d91f2 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/graph.h +++ b/mindspore/ccsrc/minddata/dataset/engine/gnn/graph.h @@ -25,13 +25,13 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/core/tensor_row.h" -#include "dataset/engine/gnn/graph_loader.h" -#include "dataset/engine/gnn/feature.h" -#include "dataset/engine/gnn/node.h" -#include "dataset/engine/gnn/edge.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/tensor_row.h" +#include "minddata/dataset/engine/gnn/graph_loader.h" +#include "minddata/dataset/engine/gnn/feature.h" +#include "minddata/dataset/engine/gnn/node.h" +#include "minddata/dataset/engine/gnn/edge.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -140,8 +140,10 @@ class Graph { // @return Status - The error code return Status GetMetaInfo(MetaInfo *meta_info); +#ifdef ENABLE_PYTHON // Return meta information to python layer Status GraphInfo(py::dict *out); +#endif Status Init(); @@ -181,7 +183,7 @@ class Graph { float step_away_param_; // Inout hyper parameter. Default is 1.0 NodeIdType default_node_; - int32_t num_walks_; // Number of walks per source. Default is 10 + int32_t num_walks_; // Number of walks per source. Default is 1 int32_t num_workers_; // The number of worker threads. 
Default is 1 }; @@ -211,12 +213,24 @@ class Graph { // @return Status - The error code return Status GetNodeDefaultFeature(FeatureType feature_type, std::shared_ptr *out_feature); + // Get the default feature of a edge + // @param FeatureType feature_type - + // @param std::shared_ptr *out_feature - Returned feature + // @return Status - The error code return + Status GetEdgeDefaultFeature(FeatureType feature_type, std::shared_ptr *out_feature); + // Find node object using node id // @param NodeIdType id - // @param std::shared_ptr *node - Returned node object // @return Status - The error code return Status GetNodeByNodeId(NodeIdType id, std::shared_ptr *node); + // Find edge object using edge id + // @param EdgeIdType id - + // @param std::shared_ptr *edge - Returned edge object + // @return Status - The error code return + Status GetEdgeByEdgeId(EdgeIdType id, std::shared_ptr *edge); + // Negative sampling // @param std::vector &input_data - The data set to be sampled // @param std::unordered_set &exclude_data - Data to be excluded @@ -228,6 +242,8 @@ class Graph { Status CheckSamplesNum(NodeIdType samples_num); + Status CheckNeighborType(NodeType neighbor_type); + std::string dataset_file_; int32_t num_workers_; // The number of worker threads std::mt19937 rnd_; @@ -242,7 +258,8 @@ class Graph { std::unordered_map> node_feature_map_; std::unordered_map> edge_feature_map_; - std::unordered_map> default_feature_map_; + std::unordered_map> default_node_feature_map_; + std::unordered_map> default_edge_feature_map_; }; } // namespace gnn } // namespace dataset diff --git a/mindspore/ccsrc/dataset/engine/gnn/graph_loader.cc b/mindspore/ccsrc/minddata/dataset/engine/gnn/graph_loader.cc similarity index 87% rename from mindspore/ccsrc/dataset/engine/gnn/graph_loader.cc rename to mindspore/ccsrc/minddata/dataset/engine/gnn/graph_loader.cc index 6504d088bf..9d2c6211f4 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/graph_loader.cc +++ 
b/mindspore/ccsrc/minddata/dataset/engine/gnn/graph_loader.cc @@ -18,11 +18,11 @@ #include #include -#include "dataset/engine/gnn/graph_loader.h" -#include "mindspore/ccsrc/mindrecord/include/shard_error.h" -#include "dataset/engine/gnn/local_edge.h" -#include "dataset/engine/gnn/local_node.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/engine/gnn/graph_loader.h" +#include "mindspore/ccsrc/minddata/mindrecord/include/shard_error.h" +#include "minddata/dataset/engine/gnn/local_edge.h" +#include "minddata/dataset/engine/gnn/local_node.h" +#include "minddata/dataset/util/task_manager.h" using ShardTuple = std::vector, mindspore::mindrecord::json>>; @@ -41,7 +41,8 @@ GraphLoader::GraphLoader(std::string mr_filepath, int32_t num_workers) Status GraphLoader::GetNodesAndEdges(NodeIdMap *n_id_map, EdgeIdMap *e_id_map, NodeTypeMap *n_type_map, EdgeTypeMap *e_type_map, NodeFeatureMap *n_feature_map, - EdgeFeatureMap *e_feature_map, DefaultFeatureMap *default_feature_map) { + EdgeFeatureMap *e_feature_map, DefaultNodeFeatureMap *default_node_feature_map, + DefaultEdgeFeatureMap *default_edge_feature_map) { for (std::deque> &dq : n_deques_) { while (dq.empty() == false) { std::shared_ptr node_ptr = dq.front(); @@ -70,7 +71,7 @@ Status GraphLoader::GetNodesAndEdges(NodeIdMap *n_id_map, EdgeIdMap *e_id_map, N for (auto &itr : *n_type_map) itr.second.shrink_to_fit(); for (auto &itr : *e_type_map) itr.second.shrink_to_fit(); - MergeFeatureMaps(n_feature_map, e_feature_map, default_feature_map); + MergeFeatureMaps(n_feature_map, e_feature_map, default_node_feature_map, default_edge_feature_map); return Status::OK(); } @@ -81,7 +82,8 @@ Status GraphLoader::InitAndLoad() { e_deques_.resize(num_workers_); n_feature_maps_.resize(num_workers_); e_feature_maps_.resize(num_workers_); - default_feature_maps_.resize(num_workers_); + default_node_feature_maps_.resize(num_workers_); + default_edge_feature_maps_.resize(num_workers_); TaskGroup vg; shard_reader_ = 
std::make_unique(); @@ -109,7 +111,7 @@ Status GraphLoader::InitAndLoad() { Status GraphLoader::LoadNode(const std::vector &col_blob, const mindrecord::json &col_jsn, std::shared_ptr *node, NodeFeatureMap *feature_map, - DefaultFeatureMap *default_feature) { + DefaultNodeFeatureMap *default_feature) { NodeIdType node_id = col_jsn["first_id"]; NodeType node_type = static_cast(col_jsn["type"]); (*node) = std::make_shared(node_id, node_type); @@ -133,7 +135,7 @@ Status GraphLoader::LoadNode(const std::vector &col_blob, const mindrec Status GraphLoader::LoadEdge(const std::vector &col_blob, const mindrecord::json &col_jsn, std::shared_ptr *edge, EdgeFeatureMap *feature_map, - DefaultFeatureMap *default_feature) { + DefaultEdgeFeatureMap *default_feature) { EdgeIdType edge_id = col_jsn["first_id"]; EdgeType edge_type = static_cast(col_jsn["type"]); NodeIdType src_id = col_jsn["second_id"], dst_id = col_jsn["third_id"]; @@ -214,13 +216,13 @@ Status GraphLoader::WorkerEntry(int32_t worker_id) { std::string attr = col_jsn["attribute"]; if (attr == "n") { std::shared_ptr node_ptr; - RETURN_IF_NOT_OK( - LoadNode(col_blob, col_jsn, &node_ptr, &(n_feature_maps_[worker_id]), &default_feature_maps_[worker_id])); + RETURN_IF_NOT_OK(LoadNode(col_blob, col_jsn, &node_ptr, &(n_feature_maps_[worker_id]), + &default_node_feature_maps_[worker_id])); n_deques_[worker_id].emplace_back(node_ptr); } else if (attr == "e") { std::shared_ptr edge_ptr; - RETURN_IF_NOT_OK( - LoadEdge(col_blob, col_jsn, &edge_ptr, &(e_feature_maps_[worker_id]), &default_feature_maps_[worker_id])); + RETURN_IF_NOT_OK(LoadEdge(col_blob, col_jsn, &edge_ptr, &(e_feature_maps_[worker_id]), + &default_edge_feature_maps_[worker_id])); e_deques_[worker_id].emplace_back(edge_ptr); } else { MS_LOG(WARNING) << "attribute:" << attr << " is neither edge nor node."; @@ -233,7 +235,8 @@ Status GraphLoader::WorkerEntry(int32_t worker_id) { } void GraphLoader::MergeFeatureMaps(NodeFeatureMap *n_feature_map, EdgeFeatureMap 
*e_feature_map, - DefaultFeatureMap *default_feature_map) { + DefaultNodeFeatureMap *default_node_feature_map, + DefaultEdgeFeatureMap *default_edge_feature_map) { for (int wkr_id = 0; wkr_id < num_workers_; wkr_id++) { for (auto &m : n_feature_maps_[wkr_id]) { for (auto &n : m.second) (*n_feature_map)[m.first].insert(n); @@ -241,8 +244,11 @@ void GraphLoader::MergeFeatureMaps(NodeFeatureMap *n_feature_map, EdgeFeatureMap for (auto &m : e_feature_maps_[wkr_id]) { for (auto &n : m.second) (*e_feature_map)[m.first].insert(n); } - for (auto &m : default_feature_maps_[wkr_id]) { - (*default_feature_map)[m.first] = m.second; + for (auto &m : default_node_feature_maps_[wkr_id]) { + (*default_node_feature_map)[m.first] = m.second; + } + for (auto &m : default_edge_feature_maps_[wkr_id]) { + (*default_edge_feature_map)[m.first] = m.second; } } n_feature_maps_.clear(); diff --git a/mindspore/ccsrc/dataset/engine/gnn/graph_loader.h b/mindspore/ccsrc/minddata/dataset/engine/gnn/graph_loader.h similarity index 82% rename from mindspore/ccsrc/dataset/engine/gnn/graph_loader.h rename to mindspore/ccsrc/minddata/dataset/engine/gnn/graph_loader.h index 0ad54bae6d..f7f9245b8a 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/graph_loader.h +++ b/mindspore/ccsrc/minddata/dataset/engine/gnn/graph_loader.h @@ -24,14 +24,14 @@ #include #include -#include "dataset/core/data_type.h" -#include "dataset/core/tensor.h" -#include "dataset/engine/gnn/feature.h" -#include "dataset/engine/gnn/graph.h" -#include "dataset/engine/gnn/node.h" -#include "dataset/engine/gnn/edge.h" -#include "dataset/util/status.h" -#include "mindrecord/include/shard_reader.h" +#include "minddata/dataset/core/data_type.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/gnn/feature.h" +#include "minddata/dataset/engine/gnn/graph.h" +#include "minddata/dataset/engine/gnn/node.h" +#include "minddata/dataset/engine/gnn/edge.h" +#include "minddata/dataset/util/status.h" +#include 
"minddata/mindrecord/include/shard_reader.h" namespace mindspore { namespace dataset { namespace gnn { @@ -43,7 +43,8 @@ using NodeTypeMap = std::unordered_map>; using EdgeTypeMap = std::unordered_map>; using NodeFeatureMap = std::unordered_map>; using EdgeFeatureMap = std::unordered_map>; -using DefaultFeatureMap = std::unordered_map>; +using DefaultNodeFeatureMap = std::unordered_map>; +using DefaultEdgeFeatureMap = std::unordered_map>; // this class interfaces with the underlying storage format (mindrecord) // it returns raw nodes and edges via GetNodesAndEdges @@ -63,7 +64,7 @@ class GraphLoader { // random order. src_node and dst_node in Edge are node_id only with -1 as type. // features attached to each node and edge are expected to be filled correctly Status GetNodesAndEdges(NodeIdMap *, EdgeIdMap *, NodeTypeMap *, EdgeTypeMap *, NodeFeatureMap *, EdgeFeatureMap *, - DefaultFeatureMap *); + DefaultNodeFeatureMap *, DefaultEdgeFeatureMap *); private: // @@ -77,19 +78,19 @@ class GraphLoader { // @param mindrecord::json &jsn - contains raw data // @param std::shared_ptr *node - return value // @param NodeFeatureMap *feature_map - - // @param DefaultFeatureMap *default_feature - + // @param DefaultNodeFeatureMap *default_feature - // @return Status - the status code Status LoadNode(const std::vector &blob, const mindrecord::json &jsn, std::shared_ptr *node, - NodeFeatureMap *feature_map, DefaultFeatureMap *default_feature); + NodeFeatureMap *feature_map, DefaultNodeFeatureMap *default_feature); // @param std::vector &blob - contains data in blob field in mindrecord // @param mindrecord::json &jsn - contains raw data // @param std::shared_ptr *edge - return value, the edge ptr, edge is not yet connected // @param FeatureMap *feature_map - // @param DefaultFeatureMap *default_feature - + // @param DefaultEdgeFeatureMap *default_feature - // @return Status - the status code Status LoadEdge(const std::vector &blob, const mindrecord::json &jsn, std::shared_ptr 
*edge, - EdgeFeatureMap *feature_map, DefaultFeatureMap *default_feature); + EdgeFeatureMap *feature_map, DefaultEdgeFeatureMap *default_feature); // @param std::string key - column name // @param std::vector &blob - contains data in blob field in mindrecord @@ -108,7 +109,7 @@ class GraphLoader { std::shared_ptr *tensor); // merge NodeFeatureMap and EdgeFeatureMap of each worker into 1 - void MergeFeatureMaps(NodeFeatureMap *, EdgeFeatureMap *, DefaultFeatureMap *); + void MergeFeatureMaps(NodeFeatureMap *, EdgeFeatureMap *, DefaultNodeFeatureMap *, DefaultEdgeFeatureMap *); const int32_t num_workers_; std::atomic_int row_id_; @@ -118,7 +119,8 @@ class GraphLoader { std::vector>> e_deques_; std::vector n_feature_maps_; std::vector e_feature_maps_; - std::vector default_feature_maps_; + std::vector default_node_feature_maps_; + std::vector default_edge_feature_maps_; const std::vector keys_; }; } // namespace gnn diff --git a/mindspore/ccsrc/dataset/engine/gnn/local_edge.cc b/mindspore/ccsrc/minddata/dataset/engine/gnn/local_edge.cc similarity index 96% rename from mindspore/ccsrc/dataset/engine/gnn/local_edge.cc rename to mindspore/ccsrc/minddata/dataset/engine/gnn/local_edge.cc index 7465b689d5..642c73eed3 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/local_edge.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/gnn/local_edge.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/gnn/local_edge.h" +#include "minddata/dataset/engine/gnn/local_edge.h" #include diff --git a/mindspore/ccsrc/dataset/engine/gnn/local_edge.h b/mindspore/ccsrc/minddata/dataset/engine/gnn/local_edge.h similarity index 91% rename from mindspore/ccsrc/dataset/engine/gnn/local_edge.h rename to mindspore/ccsrc/minddata/dataset/engine/gnn/local_edge.h index a34fc00373..d112972f8f 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/local_edge.h +++ b/mindspore/ccsrc/minddata/dataset/engine/gnn/local_edge.h @@ -20,10 +20,10 @@ #include #include -#include "dataset/util/status.h" -#include "dataset/engine/gnn/edge.h" -#include "dataset/engine/gnn/feature.h" -#include "dataset/engine/gnn/node.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/engine/gnn/edge.h" +#include "minddata/dataset/engine/gnn/feature.h" +#include "minddata/dataset/engine/gnn/node.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/gnn/local_node.cc b/mindspore/ccsrc/minddata/dataset/engine/gnn/local_node.cc similarity index 96% rename from mindspore/ccsrc/dataset/engine/gnn/local_node.cc rename to mindspore/ccsrc/minddata/dataset/engine/gnn/local_node.cc index c829f8e8ca..8eaf9bb716 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/local_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/gnn/local_node.cc @@ -13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/gnn/local_node.h" +#include "minddata/dataset/engine/gnn/local_node.h" #include #include #include -#include "dataset/engine/gnn/edge.h" -#include "dataset/util/random.h" +#include "minddata/dataset/engine/gnn/edge.h" +#include "minddata/dataset/util/random.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/gnn/local_node.h b/mindspore/ccsrc/minddata/dataset/engine/gnn/local_node.h similarity index 95% rename from mindspore/ccsrc/dataset/engine/gnn/local_node.h rename to mindspore/ccsrc/minddata/dataset/engine/gnn/local_node.h index bc069d073f..9c122931e7 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/local_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/gnn/local_node.h @@ -20,9 +20,9 @@ #include #include -#include "dataset/util/status.h" -#include "dataset/engine/gnn/node.h" -#include "dataset/engine/gnn/feature.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/engine/gnn/node.h" +#include "minddata/dataset/engine/gnn/feature.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/gnn/node.h b/mindspore/ccsrc/minddata/dataset/engine/gnn/node.h similarity index 96% rename from mindspore/ccsrc/dataset/engine/gnn/node.h rename to mindspore/ccsrc/minddata/dataset/engine/gnn/node.h index 282f856797..a7c803fee2 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/gnn/node.h @@ -20,8 +20,8 @@ #include #include -#include "dataset/util/status.h" -#include "dataset/engine/gnn/feature.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/engine/gnn/feature.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/jagged_connector.h b/mindspore/ccsrc/minddata/dataset/engine/jagged_connector.h similarity index 93% rename from mindspore/ccsrc/dataset/engine/jagged_connector.h rename to mindspore/ccsrc/minddata/dataset/engine/jagged_connector.h index 
2058c542a8..cee0b7abf3 100644 --- a/mindspore/ccsrc/dataset/engine/jagged_connector.h +++ b/mindspore/ccsrc/minddata/dataset/engine/jagged_connector.h @@ -20,10 +20,10 @@ #include #include #include -#include "dataset/engine/connector.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/util/status.h" -#include "dataset/core/constants.h" +#include "minddata/dataset/engine/connector.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/core/constants.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/opt/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/engine/opt/CMakeLists.txt similarity index 71% rename from mindspore/ccsrc/dataset/engine/opt/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/engine/opt/CMakeLists.txt index 080d968cfc..0ab1fb7925 100644 --- a/mindspore/ccsrc/dataset/engine/opt/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/CMakeLists.txt @@ -2,7 +2,11 @@ file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc" set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) add_library(engine-opt OBJECT pass.cc + post/repeat_pass.cc + pre/cache_pass.cc + pre/cache_transform_pass.cc pre/removal_nodes.cc pre/removal_pass.cc + optional/tensor_op_fusion_pass.cc util/printer_pass.cc ) diff --git a/mindspore/ccsrc/minddata/dataset/engine/opt/optional/tensor_op_fusion_pass.cc b/mindspore/ccsrc/minddata/dataset/engine/opt/optional/tensor_op_fusion_pass.cc new file mode 100644 index 0000000000..d8ce2dd863 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/optional/tensor_op_fusion_pass.cc @@ -0,0 +1,58 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "minddata/dataset/engine/opt/optional/tensor_op_fusion_pass.h" +#include "minddata/dataset/kernels/image/decode_op.h" +#include "minddata/dataset/engine/datasetops/map_op.h" +#include "minddata/dataset/kernels/image/random_crop_decode_resize_op.h" + +namespace mindspore { +namespace dataset { + +Status TensorOpFusionPass::RunOnNode(std::shared_ptr node, bool *modified) { + // Most primitive pattern: DecodeOp immediately followed by RandomCropAndResizeOp + // Abstract into a more general member function that can find any pattern, expressed + // by regular expressions, for instance. + // Add a list of optimisation policies. 
For now, just this lambda + auto FindPattern = [](auto &tfuncs) { + auto it = + std::find_if(tfuncs.begin(), tfuncs.end(), [](const auto &tf) -> bool { return tf->Name() == kDecodeOp; }); + auto next = it + 1; + if (it != tfuncs.end() && next != tfuncs.end() && (*next)->Name() == kRandomCropAndResizeOp) { + return it; + } else { + return tfuncs.end(); + } + }; + + auto &tfuncs = node->TFuncs(); + auto it = FindPattern(tfuncs); + if (it != tfuncs.end()) { + auto next = it + 1; + auto op = static_cast(next->get()); + *it = std::static_pointer_cast(std::make_shared(*op)); + tfuncs.erase(next); + } + if (modified != nullptr) { + *modified = true; + } else { + RETURN_STATUS_UNEXPECTED("modified is nullptr"); + } + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/opt/optional/tensor_op_fusion_pass.h b/mindspore/ccsrc/minddata/dataset/engine/opt/optional/tensor_op_fusion_pass.h new file mode 100644 index 0000000000..a109af396c --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/optional/tensor_op_fusion_pass.h @@ -0,0 +1,38 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef DATASET_TENSOR_OP_FUSION_PASS_H_ +#define DATASET_TENSOR_OP_FUSION_PASS_H_ + +#include +#include "minddata/dataset/engine/opt/pass.h" + +namespace mindspore { +namespace dataset { + +/// \class TensorOpFusionPass tensor_op_fusion_pass.h +/// \brief And optional optimization pass identifying and fusing +/// tensor ops within MapOp +class TensorOpFusionPass : public NodePass { + /// \brief Identifies and fuses tensor ops within MapOp + /// \param[in] node The node being visited + /// \param[inout] *modified indicates whether the node has been visited + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; +}; +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_TENSOR_OP_FUSION_PASS_H_ diff --git a/mindspore/ccsrc/dataset/engine/opt/pass.cc b/mindspore/ccsrc/minddata/dataset/engine/opt/pass.cc similarity index 53% rename from mindspore/ccsrc/dataset/engine/opt/pass.cc rename to mindspore/ccsrc/minddata/dataset/engine/opt/pass.cc index 27769f056b..4a8bbaf38f 100644 --- a/mindspore/ccsrc/dataset/engine/opt/pass.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/pass.cc @@ -14,23 +14,35 @@ * limitations under the License. 
*/ -#include "dataset/engine/opt/pass.h" -#include "dataset/engine/datasetops/batch_op.h" -#include "dataset/engine/datasetops/dataset_op.h" -#include "dataset/engine/datasetops/device_queue_op.h" -#include "dataset/engine/datasetops/map_op.h" -#include "dataset/engine/datasetops/project_op.h" -#include "dataset/engine/datasetops/rename_op.h" -#include "dataset/engine/datasetops/filter_op.h" -#include "dataset/engine/datasetops/repeat_op.h" -#include "dataset/engine/datasetops/skip_op.h" -#include "dataset/engine/datasetops/shuffle_op.h" -#include "dataset/engine/datasetops/source/generator_op.h" -#include "dataset/engine/datasetops/source/mindrecord_op.h" -#include "dataset/engine/datasetops/source/tf_reader_op.h" -#include "dataset/engine/datasetops/source/image_folder_op.h" -#include "dataset/engine/datasetops/take_op.h" -#include "dataset/engine/datasetops/zip_op.h" +#include "minddata/dataset/engine/opt/pass.h" +#include "minddata/dataset/engine/datasetops/batch_op.h" +#include "minddata/dataset/engine/datasetops/cache_op.h" +#include "minddata/dataset/engine/datasetops/cache_merge_op.h" +#include "minddata/dataset/engine/datasetops/cache_lookup_op.h" +#include "minddata/dataset/engine/datasetops/dataset_op.h" +#include "minddata/dataset/engine/datasetops/device_queue_op.h" +#include "minddata/dataset/engine/datasetops/map_op.h" +#include "minddata/dataset/engine/datasetops/project_op.h" +#include "minddata/dataset/engine/datasetops/rename_op.h" +#include "minddata/dataset/engine/datasetops/repeat_op.h" +#include "minddata/dataset/engine/datasetops/skip_op.h" +#include "minddata/dataset/engine/datasetops/shuffle_op.h" +#include "minddata/dataset/engine/datasetops/source/celeba_op.h" +#include "minddata/dataset/engine/datasetops/source/cifar_op.h" +#include "minddata/dataset/engine/datasetops/source/coco_op.h" +#include "minddata/dataset/engine/datasetops/source/manifest_op.h" +#include "minddata/dataset/engine/datasetops/source/mindrecord_op.h" +#include 
"minddata/dataset/engine/datasetops/source/mnist_op.h" +#include "minddata/dataset/engine/datasetops/source/random_data_op.h" +#include "minddata/dataset/engine/datasetops/source/tf_reader_op.h" +#include "minddata/dataset/engine/datasetops/source/voc_op.h" +#ifdef ENABLE_PYTHON +#include "minddata/dataset/engine/datasetops/filter_op.h" +#include "minddata/dataset/engine/datasetops/source/generator_op.h" +#endif +#include "minddata/dataset/engine/datasetops/source/image_folder_op.h" +#include "minddata/dataset/engine/datasetops/take_op.h" +#include "minddata/dataset/engine/datasetops/zip_op.h" namespace mindspore { namespace dataset { @@ -111,32 +123,39 @@ Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { return RunOnNode(std::static_pointer_cast(node), modified); } -Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { +Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { // Fallback to base class visitor by default return RunOnNode(std::static_pointer_cast(node), modified); } -Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { +Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { // Fallback to base class visitor by default return RunOnNode(std::static_pointer_cast(node), modified); } -Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { +Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { // Fallback to base class visitor by default return RunOnNode(std::static_pointer_cast(node), modified); } -Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { +Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { // Fallback to base class visitor by default return RunOnNode(std::static_pointer_cast(node), modified); } -Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { +#ifdef ENABLE_PYTHON +Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { // Fallback to base class visitor by default return 
RunOnNode(std::static_pointer_cast(node), modified); } -Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { +Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return RunOnNode(std::static_pointer_cast(node), modified); +} +#endif + +Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { // Fallback to base class visitor by default return RunOnNode(std::static_pointer_cast(node), modified); } @@ -160,5 +179,70 @@ Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) // Fallback to base class visitor by default return RunOnNode(std::static_pointer_cast(node), modified); } + +Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return RunOnNode(std::static_pointer_cast(node), modified); +} + +Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return RunOnNode(std::static_pointer_cast(node), modified); +} + +Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return RunOnNode(std::static_pointer_cast(node), modified); +} + +Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return RunOnNode(std::static_pointer_cast(node), modified); +} + +Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return RunOnNode(std::static_pointer_cast(node), modified); +} + +Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return RunOnNode(std::static_pointer_cast(node), modified); +} + +Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return RunOnNode(std::static_pointer_cast(node), modified); +} + +Status 
NodePass::RunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return RunOnNode(std::static_pointer_cast(node), modified); +} + +Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return RunOnNode(std::static_pointer_cast(node), modified); +} + +Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return RunOnNode(std::static_pointer_cast(node), modified); +} + +Status NodePass::PreRunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return PreRunOnNode(std::static_pointer_cast(node), modified); +} + +Status NodePass::PreRunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return PreRunOnNode(std::static_pointer_cast(node), modified); +} + +Status NodePass::PreRunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return PreRunOnNode(std::static_pointer_cast(node), modified); +} } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/opt/pass.h b/mindspore/ccsrc/minddata/dataset/engine/opt/pass.h similarity index 79% rename from mindspore/ccsrc/dataset/engine/opt/pass.h rename to mindspore/ccsrc/minddata/dataset/engine/opt/pass.h index 129c2fab37..845ab34d66 100644 --- a/mindspore/ccsrc/dataset/engine/opt/pass.h +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/pass.h @@ -20,8 +20,8 @@ #include #include -#include "dataset/engine/execution_tree.h" -#include "dataset/util/status.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -33,18 +33,24 @@ class ProjectOp; class RenameOp; -class FilterOp; - class SkipOp; class ShuffleOp; -class GeneratorOp; - class MindRecordOp; class TFReaderOp; +#ifdef ENABLE_PYTHON +class FilterOp; + +class 
GeneratorOp; +#endif + +class RandomDataOp; + +class RepeatOp; + class TakeOp; class ZipOp; @@ -53,6 +59,24 @@ class DeviceQueueOp; class ImageFolderOp; +class CacheOp; + +class MnistOp; + +class ManifestOp; + +class CifarOp; + +class VOCOp; + +class CocoOp; + +class CelebAOp; + +class CacheMergeOp; + +class CacheLookupOp; + // The base class Pass is the basic unit of tree transformation. // The actual implementation of the passes will be derived from here. class Pass : public std::enable_shared_from_this { @@ -122,26 +146,56 @@ class NodePass : public Pass { virtual Status RunOnNode(std::shared_ptr node, bool *modified); - virtual Status RunOnNode(std::shared_ptr node, bool *modified); - virtual Status RunOnNode(std::shared_ptr node, bool *modified); virtual Status RunOnNode(std::shared_ptr node, bool *modified); - virtual Status RunOnNode(std::shared_ptr node, bool *modified); - virtual Status RunOnNode(std::shared_ptr node, bool *modified); virtual Status RunOnNode(std::shared_ptr node, bool *modified); +#ifdef ENABLE_PYTHON + virtual Status RunOnNode(std::shared_ptr node, bool *modified); + + virtual Status RunOnNode(std::shared_ptr node, bool *modified); +#endif + + virtual Status RunOnNode(std::shared_ptr node, bool *modified); + virtual Status RunOnNode(std::shared_ptr node, bool *modified); virtual Status RunOnNode(std::shared_ptr node, bool *modified); virtual Status RunOnNode(std::shared_ptr node, bool *modified); + virtual Status RunOnNode(std::shared_ptr node, bool *modified); + virtual Status RunOnNode(std::shared_ptr node, bool *modified); + virtual Status RunOnNode(std::shared_ptr node, bool *modified); + + virtual Status RunOnNode(std::shared_ptr node, bool *modified); + + virtual Status RunOnNode(std::shared_ptr node, bool *modified); + + virtual Status RunOnNode(std::shared_ptr node, bool *modified); + + virtual Status RunOnNode(std::shared_ptr node, bool *modified); + + virtual Status RunOnNode(std::shared_ptr node, bool *modified); + + virtual 
Status RunOnNode(std::shared_ptr node, bool *modified); + + virtual Status RunOnNode(std::shared_ptr node, bool *modified); + + virtual Status RunOnNode(std::shared_ptr node, bool *modified); + + virtual Status PreRunOnNode(std::shared_ptr node, bool *modified); + + virtual Status PreRunOnNode(std::shared_ptr node, bool *modified); + + virtual Status PreRunOnNode(std::shared_ptr node, bool *modified); + private: // Helper function to perform DFS visit Status DFSNodeVisit(std::shared_ptr node, bool *modified); diff --git a/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.cc b/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.cc new file mode 100644 index 0000000000..59a3f71c53 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.cc @@ -0,0 +1,161 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "minddata/dataset/engine/opt/post/repeat_pass.h" +#include "minddata/dataset/engine/datasetops/repeat_op.h" +#include "minddata/dataset/engine/datasetops/cache_op.h" +#include "minddata/dataset/engine/datasetops/cache_lookup_op.h" +#include "minddata/dataset/engine/datasetops/cache_merge_op.h" + +namespace mindspore { +namespace dataset { + +RepeatPass::RepeatPass() : is_repeated_(false), nested_repeats_(0), is_merge_(false), cache_lookup_(nullptr) {} + +// Identifies the subtree below this node as being in a repeated path of the tree. 
+Status RepeatPass::PreRunOnNode(std::shared_ptr node, bool *modified) { + // If we are already repeated, then this is a nested repeat. + if (is_repeated_) { + nested_repeats_++; + } + is_repeated_ = true; + return Status::OK(); +} + +// Identifies the subtree below this node as being in a cache merge path +Status RepeatPass::PreRunOnNode(std::shared_ptr node, bool *modified) { + // Turn on the flag that we're under a merge op + is_merge_ = true; + return Status::OK(); +} + +// Hooks up any identified eoe nodes under this repeat. +Status RepeatPass::RunOnNode(std::shared_ptr node, bool *modified) { + // Pop the leaf ops from the save-area stack and add them to the repeat op's eoe node tracking + std::shared_ptr leaf_op = PopFromEOEOpStack(); + while (leaf_op != nullptr) { + node->AddToEoeList(leaf_op); + leaf_op = PopFromEOEOpStack(); + } + + // We are a repeat op in the descendant tree of a merge op, then we take the saved lookup up + // and add it to the list of eoe/leaf ops for the repeat, removing it from the save area. + if (is_merge_ && cache_lookup_) { + cache_lookup_->set_control_flag(DatasetOp::kDeOpRepeated); + node->AddToEoeList(std::move(cache_lookup_)); + } + + // If we are a nested repeat, then we add ourself to the repeat stack for the next one above us. + // A nested repeat acts like an eoe/leaf for the repeat in the ascendant tree. 
+ if (nested_repeats_ > 0) { + node->set_control_flag(DatasetOp::kDeOpRepeated); + AddToEOEOpStack(node); + nested_repeats_--; + } + + // If we are not nested, or we were the top-most repeat, now we clear the flag + if (nested_repeats_ == 0) { + is_repeated_ = false; + } + + return Status::OK(); +} + +// CacheOp removes previous leaf ops and replaces them with itself +Status RepeatPass::RunOnNode(std::shared_ptr node, bool *modified) { + if (is_repeated_) { + node->set_control_flag(DatasetOp::kDeOpRepeated); + // if we are a cache within a repeat path of the tree, then there will be + // eoe-generating ops in the eoe op stack in the tree. They are flagged as such so that the + // repeat or epoch ctrl operators can work with them for repeat activity during runtime. + // However, since a cache is present: + // - unflag those ops as being repeated ops + // - remove them from the eoe op stack so that repeat op above in the tree won't know about them + // - add ourself (the cache op), as an eoe op + // We do this so that those old leafs become 1-time use (up to eoe), never repeated. Instead + // the repeating behaviours shall be invoked against the cache op. + std::shared_ptr leaf_op = PopFromEOEOpStack(); + while (leaf_op != nullptr) { + leaf_op->ClearControlFlag(DatasetOp::kDeOpLastRepeat); + leaf_op->ClearControlFlag(DatasetOp::kDeOpRepeated); + leaf_op = PopFromEOEOpStack(); + } + AddToEOEOpStack(std::static_pointer_cast(node)); + } + + return Status::OK(); +} + +// All operators have a flag that might be set related to the repeat and any leaf nodes need to be set up +// for use with a controlling repeat above it. 
+Status RepeatPass::RunOnNode(std::shared_ptr node, bool *modified) { + // If we are in a repeat path, then set our repeated flag + if (is_repeated_) { + node->set_control_flag(DatasetOp::kDeOpRepeated); + + // if we are a leaf node then save ourself in a stack for the repeat operator above us + if (node->IsLeaf()) { + AddToEOEOpStack(node); + } + } + return Status::OK(); +} + +// Turns off the tracking for operations under merge op +Status RepeatPass::RunOnNode(std::shared_ptr node, bool *modified) { + // Setting the flag is needed since we didn't call the base class DatasetOp version + if (is_repeated_) node->set_control_flag(DatasetOp::kDeOpRepeated); + is_merge_ = false; + cache_lookup_.reset(); // If a repeat op did not consume this then it's no longer needed + return Status::OK(); +} + +// Saves the lookup up in case it needs to be referenced by a repeat +Status RepeatPass::RunOnNode(std::shared_ptr node, bool *modified) { + if (!node->IsLeaf()) { + // By definition, the CacheLookup must be a leaf op. Make that clear here. + RETURN_STATUS_UNEXPECTED("CacheLookupOp must be a leaf node!"); + } + + // If we are in a repeat path already, then there must be a repeat above the merge op + // In this case, we naturally are a repeating leaf op so add the required setup for leafs under repeat here. + if (is_repeated_) { + node->set_control_flag(DatasetOp::kDeOpRepeated); + AddToEOEOpStack(node); + } else { + // save the lookup op. There could be a repeat in the cache miss leg of the merge op, in which case we + // may still need to be flagged as a repeating leaf. We can't decide that here though, so save ourself + // into the pass so that the decision can be made during the processing of the cache miss leg of the merge. 
+ cache_lookup_ = std::static_pointer_cast(node); + } + return Status::OK(); +} + +// Adds an operator to the eoe operator stack save area +void RepeatPass::AddToEOEOpStack(std::shared_ptr dataset_op) { eoe_stack_.push(dataset_op); } + +// Pops an operator from the eoe operator stack save area +std::shared_ptr RepeatPass::PopFromEOEOpStack() { + std::shared_ptr top_op = nullptr; + if (!eoe_stack_.empty()) { + top_op = eoe_stack_.top(); + eoe_stack_.pop(); + } + return top_op; +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.h b/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.h new file mode 100644 index 0000000000..9b733e2329 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.h @@ -0,0 +1,98 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DATASET_ENGINE_OPT_PASS_POST_REPEAT_PASS_ +#define DATASET_ENGINE_OPT_PASS_POST_REPEAT_PASS_ + +#include +#include +#include +#include "minddata/dataset/engine/opt/pass.h" + +namespace mindspore { +namespace dataset { + +/// \class RepeatPass repeat_pass.h +/// \brief This is a NodePass who's job is to perform setup actions for RepeatOps. A RepeatOp needs to have references +/// to the eoe-producing (typically leaf) nodes underneath it. 
+class RepeatPass : public NodePass { + public: + /// \brief Constructor + RepeatPass(); + + /// \brief Identifies the subtree below this node as being in a repeated path of the tree. + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status PreRunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Identifies the subtree below this node as being in a cache merge path + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status PreRunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Hooks up any identified eoe nodes under this repeat. + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief CacheOp removes previous leaf ops and replaces them with itself + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Turns of the tracking for operations under merge op + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Saves the lookup up in case it needs to be referenced by a repeat + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief All operators have a flag that might be set related to the 
repeat and any leaf nodes need to be set up + /// for use with a controlling repeat above it. + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + private: + /// \brief Adds an operator to the eoe operator stack save area + /// \param op - The dataset op to work add to eoe stack + /// \return Status - The error code return + void AddToEOEOpStack(std::shared_ptr dataset_op); + + /// \brief Pops an operator from the eoe operator stack save area + /// \return shared_ptr to the popped operator + std::shared_ptr PopFromEOEOpStack(); + + bool is_repeated_; // T/F if we are processing under a repeat + bool is_merge_; // T/F if we are processing under a cache merge op + int32_t nested_repeats_; // A counter for nested repeats + std::stack> eoe_stack_; // A save area for leaf/eoe ops + std::shared_ptr cache_lookup_; // A save area for a cache lookup op +}; +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_ENGINE_OPT_PASS_POST_REPEAT_PASS_ diff --git a/mindspore/ccsrc/minddata/dataset/engine/opt/pre/cache_pass.cc b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/cache_pass.cc new file mode 100644 index 0000000000..09b5f14a17 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/cache_pass.cc @@ -0,0 +1,181 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "minddata/dataset/engine/opt/pre/cache_pass.h" +#include "minddata/dataset/engine/opt/pre/cache_transform_pass.h" +#include "minddata/dataset/engine/datasetops/cache_op.h" +#include "minddata/dataset/engine/datasetops/source/celeba_op.h" +#include "minddata/dataset/engine/datasetops/source/generator_op.h" +#include "minddata/dataset/engine/datasetops/source/manifest_op.h" +#include "minddata/dataset/engine/datasetops/source/mnist_op.h" +#include "minddata/dataset/engine/datasetops/source/voc_op.h" +#include "minddata/dataset/engine/datasetops/source/cifar_op.h" +#include "minddata/dataset/engine/datasetops/source/coco_op.h" +#include "minddata/dataset/engine/datasetops/source/image_folder_op.h" +#include "minddata/dataset/engine/datasetops/source/random_data_op.h" +#include "minddata/dataset/engine/datasetops/source/tf_reader_op.h" +#include "minddata/dataset/engine/datasetops/source/mindrecord_op.h" + +namespace mindspore { +namespace dataset { + +// Constructor +CachePass::CachePass(CacheTransformPass *transform_pass) + : transform_pass_(transform_pass), is_caching_(false), leaf_op_(nullptr) {} + +// Identifies the subtree below this node as a cached descendant tree. +Status CachePass::PreRunOnNode(std::shared_ptr node, bool *modified) { + *modified = false; + MS_LOG(INFO) << "Cache transform pass: CacheOp found, identified descendant tree."; + if (is_caching_) { + RETURN_STATUS_UNEXPECTED("Nested cache operations is not supported!"); + } + is_caching_ = true; + return Status::OK(); +} + +// Resets the tracking of the cache within the tree and assigns the operators that will be involved in a cache +// transformation +Status CachePass::RunOnNode(std::shared_ptr node, bool *modified) { + *modified = false; + is_caching_ = false; // We a no longer in a cache subtree. clear the flag. 
+ if (leaf_op_) { + MS_LOG(INFO) << "Cache transform pass: Set up transformation nodes for mappable cache."; + // Assign the leaf op into the transform pass, using move to null our copy of it, and also assign the cache op, + // using base class pointers. + transform_pass_->AddMappableCacheOperators(std::move(leaf_op_), node); + } else { + // If there was no leaf_op set, then this is a non-mappable scenario. + + if (sampler_) { + // Grab the sampler that was saved from the leaf and plug it into the cache op + node->SetSampler(std::move(sampler_)); + MS_LOG(INFO) << "Cache transform pass: Set up cache sampler from non-mappable leaf."; + } else { + // We're a cache op but no sampler was saved from leaf, so create a default sampler + int64_t num_samples = 0; + int64_t start_index = 0; + sampler_ = std::make_shared(num_samples, start_index); + node->SetSampler(std::move(sampler_)); + MS_LOG(INFO) << "Cache transform pass: Creating default sequential sampler for cache op."; + } + + // Get the computed check sum from all ops in our cache path below us and ask the cache op to create it's cache + uint32_t cache_crc = DatasetOp::GenerateCRC(node); + RETURN_IF_NOT_OK(node->CreateCache(cache_crc)); + } + + return Status::OK(); +} + +// Common code for mappable leaf setup. +Status CachePass::MappableCacheLeafSetup(std::shared_ptr leaf_op) { + // If a leaf has already been assigned, then we have more than one leaf inside this cache descendant tree. + if (is_caching_ && leaf_op_) { + RETURN_STATUS_UNEXPECTED("There is currently no support for multiple leaf nodes under cache."); + } + + // If we are a leaf in the caching path, then save this leaf. + if (is_caching_) { + MS_LOG(DEBUG) << "Cache transform pass: Mappable leaf in a cache descendant tree detected"; + leaf_op_ = std::move(leaf_op); + } + return Status::OK(); +} + +// Common code for non mappable leaf setup. 
+Status CachePass::NonMappableCacheLeafSetup(std::shared_ptr leaf_op) { + // If a leaf has already been assigned, then we have more than one leaf inside this cache descendant tree. + if (is_caching_ && leaf_op_) { + RETURN_STATUS_UNEXPECTED("There is currently no support for multiple leaf nodes under cache."); + } + + // Sampler for non mapable dataset only works if there is a downstream cache. Remove it from the leaf + // as save it for use by cache op in ascendant tree. + if (is_caching_) { + RETURN_IF_NOT_OK(leaf_op->FetchRemoveSampler(&sampler_)); + MS_LOG(DEBUG) << "Cache transform pass: Non mappable leaf in a cache descendant tree detected"; + } else { + // If we are a non-mappable leaf and are not in a cache tree, then this sampler is not used so we can + // remove it here. The leaf itself will provide it's own methods of fetching the data (not sampler-based) + std::shared_ptr sampler_from_leaf; + RETURN_IF_NOT_OK(leaf_op->FetchRemoveSampler(&sampler_from_leaf)); + } + return Status::OK(); +} + +// Perform leaf node cache tranform identifications +Status CachePass::RunOnNode(std::shared_ptr node, bool *modified) { + if (is_caching_) { + // If we are a TF Reader in a caching tree, then change our config so that it becomes a basic + // TF reader that parses all files. Selection of data will come from the sampler on the cache instead. 
+ node->MakeSimpleProducer(); + } + return NonMappableCacheLeafSetup(std::static_pointer_cast(node)); +} + +// Perform leaf node cache tranform identifications +Status CachePass::RunOnNode(std::shared_ptr node, bool *modified) { + return NonMappableCacheLeafSetup(std::static_pointer_cast(node)); +} + +// Perform leaf node cache tranform identifications +Status CachePass::RunOnNode(std::shared_ptr node, bool *modified) { + return MappableCacheLeafSetup(std::static_pointer_cast(node)); +} + +// Perform leaf node cache tranform identifications +Status CachePass::RunOnNode(std::shared_ptr node, bool *modified) { + return MappableCacheLeafSetup(std::static_pointer_cast(node)); +} + +// Perform leaf node cache tranform identifications +Status CachePass::RunOnNode(std::shared_ptr node, bool *modified) { + return MappableCacheLeafSetup(std::static_pointer_cast(node)); +} + +// Perform leaf node cache tranform identifications +Status CachePass::RunOnNode(std::shared_ptr node, bool *modified) { + return MappableCacheLeafSetup(std::static_pointer_cast(node)); +} + +// Perform leaf node cache tranform identifications +Status CachePass::RunOnNode(std::shared_ptr node, bool *modified) { + return MappableCacheLeafSetup(std::static_pointer_cast(node)); +} + +// Perform leaf node cache tranform identifications +Status CachePass::RunOnNode(std::shared_ptr node, bool *modified) { + return MappableCacheLeafSetup(std::static_pointer_cast(node)); +} + +// Perform leaf node cache tranform identifications +Status CachePass::RunOnNode(std::shared_ptr node, bool *modified) { + return MappableCacheLeafSetup(std::static_pointer_cast(node)); +} + +// Perform leaf node cache tranform identifications +Status CachePass::RunOnNode(std::shared_ptr node, bool *modified) { + return MappableCacheLeafSetup(std::static_pointer_cast(node)); +} + +// Perform leaf node cache tranform identifications +Status CachePass::RunOnNode(std::shared_ptr node, bool *modified) { + return 
MappableCacheLeafSetup(std::static_pointer_cast(node)); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/opt/pre/cache_pass.h b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/cache_pass.h new file mode 100644 index 0000000000..cbc805cd3e --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/cache_pass.h @@ -0,0 +1,138 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DATASET_ENGINE_OPT_PASS_PRE_CACHE_PASS_H_ +#define DATASET_ENGINE_OPT_PASS_PRE_CACHE_PASS_H_ + +#include +#include +#include +#include "minddata/dataset/engine/opt/pass.h" + +namespace mindspore { +namespace dataset { + +class CacheTransformPass; + +/// \class CachePass cache_pass.h +/// \brief This is a NodePass who's job is to identify and set up the nodes that will be involved in a cache +/// transformation. It works in conjunction with the CacheTransformPass +class CachePass : public NodePass { + public: + /// \brief Constructor + /// \param[in] transform_pass Raw pointer back to controlling tree pass + explicit CachePass(CacheTransformPass *transform_pass); + + /// \brief Identifies the subtree below this node as a cached descendant tree. 
+ /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status PreRunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Resets the tracking of the cache within the tree and assigns the operators that will be involved in a cache + /// transformation + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Perform leaf node cache tranform identifications + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Perform leaf node cache tranform identifications + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Perform leaf node cache tranform identifications + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Perform leaf node cache tranform identifications + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Perform leaf node cache tranform identifications + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status 
RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Perform leaf node cache tranform identifications + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Perform leaf node cache tranform identifications + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Perform leaf node cache tranform identifications + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Perform leaf node cache tranform identifications + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Perform leaf node cache tranform identifications + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Perform leaf node cache tranform identifications + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + private: + /// \brief Common code for mappable leaf setup. + /// \param[in] node The leaf node performing setup work. 
+ /// \return Status The error code return + Status MappableCacheLeafSetup(std::shared_ptr leaf_op); + + /// \brief Common code for non-mappable leaf setup. + /// \param[in] node The leaf node performing setup work. + /// \return Status The error code return + Status NonMappableCacheLeafSetup(std::shared_ptr leaf_op); + + bool is_caching_; + std::shared_ptr leaf_op_; + std::shared_ptr sampler_; + CacheTransformPass *transform_pass_; // Back pointer to the owning transform pass +}; + +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_ENGINE_OPT_PASS_PRE_CACHE_PASS_ diff --git a/mindspore/ccsrc/minddata/dataset/engine/opt/pre/cache_transform_pass.cc b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/cache_transform_pass.cc new file mode 100644 index 0000000000..033150e8f4 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/cache_transform_pass.cc @@ -0,0 +1,108 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include "minddata/dataset/engine/opt/pre/cache_pass.h" +#include "minddata/dataset/engine/opt/pre/cache_transform_pass.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/cache/cache_client.h" +#include "minddata/dataset/engine/datasetops/cache_lookup_op.h" +#include "minddata/dataset/engine/datasetops/cache_merge_op.h" +#include "minddata/dataset/engine/datasetops/cache_op.h" + +namespace mindspore { +namespace dataset { + +// constructor +CacheTransformPass::CacheTransformPass() {} + +// Runs a cache_pass first to set up the transformation nodes, and then drives any of these transformations +Status CacheTransformPass::RunOnTree(ExecutionTree *tree, bool *modified) { + MS_LOG(INFO) << "Pre pass: Cache transform pass started."; + // Create the cache pass and run it. The cache pass identifies and creates the leaf/cache pairs that we will + // use to execute a transform. + std::unique_ptr cache_pass = std::make_unique(this); + RETURN_IF_NOT_OK(cache_pass->Run(tree, modified)); + + // Then, execute the transform for each pair + for (auto cache_pair : cache_pairs_) { + MS_LOG(DEBUG) << "Cache transform pass: Executing a cache op mappable transform."; + ExecuteCacheTransform(tree, cache_pair.first, cache_pair.second, cache_pair.second->cache_client()); + } + MS_LOG(INFO) << "Pre pass: Cache transform pass complete."; + return Status::OK(); +} + +// Helper function to execute the cache transformation. +Status CacheTransformPass::ExecuteCacheTransform(ExecutionTree *tree, std::shared_ptr leaf_op, + std::shared_ptr cache_op, + std::shared_ptr cache_client) { + // Get local pointers the child/parent of the cache op. It's possible that the parent is null if the cache was + // the root node. 
It is also possible that cache_child == leaf_op + std::shared_ptr cache_child = cache_op->child(0); + DatasetOp *cache_parent = nullptr; + cache_op->Parent(&cache_parent, 0); // fetch the cache op's parent + + // Extract the sampler from the leaf. We will overwrite this sampler with the lookup op later. + std::shared_ptr leaf_sampler = leaf_op->sampler(); + + // Construct the merge op with defaults + std::shared_ptr merge_op; + CacheMergeOp::Builder merge_builder; + RETURN_IF_NOT_OK(merge_builder.SetClient(cache_client).Build(&merge_op)); + RETURN_IF_NOT_OK(tree->AssociateNode(merge_op)); + + // Construct the cache lookup op with defaults + std::shared_ptr cache_lookup_op; + CacheLookupOp::Builder lookup_builder; + RETURN_IF_NOT_OK(lookup_builder.SetClient(cache_client).SetSampler(std::move(leaf_sampler)).Build(&cache_lookup_op)); + RETURN_IF_NOT_OK(tree->AssociateNode(cache_lookup_op)); + + // Overwrite the old sampler in this leaf op to become the lookup op + leaf_op->SetSampler(cache_lookup_op); + + // If the cache had a parent, then go into that parent to remove the cache from it's child list and then + // replace it with the merge op. + if (cache_parent != nullptr) { + RETURN_IF_NOT_OK(cache_parent->RemoveChild(cache_op)); + RETURN_IF_NOT_OK(cache_parent->AddChild(merge_op)); + } else { + // If we didn't have a parent, then the merge op is the root node + RETURN_IF_NOT_OK(tree->AssignRoot(merge_op)); + } + + // Set the cache op to no longer be a parent over it's child. This will fully disconnect the old cache op. + // We maintain a local pointer to the old child though. + RETURN_IF_NOT_OK(cache_op->RemoveChild(cache_child)); + + // Connect the merge op + RETURN_IF_NOT_OK(merge_op->AddChild(std::move(cache_lookup_op))); + RETURN_IF_NOT_OK(merge_op->AddChild(std::move(cache_child))); + + // At this point, the cache op has already had it's children and parents taken away. 
Calling remove + // on it at this point will not do any node hookups, and instead set internal fields to invalid. + RETURN_IF_NOT_OK(cache_op->Remove()); + + return Status::OK(); +} + +// Assigns the leaf and cache operators that are involved in a cache transformation +void CacheTransformPass::AddMappableCacheOperators(std::shared_ptr leaf_op, + std::shared_ptr cache_op) { + cache_pairs_.push_back(std::make_pair(leaf_op, cache_op)); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/opt/pre/cache_transform_pass.h b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/cache_transform_pass.h new file mode 100644 index 0000000000..02c22c4472 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/cache_transform_pass.h @@ -0,0 +1,79 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef DATASET_ENGINE_OPT_PASS_PRE_CACHE_TRANSFORM_PASS_H_ +#define DATASET_ENGINE_OPT_PASS_PRE_CACHE_TRANSFORM_PASS_H_ + +#include +#include +#include +#include "minddata/dataset/engine/opt/pass.h" + +namespace mindspore { +namespace dataset { + +class DatasetOp; + +class CacheClient; + +/// \class CacheTransformPass cache_transform_pass.h +/// \brief This is a tree pass that will invoke a tree transformation to inject the correct operators for caching +/// operations +class CacheTransformPass : public TreePass { + public: + /// \brief Constructor + CacheTransformPass(); + + /// \brief Runs a cache_pass first to set up the transformation nodes, and then drives any of these transformations + /// \param[inout] tree The tree to operate on. + /// \param[inout] Indicate of the tree was modified. + /// \return Status The error code return + Status RunOnTree(ExecutionTree *tree, bool *modified) override; + + /// \brief Assigns the leaf and cache operators that are involved in a cache transformation + /// \param[in] leaf_op The leaf operator involved in the cache transform + /// \param[in] cache_op The cache operator involved in the cache transform + void AddMappableCacheOperators(std::shared_ptr leaf_op, std::shared_ptr cache_op); + + private: + /// \brief Helper function to execute the cache transformation. 
+ /// + /// Input: + /// Sampler + /// | + /// LeafOp --> OtherOps --> CacheOp + /// + /// Transformed: + /// Sampler --> CacheLookupOp ----------------> + /// | | + /// | MergeOp + /// | | + /// LeafOp --> OtherOps --> + /// + /// \param[in] leaf_op The leaf node in the transform + /// \param[in] cache_op The cache op in the transform (will get removed) + /// \param[in] cache_client The cache client + /// \return Status The error code return + Status ExecuteCacheTransform(ExecutionTree *tree, std::shared_ptr leaf_op, + std::shared_ptr cache_op, std::shared_ptr cache_client); + + // The two operators that work together to establish the cache transform + std::vector, std::shared_ptr>> cache_pairs_; +}; +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_ENGINE_OPT_PASS_PRE_CACHE_TRANSFORM_PASS_H_ diff --git a/mindspore/ccsrc/dataset/engine/opt/pre/removal_nodes.cc b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/removal_nodes.cc similarity index 61% rename from mindspore/ccsrc/dataset/engine/opt/pre/removal_nodes.cc rename to mindspore/ccsrc/minddata/dataset/engine/opt/pre/removal_nodes.cc index 831a2a76ba..f04d7bc07d 100644 --- a/mindspore/ccsrc/dataset/engine/opt/pre/removal_nodes.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/removal_nodes.cc @@ -15,21 +15,37 @@ */ #include -#include "dataset/engine/opt/pre/removal_nodes.h" -#include "dataset/engine/opt/pre/removal_pass.h" -#include "dataset/engine/datasetops/shuffle_op.h" +#include "minddata/dataset/engine/opt/pre/removal_nodes.h" +#include "minddata/dataset/engine/opt/pre/removal_pass.h" +#include "minddata/dataset/engine/datasetops/shuffle_op.h" namespace mindspore { namespace dataset { RemovalNodes::RemovalNodes(RemovalPass *removal_pass) : removal_pass_(removal_pass), is_caching_(false) {} +// Identifies the subtree below this node as a cached descendant tree. 
+Status RemovalNodes::PreRunOnNode(std::shared_ptr node, bool *modified) { + *modified = false; + MS_LOG(INFO) << "Removal pass: CacheOp found, identified descendant tree."; + is_caching_ = true; + return Status::OK(); +} + +// Resets the tracking of the cache within the tree +Status RemovalNodes::RunOnNode(std::shared_ptr node, bool *modified) { + *modified = false; + MS_LOG(INFO) << "Removal pass: cache descendant tree complete."; + is_caching_ = false; + return Status::OK(); +} + // Perform ShuffleOp removal check. Status RemovalNodes::RunOnNode(std::shared_ptr node, bool *modified) { *modified = false; // If we are in a cache descendant tree, then this shuffle op needs to be removed if (is_caching_) { - MS_LOG(DEBUG) << "ShuffleOp identified for removal (CacheOp is in ascendant tree)"; + MS_LOG(INFO) << "ShuffleOp identified for removal (CacheOp is in ascendant tree)"; if (removal_pass_) { removal_pass_->AddToRemovalList(std::static_pointer_cast(node)); } else { diff --git a/mindspore/ccsrc/dataset/engine/opt/pre/removal_nodes.h b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/removal_nodes.h similarity index 68% rename from mindspore/ccsrc/dataset/engine/opt/pre/removal_nodes.h rename to mindspore/ccsrc/minddata/dataset/engine/opt/pre/removal_nodes.h index 11ef37d80c..32025cd597 100644 --- a/mindspore/ccsrc/dataset/engine/opt/pre/removal_nodes.h +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/removal_nodes.h @@ -18,13 +18,11 @@ #define DATASET_ENGINE_OPT_PASS_PRE_REMOVAL_NODES_H_ #include -#include "dataset/engine/opt/pass.h" +#include "minddata/dataset/engine/opt/pass.h" +#include "minddata/dataset/engine/opt/pre/removal_pass.h" namespace mindspore { namespace dataset { - -class RemovalPass; - /// \class RemovalNodes removal_nodes.h /// \brief This is a NodePass who's job is to identify which nodes should be removed. /// It works in conjunction with the removal_pass. 
@@ -34,6 +32,21 @@ class RemovalNodes : public NodePass { /// \param[in] removal_pass Raw pointer back to controlling tree pass explicit RemovalNodes(RemovalPass *removal_pass); + /// \brief Identifies the subtree below this node as a cached descendant tree. + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status PreRunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Resets the tracking of the cache within the tree + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Destructor + ~RemovalNodes() = default; + /// \brief Perform ShuffleOp removal check /// \param[in] node The node being visited /// \param[inout] modified Indicator if the node was changed at all diff --git a/mindspore/ccsrc/dataset/engine/opt/pre/removal_pass.cc b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/removal_pass.cc similarity index 84% rename from mindspore/ccsrc/dataset/engine/opt/pre/removal_pass.cc rename to mindspore/ccsrc/minddata/dataset/engine/opt/pre/removal_pass.cc index 31ec31234f..0db422a7c2 100644 --- a/mindspore/ccsrc/dataset/engine/opt/pre/removal_pass.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/removal_pass.cc @@ -16,9 +16,9 @@ #include #include -#include "dataset/engine/opt/pre/removal_nodes.h" -#include "dataset/engine/opt/pre/removal_pass.h" -#include "dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/opt/pre/removal_nodes.h" +#include "minddata/dataset/engine/opt/pre/removal_pass.h" +#include "minddata/dataset/engine/execution_tree.h" namespace mindspore { namespace dataset { @@ -28,6 +28,7 @@ RemovalPass::RemovalPass() {} // Runs a removal_nodes pass first to find out which nodes to remove, then removes them. 
Status RemovalPass::RunOnTree(ExecutionTree *tree, bool *modified) { + MS_LOG(INFO) << "Pre pass: removal pass started."; // Create the removal node pass which can identify which nodes need to be removed. std::unique_ptr removal_nodes = std::make_unique(this); RETURN_IF_NOT_OK(removal_nodes->Run(tree, modified)); @@ -36,6 +37,7 @@ Status RemovalPass::RunOnTree(ExecutionTree *tree, bool *modified) { for (auto node : removal_nodes_) { node->Remove(); } + MS_LOG(INFO) << "Pre pass: removal pass complete."; return Status::OK(); } diff --git a/mindspore/ccsrc/dataset/engine/opt/pre/removal_pass.h b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/removal_pass.h similarity index 94% rename from mindspore/ccsrc/dataset/engine/opt/pre/removal_pass.h rename to mindspore/ccsrc/minddata/dataset/engine/opt/pre/removal_pass.h index 6523ca69b2..bcab7cf08c 100644 --- a/mindspore/ccsrc/dataset/engine/opt/pre/removal_pass.h +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/removal_pass.h @@ -19,7 +19,7 @@ #include #include -#include "dataset/engine/opt/pass.h" +#include "minddata/dataset/engine/opt/pass.h" namespace mindspore { namespace dataset { @@ -34,6 +34,9 @@ class RemovalPass : public TreePass { /// \brief Constructor RemovalPass(); + /// \brief Destructor + ~RemovalPass() = default; + /// \brief Runs a removal_nodes pass first to find out which nodes to remove, then removes them. /// \param[inout] tree The tree to operate on. /// \param[inout] Indicate of the tree was modified. 
diff --git a/mindspore/ccsrc/dataset/engine/opt/util/printer_pass.cc b/mindspore/ccsrc/minddata/dataset/engine/opt/util/printer_pass.cc similarity index 97% rename from mindspore/ccsrc/dataset/engine/opt/util/printer_pass.cc rename to mindspore/ccsrc/minddata/dataset/engine/opt/util/printer_pass.cc index 852bc018b2..eb74d8fcc3 100644 --- a/mindspore/ccsrc/dataset/engine/opt/util/printer_pass.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/util/printer_pass.cc @@ -15,7 +15,7 @@ */ #include -#include "dataset/engine/opt/util/printer_pass.h" +#include "minddata/dataset/engine/opt/util/printer_pass.h" namespace mindspore { namespace dataset { @@ -50,12 +50,6 @@ Status PrinterPass::RunOnNode(std::shared_ptr node, bool *modified) { return Status::OK(); } -Status PrinterPass::RunOnNode(std::shared_ptr node, bool *modified) { - *modified = false; - std::cout << "Visiting FilterOp" << '\n'; - return Status::OK(); -} - Status PrinterPass::RunOnNode(std::shared_ptr node, bool *modified) { *modified = false; std::cout << "Visiting SkipOp" << '\n'; @@ -67,11 +61,6 @@ Status PrinterPass::RunOnNode(std::shared_ptr node, bool *modified) { return Status::OK(); } -Status PrinterPass::RunOnNode(std::shared_ptr node, bool *modified) { - *modified = false; - std::cout << "Visiting GeneratorOp" << '\n'; - return Status::OK(); -} Status PrinterPass::RunOnNode(std::shared_ptr node, bool *modified) { *modified = false; std::cout << "Visiting MindRecordOp" << '\n'; @@ -84,6 +73,20 @@ Status PrinterPass::RunOnNode(std::shared_ptr node, bool *modified) return Status::OK(); } +#ifdef ENABLE_PYTHON +Status PrinterPass::RunOnNode(std::shared_ptr node, bool *modified) { + *modified = false; + std::cout << "Visiting FilterOp" << '\n'; + return Status::OK(); +} + +Status PrinterPass::RunOnNode(std::shared_ptr node, bool *modified) { + *modified = false; + std::cout << "Visiting GeneratorOp" << '\n'; + return Status::OK(); +} +#endif + Status PrinterPass::RunOnNode(std::shared_ptr node, bool 
*modified) { *modified = false; std::cout << "Visiting TakeOp" << '\n'; diff --git a/mindspore/ccsrc/dataset/engine/opt/util/printer_pass.h b/mindspore/ccsrc/minddata/dataset/engine/opt/util/printer_pass.h similarity index 96% rename from mindspore/ccsrc/dataset/engine/opt/util/printer_pass.h rename to mindspore/ccsrc/minddata/dataset/engine/opt/util/printer_pass.h index fa04a88277..527df3ccc9 100644 --- a/mindspore/ccsrc/dataset/engine/opt/util/printer_pass.h +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/util/printer_pass.h @@ -18,7 +18,7 @@ #define DATASET_ENGINE_OPT_PASS_UTIL_PRINTER_H #include -#include "dataset/engine/opt/pass.h" +#include "minddata/dataset/engine/opt/pass.h" namespace mindspore { namespace dataset { @@ -35,18 +35,20 @@ class PrinterPass : public NodePass { Status RunOnNode(std::shared_ptr node, bool *modified) override; - Status RunOnNode(std::shared_ptr node, bool *modified) override; - Status RunOnNode(std::shared_ptr node, bool *modified) override; Status RunOnNode(std::shared_ptr node, bool *modified) override; - Status RunOnNode(std::shared_ptr node, bool *modified) override; - Status RunOnNode(std::shared_ptr node, bool *modified) override; Status RunOnNode(std::shared_ptr node, bool *modified) override; +#ifdef ENABLE_PYTHON + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + Status RunOnNode(std::shared_ptr node, bool *modified) override; +#endif + Status RunOnNode(std::shared_ptr node, bool *modified) override; Status RunOnNode(std::shared_ptr node, bool *modified) override; diff --git a/mindspore/ccsrc/dataset/engine/perf/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/engine/perf/CMakeLists.txt similarity index 100% rename from mindspore/ccsrc/dataset/engine/perf/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/engine/perf/CMakeLists.txt diff --git a/mindspore/ccsrc/dataset/engine/perf/connector_size.cc b/mindspore/ccsrc/minddata/dataset/engine/perf/connector_size.cc similarity index 93% rename 
from mindspore/ccsrc/dataset/engine/perf/connector_size.cc rename to mindspore/ccsrc/minddata/dataset/engine/perf/connector_size.cc index 0bd2754075..20b4908030 100644 --- a/mindspore/ccsrc/dataset/engine/perf/connector_size.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/connector_size.cc @@ -13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/perf/connector_size.h" +#include "minddata/dataset/engine/perf/connector_size.h" #include #include #include #include -#include "dataset/core/config_manager.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/util/path.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/util/path.h" using json = nlohmann::json; namespace mindspore { diff --git a/mindspore/ccsrc/dataset/engine/perf/connector_size.h b/mindspore/ccsrc/minddata/dataset/engine/perf/connector_size.h similarity index 95% rename from mindspore/ccsrc/dataset/engine/perf/connector_size.h rename to mindspore/ccsrc/minddata/dataset/engine/perf/connector_size.h index 2584289fb4..61ba06a76f 100644 --- a/mindspore/ccsrc/dataset/engine/perf/connector_size.h +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/connector_size.h @@ -19,8 +19,8 @@ #include #include #include -#include "dataset/engine/perf/profiling.h" -#include "dataset/engine/datasetops/dataset_op.h" +#include "minddata/dataset/engine/perf/profiling.h" +#include "minddata/dataset/engine/datasetops/dataset_op.h" using json = nlohmann::json; diff --git a/mindspore/ccsrc/dataset/engine/perf/connector_throughput.cc b/mindspore/ccsrc/minddata/dataset/engine/perf/connector_throughput.cc similarity index 95% rename from mindspore/ccsrc/dataset/engine/perf/connector_throughput.cc rename to mindspore/ccsrc/minddata/dataset/engine/perf/connector_throughput.cc index 4fd59de390..b5e2efaf73 100644 --- 
a/mindspore/ccsrc/dataset/engine/perf/connector_throughput.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/connector_throughput.cc @@ -20,9 +20,9 @@ #include #include #include -#include "dataset/engine/perf/connector_throughput.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/util/path.h" +#include "minddata/dataset/engine/perf/connector_throughput.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/util/path.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/perf/connector_throughput.h b/mindspore/ccsrc/minddata/dataset/engine/perf/connector_throughput.h similarity index 90% rename from mindspore/ccsrc/dataset/engine/perf/connector_throughput.h rename to mindspore/ccsrc/minddata/dataset/engine/perf/connector_throughput.h index e873eb8315..9cf387230a 100644 --- a/mindspore/ccsrc/dataset/engine/perf/connector_throughput.h +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/connector_throughput.h @@ -22,16 +22,15 @@ #include #include #include -#include "dataset/engine/perf/profiling.h" -#include "dataset/engine/perf/perf_data.h" -#include "dataset/engine/perf/cyclic_array.h" -#include "dataset/engine/datasetops/dataset_op.h" +#include "minddata/dataset/engine/perf/profiling.h" +#include "minddata/dataset/engine/perf/perf_data.h" +#include "minddata/dataset/engine/perf/cyclic_array.h" +#include "minddata/dataset/engine/datasetops/dataset_op.h" +#include "minddata/dataset/engine/execution_tree.h" using json = nlohmann::json; namespace mindspore { namespace dataset { -class ExecutionTree; - // Connector throughput samples the output connector size of each op in the pipeline. // For the description of the data structure see perf_buffer.h // It support JSON serialization for external usage. 
@@ -52,6 +51,10 @@ class ConnectorThroughput : public Sampling { timestamps_.AddSample(std::vector(1)); out_buffer_count_table_.AddSample(std::vector(n_nodes_)); } + + /// \brief Destructor + ~ConnectorThroughput() = default; + // Driver function for connector size sampling. // This function samples the connector size of every nodes within the ExecutionTree Status Sample() override; diff --git a/mindspore/ccsrc/dataset/engine/perf/cyclic_array.h b/mindspore/ccsrc/minddata/dataset/engine/perf/cyclic_array.h similarity index 99% rename from mindspore/ccsrc/dataset/engine/perf/cyclic_array.h rename to mindspore/ccsrc/minddata/dataset/engine/perf/cyclic_array.h index fa60b401c5..2dfc3fd99d 100644 --- a/mindspore/ccsrc/dataset/engine/perf/cyclic_array.h +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/cyclic_array.h @@ -21,7 +21,7 @@ #include #include #include -#include "dataset/core/constants.h" +#include "minddata/dataset/core/constants.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/perf/dataset_iterator_tracing.cc b/mindspore/ccsrc/minddata/dataset/engine/perf/dataset_iterator_tracing.cc similarity index 95% rename from mindspore/ccsrc/dataset/engine/perf/dataset_iterator_tracing.cc rename to mindspore/ccsrc/minddata/dataset/engine/perf/dataset_iterator_tracing.cc index 99b0c2d7e0..4491db144e 100644 --- a/mindspore/ccsrc/dataset/engine/perf/dataset_iterator_tracing.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/dataset_iterator_tracing.cc @@ -15,8 +15,8 @@ */ #include #include -#include "dataset/engine/perf/dataset_iterator_tracing.h" -#include "dataset/util/path.h" +#include "minddata/dataset/engine/perf/dataset_iterator_tracing.h" +#include "minddata/dataset/util/path.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/perf/dataset_iterator_tracing.h b/mindspore/ccsrc/minddata/dataset/engine/perf/dataset_iterator_tracing.h similarity index 96% rename from 
mindspore/ccsrc/dataset/engine/perf/dataset_iterator_tracing.h rename to mindspore/ccsrc/minddata/dataset/engine/perf/dataset_iterator_tracing.h index 129863c6d1..e7ba237a0a 100644 --- a/mindspore/ccsrc/dataset/engine/perf/dataset_iterator_tracing.h +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/dataset_iterator_tracing.h @@ -19,7 +19,7 @@ #include #include -#include "dataset/engine/perf/profiling.h" +#include "minddata/dataset/engine/perf/profiling.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/perf/device_queue_tracing.cc b/mindspore/ccsrc/minddata/dataset/engine/perf/device_queue_tracing.cc similarity index 95% rename from mindspore/ccsrc/dataset/engine/perf/device_queue_tracing.cc rename to mindspore/ccsrc/minddata/dataset/engine/perf/device_queue_tracing.cc index 204a83e3fb..776b483b79 100644 --- a/mindspore/ccsrc/dataset/engine/perf/device_queue_tracing.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/device_queue_tracing.cc @@ -16,8 +16,8 @@ #include #include -#include "dataset/engine/perf/device_queue_tracing.h" -#include "dataset/util/path.h" +#include "minddata/dataset/engine/perf/device_queue_tracing.h" +#include "minddata/dataset/util/path.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/perf/device_queue_tracing.h b/mindspore/ccsrc/minddata/dataset/engine/perf/device_queue_tracing.h similarity index 96% rename from mindspore/ccsrc/dataset/engine/perf/device_queue_tracing.h rename to mindspore/ccsrc/minddata/dataset/engine/perf/device_queue_tracing.h index 13ef7121c1..32f9d2d8c2 100644 --- a/mindspore/ccsrc/dataset/engine/perf/device_queue_tracing.h +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/device_queue_tracing.h @@ -19,7 +19,7 @@ #include #include -#include "dataset/engine/perf/profiling.h" +#include "minddata/dataset/engine/perf/profiling.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/perf/monitor.cc 
b/mindspore/ccsrc/minddata/dataset/engine/perf/monitor.cc similarity index 91% rename from mindspore/ccsrc/dataset/engine/perf/monitor.cc rename to mindspore/ccsrc/minddata/dataset/engine/perf/monitor.cc index 8a0d682b81..7fa7e6fc78 100644 --- a/mindspore/ccsrc/dataset/engine/perf/monitor.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/monitor.cc @@ -15,9 +15,9 @@ */ #include -#include "dataset/core/config_manager.h" -#include "dataset/engine/perf/monitor.h" -#include "dataset/engine/execution_tree.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/engine/perf/monitor.h" +#include "minddata/dataset/engine/execution_tree.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/perf/monitor.h b/mindspore/ccsrc/minddata/dataset/engine/perf/monitor.h similarity index 93% rename from mindspore/ccsrc/dataset/engine/perf/monitor.h rename to mindspore/ccsrc/minddata/dataset/engine/perf/monitor.h index 8b4245db8e..1e669dad71 100644 --- a/mindspore/ccsrc/dataset/engine/perf/monitor.h +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/monitor.h @@ -20,8 +20,8 @@ #include #include #include -#include "dataset/util/status.h" -#include "dataset/engine/perf/profiling.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/engine/perf/profiling.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/perf/perf_data.h b/mindspore/ccsrc/minddata/dataset/engine/perf/perf_data.h similarity index 98% rename from mindspore/ccsrc/dataset/engine/perf/perf_data.h rename to mindspore/ccsrc/minddata/dataset/engine/perf/perf_data.h index a201d705ea..8f215fd8df 100644 --- a/mindspore/ccsrc/dataset/engine/perf/perf_data.h +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/perf_data.h @@ -18,7 +18,7 @@ #define DATASET_PERF_DATA_H #include -#include "dataset/core/constants.h" +#include "minddata/dataset/core/constants.h" namespace mindspore { namespace dataset { diff --git 
a/mindspore/ccsrc/dataset/engine/perf/profiling.cc b/mindspore/ccsrc/minddata/dataset/engine/perf/profiling.cc similarity index 93% rename from mindspore/ccsrc/dataset/engine/perf/profiling.cc rename to mindspore/ccsrc/minddata/dataset/engine/perf/profiling.cc index 66f27c46ba..f5c018c03b 100644 --- a/mindspore/ccsrc/dataset/engine/perf/profiling.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/profiling.cc @@ -13,17 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/perf/profiling.h" +#include "minddata/dataset/engine/perf/profiling.h" #include #include #include #include "common/utils.h" -#include "dataset/util/path.h" -#include "dataset/engine/perf/monitor.h" -#include "dataset/engine/perf/device_queue_tracing.h" -#include "dataset/engine/perf/connector_size.h" -#include "dataset/engine/perf/connector_throughput.h" -#include "dataset/engine/perf/dataset_iterator_tracing.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/engine/perf/monitor.h" +#include "minddata/dataset/engine/perf/device_queue_tracing.h" +#include "minddata/dataset/engine/perf/connector_size.h" +#include "minddata/dataset/engine/perf/connector_throughput.h" +#include "minddata/dataset/engine/perf/dataset_iterator_tracing.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/engine/perf/profiling.h b/mindspore/ccsrc/minddata/dataset/engine/perf/profiling.h similarity index 99% rename from mindspore/ccsrc/dataset/engine/perf/profiling.h rename to mindspore/ccsrc/minddata/dataset/engine/perf/profiling.h index e38c2d5e54..24f7f2efe8 100644 --- a/mindspore/ccsrc/dataset/engine/perf/profiling.h +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/profiling.h @@ -21,7 +21,7 @@ #include #include #include -#include "dataset/util/status.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git 
a/mindspore/ccsrc/dataset/engine/tdt/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/engine/tdt/CMakeLists.txt similarity index 100% rename from mindspore/ccsrc/dataset/engine/tdt/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/engine/tdt/CMakeLists.txt diff --git a/mindspore/ccsrc/dataset/engine/tdt/tdt_plugin.cc b/mindspore/ccsrc/minddata/dataset/engine/tdt/tdt_plugin.cc similarity index 97% rename from mindspore/ccsrc/dataset/engine/tdt/tdt_plugin.cc rename to mindspore/ccsrc/minddata/dataset/engine/tdt/tdt_plugin.cc index ca9f2176f5..126291179a 100644 --- a/mindspore/ccsrc/dataset/engine/tdt/tdt_plugin.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/tdt/tdt_plugin.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/tdt/tdt_plugin.h" +#include "minddata/dataset/engine/tdt/tdt_plugin.h" #include "common/utils.h" #include "utils/log_adapter.h" -#include "dataset/engine/perf/profiling.h" +#include "minddata/dataset/engine/perf/profiling.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/tdt/tdt_plugin.h b/mindspore/ccsrc/minddata/dataset/engine/tdt/tdt_plugin.h similarity index 91% rename from mindspore/ccsrc/dataset/engine/tdt/tdt_plugin.h rename to mindspore/ccsrc/minddata/dataset/engine/tdt/tdt_plugin.h index 304b205b81..a7db08b7f5 100644 --- a/mindspore/ccsrc/dataset/engine/tdt/tdt_plugin.h +++ b/mindspore/ccsrc/minddata/dataset/engine/tdt/tdt_plugin.h @@ -24,9 +24,9 @@ #include #include "tdt/tdt_host_interface.h" -#include "dataset/core/data_type.h" -#include "dataset/core/tensor.h" -#include "dataset/core/tensor_row.h" +#include "minddata/dataset/core/data_type.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/tensor_row.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/core/constants.h 
b/mindspore/ccsrc/minddata/dataset/include/dataset/core/constants.h new file mode 120000 index 0000000000..22fe6d07e1 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/include/dataset/core/constants.h @@ -0,0 +1 @@ +../../../core/constants.h \ No newline at end of file diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/core/data_type.h b/mindspore/ccsrc/minddata/dataset/include/dataset/core/data_type.h new file mode 120000 index 0000000000..37a0e1b686 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/include/dataset/core/data_type.h @@ -0,0 +1 @@ +../../../core/data_type.h \ No newline at end of file diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/core/tensor_shape.h b/mindspore/ccsrc/minddata/dataset/include/dataset/core/tensor_shape.h new file mode 120000 index 0000000000..1fb7a24d91 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/include/dataset/core/tensor_shape.h @@ -0,0 +1 @@ +../../../core/tensor_shape.h \ No newline at end of file diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/util/status.h b/mindspore/ccsrc/minddata/dataset/include/dataset/util/status.h new file mode 120000 index 0000000000..b06279c05b --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/include/dataset/util/status.h @@ -0,0 +1 @@ +../../../util/status.h \ No newline at end of file diff --git a/mindspore/ccsrc/minddata/dataset/include/datasets.h b/mindspore/ccsrc/minddata/dataset/include/datasets.h new file mode 100644 index 0000000000..6f38f5ea16 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/include/datasets.h @@ -0,0 +1,357 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DATASET_INCLUDE_DATASETS_H_ +#define DATASET_INCLUDE_DATASETS_H_ + +#include +#include +#include +#include +#include +#include +#include "minddata/dataset/include/tensor.h" +#include "minddata/dataset/include/iterator.h" +#include "minddata/dataset/include/samplers.h" + +namespace mindspore { +namespace dataset { + +// Forward declare +class DatasetOp; +class DataSchema; +class Tensor; +class TensorShape; + +namespace api { + +class TensorOperation; +class SamplerObj; +class ImageFolderDataset; +class MnistDataset; +class BatchDataset; +class RepeatDataset; +class MapDataset; +class ShuffleDataset; +class Cifar10Dataset; +class ProjectDataset; + +/// \brief Function to create an ImageFolderDataset +/// \notes A source dataset that reads images from a tree of directories +/// All images within one folder have the same label +/// The generated dataset has two columns ['image', 'label'] +/// \param[in] dataset_dir Path to the root directory that contains the dataset +/// \param[in] decode A flag to decode in ImageFolder +/// \param[in] sampler Object used to choose samples from the dataset. 
If sampler is `nullptr`, +/// A `RandomSampler` will be used to randomly iterate the entire dataset +/// \param[in] extensions File extensions to be read +/// \param[in] class_indexing a class name to label map +/// \return Shared pointer to the current ImageFolderDataset +std::shared_ptr ImageFolder(std::string dataset_dir, bool decode = false, + std::shared_ptr sampler = nullptr, + std::set extensions = {}, + std::map class_indexing = {}); + +/// \brief Function to create a MnistDataset +/// \notes The generated dataset has two columns ['image', 'label'] +/// \param[in] dataset_dir Path to the root directory that contains the dataset +/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, +/// A `RandomSampler` will be used to randomly iterate the entire dataset +/// \return Shared pointer to the current MnistDataset +std::shared_ptr Mnist(std::string dataset_dir, std::shared_ptr sampler = nullptr); + +/// \brief Function to create a Cifar10 Dataset +/// \notes The generated dataset has two columns ['image', 'label'] +/// \param[in] dataset_dir Path to the root directory that contains the dataset +/// \param[in] num_samples The number of images to be included in the dataset +/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler` +/// will be used to randomly iterate the entire dataset +/// \return Shared pointer to the current Dataset +std::shared_ptr Cifar10(const std::string &dataset_dir, int32_t num_samples, + std::shared_ptr sampler); + +/// \class Dataset datasets.h +/// \brief A base class to represent a dataset in the data pipeline. 
+class Dataset : public std::enable_shared_from_this { + public: + friend class Iterator; + + /// \brief Constructor + Dataset(); + + /// \brief Destructor + ~Dataset() = default; + + /// \brief Pure virtual function to convert a Dataset class into a runtime dataset object + /// \return shared pointer to the list of newly created DatasetOps + virtual std::shared_ptr>> Build() = 0; + + /// \brief Pure virtual function for derived class to implement parameters validation + /// \return bool True if all the params are valid + virtual bool ValidateParams() = 0; + + /// \brief Setter function for runtime number of workers + /// \param[in] num_workers The number of threads in this operator + /// \return Shared pointer to the original object + std::shared_ptr SetNumWorkers(int32_t num_workers) { + num_workers_ = num_workers; + return shared_from_this(); + } + + /// \brief Function to create an Iterator over the Dataset pipeline + /// \return Shared pointer to the Iterator + std::shared_ptr CreateIterator(); + + /// \brief Function to create a BatchDataset + /// \notes Combines batch_size number of consecutive rows into batches + /// \param[in] batch_size Path to the root directory that contains the dataset + /// \param[in] drop_remainder Determines whether or not to drop the last possibly incomplete + /// batch. If true, and if there are less than batch_size rows + /// available to make the last batch, then those rows will + /// be dropped and not propagated to the next node + /// \return Shared pointer to the current BatchDataset + std::shared_ptr Batch(int32_t batch_size, bool drop_remainder = false); + + /// \brief Function to create a RepeatDataset + /// \notes Repeats this dataset count times. 
Repeat indefinitely if count is -1 + /// \param[in] count Number of times the dataset should be repeated + /// \return Shared pointer to the current Dataset + /// \note Repeat will return shared pointer to `Dataset` instead of `RepeatDataset` + /// due to a limitation in the current implementation + std::shared_ptr Repeat(int32_t count = -1); + + /// \brief Function to create a MapDataset + /// \notes Applies each operation in operations to this dataset + /// \param[in] operations Vector of operations to be applied on the dataset. Operations are + /// applied in the order they appear in this list + /// \param[in] input_columns Vector of the names of the columns that will be passed to the first + /// operation as input. The size of this list must match the number of + /// input columns expected by the first operator. The default input_columns + /// is the first column + /// \param[in] output_columns Vector of names assigned to the columns outputted by the last operation + /// This parameter is mandatory if len(input_columns) != len(output_columns) + /// The size of this list must match the number of output columns of the + /// last operation. 
The default output_columns will have the same + /// name as the input columns, i.e., the columns will be replaced + /// \param[in] project_columns A list of column names to project + /// \return Shared pointer to the current MapDataset + std::shared_ptr Map(std::vector> operations, + std::vector input_columns = {}, + std::vector output_columns = {}, + const std::vector &project_columns = {}); + + /// \brief Function to create a Shuffle Dataset + /// \notes Randomly shuffles the rows of this dataset + /// \param[in] buffer_size The size of the buffer (must be larger than 1) for shuffling + /// \return Shared pointer to the current ShuffleDataset + std::shared_ptr Shuffle(int32_t shuffle_size); + + /// \brief Function to create a Project Dataset + /// \notes Applies project to the dataset + /// \param[in] columns The name of columns to project + /// \return Shared pointer to the current Dataset + std::shared_ptr Project(const std::vector &columns); + + protected: + std::vector> children; + std::shared_ptr parent; + + int32_t num_workers_; + int32_t rows_per_buffer_; + int32_t connector_que_size_; +}; + +/* ####################################### Derived Dataset classes ################################# */ + +/// \class ImageFolderDataset +/// \brief A Dataset derived class to represent ImageFolder dataset +class ImageFolderDataset : public Dataset { + public: + /// \brief Constructor + ImageFolderDataset(std::string dataset_dir, bool decode, std::shared_ptr sampler, bool recursive, + std::set extensions, std::map class_indexing); + + /// \brief Destructor + ~ImageFolderDataset() = default; + + /// \brief a base class override function to create the required runtime dataset op objects for this class + /// \return shared pointer to the list of newly created DatasetOps + std::shared_ptr>> Build() override; + + /// \brief Parameters validation + /// \return bool true if all the params are valid + bool ValidateParams() override; + + private: + std::string dataset_dir_; + 
bool decode_; + bool recursive_; + std::shared_ptr sampler_; + std::map class_indexing_; + std::set exts_; +}; + +class MnistDataset : public Dataset { + public: + /// \brief Constructor + MnistDataset(std::string dataset_dir, std::shared_ptr sampler); + + /// \brief Destructor + ~MnistDataset() = default; + + /// \brief a base class override function to create the required runtime dataset op objects for this class + /// \return shared pointer to the list of newly created DatasetOps + std::shared_ptr>> Build() override; + + /// \brief Parameters validation + /// \return bool true if all the params are valid + bool ValidateParams() override; + + private: + std::string dataset_dir_; + std::shared_ptr sampler_; +}; + +class BatchDataset : public Dataset { + public: + /// \brief Constructor + BatchDataset(int32_t batch_size, bool drop_remainder, bool pad, std::vector cols_to_map, + std::map>> pad_map); + + /// \brief Destructor + ~BatchDataset() = default; + + /// \brief a base class override function to create the required runtime dataset op objects for this class + /// \return shared pointer to the list of newly created DatasetOps + std::shared_ptr>> Build() override; + + /// \brief Parameters validation + /// \return bool true if all the params are valid + bool ValidateParams() override; + + private: + int32_t batch_size_; + bool drop_remainder_; + bool pad_; + std::vector cols_to_map_; + std::map>> pad_map_; +}; + +class RepeatDataset : public Dataset { + public: + /// \brief Constructor + explicit RepeatDataset(uint32_t count); + + /// \brief Destructor + ~RepeatDataset() = default; + + /// \brief a base class override function to create the required runtime dataset op objects for this class + /// \return shared pointer to the list of newly created DatasetOps + std::shared_ptr>> Build() override; + + /// \brief Parameters validation + /// \return bool true if all the params are valid + bool ValidateParams() override; + + private: + uint32_t repeat_count_; +}; + 
+class ShuffleDataset : public Dataset { + public: + ShuffleDataset(int32_t shuffle_size, bool reset_every_epoch); + + ~ShuffleDataset() = default; + + std::shared_ptr>> Build() override; + + bool ValidateParams() override; + + private: + int32_t shuffle_size_; + uint32_t shuffle_seed_; + bool reset_every_epoch_; +}; + +class MapDataset : public Dataset { + public: + /// \brief Constructor + MapDataset(std::vector> operations, std::vector input_columns = {}, + std::vector output_columns = {}, const std::vector &columns = {}); + + /// \brief Destructor + ~MapDataset() = default; + + /// \brief a base class override function to create the required runtime dataset op objects for this class + /// \return shared pointer to the list of newly created DatasetOps + std::shared_ptr>> Build() override; + + /// \brief Parameters validation + /// \return bool true if all the params are valid + bool ValidateParams() override; + + private: + std::vector> operations_; + std::vector input_columns_; + std::vector output_columns_; + std::vector project_columns_; +}; + +class Cifar10Dataset : public Dataset { + public: + /// \brief Constructor + Cifar10Dataset(const std::string &dataset_dir, int32_t num_samples, std::shared_ptr sampler); + + /// \brief Destructor + ~Cifar10Dataset() = default; + + /// \brief a base class override function to create the required runtime dataset op objects for this class + /// \return shared pointer to the list of newly created DatasetOps + std::shared_ptr>> Build() override; + + /// \brief Parameters validation + /// \return bool true if all the params are valid + bool ValidateParams() override; + + private: + std::string dataset_dir_; + int32_t num_samples_; + std::shared_ptr sampler_; +}; + +class ProjectDataset : public Dataset { + public: + /// \brief Constructor + explicit ProjectDataset(const std::vector &columns); + + /// \brief Destructor + ~ProjectDataset() = default; + + /// \brief a base class override function to create the required runtime 
dataset op objects for this class + /// \return shared pointer to the list of newly created DatasetOps + std::shared_ptr>> Build() override; + + /// \brief Parameters validation + /// \return bool true if all the params are valid + bool ValidateParams() override; + + private: + std::vector columns_; +}; +} // namespace api +} // namespace dataset +} // namespace mindspore +#endif // DATASET_INCLUDE_DATASETS_H_ diff --git a/mindspore/ccsrc/minddata/dataset/include/iterator.h b/mindspore/ccsrc/minddata/dataset/include/iterator.h new file mode 100644 index 0000000000..c3784821a6 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/include/iterator.h @@ -0,0 +1,115 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DATASET_INCLUDE_ITERATOR_H_ +#define DATASET_INCLUDE_ITERATOR_H_ + +#include +#include +#include +#include +#include "minddata/dataset/include/status.h" + +namespace mindspore { +namespace dataset { + +// Forward declare +class ExecutionTree; +class DatasetIterator; +class DatasetOp; +class Tensor; + +namespace api { + +class Dataset; + +using TensorMap = std::unordered_map>; + +// Abstract class for iterating over the dataset. +class Iterator { + public: + /// \brief Constructor + Iterator() = default; + + /// \brief Destructor + ~Iterator() = default; + + /// \brief Method for building and launching the pipeline. + /// \param[in] ops - a vector of DatasetOp in the data pipeline. 
+ /// \return - a Status error code, returns OK if no error encountered. + Status BuildAndLaunchTree(std::shared_ptr ds); + + /// \brief Function to get the next row from the data pipeline. + /// \param[out] row - the output tensor row. + void GetNextRow(TensorMap *row); + + /// \brief Function to shut down the data pipeline. + void Stop(); + + class _Iterator { + public: + explicit _Iterator(Iterator *lt) : lt_{lt}, cur_row_{nullptr} { + if (lt_) { + cur_row_ = new TensorMap(); + lt_->GetNextRow(cur_row_); + } + } + + // Destructor + ~_Iterator() { + if (cur_row_) { + delete cur_row_; + } + } + + _Iterator &operator++() { + if (lt_) { + ++ind_; + lt_->GetNextRow(cur_row_); + } + if (cur_row_ && cur_row_->size() == 0) { + delete cur_row_; + cur_row_ = nullptr; + } + return *this; + } // prefix ++ overload + TensorMap &operator*() { return *cur_row_; } // dereference operator + TensorMap *operator->() { return cur_row_; } + + bool operator!=(const _Iterator &rhs) { return cur_row_ != rhs.cur_row_; } + + private: + int ind_; // the cur node our Iterator points to + Iterator *lt_; + TensorMap *cur_row_; + }; + + _Iterator begin() { return _Iterator(this); } + + _Iterator end() { return _Iterator(nullptr); } + + private: + // Runtime tree. + // Use shared_ptr instead of unique_ptr because the DatasetIterator constructor takes in a shared_ptr type. 
+ std::shared_ptr tree_; + + // Runtime iterator + std::unique_ptr iterator_; +}; +} // namespace api +} // namespace dataset +} // namespace mindspore +#endif // DATASET_INCLUDE_ITERATOR_H_ diff --git a/mindspore/ccsrc/minddata/dataset/include/samplers.h b/mindspore/ccsrc/minddata/dataset/include/samplers.h new file mode 100644 index 0000000000..3d57e67059 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/include/samplers.h @@ -0,0 +1,199 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DATASET_API_SAMPLERS_H_ +#define DATASET_API_SAMPLERS_H_ + +#include +#include + +namespace mindspore { +namespace dataset { + +// Internal Sampler class forward declaration +class Sampler; + +namespace api { + +class SamplerObj : public std::enable_shared_from_this { + public: + SamplerObj(); + + ~SamplerObj() = default; + + virtual std::shared_ptr Build() = 0; + virtual bool ValidateParams() = 0; +}; + +class DistributedSamplerObj; +class PKSamplerObj; +class RandomSamplerObj; +class SequentialSamplerObj; +class SubsetRandomSamplerObj; +class WeightedRandomSamplerObj; + +/// Function to create a Distributed Sampler. +/// \notes A Sampler that access a shard of the dataset. +/// \param[in] num_shards - Number of shards to divide the dataset into. +/// \param[in] shard_id - Shard ID of the current shard within num_shards. +/// \param[in] shuffle - If true, the indices are shuffled. 
+/// \param[in] num_samples - The number of samples to draw (default to all elements). +/// \param[in] seed - The seed in use when shuffle is true. +/// \return Shared pointer to the current Sampler. +std::shared_ptr DistributedSampler(int64_t num_shards, int64_t shard_id, bool shuffle = true, + int64_t num_samples = 0, uint32_t seed = 1); + +/// Function to create a PK Sampler. +/// \notes Samples K elements for each P class in the dataset. +/// This will sample all classes. +/// \param[in] num_val - Number of elements to sample for each class. +/// \param[in] shuffle - If true, the class IDs are shuffled. +/// \param[in] num_samples - The number of samples to draw (default to all elements). +/// \return Shared pointer to the current Sampler. +std::shared_ptr PKSampler(int64_t num_val, bool shuffle = false, int64_t num_samples = 0); + +/// Function to create a Random Sampler. +/// \notes Samples the elements randomly. +/// \param[in] replacement - If True, put the sample ID back for the next draw. +/// \param[in] num_samples - The number of samples to draw (default to all elements). +/// \return Shared pointer to the current Sampler. +std::shared_ptr RandomSampler(bool replacement = false, int64_t num_samples = 0); + +/// Function to create a Sequential Sampler. +/// \notes Samples the dataset elements sequentially, same as not having a sampler. +/// \param[in] start_index - Index to start sampling at (dafault to start at first id). +/// \param[in] num_samples - The number of samples to draw (default to all elements). +/// \return Shared pointer to the current Sampler. +std::shared_ptr SequentialSampler(int64_t start_index = 0, int64_t num_samples = 0); + +/// Function to create a Subset Random Sampler. +/// \notes Samples the elements randomly from a sequence of indices. +/// \param[in] indices - A vector sequence of indices. +/// \param[in] num_samples - The number of samples to draw (default to all elements). +/// \return Shared pointer to the current Sampler. 
+std::shared_ptr SubsetRandomSampler(const std::vector &indices, + int64_t num_samples = 0); + +/// Function to create a Weighted Random Sampler. +/// \notes Samples the elements from [0, len(weights) - 1] randomly with the given +/// weights (probabilities). +/// \param[in] weights - A vector sequence of weights, not necessarily summing up to 1. +/// \param[in] num_samples - The number of samples to draw (default to all elements). +/// \param[in] replacement - If True, put the sample ID back for the next draw. +/// \return Shared pointer to the current Sampler. +std::shared_ptr WeightedRandomSampler(const std::vector &weights, + int64_t num_samples = 0, bool replacement = true); + +/* ####################################### Derived Sampler classes ################################# */ +class DistributedSamplerObj : public SamplerObj { + public: + DistributedSamplerObj(int64_t num_shards, int64_t shard_id, bool shuffle, int64_t num_samples, uint32_t seed); + + ~DistributedSamplerObj() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + int64_t num_shards_; + int64_t shard_id_; + bool shuffle_; + int64_t num_samples_; + uint32_t seed_; +}; + +class PKSamplerObj : public SamplerObj { + public: + PKSamplerObj(int64_t num_val, bool shuffle, int64_t num_samples); + + ~PKSamplerObj() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + int64_t num_val_; + bool shuffle_; + int64_t num_samples_; +}; + +class RandomSamplerObj : public SamplerObj { + public: + RandomSamplerObj(bool replacement, int64_t num_samples); + + ~RandomSamplerObj() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + bool replacement_; + int64_t num_samples_; +}; + +class SequentialSamplerObj : public SamplerObj { + public: + SequentialSamplerObj(int64_t start_index, int64_t num_samples); + + ~SequentialSamplerObj() = default; + + std::shared_ptr Build() 
override; + + bool ValidateParams() override; + + private: + int64_t start_index_; + int64_t num_samples_; +}; + +class SubsetRandomSamplerObj : public SamplerObj { + public: + SubsetRandomSamplerObj(const std::vector &indices, int64_t num_samples); + + ~SubsetRandomSamplerObj() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + const std::vector &indices_; + int64_t num_samples_; +}; + +class WeightedRandomSamplerObj : public SamplerObj { + public: + explicit WeightedRandomSamplerObj(const std::vector &weights, int64_t num_samples = 0, + bool replacement = true); + + ~WeightedRandomSamplerObj() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + const std::vector &weights_; + int64_t num_samples_; + bool replacement_; +}; +} // namespace api +} // namespace dataset +} // namespace mindspore +#endif // DATASET_API_SAMPLERS_H_ diff --git a/mindspore/ccsrc/minddata/dataset/include/status.h b/mindspore/ccsrc/minddata/dataset/include/status.h new file mode 120000 index 0000000000..bba92b63ad --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/include/status.h @@ -0,0 +1 @@ +../util/status.h \ No newline at end of file diff --git a/mindspore/ccsrc/minddata/dataset/include/tensor.h b/mindspore/ccsrc/minddata/dataset/include/tensor.h new file mode 120000 index 0000000000..34b5e020a9 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/include/tensor.h @@ -0,0 +1 @@ +../core/tensor.h \ No newline at end of file diff --git a/mindspore/ccsrc/minddata/dataset/include/transforms.h b/mindspore/ccsrc/minddata/dataset/include/transforms.h new file mode 100644 index 0000000000..31531a20af --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/include/transforms.h @@ -0,0 +1,380 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DATASET_API_TRANSFORMS_H_ +#define DATASET_API_TRANSFORMS_H_ + +#include +#include +#include "minddata/dataset/core/constants.h" + +namespace mindspore { +namespace dataset { + +class TensorOp; + +namespace api { +// Abstract class to represent a dataset in the data pipeline. +class TensorOperation : public std::enable_shared_from_this { + public: + /// \brief Constructor + TensorOperation(); + + /// \brief Destructor + ~TensorOperation() = default; + + /// \brief Pure virtual function to convert a TensorOperation class into a runtime TensorOp object. + /// \return shared pointer to the newly created TensorOp. + virtual std::shared_ptr Build() = 0; + + virtual bool ValidateParams() = 0; +}; + +// Transform operations for performing computer vision. +namespace vision { + +class NormalizeOperation; +class DecodeOperation; +class ResizeOperation; +class RandomCropOperation; +class CenterCropOperation; +class UniformAugOperation; +class RandomHorizontalFlipOperation; +class RandomVerticalFlipOperation; +class RandomRotationOperation; +class PadOperation; +class CutOutOperation; +class RandomColorAdjustOperation; + +/// \brief Function to create a Normalize TensorOperation. +/// \notes Normalize the input image with respect to mean and standard deviation. +/// \param[in] mean - a vector of mean values for each channel, w.r.t channel order. +/// \param[in] std - a vector of standard deviations for each channel, w.r.t. channel order. +/// \return Shared pointer to the current TensorOperation. 
+std::shared_ptr Normalize(std::vector mean, std::vector std); + +/// \brief Function to create a Decode TensorOperation. +/// \notes Decode the input image in RGB mode. +/// \param[in] rgb - a boolean of whether to decode in RGB mode or not. +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr Decode(bool rgb = true); + +/// \brief Function to create a Resize TensorOperation. +/// \notes Resize the input image to the given size.. +/// \param[in] size - a vector representing the output size of the resized image. +/// If size is a single value, the image will be resized to this value with +/// the same image aspect ratio. If size has 2 values, it should be (height, width). +/// \param[in] interpolation An enum for the mode of interpolation +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr Resize(std::vector size, + InterpolationMode interpolation = InterpolationMode::kLinear); + +/// \brief Function to create a RandomCrop TensorOperation. +/// \notes Crop the input image at a random location. +/// \param[in] size - a vector representing the output size of the cropped image. +/// If size is a single value, a square crop of size (size, size) is returned. +/// If size has 2 values, it should be (height, width). +/// \param[in] padding - a vector with the value of pixels to pad the image. If 4 values are provided, +/// it pads the left, top, right and bottom respectively. +/// \param[in] pad_if_needed - a boolean whether to pad the image if either side is smaller than +/// the given output size. +/// \param[in] fill_value - a vector representing the pixel intensity of the borders, it is used to +/// fill R, G, B channels respectively. +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr RandomCrop(std::vector size, std::vector padding = {0, 0, 0, 0}, + bool pad_if_needed = false, + std::vector fill_value = {0, 0, 0}); + +/// \brief Function to create a CenterCrop TensorOperation. 
+/// \notes Crops the input image at the center to the given size. +/// \param[in] size - a vector representing the output size of the cropped image. +/// If size is a single value, a square crop of size (size, size) is returned. +/// If size has 2 values, it should be (height, width). +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr CenterCrop(std::vector size); + +/// \brief Function to create a UniformAugment TensorOperation. +/// \notes Tensor operation to perform randomly selected augmentation. +/// \param[in] operations - a vector of TensorOperation operations. +/// \param[in] num_ops - integer representing the number of OPs to be selected and applied. +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr UniformAugment(std::vector> operations, + int32_t num_ops = 2); + +/// \brief Function to create a RandomHorizontalFlip TensorOperation. +/// \notes Tensor operation to perform random horizontal flip. +/// \param[in] prob - float representing the probability of flip. +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr RandomHorizontalFlip(float prob = 0.5); + +/// \brief Function to create a RandomVerticalFlip TensorOperation. +/// \notes Tensor operation to perform random vertical flip. +/// \param[in] prob - float representing the probability of flip. +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr RandomVerticalFlip(float prob = 0.5); + +/// \brief Function to create a RandomRotation TensorOp +/// \notes Rotates the image according to parameters +/// \param[in] degrees A float vector size 2, representing the starting and ending degree +/// \param[in] resample An enum for the mode of interpolation +/// \param[in] expand A boolean representing whether the image is expanded after rotation +/// \param[in] center A float vector size 2, representing the x and y center of rotation. 
+/// \param[in] fill_value A uint8_t vector size 3, representing the rgb value of the fill color +/// \return Shared pointer to the current TensorOp +std::shared_ptr RandomRotation( + std::vector degrees, InterpolationMode resample = InterpolationMode::kNearestNeighbour, bool expand = false, + std::vector center = {-1, -1}, std::vector fill_value = {0, 0, 0}); + +/// \brief Function to create a Pad TensorOp +/// \notes Pads the image according to padding parameters +/// \param[in] padding A vector representing the number of pixels to pad the image +/// If vector has one value, it pads all sides of the image with that value +/// If vector has two values, it pads left and right with the first and +/// top and bottom with the second value +/// If vector has four values, it pads left, top, right, and bottom with +/// those values respectively +/// \param[in] fill_value A vector representing the pixel intensity of the borders if the padding_mode is +/// BorderType.kConstant. If 3 values are provided, +/// it is used to fill R, G, B channels respectively +/// \param[in] padding_mode The method of padding (default=BorderType.kConstant) +/// Can be any of +/// [BorderType.kConstant, BorderType.kEdge, BorderType.kReflect, BorderType.kSymmetric] +/// - BorderType.kConstant, means it fills the border with constant values +/// - BorderType.kEdge, means it pads with the last value on the edge +/// - BorderType.kReflect, means it reflects the values on the edge omitting the last value of edge +/// - BorderType.kSymmetric, means it reflects the values on the edge repeating the last value of edge +/// \return Shared pointer to the current TensorOp +std::shared_ptr Pad(std::vector padding, std::vector fill_value = {0}, + BorderType padding_mode = BorderType::kConstant); + +/// \brief Function to create a CutOut TensorOp +/// \notes Randomly cut (mask) out a given number of square patches from the input image +/// \param[in] length Integer representing the side length of each square 
patch +/// \param[in] num_patches Integer representing the number of patches to be cut out of an image +/// \return Shared pointer to the current TensorOp +std::shared_ptr CutOut(int32_t length, int32_t num_patches = 1); + +/// \brief Randomly adjust the brightness, contrast, saturation, and hue of the input image +/// \param[in] brightness Brightness adjustment factor. Must be a vector of one or two values +/// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1} +/// \param[in] contrast Contrast adjustment factor. Must be a vector of one or two values +/// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1} +/// \param[in] saturation Saturation adjustment factor. Must be a vector of one or two values +/// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1} +/// \param[in] hue Brightness adjustment factor. Must be a vector of one or two values +/// if it's a vector of two values it must be in the form of [min, max] where -0.5 <= min <= max <= 0.5 +/// Default value is {0, 0} +/// \return Shared pointer to the current TensorOp +std::shared_ptr RandomColorAdjust(std::vector brightness = {1.0, 1.0}, + std::vector contrast = {1.0, 1.0}, + std::vector saturation = {1.0, 1.0}, + std::vector hue = {0.0, 0.0}); + +/* ####################################### Derived TensorOperation classes ################################# */ + +class NormalizeOperation : public TensorOperation { + public: + NormalizeOperation(std::vector mean, std::vector std); + + ~NormalizeOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + std::vector mean_; + std::vector std_; +}; + +class DecodeOperation : public TensorOperation { + public: + explicit DecodeOperation(bool rgb = true); + + ~DecodeOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + bool 
rgb_; +}; + +class ResizeOperation : public TensorOperation { + public: + explicit ResizeOperation(std::vector size, + InterpolationMode interpolation_mode = InterpolationMode::kLinear); + + ~ResizeOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + std::vector size_; + InterpolationMode interpolation_; +}; + +class RandomCropOperation : public TensorOperation { + public: + RandomCropOperation(std::vector size, std::vector padding = {0, 0, 0, 0}, + bool pad_if_needed = false, std::vector fill_value = {0, 0, 0}); + + ~RandomCropOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + std::vector size_; + std::vector padding_; + bool pad_if_needed_; + std::vector fill_value_; +}; + +class CenterCropOperation : public TensorOperation { + public: + explicit CenterCropOperation(std::vector size); + + ~CenterCropOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + std::vector size_; +}; + +class UniformAugOperation : public TensorOperation { + public: + explicit UniformAugOperation(std::vector> operations, int32_t num_ops = 2); + + ~UniformAugOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + std::vector> operations_; + int32_t num_ops_; +}; + +class RandomHorizontalFlipOperation : public TensorOperation { + public: + explicit RandomHorizontalFlipOperation(float probability = 0.5); + + ~RandomHorizontalFlipOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + float probability_; +}; + +class RandomVerticalFlipOperation : public TensorOperation { + public: + explicit RandomVerticalFlipOperation(float probability = 0.5); + + ~RandomVerticalFlipOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + float probability_; +}; + +class 
RandomRotationOperation : public TensorOperation { + public: + RandomRotationOperation(std::vector degrees, InterpolationMode interpolation_mode, bool expand, + std::vector center, std::vector fill_value); + + ~RandomRotationOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + std::vector degrees_; + InterpolationMode interpolation_mode_; + std::vector center_; + bool expand_; + std::vector fill_value_; +}; + +class PadOperation : public TensorOperation { + public: + PadOperation(std::vector padding, std::vector fill_value = {0}, + BorderType padding_mode = BorderType::kConstant); + + ~PadOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + std::vector padding_; + std::vector fill_value_; + BorderType padding_mode_; +}; + +class CutOutOperation : public TensorOperation { + public: + explicit CutOutOperation(int32_t length, int32_t num_patches = 1); + + ~CutOutOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + int32_t length_; + int32_t num_patches_; +}; + +class RandomColorAdjustOperation : public TensorOperation { + public: + RandomColorAdjustOperation(std::vector brightness = {1.0, 1.0}, std::vector contrast = {1.0, 1.0}, + std::vector saturation = {1.0, 1.0}, std::vector hue = {0.0, 0.0}); + + ~RandomColorAdjustOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + std::vector brightness_; + std::vector contrast_; + std::vector saturation_; + std::vector hue_; +}; +} // namespace vision +} // namespace api +} // namespace dataset +} // namespace mindspore +#endif // DATASET_API_TRANSFORMS_H_ diff --git a/mindspore/ccsrc/minddata/dataset/include/utils/log_adapter.h b/mindspore/ccsrc/minddata/dataset/include/utils/log_adapter.h new file mode 120000 index 0000000000..f2c939bc0b --- /dev/null +++ 
b/mindspore/ccsrc/minddata/dataset/include/utils/log_adapter.h @@ -0,0 +1 @@ +../../../../utils/log_adapter.h \ No newline at end of file diff --git a/mindspore/ccsrc/minddata/dataset/include/utils/overload.h b/mindspore/ccsrc/minddata/dataset/include/utils/overload.h new file mode 120000 index 0000000000..7dc313d512 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/include/utils/overload.h @@ -0,0 +1 @@ +../../../../utils/overload.h \ No newline at end of file diff --git a/mindspore/ccsrc/dataset/kernels/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/kernels/CMakeLists.txt similarity index 50% rename from mindspore/ccsrc/dataset/kernels/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/kernels/CMakeLists.txt index 2ebdd15e3c..8a9096ff23 100644 --- a/mindspore/ccsrc/dataset/kernels/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/kernels/CMakeLists.txt @@ -2,7 +2,13 @@ add_subdirectory(image) add_subdirectory(data) file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) -add_library(kernels OBJECT - py_func_op.cc - tensor_op.cc) -target_include_directories(kernels PRIVATE ${pybind11_INCLUDE_DIRS}) +if (ENABLE_PYTHON) + add_library(kernels OBJECT + py_func_op.cc + tensor_op.cc) + target_include_directories(kernels PRIVATE ${pybind11_INCLUDE_DIRS}) +else() + add_library(kernels OBJECT + tensor_op.cc) +endif() + diff --git a/mindspore/ccsrc/dataset/kernels/data/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/kernels/data/CMakeLists.txt similarity index 100% rename from mindspore/ccsrc/dataset/kernels/data/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/kernels/data/CMakeLists.txt diff --git a/mindspore/ccsrc/dataset/kernels/data/concatenate_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/data/concatenate_op.cc similarity index 90% rename from mindspore/ccsrc/dataset/kernels/data/concatenate_op.cc 
rename to mindspore/ccsrc/minddata/dataset/kernels/data/concatenate_op.cc index 87115fd3ce..0c91b38b2d 100644 --- a/mindspore/ccsrc/dataset/kernels/data/concatenate_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/concatenate_op.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/data/concatenate_op.h" +#include "minddata/dataset/kernels/data/concatenate_op.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/data/data_utils.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/data/data_utils.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/data/concatenate_op.h b/mindspore/ccsrc/minddata/dataset/kernels/data/concatenate_op.h similarity index 93% rename from mindspore/ccsrc/dataset/kernels/data/concatenate_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/data/concatenate_op.h index 4e4c7ad4e0..46cc613049 100644 --- a/mindspore/ccsrc/dataset/kernels/data/concatenate_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/concatenate_op.h @@ -21,8 +21,8 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { @@ -55,6 +55,8 @@ class ConcatenateOp : public TensorOp { /// Number of inputs the tensor operation accepts uint32_t NumInput() override { return 0; } + std::string Name() const override { return kConcatenateOp; } + private: int8_t axis_; std::shared_ptr prepend_; diff --git a/mindspore/ccsrc/dataset/kernels/data/data_utils.cc b/mindspore/ccsrc/minddata/dataset/kernels/data/data_utils.cc similarity index 95% rename from mindspore/ccsrc/dataset/kernels/data/data_utils.cc rename to 
mindspore/ccsrc/minddata/dataset/kernels/data/data_utils.cc index 40eba1edf6..b1d51a6c08 100644 --- a/mindspore/ccsrc/dataset/kernels/data/data_utils.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/data_utils.cc @@ -14,20 +14,22 @@ * limitations under the License. */ -#include "dataset/kernels/data/data_utils.h" +#include "minddata/dataset/kernels/data/data_utils.h" #include #include #include #include -#include "dataset/core/constants.h" -#include "dataset/core/data_type.h" -#include "dataset/core/pybind_support.h" -#include "dataset/core/tensor.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/kernels/data/type_cast_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/data_type.h" +#ifdef ENABLE_PYTHON +#include "minddata/dataset/core/pybind_support.h" +#endif +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/kernels/data/type_cast_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -113,22 +115,27 @@ Status OneHotEncoding(std::shared_ptr input, std::shared_ptr *ou } Status Fill(const std::shared_ptr input, std::shared_ptr *output, std::shared_ptr fill_value) { - CHECK_FAIL_RETURN_UNEXPECTED(!((fill_value->type() == DataType::DE_STRING) && (input->type() != DataType::DE_STRING)), + const DataType &fill_type = fill_value->type(); + const DataType &input_type = input->type(); + const TensorShape &input_shape = input->shape(); + + CHECK_FAIL_RETURN_UNEXPECTED(!((fill_type == DataType::DE_STRING) && (input_type != DataType::DE_STRING)), "Types do not match"); CHECK_FAIL_RETURN_UNEXPECTED(fill_value->shape() == TensorShape({}), "fill_value is not a scalar"); - std::shared_ptr out; - - const DataType &to = input->type(); - std::unique_ptr op(new TypeCastOp(to)); + std::shared_ptr out, fill_output; - std::shared_ptr fill_output; - RETURN_IF_NOT_OK(op->Compute(fill_value, 
&fill_output)); + if (input_type != DataType::DE_STRING && fill_type != DataType::DE_STRING && input_type != fill_type) { + auto op = std::make_unique(input_type); + RETURN_IF_NOT_OK(op->Compute(fill_value, &fill_output)); + } else { + fill_output = fill_value; + } - RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, TensorImpl::kFlexible, input->shape(), input->type())); + RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, TensorImpl::kFlexible, input_shape, input_type)); - switch (input->type().value()) { + switch (input_type.value()) { case DataType::DE_BOOL: { bool value = 0; RETURN_IF_NOT_OK(fill_output->GetItemAt(&value, {})); @@ -206,10 +213,10 @@ Status Fill(const std::shared_ptr input, std::shared_ptr *output std::string_view fill_string_view; RETURN_IF_NOT_OK(fill_value->GetItemAt(&fill_string_view, {})); std::string fill_string = std::string(fill_string_view); - for (int i = 0; i < input->shape().NumOfElements(); i++) { + for (int i = 0; i < input_shape.NumOfElements(); i++) { strings.emplace_back(fill_string); } - RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, strings, input->shape())); + RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, strings, input_shape)); break; } case DataType::DE_UNKNOWN: { diff --git a/mindspore/ccsrc/dataset/kernels/data/data_utils.h b/mindspore/ccsrc/minddata/dataset/kernels/data/data_utils.h similarity index 97% rename from mindspore/ccsrc/dataset/kernels/data/data_utils.h rename to mindspore/ccsrc/minddata/dataset/kernels/data/data_utils.h index 6034e2a0eb..141545a583 100644 --- a/mindspore/ccsrc/dataset/kernels/data/data_utils.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/data_utils.h @@ -19,11 +19,11 @@ #include #include #include -#include "dataset/core/constants.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/core/data_type.h" -#include "dataset/core/tensor.h" -#include "dataset/core/tensor_row.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/cv_tensor.h" +#include 
"minddata/dataset/core/data_type.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/tensor_row.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/data/duplicate_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/data/duplicate_op.cc similarity index 87% rename from mindspore/ccsrc/dataset/kernels/data/duplicate_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/data/duplicate_op.cc index 959516a4aa..57a424704f 100644 --- a/mindspore/ccsrc/dataset/kernels/data/duplicate_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/duplicate_op.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "dataset/kernels/data/duplicate_op.h" +#include "minddata/dataset/kernels/data/duplicate_op.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/data/duplicate_op.h b/mindspore/ccsrc/minddata/dataset/kernels/data/duplicate_op.h similarity index 87% rename from mindspore/ccsrc/dataset/kernels/data/duplicate_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/data/duplicate_op.h index 4c9d6d36c9..60b2d8c33b 100644 --- a/mindspore/ccsrc/dataset/kernels/data/duplicate_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/duplicate_op.h @@ -18,9 +18,10 @@ #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { @@ -36,6 +37,8 @@ class DuplicateOp : public TensorOp { Status Compute(const TensorRow &input, TensorRow *output) override; uint32_t NumOutput() override { return 2; } + + std::string Name() const override { return kDuplicateOp; } }; } // namespace dataset } // namespace mindspore diff --git 
a/mindspore/ccsrc/dataset/kernels/data/fill_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/data/fill_op.cc similarity index 81% rename from mindspore/ccsrc/dataset/kernels/data/fill_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/data/fill_op.cc index 63895d3a95..f8dc746dff 100644 --- a/mindspore/ccsrc/dataset/kernels/data/fill_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/fill_op.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/data/fill_op.h" +#include "minddata/dataset/kernels/data/fill_op.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/data/data_utils.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/data/data_utils.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/data/fill_op.h b/mindspore/ccsrc/minddata/dataset/kernels/data/fill_op.h similarity index 89% rename from mindspore/ccsrc/dataset/kernels/data/fill_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/data/fill_op.h index 03f59f3e67..af0d9e7941 100644 --- a/mindspore/ccsrc/dataset/kernels/data/fill_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/fill_op.h @@ -21,8 +21,8 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { @@ -35,6 +35,8 @@ class FillOp : public TensorOp { Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + std::string Name() const override { return kFillOp; } + private: std::shared_ptr fill_value_; }; diff --git a/mindspore/ccsrc/dataset/kernels/data/mask_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/data/mask_op.cc similarity index 91% rename from 
mindspore/ccsrc/dataset/kernels/data/mask_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/data/mask_op.cc index 2cfeb7e36f..2dbe501a47 100644 --- a/mindspore/ccsrc/dataset/kernels/data/mask_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/mask_op.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "dataset/kernels/data/mask_op.h" +#include "minddata/dataset/kernels/data/mask_op.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/data/mask_op.h b/mindspore/ccsrc/minddata/dataset/kernels/data/mask_op.h similarity index 85% rename from mindspore/ccsrc/dataset/kernels/data/mask_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/data/mask_op.h index 0affe543bb..e6ac8c3964 100644 --- a/mindspore/ccsrc/dataset/kernels/data/mask_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/mask_op.h @@ -22,10 +22,10 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/kernels/data/type_cast_op.h" -#include "dataset/kernels/data/data_utils.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/kernels/data/type_cast_op.h" +#include "minddata/dataset/kernels/data/data_utils.h" namespace mindspore { namespace dataset { @@ -43,6 +43,8 @@ class MaskOp : public TensorOp { Status OutputType(const std::vector &inputs, std::vector &outputs) override; + std::string Name() const override { return kMaskOp; } + private: RelationalOp op_; std::shared_ptr value_; diff --git a/mindspore/ccsrc/dataset/kernels/data/one_hot_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/data/one_hot_op.cc similarity index 88% rename from mindspore/ccsrc/dataset/kernels/data/one_hot_op.cc rename to 
mindspore/ccsrc/minddata/dataset/kernels/data/one_hot_op.cc index 65d1a183b3..e2b7b74a96 100644 --- a/mindspore/ccsrc/dataset/kernels/data/one_hot_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/one_hot_op.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/data/one_hot_op.h" +#include "minddata/dataset/kernels/data/one_hot_op.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/data/data_utils.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/data/data_utils.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/data/one_hot_op.h b/mindspore/ccsrc/minddata/dataset/kernels/data/one_hot_op.h similarity index 90% rename from mindspore/ccsrc/dataset/kernels/data/one_hot_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/data/one_hot_op.h index 80494dc5c0..06a4823573 100644 --- a/mindspore/ccsrc/dataset/kernels/data/one_hot_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/one_hot_op.h @@ -20,8 +20,8 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { @@ -37,6 +37,8 @@ class OneHotOp : public TensorOp { Status OutputShape(const std::vector &inputs, std::vector &outputs) override; + std::string Name() const override { return kOneHotOp; } + private: int num_classes_; }; diff --git a/mindspore/ccsrc/dataset/kernels/data/pad_end_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/data/pad_end_op.cc similarity index 86% rename from mindspore/ccsrc/dataset/kernels/data/pad_end_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/data/pad_end_op.cc index 5b3b4cbe16..7b83137d88 100644 --- 
a/mindspore/ccsrc/dataset/kernels/data/pad_end_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/pad_end_op.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/data/pad_end_op.h" +#include "minddata/dataset/kernels/data/pad_end_op.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/data/data_utils.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/data/data_utils.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/data/pad_end_op.h b/mindspore/ccsrc/minddata/dataset/kernels/data/pad_end_op.h similarity index 90% rename from mindspore/ccsrc/dataset/kernels/data/pad_end_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/data/pad_end_op.h index c6bc0c430e..c28f7250e0 100644 --- a/mindspore/ccsrc/dataset/kernels/data/pad_end_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/pad_end_op.h @@ -20,8 +20,8 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { @@ -38,6 +38,8 @@ class PadEndOp : public TensorOp { Status OutputShape(const std::vector &inputs, std::vector &outputs) override; + std::string Name() const override { return kPadEndOp; } + private: TensorShape output_shape_; std::shared_ptr pad_val_; diff --git a/mindspore/ccsrc/dataset/kernels/data/slice_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/data/slice_op.cc similarity index 91% rename from mindspore/ccsrc/dataset/kernels/data/slice_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/data/slice_op.cc index 2eebf26e84..66f48d5c2b 100644 --- a/mindspore/ccsrc/dataset/kernels/data/slice_op.cc +++ 
b/mindspore/ccsrc/minddata/dataset/kernels/data/slice_op.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/data/slice_op.h" +#include "minddata/dataset/kernels/data/slice_op.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/data/slice_op.h b/mindspore/ccsrc/minddata/dataset/kernels/data/slice_op.h similarity index 94% rename from mindspore/ccsrc/dataset/kernels/data/slice_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/data/slice_op.h index 0a24ae171e..1cf99830c9 100644 --- a/mindspore/ccsrc/dataset/kernels/data/slice_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/slice_op.h @@ -22,8 +22,8 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { @@ -71,6 +71,8 @@ class SliceOp : public TensorOp { Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + std::string Name() const override { return kSliceOp; } + private: // only on of the following will be valid // given indices to slice the Tensor. Empty vector if invalid. 
diff --git a/mindspore/ccsrc/dataset/kernels/data/to_float16_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/data/to_float16_op.cc similarity index 84% rename from mindspore/ccsrc/dataset/kernels/data/to_float16_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/data/to_float16_op.cc index 1cd79456e0..c52162b1aa 100644 --- a/mindspore/ccsrc/dataset/kernels/data/to_float16_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/to_float16_op.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/data/to_float16_op.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/data/data_utils.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/kernels/data/to_float16_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/data/data_utils.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/data/to_float16_op.h b/mindspore/ccsrc/minddata/dataset/kernels/data/to_float16_op.h similarity index 90% rename from mindspore/ccsrc/dataset/kernels/data/to_float16_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/data/to_float16_op.h index 3fca50bf07..91f660ca9c 100644 --- a/mindspore/ccsrc/dataset/kernels/data/to_float16_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/to_float16_op.h @@ -22,8 +22,8 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { @@ -42,6 +42,8 @@ class ToFloat16Op : public TensorOp { void Print(std::ostream &out) const override { out << "ToFloat16Op"; } Status OutputType(const std::vector &inputs, std::vector &outputs) override; + + std::string Name() const override { return kToFloat16Op; } }; } // namespace dataset } // 
namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/data/type_cast_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/data/type_cast_op.cc similarity index 85% rename from mindspore/ccsrc/dataset/kernels/data/type_cast_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/data/type_cast_op.cc index 74c84a668a..5a58745293 100644 --- a/mindspore/ccsrc/dataset/kernels/data/type_cast_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/type_cast_op.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/data/type_cast_op.h" +#include "minddata/dataset/kernels/data/type_cast_op.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/data/data_utils.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/data/data_utils.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/data/type_cast_op.h b/mindspore/ccsrc/minddata/dataset/kernels/data/type_cast_op.h similarity index 91% rename from mindspore/ccsrc/dataset/kernels/data/type_cast_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/data/type_cast_op.h index 1b3f2c3290..b82bc32342 100644 --- a/mindspore/ccsrc/dataset/kernels/data/type_cast_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/type_cast_op.h @@ -20,8 +20,8 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { @@ -42,6 +42,8 @@ class TypeCastOp : public TensorOp { void Print(std::ostream &out) const override { out << "TypeCastOp"; } Status OutputType(const std::vector &inputs, std::vector &outputs) override; + std::string Name() const override { return kTypeCastOp; } + private: DataType type_; }; diff 
--git a/mindspore/ccsrc/dataset/kernels/image/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/kernels/image/CMakeLists.txt similarity index 95% rename from mindspore/ccsrc/dataset/kernels/image/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/kernels/image/CMakeLists.txt index fef698912c..c0c575de9a 100644 --- a/mindspore/ccsrc/dataset/kernels/image/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/CMakeLists.txt @@ -15,7 +15,7 @@ add_library(kernels-image OBJECT random_crop_op.cc random_crop_with_bbox_op.cc random_horizontal_flip_op.cc - random_horizontal_flip_bbox_op.cc + random_horizontal_flip_with_bbox_op.cc bounding_box_augment_op.cc random_resize_op.cc random_rotation_op.cc diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/bounding_box_augment_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/bounding_box_augment_op.cc new file mode 100644 index 0000000000..618ed4d356 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/bounding_box_augment_op.cc @@ -0,0 +1,76 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include "minddata/dataset/kernels/image/bounding_box_augment_op.h" +#include "minddata/dataset/kernels/image/resize_op.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/core/cv_tensor.h" + +namespace mindspore { +namespace dataset { +const float BoundingBoxAugmentOp::kDefRatio = 0.3; + +BoundingBoxAugmentOp::BoundingBoxAugmentOp(std::shared_ptr transform, float ratio) + : ratio_(ratio), uniform_(0, 1), transform_(std::move(transform)) { + rnd_.seed(GetSeed()); +} + +Status BoundingBoxAugmentOp::Compute(const TensorRow &input, TensorRow *output) { + IO_CHECK_VECTOR(input, output); + BOUNDING_BOX_CHECK(input); // check if bounding boxes are valid + uint32_t num_of_boxes = input[1]->shape()[0]; + std::shared_ptr crop_out; + std::shared_ptr res_out; + std::shared_ptr input_restore = CVTensor::AsCVTensor(input[0]); + for (uint32_t i = 0; i < num_of_boxes; i++) { + // using a uniform distribution to ensure op happens with probability ratio_ + if (uniform_(rnd_) < ratio_) { + float min_x = 0; + float min_y = 0; + float b_w = 0; + float b_h = 0; + // get the required items + RETURN_IF_NOT_OK(input[1]->GetItemAt(&min_x, {i, 0})); + RETURN_IF_NOT_OK(input[1]->GetItemAt(&min_y, {i, 1})); + RETURN_IF_NOT_OK(input[1]->GetItemAt(&b_w, {i, 2})); + RETURN_IF_NOT_OK(input[1]->GetItemAt(&b_h, {i, 3})); + RETURN_IF_NOT_OK(Crop(input_restore, &crop_out, static_cast(min_x), static_cast(min_y), + static_cast(b_w), static_cast(b_h))); + // transform the cropped bbox region + RETURN_IF_NOT_OK(transform_->Compute(crop_out, &res_out)); + // place the transformed region back in the restored input + std::shared_ptr res_img = CVTensor::AsCVTensor(res_out); + // check if transformed crop is out of bounds of the box + if (res_img->mat().cols > b_w || res_img->mat().rows > b_h || res_img->mat().cols < b_w || + res_img->mat().rows < b_h) { + // if so, resize to fit in the box + std::shared_ptr resize_op = + 
std::make_shared(static_cast(b_h), static_cast(b_w)); + RETURN_IF_NOT_OK(resize_op->Compute(std::static_pointer_cast(res_img), &res_out)); + res_img = CVTensor::AsCVTensor(res_out); + } + res_img->mat().copyTo(input_restore->mat()(cv::Rect(min_x, min_y, res_img->mat().cols, res_img->mat().rows))); + } + } + (*output).push_back(std::move(std::static_pointer_cast(input_restore))); + (*output).push_back(input[1]); + return Status::OK(); +} + +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/bounding_box_augment_op.h similarity index 85% rename from mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/bounding_box_augment_op.h index 6c106f75dc..8e30c5738d 100644 --- a/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/bounding_box_augment_op.h @@ -20,11 +20,12 @@ #include #include #include +#include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" -#include "dataset/util/random.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/util/random.h" namespace mindspore { namespace dataset { @@ -50,9 +51,12 @@ class BoundingBoxAugmentOp : public TensorOp { Status Compute(const TensorRow &input, TensorRow *output) override; + std::string Name() const override { return kBoundingBoxAugmentOp; } + private: float ratio_; std::mt19937 rnd_; + std::uniform_real_distribution uniform_; std::shared_ptr transform_; }; } // namespace dataset diff --git a/mindspore/ccsrc/dataset/kernels/image/center_crop_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/center_crop_op.cc similarity index 93% rename from 
mindspore/ccsrc/dataset/kernels/image/center_crop_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/center_crop_op.cc index a5129e9c71..35079b05cd 100644 --- a/mindspore/ccsrc/dataset/kernels/image/center_crop_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/center_crop_op.cc @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/image/center_crop_op.h" +#include "minddata/dataset/kernels/image/center_crop_op.h" #include #include "common/utils.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/center_crop_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/center_crop_op.h similarity index 87% rename from mindspore/ccsrc/dataset/kernels/image/center_crop_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/center_crop_op.h index eb8e71ba7c..1f8cbcf230 100644 --- a/mindspore/ccsrc/dataset/kernels/image/center_crop_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/center_crop_op.h @@ -18,10 +18,11 @@ #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -39,6 +40,8 @@ class CenterCropOp : public TensorOp { Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; Status OutputShape(const std::vector &inputs, std::vector &outputs) override; + std::string Name() const override { return kCenterCropOp; } + private: int32_t crop_het_; int32_t 
crop_wid_; diff --git a/mindspore/ccsrc/dataset/kernels/image/cut_out_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/cut_out_op.cc similarity index 86% rename from mindspore/ccsrc/dataset/kernels/image/cut_out_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/cut_out_op.cc index 74d9df5d6b..578138d427 100644 --- a/mindspore/ccsrc/dataset/kernels/image/cut_out_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/cut_out_op.cc @@ -13,15 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/image/cut_out_op.h" +#include "minddata/dataset/kernels/image/cut_out_op.h" #include -#include "dataset/core/config_manager.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/random.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/cut_out_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/cut_out_op.h similarity index 91% rename from mindspore/ccsrc/dataset/kernels/image/cut_out_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/cut_out_op.h index 2198f23e44..263cbdb27c 100644 --- a/mindspore/ccsrc/dataset/kernels/image/cut_out_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/cut_out_op.h @@ -22,10 +22,10 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" 
namespace mindspore { namespace dataset { @@ -61,6 +61,8 @@ class CutOutOp : public TensorOp { // @return Status - The error code return Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + std::string Name() const override { return kCutOutOp; } + private: std::mt19937 rnd_; int32_t box_height_; diff --git a/mindspore/ccsrc/dataset/kernels/image/decode_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/decode_op.cc similarity index 92% rename from mindspore/ccsrc/dataset/kernels/image/decode_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/decode_op.cc index ef6cf88b3b..5bc5377de9 100644 --- a/mindspore/ccsrc/dataset/kernels/image/decode_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/decode_op.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/image/decode_op.h" +#include "minddata/dataset/kernels/image/decode_op.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/status.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/decode_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/decode_op.h similarity index 87% rename from mindspore/ccsrc/dataset/kernels/image/decode_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/decode_op.h index 6e7180958a..29bf1d0146 100644 --- a/mindspore/ccsrc/dataset/kernels/image/decode_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/decode_op.h @@ -18,10 +18,11 @@ #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -40,6 +41,8 @@ 
class DecodeOp : public TensorOp { Status OutputShape(const std::vector &inputs, std::vector &outputs) override; Status OutputType(const std::vector &inputs, std::vector &outputs) override; + std::string Name() const override { return kDecodeOp; } + private: bool is_rgb_format_ = true; }; diff --git a/mindspore/ccsrc/dataset/kernels/image/hwc_to_chw_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/hwc_to_chw_op.cc similarity index 89% rename from mindspore/ccsrc/dataset/kernels/image/hwc_to_chw_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/hwc_to_chw_op.cc index 8ed2229cd1..5013958562 100644 --- a/mindspore/ccsrc/dataset/kernels/image/hwc_to_chw_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/hwc_to_chw_op.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/image/hwc_to_chw_op.h" +#include "minddata/dataset/kernels/image/hwc_to_chw_op.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/status.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/hwc_to_chw_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/hwc_to_chw_op.h similarity index 85% rename from mindspore/ccsrc/dataset/kernels/image/hwc_to_chw_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/hwc_to_chw_op.h index 825ffa4443..0d5f70f895 100644 --- a/mindspore/ccsrc/dataset/kernels/image/hwc_to_chw_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/hwc_to_chw_op.h @@ -18,10 +18,11 @@ #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset 
{ @@ -31,6 +32,8 @@ class HwcToChwOp : public TensorOp { Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; Status OutputShape(const std::vector &inputs, std::vector &outputs) override; + + std::string Name() const override { return kHwcToChwOp; } }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/image/image_utils.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.cc similarity index 92% rename from mindspore/ccsrc/dataset/kernels/image/image_utils.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.cc index ded9a8db11..ddbce3e23a 100644 --- a/mindspore/ccsrc/dataset/kernels/image/image_utils.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/image/image_utils.h" +#include "minddata/dataset/kernels/image/image_utils.h" #include #include #include @@ -21,11 +21,11 @@ #include #include #include "common/utils.h" -#include "dataset/core/constants.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/core/tensor.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/util/random.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/util/random.h" #define MAX_INT_PRECISION 16777216 // float int precision is 16777216 namespace mindspore { @@ -121,14 +121,14 @@ Status Resize(const std::shared_ptr &input, std::shared_ptr *out } } -bool HasJpegMagic(const std::shared_ptr &input) { +bool IsNonEmptyJPEG(const std::shared_ptr &input) { const unsigned char *kJpegMagic = (unsigned char *)"\xFF\xD8\xFF"; constexpr size_t kJpegMagicLen = 3; - return input->SizeInBytes() >= kJpegMagicLen && memcmp(input->GetBuffer(), kJpegMagic, 
kJpegMagicLen) == 0; + return input->SizeInBytes() > kJpegMagicLen && memcmp(input->GetBuffer(), kJpegMagic, kJpegMagicLen) == 0; } Status Decode(const std::shared_ptr &input, std::shared_ptr *output) { - if (HasJpegMagic(input)) { + if (IsNonEmptyJPEG(input)) { return JpegCropAndDecode(input, output); } else { return DecodeCv(input, output); @@ -311,7 +311,7 @@ Status JpegCropAndDecode(const std::shared_ptr &input, std::shared_ptr(ts, DataType(DataType::DE_UINT8)); const int buffer_size = output_tensor->SizeInBytes(); - JSAMPLE *buffer = static_cast(reinterpret_cast(&(*output_tensor->begin()))); + JSAMPLE *buffer = reinterpret_cast(&(*output_tensor->begin())); const int max_scanlines_to_read = skipped_scanlines + crop_h; // stride refers to output tensor, which has 3 components at most const int stride = crop_w * kOutNumComponents; @@ -729,7 +729,6 @@ Status Pad(const std::shared_ptr &input, std::shared_ptr *output int num_channels = input_cv->shape()[2]; if (input_cv->Rank() == 3 && num_channels == 1 && output_cv->Rank() == 2) output_cv->ExpandDim(2); *output = std::static_pointer_cast(output_cv); - return Status::OK(); } catch (const cv::Exception &e) { RETURN_STATUS_UNEXPECTED("Unexpected error in pad"); @@ -740,22 +739,16 @@ Status UpdateBBoxesForCrop(std::shared_ptr *bboxList, size_t *bboxCount, int CB_Ymax) { // PASS LIST, COUNT OF BOUNDING BOXES // Also PAss X/Y Min/Max of image cropped region - normally obtained from 'GetCropBox' functions - uint32_t bb_Xmin_t, bb_Ymin_t, bb_Xmax_t, bb_Ymax_t; - + float bb_Xmin = 0.0, bb_Ymin = 0.0, bb_Xmax = 0.0, bb_Ymax = 0.0; std::vector correct_ind; - std::vector copyVals; + std::vector copyVals; dsize_t bboxDim = (*bboxList)->shape()[1]; bool retFlag = false; // true unless overlap found for (int i = 0; i < *bboxCount; i++) { - int bb_Xmin, bb_Xmax, bb_Ymin, bb_Ymax; - RETURN_IF_NOT_OK((*bboxList)->GetUnsignedIntAt(&bb_Xmin_t, {i, 0})); - RETURN_IF_NOT_OK((*bboxList)->GetUnsignedIntAt(&bb_Ymin_t, {i, 1})); - 
RETURN_IF_NOT_OK((*bboxList)->GetUnsignedIntAt(&bb_Xmax_t, {i, 2})); - RETURN_IF_NOT_OK((*bboxList)->GetUnsignedIntAt(&bb_Ymax_t, {i, 3})); - bb_Xmin = bb_Xmin_t; - bb_Ymin = bb_Ymin_t; - bb_Xmax = bb_Xmax_t; - bb_Ymax = bb_Ymax_t; + RETURN_IF_NOT_OK((*bboxList)->GetItemAt(&bb_Xmin, {i, 0})); + RETURN_IF_NOT_OK((*bboxList)->GetItemAt(&bb_Ymin, {i, 1})); + RETURN_IF_NOT_OK((*bboxList)->GetItemAt(&bb_Xmax, {i, 2})); + RETURN_IF_NOT_OK((*bboxList)->GetItemAt(&bb_Ymax, {i, 3})); bb_Xmax = bb_Xmin + bb_Xmax; bb_Ymax = bb_Ymin + bb_Ymax; // check for image / BB overlap @@ -766,23 +759,31 @@ Status UpdateBBoxesForCrop(std::shared_ptr *bboxList, size_t *bboxCount, correct_ind.push_back(i); // adjust BBox corners by bringing into new CropBox if beyond // Also reseting/adjusting for boxes to lie within CropBox instead of Image - subtract CropBox Xmin/YMin - bb_Xmin = bb_Xmin - (std::min(0, (bb_Xmin - CB_Xmin)) + CB_Xmin); - bb_Xmax = bb_Xmax - (std::max(0, (bb_Xmax - CB_Xmax)) + CB_Xmin); - bb_Ymin = bb_Ymin - (std::min(0, (bb_Ymin - CB_Ymin)) + CB_Ymin); - bb_Ymax = bb_Ymax - (std::max(0, (bb_Ymax - CB_Ymax)) + CB_Ymin); + + bb_Xmin = bb_Xmin - std::min(static_cast(0.0), (bb_Xmin - CB_Xmin)) - CB_Xmin; + bb_Xmax = bb_Xmax - std::max(static_cast(0.0), (bb_Xmax - CB_Xmax)) - CB_Xmin; + bb_Ymin = bb_Ymin - std::min(static_cast(0.0), (bb_Ymin - CB_Ymin)) - CB_Ymin; + bb_Ymax = bb_Ymax - std::max(static_cast(0.0), (bb_Ymax - CB_Ymax)) - CB_Ymin; + + // bound check for float values + bb_Xmin = std::max(bb_Xmin, static_cast(0)); + bb_Ymin = std::max(bb_Ymin, static_cast(0)); + bb_Xmax = std::min(bb_Xmax, static_cast(CB_Xmax - CB_Xmin)); // find max value relative to new image + bb_Ymax = std::min(bb_Ymax, static_cast(CB_Ymax - CB_Ymin)); + // reset min values and calculate width/height from Box corners - RETURN_IF_NOT_OK((*bboxList)->SetItemAt({i, 0}, static_cast(bb_Xmin))); - RETURN_IF_NOT_OK((*bboxList)->SetItemAt({i, 1}, static_cast(bb_Ymin))); - 
RETURN_IF_NOT_OK((*bboxList)->SetItemAt({i, 2}, static_cast(bb_Xmax - bb_Xmin))); - RETURN_IF_NOT_OK((*bboxList)->SetItemAt({i, 3}, static_cast(bb_Ymax - bb_Ymin))); + RETURN_IF_NOT_OK((*bboxList)->SetItemAt({i, 0}, bb_Xmin)); + RETURN_IF_NOT_OK((*bboxList)->SetItemAt({i, 1}, bb_Ymin)); + RETURN_IF_NOT_OK((*bboxList)->SetItemAt({i, 2}, bb_Xmax - bb_Xmin)); + RETURN_IF_NOT_OK((*bboxList)->SetItemAt({i, 3}, bb_Ymax - bb_Ymin)); } // create new tensor and copy over bboxes still valid to the image // bboxes outside of new cropped region are ignored - empty tensor returned in case of none *bboxCount = correct_ind.size(); - uint32_t temp; + float temp = 0.0; for (auto slice : correct_ind) { // for every index in the loop for (int ix = 0; ix < bboxDim; ix++) { - RETURN_IF_NOT_OK((*bboxList)->GetUnsignedIntAt(&temp, {slice, ix})); + RETURN_IF_NOT_OK((*bboxList)->GetItemAt(&temp, {slice, ix})); copyVals.push_back(temp); } } @@ -794,11 +795,11 @@ Status UpdateBBoxesForCrop(std::shared_ptr *bboxList, size_t *bboxCount, Status PadBBoxes(const std::shared_ptr *bboxList, const size_t &bboxCount, int32_t pad_top, int32_t pad_left) { for (int i = 0; i < bboxCount; i++) { - uint32_t xMin, yMin; - RETURN_IF_NOT_OK((*bboxList)->GetUnsignedIntAt(&xMin, {i, 0})); - RETURN_IF_NOT_OK((*bboxList)->GetUnsignedIntAt(&yMin, {i, 1})); - xMin += static_cast(pad_left); // should not be negative - yMin += static_cast(pad_top); + float xMin = 0.0, yMin = 0.0; + RETURN_IF_NOT_OK((*bboxList)->GetItemAt(&xMin, {i, 0})); + RETURN_IF_NOT_OK((*bboxList)->GetItemAt(&yMin, {i, 1})); + xMin += pad_left; + yMin += pad_top; RETURN_IF_NOT_OK((*bboxList)->SetItemAt({i, 0}, xMin)); RETURN_IF_NOT_OK((*bboxList)->SetItemAt({i, 1}, yMin)); } @@ -807,16 +808,16 @@ Status PadBBoxes(const std::shared_ptr *bboxList, const size_t &bboxCoun Status UpdateBBoxesForResize(const std::shared_ptr &bboxList, const size_t &bboxCount, int32_t target_width_, int32_t target_height_, int orig_width, int orig_height) { - uint32_t 
bb_Xmin, bb_Ymin, bb_Xwidth, bb_Ywidth; - // cast to float to preseve fractional - double W_aspRatio = (target_width_ * 1.0) / (orig_width * 1.0); - double H_aspRatio = (target_height_ * 1.0) / (orig_height * 1.0); + float bb_Xmin = 0, bb_Ymin = 0, bb_Xwidth = 0, bb_Ywidth = 0; + // cast to float to preserve fractional + float W_aspRatio = (target_width_ * 1.0) / (orig_width * 1.0); + float H_aspRatio = (target_height_ * 1.0) / (orig_height * 1.0); for (int i = 0; i < bboxCount; i++) { // for each bounding box - RETURN_IF_NOT_OK(bboxList->GetUnsignedIntAt(&bb_Xmin, {i, 0})); - RETURN_IF_NOT_OK(bboxList->GetUnsignedIntAt(&bb_Ymin, {i, 1})); - RETURN_IF_NOT_OK(bboxList->GetUnsignedIntAt(&bb_Xwidth, {i, 2})); - RETURN_IF_NOT_OK(bboxList->GetUnsignedIntAt(&bb_Ywidth, {i, 3})); + RETURN_IF_NOT_OK(bboxList->GetItemAt(&bb_Xmin, {i, 0})); + RETURN_IF_NOT_OK(bboxList->GetItemAt(&bb_Ymin, {i, 1})); + RETURN_IF_NOT_OK(bboxList->GetItemAt(&bb_Xwidth, {i, 2})); + RETURN_IF_NOT_OK(bboxList->GetItemAt(&bb_Ywidth, {i, 3})); // update positions and widths bb_Xmin = bb_Xmin * W_aspRatio; bb_Ymin = bb_Ymin * H_aspRatio; diff --git a/mindspore/ccsrc/dataset/kernels/image/image_utils.h b/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.h similarity index 97% rename from mindspore/ccsrc/dataset/kernels/image/image_utils.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.h index 57ffce6a12..f489c7367b 100644 --- a/mindspore/ccsrc/dataset/kernels/image/image_utils.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.h @@ -29,16 +29,12 @@ #include "./jpeglib.h" #include "./jerror.h" #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { -enum class InterpolationMode { kLinear = 0, kNearestNeighbour = 1, kCubic = 
2, kArea = 3 }; - -enum class BorderType { kConstant = 0, kEdge = 1, kReflect = 2, kSymmetric = 3 }; - void JpegErrorExitCustom(j_common_ptr cinfo); struct JpegErrorManagerCustom { @@ -96,7 +92,7 @@ Status Decode(const std::shared_ptr &input, std::shared_ptr *out Status DecodeCv(const std::shared_ptr &input, std::shared_ptr *output); -bool HasJpegMagic(const std::shared_ptr &input); +bool IsNonEmptyJPEG(const std::shared_ptr &input); void JpegSetSource(j_decompress_ptr c_info, const void *data, int64_t data_size); diff --git a/mindspore/ccsrc/dataset/kernels/image/normalize_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/normalize_op.cc similarity index 89% rename from mindspore/ccsrc/dataset/kernels/image/normalize_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/normalize_op.cc index 638eaad264..de5deb31ef 100644 --- a/mindspore/ccsrc/dataset/kernels/image/normalize_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/normalize_op.cc @@ -13,13 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/kernels/image/normalize_op.h" +#include "minddata/dataset/kernels/image/normalize_op.h" #include -#include "dataset/core/cv_tensor.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/normalize_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/normalize_op.h similarity index 83% rename from mindspore/ccsrc/dataset/kernels/image/normalize_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/normalize_op.h index 7aa6fa69bd..7821869c8f 100644 --- a/mindspore/ccsrc/dataset/kernels/image/normalize_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/normalize_op.h @@ -17,11 +17,12 @@ #define DATASET_KERNELS_IMAGE_NORMALIZE_OP_H_ #include +#include -#include "dataset/core/cv_tensor.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -35,6 +36,8 @@ class NormalizeOp : public TensorOp { Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + std::string Name() const override { return kNormalizeOp; } + private: std::shared_ptr mean_; std::shared_ptr std_; diff --git a/mindspore/ccsrc/dataset/kernels/image/pad_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/pad_op.cc similarity index 91% rename from mindspore/ccsrc/dataset/kernels/image/pad_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/pad_op.cc index b4d9c2bbf0..52f32e2b1b 100644 --- a/mindspore/ccsrc/dataset/kernels/image/pad_op.cc +++ 
b/mindspore/ccsrc/minddata/dataset/kernels/image/pad_op.cc @@ -13,10 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/image/pad_op.h" +#include "minddata/dataset/kernels/image/pad_op.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/status.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/pad_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/pad_op.h similarity index 90% rename from mindspore/ccsrc/dataset/kernels/image/pad_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/pad_op.h index 76d99d0162..9437058406 100644 --- a/mindspore/ccsrc/dataset/kernels/image/pad_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/pad_op.h @@ -18,11 +18,12 @@ #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -53,6 +54,8 @@ class PadOp : public TensorOp { Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; Status OutputShape(const std::vector &inputs, std::vector &outputs) override; + std::string Name() const override { return kPadOp; } + private: int32_t pad_top_; int32_t pad_bottom_; diff --git a/mindspore/ccsrc/dataset/kernels/image/random_color_adjust_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_color_adjust_op.cc similarity index 93% rename from mindspore/ccsrc/dataset/kernels/image/random_color_adjust_op.cc rename to 
mindspore/ccsrc/minddata/dataset/kernels/image/random_color_adjust_op.cc index e420f86e9a..6dbf30c33e 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_color_adjust_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_color_adjust_op.cc @@ -13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/image/random_color_adjust_op.h" +#include "minddata/dataset/kernels/image/random_color_adjust_op.h" #include -#include "dataset/core/config_manager.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/random.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/random_color_adjust_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/random_color_adjust_op.h similarity index 93% rename from mindspore/ccsrc/dataset/kernels/image/random_color_adjust_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_color_adjust_op.h index 74d1ec450b..fb29b57062 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_color_adjust_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_color_adjust_op.h @@ -21,9 +21,9 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -57,6 +57,8 @@ class RandomColorAdjustOp : public TensorOp { // @return Status - The error code return. 
Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + std::string Name() const override { return kRandomColorAdjustOp; } + private: std::mt19937 rnd_; float bright_factor_start_; diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_op.cc similarity index 95% rename from mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_op.cc index c5b5f20c63..8a7364d666 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_op.cc @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/image/random_crop_and_resize_op.h" +#include "minddata/dataset/kernels/image/random_crop_and_resize_op.h" #include -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/random.h" -#include "dataset/util/status.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_op.h similarity index 84% rename from mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_op.h index db805a9374..41d775fdf7 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_op.h @@ -19,11 +19,12 @@ #include #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/image/image_utils.h" -#include 
"dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -41,6 +42,12 @@ class RandomCropAndResizeOp : public TensorOp { float scale_ub = kDefScaleUb, float aspect_lb = kDefAspectLb, float aspect_ub = kDefAspectUb, InterpolationMode interpolation = kDefInterpolation, int32_t max_iter = kDefMaxIter); + RandomCropAndResizeOp() = default; + + RandomCropAndResizeOp(const RandomCropAndResizeOp &rhs) = default; + + RandomCropAndResizeOp(RandomCropAndResizeOp &&rhs) = default; + ~RandomCropAndResizeOp() override = default; void Print(std::ostream &out) const override { @@ -52,6 +59,8 @@ class RandomCropAndResizeOp : public TensorOp { Status GetCropBox(int h_in, int w_in, int *x, int *y, int *crop_height, int *crop_width); + std::string Name() const override { return kRandomCropAndResizeOp; } + protected: int32_t target_height_; int32_t target_width_; diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_with_bbox_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_with_bbox_op.cc similarity index 89% rename from mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_with_bbox_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_with_bbox_op.cc index fbaf2c9326..98bfe41241 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_with_bbox_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_with_bbox_op.cc @@ -17,10 +17,10 @@ #include #include -#include "dataset/util/random.h" -#include "dataset/util/status.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/kernels/image/random_crop_and_resize_with_bbox_op.h" +#include "minddata/dataset/util/random.h" +#include 
"minddata/dataset/util/status.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/kernels/image/random_crop_and_resize_with_bbox_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_with_bbox_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_with_bbox_op.h similarity index 92% rename from mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_with_bbox_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_with_bbox_op.h index 9675d43933..ddaac10fac 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_with_bbox_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_with_bbox_op.h @@ -16,7 +16,8 @@ #ifndef DATASET_KERNELS_IMAGE_RANDOM_CROP_AND_RESIZE_WITH_BBOX_OP_H_ #define DATASET_KERNELS_IMAGE_RANDOM_CROP_AND_RESIZE_WITH_BBOX_OP_H_ -#include "dataset/kernels/image/random_crop_and_resize_op.h" +#include "minddata/dataset/kernels/image/random_crop_and_resize_op.h" +#include namespace mindspore { namespace dataset { @@ -39,6 +40,8 @@ class RandomCropAndResizeWithBBoxOp : public RandomCropAndResizeOp { } Status Compute(const TensorRow &input, TensorRow *output) override; + + std::string Name() const override { return kRandomCropAndResizeWithBBoxOp; } }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_decode_resize_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_decode_resize_op.cc similarity index 90% rename from mindspore/ccsrc/dataset/kernels/image/random_crop_decode_resize_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_decode_resize_op.cc index 74aa91ea7e..d62aebd37f 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_crop_decode_resize_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_decode_resize_op.cc @@ -13,11 
+13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/image/random_crop_decode_resize_op.h" +#include "minddata/dataset/kernels/image/random_crop_decode_resize_op.h" #include -#include "dataset/kernels/image/image_utils.h" -#include "dataset/core/config_manager.h" -#include "dataset/kernels/image/decode_op.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/kernels/image/decode_op.h" namespace mindspore { namespace dataset { @@ -31,7 +31,7 @@ Status RandomCropDecodeResizeOp::Compute(const std::shared_ptr &input, s if (input == nullptr) { RETURN_STATUS_UNEXPECTED("input tensor is null"); } - if (!HasJpegMagic(input)) { + if (!IsNonEmptyJPEG(input)) { DecodeOp op(true); std::shared_ptr decoded; RETURN_IF_NOT_OK(op.Compute(input, &decoded)); diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_decode_resize_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_decode_resize_op.h similarity index 78% rename from mindspore/ccsrc/dataset/kernels/image/random_crop_decode_resize_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_decode_resize_op.h index 9566169946..863fd48c14 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_crop_decode_resize_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_decode_resize_op.h @@ -20,12 +20,12 @@ #include #include #include -#include "dataset/core/tensor.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/kernels/image/random_crop_and_resize_op.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/kernels/image/random_crop_and_resize_op.h" 
+#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -35,6 +35,8 @@ class RandomCropDecodeResizeOp : public RandomCropAndResizeOp { float scale_ub = kDefScaleUb, float aspect_lb = kDefAspectLb, float aspect_ub = kDefAspectUb, InterpolationMode interpolation = kDefInterpolation, int32_t max_iter = kDefMaxIter); + explicit RandomCropDecodeResizeOp(const RandomCropAndResizeOp &rhs) : RandomCropAndResizeOp(rhs) {} + ~RandomCropDecodeResizeOp() override = default; void Print(std::ostream &out) const override { @@ -43,6 +45,8 @@ class RandomCropDecodeResizeOp : public RandomCropAndResizeOp { } Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + + std::string Name() const override { return kRandomCropDecodeResizeOp; } }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_op.cc similarity index 96% rename from mindspore/ccsrc/dataset/kernels/image/random_crop_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_op.cc index 110d769f26..51772e9ec3 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_crop_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_op.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/kernels/image/random_crop_op.h" +#include "minddata/dataset/kernels/image/random_crop_op.h" #include -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/random.h" -#include "dataset/util/status.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_op.h similarity index 91% rename from mindspore/ccsrc/dataset/kernels/image/random_crop_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_op.h index cd43ec1efb..44f1789f9d 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_crop_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_op.h @@ -19,11 +19,12 @@ #include #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -45,6 +46,10 @@ class RandomCropOp : public TensorOp { BorderType border_types = kDefBorderType, bool pad_if_needed = kDefPadIfNeeded, uint8_t fill_r = kDefFillR, uint8_t fill_g = kDefFillG, uint8_t fill_b = kDefFillB); + RandomCropOp(const RandomCropOp &rhs) = default; + + RandomCropOp(RandomCropOp &&rhs) = default; + ~RandomCropOp() override = default; void Print(std::ostream &out) const override { out << "RandomCropOp: " << crop_height_ << " " << crop_width_; } @@ -72,6 +77,8 @@ class RandomCropOp : public TensorOp { Status OutputShape(const std::vector &inputs, std::vector &outputs) override; + std::string Name() const override { return 
kRandomCropOp; } + protected: int32_t crop_height_ = 0; int32_t crop_width_ = 0; diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_with_bbox_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_with_bbox_op.cc similarity index 91% rename from mindspore/ccsrc/dataset/kernels/image/random_crop_with_bbox_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_with_bbox_op.cc index c873307afd..08b12b8b70 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_crop_with_bbox_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_with_bbox_op.cc @@ -18,10 +18,10 @@ #include #include -#include "dataset/kernels/image/random_crop_with_bbox_op.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/random.h" -#include "dataset/util/status.h" +#include "minddata/dataset/kernels/image/random_crop_with_bbox_op.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_with_bbox_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_with_bbox_op.h similarity index 93% rename from mindspore/ccsrc/dataset/kernels/image/random_crop_with_bbox_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_with_bbox_op.h index 88a58d3557..bfcd1610d3 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_crop_with_bbox_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_with_bbox_op.h @@ -18,8 +18,9 @@ #include #include +#include -#include "dataset/kernels/image/random_crop_op.h" +#include "minddata/dataset/kernels/image/random_crop_op.h" namespace mindspore { namespace dataset { @@ -41,6 +42,8 @@ class RandomCropWithBBoxOp : public RandomCropOp { } Status Compute(const TensorRow &input, TensorRow *output) override; + + std::string Name() const override { return 
kRandomCropWithBBoxOp; } }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_horizontal_flip_op.cc similarity index 85% rename from mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_horizontal_flip_op.cc index ae76e1bf59..5e8ab8a634 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_horizontal_flip_op.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/image/random_horizontal_flip_op.h" +#include "minddata/dataset/kernels/image/random_horizontal_flip_op.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/status.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/random_horizontal_flip_op.h similarity index 86% rename from mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_horizontal_flip_op.h index efea124533..9e08929180 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_horizontal_flip_op.h @@ -18,11 +18,12 @@ #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/random.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/util/status.h" 
namespace mindspore { namespace dataset { @@ -47,6 +48,8 @@ class RandomHorizontalFlipOp : public TensorOp { Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + std::string Name() const override { return kRandomHorizontalFlipOp; } + private: std::mt19937 rnd_; std::bernoulli_distribution distribution_; diff --git a/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_bbox_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_horizontal_flip_with_bbox_op.cc similarity index 68% rename from mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_bbox_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_horizontal_flip_with_bbox_op.cc index 5a5c632e81..809f564b18 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_bbox_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_horizontal_flip_with_bbox_op.cc @@ -14,11 +14,10 @@ * limitations under the License. */ #include -#include "dataset/kernels/image/random_horizontal_flip_bbox_op.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/status.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/core/pybind_support.h" +#include "minddata/dataset/kernels/image/random_horizontal_flip_with_bbox_op.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/core/cv_tensor.h" namespace mindspore { namespace dataset { @@ -31,21 +30,19 @@ Status RandomHorizontalFlipWithBBoxOp::Compute(const TensorRow &input, TensorRow // To test bounding boxes algorithm, create random bboxes from image dims size_t num_of_boxes = input[1]->shape()[0]; // set to give number of bboxes float img_center = (input[0]->shape()[1] / 2.); // get the center of the image - for (int i = 0; i < num_of_boxes; i++) { - uint32_t b_w = 0; // bounding box width - uint32_t min_x = 0; + float b_w = 0; // bounding box width + float min_x = 0; // get the required items - 
input[1]->GetItemAt(&min_x, {i, 0}); - input[1]->GetItemAt(&b_w, {i, 2}); + RETURN_IF_NOT_OK(input[1]->GetItemAt(&min_x, {i, 0})); + RETURN_IF_NOT_OK(input[1]->GetItemAt(&b_w, {i, 2})); // do the flip - float diff = img_center - min_x; // get distance from min_x to center - uint32_t refl_min_x = diff + img_center; // get reflection of min_x - uint32_t new_min_x = refl_min_x - b_w; // subtract from the reflected min_x to get the new one - input[1]->SetItemAt({i, 0}, new_min_x); + float diff = img_center - min_x; // get distance from min_x to center + float refl_min_x = diff + img_center; // get reflection of min_x + float new_min_x = refl_min_x - b_w; // subtract from the reflected min_x to get the new one + RETURN_IF_NOT_OK(input[1]->SetItemAt({i, 0}, new_min_x)); } - (*output).push_back(nullptr); - (*output).push_back(nullptr); + (*output).resize(2); // move input to output pointer of bounding boxes (*output)[1] = std::move(input[1]); // perform HorizontalFlip on the image @@ -55,6 +52,5 @@ Status RandomHorizontalFlipWithBBoxOp::Compute(const TensorRow &input, TensorRow *output = input; return Status::OK(); } - } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_bbox_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/random_horizontal_flip_with_bbox_op.h similarity index 86% rename from mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_bbox_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_horizontal_flip_with_bbox_op.h index 06c96e11ae..d98669ea13 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_bbox_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_horizontal_flip_with_bbox_op.h @@ -16,18 +16,15 @@ #ifndef DATASET_KERNELS_IMAGE_RANDOM_HORIZONTAL_FLIP_BBOX_OP_H_ #define DATASET_KERNELS_IMAGE_RANDOM_HORIZONTAL_FLIP_BBOX_OP_H_ -#include -#include #include #include #include +#include #include -#include "dataset/core/tensor.h" 
-#include "dataset/kernels/tensor_op.h" -#include "dataset/util/random.h" -#include "dataset/util/status.h" -#include "pybind11/pybind11.h" -#include "pybind11/stl_bind.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -52,6 +49,8 @@ class RandomHorizontalFlipWithBBoxOp : public TensorOp { Status Compute(const TensorRow &input, TensorRow *output) override; + std::string Name() const override { return kRandomHorizontalFlipWithBBoxOp; } + private: std::mt19937 rnd_; std::bernoulli_distribution distribution_; diff --git a/mindspore/ccsrc/dataset/kernels/image/random_resize_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_resize_op.cc similarity index 81% rename from mindspore/ccsrc/dataset/kernels/image/random_resize_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_resize_op.cc index c14224a930..8736f0a6a5 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_resize_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_resize_op.cc @@ -13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/kernels/image/random_resize_op.h" +#include "minddata/dataset/kernels/image/random_resize_op.h" #include -#include "dataset/core/config_manager.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/random_resize_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/random_resize_op.h similarity index 83% rename from mindspore/ccsrc/dataset/kernels/image/random_resize_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_resize_op.h index af23803d4c..8b2b067751 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_resize_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_resize_op.h @@ -18,12 +18,13 @@ #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/image/resize_op.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/random.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/image/resize_op.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -45,6 +46,8 @@ class RandomResizeOp : public ResizeOp { Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + std::string Name() const override { return kRandomResizeOp; } + private: std::mt19937 random_generator_; std::uniform_int_distribution distribution_{0, 3}; diff --git a/mindspore/ccsrc/dataset/kernels/image/random_resize_with_bbox_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_resize_with_bbox_op.cc similarity index 86% rename 
from mindspore/ccsrc/dataset/kernels/image/random_resize_with_bbox_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_resize_with_bbox_op.cc index de69c02e39..e099b78a0f 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_resize_with_bbox_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_resize_with_bbox_op.cc @@ -14,9 +14,9 @@ * limitations under the License. */ -#include "dataset/kernels/image/random_resize_with_bbox_op.h" -#include "dataset/kernels/image/resize_with_bbox_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/kernels/image/random_resize_with_bbox_op.h" +#include "minddata/dataset/kernels/image/resize_with_bbox_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/random_resize_with_bbox_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/random_resize_with_bbox_op.h similarity index 81% rename from mindspore/ccsrc/dataset/kernels/image/random_resize_with_bbox_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_resize_with_bbox_op.h index 4a7614525f..6bad0d30fa 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_resize_with_bbox_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_resize_with_bbox_op.h @@ -19,13 +19,14 @@ #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/image/resize_op.h" -#include "dataset/kernels/image/resize_with_bbox_op.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/random.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/image/resize_op.h" +#include "minddata/dataset/kernels/image/resize_with_bbox_op.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -46,6 +47,8 @@ class RandomResizeWithBBoxOp : 
public ResizeWithBBoxOp { Status Compute(const TensorRow &input, TensorRow *output) override; + std::string Name() const override { return kRandomResizeWithBBoxOp; } + private: std::mt19937 random_generator_; std::uniform_int_distribution distribution_{0, 3}; diff --git a/mindspore/ccsrc/dataset/kernels/image/random_rotation_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_rotation_op.cc similarity index 92% rename from mindspore/ccsrc/dataset/kernels/image/random_rotation_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_rotation_op.cc index 65e024865b..b2cb4facae 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_rotation_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_rotation_op.cc @@ -13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/image/random_rotation_op.h" +#include "minddata/dataset/kernels/image/random_rotation_op.h" #include -#include "dataset/core/cv_tensor.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/random.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/random_rotation_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/random_rotation_op.h similarity index 92% rename from mindspore/ccsrc/dataset/kernels/image/random_rotation_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_rotation_op.h index d30cd24288..ea679ccb56 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_rotation_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_rotation_op.h @@ -19,11 +19,12 @@ #include #include #include +#include -#include "dataset/core/tensor.h" -#include 
"dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" -#include "dataset/kernels/image/image_utils.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/kernels/image/image_utils.h" namespace mindspore { namespace dataset { @@ -68,6 +69,8 @@ class RandomRotationOp : public TensorOp { Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; Status OutputShape(const std::vector &inputs, std::vector &outputs) override; + std::string Name() const override { return kRandomRotationOp; } + private: float degree_start_; float degree_end_; diff --git a/mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_vertical_flip_op.cc similarity index 85% rename from mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_vertical_flip_op.cc index 096923a9ec..24d816ef1a 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_vertical_flip_op.cc @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#include "dataset/kernels/image/random_vertical_flip_op.h" +#include "minddata/dataset/kernels/image/random_vertical_flip_op.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/status.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/random_vertical_flip_op.h similarity index 85% rename from mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_vertical_flip_op.h index 18693bc0eb..cee5869c71 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_vertical_flip_op.h @@ -18,11 +18,12 @@ #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" -#include "dataset/util/random.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/util/random.h" namespace mindspore { namespace dataset { @@ -41,6 +42,8 @@ class RandomVerticalFlipOp : public TensorOp { Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + std::string Name() const override { return kRandomVerticalFlipOp; } + private: std::mt19937 rnd_; std::bernoulli_distribution distribution_; diff --git a/mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_with_bbox_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_vertical_flip_with_bbox_op.cc similarity index 77% rename from mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_with_bbox_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_vertical_flip_with_bbox_op.cc index ffea851eac..7d2fa7bab5 100644 --- 
a/mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_with_bbox_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_vertical_flip_with_bbox_op.cc @@ -16,9 +16,9 @@ #include -#include "dataset/util/status.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/kernels/image/random_vertical_flip_with_bbox_op.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/kernels/image/random_vertical_flip_with_bbox_op.h" namespace mindspore { namespace dataset { @@ -34,14 +34,13 @@ Status RandomVerticalFlipWithBBoxOp::Compute(const TensorRow &input, TensorRow * // one time allocation -> updated in the loop // type defined based on VOC test dataset for (int i = 0; i < boxCount; i++) { - uint32_t boxCorner_y = 0; - uint32_t boxHeight = 0; - uint32_t newBoxCorner_y = 0; - RETURN_IF_NOT_OK(input[1]->GetUnsignedIntAt(&boxCorner_y, {i, 1})); // get min y of bbox - RETURN_IF_NOT_OK(input[1]->GetUnsignedIntAt(&boxHeight, {i, 3})); // get height of bbox + float boxCorner_y = 0.0, boxHeight = 0.0; + float newBoxCorner_y = 0.0; + RETURN_IF_NOT_OK(input[1]->GetItemAt(&boxCorner_y, {i, 1})); // get min y of bbox + RETURN_IF_NOT_OK(input[1]->GetItemAt(&boxHeight, {i, 3})); // get height of bbox // subtract (curCorner + height) from (max) for new Corner position - newBoxCorner_y = (imHeight - 1) - ((boxCorner_y + boxHeight) - 1); + newBoxCorner_y = (imHeight - 1.0) - ((boxCorner_y + boxHeight) - 1.0); RETURN_IF_NOT_OK(input[1]->SetItemAt({i, 1}, newBoxCorner_y)); } diff --git a/mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_with_bbox_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/random_vertical_flip_with_bbox_op.h similarity index 85% rename from mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_with_bbox_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_vertical_flip_with_bbox_op.h index 4764cc2b75..c9f19f5217 100644 --- 
a/mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_with_bbox_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_vertical_flip_with_bbox_op.h @@ -18,11 +18,12 @@ #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" -#include "dataset/util/random.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/util/random.h" namespace mindspore { namespace dataset { @@ -42,6 +43,8 @@ class RandomVerticalFlipWithBBoxOp : public TensorOp { Status Compute(const TensorRow &input, TensorRow *output) override; + std::string Name() const override { return kRandomVerticalFlipWithBBoxOp; } + private: std::mt19937 rnd_; std::bernoulli_distribution distribution_; diff --git a/mindspore/ccsrc/dataset/kernels/image/rescale_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/rescale_op.cc similarity index 87% rename from mindspore/ccsrc/dataset/kernels/image/rescale_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/rescale_op.cc index fd1807991c..2a500d6c34 100644 --- a/mindspore/ccsrc/dataset/kernels/image/rescale_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/rescale_op.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/kernels/image/rescale_op.h" +#include "minddata/dataset/kernels/image/rescale_op.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/status.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/rescale_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/rescale_op.h similarity index 87% rename from mindspore/ccsrc/dataset/kernels/image/rescale_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/rescale_op.h index 8aee75b0c1..c70b7bf6cf 100644 --- a/mindspore/ccsrc/dataset/kernels/image/rescale_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/rescale_op.h @@ -18,10 +18,11 @@ #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -38,6 +39,8 @@ class RescaleOp : public TensorOp { Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; Status OutputType(const std::vector &inputs, std::vector &outputs) override; + std::string Name() const override { return kRescaleOp; } + private: float rescale_; float shift_; diff --git a/mindspore/ccsrc/dataset/kernels/image/resize_bilinear_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_bilinear_op.cc similarity index 89% rename from mindspore/ccsrc/dataset/kernels/image/resize_bilinear_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/resize_bilinear_op.cc index 658caac6a5..48a8fbbc53 100644 --- a/mindspore/ccsrc/dataset/kernels/image/resize_bilinear_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_bilinear_op.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * 
limitations under the License. */ -#include "dataset/kernels/image/resize_bilinear_op.h" +#include "minddata/dataset/kernels/image/resize_bilinear_op.h" #include -#include "dataset/util/status.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/resize_bilinear_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_bilinear_op.h similarity index 88% rename from mindspore/ccsrc/dataset/kernels/image/resize_bilinear_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/resize_bilinear_op.h index c8c2a5185b..fd8f940946 100644 --- a/mindspore/ccsrc/dataset/kernels/image/resize_bilinear_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_bilinear_op.h @@ -20,10 +20,10 @@ #include #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/image/resize_op.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/image/resize_op.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -51,6 +51,8 @@ class ResizeBilinearOp : public ResizeOp { // Name: Print() // Description: A function that prints info about the node void Print(std::ostream &out) const override; + + std::string Name() const override { return kResizeBilinearOp; } }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/image/resize_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_op.cc similarity index 94% rename from mindspore/ccsrc/dataset/kernels/image/resize_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/resize_op.cc index 7c0252188e..7456f50f32 100644 --- a/mindspore/ccsrc/dataset/kernels/image/resize_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_op.cc @@ -13,10 +13,10 @@ * See the License for the specific 
language governing permissions and * limitations under the License. */ -#include "dataset/kernels/image/resize_op.h" +#include "minddata/dataset/kernels/image/resize_op.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/status.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/resize_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_op.h similarity index 86% rename from mindspore/ccsrc/dataset/kernels/image/resize_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/resize_op.h index 5a35a6076c..3f847243ff 100644 --- a/mindspore/ccsrc/dataset/kernels/image/resize_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_op.h @@ -18,11 +18,12 @@ #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -43,6 +44,10 @@ class ResizeOp : public TensorOp { explicit ResizeOp(int32_t size1, int32_t size2 = kDefWidth, InterpolationMode mInterpolation = kDefInterpolation) : size1_(size1), size2_(size2), interpolation_(mInterpolation) {} + ResizeOp(const ResizeOp &rhs) = default; + + ResizeOp(ResizeOp &&rhs) = default; + ~ResizeOp() override = default; void Print(std::ostream &out) const override { out << "ResizeOp: " << size1_ << " " << size2_; } @@ -50,6 +55,8 @@ class ResizeOp : public TensorOp { Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; Status OutputShape(const std::vector &inputs, std::vector &outputs) override; + std::string Name() const override { return kResizeOp; } + protected: 
int32_t size1_; int32_t size2_; diff --git a/mindspore/ccsrc/dataset/kernels/image/resize_with_bbox_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_with_bbox_op.cc similarity index 80% rename from mindspore/ccsrc/dataset/kernels/image/resize_with_bbox_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/resize_with_bbox_op.cc index 8a633d5678..9df2d8a25e 100644 --- a/mindspore/ccsrc/dataset/kernels/image/resize_with_bbox_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_with_bbox_op.cc @@ -14,16 +14,16 @@ * limitations under the License. */ -#include "dataset/kernels/image/resize_with_bbox_op.h" +#include "minddata/dataset/kernels/image/resize_with_bbox_op.h" #include #include -#include "dataset/kernels/image/resize_op.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/core/pybind_support.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/kernels/image/resize_op.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/core/pybind_support.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/resize_with_bbox_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_with_bbox_op.h similarity index 81% rename from mindspore/ccsrc/dataset/kernels/image/resize_with_bbox_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/resize_with_bbox_op.h index 17bdd01ef1..d2b5c96bf3 100644 --- a/mindspore/ccsrc/dataset/kernels/image/resize_with_bbox_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_with_bbox_op.h @@ -16,11 +16,12 @@ #ifndef DATASET_KERNELS_IMAGE_RESIZE_WITH_BBOX_OP_H #define 
DATASET_KERNELS_IMAGE_RESIZE_WITH_BBOX_OP_H -#include "dataset/core/tensor.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" -#include "dataset/kernels/image/resize_op.h" +#include +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/kernels/image/resize_op.h" namespace mindspore { namespace dataset { @@ -36,6 +37,8 @@ class ResizeWithBBoxOp : public ResizeOp { void Print(std::ostream &out) const override { out << "ResizeWithBBoxOp: " << size1_ << " " << size2_; } Status Compute(const TensorRow &input, TensorRow *output) override; + + std::string Name() const override { return kResizeWithBBoxOp; } }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.cc similarity index 95% rename from mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.cc index 7889b3b157..95d75af0f2 100644 --- a/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ #include -#include "dataset/kernels/image/uniform_aug_op.h" -#include "dataset/util/random.h" +#include "minddata/dataset/kernels/image/uniform_aug_op.h" +#include "minddata/dataset/util/random.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.h similarity index 90% rename from mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.h index 824898ba2d..0ae0fda92b 100644 --- a/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.h @@ -21,9 +21,9 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -46,6 +46,8 @@ class UniformAugOp : public TensorOp { // @return Status - The error code return Status Compute(const TensorRow &input, TensorRow *output) override; + std::string Name() const override { return kUniformAugOp; } + private: int32_t num_ops_; std::vector> tensor_op_list_; diff --git a/mindspore/ccsrc/dataset/kernels/no_op.h b/mindspore/ccsrc/minddata/dataset/kernels/no_op.h similarity index 86% rename from mindspore/ccsrc/dataset/kernels/no_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/no_op.h index bfbdf43b36..f5a6a58f2b 100644 --- a/mindspore/ccsrc/dataset/kernels/no_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/no_op.h @@ -17,9 +17,10 @@ #define DATASET_KERNELS_NO_OP_H_ #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { @@ -31,6 +32,8 @@ class NoOp : 
public TensorOp { } void Print(std::ostream &out) const override { out << "NoOp"; }; + + std::string Name() const override { return kNoOp; } }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/py_func_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/py_func_op.cc similarity index 94% rename from mindspore/ccsrc/dataset/kernels/py_func_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/py_func_op.cc index 0a6a1452b5..f501dd4b4f 100644 --- a/mindspore/ccsrc/dataset/kernels/py_func_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/py_func_op.cc @@ -13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/py_func_op.h" +#include "minddata/dataset/kernels/py_func_op.h" #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/py_func_op.h b/mindspore/ccsrc/minddata/dataset/kernels/py_func_op.h similarity index 88% rename from mindspore/ccsrc/dataset/kernels/py_func_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/py_func_op.h index a50aceafbb..75d222b433 100644 --- a/mindspore/ccsrc/dataset/kernels/py_func_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/py_func_op.h @@ -20,9 +20,10 @@ #include #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { @@ -38,6 +39,8 @@ class __attribute__((visibility("hidden"))) PyFuncOp : public TensorOp { // Compute function for n-n mapping. 
Status Compute(const TensorRow &input, TensorRow *output) override; + std::string Name() const override { return kPyFuncOp; } + private: py::function py_func_ptr_; }; diff --git a/mindspore/ccsrc/dataset/kernels/tensor_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.cc similarity index 98% rename from mindspore/ccsrc/dataset/kernels/tensor_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/tensor_op.cc index 92aef8dc9e..b625e3b532 100644 --- a/mindspore/ccsrc/dataset/kernels/tensor_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/kernels/tensor_op.h" #include #include #include diff --git a/mindspore/ccsrc/dataset/kernels/tensor_op.h b/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h similarity index 65% rename from mindspore/ccsrc/dataset/kernels/tensor_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h index 9aae50d6b0..3bcba4b463 100644 --- a/mindspore/ccsrc/dataset/kernels/tensor_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h @@ -20,9 +20,9 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/core/tensor_row.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/tensor_row.h" +#include "minddata/dataset/util/status.h" #define IO_CHECK(input, output) \ do { \ @@ -62,14 +62,16 @@ uint32_t img_h = input[0]->shape()[0]; \ uint32_t img_w = input[0]->shape()[1]; \ for (uint32_t i = 0; i < num_of_boxes; i++) { \ - uint32_t min_x = 0; \ - uint32_t min_y = 0; \ - uint32_t b_w = 0; \ - uint32_t b_h = 0; \ - input[1]->GetItemAt(&min_x, {i, 0}); \ - input[1]->GetItemAt(&min_y, {i, 1}); \ - input[1]->GetItemAt(&b_w, {i, 2}); \ - input[1]->GetItemAt(&b_h, {i, 3}); \ + float min_x = 0.0, min_y = 0.0, b_w = 0.0, b_h = 0.0; \ + bool 
passing_data_fetch = true; \ + passing_data_fetch &= input[1]->GetItemAt(&min_x, {i, 0}).IsOk(); \ + passing_data_fetch &= input[1]->GetItemAt(&min_y, {i, 1}).IsOk(); \ + passing_data_fetch &= input[1]->GetItemAt(&b_w, {i, 2}).IsOk(); \ + passing_data_fetch &= input[1]->GetItemAt(&b_h, {i, 3}).IsOk(); \ + if (!passing_data_fetch) { \ + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, \ + "Fetching BBox values failed in BOUNDING_BOX_CHECK."); \ + } \ if ((min_x + b_w > img_w) || (min_y + b_h > img_h)) { \ return Status(StatusCode::kBoundingBoxOutOfBounds, __LINE__, __FILE__, \ "At least one of the bounding boxes is out of bounds of the image."); \ @@ -83,6 +85,66 @@ namespace mindspore { namespace dataset { + +// image +constexpr char kBoundingBoxAugmentOp[] = "BoundingBoxAugmentOp"; +constexpr char kDecodeOp[] = "DecodeOp"; +constexpr char kCenterCropOp[] = "CenterCropOp"; +constexpr char kCutOutOp[] = "CutOutOp"; +constexpr char kHwcToChwOp[] = "HwcToChwOp"; +constexpr char kNormalizeOp[] = "NormalizeOp"; +constexpr char kPadOp[] = "PadOp"; +constexpr char kRandomColorAdjustOp[] = "RandomColorAdjustOp"; +constexpr char kRandomCropAndResizeOp[] = "RandomCropAndResizeOp"; +constexpr char kRandomCropAndResizeWithBBoxOp[] = "RandomCropAndResizeWithBBoxOp"; +constexpr char kRandomCropDecodeResizeOp[] = "RandomCropDecodeResizeOp"; +constexpr char kRandomCropOp[] = "RandomCropOp"; +constexpr char kRandomCropWithBBoxOp[] = "RandomCropWithBBoxOp"; +constexpr char kRandomHorizontalFlipWithBBoxOp[] = "RandomHorizontalFlipWithBBoxOp"; +constexpr char kRandomHorizontalFlipOp[] = "RandomHorizontalFlipOp"; +constexpr char kRandomResizeOp[] = "RandomResizeOp"; +constexpr char kRandomResizeWithBBoxOp[] = "RandomResizeWithBBoxOp"; +constexpr char kRandomRotationOp[] = "RandomRotationOp"; +constexpr char kRandomVerticalFlipOp[] = "RandomVerticalFlipOp"; +constexpr char kRandomVerticalFlipWithBBoxOp[] = "RandomVerticalFlipWithBBoxOp"; +constexpr char kRescaleOp[] = 
"RescaleOp"; +constexpr char kResizeBilinearOp[] = "ResizeBilinearOp"; +constexpr char kResizeOp[] = "ResizeOp"; +constexpr char kResizeWithBBoxOp[] = "ResizeWithBBoxOp"; +constexpr char kUniformAugOp[] = "UniformAugOp"; + +// text +constexpr char kBasicTokenizerOp[] = "BasicTokenizerOp"; +constexpr char kBertTokenizerOp[] = "BertTokenizerOp"; +constexpr char kCaseFoldOp[] = "CaseFoldOp"; +constexpr char kJiebaTokenizerOp[] = "JiebaTokenizerOp"; +constexpr char kLookupOp[] = "LookupOp"; +constexpr char kNgramOp[] = "NgramOp"; +constexpr char kNormalizeUTF8Op[] = "NormalizeUTF8Op"; +constexpr char kRegexReplaceOp[] = "RegexReplaceOp"; +constexpr char kRegexTokenizerOp[] = "RegexTokenizerOp"; +constexpr char kToNumberOp[] = "ToNumberOp"; +constexpr char kTruncateSequencePairOp[] = "TruncateSequencePairOp"; +constexpr char kUnicodeCharTokenizerOp[] = "UnicodeCharTokenizerOp"; +constexpr char kUnicodeScriptTokenizerOp[] = "UnicodeScriptTokenizerOp"; +constexpr char kWhitespaceTokenizerOp[] = "WhitespaceTokenizerOp"; +constexpr char kWordpieceTokenizerOp[] = "WordpieceTokenizerOp"; + +// data +constexpr char kConcatenateOp[] = "kConcatenateOp"; +constexpr char kDuplicateOp[] = "DuplicateOp"; +constexpr char kFillOp[] = "FillOp"; +constexpr char kMaskOp[] = "MaskOp"; +constexpr char kOneHotOp[] = "OneHotOp"; +constexpr char kPadEndOp[] = "PadEndOp"; +constexpr char kSliceOp[] = "SliceOp"; +constexpr char kToFloat16Op[] = "ToFloat16Op"; +constexpr char kTypeCastOp[] = "TypeCastOp"; + +// other +constexpr char kPyFuncOp[] = "PyFuncOp"; +constexpr char kNoOp[] = "NoOp"; + // A class that does a computation on a Tensor class TensorOp { public: @@ -141,6 +203,8 @@ class TensorOp { // @param outputs out: vector of the types of the output tensors to be filled. 
// @return Status virtual Status OutputType(const std::vector &inputs, std::vector &outputs); + + virtual std::string Name() const = 0; }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/text/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/text/CMakeLists.txt similarity index 100% rename from mindspore/ccsrc/dataset/text/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/text/CMakeLists.txt diff --git a/mindspore/ccsrc/dataset/text/kernels/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/text/kernels/CMakeLists.txt similarity index 100% rename from mindspore/ccsrc/dataset/text/kernels/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/text/kernels/CMakeLists.txt diff --git a/mindspore/ccsrc/dataset/text/kernels/basic_tokenizer_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/basic_tokenizer_op.cc similarity index 84% rename from mindspore/ccsrc/dataset/text/kernels/basic_tokenizer_op.cc rename to mindspore/ccsrc/minddata/dataset/text/kernels/basic_tokenizer_op.cc index 3512a4b2d7..6195572944 100644 --- a/mindspore/ccsrc/dataset/text/kernels/basic_tokenizer_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/basic_tokenizer_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/text/kernels/basic_tokenizer_op.h" +#include "minddata/dataset/text/kernels/basic_tokenizer_op.h" #include #include #include @@ -27,10 +27,12 @@ namespace mindspore { namespace dataset { + const bool BasicTokenizerOp::kDefLowerCase = false; const bool BasicTokenizerOp::kDefKeepWhitespace = false; const NormalizeForm BasicTokenizerOp::kDefNormalizationForm = NormalizeForm::kNone; const bool BasicTokenizerOp::kDefPreserveUnusedToken = true; +const bool BasicTokenizerOp::kDefWithOffsets = false; const char BasicTokenizerOp::kCommonPattern[] = "[!-/]" "|[:-@]" @@ -47,11 +49,14 @@ const char BasicTokenizerOp::kCommonPattern[] = "|[\\x{2F800}-\\x{2FA1F}]"; const char BasicTokenizerOp::kUnusedPattern[] = "\\[CLS\\]|\\[SEP\\]|\\[UNK\\]|\\[PAD\\]|\\[MASK\\]|\\[unused\\d+\\]|"; const std::unordered_set BasicTokenizerOp::kUnusedWords{"[CLS]", "[SEP]", "[UNK]", "[PAD]", "[MASK]"}; -BasicTokenizerOp::BasicTokenizerOp(bool lower_case, bool keep_whitespace, NormalizeForm normalization_form, - bool preserve_unused_token) + +BasicTokenizerOp::BasicTokenizerOp(const bool &lower_case, const bool &keep_whitespace, + const NormalizeForm &normalization_form, const bool &preserve_unused_token, + const bool &with_offsets) : lower_case_(lower_case), keep_whitespace_(keep_whitespace), preserve_unused_token_(preserve_unused_token), + with_offsets_(with_offsets), case_fold_(std::make_unique()), nfd_normalize_(std::make_unique(NormalizeForm::kNfd)), normalization_form_(normalization_form), @@ -69,7 +74,7 @@ BasicTokenizerOp::BasicTokenizerOp(bool lower_case, bool keep_whitespace, Normal keep_delim_pattern = kUnusedPattern + keep_delim_pattern; delim_pattern = kUnusedPattern + delim_pattern; } - regex_tokenizer_ = std::make_unique(delim_pattern, keep_delim_pattern); + regex_tokenizer_ = std::make_unique(delim_pattern, keep_delim_pattern, with_offsets_); } Status BasicTokenizerOp::CaseFoldWithoutUnusedWords(const std::string_view &text, @@ -135,9 +140,10 @@ Status 
BasicTokenizerOp::CaseFoldWithoutUnusedWords(const std::shared_ptr &input, std::shared_ptr *output) { - IO_CHECK(input, output); - if (input->Rank() != 0 || input->type() != DataType::DE_STRING) { +Status BasicTokenizerOp::Compute(const TensorRow &input, TensorRow *output) { + IO_CHECK_VECTOR(input, output); + CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor"); + if (input[0]->Rank() != 0 || input[0]->type() != DataType::DE_STRING) { RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor"); } std::shared_ptr cur_input; @@ -145,10 +151,10 @@ Status BasicTokenizerOp::Compute(const std::shared_ptr &input, std::shar if (lower_case_) { if (!preserve_unused_token_) { // to lower case - RETURN_IF_NOT_OK(case_fold_->Compute(input, &processed_tensor)); + RETURN_IF_NOT_OK(case_fold_->Compute(input[0], &processed_tensor)); } else { // to lower case except words in kUnusedWords - RETURN_IF_NOT_OK(CaseFoldWithoutUnusedWords(input, &processed_tensor)); + RETURN_IF_NOT_OK(CaseFoldWithoutUnusedWords(input[0], &processed_tensor)); } cur_input = processed_tensor; // strip accent characters @@ -156,12 +162,12 @@ Status BasicTokenizerOp::Compute(const std::shared_ptr &input, std::shar cur_input = processed_tensor; RETURN_IF_NOT_OK(replace_accent_chars_->Compute(cur_input, &processed_tensor)); } else { - RETURN_IF_NOT_OK(common_normalize_->Compute(input, &processed_tensor)); + RETURN_IF_NOT_OK(common_normalize_->Compute(input[0], &processed_tensor)); } // strip control characters cur_input = processed_tensor; RETURN_IF_NOT_OK(replace_control_chars_->Compute(cur_input, &processed_tensor)); - return regex_tokenizer_->Compute(processed_tensor, output); + return regex_tokenizer_->Compute(TensorRow(0, {std::move(processed_tensor)}), output); } } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/text/kernels/basic_tokenizer_op.h b/mindspore/ccsrc/minddata/dataset/text/kernels/basic_tokenizer_op.h similarity index 
69% rename from mindspore/ccsrc/dataset/text/kernels/basic_tokenizer_op.h rename to mindspore/ccsrc/minddata/dataset/text/kernels/basic_tokenizer_op.h index 01827a0ba4..cbc21273c2 100644 --- a/mindspore/ccsrc/dataset/text/kernels/basic_tokenizer_op.h +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/basic_tokenizer_op.h @@ -19,13 +19,13 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/text/kernels/case_fold_op.h" -#include "dataset/text/kernels/normalize_utf8_op.h" -#include "dataset/text/kernels/regex_replace_op.h" -#include "dataset/text/kernels/regex_tokenizer_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/text/kernels/case_fold_op.h" +#include "minddata/dataset/text/kernels/normalize_utf8_op.h" +#include "minddata/dataset/text/kernels/regex_replace_op.h" +#include "minddata/dataset/text/kernels/regex_tokenizer_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -36,25 +36,31 @@ class BasicTokenizerOp : public TensorOp { static const bool kDefKeepWhitespace; static const NormalizeForm kDefNormalizationForm; static const bool kDefPreserveUnusedToken; - explicit BasicTokenizerOp(bool lower_case = kDefLowerCase, bool keep_whitespace = kDefKeepWhitespace, - NormalizeForm normalization_form = kDefNormalizationForm, - bool preserve_unused_token = kDefPreserveUnusedToken); + static const bool kDefWithOffsets; + + explicit BasicTokenizerOp(const bool &lower_case = kDefLowerCase, const bool &keep_whitespace = kDefKeepWhitespace, + const NormalizeForm &normalization_form = kDefNormalizationForm, + const bool &preserve_unused_token = kDefPreserveUnusedToken, + const bool &with_offsets = kDefWithOffsets); ~BasicTokenizerOp() override = default; void Print(std::ostream &out) const override { out << "BasicTokenizerOp"; } - Status Compute(const 
std::shared_ptr &input, std::shared_ptr *output) override; + Status Compute(const TensorRow &input, TensorRow *output) override; protected: Status CaseFoldWithoutUnusedWords(const std::string_view &text, const std::unordered_set &unused_words, std::string *outupt); Status CaseFoldWithoutUnusedWords(const std::shared_ptr &input, std::shared_ptr *output); + std::string Name() const override { return kBasicTokenizerOp; } + private: static const char kCommonPattern[]; static const char kUnusedPattern[]; static const std::unordered_set kUnusedWords; + bool with_offsets_; bool lower_case_; bool keep_whitespace_; NormalizeForm normalization_form_; diff --git a/mindspore/ccsrc/dataset/text/kernels/bert_tokenizer_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/bert_tokenizer_op.cc similarity index 79% rename from mindspore/ccsrc/dataset/text/kernels/bert_tokenizer_op.cc rename to mindspore/ccsrc/minddata/dataset/text/kernels/bert_tokenizer_op.cc index 2b68a5accb..631597ba24 100644 --- a/mindspore/ccsrc/dataset/text/kernels/bert_tokenizer_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/bert_tokenizer_op.cc @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/text/kernels/bert_tokenizer_op.h" +#include "minddata/dataset/text/kernels/bert_tokenizer_op.h" namespace mindspore { namespace dataset { -Status BertTokenizerOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { - IO_CHECK(input, output); - std::shared_ptr basic_tensor; +Status BertTokenizerOp::Compute(const TensorRow &input, TensorRow *output) { + IO_CHECK_VECTOR(input, output); + TensorRow basic_tensor; RETURN_IF_NOT_OK(basic_tokenizer_.Compute(input, &basic_tensor)); RETURN_IF_NOT_OK(wordpiece_tokenizer_.Compute(basic_tensor, output)); return Status::OK(); diff --git a/mindspore/ccsrc/dataset/text/kernels/bert_tokenizer_op.h b/mindspore/ccsrc/minddata/dataset/text/kernels/bert_tokenizer_op.h similarity index 63% rename from mindspore/ccsrc/dataset/text/kernels/bert_tokenizer_op.h rename to mindspore/ccsrc/minddata/dataset/text/kernels/bert_tokenizer_op.h index 660fdc7ba5..b281903349 100644 --- a/mindspore/ccsrc/dataset/text/kernels/bert_tokenizer_op.h +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/bert_tokenizer_op.h @@ -18,11 +18,11 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/text/kernels/basic_tokenizer_op.h" -#include "dataset/text/kernels/wordpiece_tokenizer_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/text/kernels/basic_tokenizer_op.h" +#include "minddata/dataset/text/kernels/wordpiece_tokenizer_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -32,18 +32,21 @@ class BertTokenizerOp : public TensorOp { const std::string &suffix_indicator = WordpieceTokenizerOp::kDefSuffixIndicator, const int &max_bytes_per_token = WordpieceTokenizerOp::kDefMaxBytesPerToken, const std::string &unknown_token = WordpieceTokenizerOp::kDefUnknownToken, - bool lower_case = BasicTokenizerOp::kDefLowerCase, - bool 
keep_whitespace = BasicTokenizerOp::kDefKeepWhitespace, - NormalizeForm normalization_form = BasicTokenizerOp::kDefNormalizationForm, - bool preserve_unused_token = BasicTokenizerOp::kDefPreserveUnusedToken) - : wordpiece_tokenizer_(vocab, suffix_indicator, max_bytes_per_token, unknown_token), - basic_tokenizer_(lower_case, keep_whitespace, normalization_form, preserve_unused_token) {} + const bool &lower_case = BasicTokenizerOp::kDefLowerCase, + const bool &keep_whitespace = BasicTokenizerOp::kDefKeepWhitespace, + const NormalizeForm &normalization_form = BasicTokenizerOp::kDefNormalizationForm, + const bool &preserve_unused_token = BasicTokenizerOp::kDefPreserveUnusedToken, + const bool &with_offsets = WordpieceTokenizerOp::kDefWithOffsets) + : wordpiece_tokenizer_(vocab, suffix_indicator, max_bytes_per_token, unknown_token, with_offsets), + basic_tokenizer_(lower_case, keep_whitespace, normalization_form, preserve_unused_token, with_offsets) {} ~BertTokenizerOp() override = default; void Print(std::ostream &out) const override { out << "BertTokenizerOp"; } - Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + Status Compute(const TensorRow &input, TensorRow *output) override; + + std::string Name() const override { return kBertTokenizerOp; } private: WordpieceTokenizerOp wordpiece_tokenizer_; diff --git a/mindspore/ccsrc/dataset/text/kernels/case_fold_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/case_fold_op.cc similarity index 96% rename from mindspore/ccsrc/dataset/text/kernels/case_fold_op.cc rename to mindspore/ccsrc/minddata/dataset/text/kernels/case_fold_op.cc index d935608efd..0ea5cadedb 100644 --- a/mindspore/ccsrc/dataset/text/kernels/case_fold_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/case_fold_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/text/kernels/case_fold_op.h" +#include "minddata/dataset/text/kernels/case_fold_op.h" #include #include #include diff --git a/mindspore/ccsrc/dataset/text/kernels/case_fold_op.h b/mindspore/ccsrc/minddata/dataset/text/kernels/case_fold_op.h similarity index 84% rename from mindspore/ccsrc/dataset/text/kernels/case_fold_op.h rename to mindspore/ccsrc/minddata/dataset/text/kernels/case_fold_op.h index d1b5ba53f1..f7a2105269 100644 --- a/mindspore/ccsrc/dataset/text/kernels/case_fold_op.h +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/case_fold_op.h @@ -16,10 +16,11 @@ #ifndef DATASET_TEXT_KERNELS_CASE_FOLD_OP_H_ #define DATASET_TEXT_KERNELS_CASE_FOLD_OP_H_ #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -33,6 +34,8 @@ class CaseFoldOp : public TensorOp { void Print(std::ostream &out) const override { out << "CaseFoldOp"; } Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + + std::string Name() const override { return kCaseFoldOp; } }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/text/kernels/jieba_tokenizer_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/jieba_tokenizer_op.cc new file mode 100644 index 0000000000..0a1ae92d14 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/jieba_tokenizer_op.cc @@ -0,0 +1,94 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "minddata/dataset/text/kernels/jieba_tokenizer_op.h" + +#include +#include +#include +#include "minddata/dataset/util/path.h" + +namespace mindspore { +namespace dataset { + +const bool JiebaTokenizerOp::kDefWithOffsets = false; + +JiebaTokenizerOp::JiebaTokenizerOp(const std::string &hmm_path, const std::string &dict_path, const JiebaMode &mode, + const bool &with_offsets) + : jieba_mode_(mode), hmm_model_path_(hmm_path), mp_dict_path_(dict_path), with_offsets_(with_offsets) { + jieba_parser_ = std::make_unique(mp_dict_path_, hmm_model_path_, ""); +} + +Status JiebaTokenizerOp::Compute(const TensorRow &input, TensorRow *output) { + IO_CHECK_VECTOR(input, output); + CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor"); + RETURN_UNEXPECTED_IF_NULL(jieba_parser_); + + if (input[0]->Rank() != 0 || input[0]->type() != DataType::DE_STRING) { + RETURN_STATUS_UNEXPECTED("the input tensor should be scalar string tensor"); + } + + std::string_view sentence_v; + RETURN_IF_NOT_OK(input[0]->GetItemAt(&sentence_v, {})); + std::string sentence{sentence_v}; + std::vector words; + std::vector offsets_start, offsets_limit; + std::shared_ptr token_tensor, offsets_start_tensor, offsets_limit_tensor; + if (sentence == "") { + words.push_back(""); + } else { + std::vector tmp; + if (jieba_mode_ == JiebaMode::kMp) { + std::unique_ptr mp_seg = std::make_unique(jieba_parser_->GetDictTrie()); + mp_seg->Cut(sentence, tmp, MAX_WORD_LENGTH); + } else if (jieba_mode_ == JiebaMode::kHmm) { + std::unique_ptr hmm_seg = + 
std::make_unique(jieba_parser_->GetHMMModel()); + hmm_seg->Cut(sentence, tmp); + } else { // Mix + std::unique_ptr mix_seg = + std::make_unique(jieba_parser_->GetDictTrie(), jieba_parser_->GetHMMModel()); + mix_seg->Cut(sentence, tmp, true); + } + GetStringsFromWords(tmp, words); + for (auto item : tmp) { + offsets_start.push_back(static_cast(item.offset)); + offsets_limit.push_back(static_cast(item.offset + item.word.length())); + } + } + token_tensor = std::make_shared(words, TensorShape({(dsize_t)words.size()})); + output->push_back(token_tensor); + if (with_offsets_) { + RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_start_tensor, TensorImpl::kFlexible, + TensorShape({(dsize_t)offsets_start.size()}), DataType(DataType::DE_UINT32), + reinterpret_cast(&offsets_start[0]))); + RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_limit_tensor, TensorImpl::kFlexible, + TensorShape({(dsize_t)offsets_limit.size()}), DataType(DataType::DE_UINT32), + reinterpret_cast(&offsets_limit[0]))); + output->push_back(offsets_start_tensor); + output->push_back(offsets_limit_tensor); + } + return Status::OK(); +} + +Status JiebaTokenizerOp::AddWord(const std::string &word, int freq) { + RETURN_UNEXPECTED_IF_NULL(jieba_parser_); + if (jieba_parser_->InsertUserWord(word, freq, "") == false) { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "add word error"); + } + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/text/kernels/jieba_tokenizer_op.h b/mindspore/ccsrc/minddata/dataset/text/kernels/jieba_tokenizer_op.h similarity index 77% rename from mindspore/ccsrc/dataset/text/kernels/jieba_tokenizer_op.h rename to mindspore/ccsrc/minddata/dataset/text/kernels/jieba_tokenizer_op.h index 41736e4fdb..4e49891c00 100644 --- a/mindspore/ccsrc/dataset/text/kernels/jieba_tokenizer_op.h +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/jieba_tokenizer_op.h @@ -20,8 +20,8 @@ #include #include "cppjieba/Jieba.hpp" 
-#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -30,15 +30,19 @@ enum class JiebaMode { kMix = 0, kMp = 1, kHmm = 2 }; class JiebaTokenizerOp : public TensorOp { public: - // deffault constant for Jieba MPSegment algorithm. + // default constant for Jieba MPSegment algorithm. static constexpr size_t MAX_WORD_LENGTH = 512; + // default const for set whether Jieba output offsets tensor. + static const bool kDefWithOffsets; // Constructor for JiebaTokenizerOp. // @param hmm_path HMM model file. // @param mp_path MP model file. // @mode tokenization mode [Default "MIX"], "MP" model will tokenize with MPSegment algorithm, "HMM" mode will // tokenize with Hiddel Markov Model Segment algorithm, "MIx" model will tokenize with a mix of MPSegment and // HMMSegment algorithm. - JiebaTokenizerOp(const std::string &hmm_path, const std::string &mp_path, JiebaMode mode = JiebaMode::kMix); + // @with_offsets user set this value to choose whether output offset tensor. + JiebaTokenizerOp(const std::string &hmm_path, const std::string &mp_path, const JiebaMode &mode = JiebaMode::kMix, + const bool &with_offsets = kDefWithOffsets); ~JiebaTokenizerOp() override = default; void Print(std::ostream &out) const override { @@ -46,18 +50,21 @@ class JiebaTokenizerOp : public TensorOp { << mp_dict_path_; } - Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + Status Compute(const TensorRow &input, TensorRow *output) override; // @word the word to be added to the JiebaTokenizer. // @freq [Default 0] the frequency fo the word to be added. // @tag [Default ""] the tag of the word to be added. 
Status AddWord(const std::string &word, int freq = 0); + std::string Name() const override { return kJiebaTokenizerOp; } + protected: std::string hmm_model_path_; std::string mp_dict_path_; std::unique_ptr jieba_parser_; JiebaMode jieba_mode_; + bool with_offsets_; }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/text/kernels/lookup_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/lookup_op.cc similarity index 82% rename from mindspore/ccsrc/dataset/text/kernels/lookup_op.cc rename to mindspore/ccsrc/minddata/dataset/text/kernels/lookup_op.cc index 07cf7aef5c..02b75bc4f9 100644 --- a/mindspore/ccsrc/dataset/text/kernels/lookup_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/lookup_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/text/kernels/lookup_op.h" +#include "minddata/dataset/text/kernels/lookup_op.h" #include @@ -26,11 +26,15 @@ LookupOp::LookupOp(std::shared_ptr vocab, WordIdType default_id) Status LookupOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { IO_CHECK(input, output); RETURN_UNEXPECTED_IF_NULL(vocab_); - CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "None String Tensor"); + CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "None String Tensor."); std::vector word_ids; word_ids.reserve(input->Size()); for (auto itr = input->begin(); itr != input->end(); itr++) { - word_ids.push_back(vocab_->Lookup(std::string(*itr), default_id_)); + WordIdType word_id = vocab_->Lookup(std::string(*itr)); + word_ids.emplace_back(word_id == Vocab::kNoTokenExists ? 
default_id_ : word_id); + CHECK_FAIL_RETURN_UNEXPECTED( + word_ids.back() != Vocab::kNoTokenExists, + "Lookup Error: token" + std::string(*itr) + "doesn't exist in vocab and no unknown token is specified."); } RETURN_IF_NOT_OK(Tensor::CreateTensor(output, TensorImpl::kFlexible, input->shape(), type_, diff --git a/mindspore/ccsrc/dataset/text/kernels/lookup_op.h b/mindspore/ccsrc/minddata/dataset/text/kernels/lookup_op.h similarity index 88% rename from mindspore/ccsrc/dataset/text/kernels/lookup_op.h rename to mindspore/ccsrc/minddata/dataset/text/kernels/lookup_op.h index dad99c3241..4efc64321b 100644 --- a/mindspore/ccsrc/dataset/text/kernels/lookup_op.h +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/lookup_op.h @@ -20,11 +20,12 @@ #include #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" -#include "dataset/text/vocab.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/text/vocab.h" namespace mindspore { namespace dataset { @@ -52,6 +53,8 @@ class LookupOp : public TensorOp { // @return error code Status OutputType(const std::vector &inputs, std::vector &outputs) override; + std::string Name() const override { return kLookupOp; } + private: std::shared_ptr vocab_; WordIdType default_id_; diff --git a/mindspore/ccsrc/dataset/text/kernels/ngram_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/ngram_op.cc similarity index 98% rename from mindspore/ccsrc/dataset/text/kernels/ngram_op.cc rename to mindspore/ccsrc/minddata/dataset/text/kernels/ngram_op.cc index bbe449a89a..36781b9b4d 100644 --- a/mindspore/ccsrc/dataset/text/kernels/ngram_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/ngram_op.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "dataset/text/kernels/ngram_op.h" +#include "minddata/dataset/text/kernels/ngram_op.h" #include #include diff --git a/mindspore/ccsrc/dataset/text/kernels/ngram_op.h b/mindspore/ccsrc/minddata/dataset/text/kernels/ngram_op.h similarity index 93% rename from mindspore/ccsrc/dataset/text/kernels/ngram_op.h rename to mindspore/ccsrc/minddata/dataset/text/kernels/ngram_op.h index 3d2c547f79..6ce3881638 100644 --- a/mindspore/ccsrc/dataset/text/kernels/ngram_op.h +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/ngram_op.h @@ -21,13 +21,12 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { -namespace py = pybind11; class NgramOp : public TensorOp { public: @@ -59,6 +58,8 @@ class NgramOp : public TensorOp { // @param std::ostream &out void Print(std::ostream &out) const override; + std::string Name() const override { return kNgramOp; } + private: std::vector ngrams_; // list of n grams int32_t l_len_; // left padding length diff --git a/mindspore/ccsrc/dataset/text/kernels/normalize_utf8_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/normalize_utf8_op.cc similarity index 97% rename from mindspore/ccsrc/dataset/text/kernels/normalize_utf8_op.cc rename to mindspore/ccsrc/minddata/dataset/text/kernels/normalize_utf8_op.cc index b902286576..0c0aa5fa2d 100644 --- a/mindspore/ccsrc/dataset/text/kernels/normalize_utf8_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/normalize_utf8_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/text/kernels/normalize_utf8_op.h" +#include "minddata/dataset/text/kernels/normalize_utf8_op.h" #include #include #include diff --git a/mindspore/ccsrc/dataset/text/kernels/normalize_utf8_op.h b/mindspore/ccsrc/minddata/dataset/text/kernels/normalize_utf8_op.h similarity index 86% rename from mindspore/ccsrc/dataset/text/kernels/normalize_utf8_op.h rename to mindspore/ccsrc/minddata/dataset/text/kernels/normalize_utf8_op.h index 5033f2355f..f914be1c58 100644 --- a/mindspore/ccsrc/dataset/text/kernels/normalize_utf8_op.h +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/normalize_utf8_op.h @@ -16,10 +16,11 @@ #ifndef DATASET_TEXT_KERNELS_NORMALIZE_UTF8_OP_H_ #define DATASET_TEXT_KERNELS_NORMALIZE_UTF8_OP_H_ #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -42,6 +43,8 @@ class NormalizeUTF8Op : public TensorOp { Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + std::string Name() const override { return kNormalizeUTF8Op; } + private: NormalizeForm normalize_form_; }; diff --git a/mindspore/ccsrc/dataset/text/kernels/regex_replace_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/regex_replace_op.cc similarity index 97% rename from mindspore/ccsrc/dataset/text/kernels/regex_replace_op.cc rename to mindspore/ccsrc/minddata/dataset/text/kernels/regex_replace_op.cc index 1ce2c5ea61..c370393e76 100644 --- a/mindspore/ccsrc/dataset/text/kernels/regex_replace_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/regex_replace_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/text/kernels/regex_replace_op.h" +#include "minddata/dataset/text/kernels/regex_replace_op.h" #include #include #include diff --git a/mindspore/ccsrc/dataset/text/kernels/regex_replace_op.h b/mindspore/ccsrc/minddata/dataset/text/kernels/regex_replace_op.h similarity index 89% rename from mindspore/ccsrc/dataset/text/kernels/regex_replace_op.h rename to mindspore/ccsrc/minddata/dataset/text/kernels/regex_replace_op.h index 30fae13241..ac3d3f7ff0 100644 --- a/mindspore/ccsrc/dataset/text/kernels/regex_replace_op.h +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/regex_replace_op.h @@ -22,9 +22,9 @@ #include "unicode/errorcode.h" #include "unicode/utypes.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -42,6 +42,8 @@ class RegexReplaceOp : public TensorOp { Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + std::string Name() const override { return kRegexReplaceOp; } + protected: Status RegexReplace(icu::RegexMatcher *const matcher, const std::string_view &text, std::string *out) const; diff --git a/mindspore/ccsrc/dataset/text/kernels/regex_tokenizer_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/regex_tokenizer_op.cc similarity index 55% rename from mindspore/ccsrc/dataset/text/kernels/regex_tokenizer_op.cc rename to mindspore/ccsrc/minddata/dataset/text/kernels/regex_tokenizer_op.cc index 34c06f28ea..7ff1d994be 100644 --- a/mindspore/ccsrc/dataset/text/kernels/regex_tokenizer_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/regex_tokenizer_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/text/kernels/regex_tokenizer_op.h" +#include "minddata/dataset/text/kernels/regex_tokenizer_op.h" #include #include #include @@ -22,8 +22,11 @@ namespace mindspore { namespace dataset { -Status RegexTokenizerOp::GetUnicodeSubstr(const icu::UnicodeString &input, int start, int len, std::string *out_utf8, - icu::UnicodeString *out_unicode) const { + +const bool RegexTokenizerOp::kDefWithOffsets = false; + +Status RegexTokenizerOp::GetUnicodeSubstr(const icu::UnicodeString &input, const int &start, const int &len, + std::string *out_utf8, icu::UnicodeString *out_unicode) const { CHECK_FAIL_RETURN_UNEXPECTED((out_utf8 != nullptr || out_unicode != nullptr), "Wrong input"); int total_len = input.length(); int end = start + len; @@ -39,7 +42,9 @@ Status RegexTokenizerOp::GetUnicodeSubstr(const icu::UnicodeString &input, int s return Status::OK(); } -Status RegexTokenizerOp::GetRegexTokens(const std::string &text, std::vector *out_tokens) const { +Status RegexTokenizerOp::GetRegexTokens(const std::string &text, std::vector *out_tokens, + std::vector *offsets_start, + std::vector *offsets_limit) const { UErrorCode status = U_ZERO_ERROR; out_tokens->clear(); icu::RegexMatcher token_matcher(delim_pattern_, 0, status); @@ -50,6 +55,7 @@ Status RegexTokenizerOp::GetRegexTokens(const std::string &text, std::vector 0) { std::string token; + uint32_t token_offset = 0; RETURN_IF_NOT_OK(GetUnicodeSubstr(utext, token_start_index, token_len, &token)); + token_offset = token.length(); out_tokens->emplace_back(std::move(token)); + offsets_start->push_back(static_cast(text_start_index)); + offsets_limit->push_back(static_cast(text_start_index + token_offset)); + text_start_index += token_offset; } int delim_len = deli_end_index - deli_start_index; - if (keep_delim_ && delim_len > 0) { + if (delim_len > 0) { icu::UnicodeString delim_str; std::string delim_utf8_str; + uint32_t delim_str_offset = 0; RETURN_IF_NOT_OK(GetUnicodeSubstr(utext, deli_start_index, delim_len, 
&delim_utf8_str, &delim_str)); delim_matcher.reset(delim_str); - if (delim_matcher.matches(status) && U_SUCCESS(status)) { + delim_str_offset = delim_utf8_str.length(); + if (keep_delim_ && delim_matcher.matches(status) && U_SUCCESS(status)) { out_tokens->emplace_back(std::move(delim_utf8_str)); + offsets_start->push_back(static_cast(text_start_index)); + offsets_limit->push_back(static_cast(text_start_index + delim_str_offset)); } + text_start_index += delim_str_offset; } token_start_index = deli_end_index; } if (token_start_index < utext.length()) { std::string temp; + uint32_t temp_offset = 0; RETURN_IF_NOT_OK(GetUnicodeSubstr(utext, token_start_index, utext.length() - token_start_index, &temp)); + temp_offset = temp.length(); out_tokens->emplace_back(std::move(temp)); + offsets_start->push_back(static_cast(text_start_index)); + offsets_limit->push_back(static_cast(text_start_index + temp_offset)); } return Status::OK(); } -Status RegexTokenizerOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { - IO_CHECK(input, output); - if (input->Rank() != 0 || input->type() != DataType::DE_STRING) { +Status RegexTokenizerOp::Compute(const TensorRow &input, TensorRow *output) { + IO_CHECK_VECTOR(input, output); + CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor"); + if (input[0]->Rank() != 0 || input[0]->type() != DataType::DE_STRING) { RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor"); } std::string_view text; - RETURN_IF_NOT_OK(input->GetItemAt(&text, {})); std::vector tokens; - RETURN_IF_NOT_OK(GetRegexTokens(std::string(text.data(), text.size()), &tokens)); - *output = std::make_shared(std::move(tokens), TensorShape({(dsize_t)tokens.size()})); + std::vector offsets_start; + std::vector offsets_limit; + std::shared_ptr token_tensor, offsets_start_tensor, offsets_limit_tensor; + RETURN_IF_NOT_OK(input[0]->GetItemAt(&text, {})); + RETURN_IF_NOT_OK(GetRegexTokens(std::string(text.data(), text.size()), 
&tokens, &offsets_start, &offsets_limit)); + token_tensor = std::make_shared(std::move(tokens), TensorShape({(dsize_t)tokens.size()})); + output->push_back(token_tensor); + if (with_offsets_) { + RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_start_tensor, TensorImpl::kFlexible, + TensorShape({(dsize_t)offsets_start.size()}), DataType(DataType::DE_UINT32), + reinterpret_cast(&offsets_start[0]))); + RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_limit_tensor, TensorImpl::kFlexible, + TensorShape({(dsize_t)offsets_limit.size()}), DataType(DataType::DE_UINT32), + reinterpret_cast(&offsets_limit[0]))); + output->push_back(offsets_start_tensor); + output->push_back(offsets_limit_tensor); + } return Status::OK(); } } // namespace dataset diff --git a/mindspore/ccsrc/dataset/text/kernels/regex_tokenizer_op.h b/mindspore/ccsrc/minddata/dataset/text/kernels/regex_tokenizer_op.h similarity index 70% rename from mindspore/ccsrc/dataset/text/kernels/regex_tokenizer_op.h rename to mindspore/ccsrc/minddata/dataset/text/kernels/regex_tokenizer_op.h index bcf02a4a11..56271f9551 100644 --- a/mindspore/ccsrc/dataset/text/kernels/regex_tokenizer_op.h +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/regex_tokenizer_op.h @@ -23,34 +23,42 @@ #include "unicode/errorcode.h" #include "unicode/utypes.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { class RegexTokenizerOp : public TensorOp { public: - RegexTokenizerOp(const std::string &delim_pattern, const std::string &keep_delim_pattern) + static const bool kDefWithOffsets; + + RegexTokenizerOp(const std::string &delim_pattern, const std::string &keep_delim_pattern, + const bool &with_offsets = kDefWithOffsets) : delim_pattern_(icu::UnicodeString::fromUTF8(delim_pattern)), 
keep_delim_pattern_(icu::UnicodeString::fromUTF8(keep_delim_pattern)), + with_offsets_(with_offsets), keep_delim_(!keep_delim_pattern.empty()) {} ~RegexTokenizerOp() override = default; void Print(std::ostream &out) const override { out << "RegexTokenizerOp"; } - Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + Status Compute(const TensorRow &input, TensorRow *output) override; protected: - Status GetUnicodeSubstr(const icu::UnicodeString &input, int start, int len, std::string *out_utf8, + Status GetUnicodeSubstr(const icu::UnicodeString &input, const int &start, const int &len, std::string *out_utf8, icu::UnicodeString *out_unicode = nullptr) const; - Status GetRegexTokens(const std::string &text, std::vector *out_tokens) const; + Status GetRegexTokens(const std::string &text, std::vector *out_tokens, + std::vector *offsets_start, std::vector *offsets_limit) const; + + std::string Name() const override { return kRegexTokenizerOp; } private: const icu::UnicodeString delim_pattern_; const icu::UnicodeString keep_delim_pattern_; + bool with_offsets_; const bool keep_delim_; }; } // namespace dataset diff --git a/mindspore/ccsrc/dataset/text/kernels/to_number_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/to_number_op.cc similarity index 96% rename from mindspore/ccsrc/dataset/text/kernels/to_number_op.cc rename to mindspore/ccsrc/minddata/dataset/text/kernels/to_number_op.cc index 1368684daf..a6685a2d64 100644 --- a/mindspore/ccsrc/dataset/text/kernels/to_number_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/to_number_op.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "dataset/text/kernels/to_number_op.h" +#include "minddata/dataset/text/kernels/to_number_op.h" #include #include @@ -23,11 +23,11 @@ #include #include -#include "dataset/core/data_type.h" -#include "dataset/core/tensor.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/kernels/data/data_utils.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/data_type.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/kernels/data/data_utils.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/text/kernels/to_number_op.h b/mindspore/ccsrc/minddata/dataset/text/kernels/to_number_op.h similarity index 91% rename from mindspore/ccsrc/dataset/text/kernels/to_number_op.h rename to mindspore/ccsrc/minddata/dataset/text/kernels/to_number_op.h index 1346ce2f47..8582fcf073 100644 --- a/mindspore/ccsrc/dataset/text/kernels/to_number_op.h +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/to_number_op.h @@ -21,10 +21,10 @@ #include #include -#include "dataset/core/data_type.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/data_type.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -57,6 +57,8 @@ class ToNumberOp : public TensorOp { // @param std::ostream &out void Print(std::ostream &out) const override; + std::string Name() const override { return kToNumberOp; } + private: template Status ToSignedIntegral(const std::shared_ptr &input, std::shared_ptr *output); diff --git a/mindspore/ccsrc/dataset/text/kernels/truncate_sequence_pair_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/truncate_sequence_pair_op.cc similarity index 90% rename from 
mindspore/ccsrc/dataset/text/kernels/truncate_sequence_pair_op.cc rename to mindspore/ccsrc/minddata/dataset/text/kernels/truncate_sequence_pair_op.cc index 136d5006df..53a803c542 100644 --- a/mindspore/ccsrc/dataset/text/kernels/truncate_sequence_pair_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/truncate_sequence_pair_op.cc @@ -14,11 +14,11 @@ * limitations under the License. */ -#include "dataset/text/kernels/truncate_sequence_pair_op.h" +#include "minddata/dataset/text/kernels/truncate_sequence_pair_op.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/kernels/data/slice_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/kernels/data/slice_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/text/kernels/truncate_sequence_pair_op.h b/mindspore/ccsrc/minddata/dataset/text/kernels/truncate_sequence_pair_op.h similarity index 83% rename from mindspore/ccsrc/dataset/text/kernels/truncate_sequence_pair_op.h rename to mindspore/ccsrc/minddata/dataset/text/kernels/truncate_sequence_pair_op.h index e8be6802a8..ce82735645 100644 --- a/mindspore/ccsrc/dataset/text/kernels/truncate_sequence_pair_op.h +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/truncate_sequence_pair_op.h @@ -22,10 +22,10 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/kernels/data/type_cast_op.h" -#include "dataset/kernels/data/data_utils.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/kernels/data/type_cast_op.h" +#include "minddata/dataset/kernels/data/data_utils.h" namespace mindspore { namespace dataset { @@ -40,6 +40,8 @@ class TruncateSequencePairOp : public TensorOp { Status Compute(const TensorRow &input, TensorRow *output) override; + std::string Name() const override { return 
kTruncateSequencePairOp; } + private: dsize_t max_length_; }; diff --git a/mindspore/ccsrc/minddata/dataset/text/kernels/unicode_char_tokenizer_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/unicode_char_tokenizer_op.cc new file mode 100644 index 0000000000..e08f61100b --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/unicode_char_tokenizer_op.cc @@ -0,0 +1,73 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "minddata/dataset/text/kernels/unicode_char_tokenizer_op.h" +#include +#include +#include +#include + +#include "cppjieba/Unicode.hpp" + +using cppjieba::DecodeRunesInString; +using cppjieba::RuneStrArray; + +namespace mindspore { +namespace dataset { + +const bool UnicodeCharTokenizerOp::kDefWithOffsets = false; + +Status UnicodeCharTokenizerOp::Compute(const TensorRow &input, TensorRow *output) { + IO_CHECK_VECTOR(input, output); + CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor"); + if (input[0]->Rank() != 0 || input[0]->type() != DataType::DE_STRING) { + RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor"); + } + std::string_view str; + RETURN_IF_NOT_OK(input[0]->GetItemAt(&str, {})); + + RuneStrArray runes; + if (!DecodeRunesInString(str.data(), str.size(), runes)) { + RETURN_STATUS_UNEXPECTED("Decode utf8 string failed."); + } + std::shared_ptr token_tensor, offsets_start_tensor, offsets_limit_tensor; + std::vector splits(runes.size()); + std::vector offsets_start, offsets_limit; + for (size_t i = 0; i < runes.size(); i++) { + offsets_start.push_back(runes[i].offset); + offsets_limit.push_back(runes[i].offset + runes[i].len); + splits[i] = str.substr(runes[i].offset, runes[i].len); + } + if (splits.empty()) { + splits.emplace_back(""); + offsets_start.push_back(0); + offsets_limit.push_back(0); + } + token_tensor = std::make_shared(splits, TensorShape({(dsize_t)splits.size()})); + output->push_back(token_tensor); + if (with_offsets_) { + RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_start_tensor, TensorImpl::kFlexible, + TensorShape({(dsize_t)offsets_start.size()}), DataType(DataType::DE_UINT32), + reinterpret_cast(&offsets_start[0]))); + RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_limit_tensor, TensorImpl::kFlexible, + TensorShape({(dsize_t)offsets_limit.size()}), DataType(DataType::DE_UINT32), + reinterpret_cast(&offsets_limit[0]))); + output->push_back(offsets_start_tensor); + 
output->push_back(offsets_limit_tensor); + } + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/text/kernels/unicode_char_tokenizer_op.h b/mindspore/ccsrc/minddata/dataset/text/kernels/unicode_char_tokenizer_op.h similarity index 69% rename from mindspore/ccsrc/dataset/text/kernels/unicode_char_tokenizer_op.h rename to mindspore/ccsrc/minddata/dataset/text/kernels/unicode_char_tokenizer_op.h index 01a84eca8b..415d99b451 100644 --- a/mindspore/ccsrc/dataset/text/kernels/unicode_char_tokenizer_op.h +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/unicode_char_tokenizer_op.h @@ -16,23 +16,31 @@ #ifndef DATASET_TEXT_KERNELS_UNICODE_CHAR_TOKENIZER_OP_H_ #define DATASET_TEXT_KERNELS_UNICODE_CHAR_TOKENIZER_OP_H_ #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { class UnicodeCharTokenizerOp : public TensorOp { public: - UnicodeCharTokenizerOp() {} + static const bool kDefWithOffsets; + + explicit UnicodeCharTokenizerOp(const bool &with_offsets = kDefWithOffsets) : with_offsets_(with_offsets) {} ~UnicodeCharTokenizerOp() override = default; void Print(std::ostream &out) const override { out << "UnicodeCharTokenizerOp"; } - Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + Status Compute(const TensorRow &input, TensorRow *output) override; + + std::string Name() const override { return kUnicodeCharTokenizerOp; } + + private: + bool with_offsets_; }; } // namespace dataset diff --git a/mindspore/ccsrc/dataset/text/kernels/unicode_script_tokenizer_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/unicode_script_tokenizer_op.cc similarity index 61% rename from mindspore/ccsrc/dataset/text/kernels/unicode_script_tokenizer_op.cc 
rename to mindspore/ccsrc/minddata/dataset/text/kernels/unicode_script_tokenizer_op.cc index 97a4f1333d..60fe8dd0e4 100644 --- a/mindspore/ccsrc/dataset/text/kernels/unicode_script_tokenizer_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/unicode_script_tokenizer_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/text/kernels/unicode_script_tokenizer_op.h" +#include "minddata/dataset/text/kernels/unicode_script_tokenizer_op.h" #include #include #include @@ -32,24 +32,28 @@ namespace mindspore { namespace dataset { const bool UnicodeScriptTokenizerOp::kDefKeepWhitespace = false; +const bool UnicodeScriptTokenizerOp::kDefWithOffsets = false; -Status UnicodeScriptTokenizerOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { - IO_CHECK(input, output); - if (input->Rank() != 0 || input->type() != DataType::DE_STRING) { +Status UnicodeScriptTokenizerOp::Compute(const TensorRow &input, TensorRow *output) { + IO_CHECK_VECTOR(input, output); + CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor"); + if (input[0]->Rank() != 0 || input[0]->type() != DataType::DE_STRING) { RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor"); } std::string_view str; - RETURN_IF_NOT_OK(input->GetItemAt(&str, {})); + RETURN_IF_NOT_OK(input[0]->GetItemAt(&str, {})); RuneStrArray runes; if (!DecodeRunesInString(str.data(), str.size(), runes)) { RETURN_STATUS_UNEXPECTED("Decode utf8 string failed."); } + std::shared_ptr token_tensor, offsets_start_tensor, offsets_limit_tensor; UScriptCode last_script = USCRIPT_INVALID_CODE; icu::ErrorCode status; int start = 0; int len = 0; std::vector splits; + std::vector offsets_start, offsets_limit; bool was_space = false; for (size_t i = 0; i < runes.size(); i++) { @@ -66,6 +70,8 @@ Status UnicodeScriptTokenizerOp::Compute(const std::shared_ptr &input, s if (len > 0 && (script != last_script 
|| is_space != was_space)) { // 3) If keep_whitespace_ is false, all the whitespace characters will be discard if (keep_whitespace_ || !was_space) { + offsets_start.push_back(static_cast(start)); + offsets_limit.push_back(static_cast(start + len)); std::string temp(str.substr(start, len)); splits.emplace_back(std::move(temp)); } @@ -79,14 +85,29 @@ Status UnicodeScriptTokenizerOp::Compute(const std::shared_ptr &input, s } if (len > 0 && (keep_whitespace_ || !was_space)) { + offsets_start.push_back(static_cast(start)); + offsets_limit.push_back(static_cast(start + len)); std::string temp(str.substr(start, len)); splits.emplace_back(std::move(temp)); } // 4) If the input is empty scalar string, the output will be 1-D empty string. if (splits.empty()) { splits.emplace_back(""); + offsets_start.push_back(0); + offsets_limit.push_back(0); + } + token_tensor = std::make_shared(splits, TensorShape({(dsize_t)splits.size()})); + output->push_back(token_tensor); + if (with_offsets_) { + RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_start_tensor, TensorImpl::kFlexible, + TensorShape({(dsize_t)offsets_start.size()}), DataType(DataType::DE_UINT32), + reinterpret_cast(&offsets_start[0]))); + RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_limit_tensor, TensorImpl::kFlexible, + TensorShape({(dsize_t)offsets_limit.size()}), DataType(DataType::DE_UINT32), + reinterpret_cast(&offsets_limit[0]))); + output->push_back(offsets_start_tensor); + output->push_back(offsets_limit_tensor); } - *output = std::make_shared(splits, TensorShape({(dsize_t)splits.size()})); return Status::OK(); } } // namespace dataset diff --git a/mindspore/ccsrc/dataset/text/kernels/unicode_script_tokenizer_op.h b/mindspore/ccsrc/minddata/dataset/text/kernels/unicode_script_tokenizer_op.h similarity index 67% rename from mindspore/ccsrc/dataset/text/kernels/unicode_script_tokenizer_op.h rename to mindspore/ccsrc/minddata/dataset/text/kernels/unicode_script_tokenizer_op.h index a77b0b3fa3..fc3b9e620a 100644 --- 
a/mindspore/ccsrc/dataset/text/kernels/unicode_script_tokenizer_op.h +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/unicode_script_tokenizer_op.h @@ -16,10 +16,11 @@ #ifndef DATASET_TEXT_KERNELS_UNICODE_SCRIPT_TOKENIZER_OP_H_ #define DATASET_TEXT_KERNELS_UNICODE_SCRIPT_TOKENIZER_OP_H_ #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -27,17 +28,23 @@ namespace dataset { class UnicodeScriptTokenizerOp : public TensorOp { public: static const bool kDefKeepWhitespace; + static const bool kDefWithOffsets; - explicit UnicodeScriptTokenizerOp(bool keep_whitespace = kDefKeepWhitespace) : keep_whitespace_(keep_whitespace) {} + explicit UnicodeScriptTokenizerOp(const bool &keep_whitespace = kDefKeepWhitespace, + const bool &with_offsets = kDefWithOffsets) + : keep_whitespace_(keep_whitespace), with_offsets_(with_offsets) {} ~UnicodeScriptTokenizerOp() override = default; void Print(std::ostream &out) const override { out << "UnicodeScriptTokenizerOp"; } - Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + Status Compute(const TensorRow &input, TensorRow *output) override; + + std::string Name() const override { return kUnicodeScriptTokenizerOp; } private: bool keep_whitespace_; // If or not keep whitespace tokens + bool with_offsets_; }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/text/kernels/whitespace_tokenizer_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/whitespace_tokenizer_op.cc similarity index 51% rename from mindspore/ccsrc/dataset/text/kernels/whitespace_tokenizer_op.cc rename to mindspore/ccsrc/minddata/dataset/text/kernels/whitespace_tokenizer_op.cc index 35f3f8d0e2..d3bb32081e 100644 --- 
a/mindspore/ccsrc/dataset/text/kernels/whitespace_tokenizer_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/whitespace_tokenizer_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/text/kernels/whitespace_tokenizer_op.h" +#include "minddata/dataset/text/kernels/whitespace_tokenizer_op.h" #include #include #include @@ -30,24 +30,33 @@ using cppjieba::RuneStrArray; namespace mindspore { namespace dataset { -Status WhitespaceTokenizerOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { - IO_CHECK(input, output); - if (input->Rank() != 0 || input->type() != DataType::DE_STRING) { + +const bool WhitespaceTokenizerOp::kDefWithOffsets = false; + +Status WhitespaceTokenizerOp::Compute(const TensorRow &input, TensorRow *output) { + IO_CHECK_VECTOR(input, output); + CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor"); + if (input[0]->Rank() != 0 || input[0]->type() != DataType::DE_STRING) { RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor"); } std::string_view str; - RETURN_IF_NOT_OK(input->GetItemAt(&str, {})); + RETURN_IF_NOT_OK(input[0]->GetItemAt(&str, {})); RuneStrArray runes; if (!DecodeRunesInString(str.data(), str.size(), runes)) { RETURN_STATUS_UNEXPECTED("Decode utf8 string failed."); } + + std::shared_ptr token_tensor, offsets_start_tensor, offsets_limit_tensor; + std::vector offsets_start, offsets_limit; std::vector splits; int start = 0; int len = 0; for (size_t i = 0; i < runes.size(); i++) { if (u_isUWhiteSpace(runes[i].rune)) { if (len > 0) { + offsets_start.push_back(static_cast(start)); + offsets_limit.push_back(static_cast(start + len)); std::string temp(str.substr(start, len)); splits.emplace_back(std::move(temp)); len = 0; @@ -60,13 +69,28 @@ Status WhitespaceTokenizerOp::Compute(const std::shared_ptr &input, std: } } if (len > 0) { + offsets_start.push_back(static_cast(start)); + 
offsets_limit.push_back(static_cast(start + len)); std::string temp(str.substr(start, len)); splits.emplace_back(std::move(temp)); } if (splits.empty()) { splits.emplace_back(""); + offsets_start.push_back(0); + offsets_limit.push_back(0); + } + token_tensor = std::make_shared(splits, TensorShape({(dsize_t)splits.size()})); + output->push_back(token_tensor); + if (with_offsets_) { + RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_start_tensor, TensorImpl::kFlexible, + TensorShape({(dsize_t)offsets_start.size()}), DataType(DataType::DE_UINT32), + reinterpret_cast(&offsets_start[0]))); + RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_limit_tensor, TensorImpl::kFlexible, + TensorShape({(dsize_t)offsets_limit.size()}), DataType(DataType::DE_UINT32), + reinterpret_cast(&offsets_limit[0]))); + output->push_back(offsets_start_tensor); + output->push_back(offsets_limit_tensor); } - *output = std::make_shared(splits, TensorShape({(dsize_t)splits.size()})); return Status::OK(); } } // namespace dataset diff --git a/mindspore/ccsrc/dataset/text/kernels/whitespace_tokenizer_op.h b/mindspore/ccsrc/minddata/dataset/text/kernels/whitespace_tokenizer_op.h similarity index 69% rename from mindspore/ccsrc/dataset/text/kernels/whitespace_tokenizer_op.h rename to mindspore/ccsrc/minddata/dataset/text/kernels/whitespace_tokenizer_op.h index 6d0bab0bea..7cc37fd705 100644 --- a/mindspore/ccsrc/dataset/text/kernels/whitespace_tokenizer_op.h +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/whitespace_tokenizer_op.h @@ -16,23 +16,31 @@ #ifndef DATASET_TEXT_KERNELS_WHITESPACE_TOKENIZER_OP_H_ #define DATASET_TEXT_KERNELS_WHITESPACE_TOKENIZER_OP_H_ #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { class WhitespaceTokenizerOp : public TensorOp 
{ public: - WhitespaceTokenizerOp() {} + static const bool kDefWithOffsets; + + explicit WhitespaceTokenizerOp(const bool &with_offsets = kDefWithOffsets) : with_offsets_(with_offsets) {} ~WhitespaceTokenizerOp() override = default; void Print(std::ostream &out) const override { out << "WhitespaceTokenizerOp"; } - Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + Status Compute(const TensorRow &input, TensorRow *output) override; + + std::string Name() const override { return kWhitespaceTokenizerOp; } + + private: + bool with_offsets_; }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/text/kernels/wordpiece_tokenizer_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/wordpiece_tokenizer_op.cc similarity index 50% rename from mindspore/ccsrc/dataset/text/kernels/wordpiece_tokenizer_op.cc rename to mindspore/ccsrc/minddata/dataset/text/kernels/wordpiece_tokenizer_op.cc index e488c527cd..f0bd448e39 100644 --- a/mindspore/ccsrc/dataset/text/kernels/wordpiece_tokenizer_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/wordpiece_tokenizer_op.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "dataset/text/kernels/wordpiece_tokenizer_op.h" +#include "minddata/dataset/text/kernels/wordpiece_tokenizer_op.h" #include #include @@ -24,13 +24,16 @@ namespace dataset { const char WordpieceTokenizerOp::kDefSuffixIndicator[] = "##"; const int WordpieceTokenizerOp::kDefMaxBytesPerToken = 100; const char WordpieceTokenizerOp::kDefUnknownToken[] = "[UNK]"; +const bool WordpieceTokenizerOp::kDefWithOffsets = false; WordpieceTokenizerOp::WordpieceTokenizerOp(const std::shared_ptr &vocab, const std::string &suffix_indicator, - const int &max_bytes_per_token, const std::string &unknown_token) + const int &max_bytes_per_token, const std::string &unknown_token, + const bool &with_offsets) : vocab_(vocab), suffix_indicator_(suffix_indicator), max_bytes_per_token_(max_bytes_per_token), - unknown_token_(unknown_token) {} + unknown_token_(unknown_token), + with_offsets_(with_offsets) {} Status WordpieceTokenizerOp::LookupWord(const std::string &input_token, const RuneStrArray &runes, const int start, bool *out_found, int *out_end) const { @@ -43,8 +46,7 @@ Status WordpieceTokenizerOp::LookupWord(const std::string &input_token, const Ru if (start > 0) { word = suffix_indicator_ + word; } - WordIdType default_id = -1; - if (vocab_->Lookup(word, default_id) != default_id) { + if (vocab_->Lookup(word) != Vocab::kNoTokenExists) { *out_found = true; break; } @@ -52,17 +54,22 @@ Status WordpieceTokenizerOp::LookupWord(const std::string &input_token, const Ru return Status::OK(); } -Status WordpieceTokenizerOp::FoundNoToken(const std::string &input_token, std::vector *out_tokens) const { +Status WordpieceTokenizerOp::FoundNoToken(const std::string &input_token, const uint32_t &basic_start, + std::vector *out_tokens, std::vector *offsets_start, + std::vector *offsets_limit) const { out_tokens->clear(); + offsets_start->push_back(basic_start); if (unknown_token_.empty()) { out_tokens->emplace_back(input_token); + offsets_limit->push_back(basic_start + 
input_token.length()); } else { out_tokens->emplace_back(unknown_token_); + offsets_limit->push_back(basic_start + input_token.length()); } return Status::OK(); } -Status WordpieceTokenizerOp::AddSubword(const std::string &input_token, const int start, const int end, +Status WordpieceTokenizerOp::AddSubword(const std::string &input_token, const int &start, const int &end, std::vector *out_tokens) const { CHECK_FAIL_RETURN_UNEXPECTED(start >= 0 && end > start && end <= input_token.size(), "Out of range"); std::string subword = input_token.substr(start, end - start); @@ -73,9 +80,19 @@ Status WordpieceTokenizerOp::AddSubword(const std::string &input_token, const in return Status::OK(); } -Status WordpieceTokenizerOp::GetTokens(const std::string &input_token, std::vector *out_tokens) const { +Status WordpieceTokenizerOp::GetTokens(const std::string &input_token, const uint32_t &basic_start, + std::vector *out_tokens, std::vector *offsets_start, + std::vector *offsets_limit) const { if (input_token.size() > max_bytes_per_token_) { - return FoundNoToken(input_token, out_tokens); + offsets_start->push_back(basic_start); + if (!unknown_token_.empty()) { + offsets_limit->push_back(basic_start + unknown_token_.size()); + out_tokens->emplace_back(unknown_token_); + } else { + out_tokens->emplace_back(input_token); + offsets_limit->push_back(basic_start + input_token.size()); + } + return Status::OK(); } RuneStrArray runes; if (!DecodeRunesInString(input_token.data(), input_token.size(), runes)) { @@ -87,29 +104,52 @@ Status WordpieceTokenizerOp::GetTokens(const std::string &input_token, std::vect RETURN_IF_NOT_OK(LookupWord(input_token, runes, start, &found, &end)); if (found) { RETURN_IF_NOT_OK(AddSubword(input_token, start, end, out_tokens)); + offsets_start->push_back(static_cast(basic_start + start)); + offsets_limit->push_back(static_cast(basic_start + end)); start = end; } else { - return FoundNoToken(input_token, out_tokens); + return FoundNoToken(input_token, 
basic_start, out_tokens, offsets_start, offsets_limit); } } return Status::OK(); } -Status WordpieceTokenizerOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { - IO_CHECK(input, output); - if (input->Rank() > 1 || input->type() != DataType::DE_STRING) { +Status WordpieceTokenizerOp::Compute(const TensorRow &input, TensorRow *output) { + IO_CHECK_VECTOR(input, output); + if (input[0]->Rank() > 1 || input[0]->type() != DataType::DE_STRING) { RETURN_STATUS_UNEXPECTED("The input tensor should be scalar or 1-D string tensor"); } + dsize_t count = 0; std::vector out_tokens; - for (auto iter = input->begin(); iter != input->end(); iter++) { + std::vector offsets_start, offsets_limit; + std::shared_ptr token_tensor, offsets_start_tensor, offsets_limit_tensor; + for (auto iter = input[0]->begin(); iter != input[0]->end(); iter++) { + uint32_t basic_start = 0; std::vector temp_tokens; - RETURN_IF_NOT_OK(GetTokens(std::string(*iter), &temp_tokens)); + if (with_offsets_ && input.size() == 3) { + RETURN_IF_NOT_OK(input[1]->GetItemAt(&basic_start, {count, 0})); + } + RETURN_IF_NOT_OK(GetTokens(std::string(*iter), basic_start, &temp_tokens, &offsets_start, &offsets_limit)); out_tokens.insert(out_tokens.end(), temp_tokens.begin(), temp_tokens.end()); + count++; } if (out_tokens.empty()) { out_tokens.emplace_back(""); + offsets_start.push_back(0); + offsets_limit.push_back(0); + } + token_tensor = std::make_shared(out_tokens, TensorShape({(dsize_t)out_tokens.size()})); + output->push_back(token_tensor); + if (with_offsets_) { + RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_start_tensor, TensorImpl::kFlexible, + TensorShape({(dsize_t)offsets_start.size()}), DataType(DataType::DE_UINT32), + reinterpret_cast(&offsets_start[0]))); + RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_limit_tensor, TensorImpl::kFlexible, + TensorShape({(dsize_t)offsets_limit.size()}), DataType(DataType::DE_UINT32), + reinterpret_cast(&offsets_limit[0]))); + 
output->push_back(offsets_start_tensor); + output->push_back(offsets_limit_tensor); } - *output = std::make_shared(out_tokens, TensorShape({(dsize_t)out_tokens.size()})); return Status::OK(); } diff --git a/mindspore/ccsrc/dataset/text/kernels/wordpiece_tokenizer_op.h b/mindspore/ccsrc/minddata/dataset/text/kernels/wordpiece_tokenizer_op.h similarity index 65% rename from mindspore/ccsrc/dataset/text/kernels/wordpiece_tokenizer_op.h rename to mindspore/ccsrc/minddata/dataset/text/kernels/wordpiece_tokenizer_op.h index c9a75025c6..4f9c76f57e 100644 --- a/mindspore/ccsrc/dataset/text/kernels/wordpiece_tokenizer_op.h +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/wordpiece_tokenizer_op.h @@ -22,10 +22,10 @@ #include "cppjieba/Unicode.hpp" -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/text/vocab.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/text/vocab.h" +#include "minddata/dataset/util/status.h" using cppjieba::DecodeRunesInString; using cppjieba::RuneStrArray; @@ -37,27 +37,33 @@ class WordpieceTokenizerOp : public TensorOp { static const char kDefSuffixIndicator[]; static const int kDefMaxBytesPerToken; static const char kDefUnknownToken[]; + static const bool kDefWithOffsets; WordpieceTokenizerOp(const std::shared_ptr &vocab, const std::string &suffix_indicator = kDefSuffixIndicator, const int &max_bytes_per_token = kDefMaxBytesPerToken, - const std::string &unknown_token = kDefUnknownToken); + const std::string &unknown_token = kDefUnknownToken, const bool &with_offsets = kDefWithOffsets); ~WordpieceTokenizerOp() override = default; void Print(std::ostream &out) const override { out << "WordpieceTokenizerOp"; } - Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + Status Compute(const TensorRow &input, TensorRow *output) override; protected: - Status AddSubword(const 
std::string &input_token, const int start, const int end, + Status AddSubword(const std::string &input_token, const int &start, const int &end, std::vector *out_token) const; - Status FoundNoToken(const std::string &input_token, std::vector *out_tokens) const; + Status FoundNoToken(const std::string &input_token, const uint32_t &basic_start, std::vector *out_tokens, + std::vector *offsets_start, std::vector *offsets_limit) const; Status LookupWord(const std::string &input_token, const RuneStrArray &runes, const int start, bool *out_found, int *out_end) const; - Status GetTokens(const std::string &input_token, std::vector *out_tokens) const; + Status GetTokens(const std::string &input_token, const uint32_t &basic_start, std::vector *out_tokens, + std::vector *offsets_start, std::vector *offsets_limit) const; + + std::string Name() const override { return kWordpieceTokenizerOp; } private: const std::shared_ptr vocab_; const std::string suffix_indicator_; + const bool with_offsets_; const int max_bytes_per_token_; const std::string unknown_token_; }; diff --git a/mindspore/ccsrc/dataset/text/vocab.cc b/mindspore/ccsrc/minddata/dataset/text/vocab.cc similarity index 94% rename from mindspore/ccsrc/dataset/text/vocab.cc rename to mindspore/ccsrc/minddata/dataset/text/vocab.cc index 100dc9d655..c1b7e6265c 100644 --- a/mindspore/ccsrc/dataset/text/vocab.cc +++ b/mindspore/ccsrc/minddata/dataset/text/vocab.cc @@ -18,15 +18,15 @@ #include #include -#include "dataset/text/vocab.h" +#include "minddata/dataset/text/vocab.h" namespace mindspore { namespace dataset { Vocab::Vocab(std::unordered_map word2id) { word2id_ = std::move(word2id); } -WordIdType Vocab::Lookup(const WordType &word, WordIdType default_id) const { +WordIdType Vocab::Lookup(const WordType &word) const { auto itr = word2id_.find(word); - return itr == word2id_.end() ? default_id : itr->second; + return itr == word2id_.end() ? 
kNoTokenExists : itr->second; } Status Vocab::BuildFromPyList(const py::list &words, const py::list &special_tokens, bool prepend_special, @@ -100,5 +100,8 @@ void Vocab::append_word(const std::string &word) { word2id_[word] = word2id_.size(); } } + +const WordIdType Vocab::kNoTokenExists = -1; + } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/text/vocab.h b/mindspore/ccsrc/minddata/dataset/text/vocab.h similarity index 92% rename from mindspore/ccsrc/dataset/text/vocab.h rename to mindspore/ccsrc/minddata/dataset/text/vocab.h index fc21c380a2..6bf6c488c5 100644 --- a/mindspore/ccsrc/dataset/text/vocab.h +++ b/mindspore/ccsrc/minddata/dataset/text/vocab.h @@ -22,7 +22,7 @@ #include #include -#include "dataset/util/status.h" +#include "minddata/dataset/util/status.h" #include "pybind11/pybind11.h" #include "pybind11/stl.h" @@ -61,12 +61,7 @@ class Vocab { // @param const WordType word - word to look up // @param WordIdType default_id - word id to return to user when its not in the vocab // @return WordIdType, word_id - WordIdType Lookup(const WordType &word, WordIdType default_id) const; - - // reverse lookup, lookup the word based on its id - // @param WordIdType id - word id to lookup to - // @return WordType the word - WordType Lookup(WordIdType id); + WordIdType Lookup(const WordType &word) const; // constructor, shouldn't be called directly, can't be private due to std::make_unique() // @param std::unordered_map map - sanitized word2id map @@ -81,6 +76,8 @@ class Vocab { // destructor ~Vocab() = default; + static const WordIdType kNoTokenExists; + private: std::unordered_map word2id_; }; diff --git a/mindspore/ccsrc/dataset/util/.gitignore b/mindspore/ccsrc/minddata/dataset/util/.gitignore similarity index 100% rename from mindspore/ccsrc/dataset/util/.gitignore rename to mindspore/ccsrc/minddata/dataset/util/.gitignore diff --git a/mindspore/ccsrc/dataset/util/CMakeLists.txt 
b/mindspore/ccsrc/minddata/dataset/util/CMakeLists.txt similarity index 100% rename from mindspore/ccsrc/dataset/util/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/util/CMakeLists.txt diff --git a/mindspore/ccsrc/dataset/util/README.md b/mindspore/ccsrc/minddata/dataset/util/README.md similarity index 100% rename from mindspore/ccsrc/dataset/util/README.md rename to mindspore/ccsrc/minddata/dataset/util/README.md diff --git a/mindspore/ccsrc/dataset/util/allocator.h b/mindspore/ccsrc/minddata/dataset/util/allocator.h similarity index 95% rename from mindspore/ccsrc/dataset/util/allocator.h rename to mindspore/ccsrc/minddata/dataset/util/allocator.h index 50a9cadbe3..b5eaed97a6 100644 --- a/mindspore/ccsrc/dataset/util/allocator.h +++ b/mindspore/ccsrc/minddata/dataset/util/allocator.h @@ -21,7 +21,7 @@ #include #include #include -#include "dataset/util/memory_pool.h" +#include "minddata/dataset/util/memory_pool.h" namespace mindspore { namespace dataset { @@ -87,8 +87,9 @@ class Allocator { std::shared_ptr pool_; }; /// \brief It is a wrapper of unique_ptr with a custom allocator and acts like std::lock_guard such that the memory will -/// be released when the object goes out of scope \tparam T The type of object to be allocated \tparam C Allocator. -/// Default to std::allocator +/// be released when the object goes out of scope +/// \tparam T The type of object to be allocated +/// \tparam C Allocator. 
Default to std::allocator template > class MemGuard { public: @@ -168,7 +169,7 @@ class MemGuard { private: allocator alloc_; - std::unique_ptr> ptr_; + std::unique_ptr ptr_; size_t n_; }; } // namespace dataset diff --git a/mindspore/ccsrc/dataset/util/arena.cc b/mindspore/ccsrc/minddata/dataset/util/arena.cc similarity index 98% rename from mindspore/ccsrc/dataset/util/arena.cc rename to mindspore/ccsrc/minddata/dataset/util/arena.cc index af4f522678..87a9c614a8 100644 --- a/mindspore/ccsrc/dataset/util/arena.cc +++ b/mindspore/ccsrc/minddata/dataset/util/arena.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/util/arena.h" +#include "minddata/dataset/util/arena.h" #include #include -#include "dataset/util/system_pool.h" +#include "minddata/dataset/util/system_pool.h" #include "./securec.h" #include "utils/log_adapter.h" diff --git a/mindspore/ccsrc/dataset/util/arena.h b/mindspore/ccsrc/minddata/dataset/util/arena.h similarity index 97% rename from mindspore/ccsrc/dataset/util/arena.h rename to mindspore/ccsrc/minddata/dataset/util/arena.h index 8c5d1e1093..8887757af1 100644 --- a/mindspore/ccsrc/dataset/util/arena.h +++ b/mindspore/ccsrc/minddata/dataset/util/arena.h @@ -19,8 +19,8 @@ #include #include #include -#include "dataset/util/memory_pool.h" -#include "dataset/util/treap.h" +#include "minddata/dataset/util/memory_pool.h" +#include "minddata/dataset/util/treap.h" #define ARENA_LOG_BLK_SZ (6u) #define ARENA_BLK_SZ (static_cast(1u << ARENA_LOG_BLK_SZ)) diff --git a/mindspore/ccsrc/dataset/util/auto_index.h b/mindspore/ccsrc/minddata/dataset/util/auto_index.h similarity index 96% rename from mindspore/ccsrc/dataset/util/auto_index.h rename to mindspore/ccsrc/minddata/dataset/util/auto_index.h index 5c43ecfd80..0fe55159e6 100644 --- a/mindspore/ccsrc/dataset/util/auto_index.h +++ b/mindspore/ccsrc/minddata/dataset/util/auto_index.h @@ -21,8 +21,8 @@ #include 
#include -#include "dataset/util/btree.h" -#include "dataset/util/system_pool.h" +#include "minddata/dataset/util/btree.h" +#include "minddata/dataset/util/system_pool.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/bit.h b/mindspore/ccsrc/minddata/dataset/util/bit.h similarity index 100% rename from mindspore/ccsrc/dataset/util/bit.h rename to mindspore/ccsrc/minddata/dataset/util/bit.h diff --git a/mindspore/ccsrc/dataset/util/btree.h b/mindspore/ccsrc/minddata/dataset/util/btree.h similarity index 98% rename from mindspore/ccsrc/dataset/util/btree.h rename to mindspore/ccsrc/minddata/dataset/util/btree.h index ccf642e366..828976a0a1 100644 --- a/mindspore/ccsrc/dataset/util/btree.h +++ b/mindspore/ccsrc/minddata/dataset/util/btree.h @@ -23,12 +23,12 @@ #include #include #include "./securec.h" -#include "dataset/util/allocator.h" -#include "dataset/util/list.h" -#include "dataset/util/lock.h" -#include "dataset/util/memory_pool.h" -#include "dataset/util/services.h" -#include "dataset/util/status.h" +#include "minddata/dataset/util/allocator.h" +#include "minddata/dataset/util/list.h" +#include "minddata/dataset/util/lock.h" +#include "minddata/dataset/util/memory_pool.h" +#include "minddata/dataset/util/services.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/btree_impl.tpp b/mindspore/ccsrc/minddata/dataset/util/btree_impl.tpp similarity index 100% rename from mindspore/ccsrc/dataset/util/btree_impl.tpp rename to mindspore/ccsrc/minddata/dataset/util/btree_impl.tpp diff --git a/mindspore/ccsrc/dataset/util/btree_iterator.tpp b/mindspore/ccsrc/minddata/dataset/util/btree_iterator.tpp similarity index 100% rename from mindspore/ccsrc/dataset/util/btree_iterator.tpp rename to mindspore/ccsrc/minddata/dataset/util/btree_iterator.tpp diff --git a/mindspore/ccsrc/dataset/util/buddy.cc b/mindspore/ccsrc/minddata/dataset/util/buddy.cc similarity 
index 98% rename from mindspore/ccsrc/dataset/util/buddy.cc rename to mindspore/ccsrc/minddata/dataset/util/buddy.cc index 540fa993d6..d4f5434f81 100644 --- a/mindspore/ccsrc/dataset/util/buddy.cc +++ b/mindspore/ccsrc/minddata/dataset/util/buddy.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/util/buddy.h" +#include "minddata/dataset/util/buddy.h" #include #include -#include "dataset/util/memory_pool.h" -#include "dataset/util/system_pool.h" +#include "minddata/dataset/util/memory_pool.h" +#include "minddata/dataset/util/system_pool.h" #include "utils/log_adapter.h" #include "./securec.h" diff --git a/mindspore/ccsrc/dataset/util/buddy.h b/mindspore/ccsrc/minddata/dataset/util/buddy.h similarity index 98% rename from mindspore/ccsrc/dataset/util/buddy.h rename to mindspore/ccsrc/minddata/dataset/util/buddy.h index 08c05cbbdb..b1bcd3ce41 100644 --- a/mindspore/ccsrc/dataset/util/buddy.h +++ b/mindspore/ccsrc/minddata/dataset/util/buddy.h @@ -22,7 +22,7 @@ #include #include #include -#include "dataset/util/status.h" +#include "minddata/dataset/util/status.h" using addr_t = int64_t; using rel_addr_t = int32_t; diff --git a/mindspore/ccsrc/dataset/util/cache_pool.cc b/mindspore/ccsrc/minddata/dataset/util/cache_pool.cc similarity index 94% rename from mindspore/ccsrc/dataset/util/cache_pool.cc rename to mindspore/ccsrc/minddata/dataset/util/cache_pool.cc index 92504cd063..22fb72eb8a 100644 --- a/mindspore/ccsrc/dataset/util/cache_pool.cc +++ b/mindspore/ccsrc/minddata/dataset/util/cache_pool.cc @@ -15,8 +15,8 @@ */ #include #include "common/utils.h" -#include "dataset/util/cache_pool.h" -#include "dataset/util/services.h" +#include "minddata/dataset/util/cache_pool.h" +#include "minddata/dataset/util/services.h" namespace mindspore { namespace dataset { @@ -98,11 +98,6 @@ Status CachePool::Insert(const std::vector &buf, CachePool::key_t } catch (std::bad_alloc &e) { if 
(sm_ != nullptr) { RETURN_IF_NOT_OK(sm_->Write(&bl.storage_key, buf)); - // We have an assumption 0 is not a valid key from the design of AutoIndexObj. - // Make sure it is not 0. - if (bl.storage_key == 0) { - RETURN_STATUS_UNEXPECTED("Key 0 is returned which is unexpected"); - } } else { return Status(StatusCode::kOutOfMemory, __LINE__, __FILE__); } diff --git a/mindspore/ccsrc/dataset/util/cache_pool.h b/mindspore/ccsrc/minddata/dataset/util/cache_pool.h similarity index 95% rename from mindspore/ccsrc/dataset/util/cache_pool.h rename to mindspore/ccsrc/minddata/dataset/util/cache_pool.h index d35617d0e4..cdb6da16b6 100644 --- a/mindspore/ccsrc/dataset/util/cache_pool.h +++ b/mindspore/ccsrc/minddata/dataset/util/cache_pool.h @@ -20,11 +20,11 @@ #include #include #include -#include "dataset/util/allocator.h" -#include "dataset/util/service.h" -#include "dataset/util/slice.h" -#include "dataset/util/storage_manager.h" -#include "dataset/util/auto_index.h" +#include "minddata/dataset/util/allocator.h" +#include "minddata/dataset/util/service.h" +#include "minddata/dataset/util/slice.h" +#include "minddata/dataset/util/storage_manager.h" +#include "minddata/dataset/util/auto_index.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/circular_pool.cc b/mindspore/ccsrc/minddata/dataset/util/circular_pool.cc similarity index 97% rename from mindspore/ccsrc/dataset/util/circular_pool.cc rename to mindspore/ccsrc/minddata/dataset/util/circular_pool.cc index 0c68dab81b..f99e6de2f1 100644 --- a/mindspore/ccsrc/dataset/util/circular_pool.cc +++ b/mindspore/ccsrc/minddata/dataset/util/circular_pool.cc @@ -13,13 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/util/circular_pool.h" +#include "minddata/dataset/util/circular_pool.h" #include #include #include #include "./securec.h" -#include "dataset/util/system_pool.h" +#include "minddata/dataset/util/system_pool.h" #include "utils/log_adapter.h" namespace mindspore { @@ -88,6 +88,9 @@ Status CircularPool::Allocate(size_t n, void **p) { while (cirIt.has_next()) { auto it = cirIt.Next(); Arena *ba = it->get(); + if (ba->get_max_size() < n) { + return Status(StatusCode::kOutOfMemory); + } // If we are asked to move forward the tail if (move_tail) { Arena *expected = cirIt.cur_tail_; diff --git a/mindspore/ccsrc/dataset/util/circular_pool.h b/mindspore/ccsrc/minddata/dataset/util/circular_pool.h similarity index 95% rename from mindspore/ccsrc/dataset/util/circular_pool.h rename to mindspore/ccsrc/minddata/dataset/util/circular_pool.h index 3c52659799..a63afbd691 100644 --- a/mindspore/ccsrc/dataset/util/circular_pool.h +++ b/mindspore/ccsrc/minddata/dataset/util/circular_pool.h @@ -19,9 +19,9 @@ #include #include #include -#include "dataset/util/memory_pool.h" -#include "dataset/util/arena.h" -#include "dataset/util/lock.h" +#include "minddata/dataset/util/memory_pool.h" +#include "minddata/dataset/util/arena.h" +#include "minddata/dataset/util/lock.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/cond_var.cc b/mindspore/ccsrc/minddata/dataset/util/cond_var.cc similarity index 94% rename from mindspore/ccsrc/dataset/util/cond_var.cc rename to mindspore/ccsrc/minddata/dataset/util/cond_var.cc index 8b1099fb71..b7c7b76cae 100644 --- a/mindspore/ccsrc/dataset/util/cond_var.cc +++ b/mindspore/ccsrc/minddata/dataset/util/cond_var.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/util/cond_var.h" +#include "minddata/dataset/util/cond_var.h" #include #include -#include "dataset/util/services.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/util/services.h" +#include "minddata/dataset/util/task_manager.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/cond_var.h b/mindspore/ccsrc/minddata/dataset/util/cond_var.h similarity index 90% rename from mindspore/ccsrc/dataset/util/cond_var.h rename to mindspore/ccsrc/minddata/dataset/util/cond_var.h index b23dcd566e..88fcad24a2 100644 --- a/mindspore/ccsrc/dataset/util/cond_var.h +++ b/mindspore/ccsrc/minddata/dataset/util/cond_var.h @@ -21,9 +21,9 @@ #include #include #include -#include "dataset/util/intrp_resource.h" -#include "dataset/util/intrp_service.h" -#include "dataset/util/status.h" +#include "minddata/dataset/util/intrp_resource.h" +#include "minddata/dataset/util/intrp_service.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/intrp_resource.h b/mindspore/ccsrc/minddata/dataset/util/intrp_resource.h similarity index 97% rename from mindspore/ccsrc/dataset/util/intrp_resource.h rename to mindspore/ccsrc/minddata/dataset/util/intrp_resource.h index 52024cb90a..9d78e2cd32 100644 --- a/mindspore/ccsrc/dataset/util/intrp_resource.h +++ b/mindspore/ccsrc/minddata/dataset/util/intrp_resource.h @@ -17,7 +17,7 @@ #define DATASET_UTIL_INTRP_RESOURCE_H_ #include -#include "dataset/util/status.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/intrp_service.cc b/mindspore/ccsrc/minddata/dataset/util/intrp_service.cc similarity index 95% rename from mindspore/ccsrc/dataset/util/intrp_service.cc rename to mindspore/ccsrc/minddata/dataset/util/intrp_service.cc index da8dde992c..a82c82cdc9 100644 --- a/mindspore/ccsrc/dataset/util/intrp_service.cc +++ 
b/mindspore/ccsrc/minddata/dataset/util/intrp_service.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/util/intrp_service.h" +#include "minddata/dataset/util/intrp_service.h" #include #include "common/utils.h" -#include "dataset/util/services.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/util/services.h" +#include "minddata/dataset/util/task_manager.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/intrp_service.h b/mindspore/ccsrc/minddata/dataset/util/intrp_service.h similarity index 87% rename from mindspore/ccsrc/dataset/util/intrp_service.h rename to mindspore/ccsrc/minddata/dataset/util/intrp_service.h index de1d5eb753..cb6bf30c73 100644 --- a/mindspore/ccsrc/dataset/util/intrp_service.h +++ b/mindspore/ccsrc/minddata/dataset/util/intrp_service.h @@ -21,11 +21,11 @@ #include #include #include -#include "dataset/util/allocator.h" -#include "dataset/util/intrp_resource.h" -#include "dataset/util/service.h" -#include "dataset/util/services.h" -#include "dataset/util/status.h" +#include "minddata/dataset/util/allocator.h" +#include "minddata/dataset/util/intrp_resource.h" +#include "minddata/dataset/util/service.h" +#include "minddata/dataset/util/services.h" +#include "minddata/dataset/util/status.h" #include "utils/log_adapter.h" diff --git a/mindspore/ccsrc/dataset/util/list.h b/mindspore/ccsrc/minddata/dataset/util/list.h similarity index 100% rename from mindspore/ccsrc/dataset/util/list.h rename to mindspore/ccsrc/minddata/dataset/util/list.h diff --git a/mindspore/ccsrc/dataset/util/lock.cc b/mindspore/ccsrc/minddata/dataset/util/lock.cc similarity index 99% rename from mindspore/ccsrc/dataset/util/lock.cc rename to mindspore/ccsrc/minddata/dataset/util/lock.cc index bde9d84005..5302196a46 100644 --- a/mindspore/ccsrc/dataset/util/lock.cc +++ b/mindspore/ccsrc/minddata/dataset/util/lock.cc @@ -13,7 
+13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/util/lock.h" +#include "minddata/dataset/util/lock.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/util/lock.h b/mindspore/ccsrc/minddata/dataset/util/lock.h similarity index 100% rename from mindspore/ccsrc/dataset/util/lock.h rename to mindspore/ccsrc/minddata/dataset/util/lock.h diff --git a/mindspore/ccsrc/dataset/util/memory_pool.cc b/mindspore/ccsrc/minddata/dataset/util/memory_pool.cc similarity index 97% rename from mindspore/ccsrc/dataset/util/memory_pool.cc rename to mindspore/ccsrc/minddata/dataset/util/memory_pool.cc index 5d66b4bd6d..0e1be9d798 100644 --- a/mindspore/ccsrc/dataset/util/memory_pool.cc +++ b/mindspore/ccsrc/minddata/dataset/util/memory_pool.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/util/memory_pool.h" +#include "minddata/dataset/util/memory_pool.h" #include "./securec.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/util/memory_pool.h b/mindspore/ccsrc/minddata/dataset/util/memory_pool.h similarity index 97% rename from mindspore/ccsrc/dataset/util/memory_pool.h rename to mindspore/ccsrc/minddata/dataset/util/memory_pool.h index ee1da3bda1..c7cc473109 100644 --- a/mindspore/ccsrc/dataset/util/memory_pool.h +++ b/mindspore/ccsrc/minddata/dataset/util/memory_pool.h @@ -19,7 +19,7 @@ #include #include #include -#include "dataset/util/status.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/path.cc b/mindspore/ccsrc/minddata/dataset/util/path.cc similarity index 99% rename from mindspore/ccsrc/dataset/util/path.cc rename to mindspore/ccsrc/minddata/dataset/util/path.cc index cdd2343799..8740ecb8e0 100644 --- a/mindspore/ccsrc/dataset/util/path.cc +++ 
b/mindspore/ccsrc/minddata/dataset/util/path.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/util/path.h" +#include "minddata/dataset/util/path.h" #include #include diff --git a/mindspore/ccsrc/dataset/util/path.h b/mindspore/ccsrc/minddata/dataset/util/path.h similarity index 98% rename from mindspore/ccsrc/dataset/util/path.h rename to mindspore/ccsrc/minddata/dataset/util/path.h index fbf65b8c23..8bc07ca8f3 100644 --- a/mindspore/ccsrc/dataset/util/path.h +++ b/mindspore/ccsrc/minddata/dataset/util/path.h @@ -20,7 +20,7 @@ #include #include -#include "dataset/util/status.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/queue.h b/mindspore/ccsrc/minddata/dataset/util/queue.h similarity index 96% rename from mindspore/ccsrc/dataset/util/queue.h rename to mindspore/ccsrc/minddata/dataset/util/queue.h index 7fca93d944..7a0a987499 100644 --- a/mindspore/ccsrc/dataset/util/queue.h +++ b/mindspore/ccsrc/minddata/dataset/util/queue.h @@ -26,10 +26,10 @@ #include "common/utils.h" #include "utils/log_adapter.h" -#include "dataset/util/allocator.h" -#include "dataset/util/services.h" -#include "dataset/util/cond_var.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/util/allocator.h" +#include "minddata/dataset/util/services.h" +#include "minddata/dataset/util/cond_var.h" +#include "minddata/dataset/util/task_manager.h" namespace mindspore { namespace dataset { @@ -182,6 +182,9 @@ class Queue { arr_[k].~T(); } } + for (uint64_t i = 0; i < sz_; i++) { + std::allocator_traits>::construct(alloc_, &(arr_[i])); + } empty_cv_.ResetIntrpState(); full_cv_.ResetIntrpState(); head_ = 0; diff --git a/mindspore/ccsrc/dataset/util/random.h b/mindspore/ccsrc/minddata/dataset/util/random.h similarity index 95% rename from mindspore/ccsrc/dataset/util/random.h rename to 
mindspore/ccsrc/minddata/dataset/util/random.h index 957a4214a8..d2658f67ec 100644 --- a/mindspore/ccsrc/dataset/util/random.h +++ b/mindspore/ccsrc/minddata/dataset/util/random.h @@ -26,8 +26,8 @@ #include #include -#include "dataset/core/config_manager.h" -#include "dataset/core/global_context.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/global_context.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/util/semaphore.cc b/mindspore/ccsrc/minddata/dataset/util/semaphore.cc similarity index 93% rename from mindspore/ccsrc/dataset/util/semaphore.cc rename to mindspore/ccsrc/minddata/dataset/util/semaphore.cc index 36ddf5511d..5dadd98f3c 100644 --- a/mindspore/ccsrc/dataset/util/semaphore.cc +++ b/mindspore/ccsrc/minddata/dataset/util/semaphore.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/util/semaphore.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/util/semaphore.h" +#include "minddata/dataset/util/task_manager.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/semaphore.h b/mindspore/ccsrc/minddata/dataset/util/semaphore.h similarity index 97% rename from mindspore/ccsrc/dataset/util/semaphore.h rename to mindspore/ccsrc/minddata/dataset/util/semaphore.h index 07b9e83e7f..d07398acb1 100644 --- a/mindspore/ccsrc/dataset/util/semaphore.h +++ b/mindspore/ccsrc/minddata/dataset/util/semaphore.h @@ -16,7 +16,7 @@ #ifndef DATASET_UTIL_SEMAPHORE_H_ #define DATASET_UTIL_SEMAPHORE_H_ -#include "dataset/util/cond_var.h" +#include "minddata/dataset/util/cond_var.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/service.cc b/mindspore/ccsrc/minddata/dataset/util/service.cc similarity index 98% rename from mindspore/ccsrc/dataset/util/service.cc rename to 
mindspore/ccsrc/minddata/dataset/util/service.cc index c89f7287f6..19d60ab47a 100644 --- a/mindspore/ccsrc/dataset/util/service.cc +++ b/mindspore/ccsrc/minddata/dataset/util/service.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/util/service.h" +#include "minddata/dataset/util/service.h" #include namespace mindspore { diff --git a/mindspore/ccsrc/dataset/util/service.h b/mindspore/ccsrc/minddata/dataset/util/service.h similarity index 94% rename from mindspore/ccsrc/dataset/util/service.h rename to mindspore/ccsrc/minddata/dataset/util/service.h index 1113fc1d14..2b9c7197fe 100644 --- a/mindspore/ccsrc/dataset/util/service.h +++ b/mindspore/ccsrc/minddata/dataset/util/service.h @@ -17,8 +17,8 @@ #define DATASET_UTIL_SERVICE_H_ #include -#include "dataset/util/lock.h" -#include "dataset/util/status.h" +#include "minddata/dataset/util/lock.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/services.cc b/mindspore/ccsrc/minddata/dataset/util/services.cc similarity index 68% rename from mindspore/ccsrc/dataset/util/services.cc rename to mindspore/ccsrc/minddata/dataset/util/services.cc index 6516deea41..547773e0f1 100644 --- a/mindspore/ccsrc/dataset/util/services.cc +++ b/mindspore/ccsrc/minddata/dataset/util/services.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/util/services.h" +#include "minddata/dataset/util/services.h" #include #if !defined(_WIN32) && !defined(_WIN64) @@ -22,11 +22,11 @@ #include #endif #include -#include "dataset/util/circular_pool.h" -#include "dataset/util/random.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/engine/cache/cache_server.h" +#include "minddata/dataset/util/circular_pool.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/util/task_manager.h" -#define SLOT_TASK_MGR 0 namespace mindspore { namespace dataset { std::unique_ptr Services::instance_ = nullptr; @@ -61,15 +61,25 @@ std::string Services::GetUniqueID() { TaskManager &Services::getTaskMgrInstance() { Services &sm = GetInstance(); - return *(static_cast(sm.sa_[SLOT_TASK_MGR])); + return *(static_cast(sm.sa_[kSlotTaskMgr_])); +} + +CacheServer &Services::getCacheServer() { + Services &sm = GetInstance(); + return *(static_cast(sm.sa_[kSlotCacheMgr_])); } Status Services::CreateAllInstances() { // In order, TaskMgr, BufferMgr Status rc; - sa_[SLOT_TASK_MGR] = new (&rc, pool_) TaskManager(); + sa_[kSlotTaskMgr_] = new (&rc, pool_) TaskManager(); RETURN_IF_NOT_OK(rc); - rc = sa_[SLOT_TASK_MGR]->ServiceStart(); + rc = sa_[kSlotTaskMgr_]->ServiceStart(); + RETURN_IF_NOT_OK(rc); + // TODO(jesse) : Get the parameters from config file. 
Right now spill to /tmp and spawn 3 workers + sa_[kSlotCacheMgr_] = new (&rc, pool_) CacheServer("/tmp", 3); + RETURN_IF_NOT_OK(rc); + rc = sa_[kSlotCacheMgr_]->ServiceStart(); return rc; } @@ -83,8 +93,14 @@ Services::Services() : pool_(nullptr), sa_{nullptr} { Services::~Services() noexcept { try { // In reverse order - TaskManager *tm = static_cast(sa_[SLOT_TASK_MGR]); - if (tm) { + CacheServer *cs = static_cast(sa_[kSlotCacheMgr_]); + if (cs != nullptr) { + (void)cs->ServiceStop(); + cs->~CacheServer(); + pool_->Deallocate(cs); + } + TaskManager *tm = static_cast(sa_[kSlotTaskMgr_]); + if (tm != nullptr) { (void)tm->ServiceStop(); tm->~TaskManager(); pool_->Deallocate(tm); diff --git a/mindspore/ccsrc/dataset/util/services.h b/mindspore/ccsrc/minddata/dataset/util/services.h similarity index 88% rename from mindspore/ccsrc/dataset/util/services.h rename to mindspore/ccsrc/minddata/dataset/util/services.h index e19f44dccc..c7adea0b6e 100644 --- a/mindspore/ccsrc/dataset/util/services.h +++ b/mindspore/ccsrc/minddata/dataset/util/services.h @@ -19,15 +19,15 @@ #include #include #include -#include "dataset/util/memory_pool.h" -#include "dataset/util/allocator.h" -#include "dataset/util/service.h" +#include "minddata/dataset/util/memory_pool.h" +#include "minddata/dataset/util/allocator.h" +#include "minddata/dataset/util/service.h" #define UNIQUEID_LEN 36 namespace mindspore { namespace dataset { class TaskManager; - +class CacheServer; class Services { public: static Status CreateInstance() { @@ -61,6 +61,8 @@ class Services { static TaskManager &getTaskMgrInstance(); + static CacheServer &getCacheServer(); + std::shared_ptr GetServiceMemPool() { return pool_; } #if !defined(_WIN32) && !defined(_WIN64) @@ -87,7 +89,9 @@ class Services { // We use pointers here instead of unique_ptr because we // want to have ultimate control on the order of // construction and destruction. 
- static constexpr int kNumServices_ = 1; + static constexpr int kSlotTaskMgr_ = 0; + static constexpr int kSlotCacheMgr_ = 1; + static constexpr int kNumServices_ = 2; Service *sa_[kNumServices_]; Services(); diff --git a/mindspore/ccsrc/dataset/util/sig_handler.cc b/mindspore/ccsrc/minddata/dataset/util/sig_handler.cc similarity index 94% rename from mindspore/ccsrc/dataset/util/sig_handler.cc rename to mindspore/ccsrc/minddata/dataset/util/sig_handler.cc index 644a633066..eed3b4ee4d 100644 --- a/mindspore/ccsrc/dataset/util/sig_handler.cc +++ b/mindspore/ccsrc/minddata/dataset/util/sig_handler.cc @@ -13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/util/sig_handler.h" +#include "minddata/dataset/util/sig_handler.h" #include #include #if !defined(_WIN32) && !defined(_WIN64) #include #endif #include -#include "dataset/util/task_manager.h" +#include "minddata/dataset/util/task_manager.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/sig_handler.h b/mindspore/ccsrc/minddata/dataset/util/sig_handler.h similarity index 100% rename from mindspore/ccsrc/dataset/util/sig_handler.h rename to mindspore/ccsrc/minddata/dataset/util/sig_handler.h diff --git a/mindspore/ccsrc/dataset/util/slice.cc b/mindspore/ccsrc/minddata/dataset/util/slice.cc similarity index 97% rename from mindspore/ccsrc/dataset/util/slice.cc rename to mindspore/ccsrc/minddata/dataset/util/slice.cc index f1798b4f44..beff2b3dd2 100644 --- a/mindspore/ccsrc/dataset/util/slice.cc +++ b/mindspore/ccsrc/minddata/dataset/util/slice.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/util/slice.h" +#include "minddata/dataset/util/slice.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/slice.h b/mindspore/ccsrc/minddata/dataset/util/slice.h similarity index 95% rename from mindspore/ccsrc/dataset/util/slice.h rename to mindspore/ccsrc/minddata/dataset/util/slice.h index 127df23cfa..1caee0f816 100644 --- a/mindspore/ccsrc/dataset/util/slice.h +++ b/mindspore/ccsrc/minddata/dataset/util/slice.h @@ -20,8 +20,8 @@ #include #include #include "./securec.h" -#include "dataset/util/allocator.h" -#include "dataset/util/status.h" +#include "minddata/dataset/util/allocator.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { /// \brief A ReadableSlice wraps a const pointer in memory and its size. @@ -31,6 +31,10 @@ class ReadableSlice { public: ReadableSlice() : ptr_(nullptr), sz_(0) {} ReadableSlice(const void *ptr, size_t sz) : ptr_(ptr), sz_(sz) {} + + /// \brief Destructor + ~ReadableSlice() = default; + ReadableSlice(const ReadableSlice &src, off64_t offset, size_t len) { ptr_ = static_cast(src.GetPointer()) + offset; sz_ = len; @@ -89,6 +93,8 @@ class WritableSlice : public ReadableSlice { WritableSlice(const WritableSlice &src, off64_t offset, size_t len); WritableSlice(const WritableSlice &src, off64_t offset); WritableSlice(const WritableSlice &lhs) : ReadableSlice(lhs) { mutable_data_ = lhs.mutable_data_; } + /// \brief Destructor + ~WritableSlice() = default; WritableSlice &operator=(const WritableSlice &lhs) { if (this != &lhs) { mutable_data_ = lhs.mutable_data_; diff --git a/mindspore/ccsrc/dataset/util/status.cc b/mindspore/ccsrc/minddata/dataset/util/status.cc similarity index 97% rename from mindspore/ccsrc/dataset/util/status.cc rename to mindspore/ccsrc/minddata/dataset/util/status.cc index 27e9dfbc83..3fc498b701 100644 --- a/mindspore/ccsrc/dataset/util/status.cc +++ b/mindspore/ccsrc/minddata/dataset/util/status.cc @@ -13,10 +13,10 @@ * See 
the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/util/status.h" +#include "minddata/dataset/util/status.h" #include #include "common/utils.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/util/task_manager.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/status.h b/mindspore/ccsrc/minddata/dataset/util/status.h similarity index 100% rename from mindspore/ccsrc/dataset/util/status.h rename to mindspore/ccsrc/minddata/dataset/util/status.h diff --git a/mindspore/ccsrc/dataset/util/storage_container.cc b/mindspore/ccsrc/minddata/dataset/util/storage_container.cc similarity index 97% rename from mindspore/ccsrc/dataset/util/storage_container.cc rename to mindspore/ccsrc/minddata/dataset/util/storage_container.cc index 3a4c13e2d9..506495227d 100644 --- a/mindspore/ccsrc/dataset/util/storage_container.cc +++ b/mindspore/ccsrc/minddata/dataset/util/storage_container.cc @@ -13,15 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/util/storage_container.h" +#include "minddata/dataset/util/storage_container.h" #include #include #include #include #include "common/utils.h" -#include "dataset/util/path.h" -#include "dataset/util/status.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/status.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/util/storage_container.h b/mindspore/ccsrc/minddata/dataset/util/storage_container.h similarity index 90% rename from mindspore/ccsrc/dataset/util/storage_container.h rename to mindspore/ccsrc/minddata/dataset/util/storage_container.h index 07e41bd66a..a304012b60 100644 --- a/mindspore/ccsrc/dataset/util/storage_container.h +++ b/mindspore/ccsrc/minddata/dataset/util/storage_container.h @@ -22,11 +22,11 @@ #include #include #include -#include "dataset/util/system_pool.h" -#include "dataset/util/buddy.h" -#include "dataset/util/path.h" -#include "dataset/util/slice.h" -#include "dataset/util/status.h" +#include "minddata/dataset/util/system_pool.h" +#include "minddata/dataset/util/buddy.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/slice.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/storage_manager.cc b/mindspore/ccsrc/minddata/dataset/util/storage_manager.cc similarity index 97% rename from mindspore/ccsrc/dataset/util/storage_manager.cc rename to mindspore/ccsrc/minddata/dataset/util/storage_manager.cc index 1d958576ba..2f85d00a45 100644 --- a/mindspore/ccsrc/dataset/util/storage_manager.cc +++ b/mindspore/ccsrc/minddata/dataset/util/storage_manager.cc @@ -13,15 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/util/storage_manager.h" +#include "minddata/dataset/util/storage_manager.h" #include #include #include #include #include "common/utils.h" -#include "dataset/util/path.h" -#include "dataset/util/services.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/services.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/util/storage_manager.h b/mindspore/ccsrc/minddata/dataset/util/storage_manager.h similarity index 85% rename from mindspore/ccsrc/dataset/util/storage_manager.h rename to mindspore/ccsrc/minddata/dataset/util/storage_manager.h index 075ac713d2..e79e7c6e63 100644 --- a/mindspore/ccsrc/dataset/util/storage_manager.h +++ b/mindspore/ccsrc/minddata/dataset/util/storage_manager.h @@ -21,14 +21,14 @@ #include #include #include -#include "dataset/util/allocator.h" -#include "dataset/util/auto_index.h" -#include "dataset/util/lock.h" -#include "dataset/util/memory_pool.h" -#include "dataset/util/path.h" -#include "dataset/util/service.h" -#include "dataset/util/slice.h" -#include "dataset/util/storage_container.h" +#include "minddata/dataset/util/allocator.h" +#include "minddata/dataset/util/auto_index.h" +#include "minddata/dataset/util/lock.h" +#include "minddata/dataset/util/memory_pool.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/service.h" +#include "minddata/dataset/util/slice.h" +#include "minddata/dataset/util/storage_container.h" using ListOfContainers = std::vector>; namespace mindspore { diff --git a/mindspore/ccsrc/dataset/util/system_pool.h b/mindspore/ccsrc/minddata/dataset/util/system_pool.h similarity index 96% rename from mindspore/ccsrc/dataset/util/system_pool.h rename to mindspore/ccsrc/minddata/dataset/util/system_pool.h index 286e30a615..3a7e61d16b 100644 --- a/mindspore/ccsrc/dataset/util/system_pool.h +++ b/mindspore/ccsrc/minddata/dataset/util/system_pool.h @@ -22,8 +22,8 @@ #include #include #include "./securec.h" 
-#include "dataset/util/allocator.h" -#include "dataset/util/memory_pool.h" +#include "minddata/dataset/util/allocator.h" +#include "minddata/dataset/util/memory_pool.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/task.cc b/mindspore/ccsrc/minddata/dataset/util/task.cc similarity index 98% rename from mindspore/ccsrc/dataset/util/task.cc rename to mindspore/ccsrc/minddata/dataset/util/task.cc index 93db55d5f9..39d754e806 100644 --- a/mindspore/ccsrc/dataset/util/task.cc +++ b/mindspore/ccsrc/minddata/dataset/util/task.cc @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/util/task.h" +#include "minddata/dataset/util/task.h" #include "common/utils.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/util/task_manager.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/util/task.h b/mindspore/ccsrc/minddata/dataset/util/task.h similarity index 93% rename from mindspore/ccsrc/dataset/util/task.h rename to mindspore/ccsrc/minddata/dataset/util/task.h index 49eb16b182..9309a3de7b 100644 --- a/mindspore/ccsrc/dataset/util/task.h +++ b/mindspore/ccsrc/minddata/dataset/util/task.h @@ -27,11 +27,11 @@ #include #include #include -#include "dataset/util/intrp_resource.h" -#include "dataset/util/list.h" -#include "dataset/util/memory_pool.h" -#include "dataset/util/services.h" -#include "dataset/util/wait_post.h" +#include "minddata/dataset/util/intrp_resource.h" +#include "minddata/dataset/util/list.h" +#include "minddata/dataset/util/memory_pool.h" +#include "minddata/dataset/util/services.h" +#include "minddata/dataset/util/wait_post.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/util/task_manager.cc b/mindspore/ccsrc/minddata/dataset/util/task_manager.cc similarity index 99% rename from mindspore/ccsrc/dataset/util/task_manager.cc rename to 
mindspore/ccsrc/minddata/dataset/util/task_manager.cc index 3965e35564..fefea0b97c 100644 --- a/mindspore/ccsrc/dataset/util/task_manager.cc +++ b/mindspore/ccsrc/minddata/dataset/util/task_manager.cc @@ -17,7 +17,7 @@ #include #include #include "./securec.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/util/task_manager.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/task_manager.h b/mindspore/ccsrc/minddata/dataset/util/task_manager.h similarity index 94% rename from mindspore/ccsrc/dataset/util/task_manager.h rename to mindspore/ccsrc/minddata/dataset/util/task_manager.h index 5961c9000e..3030390bab 100644 --- a/mindspore/ccsrc/dataset/util/task_manager.h +++ b/mindspore/ccsrc/minddata/dataset/util/task_manager.h @@ -25,12 +25,12 @@ #include #include #include -#include "dataset/util/allocator.h" -#include "dataset/util/intrp_service.h" -#include "dataset/util/lock.h" -#include "dataset/util/services.h" -#include "dataset/util/status.h" -#include "dataset/util/task.h" +#include "minddata/dataset/util/allocator.h" +#include "minddata/dataset/util/intrp_service.h" +#include "minddata/dataset/util/lock.h" +#include "minddata/dataset/util/services.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/util/task.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/treap.h b/mindspore/ccsrc/minddata/dataset/util/treap.h similarity index 100% rename from mindspore/ccsrc/dataset/util/treap.h rename to mindspore/ccsrc/minddata/dataset/util/treap.h diff --git a/mindspore/ccsrc/dataset/util/wait_post.cc b/mindspore/ccsrc/minddata/dataset/util/wait_post.cc similarity index 93% rename from mindspore/ccsrc/dataset/util/wait_post.cc rename to mindspore/ccsrc/minddata/dataset/util/wait_post.cc index 204f203d9a..944d9ca245 100644 --- a/mindspore/ccsrc/dataset/util/wait_post.cc +++ b/mindspore/ccsrc/minddata/dataset/util/wait_post.cc @@ -13,8 +13,8 @@ * See the 
License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/util/wait_post.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/util/wait_post.h" +#include "minddata/dataset/util/task_manager.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/wait_post.h b/mindspore/ccsrc/minddata/dataset/util/wait_post.h similarity index 92% rename from mindspore/ccsrc/dataset/util/wait_post.h rename to mindspore/ccsrc/minddata/dataset/util/wait_post.h index 4e60995bd9..afd3bea38b 100644 --- a/mindspore/ccsrc/dataset/util/wait_post.h +++ b/mindspore/ccsrc/minddata/dataset/util/wait_post.h @@ -17,8 +17,8 @@ #define DATASET_UTIL_WAIT_POST_H_ #include -#include "dataset/util/cond_var.h" -#include "dataset/util/status.h" +#include "minddata/dataset/util/cond_var.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/mindrecord/CMakeLists.txt b/mindspore/ccsrc/minddata/mindrecord/CMakeLists.txt similarity index 100% rename from mindspore/ccsrc/mindrecord/CMakeLists.txt rename to mindspore/ccsrc/minddata/mindrecord/CMakeLists.txt diff --git a/mindspore/ccsrc/mindrecord/common/shard_error.cc b/mindspore/ccsrc/minddata/mindrecord/common/shard_error.cc similarity index 98% rename from mindspore/ccsrc/mindrecord/common/shard_error.cc rename to mindspore/ccsrc/minddata/mindrecord/common/shard_error.cc index ad68aaf92c..e4d35b8305 100644 --- a/mindspore/ccsrc/mindrecord/common/shard_error.cc +++ b/mindspore/ccsrc/minddata/mindrecord/common/shard_error.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/shard_error.h" namespace mindspore { namespace mindrecord { diff --git a/mindspore/ccsrc/mindrecord/common/shard_pybind.cc b/mindspore/ccsrc/minddata/mindrecord/common/shard_pybind.cc similarity index 96% rename from mindspore/ccsrc/mindrecord/common/shard_pybind.cc rename to mindspore/ccsrc/minddata/mindrecord/common/shard_pybind.cc index ee923ebc97..d9e51efc4e 100644 --- a/mindspore/ccsrc/mindrecord/common/shard_pybind.cc +++ b/mindspore/ccsrc/minddata/mindrecord/common/shard_pybind.cc @@ -17,12 +17,12 @@ #include #include #include "common/utils.h" -#include "mindrecord/include/common/shard_utils.h" -#include "mindrecord/include/shard_error.h" -#include "mindrecord/include/shard_index_generator.h" -#include "mindrecord/include/shard_reader.h" -#include "mindrecord/include/shard_segment.h" -#include "mindrecord/include/shard_writer.h" +#include "minddata/mindrecord/include/common/shard_utils.h" +#include "minddata/mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/shard_index_generator.h" +#include "minddata/mindrecord/include/shard_reader.h" +#include "minddata/mindrecord/include/shard_segment.h" +#include "minddata/mindrecord/include/shard_writer.h" #include "nlohmann/json.hpp" #include "pybind11/pybind11.h" #include "pybind11/stl.h" diff --git a/mindspore/ccsrc/mindrecord/common/shard_utils.cc b/mindspore/ccsrc/minddata/mindrecord/common/shard_utils.cc similarity index 99% rename from mindspore/ccsrc/mindrecord/common/shard_utils.cc rename to mindspore/ccsrc/minddata/mindrecord/common/shard_utils.cc index edeabb3cde..b5021802a0 100644 --- a/mindspore/ccsrc/mindrecord/common/shard_utils.cc +++ b/mindspore/ccsrc/minddata/mindrecord/common/shard_utils.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "mindrecord/include/common/shard_utils.h" +#include "minddata/mindrecord/include/common/shard_utils.h" #include "common/utils.h" #include "./securec.h" diff --git a/mindspore/ccsrc/mindrecord/include/common/shard_pybind.h b/mindspore/ccsrc/minddata/mindrecord/include/common/shard_pybind.h similarity index 95% rename from mindspore/ccsrc/mindrecord/include/common/shard_pybind.h rename to mindspore/ccsrc/minddata/mindrecord/include/common/shard_pybind.h index 86c71a0ea7..3b3698ca68 100644 --- a/mindspore/ccsrc/mindrecord/include/common/shard_pybind.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/common/shard_pybind.h @@ -19,7 +19,7 @@ #include #include -#include "mindrecord/include/common/shard_utils.h" +#include "minddata/mindrecord/include/common/shard_utils.h" #include "pybind11/pybind11.h" namespace py = pybind11; diff --git a/mindspore/ccsrc/mindrecord/include/common/shard_utils.h b/mindspore/ccsrc/minddata/mindrecord/include/common/shard_utils.h similarity index 99% rename from mindspore/ccsrc/mindrecord/include/common/shard_utils.h rename to mindspore/ccsrc/minddata/mindrecord/include/common/shard_utils.h index 8aa5bdfbda..bd1cda8a99 100644 --- a/mindspore/ccsrc/mindrecord/include/common/shard_utils.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/common/shard_utils.h @@ -41,7 +41,7 @@ #include #include #include -#include "mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/shard_error.h" #include "nlohmann/json.hpp" #include "./sqlite3.h" #include "utils/log_adapter.h" diff --git a/mindspore/ccsrc/mindrecord/include/shard_category.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_category.h similarity index 97% rename from mindspore/ccsrc/mindrecord/include/shard_category.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_category.h index 618a91b1d8..ed1e748afe 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_category.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_category.h @@ -22,7 
+22,7 @@ #include #include #include -#include "mindrecord/include/shard_operator.h" +#include "minddata/mindrecord/include/shard_operator.h" namespace mindspore { namespace mindrecord { diff --git a/mindspore/ccsrc/mindrecord/include/shard_column.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_column.h similarity index 99% rename from mindspore/ccsrc/mindrecord/include/shard_column.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_column.h index 968d82e717..f6353ed3ce 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_column.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_column.h @@ -22,7 +22,7 @@ #include #include #include -#include "mindrecord/include/shard_header.h" +#include "minddata/mindrecord/include/shard_header.h" namespace mindspore { namespace mindrecord { diff --git a/mindspore/ccsrc/mindrecord/include/shard_distributed_sample.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_distributed_sample.h similarity index 91% rename from mindspore/ccsrc/mindrecord/include/shard_distributed_sample.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_distributed_sample.h index ef0ad738c4..f166ec1e6c 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_distributed_sample.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_distributed_sample.h @@ -21,9 +21,9 @@ #include #include #include -#include "mindrecord/include/shard_operator.h" -#include "mindrecord/include/shard_shuffle.h" -#include "mindrecord/include/shard_sample.h" +#include "minddata/mindrecord/include/shard_operator.h" +#include "minddata/mindrecord/include/shard_shuffle.h" +#include "minddata/mindrecord/include/shard_sample.h" namespace mindspore { namespace mindrecord { diff --git a/mindspore/ccsrc/mindrecord/include/shard_error.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_error.h similarity index 100% rename from mindspore/ccsrc/mindrecord/include/shard_error.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_error.h diff --git 
a/mindspore/ccsrc/mindrecord/include/shard_header.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_header.h similarity index 94% rename from mindspore/ccsrc/mindrecord/include/shard_header.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_header.h index e4361c466a..67169e8696 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_header.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_header.h @@ -22,12 +22,12 @@ #include #include #include -#include "mindrecord/include/common/shard_utils.h" -#include "mindrecord/include/shard_error.h" -#include "mindrecord/include/shard_index.h" -#include "mindrecord/include/shard_page.h" -#include "mindrecord/include/shard_schema.h" -#include "mindrecord/include/shard_statistics.h" +#include "minddata/mindrecord/include/common/shard_utils.h" +#include "minddata/mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/shard_index.h" +#include "minddata/mindrecord/include/shard_page.h" +#include "minddata/mindrecord/include/shard_schema.h" +#include "minddata/mindrecord/include/shard_statistics.h" namespace mindspore { namespace mindrecord { diff --git a/mindspore/ccsrc/mindrecord/include/shard_index.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_index.h similarity index 90% rename from mindspore/ccsrc/mindrecord/include/shard_index.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_index.h index d430c5bdcf..79b10893fb 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_index.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_index.h @@ -24,9 +24,9 @@ #include #include #include -#include "mindrecord/include/common/shard_utils.h" -#include "mindrecord/include/shard_error.h" -#include "mindrecord/include/shard_schema.h" +#include "minddata/mindrecord/include/common/shard_utils.h" +#include "minddata/mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/shard_schema.h" #include "utils/log_adapter.h" namespace mindspore { diff --git 
a/mindspore/ccsrc/mindrecord/include/shard_index_generator.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_index_generator.h similarity index 98% rename from mindspore/ccsrc/mindrecord/include/shard_index_generator.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_index_generator.h index b081b7a0a0..fb85d9adbc 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_index_generator.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_index_generator.h @@ -25,7 +25,7 @@ #include #include #include -#include "mindrecord/include/shard_header.h" +#include "minddata/mindrecord/include/shard_header.h" #include "./sqlite3.h" namespace mindspore { diff --git a/mindspore/ccsrc/mindrecord/include/shard_operator.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_operator.h similarity index 97% rename from mindspore/ccsrc/mindrecord/include/shard_operator.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_operator.h index f33e3db5f4..b5ea53b759 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_operator.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_operator.h @@ -18,7 +18,7 @@ #define MINDRECORD_INCLUDE_SHARD_OPERATOR_H_ #include -#include "mindrecord/include/shard_task.h" +#include "minddata/mindrecord/include/shard_task.h" namespace mindspore { namespace mindrecord { diff --git a/mindspore/ccsrc/mindrecord/include/shard_page.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_page.h similarity index 98% rename from mindspore/ccsrc/mindrecord/include/shard_page.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_page.h index c22acd8d2c..01c70acf29 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_page.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_page.h @@ -23,7 +23,7 @@ #include #include #include -#include "mindrecord/include/common/shard_utils.h" +#include "minddata/mindrecord/include/common/shard_utils.h" #include "pybind11/pybind11.h" #include "utils/log_adapter.h" diff --git 
a/mindspore/ccsrc/mindrecord/include/shard_pk_sample.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_pk_sample.h similarity index 89% rename from mindspore/ccsrc/mindrecord/include/shard_pk_sample.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_pk_sample.h index 4f1a1c307a..2d420b563d 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_pk_sample.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_pk_sample.h @@ -21,9 +21,9 @@ #include #include #include -#include "mindrecord/include/shard_operator.h" -#include "mindrecord/include/shard_shuffle.h" -#include "mindrecord/include/shard_category.h" +#include "minddata/mindrecord/include/shard_operator.h" +#include "minddata/mindrecord/include/shard_shuffle.h" +#include "minddata/mindrecord/include/shard_category.h" namespace mindspore { namespace mindrecord { diff --git a/mindspore/ccsrc/mindrecord/include/shard_reader.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_reader.h similarity index 96% rename from mindspore/ccsrc/mindrecord/include/shard_reader.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_reader.h index 1f2138d6d5..b1b0c1397a 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_reader.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_reader.h @@ -42,16 +42,16 @@ #include #include #include -#include "mindrecord/include/common/shard_utils.h" -#include "mindrecord/include/shard_category.h" -#include "mindrecord/include/shard_column.h" -#include "mindrecord/include/shard_distributed_sample.h" -#include "mindrecord/include/shard_error.h" -#include "mindrecord/include/shard_index_generator.h" -#include "mindrecord/include/shard_operator.h" -#include "mindrecord/include/shard_reader.h" -#include "mindrecord/include/shard_sample.h" -#include "mindrecord/include/shard_shuffle.h" +#include "minddata/mindrecord/include/common/shard_utils.h" +#include "minddata/mindrecord/include/shard_category.h" +#include "minddata/mindrecord/include/shard_column.h" +#include 
"minddata/mindrecord/include/shard_distributed_sample.h" +#include "minddata/mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/shard_index_generator.h" +#include "minddata/mindrecord/include/shard_operator.h" +#include "minddata/mindrecord/include/shard_reader.h" +#include "minddata/mindrecord/include/shard_sample.h" +#include "minddata/mindrecord/include/shard_shuffle.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/mindrecord/include/shard_sample.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_sample.h similarity index 93% rename from mindspore/ccsrc/mindrecord/include/shard_sample.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_sample.h index a32acbff6e..ce813bc4bf 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_sample.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_sample.h @@ -21,8 +21,8 @@ #include #include #include -#include "mindrecord/include/shard_operator.h" -#include "mindrecord/include/shard_shuffle.h" +#include "minddata/mindrecord/include/shard_operator.h" +#include "minddata/mindrecord/include/shard_shuffle.h" namespace mindspore { namespace mindrecord { diff --git a/mindspore/ccsrc/mindrecord/include/shard_schema.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_schema.h similarity index 94% rename from mindspore/ccsrc/mindrecord/include/shard_schema.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_schema.h index 4ef134bde2..56eae85e5a 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_schema.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_schema.h @@ -22,9 +22,9 @@ #include #include #include -#include "mindrecord/include/common/shard_pybind.h" -#include "mindrecord/include/common/shard_utils.h" -#include "mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/common/shard_pybind.h" +#include "minddata/mindrecord/include/common/shard_utils.h" +#include "minddata/mindrecord/include/shard_error.h" #include 
"pybind11/pybind11.h" #include "utils/log_adapter.h" diff --git a/mindspore/ccsrc/mindrecord/include/shard_segment.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_segment.h similarity index 98% rename from mindspore/ccsrc/mindrecord/include/shard_segment.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_segment.h index 12497a5ace..45d9bda338 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_segment.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_segment.h @@ -21,7 +21,7 @@ #include #include #include -#include "mindrecord/include/shard_reader.h" +#include "minddata/mindrecord/include/shard_reader.h" namespace mindspore { namespace mindrecord { diff --git a/mindspore/ccsrc/mindrecord/include/shard_sequential_sample.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_sequential_sample.h similarity index 96% rename from mindspore/ccsrc/mindrecord/include/shard_sequential_sample.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_sequential_sample.h index a8ee3a36db..724be9acaf 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_sequential_sample.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_sequential_sample.h @@ -21,7 +21,7 @@ #include #include #include -#include "mindrecord/include/shard_sample.h" +#include "minddata/mindrecord/include/shard_sample.h" namespace mindspore { namespace mindrecord { diff --git a/mindspore/ccsrc/mindrecord/include/shard_shuffle.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_shuffle.h similarity index 96% rename from mindspore/ccsrc/mindrecord/include/shard_shuffle.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_shuffle.h index adb172bdcc..d7f736b55b 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_shuffle.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_shuffle.h @@ -18,7 +18,7 @@ #define MINDRECORD_INCLUDE_SHARD_SHUFFLE_H_ #include -#include "mindrecord/include/shard_operator.h" +#include "minddata/mindrecord/include/shard_operator.h" namespace 
mindspore { namespace mindrecord { diff --git a/mindspore/ccsrc/mindrecord/include/shard_statistics.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_statistics.h similarity index 93% rename from mindspore/ccsrc/mindrecord/include/shard_statistics.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_statistics.h index 7fc2f968cd..f100bb9833 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_statistics.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_statistics.h @@ -24,9 +24,9 @@ #include #include -#include "mindrecord/include/common/shard_pybind.h" -#include "mindrecord/include/common/shard_utils.h" -#include "mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/common/shard_pybind.h" +#include "minddata/mindrecord/include/common/shard_utils.h" +#include "minddata/mindrecord/include/shard_error.h" #include "pybind11/pybind11.h" #include "utils/log_adapter.h" diff --git a/mindspore/ccsrc/mindrecord/include/shard_task.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_task.h similarity index 96% rename from mindspore/ccsrc/mindrecord/include/shard_task.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_task.h index 4a12eb9e45..f07da656f2 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_task.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_task.h @@ -22,7 +22,7 @@ #include #include #include -#include "mindrecord/include/common/shard_utils.h" +#include "minddata/mindrecord/include/common/shard_utils.h" namespace mindspore { namespace mindrecord { diff --git a/mindspore/ccsrc/mindrecord/include/shard_writer.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_writer.h similarity index 97% rename from mindspore/ccsrc/mindrecord/include/shard_writer.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_writer.h index 6175180c92..833928773e 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_writer.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_writer.h @@ -35,11 +35,11 @@ 
#include #include #include -#include "mindrecord/include/common/shard_utils.h" -#include "mindrecord/include/shard_column.h" -#include "mindrecord/include/shard_error.h" -#include "mindrecord/include/shard_header.h" -#include "mindrecord/include/shard_index.h" +#include "minddata/mindrecord/include/common/shard_utils.h" +#include "minddata/mindrecord/include/shard_column.h" +#include "minddata/mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/shard_header.h" +#include "minddata/mindrecord/include/shard_index.h" #include "pybind11/pybind11.h" #include "pybind11/stl.h" #include "utils/log_adapter.h" diff --git a/mindspore/ccsrc/mindrecord/io/shard_index_generator.cc b/mindspore/ccsrc/minddata/mindrecord/io/shard_index_generator.cc similarity index 99% rename from mindspore/ccsrc/mindrecord/io/shard_index_generator.cc rename to mindspore/ccsrc/minddata/mindrecord/io/shard_index_generator.cc index 16c730bd4c..f9b18a3bf0 100644 --- a/mindspore/ccsrc/mindrecord/io/shard_index_generator.cc +++ b/mindspore/ccsrc/minddata/mindrecord/io/shard_index_generator.cc @@ -15,7 +15,7 @@ */ #include -#include "mindrecord/include/shard_index_generator.h" +#include "minddata/mindrecord/include/shard_index_generator.h" #include "common/utils.h" using mindspore::LogStream; diff --git a/mindspore/ccsrc/mindrecord/io/shard_reader.cc b/mindspore/ccsrc/minddata/mindrecord/io/shard_reader.cc similarity index 99% rename from mindspore/ccsrc/mindrecord/io/shard_reader.cc rename to mindspore/ccsrc/minddata/mindrecord/io/shard_reader.cc index 99fa0c447d..84d7fddb6f 100644 --- a/mindspore/ccsrc/mindrecord/io/shard_reader.cc +++ b/mindspore/ccsrc/minddata/mindrecord/io/shard_reader.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "mindrecord/include/shard_distributed_sample.h" -#include "mindrecord/include/shard_reader.h" +#include "minddata/mindrecord/include/shard_distributed_sample.h" +#include "minddata/mindrecord/include/shard_reader.h" #include "common/utils.h" using mindspore::LogStream; diff --git a/mindspore/ccsrc/mindrecord/io/shard_segment.cc b/mindspore/ccsrc/minddata/mindrecord/io/shard_segment.cc similarity index 99% rename from mindspore/ccsrc/mindrecord/io/shard_segment.cc rename to mindspore/ccsrc/minddata/mindrecord/io/shard_segment.cc index fb1120b178..eda8924e13 100644 --- a/mindspore/ccsrc/mindrecord/io/shard_segment.cc +++ b/mindspore/ccsrc/minddata/mindrecord/io/shard_segment.cc @@ -14,11 +14,11 @@ * limitations under the License. */ -#include "mindrecord/include/shard_segment.h" +#include "minddata/mindrecord/include/shard_segment.h" #include "common/utils.h" #include "./securec.h" -#include "mindrecord/include/common/shard_utils.h" +#include "minddata/mindrecord/include/common/shard_utils.h" #include "pybind11/pybind11.h" using mindspore::LogStream; diff --git a/mindspore/ccsrc/mindrecord/io/shard_writer.cc b/mindspore/ccsrc/minddata/mindrecord/io/shard_writer.cc similarity index 99% rename from mindspore/ccsrc/mindrecord/io/shard_writer.cc rename to mindspore/ccsrc/minddata/mindrecord/io/shard_writer.cc index 913caab550..e85229cc34 100644 --- a/mindspore/ccsrc/mindrecord/io/shard_writer.cc +++ b/mindspore/ccsrc/minddata/mindrecord/io/shard_writer.cc @@ -14,9 +14,9 @@ * limitations under the License. 
*/ -#include "mindrecord/include/shard_writer.h" +#include "minddata/mindrecord/include/shard_writer.h" #include "common/utils.h" -#include "mindrecord/include/common/shard_utils.h" +#include "minddata/mindrecord/include/common/shard_utils.h" #include "./securec.h" using mindspore::LogStream; diff --git a/mindspore/ccsrc/mindrecord/meta/shard_category.cc b/mindspore/ccsrc/minddata/mindrecord/meta/shard_category.cc similarity index 96% rename from mindspore/ccsrc/mindrecord/meta/shard_category.cc rename to mindspore/ccsrc/minddata/mindrecord/meta/shard_category.cc index bd427a330a..eb1428a2ad 100644 --- a/mindspore/ccsrc/mindrecord/meta/shard_category.cc +++ b/mindspore/ccsrc/minddata/mindrecord/meta/shard_category.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "mindrecord/include/shard_category.h" +#include "minddata/mindrecord/include/shard_category.h" namespace mindspore { namespace mindrecord { diff --git a/mindspore/ccsrc/mindrecord/meta/shard_column.cc b/mindspore/ccsrc/minddata/mindrecord/meta/shard_column.cc similarity index 99% rename from mindspore/ccsrc/mindrecord/meta/shard_column.cc rename to mindspore/ccsrc/minddata/mindrecord/meta/shard_column.cc index 28dc243e17..4cc5e9f413 100644 --- a/mindspore/ccsrc/mindrecord/meta/shard_column.cc +++ b/mindspore/ccsrc/minddata/mindrecord/meta/shard_column.cc @@ -14,11 +14,11 @@ * limitations under the License. 
*/ -#include "mindrecord/include/shard_column.h" +#include "minddata/mindrecord/include/shard_column.h" #include "common/utils.h" -#include "mindrecord/include/common/shard_utils.h" -#include "mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/common/shard_utils.h" +#include "minddata/mindrecord/include/shard_error.h" namespace mindspore { namespace mindrecord { diff --git a/mindspore/ccsrc/mindrecord/meta/shard_distributed_sample.cc b/mindspore/ccsrc/minddata/mindrecord/meta/shard_distributed_sample.cc similarity index 97% rename from mindspore/ccsrc/mindrecord/meta/shard_distributed_sample.cc rename to mindspore/ccsrc/minddata/mindrecord/meta/shard_distributed_sample.cc index b7e890da7c..4c7abbb4b4 100644 --- a/mindspore/ccsrc/mindrecord/meta/shard_distributed_sample.cc +++ b/mindspore/ccsrc/minddata/mindrecord/meta/shard_distributed_sample.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "mindrecord/include/shard_distributed_sample.h" +#include "minddata/mindrecord/include/shard_distributed_sample.h" using mindspore::LogStream; using mindspore::ExceptionType::NoExceptionType; diff --git a/mindspore/ccsrc/mindrecord/meta/shard_header.cc b/mindspore/ccsrc/minddata/mindrecord/meta/shard_header.cc similarity index 99% rename from mindspore/ccsrc/mindrecord/meta/shard_header.cc rename to mindspore/ccsrc/minddata/mindrecord/meta/shard_header.cc index ec177394ef..500037399b 100644 --- a/mindspore/ccsrc/mindrecord/meta/shard_header.cc +++ b/mindspore/ccsrc/minddata/mindrecord/meta/shard_header.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "mindrecord/include/shard_header.h" +#include "minddata/mindrecord/include/shard_header.h" #include #include @@ -23,8 +23,8 @@ #include #include "common/utils.h" -#include "mindrecord/include/shard_error.h" -#include "mindrecord/include/shard_page.h" +#include "minddata/mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/shard_page.h" using mindspore::LogStream; using mindspore::ExceptionType::NoExceptionType; diff --git a/mindspore/ccsrc/mindrecord/meta/shard_index.cc b/mindspore/ccsrc/minddata/mindrecord/meta/shard_index.cc similarity index 95% rename from mindspore/ccsrc/mindrecord/meta/shard_index.cc rename to mindspore/ccsrc/minddata/mindrecord/meta/shard_index.cc index 8b7a3c0342..73397b5bba 100644 --- a/mindspore/ccsrc/mindrecord/meta/shard_index.cc +++ b/mindspore/ccsrc/minddata/mindrecord/meta/shard_index.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "mindrecord/include/shard_index.h" +#include "minddata/mindrecord/include/shard_index.h" namespace mindspore { namespace mindrecord { diff --git a/mindspore/ccsrc/mindrecord/meta/shard_page.cc b/mindspore/ccsrc/minddata/mindrecord/meta/shard_page.cc similarity index 96% rename from mindspore/ccsrc/mindrecord/meta/shard_page.cc rename to mindspore/ccsrc/minddata/mindrecord/meta/shard_page.cc index 6bb849ae1d..ba2292415f 100644 --- a/mindspore/ccsrc/mindrecord/meta/shard_page.cc +++ b/mindspore/ccsrc/minddata/mindrecord/meta/shard_page.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "mindrecord/include/shard_page.h" +#include "minddata/mindrecord/include/shard_page.h" #include "pybind11/pybind11.h" namespace mindspore { diff --git a/mindspore/ccsrc/mindrecord/meta/shard_pk_sample.cc b/mindspore/ccsrc/minddata/mindrecord/meta/shard_pk_sample.cc similarity index 96% rename from mindspore/ccsrc/mindrecord/meta/shard_pk_sample.cc rename to mindspore/ccsrc/minddata/mindrecord/meta/shard_pk_sample.cc index fac2fec708..081a48352d 100644 --- a/mindspore/ccsrc/mindrecord/meta/shard_pk_sample.cc +++ b/mindspore/ccsrc/minddata/mindrecord/meta/shard_pk_sample.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "mindrecord/include/shard_pk_sample.h" +#include "minddata/mindrecord/include/shard_pk_sample.h" using mindspore::LogStream; using mindspore::ExceptionType::NoExceptionType; diff --git a/mindspore/ccsrc/mindrecord/meta/shard_sample.cc b/mindspore/ccsrc/minddata/mindrecord/meta/shard_sample.cc similarity index 98% rename from mindspore/ccsrc/mindrecord/meta/shard_sample.cc rename to mindspore/ccsrc/minddata/mindrecord/meta/shard_sample.cc index c207747194..808ab55bfb 100644 --- a/mindspore/ccsrc/mindrecord/meta/shard_sample.cc +++ b/mindspore/ccsrc/minddata/mindrecord/meta/shard_sample.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "mindrecord/include/shard_sample.h" +#include "minddata/mindrecord/include/shard_sample.h" using mindspore::LogStream; using mindspore::ExceptionType::NoExceptionType; diff --git a/mindspore/ccsrc/mindrecord/meta/shard_schema.cc b/mindspore/ccsrc/minddata/mindrecord/meta/shard_schema.cc similarity index 98% rename from mindspore/ccsrc/mindrecord/meta/shard_schema.cc rename to mindspore/ccsrc/minddata/mindrecord/meta/shard_schema.cc index ee0f5afa4a..093be9792f 100644 --- a/mindspore/ccsrc/mindrecord/meta/shard_schema.cc +++ b/mindspore/ccsrc/minddata/mindrecord/meta/shard_schema.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "mindrecord/include/shard_schema.h" +#include "minddata/mindrecord/include/shard_schema.h" #include "common/utils.h" using mindspore::LogStream; diff --git a/mindspore/ccsrc/mindrecord/meta/shard_sequential_sample.cc b/mindspore/ccsrc/minddata/mindrecord/meta/shard_sequential_sample.cc similarity index 97% rename from mindspore/ccsrc/mindrecord/meta/shard_sequential_sample.cc rename to mindspore/ccsrc/minddata/mindrecord/meta/shard_sequential_sample.cc index a7fa4e7343..3aa695e03b 100644 --- a/mindspore/ccsrc/mindrecord/meta/shard_sequential_sample.cc +++ b/mindspore/ccsrc/minddata/mindrecord/meta/shard_sequential_sample.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "mindrecord/include/shard_sequential_sample.h" +#include "minddata/mindrecord/include/shard_sequential_sample.h" using mindspore::LogStream; using mindspore::ExceptionType::NoExceptionType; diff --git a/mindspore/ccsrc/mindrecord/meta/shard_shuffle.cc b/mindspore/ccsrc/minddata/mindrecord/meta/shard_shuffle.cc similarity index 98% rename from mindspore/ccsrc/mindrecord/meta/shard_shuffle.cc rename to mindspore/ccsrc/minddata/mindrecord/meta/shard_shuffle.cc index 5cf49b04f0..7743cabea3 100644 --- a/mindspore/ccsrc/mindrecord/meta/shard_shuffle.cc +++ b/mindspore/ccsrc/minddata/mindrecord/meta/shard_shuffle.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "mindrecord/include/shard_shuffle.h" +#include "minddata/mindrecord/include/shard_shuffle.h" #include diff --git a/mindspore/ccsrc/mindrecord/meta/shard_statistics.cc b/mindspore/ccsrc/minddata/mindrecord/meta/shard_statistics.cc similarity index 98% rename from mindspore/ccsrc/mindrecord/meta/shard_statistics.cc rename to mindspore/ccsrc/minddata/mindrecord/meta/shard_statistics.cc index ca36c50863..7024a2ab06 100644 --- a/mindspore/ccsrc/mindrecord/meta/shard_statistics.cc +++ b/mindspore/ccsrc/minddata/mindrecord/meta/shard_statistics.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "mindrecord/include/shard_statistics.h" +#include "minddata/mindrecord/include/shard_statistics.h" #include "pybind11/pybind11.h" using mindspore::LogStream; diff --git a/mindspore/ccsrc/mindrecord/meta/shard_task.cc b/mindspore/ccsrc/minddata/mindrecord/meta/shard_task.cc similarity index 97% rename from mindspore/ccsrc/mindrecord/meta/shard_task.cc rename to mindspore/ccsrc/minddata/mindrecord/meta/shard_task.cc index 8baa3c26cd..6f8e440f91 100644 --- a/mindspore/ccsrc/mindrecord/meta/shard_task.cc +++ b/mindspore/ccsrc/minddata/mindrecord/meta/shard_task.cc @@ -14,9 +14,9 @@ * limitations under the License. */ -#include "mindrecord/include/shard_task.h" +#include "minddata/mindrecord/include/shard_task.h" #include "common/utils.h" -#include "mindrecord/include/common/shard_utils.h" +#include "minddata/mindrecord/include/common/shard_utils.h" using mindspore::LogStream; using mindspore::ExceptionType::NoExceptionType; diff --git a/mindspore/ccsrc/parallel/ops_info/ops_info_head_files.h b/mindspore/ccsrc/parallel/ops_info/ops_info_head_files.h deleted file mode 100644 index 45b00aed30..0000000000 --- a/mindspore/ccsrc/parallel/ops_info/ops_info_head_files.h +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef MINDSPORE_CCSRC_PARALLEL_OPS_INFO_OPS_INFO_HEAD_FILES_H_ -#define MINDSPORE_CCSRC_PARALLEL_OPS_INFO_OPS_INFO_HEAD_FILES_H_ - -#include "parallel/ops_info/activation_info.h" -#include "parallel/ops_info/arithmetic_info.h" -#include "parallel/ops_info/batch_parallel_info.h" -#include "parallel/ops_info/bias_add_info.h" -#include "parallel/ops_info/comparison_function_info.h" -#include "parallel/ops_info/dropout_do_mask_info.h" -#include "parallel/ops_info/elementary_function_info.h" -#include "parallel/ops_info/gather_v2_info.h" -#include "parallel/ops_info/get_next_info.h" -#include "parallel/ops_info/l2_normalize_info.h" -#include "parallel/ops_info/layer_norm_info.h" -#include "parallel/ops_info/loss_info.h" -#include "parallel/ops_info/matmul_info.h" -#include "parallel/ops_info/onehot_info.h" -#include "parallel/ops_info/prelu_info.h" -#include "parallel/ops_info/reduce_method_info.h" -#include "parallel/ops_info/reshape_info.h" -#include "parallel/ops_info/transpose_info.h" -#include "parallel/ops_info/virtual_dataset_info.h" -#include "parallel/ops_info/gather_v2_p_info.h" - -#endif // MINDSPORE_CCSRC_PARALLEL_OPS_INFO_HEAD_FILES_H_ diff --git a/mindspore/ccsrc/pipeline/CMakeLists.txt b/mindspore/ccsrc/pipeline/jit/CMakeLists.txt similarity index 90% rename from mindspore/ccsrc/pipeline/CMakeLists.txt rename to mindspore/ccsrc/pipeline/jit/CMakeLists.txt index 39664d717d..6188546ce5 100644 --- a/mindspore/ccsrc/pipeline/CMakeLists.txt +++ b/mindspore/ccsrc/pipeline/jit/CMakeLists.txt @@ -24,4 +24,4 @@ if (ENABLE_GE OR ENABLE_D) list(APPEND _PIPELINE_SRC_FILES ${_PIPELINE_GE_SRC_FILES}) endif () -add_library(_mindspore_pipeline_obj OBJECT ${_PIPELINE_SRC_FILES}) +add_library(_mindspore_pipeline_jit_obj OBJECT ${_PIPELINE_SRC_FILES}) diff --git a/mindspore/ccsrc/pipeline/action.cc b/mindspore/ccsrc/pipeline/jit/action.cc similarity index 93% rename from mindspore/ccsrc/pipeline/action.cc rename to mindspore/ccsrc/pipeline/jit/action.cc index 
89598ae85d..74eb9f3f9b 100644 --- a/mindspore/ccsrc/pipeline/action.cc +++ b/mindspore/ccsrc/pipeline/jit/action.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "pipeline/action.h" +#include "pipeline/jit/action.h" #include #include @@ -24,22 +24,22 @@ #include #include "ir/func_graph_cloner.h" -#include "ir/param_value_py.h" -#include "parallel/costmodel_context.h" -#include "parallel/context.h" -#include "pipeline/pass.h" -#include "pipeline/parse/parse_base.h" -#include "pipeline/parse/data_converter.h" -#include "pipeline/static_analysis/abstract_value.h" -#include "pipeline/static_analysis/static_analysis.h" -#include "pipeline/static_analysis/program_specialize.h" -#include "pipeline/resource.h" +#include "ir/param_value.h" +#include "frontend/parallel/costmodel_context.h" +#include "frontend/parallel/context.h" +#include "pipeline/jit/pass.h" +#include "pipeline/jit/parse/parse_base.h" +#include "pipeline/jit/parse/data_converter.h" +#include "abstract/abstract_value.h" +#include "pipeline/jit/static_analysis/static_analysis.h" +#include "pipeline/jit/static_analysis/program_specialize.h" +#include "pipeline/jit/resource.h" #include "utils/context/ms_context.h" -#include "pipeline/remove_value_node_dup.h" -#include "optimizer/optimizer.h" +#include "pipeline/jit/remove_value_node_dup.h" +#include "frontend/optimizer/optimizer.h" #include "vm/transform.h" #include "parse/python_adapter.h" -#include "optimizer/py_pass_manager.h" +#include "frontend/optimizer/py_pass_manager.h" namespace mindspore { namespace pipeline { @@ -228,14 +228,10 @@ bool AbstractSpecializeAction(const ResourcePtr &res) { for (const auto ¶m : func_graph->parameters()) { auto param_node = std::static_pointer_cast(param); if (param_node->has_default()) { - auto param_value = std::dynamic_pointer_cast(param_node->default_param()); - AbstractBasePtr ptr = abstract::FromValue(parse::data_converter::PyDataToValue(param_value->value()), true); - auto sparse_grad = - 
py::cast(parse::python_adapter::GetPyObjAttr(param_value->value(), "sparse_grad")); - ptr->set_sparse_grad(sparse_grad); - auto has_indexed_slices_grad = - py::cast(parse::python_adapter::GetPyObjAttr(param_value->value(), "has_indexed_slices_grad")); - ptr->set_has_indexed_slices_grad(has_indexed_slices_grad); + const auto ¶m_value = param_node->default_param(); + ValuePtr value = param_value->value(); + constexpr bool broaden = true; + AbstractBasePtr ptr = abstract::FromValue(value, broaden); parallel::ParallelParameterContextRestoreInNoTraining(func_graph, param_node, ptr); args_spec.push_back(ptr); @@ -439,7 +435,7 @@ bool ResolveActionPyStub(const ResourcePtr &res) { } bool OptActionPyStub(const ResourcePtr &res) { - ActionPyStub(res, opt::python_pass::Phase::RESOLVE); + ActionPyStub(res, opt::python_pass::Phase::OPT); return true; } diff --git a/mindspore/ccsrc/pipeline/action.h b/mindspore/ccsrc/pipeline/jit/action.h similarity index 98% rename from mindspore/ccsrc/pipeline/action.h rename to mindspore/ccsrc/pipeline/jit/action.h index eed1307872..0a1feab1c9 100644 --- a/mindspore/ccsrc/pipeline/action.h +++ b/mindspore/ccsrc/pipeline/jit/action.h @@ -21,7 +21,7 @@ #include #include #include -#include "pipeline/resource.h" +#include "pipeline/jit/resource.h" #include "vm/segment_runner.h" namespace mindspore { diff --git a/mindspore/ccsrc/pipeline/base.h b/mindspore/ccsrc/pipeline/jit/base.h similarity index 98% rename from mindspore/ccsrc/pipeline/base.h rename to mindspore/ccsrc/pipeline/jit/base.h index 57edea03a2..0a8a2b75f3 100644 --- a/mindspore/ccsrc/pipeline/base.h +++ b/mindspore/ccsrc/pipeline/jit/base.h @@ -23,7 +23,7 @@ #include #include "ir/anf.h" -#include "pipeline/resource.h" +#include "pipeline/jit/resource.h" #include "utils/context/ms_context.h" namespace mindspore { diff --git a/mindspore/ccsrc/pipeline/init.cc b/mindspore/ccsrc/pipeline/jit/init.cc similarity index 95% rename from mindspore/ccsrc/pipeline/init.cc rename to 
mindspore/ccsrc/pipeline/jit/init.cc index f28be181dd..65adebb6e2 100644 --- a/mindspore/ccsrc/pipeline/init.cc +++ b/mindspore/ccsrc/pipeline/jit/init.cc @@ -16,29 +16,28 @@ #include #include -#include "kernel/oplib/oplib.h" -#include "kernel/oplib/oploader.h" -#include "pipeline/pipeline.h" -#include "operator/composite/composite.h" +#include "backend/kernel_compiler/oplib/oplib.h" +#include "backend/kernel_compiler/oplib/oploader.h" +#include "pipeline/jit/pipeline.h" +#include "frontend/operator/composite/composite.h" #include "ir/signature.h" -#include "pynative/pynative_execute.h" +#include "pipeline/pynative/pynative_execute.h" #include "utils/symbolic.h" #include "pybind_api/api_register.h" -#include "pipeline/parse/python_adapter.h" +#include "pipeline/jit/parse/python_adapter.h" #include "utils/summary/event_writer.h" #include "utils/config_manager.h" #include "utils/mpi/mpi_config.h" -#include "parallel/context.h" -#include "parallel/device_manager.h" -#include "parallel/costmodel_context.h" +#include "frontend/parallel/context.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/costmodel_context.h" #ifdef ENABLE_GPU_COLLECTIVE -#include "device/gpu/distribution/collective_init.h" +#include "runtime/device/gpu/distribution/collective_init.h" #else -#include "device/gpu/distribution/collective_fake_init.h" +#include "runtime/device/gpu/distribution/collective_fake_init.h" #endif namespace py = pybind11; -using FuncGraph = mindspore::FuncGraph; using EnvInstance = mindspore::EnvInstance; using ExecutorPy = mindspore::pipeline::ExecutorPy; using Pipeline = mindspore::pipeline::Pipeline; @@ -54,10 +53,6 @@ using CostModelContext = mindspore::parallel::CostModelContext; PYBIND11_MODULE(_c_expression, m) { m.doc() = "MindSpore c plugin"; - (void)py::class_>(*m, "MetaFuncGraph_") - .def_readonly(mindspore::PYTHON_METAFUNCGRAPH_FLAG, &mindspore::MetaFuncGraph::parse_info_) - .def(py::init()); - auto fns = 
mindspore::PybindDefineRegister::AllFuncs(); for (auto &item : fns) { item.second(&m); @@ -85,8 +80,6 @@ PYBIND11_MODULE(_c_expression, m) { py::arg("broadcast_params") = py::dict(), "Build data graph.") .def("has_compiled", &ExecutorPy::HasCompiled, py::arg("phase") = py::str(""), "get if cell compiled.") .def("run_init_graph", &ExecutorPy::RunInitGraph, "Run init Graph."); - // Class Graph interface - (void)py::class_(m, "FuncGraph").def(py::init()); (void)py::class_>(m, "EnvInstance_") .def_readonly(mindspore::PYTHON_ENVINSTANCE_FLAG, &mindspore::EnvInstance::parse_info_) @@ -155,8 +148,8 @@ PYBIND11_MODULE(_c_expression, m) { .def("set_enable_graph_kernel", &mindspore::MsContext::set_enable_graph_kernel, "Set the GraphKernel switch to on or off.") .def("get_enable_graph_kernel", &mindspore::MsContext::enable_graph_kernel, "Get the value of GraphKernel switch.") - .def("get_enable_sparse_flag", &mindspore::MsContext::enable_sparse_flag, "Get whether to enable sparse.") - .def("set_enable_sparse_flag", &mindspore::MsContext::set_enable_sparse_flag, "Set whether to enable sparse."); + .def("get_enable_sparse", &mindspore::MsContext::enable_sparse, "Get whether to enable sparsity.") + .def("set_enable_sparse", &mindspore::MsContext::set_enable_sparse, "Set whether to enable sparsity."); (void)py::class_>(m, "MpiConfig") .def_static("get_instance", &mindspore::MpiConfig::GetInstance, "Get mpi config instance.") @@ -323,7 +316,7 @@ PYBIND11_MODULE(_c_expression, m) { (void)py::class_>(m, "Oplib") .def(py::init()) - .def("reg_op", &OpLib::RegOp, "Register op info."); + .def_static("reg_op", &OpLib::RegOp, "Register op info."); #ifdef ENABLE_GPU_COLLECTIVE (void)m.def("init_gpu_collective", &mindspore::device::gpu::CollectiveInitializer::InitCollective, "Init gpu collective communication mode."); diff --git a/mindspore/ccsrc/pipeline/parse/data_converter.cc b/mindspore/ccsrc/pipeline/jit/parse/data_converter.cc similarity index 98% rename from 
mindspore/ccsrc/pipeline/parse/data_converter.cc rename to mindspore/ccsrc/pipeline/jit/parse/data_converter.cc index 330d03d11c..baef64481b 100644 --- a/mindspore/ccsrc/pipeline/parse/data_converter.cc +++ b/mindspore/ccsrc/pipeline/jit/parse/data_converter.cc @@ -16,7 +16,7 @@ * limitations under the License. */ -#include "pipeline/parse/data_converter.h" +#include "pipeline/jit/parse/data_converter.h" #include #include #include @@ -24,15 +24,15 @@ #include #include #include -#include "pipeline/parse/resolve.h" -#include "pipeline/parse/python_adapter.h" -#include "operator/ops.h" -#include "operator/composite/composite.h" +#include "pipeline/jit/parse/resolve.h" +#include "pipeline/jit/parse/python_adapter.h" +#include "frontend/operator/ops.h" +#include "frontend/operator/composite/composite.h" #include "ir/func_graph_cloner.h" #include "utils/symbolic.h" #include "utils/context/ms_context.h" #include "debug/trace.h" -#include "optimizer/ad/grad.h" +#include "frontend/optimizer/ad/grad.h" namespace mindspore { namespace parse { diff --git a/mindspore/ccsrc/pipeline/parse/data_converter.h b/mindspore/ccsrc/pipeline/jit/parse/data_converter.h similarity index 95% rename from mindspore/ccsrc/pipeline/parse/data_converter.h rename to mindspore/ccsrc/pipeline/jit/parse/data_converter.h index 0165b55363..6632d4801e 100644 --- a/mindspore/ccsrc/pipeline/parse/data_converter.h +++ b/mindspore/ccsrc/pipeline/jit/parse/data_converter.h @@ -24,8 +24,8 @@ #include #include #include -#include "pipeline/parse/parse_base.h" -#include "pipeline/parse/python_adapter.h" +#include "pipeline/jit/parse/parse_base.h" +#include "pipeline/jit/parse/python_adapter.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/pipeline/parse/function_block.cc b/mindspore/ccsrc/pipeline/jit/parse/function_block.cc similarity index 96% rename from mindspore/ccsrc/pipeline/parse/function_block.cc rename to mindspore/ccsrc/pipeline/jit/parse/function_block.cc index 
fbeeba94a1..b52dddda66 100644 --- a/mindspore/ccsrc/pipeline/parse/function_block.cc +++ b/mindspore/ccsrc/pipeline/jit/parse/function_block.cc @@ -16,13 +16,13 @@ * limitations under the License. */ -#include "pipeline/parse/function_block.h" +#include "pipeline/jit/parse/function_block.h" #include #include #include -#include "pipeline/parse/resolve.h" -#include "pipeline/parse/parse.h" -#include "operator/ops.h" +#include "pipeline/jit/parse/resolve.h" +#include "pipeline/jit/parse/parse.h" +#include "frontend/operator/ops.h" #include "debug/info.h" #include "debug/trace.h" #include "pybind11/pybind11.h" @@ -294,13 +294,18 @@ void FunctionBlock::Jump(const FunctionBlockPtr &target_block, AnfNodePtr node) // Perform a conditional jump using switch operation. // The first CNode select graph with condition, and than execute this graph void FunctionBlock::ConditionalJump(AnfNodePtr condNode, const FunctionBlockPtr &true_block, - const FunctionBlockPtr &false_block) { + const FunctionBlockPtr &false_block, bool unroll_loop) { if (func_graph()->get_return() != nullptr) { MS_LOG(EXCEPTION) << "Failure: have return node! 
NodeInfo: " << trace::GetDebugInfo(func_graph()->get_return()->debug_info()); } + // Here we need set an attribute to primtive 'switch', so we create a new variable instead of global 'kPrimSwitch' + auto prim_switch = std::make_shared(prim::kPrimSwitch->name()); + if (!unroll_loop) { + prim_switch->AddAttr(prim::SWITCH_UNROLL_FLAG, MakeValue(0)); + } CNodePtr switch_app = - func_graph()->NewCNode({NewValueNode(prim::kPrimSwitch), condNode, NewValueNode(true_block->func_graph()), + func_graph()->NewCNode({NewValueNode(prim_switch), condNode, NewValueNode(true_block->func_graph()), NewValueNode(false_block->func_graph())}); CNodePtr switch_app_new = func_graph()->NewCNode({switch_app}); func_graph()->set_output(switch_app_new); diff --git a/mindspore/ccsrc/pipeline/parse/function_block.h b/mindspore/ccsrc/pipeline/jit/parse/function_block.h similarity index 96% rename from mindspore/ccsrc/pipeline/parse/function_block.h rename to mindspore/ccsrc/pipeline/jit/parse/function_block.h index 346061430d..cbf75a3dd8 100644 --- a/mindspore/ccsrc/pipeline/parse/function_block.h +++ b/mindspore/ccsrc/pipeline/jit/parse/function_block.h @@ -26,7 +26,7 @@ #include #include #include -#include "pipeline/parse/parse_base.h" +#include "pipeline/jit/parse/parse_base.h" #include "utils/log_adapter.h" #include "utils/ordered_map.h" @@ -59,7 +59,8 @@ class FunctionBlock : public std::enable_shared_from_this { CNodePtr ForceToWhileCond(const AnfNodePtr &cond); void Jump(const FunctionBlockPtr &block, AnfNodePtr node); AnfNodePtr SearchReplaceNode(const std::string &var, const ParameterPtr &phi); - void ConditionalJump(AnfNodePtr condNode, const FunctionBlockPtr &trueBlock, const FunctionBlockPtr &falseBlock); + void ConditionalJump(AnfNodePtr condNode, const FunctionBlockPtr &trueBlock, const FunctionBlockPtr &falseBlock, + bool unroll_loop = true); // record the assign statement of self.xx weight parameter ,which will use state_setitem op void SetStateAssgin(const AnfNodePtr &target, 
const std::string &readid); void AddAutoDepend(const AnfNodePtr &target); diff --git a/mindspore/ccsrc/pipeline/parse/parse.cc b/mindspore/ccsrc/pipeline/jit/parse/parse.cc similarity index 91% rename from mindspore/ccsrc/pipeline/parse/parse.cc rename to mindspore/ccsrc/pipeline/jit/parse/parse.cc index 77e865cee9..edc9a66594 100644 --- a/mindspore/ccsrc/pipeline/parse/parse.cc +++ b/mindspore/ccsrc/pipeline/jit/parse/parse.cc @@ -16,15 +16,15 @@ * limitations under the License. */ -#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse.h" #include #include #include #include #include -#include "operator/ops.h" -#include "pipeline/parse/data_converter.h" -#include "operator/composite/composite.h" +#include "frontend/operator/ops.h" +#include "pipeline/jit/parse/data_converter.h" +#include "frontend/operator/composite/composite.h" #include "utils/context/ms_context.h" #include "debug/trace.h" @@ -1002,6 +1002,7 @@ CNodePtr Parser::GenerateIteratorInFor(const FunctionBlockPtr &block, const py:: AnfNodePtr iter_anf_node = ParseExprNode(block, iter_node); return block->func_graph()->NewCNode({op_iter, iter_anf_node}); } + CNodePtr Parser::GenerateCondInFor(const ParameterPtr &iter_param, const FunctionBlockPtr &header_block, const AnfNodePtr &op_hasnext) { MS_EXCEPTION_IF_NULL(header_block); @@ -1018,12 +1019,57 @@ FunctionBlockPtr Parser::GenerateBlockInFor(const TraceInfoPtr &trace_info) { // A for loop will generate 3 functions :the test, the body, and the continuation // for x in xs: // body -// it compiled to be following statement +// it is compiled to be following statement +// if len(xs) < max_loop_cnt: +// ParseForIter() // use iter to implement for loop, which always unroll loop +// else: +// ParseForLoop() // use loop var to implement for loop, which always sink loop +FunctionBlockPtr Parser::ParseFor(const FunctionBlockPtr &block, const py::object &node) { + MS_LOG(DEBUG) << "Process ast For, create an if else statement"; + 
MS_EXCEPTION_IF_NULL(block); + // create statement 'len(xs) < prim::MAX_FOR_LOOP_COUNT' + AnfNodePtr op_len = block->MakeResolveSymbol(NAMED_PRIMITIVE_LEN); + py::object iter_obj = python_adapter::GetPyObjAttr(node, NAMED_PRIMITIVE_ITER); + AnfNodePtr iter_node = ParseExprNode(block, iter_obj); + CNodePtr len_iter = block->func_graph()->NewCNode({op_len, iter_node}); + CNodePtr bool_node = block->func_graph()->NewCNode( + {NewValueNode(prim::kPrimScalarLt), len_iter, NewValueNode(prim::MAX_FOR_LOOP_COUNT)}); + + // create statement 'if len(xs) < prim::MAX_FOR_LOOP_COUNT then ParseForIter else ParseForLoop' + TraceManager::DebugTrace(std::make_shared(block->func_graph()->debug_info())); + FunctionBlockPtr true_block = MakeFunctionBlock(*this); + TraceManager::EndTrace(); + + TraceManager::DebugTrace(std::make_shared(block->func_graph()->debug_info())); + FunctionBlockPtr false_block = MakeFunctionBlock(*this); + TraceManager::EndTrace(); + + MakeConditionBlocks(block, true_block, false_block); + + TraceManager::DebugTrace(std::make_shared(block->func_graph()->debug_info())); + FunctionBlockPtr after_block = MakeFunctionBlock(*this); + TraceManager::EndTrace(); + + FunctionBlockPtr true_end = ParseForIter(true_block, node); + true_end->Jump(after_block, nullptr); + + FunctionBlockPtr false_end = ParseForLoop(false_block, node); + false_end->Jump(after_block, nullptr); + + block->ConditionalJump(bool_node, true_block, false_block); + after_block->Mature(); + return after_block; +} + +// A for loop will generate 3 functions :the test, the body, and the continuation +// for x in xs: +// body +// it is compiled to be following statement // it = iter(xs) // while hastnext(it) // x, it = next(it) // body -FunctionBlockPtr Parser::ParseFor(const FunctionBlockPtr &block, const py::object &node) { +FunctionBlockPtr Parser::ParseForIter(const FunctionBlockPtr &block, const py::object &node) { MS_LOG(DEBUG) << "Process ast For"; MS_EXCEPTION_IF_NULL(block); AnfNodePtr op_iter = 
block->MakeResolveOperation(NAMED_PRIMITIVE_ITER); @@ -1088,6 +1134,88 @@ FunctionBlockPtr Parser::ParseFor(const FunctionBlockPtr &block, const py::objec // No 'break', no end_block. return after_block; } + +// A for loop will generate 3 functions :the test, the body, and the continuation +// for x in xs: +// body +// it is compiled to be following statement +// i = 0 +// while i < len(xs) +// x = xs[i] +// i = i + 1 +// body +FunctionBlockPtr Parser::ParseForLoop(const FunctionBlockPtr &block, const py::object &node) { + MS_LOG(DEBUG) << "Process ast For by loop variable"; + MS_EXCEPTION_IF_NULL(block); + AnfNodePtr op_len = block->MakeResolveSymbol(NAMED_PRIMITIVE_LEN); + AnfNodePtr op_getitem = block->MakeResolveOperation(NAMED_PRIMITIVE_GETITEM); + + // get varibale name of 'x' in statement 'for x in xs' + py::object target_node = python_adapter::GetPyObjAttr(node, "target"); + + // create statement 'len(xs)' + py::object iter_obj = python_adapter::GetPyObjAttr(node, "iter"); + AnfNodePtr iter_node = ParseExprNode(block, iter_obj); + MS_EXCEPTION_IF_NULL(iter_node); + CNodePtr len_iter = block->func_graph()->NewCNode({op_len, iter_node}); + + FunctionBlockPtr header_block = + GenerateBlockInFor(std::make_shared(block->func_graph()->debug_info())); + MS_EXCEPTION_IF_NULL(header_block); + // create loop variable 'i' + ParameterPtr loop_var = header_block->func_graph()->add_parameter(); + // create loop condition 'i < len(xs)' + CNodePtr cond_node = header_block->func_graph()->NewCNode({NewValueNode(prim::kPrimScalarLt), loop_var, len_iter}); + + // generate the body of the for statement + FunctionBlockPtr body_block = GenerateBlockInFor(std::make_shared(block->func_graph()->debug_info())); + MS_EXCEPTION_IF_NULL(body_block); + body_block->AddPrevBlock(header_block); + // create 'x = xs[i]' + CNodePtr target_var = body_block->func_graph()->NewCNode({op_getitem, iter_node, loop_var}); + WriteAssignVars(body_block, target_node, target_var); + // create 'i = i + 1' 
+ CNodePtr loop_var_inc = + body_block->func_graph()->NewCNode({NewValueNode(prim::kPrimScalarAdd), loop_var, NewValueNode(1)}); + body_block->WriteVariable(loop_var->name(), loop_var_inc); + + // link the variable name with the target + auto it_info = std::make_shared(loop_var_inc->debug_info()); + loop_var->debug_info()->set_trace_info(it_info); + len_iter->debug_info()->set_trace_info(it_info); + + TraceManager::DebugTrace(std::make_shared(block->func_graph()->debug_info())); + FunctionBlockPtr after_block = MakeFunctionBlock(*this); + MS_EXCEPTION_IF_NULL(after_block); + TraceManager::EndTrace(); + after_block->AddPrevBlock(header_block); + + block->Jump(header_block, NewValueNode(0)); + body_block->Mature(); + + header_block->ConditionalJump(cond_node, body_block, after_block, false); + + // Parse loop body statements with loop context. + LoopContext loop_context{&loops_, header_block, loop_var_inc}; + py::object body_node = python_adapter::GetPyObjAttr(node, "body"); + FunctionBlockPtr after_body_block = ParseStatements(body_block, body_node); + if (after_body_block->func_graph()->get_return() == nullptr) { + after_body_block->Jump(header_block, loop_var_inc); + } + + header_block->Mature(); + after_block->Mature(); + auto &end_block = loop_context.EndBlock(); + if (end_block) { + // end_block exists if we encounter 'break' in loop body. + after_block->Jump(end_block, nullptr); + end_block->Mature(); + return end_block; + } + // No 'break', no end_block. 
+ return after_block; +} + AnfNodePtr Parser::ParseIfExp(const FunctionBlockPtr &block, const py::object &node) { MS_LOG(DEBUG) << "Process ast IfExp"; MS_EXCEPTION_IF_NULL(block); diff --git a/mindspore/ccsrc/pipeline/parse/parse.h b/mindspore/ccsrc/pipeline/jit/parse/parse.h similarity index 97% rename from mindspore/ccsrc/pipeline/parse/parse.h rename to mindspore/ccsrc/pipeline/jit/parse/parse.h index 19c503c6d0..90e965389f 100644 --- a/mindspore/ccsrc/pipeline/parse/parse.h +++ b/mindspore/ccsrc/pipeline/jit/parse/parse.h @@ -27,9 +27,9 @@ #include #include "utils/misc.h" #include "ir/anf.h" -#include "pipeline/parse/parse_base.h" -#include "pipeline/parse/python_adapter.h" -#include "pipeline/parse/function_block.h" +#include "pipeline/jit/parse/parse_base.h" +#include "pipeline/jit/parse/python_adapter.h" +#include "pipeline/jit/parse/function_block.h" namespace mindspore { namespace parse { @@ -106,6 +106,8 @@ class Parser { FunctionBlockPtr ParseWhile(const FunctionBlockPtr &block, const py::object &node); // process a for statement FunctionBlockPtr ParseFor(const FunctionBlockPtr &block, const py::object &node); + FunctionBlockPtr ParseForIter(const FunctionBlockPtr &block, const py::object &node); + FunctionBlockPtr ParseForLoop(const FunctionBlockPtr &block, const py::object &node); // process a function def statement FunctionBlockPtr ParseFunctionDef(const FunctionBlockPtr &block, const py::object &node); // process a augment assign diff --git a/mindspore/ccsrc/pipeline/parse/parse_base.h b/mindspore/ccsrc/pipeline/jit/parse/parse_base.h similarity index 99% rename from mindspore/ccsrc/pipeline/parse/parse_base.h rename to mindspore/ccsrc/pipeline/jit/parse/parse_base.h index 4961ab78c0..bdd79d00bd 100644 --- a/mindspore/ccsrc/pipeline/parse/parse_base.h +++ b/mindspore/ccsrc/pipeline/jit/parse/parse_base.h @@ -87,6 +87,7 @@ const char PYTHON_PARSE_CLASS_ELLIPSIS[] = "create_ellipsis_obj"; const char PYTHON_MOD_GET_DEFAULT_INPUT[] = 
"get_default_input"; // define the common name +const char NAMED_PRIMITIVE_LEN[] = "len"; const char NAMED_PRIMITIVE_ITER[] = "iter"; const char NAMED_PRIMITIVE_NEXT[] = "next"; const char NAMED_PRIMITIVE_GETITEM[] = "getitem"; diff --git a/mindspore/ccsrc/pipeline/parse/python_adapter.cc b/mindspore/ccsrc/pipeline/jit/parse/python_adapter.cc similarity index 98% rename from mindspore/ccsrc/pipeline/parse/python_adapter.cc rename to mindspore/ccsrc/pipeline/jit/parse/python_adapter.cc index df2f7d0d45..17be74b2a1 100644 --- a/mindspore/ccsrc/pipeline/parse/python_adapter.cc +++ b/mindspore/ccsrc/pipeline/jit/parse/python_adapter.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "pipeline/parse/python_adapter.h" +#include "pipeline/jit/parse/python_adapter.h" #include #include #include diff --git a/mindspore/ccsrc/pipeline/parse/python_adapter.h b/mindspore/ccsrc/pipeline/jit/parse/python_adapter.h similarity index 98% rename from mindspore/ccsrc/pipeline/parse/python_adapter.h rename to mindspore/ccsrc/pipeline/jit/parse/python_adapter.h index 98adcd4f73..0f49539bc8 100644 --- a/mindspore/ccsrc/pipeline/parse/python_adapter.h +++ b/mindspore/ccsrc/pipeline/jit/parse/python_adapter.h @@ -24,7 +24,7 @@ #include "pybind11/pybind11.h" #include "pybind11/stl.h" -#include "pipeline/parse/parse_base.h" +#include "pipeline/jit/parse/parse_base.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/pipeline/parse/resolve.cc b/mindspore/ccsrc/pipeline/jit/parse/resolve.cc similarity index 87% rename from mindspore/ccsrc/pipeline/parse/resolve.cc rename to mindspore/ccsrc/pipeline/jit/parse/resolve.cc index 87c2f78b42..8d4c402639 100644 --- a/mindspore/ccsrc/pipeline/parse/resolve.cc +++ b/mindspore/ccsrc/pipeline/jit/parse/resolve.cc @@ -14,21 +14,21 @@ * limitations under the License. 
*/ -#include "pipeline/parse/resolve.h" +#include "pipeline/jit/parse/resolve.h" #include #include #include #include -#include "ir/param_value_py.h" -#include "pipeline/parse/data_converter.h" -#include "pipeline/parse/parse.h" -#include "pipeline/parse/python_adapter.h" +#include "ir/param_value.h" +#include "pipeline/jit/parse/data_converter.h" +#include "pipeline/jit/parse/parse.h" +#include "pipeline/jit/parse/python_adapter.h" #include "utils/any.h" -#include "operator/ops.h" -#include "optimizer/opt.h" -#include "optimizer/irpass.h" +#include "frontend/operator/ops.h" +#include "frontend/optimizer/opt.h" +#include "frontend/optimizer/irpass.h" #include "./common.h" namespace mindspore { @@ -103,16 +103,12 @@ AnfNodePtr ResolveParameterObj(const FuncGraphPtr &func_graph, const py::object } if (para_node == nullptr) { auto node = top_graph->AddWeightParameter(param_name); - auto param_value_new = std::make_shared(obj); - node->set_default_param(param_value_new); - + auto param_value = py::cast(python_adapter::GetPyObjAttr(obj, "_value")); + node->set_default_param(param_value); // set_abstract for parameter - auto to_convert = py::cast(python_adapter::GetPyObjAttr(obj, "default_input")); - ValuePtr converted = nullptr; - (void)ConvertData(to_convert, &converted); - bool broaden = true; - node->set_abstract(abstract::FromValue(converted, broaden)); - + ValuePtr value = param_value->value(); + constexpr bool broaden = true; + node->set_abstract(abstract::FromValue(value, broaden)); para_node = node; } auto iter = func_graph->make_ref_params().find(para_node); @@ -172,15 +168,15 @@ bool ResolveObjectToNode(const FuncGraphPtr &func_graph, const py::object &obj, return true; } -bool IsAllGraphInValueSequence(const std::vector &value_vec) { +bool IsAllFuncInValueSequence(const std::vector &value_vec) { for (auto &elem : value_vec) { if (elem->isa() || elem->isa()) { const auto &vec = GetValue>(elem); - auto is_graph = IsAllGraphInValueSequence(vec); + auto is_graph = 
IsAllFuncInValueSequence(vec); if (!is_graph) { return false; } - } else if (!elem->isa()) { + } else if (!elem->isa() && !elem->isa()) { return false; } } @@ -200,6 +196,8 @@ AnfNodePtr TransformToMakeTupleNodes(const FuncGraphManagerPtr &manager, const F FuncGraphPtr new_fg = elem->cast(); manager->AddFuncGraph(new_fg); node = NewValueNode(new_fg); + } else if (elem->isa()) { + node = NewValueNode(elem); } else { MS_LOG(EXCEPTION) << "TransformToMakeTupleNodes error, expect funcgraph, got " << elem->ToString(); } @@ -209,19 +207,21 @@ AnfNodePtr TransformToMakeTupleNodes(const FuncGraphManagerPtr &manager, const F return cnode; } -// transform the ValueTuple or ValueList of graph node to make tuple of const graph node -bool TransformVectorGraphValueNode(const FuncGraphManagerPtr &manager, const FuncGraphPtr &func_graph, - const ValueNodePtr &value_node, AnfNodePtr *const transformed) { +// transform the ValueTuple or ValueList of graph/primitve node to make tuple of const graph/primitve node +bool TransformVectorFuncValueNode(const FuncGraphManagerPtr &manager, const FuncGraphPtr &func_graph, + const ValueNodePtr &value_node, AnfNodePtr *const transformed) { MS_EXCEPTION_IF_NULL(value_node); const auto &value_vec = GetValue>(value_node->value()); - if (!IsAllGraphInValueSequence(value_vec)) { + if (!IsAllFuncInValueSequence(value_vec)) { return false; } - // The celllist or ordered_cell will be parsed as valuetuple of const graph in it, + // (1) The celllist or ordered_cell will be parsed as valuetuple of const graph in it, // So if has graph in list, try to replace the node with make tuple of graph value node. // we do this because the graphmanger won't investigate the graph inside valuetuple, - // change the vector of graph to be make_tuple of graph value node + // change the vector of graph to be make_tuple of graph value node. + // (2) the primitve valuetuple or valuelist may encounter to abstract error, make it all + // independent nodes. 
auto node_tuple_graphs = TransformToMakeTupleNodes(manager, func_graph, value_vec); // replace the ret ptr to be make tuple of graph value node *transformed = node_tuple_graphs; @@ -255,8 +255,8 @@ AnfNodePtr ResolveSymbol(const FuncGraphManagerPtr &manager, const NameSpacePtr // if the constant node is constant of vector of graph ,add graph to manager if (IsValueNode(resolved_node) || IsValueNode(resolved_node)) { - (void)TransformVectorGraphValueNode(manager, node->func_graph(), resolved_node->cast(), - &resolved_node); + (void)TransformVectorFuncValueNode(manager, node->func_graph(), resolved_node->cast(), + &resolved_node); } TraceManager::EndTrace(); diff --git a/mindspore/ccsrc/pipeline/parse/resolve.h b/mindspore/ccsrc/pipeline/jit/parse/resolve.h similarity index 97% rename from mindspore/ccsrc/pipeline/parse/resolve.h rename to mindspore/ccsrc/pipeline/jit/parse/resolve.h index df5c54855f..d924f1ef44 100644 --- a/mindspore/ccsrc/pipeline/parse/resolve.h +++ b/mindspore/ccsrc/pipeline/jit/parse/resolve.h @@ -21,9 +21,9 @@ #include #include "ir/anf.h" #include "ir/manager.h" -#include "pipeline/parse/python_adapter.h" -#include "pipeline/parse/parse_base.h" -#include "pipeline/static_analysis/abstract_value.h" +#include "pipeline/jit/parse/python_adapter.h" +#include "pipeline/jit/parse/parse_base.h" +#include "abstract/abstract_value.h" #include "utils/log_adapter.h" // forward declaration of ResourceBase diff --git a/mindspore/ccsrc/pipeline/pass.cc b/mindspore/ccsrc/pipeline/jit/pass.cc similarity index 91% rename from mindspore/ccsrc/pipeline/pass.cc rename to mindspore/ccsrc/pipeline/jit/pass.cc index f6cfd6362c..bb9a517556 100644 --- a/mindspore/ccsrc/pipeline/pass.cc +++ b/mindspore/ccsrc/pipeline/jit/pass.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "pipeline/pass.h" +#include "pipeline/jit/pass.h" #include #include @@ -26,19 +26,19 @@ #include "ir/func_graph_cloner.h" #include "debug/anf_ir_utils.h" -#include "pipeline/parse/parse_base.h" -#include "pipeline/parse/data_converter.h" -#include "pipeline/resource.h" -#include "pipeline/validator.h" -#include "optimizer/optimizer.h" -#include "optimizer/cse.h" -#include "optimizer/graph_kernel_reuse.h" -#include "optimizer/clean.h" -#include "optimizer/irpass.h" -#include "optimizer/control_depend.h" -#include "parallel/step_parallel.h" -#include "parallel/step_auto_parallel.h" -#include "parallel/allreduce_fusion/step_allreduce_fusion.h" +#include "pipeline/jit/parse/parse_base.h" +#include "pipeline/jit/parse/data_converter.h" +#include "pipeline/jit/resource.h" +#include "pipeline/jit/validator.h" +#include "frontend/optimizer/optimizer.h" +#include "frontend/optimizer/cse.h" +#include "frontend/optimizer/graph_kernel_reuse.h" +#include "frontend/optimizer/clean.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/control_depend.h" +#include "frontend/parallel/step_parallel.h" +#include "frontend/parallel/step_auto_parallel.h" +#include "frontend/parallel/allreduce_fusion/step_allreduce_fusion.h" #include "utils/any.h" #include "utils/log_adapter.h" @@ -321,21 +321,19 @@ bool InferenceOptPreparePass(const ResourcePtr &res) { return true; } -std::vector kVmPasses = {{"simplify_data_structures", SimplifyDataStructuresPass}, - {"opt_a", OptPassAGroup}, +std::vector kVmPasses = {{"opt_a", OptPassAGroup}, + {"simplify_data_structures", SimplifyDataStructuresPass}, {"opt_b", OptPassBGroup}, {"cconv", CconvPass}, {"opt_graph_kernel_a", OptPassGraphKernelGroupA}, {"opt_graph_kernel_b", OptPassGraphKernelGroupB}, {"add_control_depend", AddControlDependPass}}; -std::vector kGePasses = {{"simplify_data_structures", SimplifyDataStructuresPass}, - {"opt_a", OptPassAGroup}, - {"opt_b", OptPassBGroup}, - {"add_control_depend", 
AddControlDependPass}, - {"opt_control", ControlGroup}, - {"opt_prepare", PrepareGroup}, - {"cconv", CconvPass}}; +std::vector kGePasses = { + {"opt_a", OptPassAGroup}, {"simplify_data_structures", SimplifyDataStructuresPass}, + {"opt_b", OptPassBGroup}, {"add_control_depend", AddControlDependPass}, + {"opt_control", ControlGroup}, {"opt_prepare", PrepareGroup}, + {"cconv", CconvPass}}; std::vector kPynativePasses = {{"opt_a", OptPassAGroup}, {"opt_b", OptPassBGroup}, {"cconv", CconvPass}}; } // namespace pipeline diff --git a/mindspore/ccsrc/pipeline/pass.h b/mindspore/ccsrc/pipeline/jit/pass.h similarity index 97% rename from mindspore/ccsrc/pipeline/pass.h rename to mindspore/ccsrc/pipeline/jit/pass.h index 9064df52ee..0233b6cf26 100644 --- a/mindspore/ccsrc/pipeline/pass.h +++ b/mindspore/ccsrc/pipeline/jit/pass.h @@ -21,7 +21,7 @@ #include #include #include -#include "pipeline/resource.h" +#include "pipeline/jit/resource.h" namespace mindspore { namespace pipeline { diff --git a/mindspore/ccsrc/pipeline/pipeline.cc b/mindspore/ccsrc/pipeline/jit/pipeline.cc similarity index 93% rename from mindspore/ccsrc/pipeline/pipeline.cc rename to mindspore/ccsrc/pipeline/jit/pipeline.cc index 6abe198f5a..05699793ff 100644 --- a/mindspore/ccsrc/pipeline/pipeline.cc +++ b/mindspore/ccsrc/pipeline/jit/pipeline.cc @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -#include "pipeline/pipeline.h" +#include "pipeline/jit/pipeline.h" #include #include @@ -24,27 +24,27 @@ #include #include -#include "ir/param_value_py.h" -#include "pipeline/pass.h" -#include "pipeline/parse/data_converter.h" -#include "optimizer/ad/dfunctor.h" +#include "ir/param_value.h" +#include "pipeline/jit/pass.h" +#include "pipeline/jit/parse/data_converter.h" +#include "frontend/optimizer/ad/dfunctor.h" #include "debug/anf_ir_dump.h" #include "debug/anf_ir_utils.h" #include "utils/config_manager.h" #include "utils/convert_utils.h" #include "utils/utils.h" #include "vm/segment_runner.h" -#include "parallel/context.h" -#include "parallel/graph_util/get_parallel_info.h" -#include "device/kernel_runtime_manager.h" +#include "frontend/parallel/context.h" +#include "frontend/parallel/graph_util/get_parallel_info.h" +#include "runtime/device/kernel_runtime_manager.h" #include "debug/trace.h" -#include "pynative/pynative_execute.h" -#include "optimizer/py_pass_manager.h" +#include "pipeline/pynative/pynative_execute.h" +#include "frontend/optimizer/py_pass_manager.h" #if (ENABLE_GE || ENABLE_D) -#include "pipeline/pipeline_ge.h" -#include "transform/convert.h" -#include "transform/df_graph_manager.h" +#include "pipeline/jit/pipeline_ge.h" +#include "transform/graph_ir/convert.h" +#include "transform/graph_ir/df_graph_manager.h" #endif namespace mindspore { @@ -289,7 +289,8 @@ std::map> ExecutorPy::FetchI MS_LOG(DEBUG) << "FetchInfoForQuantExport func graph(" << func_graph->ToString() << ") phase(" << phase_s << ")!"; std::map> fake_quant_table; auto filter = [](AnfNodePtr node) { - return !(IsPrimitiveCNode(node, prim::kPrimConv2D) || IsPrimitiveCNode(node, prim::kPrimMatMul)); + return !(IsPrimitiveCNode(node, prim::kPrimConv2D) || IsPrimitiveCNode(node, prim::kPrimMatMul) || + IsPrimitiveCNode(node, prim::kPrimDepthwiseConv2dNative)); }; std::vector nodes = DeepScopedGraphSearchWithFilter(func_graph->get_return(), AlwaysInclude, filter); auto is_quant_cnode 
= [](AnfNodePtr node) { @@ -327,6 +328,9 @@ std::map> ExecutorPy::FetchI x = cnode->input(1); count += 1; } + if (x->isa()) { + fake_quant_table[weight_name] = std::make_pair(nullptr, "input"); + } // get the fakequant parameter minq's name if (!is_quant_cnode(x)) { continue; @@ -374,34 +378,6 @@ void ExecutorPy::SaveCompiledGraph(const std::string &phase_s) { MS_LOG(INFO) << "End save compiled func graph!"; } -void ExecutorPy::SaveCompiledGraphToPb(const std::string &phase_s) { -#ifdef ENABLE_DUMP_IR - // save the graph to file in protobuf format - FuncGraphPtr func_graph = info_[phase_s]->resource->func_graph(); - MS_EXCEPTION_IF_NULL(func_graph); - if (phase_s.empty()) { - MS_LOG(ERROR) << "`phase` is empty '" << phase_s << "'!"; - return; - } - std::string name_prefix = phase_s.substr(0, phase_s.find(".")); - std::string pb_filename = std::string("ms_output_") + name_prefix + ".pb"; - std::string filename = GetFilePathName(pb_filename); - - MS_LOG(INFO) << "Begin saving graph to file <<'" << filename << "' in protobuf formart."; - ChangeFileMode(filename, S_IRWXU); - std::ofstream ofs(filename); - if (!ofs.is_open()) { - MS_LOG(ERROR) << "Open file '" << filename << "' failed!"; - return; - } - ofs << GetFuncGraphProtoString(func_graph); - ofs.close(); - // set file mode to read only by user - ChangeFileMode(filename, S_IRUSR); - MS_LOG(INFO) << "End saving graph to file in protobuf format"; -#endif -} - bool ExecutorPy::ChangeExportGeirUseVmFlag(bool use_vm, const std::string &phase_s) const { std::string phase_prefix = GetPhasePrefix(phase_s); @@ -476,8 +452,6 @@ bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, cons info_[phase_s] = executor_info; pip->Run(); - // save compile graph to file in protobuf format - SaveCompiledGraphToPb(phase_s); // save the run graph func to MsPipeLine SaveCompiledGraph(phase_s); @@ -650,11 +624,8 @@ void Pipeline::Run() { draw::Draw(base_name + ".dot", graph); // generate IR file in human readable 
format DumpIR(base_name + ".ir", graph); - // generate IR file in a heavily commented format, which can also be reloaded - if (action.first != "parse") { - ExportIR(base_name + ".dat", std::to_string(i), graph); - } + ExportIR(base_name + ".dat", std::to_string(i), graph); } #ifdef MS_DEBUG // Dump graph cnode list @@ -727,10 +698,7 @@ void ProcessVmArgInner(const py::tuple &args, const ResourcePtr &res, VectorRef if (!param_ptr->has_default()) { MS_LOG(EXCEPTION) << "Parameter[" << i << "] has no default param"; } - auto param_value = std::dynamic_pointer_cast(param_ptr->default_param()); - py::object obj = param_value->value(); - py::object p_value = py::cast(parse::python_adapter::GetPyObjAttr(obj, "default_input")); - (*arg_list).push_back(p_value); + arg_list->push_back(param_ptr->default_param()->value()); } } } diff --git a/mindspore/ccsrc/pipeline/pipeline.h b/mindspore/ccsrc/pipeline/jit/pipeline.h similarity index 97% rename from mindspore/ccsrc/pipeline/pipeline.h rename to mindspore/ccsrc/pipeline/jit/pipeline.h index 3f1274c417..705853d086 100644 --- a/mindspore/ccsrc/pipeline/pipeline.h +++ b/mindspore/ccsrc/pipeline/jit/pipeline.h @@ -29,10 +29,10 @@ #include "debug/draw.h" #include "ir/anf.h" #include "ir/tensor.h" -#include "pipeline/action.h" +#include "pipeline/jit/action.h" #include "vm/segment_runner.h" #include "vm/transform.h" -#include "pipeline/base.h" +#include "pipeline/jit/base.h" namespace mindspore { extern const char kMsConvert[]; @@ -72,7 +72,6 @@ class ExecutorPy : public std::enable_shared_from_this { ~ExecutorPy(); void SaveCompiledGraph(const std::string &phase_s); - void SaveCompiledGraphToPb(const std::string &phase_s); bool CompileInner(const py::object &obj, const py::tuple &args, const py::object &phase, bool use_vm); bool Compile(const py::object &obj, const py::tuple &args, const py::object &phase, bool use_vm); diff --git a/mindspore/ccsrc/pipeline/pipeline_ge.cc b/mindspore/ccsrc/pipeline/jit/pipeline_ge.cc similarity 
index 98% rename from mindspore/ccsrc/pipeline/pipeline_ge.cc rename to mindspore/ccsrc/pipeline/jit/pipeline_ge.cc index 8ec1602315..e08af4f2dc 100644 --- a/mindspore/ccsrc/pipeline/pipeline_ge.cc +++ b/mindspore/ccsrc/pipeline/jit/pipeline_ge.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "pipeline/pipeline_ge.h" +#include "pipeline/jit/pipeline_ge.h" #include #include @@ -24,12 +24,12 @@ #include "debug/anf_ir_dump.h" #include "ir/tensor.h" -#include "transform/convert.h" -#include "transform/df_graph_manager.h" -#include "transform/graph_builder.h" -#include "transform/graph_runner.h" +#include "transform/graph_ir/convert.h" +#include "transform/graph_ir/df_graph_manager.h" +#include "transform/graph_ir/graph_builder.h" +#include "transform/graph_ir/graph_runner.h" #include "debug/draw.h" -#include "pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" namespace mindspore { namespace pipeline { diff --git a/mindspore/ccsrc/pipeline/pipeline_ge.h b/mindspore/ccsrc/pipeline/jit/pipeline_ge.h similarity index 96% rename from mindspore/ccsrc/pipeline/pipeline_ge.h rename to mindspore/ccsrc/pipeline/jit/pipeline_ge.h index f3a363dbe8..f834125231 100644 --- a/mindspore/ccsrc/pipeline/pipeline_ge.h +++ b/mindspore/ccsrc/pipeline/jit/pipeline_ge.h @@ -26,8 +26,8 @@ #include #include "pybind11/pybind11.h" -#include "pipeline/base.h" -#include "operator/ops.h" +#include "pipeline/jit/base.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace pipeline { diff --git a/mindspore/ccsrc/pipeline/remove_value_node_dup.cc b/mindspore/ccsrc/pipeline/jit/remove_value_node_dup.cc similarity index 96% rename from mindspore/ccsrc/pipeline/remove_value_node_dup.cc rename to mindspore/ccsrc/pipeline/jit/remove_value_node_dup.cc index 47881e4b91..e9467e4aeb 100644 --- a/mindspore/ccsrc/pipeline/remove_value_node_dup.cc +++ b/mindspore/ccsrc/pipeline/jit/remove_value_node_dup.cc @@ -14,11 +14,11 @@ * limitations under the 
License. */ -#include "pipeline/remove_value_node_dup.h" +#include "pipeline/jit/remove_value_node_dup.h" #include "ir/anf.h" #include "ir/tensor.h" #include "ir/manager.h" -#include "optimizer/cse.h" +#include "frontend/optimizer/cse.h" #include "utils/log_adapter.h" #include "utils/hashing.h" diff --git a/mindspore/ccsrc/pipeline/remove_value_node_dup.h b/mindspore/ccsrc/pipeline/jit/remove_value_node_dup.h similarity index 98% rename from mindspore/ccsrc/pipeline/remove_value_node_dup.h rename to mindspore/ccsrc/pipeline/jit/remove_value_node_dup.h index 8f670c7dcf..b36544bdba 100644 --- a/mindspore/ccsrc/pipeline/remove_value_node_dup.h +++ b/mindspore/ccsrc/pipeline/jit/remove_value_node_dup.h @@ -19,7 +19,7 @@ #include #include -#include "ir/base.h" +#include "base/base.h" #include "ir/manager.h" namespace mindspore { diff --git a/mindspore/ccsrc/pipeline/resource.cc b/mindspore/ccsrc/pipeline/jit/resource.cc similarity index 82% rename from mindspore/ccsrc/pipeline/resource.cc rename to mindspore/ccsrc/pipeline/jit/resource.cc index faf1f2015d..ece128b77b 100644 --- a/mindspore/ccsrc/pipeline/resource.cc +++ b/mindspore/ccsrc/pipeline/jit/resource.cc @@ -16,16 +16,16 @@ * limitations under the License. 
*/ -#include "pipeline/resource.h" -#include "pipeline/pipeline.h" -#include "pipeline/static_analysis/static_analysis.h" +#include "pipeline/jit/resource.h" +#include "pipeline/jit/pipeline.h" +#include "pipeline/jit/static_analysis/static_analysis.h" #include "debug/draw.h" #include "debug/trace.h" #include "ir/dtype.h" -#include "pipeline/parse/data_converter.h" -#include "operator/ops.h" +#include "pipeline/jit/parse/data_converter.h" +#include "frontend/operator/ops.h" #include "utils/graph_utils.h" -#include "optimizer/ad/dfunctor.h" +#include "frontend/optimizer/ad/dfunctor.h" #include "vm/segment_runner.h" namespace mindspore { @@ -146,37 +146,35 @@ MethodMap &GetMethodMap() { }}, {kObjectTypeTensorType, { - {"__add__", std::string("add")}, // C.add - {"__sub__", std::string("sub")}, // C.sub - {"__mul__", std::string("mul")}, // C.mul - {"__truediv__", std::string("truediv")}, // C.truediv - {"__floordiv__", std::string("floordiv")}, // C.floordiv - {"__mod__", std::string("mod")}, // C.mod - {"__pow__", std::string("pow_")}, // C.pow - {"__floor__", std::string("array_floor")}, // C.array_floor - {"__trunc__", std::string("array_trunc")}, // C.array_trunc - {"__pos__", std::string("array_uadd")}, // C.array_uadd - {"__neg__", std::string("array_usub")}, // C.array_usub - {"__eq__", std::string("eq")}, // C.eq - {"__ne__", std::string("ne")}, // C.ne - {"__lt__", std::string("lt")}, // C.lt - {"__gt__", std::string("gt")}, // C.gt - {"__le__", std::string("le")}, // C.le - {"__ge__", std::string("ge")}, // C.ge - {"__matmul__", prim::kPrimDot}, // P.dot, - {"__len__", prim::kPrimArrayLen}, // P.array_len, - {"__getitem__", prim::kPrimArrayGetItem}, // P.array_getitem, - {"__setitem__", prim::kPrimArraySetItem}, // P.array_setitem, - {"__ms_iter__", std::string("array_iter")}, // C.array_iter - {"__ms_to_array__", prim::kPrimIdentity}, // P.identity, - {"item", prim::kPrimArrayToScalar}, // P.array_to_scalar, - {"transpose", std::string("transpose")}, // 
P.transpose - {"__bool__", std::string("tensor_bool")}, // C.tensor_bool - {"is_indexed_slices", prim::kPrimIsIndexedSlices}, // F.is_indexed_slices + {"__add__", std::string("add")}, // C.add + {"__sub__", std::string("sub")}, // C.sub + {"__mul__", std::string("mul")}, // C.mul + {"__truediv__", std::string("truediv")}, // C.truediv + {"__floordiv__", std::string("floordiv")}, // C.floordiv + {"__mod__", std::string("mod")}, // C.mod + {"__pow__", std::string("pow_")}, // C.pow + {"__floor__", std::string("array_floor")}, // C.array_floor + {"__trunc__", std::string("array_trunc")}, // C.array_trunc + {"__pos__", std::string("array_uadd")}, // C.array_uadd + {"__neg__", std::string("array_usub")}, // C.array_usub + {"__eq__", std::string("eq")}, // C.eq + {"__ne__", std::string("ne")}, // C.ne + {"__lt__", std::string("lt")}, // C.lt + {"__gt__", std::string("gt")}, // C.gt + {"__le__", std::string("le")}, // C.le + {"__ge__", std::string("ge")}, // C.ge + {"__matmul__", prim::kPrimDot}, // P.dot, + {"__len__", prim::kPrimArrayLen}, // P.array_len, + {"__getitem__", prim::kPrimArrayGetItem}, // P.array_getitem, + {"__setitem__", prim::kPrimArraySetItem}, // P.array_setitem, + {"__ms_iter__", std::string("array_iter")}, // C.array_iter + {"__ms_to_array__", prim::kPrimIdentity}, // P.identity, + {"item", prim::kPrimArrayToScalar}, // P.array_to_scalar, + {"transpose", std::string("transpose")}, // P.transpose + {"__bool__", std::string("tensor_bool")}, // C.tensor_bool }}, {kObjectTypeIndexedSlicesType, { - {"is_indexed_slices", prim::kPrimIsIndexedSlices}, // F.is_indexed_slices {"values", prim::kPrimIndexedSlicesGetValues}, // F.indexed_slices_get_values {"indices", prim::kPrimIndexedSlicesGetIndices}, // F.indexed_slices_get_indices {"dense_shape", prim::kPrimIndexedSlicesGetDenseShape}, // F.indexed_slices_get_dense_shape diff --git a/mindspore/ccsrc/pipeline/resource.h b/mindspore/ccsrc/pipeline/jit/resource.h similarity index 96% rename from 
mindspore/ccsrc/pipeline/resource.h rename to mindspore/ccsrc/pipeline/jit/resource.h index 0c1348fd94..819fdd3d20 100644 --- a/mindspore/ccsrc/pipeline/resource.h +++ b/mindspore/ccsrc/pipeline/jit/resource.h @@ -29,8 +29,8 @@ #include "utils/any.h" #include "utils/profile.h" #include "ir/manager.h" -#include "pipeline/static_analysis/prim.h" -#include "pipeline/static_analysis/static_analysis.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "pipeline/jit/static_analysis/static_analysis.h" #include "./common.h" namespace mindspore { diff --git a/mindspore/ccsrc/pipeline/static_analysis/abstract_function.cc b/mindspore/ccsrc/pipeline/jit/static_analysis/abstract_function.cc similarity index 98% rename from mindspore/ccsrc/pipeline/static_analysis/abstract_function.cc rename to mindspore/ccsrc/pipeline/jit/static_analysis/abstract_function.cc index ced4a518cb..8bdb2a0c6c 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/abstract_function.cc +++ b/mindspore/ccsrc/pipeline/jit/static_analysis/abstract_function.cc @@ -14,12 +14,11 @@ * limitations under the License. 
*/ -#include "pipeline/static_analysis/abstract_function.h" +#include "pipeline/jit/static_analysis/abstract_function.h" #include -#include "pipeline/static_analysis/analysis_context.h" -#include "pipeline/static_analysis/static_analysis.h" +#include "pipeline/jit/static_analysis/static_analysis.h" namespace mindspore { namespace abstract { diff --git a/mindspore/ccsrc/pipeline/static_analysis/abstract_function.h b/mindspore/ccsrc/pipeline/jit/static_analysis/abstract_function.h similarity index 99% rename from mindspore/ccsrc/pipeline/static_analysis/abstract_function.h rename to mindspore/ccsrc/pipeline/jit/static_analysis/abstract_function.h index 9e1cf9ba83..0823b21cd7 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/abstract_function.h +++ b/mindspore/ccsrc/pipeline/jit/static_analysis/abstract_function.h @@ -22,8 +22,8 @@ #include #include -#include "pipeline/static_analysis/abstract_value.h" -#include "pipeline/static_analysis/analysis_context.h" +#include "abstract/abstract_value.h" +#include "abstract/analysis_context.h" #include "ir/meta_func_graph.h" namespace mindspore { diff --git a/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc b/mindspore/ccsrc/pipeline/jit/static_analysis/evaluator.cc similarity index 98% rename from mindspore/ccsrc/pipeline/static_analysis/evaluator.cc rename to mindspore/ccsrc/pipeline/jit/static_analysis/evaluator.cc index 34ecfc8980..3e820eed3a 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc +++ b/mindspore/ccsrc/pipeline/jit/static_analysis/evaluator.cc @@ -14,13 +14,13 @@ * limitations under the License. 
*/ -#include "pipeline/static_analysis/evaluator.h" +#include "pipeline/jit/static_analysis/evaluator.h" #include #include #include "ir/func_graph_cloner.h" -#include "pipeline/static_analysis/utils.h" +#include "abstract/utils.h" #include "debug/trace.h" namespace mindspore { @@ -126,7 +126,11 @@ EvalResultPtr BaseFuncGraphEvaluator::Eval(AnalysisEnginePtr engine, const Abstr } MS_EXCEPTION_IF_NULL(ret_base); - MS_LOG(DEBUG) << "BaseFuncGraph " << fg->ToString() << " eval end, evaluated abstract: " << ret_base->ToString(); + MS_LOG(DEBUG) << "BaseFuncGraph " << fg->ToString() << " eval end, evaluated abstract: " << ret_base->ToString() + << ", is stub: " << fg->stub(); + if (fg->stub()) { + return std::make_shared(std::make_shared(), nullptr); + } return std::make_shared(ret_base, nullptr); } diff --git a/mindspore/ccsrc/pipeline/static_analysis/evaluator.h b/mindspore/ccsrc/pipeline/jit/static_analysis/evaluator.h similarity index 97% rename from mindspore/ccsrc/pipeline/static_analysis/evaluator.h rename to mindspore/ccsrc/pipeline/jit/static_analysis/evaluator.h index f6430eda84..461574257d 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/evaluator.h +++ b/mindspore/ccsrc/pipeline/jit/static_analysis/evaluator.h @@ -24,7 +24,8 @@ #include #include -#include "pipeline/static_analysis/static_analysis.h" +#include "pipeline/jit/static_analysis/static_analysis.h" +#include "utils/context/ms_context.h" namespace mindspore { namespace abstract { @@ -59,6 +60,13 @@ class Evaluator : public Base { } virtual EvalResultPtr AbstractEval(const AbstractBasePtrList &args_spec_list) { + auto context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context); + bool enable_sparse = context->enable_sparse(); + if (!enable_sparse) { + return nullptr; + } + auto is_abstract = std::any_of(args_spec_list.begin(), args_spec_list.end(), [](auto &arg) { if (arg->BuildType()->type_id() == kObjectTypeUndeterminedType) { return true; diff --git 
a/mindspore/ccsrc/pipeline/static_analysis/prim.cc b/mindspore/ccsrc/pipeline/jit/static_analysis/prim.cc similarity index 96% rename from mindspore/ccsrc/pipeline/static_analysis/prim.cc rename to mindspore/ccsrc/pipeline/jit/static_analysis/prim.cc index 99dc085989..99e613395c 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/prim.cc +++ b/mindspore/ccsrc/pipeline/jit/static_analysis/prim.cc @@ -16,7 +16,7 @@ * limitations under the License. */ -#include "pipeline/static_analysis/prim.h" +#include "pipeline/jit/static_analysis/prim.h" #include #include @@ -25,20 +25,20 @@ #include #include -#include "operator/cc_implementations.h" -#include "operator/ops.h" -#include "operator/composite/do_signature.h" -#include "operator/prim_to_function.h" -#include "pipeline/static_analysis/utils.h" +#include "frontend/operator/cc_implementations.h" +#include "frontend/operator/ops.h" +#include "frontend/operator/composite/do_signature.h" +#include "frontend/operator/prim_to_function.h" +#include "abstract/utils.h" #include "utils/symbolic.h" #include "./common.h" -#include "pipeline/resource.h" -#include "pipeline/parse/resolve.h" +#include "pipeline/jit/resource.h" +#include "pipeline/jit/parse/resolve.h" #include "ir/tensor.h" #include "utils/convert_utils.h" #include "utils/context/ms_context.h" -#include "pipeline/parse/data_converter.h" -#include "pipeline/static_analysis/param_validator.h" +#include "pipeline/jit/parse/data_converter.h" +#include "abstract/param_validator.h" #include "common/utils.h" namespace mindspore { @@ -146,10 +146,7 @@ PrimitiveEvalImplMap &GetPrimitiveToEvalImplMap() { using mindspore::parse::PyObjectWrapper; EvalResultPtr StandardPrimEvaluator::EvalPrim(const AnalysisEnginePtr &engine, const AbstractBasePtrList &args) { - auto context = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(context); - bool enable_sparse_flag = context->enable_sparse_flag(); - if (enable_sparse_flag && prim_ != prim::kPrimMakeTuple && prim_ != prim::kPrimSwitch) 
{ + if (prim_ != prim::kPrimMakeTuple && prim_ != prim::kPrimSwitch) { auto ret_abstract = AbstractEval(args); if (ret_abstract != nullptr) { MS_LOG(DEBUG) << "StandardPrimEvaluator eval Undetermined"; @@ -167,6 +164,14 @@ EvalResultPtr StandardPrimEvaluator::EvalPrim(const AnalysisEnginePtr &engine, c EvalResultPtr DoSignatureEvaluator::Run(AnalysisEnginePtr engine, const ConfigPtrList &args_conf_list, AnfNodeConfigPtr out_conf) { AbstractBasePtrList args_spec_list; + (void)std::transform(args_conf_list.begin(), args_conf_list.end(), std::back_inserter(args_spec_list), + [](const ConfigPtr &ref) -> AbstractBasePtr { return ref->GetEvaluatedValue()->abstract(); }); + auto ret_abstract = AbstractEval(args_spec_list); + if (ret_abstract != nullptr) { + MS_LOG(DEBUG) << "StandardPrimEvaluator eval Undetermined"; + return ret_abstract; + } + if (out_conf->node() == nullptr || !out_conf->node()->isa()) { MS_LOG(EXCEPTION) << "Node of out_conf should be CNode"; } @@ -181,9 +186,6 @@ EvalResultPtr DoSignatureEvaluator::Run(AnalysisEnginePtr engine, const ConfigPt } AnfNodePtrList args_inputs{out_node_inputs.begin() + 1, out_node_inputs.end()}; - (void)std::transform(args_conf_list.begin(), args_conf_list.end(), std::back_inserter(args_spec_list), - [](const ConfigPtr &ref) -> AbstractBasePtr { return ref->GetEvaluatedValue()->abstract(); }); - ScopePtr scope = kDefaultScope; if (out_conf != nullptr) { scope = out_conf->node()->scope(); @@ -321,6 +323,13 @@ AnfNodePtr MixedPrecisionCastHelper(AnfNodePtr source_node, AbstractBasePtr node } target_node = func_graph->NewCNode({NewValueNode(prim::kPrimMakeDict), func_graph->NewCNode(dict_key_nodes), func_graph->NewCNode(dict_value_nodes)}); + } else if (node_type->isa()) { + auto x = node_type->cast(); + std::string kwarg_key = x->get_key(); + AnfNodePtr kwarg_value_node = + func_graph->NewCNode({NewValueNode(prim::kPrimExtractKeywordArg), NewValueNode(kwarg_key), source_node}); + AnfNodePtr node = 
MixedPrecisionCastHelper(kwarg_value_node, x->get_arg(), target_type, func_graph); + target_node = func_graph->NewCNode({NewValueNode(prim::kPrimMakeKeywordArg), NewValueNode(kwarg_key), node}); } return target_node; } @@ -502,15 +511,10 @@ AbstractBasePtr PyInferRes2Abstract(const PrimitivePyPtr &prim_py, const py::dic } // end anonymous namespace EvalResultPtr PythonPrimEvaluator::EvalPrim(const AnalysisEnginePtr &, const AbstractBasePtrList &args) { - auto context = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(context); - bool enable_sparse_flag = context->enable_sparse_flag(); - if (enable_sparse_flag) { - auto ret_abstract = AbstractEval(args); - if (ret_abstract != nullptr) { - MS_LOG(DEBUG) << "PythonPrimEvaluator eval Undetermined"; - return ret_abstract; - } + auto ret_abstract = AbstractEval(args); + if (ret_abstract != nullptr) { + MS_LOG(DEBUG) << "PythonPrimEvaluator eval Undetermined"; + return ret_abstract; } MS_LOG(DEBUG) << "Eval for:" << prim_py_->ToString(); @@ -539,15 +543,10 @@ EvalResultPtr PythonPrimEvaluator::EvalPrim(const AnalysisEnginePtr &, const Abs } EvalResultPtr UniformPrimEvaluator::EvalPrim(const AnalysisEnginePtr &, const AbstractBasePtrList &args) { - auto context = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(context); - bool enable_sparse_flag = context->enable_sparse_flag(); - if (enable_sparse_flag) { - auto ret_abstract = AbstractEval(args); - if (ret_abstract != nullptr) { - MS_LOG(DEBUG) << "UniformPrimEvaluator eval Undetermined"; - return ret_abstract; - } + auto ret_abstract = AbstractEval(args); + if (ret_abstract != nullptr) { + MS_LOG(DEBUG) << "UniformPrimEvaluator eval Undetermined"; + return ret_abstract; } // if func_desc_.retval type is super class of parameter type, then make the retval type as parameter type. 
if (nargs_ != args.size()) { @@ -907,8 +906,6 @@ class RefToEmbedEvaluator : public SymbolicPrimEvaluator { auto ret = std::make_shared(type); auto ref_value = ref_abs->ref(); MS_EXCEPTION_IF_NULL(ref_value); - ret->set_sparse_grad(ref_value->sparse_grad()); - ret->set_has_indexed_slices_grad(ref_value->has_indexed_slices_grad()); return std::make_shared(ret, std::make_shared()); } @@ -923,8 +920,6 @@ class RefToEmbedEvaluator : public SymbolicPrimEvaluator { x = SensitivityTransform(x); std::shared_ptr key = std::make_shared(node, x); std::shared_ptr abs_scalar = std::make_shared(key, type); - abs_scalar->set_sparse_grad(x->sparse_grad()); - abs_scalar->set_has_indexed_slices_grad(x->has_indexed_slices_grad()); return std::make_shared(abs_scalar, std::make_shared()); } }; @@ -936,15 +931,10 @@ class GetAttrEvaluator : public TransitionPrimEvaluator { MS_DECLARE_PARENT(GetAttrEvaluator, TransitionPrimEvaluator); EvalResultPtr EvalPrim(const AnalysisEnginePtr &engine, const AbstractBasePtrList &args_spec_list, const ConfigPtr &in_conf0, const AnfNodeConfigPtr &out_conf) override { - auto context = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(context); - bool enable_sparse_flag = context->enable_sparse_flag(); - if (enable_sparse_flag) { - auto ret_abstract = AbstractEval(args_spec_list); - if (ret_abstract != nullptr) { - MS_LOG(DEBUG) << "GetAttrEvaluator eval Undetermined"; - return ret_abstract; - } + auto ret_abstract = AbstractEval(args_spec_list); + if (ret_abstract != nullptr) { + MS_LOG(DEBUG) << "GetAttrEvaluator eval Undetermined"; + return ret_abstract; } // Inputs: data, item if (args_spec_list.size() != 2) { diff --git a/mindspore/ccsrc/pipeline/static_analysis/prim.h b/mindspore/ccsrc/pipeline/jit/static_analysis/prim.h similarity index 99% rename from mindspore/ccsrc/pipeline/static_analysis/prim.h rename to mindspore/ccsrc/pipeline/jit/static_analysis/prim.h index 1346dba2a2..692fbe66e8 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/prim.h 
+++ b/mindspore/ccsrc/pipeline/jit/static_analysis/prim.h @@ -25,7 +25,7 @@ #include #include -#include "pipeline/static_analysis/evaluator.h" +#include "pipeline/jit/static_analysis/evaluator.h" namespace mindspore { namespace abstract { @@ -349,7 +349,6 @@ AbstractBasePtr InferImplControlDepend(const AnalysisEnginePtr &, const Primitiv AbstractBasePtr InferImplDebug(const AnalysisEnginePtr &, const PrimitivePtr &primitive, const AbstractBasePtrList &args_spec_list); -void InitUndeterminedFromEnv(const std::string &sparse_shape_types); AbstractBasePtr InferImplMakeIndexedSlices(const AnalysisEnginePtr &, const PrimitivePtr &primitive, const AbstractBasePtrList &args_spec_list); diff --git a/mindspore/ccsrc/pipeline/static_analysis/program_specialize.cc b/mindspore/ccsrc/pipeline/jit/static_analysis/program_specialize.cc similarity index 96% rename from mindspore/ccsrc/pipeline/static_analysis/program_specialize.cc rename to mindspore/ccsrc/pipeline/jit/static_analysis/program_specialize.cc index e01b98841b..ad39190dc3 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/program_specialize.cc +++ b/mindspore/ccsrc/pipeline/jit/static_analysis/program_specialize.cc @@ -16,14 +16,14 @@ * limitations under the License. 
*/ -#include "pipeline/static_analysis/program_specialize.h" +#include "pipeline/jit/static_analysis/program_specialize.h" #include #include #include "./common.h" -#include "operator/ops.h" -#include "operator/composite/do_signature.h" -#include "pipeline/static_analysis/abstract_function.h" +#include "frontend/operator/ops.h" +#include "frontend/operator/composite/do_signature.h" +#include "pipeline/jit/static_analysis/abstract_function.h" #include "utils/graph_utils.h" #include "utils/log_adapter.h" #include "utils/profile.h" @@ -321,7 +321,7 @@ AnfNodePtr FuncGraphSpecializer::BuildSpecializedNode(const AnfNodePtr &node, co AbstractFunctionPtr func = real_a->GetUnique(); SpecializeStatusCode errcode; ScopeGuard scope_guard(node->scope()); - AnfNodePtr repl = BuildSpecializedNodeInner(abs, func, argvals, &errcode); + AnfNodePtr repl = BuildSpecializedNodeInner(node, abs, func, argvals, &errcode); if (repl == nullptr) { if (errcode == kSpecializeFindUniqueArgvalDead) { const auto error_dead_node = std::make_shared(kDeadNode, node); @@ -340,7 +340,8 @@ AnfNodePtr FuncGraphSpecializer::BuildSpecializedNode(const AnfNodePtr &node, co return repl; } -AnfNodePtr FuncGraphSpecializer::BuildSpecializedNodeInner(const AbstractBasePtr &abs, const AbstractFunctionPtr &func, +AnfNodePtr FuncGraphSpecializer::BuildSpecializedNodeInner(const AnfNodePtr &node, const AbstractBasePtr &abs, + const AbstractFunctionPtr &func, const AbstractBasePtrList &args, SpecializeStatusCode *errcode) { MS_EXCEPTION_IF_NULL(abs); @@ -384,7 +385,14 @@ AnfNodePtr FuncGraphSpecializer::BuildSpecializedNodeInner(const AbstractBasePtr AnalysisContextPtr context = real_eval->MakeContext(engine_, argvals); MS_LOG(DEBUG) << "Specialize function graph: " << context->func_graph()->ToString() << ", args: " << argvals.size() << ", graph: " << context->func_graph()->get_return()->DebugString(); + if (context->func_graph()->stub()) { + MS_LOG(DEBUG) << "Specialize stub function graph, return the original 
node: " << context->func_graph()->ToString() + << ", args: " << argvals.size() << ", graph: " << context->func_graph()->get_return()->DebugString() + << ", " << node->ToString(); + return node; + } FuncGraphPtr v = specializer_->SpecializeFuncGraph(context->func_graph(), context); + v->set_flag(kFuncGraphFlagUndetermined, false); return BuildValueNode(v, abs); } @@ -613,7 +621,8 @@ SpecializeStatusCode FuncGraphSpecializer::FindUniqueArgvals(const AbstractFunct *result = std::make_pair(choices->begin()->first, choices->begin()->second->abstract()); return kSpecializeSuccess; } else if (choices->empty()) { - MS_LOG(DEBUG) << "Find DEAD code, it may be optimized in later phase."; + MS_LOG(DEBUG) << "Find DEAD code, it may be optimized in later phase " << func->ToString() << " | " + << func->type_name(); return kSpecializeFindUniqueArgvalDead; } else { if (IsPolyFunc(func, argvals)) { diff --git a/mindspore/ccsrc/pipeline/static_analysis/program_specialize.h b/mindspore/ccsrc/pipeline/jit/static_analysis/program_specialize.h similarity index 94% rename from mindspore/ccsrc/pipeline/static_analysis/program_specialize.h rename to mindspore/ccsrc/pipeline/jit/static_analysis/program_specialize.h index b04978586d..d7f95be4ca 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/program_specialize.h +++ b/mindspore/ccsrc/pipeline/jit/static_analysis/program_specialize.h @@ -29,7 +29,7 @@ #include "ir/anf.h" #include "ir/func_graph_cloner.h" -#include "pipeline/static_analysis/evaluator.h" +#include "pipeline/jit/static_analysis/evaluator.h" namespace mindspore { namespace abstract { @@ -118,8 +118,9 @@ class FuncGraphSpecializer : public std::enable_shared_from_this #include -#include "pipeline/static_analysis/utils.h" -#include "pipeline/static_analysis/prim.h" -#include "operator/ops.h" +#include "abstract/utils.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "frontend/operator/ops.h" #include "utils/symbolic.h" #include "ir/tensor.h" #include 
"ir/func_graph_cloner.h" #include "./common.h" -#include "pipeline/parse/data_converter.h" +#include "pipeline/jit/parse/data_converter.h" #include "debug/draw.h" -#include "pipeline/static_analysis/evaluator.h" +#include "pipeline/jit/static_analysis/evaluator.h" #include "debug/trace.h" namespace mindspore { @@ -612,10 +612,34 @@ EvalResultPtr AnfNodeConfig::GetEvaluatedValue() { return engine_.lock()->GetEvaluatedValue(self); } +abstract::AbstractBasePtr MakeAbstractClosure(const FuncGraphPtr &func_graph, + const abstract::AnalysisContextPtr &context) { + AnalysisContextPtr temp_context = context; + if (temp_context == nullptr) { + temp_context = abstract::AnalysisContext::DummyContext(); + } + return std::make_shared(func_graph, temp_context); +} + +abstract::AbstractBasePtr MakeAbstractClosure(const MetaFuncGraphPtr &meta_func_graph, const AnfNodePtr &anf_node) { + abstract::MetaFuncGraphAbstractClosurePtr meta_func_graph_fn; + if (anf_node == nullptr) { + meta_func_graph_fn = std::make_shared(meta_func_graph); + } else { + meta_func_graph_fn = std::make_shared(meta_func_graph, anf_node->scope()); + } + return meta_func_graph_fn; +} + +abstract::AbstractBasePtr MakeAbstractClosure(const PrimitivePtr &primitive, const AnfNodePtr &anf_node) { + auto prim_func = std::make_shared(primitive, anf_node); + return prim_func; +} + AbstractBasePtr ToAbstract(const ValuePtr &value, const AnalysisContextPtr &context, const AnfNodeConfigPtr &conf) { if (value->isa()) { auto func_graph = value->cast(); - return func_graph->MakeAbstractClosure(context); + return MakeAbstractClosure(func_graph, context); } AnfNodePtr anf_node = nullptr; if (conf != nullptr) { @@ -623,11 +647,11 @@ AbstractBasePtr ToAbstract(const ValuePtr &value, const AnalysisContextPtr &cont } if (value->isa()) { auto meta_func_graph = value->cast(); - return meta_func_graph->MakeAbstractClosure(anf_node); + return MakeAbstractClosure(meta_func_graph, anf_node); } if (value->isa()) { auto prim = 
value->cast(); - return prim->ToPrimAbstract(anf_node); + return MakeAbstractClosure(prim, anf_node); } return value->ToAbstract(); } diff --git a/mindspore/ccsrc/pipeline/static_analysis/static_analysis.h b/mindspore/ccsrc/pipeline/jit/static_analysis/static_analysis.h similarity index 98% rename from mindspore/ccsrc/pipeline/static_analysis/static_analysis.h rename to mindspore/ccsrc/pipeline/jit/static_analysis/static_analysis.h index a0b7ee5478..181696f756 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/static_analysis.h +++ b/mindspore/ccsrc/pipeline/jit/static_analysis/static_analysis.h @@ -33,10 +33,10 @@ #include "utils/log_adapter.h" #include "ir/anf.h" -#include "ir/primitive.h" -#include "pipeline/static_analysis/analysis_context.h" -#include "pipeline/static_analysis/abstract_function.h" -#include "pipeline/parse/parse.h" +#include "ir/primitive_py.h" +#include "abstract/analysis_context.h" +#include "pipeline/jit/static_analysis/abstract_function.h" +#include "pipeline/jit/parse/parse.h" namespace mindspore { namespace abstract { diff --git a/mindspore/ccsrc/pipeline/validator.cc b/mindspore/ccsrc/pipeline/jit/validator.cc similarity index 97% rename from mindspore/ccsrc/pipeline/validator.cc rename to mindspore/ccsrc/pipeline/jit/validator.cc index bbca3c8721..04aa6efd05 100644 --- a/mindspore/ccsrc/pipeline/validator.cc +++ b/mindspore/ccsrc/pipeline/jit/validator.cc @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -#include "pipeline/validator.h" +#include "pipeline/jit/validator.h" #include #include @@ -24,7 +24,7 @@ #include "ir/manager.h" #include "ir/dtype.h" #include "./common.h" -#include "pipeline/static_analysis/prim.h" +#include "pipeline/jit/static_analysis/prim.h" namespace mindspore { namespace validator { diff --git a/mindspore/ccsrc/pipeline/validator.h b/mindspore/ccsrc/pipeline/jit/validator.h similarity index 97% rename from mindspore/ccsrc/pipeline/validator.h rename to mindspore/ccsrc/pipeline/jit/validator.h index 61f7470349..041448aed9 100644 --- a/mindspore/ccsrc/pipeline/validator.h +++ b/mindspore/ccsrc/pipeline/jit/validator.h @@ -23,7 +23,7 @@ #include #include #include -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "ir/anf.h" #include "utils/misc.h" diff --git a/mindspore/ccsrc/pynative/CMakeLists.txt b/mindspore/ccsrc/pipeline/pynative/CMakeLists.txt similarity index 84% rename from mindspore/ccsrc/pynative/CMakeLists.txt rename to mindspore/ccsrc/pipeline/pynative/CMakeLists.txt index 5139160774..c15928ee76 100644 --- a/mindspore/ccsrc/pynative/CMakeLists.txt +++ b/mindspore/ccsrc/pipeline/pynative/CMakeLists.txt @@ -6,4 +6,4 @@ if (ENABLE_GE) endif () set_property(SOURCE ${_PYNATIVE_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PYNATIVE) -add_library(_mindspore_pynative_obj OBJECT ${_PYNATIVE_SRC_LIST}) +add_library(_mindspore_pipeline_pynative_obj OBJECT ${_PYNATIVE_SRC_LIST}) diff --git a/mindspore/ccsrc/pynative/base.h b/mindspore/ccsrc/pipeline/pynative/base.h similarity index 95% rename from mindspore/ccsrc/pynative/base.h rename to mindspore/ccsrc/pipeline/pynative/base.h index 60ae869227..afb6d0982b 100644 --- a/mindspore/ccsrc/pynative/base.h +++ b/mindspore/ccsrc/pipeline/pynative/base.h @@ -26,8 +26,8 @@ #include #include "pybind11/pybind11.h" -#include "ir/primitive.h" -#include "pipeline/static_analysis/abstract_value.h" +#include "ir/primitive_py.h" +#include 
"abstract/abstract_value.h" namespace mindspore { namespace pynative { diff --git a/mindspore/ccsrc/pynative/pynative_execute.cc b/mindspore/ccsrc/pipeline/pynative/pynative_execute.cc similarity index 89% rename from mindspore/ccsrc/pynative/pynative_execute.cc rename to mindspore/ccsrc/pipeline/pynative/pynative_execute.cc index f477bfbdcd..5e3add1b5f 100644 --- a/mindspore/ccsrc/pynative/pynative_execute.cc +++ b/mindspore/ccsrc/pipeline/pynative/pynative_execute.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "pynative/pynative_execute.h" +#include "pipeline/pynative/pynative_execute.h" #include #include @@ -24,33 +24,33 @@ #include "debug/trace.h" #include "ir/tensor_py.h" -#include "ir/param_value_py.h" +#include "ir/param_value.h" #include "utils/any.h" #include "utils/utils.h" #include "utils/context/ms_context.h" -#include "operator/ops.h" -#include "operator/composite/composite.h" -#include "operator/composite/do_signature.h" -#include "pipeline/parse/data_converter.h" -#include "pipeline/parse/parse_base.h" -#include "pipeline/parse/resolve.h" -#include "pipeline/static_analysis/prim.h" -#include "session/session_factory.h" -#include "pre_activate/pass/const_input_to_attr_registry.h" -#include "pre_activate/common/helper.h" -#include "pipeline/action.h" - -#include "pynative/base.h" +#include "frontend/operator/ops.h" +#include "frontend/operator/composite/composite.h" +#include "frontend/operator/composite/do_signature.h" +#include "pipeline/jit/parse/data_converter.h" +#include "pipeline/jit/parse/parse_base.h" +#include "pipeline/jit/parse/resolve.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "backend/session/session_factory.h" +#include "backend/optimizer/pass/const_input_to_attr_registry.h" +#include "backend/optimizer/common/helper.h" +#include "pipeline/jit/action.h" + +#include "pipeline/pynative/base.h" #include "pybind_api/api_register.h" #include "vm/transform.h" -#include "optimizer/ad/grad.h" -#include 
"pipeline/resource.h" -#include "pipeline/pipeline.h" -#include "pipeline/pass.h" +#include "frontend/optimizer/ad/grad.h" +#include "pipeline/jit/resource.h" +#include "pipeline/jit/pipeline.h" +#include "pipeline/jit/pass.h" #ifdef ENABLE_GE -#include "pynative/pynative_execute_ge.h" +#include "pipeline/pynative/pynative_execute_ge.h" #endif using mindspore::tensor::TensorPy; @@ -160,36 +160,83 @@ std::map> GetTypeIndex(const std::vector return type_indexes; } -std::map GetDstType(const py::tuple &py_args, +std::map GetDstType(const py::tuple &py_args, const std::map> &type_indexes) { - std::map dst_type; + std::map dst_type; for (auto it = type_indexes.begin(); it != type_indexes.end(); (void)++it) { auto type = it->first; auto indexes = it->second; - if (indexes.size() < 2) { + if (type == SignatureEnumDType::kDTypeEmptyDefaultValue || indexes.size() < 2) { continue; } - size_t m_index = indexes[0]; - for (size_t i = 1; i < indexes.size(); ++i) { - if (py::isinstance(py_args[indexes[i]])) { - m_index = indexes[i]; + size_t priority = 0; + TypeId max_type = TypeId::kTypeUnknown; + bool has_float = false; + bool has_int = false; + for (size_t index : indexes) { + if (!has_float && py::isinstance(py_args[index])) { + has_float = true; + } + if (!has_int && !py::isinstance(py_args[index]) && py::isinstance(py_args[index])) { + has_int = true; + } + if (py::isinstance(py_args[index])) { + auto arg = py::cast(py_args[index]); + TypeId arg_type_id = arg->data_type(); + auto type_priority = prim::type_map.find(arg_type_id); + if (type_priority == prim::type_map.end()) { + continue; + } + if (type_priority->second > priority) { + max_type = type_priority->first; + priority = type_priority->second; + } + } + } + if (max_type == TypeId::kNumberTypeBool) { + if (has_int) { + max_type = TypeId::kNumberTypeInt32; + } + if (has_float) { + max_type = TypeId::kNumberTypeFloat32; } } - (void)dst_type.insert(std::make_pair(type, m_index)); + 
(void)dst_type.insert(std::make_pair(type, max_type)); } return dst_type; } +std::string TypeIdToMsTypeStr(const TypeId &type_id) { + auto type_name = type_name_map.find(type_id); + if (type_name == type_name_map.end()) { + MS_LOG(EXCEPTION) << "For implicit type conversion, not support convert to the type: " << TypeIdToType(type_id); + } + return type_name->second; +} + +py::object DoAutoCast(const py::object &arg, const TypeId &type_id) { + py::tuple args(3); + std::string module_name = "mindspore.ops.functional"; + std::string op_name = "cast"; + args[0] = parse::python_adapter::GetPyFn(module_name, op_name); + args[1] = "Cast"; + + std::string dst_type_str = TypeIdToMsTypeStr(type_id); + module_name = "mindspore.common.dtype"; + py::object dst_type = parse::python_adapter::GetPyFn(module_name, dst_type_str); + py::tuple inputs(2); + inputs[0] = arg; + inputs[1] = dst_type; + args[2] = inputs; + + return RunOp(args)[0]; +} py::tuple ConvertInputs(const PrimitivePyPtr &prim, const py::list &args, py::tuple *const out_args, py::list *const out_args_list) { auto &py_args = *out_args; py::tuple input_mask(args.size()); for (size_t i = 0; i < args.size(); ++i) { - if (py::hasattr(args[i], "__parameter__")) { - input_mask[i] = true; - } else { - input_mask[i] = false; - } + input_mask[i] = py::hasattr(args[i], "__parameter__"); py_args[i] = GetTupleObj(args[i]); } auto signature = prim->signatures(); @@ -197,26 +244,33 @@ py::tuple ConvertInputs(const PrimitivePyPtr &prim, const py::list &args, py::tu (void)std::transform(signature.begin(), signature.end(), std::back_inserter(dtypes), [](const Signature &sig) { return sig.dtype; }); int empty_dtype_count = std::count(dtypes.begin(), dtypes.end(), SignatureEnumDType::kDTypeEmptyDefaultValue); - if (dtypes.size() == 0 || static_cast(dtypes.size()) == empty_dtype_count) { + if (dtypes.empty() || static_cast(dtypes.size()) == empty_dtype_count) { return input_mask; } auto type_indexes = GetTypeIndex(dtypes); auto dst_type 
= GetDstType(py_args, type_indexes); - for (size_t i = 0; i < py_args.size(); ++i) { + + for (size_t i = 0; i < dtypes.size(); ++i) { + if (dtypes[i] == SignatureEnumDType::kDTypeEmptyDefaultValue) { + continue; + } auto it = dst_type.find(dtypes[i]); - if (it != dst_type.end() && it->second != i && - (py::isinstance(py_args[i]) || py::isinstance(py_args[i]))) { - auto tensor_ptr = py::cast(py_args[it->second]); - if (py::isinstance(py_args[i])) { - py_args[i] = std::make_shared(py::cast(py_args[i]), tensor_ptr->Dtype()); - (*out_args_list)[i] = py_args[i]; - } else { - double arg_value = py::cast(py_args[i]); - py_args[i] = std::make_shared(arg_value, tensor_ptr->Dtype()); - (*out_args_list)[i] = py_args[i]; - } + if (it == dst_type.end() || it->second == kTypeUnknown) { continue; } + if (py::isinstance(py_args[i])) { + auto arg = py::cast(py_args[i]); + if (arg->data_type() == it->second) { + continue; + } + if (signature[i].rw == SignatureEnumRW::kRWWrite) { + prim::RaiseExceptionForConvertRefDtype(prim->name(), TypeIdToMsTypeStr(arg->data_type()), + TypeIdToMsTypeStr(it->second)); + } + } + py::object cast_output = DoAutoCast(py_args[i], it->second); + (*out_args)[i] = cast_output; + (*out_args_list)[i] = cast_output; } return input_mask; } @@ -297,14 +351,13 @@ py::object RunOpInVM(const OpExecInfoPtr &op_exec_info, PynativeStatusCode *stat for (size_t i = 0; i < op_inputs.size(); i++) { py::object input = op_inputs[i]; if (py::hasattr(input, "__parameter__")) { - result[i] = py::getattr(input, "data"); - } else { - auto tensor = py::cast(input); - auto new_tensor = std::make_shared(tensor->data_type(), tensor->shape(), tensor->data_ptr()); - new_tensor->set_device_address(tensor->device_address()); - new_tensor->set_dirty(tensor->is_dirty()); - result[i] = new_tensor; + input = py::getattr(input, "data"); } + auto tensor = py::cast(input); + auto new_tensor = std::make_shared(tensor->data_type(), tensor->shape(), tensor->data_ptr()); + 
new_tensor->set_device_address(tensor->device_address()); + new_tensor->set_dirty(tensor->is_dirty()); + result[i] = new_tensor; } *status = PYNATIVE_SUCCESS; MS_LOG(INFO) << "RunOpInVM end"; @@ -754,7 +807,7 @@ AnfNodePtr PynativeExecutor::GetInput(const py::object &obj, const py::object &o if (graph_info_map_[df_builder_].param_map.count(obj_id) == 0) { auto free_param = df_builder_->add_parameter(); free_param->set_name(param_name); - auto free_param_new = std::make_shared(obj); + auto free_param_new = py::cast(obj.attr("_value")); free_param->set_default_param(free_param_new); free_param->debug_info()->set_name(param_name); MS_LOG(DEBUG) << "Top graph set free parameter " << obj_id; @@ -926,7 +979,7 @@ std::vector PynativeExecutor::GetWeightsArgs(const py::object &weigh } } } else { - MS_LOG(EXCEPTION) << "training not paramter_tuple"; + MS_LOG(DEBUG) << "training not paramter_tuple"; } return w_args; } @@ -950,8 +1003,9 @@ abstract::AbstractBasePtrList PynativeExecutor::GetArgsSpec(const py::args &args for (const auto ¶m : df_builder_->parameters()) { auto param_node = std::static_pointer_cast(param); if (param_node->has_default()) { - auto param_value = std::dynamic_pointer_cast(param_node->default_param()); - AbstractBasePtr ptr = abstract::FromValue(parse::data_converter::PyDataToValue(param_value->value()), true); + const auto ¶m_value = param_node->default_param(); + ValuePtr value = param_value->value(); + AbstractBasePtr ptr = abstract::FromValue(value, true); if (ptr == nullptr) { MS_LOG(EXCEPTION) << "Args convert error"; } diff --git a/mindspore/ccsrc/pynative/pynative_execute.h b/mindspore/ccsrc/pipeline/pynative/pynative_execute.h similarity index 97% rename from mindspore/ccsrc/pynative/pynative_execute.h rename to mindspore/ccsrc/pipeline/pynative/pynative_execute.h index 83cbea88d4..152d58aca4 100644 --- a/mindspore/ccsrc/pynative/pynative_execute.h +++ b/mindspore/ccsrc/pipeline/pynative/pynative_execute.h @@ -28,11 +28,11 @@ #include 
"pybind11/pybind11.h" #include "pybind11/numpy.h" -#include "pynative/base.h" +#include "pipeline/pynative/base.h" #include "utils/context/ms_context.h" #include "ir/anf.h" -#include "pipeline/resource.h" -#include "operator/composite/composite.h" +#include "pipeline/jit/resource.h" +#include "frontend/operator/composite/composite.h" namespace mindspore { namespace pynative { diff --git a/mindspore/ccsrc/pynative/pynative_execute_ge.cc b/mindspore/ccsrc/pipeline/pynative/pynative_execute_ge.cc similarity index 98% rename from mindspore/ccsrc/pynative/pynative_execute_ge.cc rename to mindspore/ccsrc/pipeline/pynative/pynative_execute_ge.cc index 8e10468236..897c21fc90 100644 --- a/mindspore/ccsrc/pynative/pynative_execute_ge.cc +++ b/mindspore/ccsrc/pipeline/pynative/pynative_execute_ge.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "pynative/pynative_execute_ge.h" +#include "pipeline/pynative/pynative_execute_ge.h" #include #include @@ -24,10 +24,10 @@ #include "utils/any.h" #include "utils/utils.h" #include "utils/context/ms_context.h" -#include "operator/ops.h" -#include "pipeline/parse/data_converter.h" -#include "pipeline/static_analysis/prim.h" -#include "session/session_factory.h" +#include "frontend/operator/ops.h" +#include "pipeline/jit/parse/data_converter.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "backend/session/session_factory.h" #include "ir/tensor_py.h" const char SINGLE_OP_GRAPH[] = "single_op_graph"; diff --git a/mindspore/ccsrc/pynative/pynative_execute_ge.h b/mindspore/ccsrc/pipeline/pynative/pynative_execute_ge.h similarity index 90% rename from mindspore/ccsrc/pynative/pynative_execute_ge.h rename to mindspore/ccsrc/pipeline/pynative/pynative_execute_ge.h index 2dca3df018..2978278489 100644 --- a/mindspore/ccsrc/pynative/pynative_execute_ge.h +++ b/mindspore/ccsrc/pipeline/pynative/pynative_execute_ge.h @@ -23,10 +23,10 @@ #include #include -#include "pynative/base.h" -#include "transform/convert.h" 
-#include "transform/graph_runner.h" -#include "transform/types.h" +#include "pipeline/pynative/base.h" +#include "transform/graph_ir/convert.h" +#include "transform/graph_ir/graph_runner.h" +#include "transform/graph_ir/types.h" #include "utils/context/ms_context.h" using GeTensor = ge::Tensor; diff --git a/mindspore/ccsrc/predict/converter/attr_utils/convert_util.h b/mindspore/ccsrc/predict/converter/attr_utils/convert_util.h index 5c7551a190..612ccde1a5 100644 --- a/mindspore/ccsrc/predict/converter/attr_utils/convert_util.h +++ b/mindspore/ccsrc/predict/converter/attr_utils/convert_util.h @@ -25,7 +25,7 @@ #include #include #include "ir/tensor.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "predict/schema/inner/ms_generated.h" using TensorPtr = mindspore::tensor::TensorPtr; diff --git a/mindspore/ccsrc/predict/converter/kernel2ms.cc b/mindspore/ccsrc/predict/converter/kernel2ms.cc index 1b1277aade..04aceb62eb 100644 --- a/mindspore/ccsrc/predict/converter/kernel2ms.cc +++ b/mindspore/ccsrc/predict/converter/kernel2ms.cc @@ -18,7 +18,7 @@ #include #include "ir/anf.h" #include "predict/converter/lite_model/op_attr_packer.h" -#include "mindspore/ccsrc/operator/ops.h" +#include "mindspore/ccsrc/frontend/operator/ops.h" namespace mindspore { namespace executor { diff --git a/mindspore/ccsrc/predict/converter/kernel2ms.h b/mindspore/ccsrc/predict/converter/kernel2ms.h index 7013f88107..8cbc89ed6a 100644 --- a/mindspore/ccsrc/predict/converter/kernel2ms.h +++ b/mindspore/ccsrc/predict/converter/kernel2ms.h @@ -22,7 +22,7 @@ #include #include #include -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" #include "predict/converter/executor_tensor.h" #include "predict/schema/inner/ms_generated.h" #include "predict/converter/attr_utils/convert_util.h" diff --git a/mindspore/ccsrc/predict/converter/lite_model/op_attr_packer.h 
b/mindspore/ccsrc/predict/converter/lite_model/op_attr_packer.h index 89e38d1871..31f14ef73a 100644 --- a/mindspore/ccsrc/predict/converter/lite_model/op_attr_packer.h +++ b/mindspore/ccsrc/predict/converter/lite_model/op_attr_packer.h @@ -20,7 +20,7 @@ #include #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "predict/schema/inner/ms_generated.h" static constexpr size_t kNIndex = 0; diff --git a/mindspore/ccsrc/predict/predict.h b/mindspore/ccsrc/predict/predict.h index 7c65f16619..9125451492 100644 --- a/mindspore/ccsrc/predict/predict.h +++ b/mindspore/ccsrc/predict/predict.h @@ -19,7 +19,7 @@ #include #include -#include "session/session_basic.h" +#include "backend/session/session_basic.h" #include "predict/converter/kernel2ms.h" namespace mindspore { diff --git a/mindspore/ccsrc/device/CMakeLists.txt b/mindspore/ccsrc/runtime/device/CMakeLists.txt similarity index 96% rename from mindspore/ccsrc/device/CMakeLists.txt rename to mindspore/ccsrc/runtime/device/CMakeLists.txt index 652c04d4cd..9c95aee0dc 100644 --- a/mindspore/ccsrc/device/CMakeLists.txt +++ b/mindspore/ccsrc/runtime/device/CMakeLists.txt @@ -62,4 +62,4 @@ endif () set_property(SOURCE ${DEVICE_SRC_LIST} ${D_SRC_LIST} ${CPU_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE) -add_library(_mindspore_device_obj OBJECT ${DEVICE_SRC_LIST} ${D_SRC_LIST} ${CPU_SRC_LIST}) +add_library(_mindspore_runtime_device_obj OBJECT ${DEVICE_SRC_LIST} ${D_SRC_LIST} ${CPU_SRC_LIST}) diff --git a/mindspore/ccsrc/device/ascend/ascend_device_address.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc similarity index 93% rename from mindspore/ccsrc/device/ascend/ascend_device_address.cc rename to mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc index c4b8717fa5..1a87f3e6af 100644 --- a/mindspore/ccsrc/device/ascend/ascend_device_address.cc +++ 
b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc @@ -13,17 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "device/ascend/ascend_device_address.h" +#include "runtime/device/ascend/ascend_device_address.h" #include #include #include #include #include "runtime/mem.h" -#include "device/kernel_runtime_manager.h" -#include "device/convert_tensor_utils.h" +#include "runtime/device/kernel_runtime_manager.h" +#include "runtime/device/convert_tensor_utils.h" #include "ir/dtype/type.h" #include "ir/tensor.h" -#include "kernel/common_utils.h" +#include "backend/kernel_compiler/common_utils.h" #include "utils/utils.h" #include "common/utils.h" #include "common/trans.h" @@ -303,12 +303,22 @@ bool AscendDeviceAddress::ConvertFormatAndSyncHostToDevice(const std::vector(ptr_) - kMemAlignSize; +} + AscendDeviceAddress::~AscendDeviceAddress() { if (ptr_ == nullptr) { return; } if (from_mem_pool_) { - AscendMemoryPool::GetInstance().FreeTensorMem(ptr_); + if (communication_ptr_ != nullptr) { + AscendMemoryPool::GetInstance().FreeTensorMem(communication_ptr_); + communication_ptr_ = nullptr; + } else { + AscendMemoryPool::GetInstance().FreeTensorMem(ptr_); + } ptr_ = nullptr; } } @@ -360,12 +370,15 @@ bool AscendDeviceAddress::DumpMemToFile(bool trans_flag, const std::string &file #ifdef ENABLE_DEBUGGER bool AscendDeviceAddress::LoadMemToHost(bool trans_flag, const std::string &tensor_name, int execution_order, const std::string &host_fmt, const std::vector &host_shape, - TypeId host_type, size_t slot, Debugger *debugger) const { + TypeId host_type, size_t slot, Debugger *debugger, bool keep_prev) const { bool ret = false; - - DebugServices *debug_services = debugger->get_debug_services(); - TensorLoader *tensor_loader = debug_services->get_tensor_loader(); - + DebugServices *debug_services = debugger->debug_services(); + TensorLoader *tensor_loader = debug_services->tensor_loader(); + // 
TensorData is freed up in AscendSession class + auto tensor_data = std::make_shared(); + tensor_data->SetName(tensor_name); + tensor_data->SetExecutionOrder(execution_order); + tensor_data->SetSlot(slot); if (trans_flag) { MS_LOG(INFO) << "E2E tensor name is " << tensor_name; mindspore::tensor::TensorPtr out_tensor = std::make_shared(host_type, host_shape); @@ -375,28 +388,18 @@ bool AscendDeviceAddress::LoadMemToHost(bool trans_flag, const std::string &tens MS_LOG(ERROR) << "Copy device mem to host failed"; return ret; } - auto tensor_data = std::make_shared(); - tensor_data->SetName(tensor_name); - tensor_data->SetExecutionOrder(execution_order); tensor_data->SetTensor(out_tensor); - tensor_data->SetSlot(slot); - ret = tensor_loader->LoadNewTensor(tensor_data); } else { mindspore::tensor::TensorPtr out_tensor = std::make_shared(type_id_, host_shape); size_t host_size = out_tensor->data().nbytes(); auto ret_rt_memcpy = rtMemcpy(out_tensor->data_c(), host_size, ptr_, host_size, RT_MEMCPY_DEVICE_TO_HOST); - - auto tensor_data = std::make_shared(); - tensor_data->SetName(tensor_name); - tensor_data->SetExecutionOrder(execution_order); - tensor_data->SetTensor(out_tensor); - tensor_data->SetSlot(slot); - ret = tensor_loader->LoadNewTensor(tensor_data); if (ret_rt_memcpy != RT_ERROR_NONE) { MS_LOG(ERROR) << "SyncDeviceToHost: rtMemcpy mem size[" << size_ << "] fail, ret[" << ret_rt_memcpy << "]"; } MS_LOG(INFO) << "E2E tensor name is " << tensor_name; + tensor_data->SetTensor(out_tensor); } + ret = tensor_loader->LoadNewTensor(tensor_data, keep_prev); return ret; } #endif diff --git a/mindspore/ccsrc/device/ascend/ascend_device_address.h b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.h similarity index 90% rename from mindspore/ccsrc/device/ascend/ascend_device_address.h rename to mindspore/ccsrc/runtime/device/ascend/ascend_device_address.h index 16b9f7817a..78d7006b56 100644 --- a/mindspore/ccsrc/device/ascend/ascend_device_address.h +++ 
b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.h @@ -20,8 +20,8 @@ #include #include #include -#include "device/device_address.h" -#include "device/ascend/ascend_memory_pool.h" +#include "runtime/device/device_address.h" +#include "runtime/device/ascend/ascend_memory_pool.h" #include "ir/dtype.h" namespace mindspore { @@ -39,13 +39,15 @@ class AscendDeviceAddress : public DeviceAddress { bool SyncDeviceToHost(const std::vector &shape, size_t size, TypeId type, void *host_ptr) const override; bool SyncHostToDevice(const std::vector &shape, size_t size, TypeId type, const void *host_ptr) const override; DeviceAddressType DeviceType() const override { return DeviceAddressType::kAscend; } + void UpdateCommunicationAddress() override; #ifdef ENABLE_DUMP_E2E bool DumpMemToFile(bool dump_mode, const std::string &filepath, const std::string &host_fmt, const std::vector &host_shape, TypeId host_type) const; #endif #ifdef ENABLE_DEBUGGER bool LoadMemToHost(bool dump_mode, const std::string &tensor_name, int execution_order, const std::string &host_fmt, - const std::vector &host_shape, TypeId host_type, size_t slot, Debugger *debugger) const; + const std::vector &host_shape, TypeId host_type, size_t slot, Debugger *debugger, + bool keep_prev) const; #endif private: @@ -53,6 +55,7 @@ class AscendDeviceAddress : public DeviceAddress { bool ConvertFormatAndSyncHostToDevice(const std::vector &shape, size_t size, TypeId type, const void *host_ptr) const; void SyncStream() const; + uint8_t *communication_ptr_{nullptr}; }; using AscendDeviceAddressPtr = std::shared_ptr; } // namespace ascend diff --git a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc similarity index 87% rename from mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc rename to mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc index efdcb98755..3ab3a52d42 100644 --- 
a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc @@ -14,34 +14,35 @@ * limitations under the License. */ #define PATH_MAX 0x3ffff -#include "device/ascend/ascend_kernel_runtime.h" +#include "runtime/device/ascend/ascend_kernel_runtime.h" #include #include #include #include #include #include -#include "device/ascend/ascend_device_address.h" -#include "device/cpu/mpi/mpi_adapter.h" +#include "runtime/device/ascend/ascend_device_address.h" +#include "runtime/device/cpu/mpi/mpi_adapter.h" #include "utils/context/ms_context.h" #include "utils/mpi/mpi_config.h" -#include "device/ascend/profiling/profiling_manager.h" +#include "runtime/device/ascend/profiling/profiling_manager.h" #include "hccl/hcom.h" #include "common/trans.h" #include "runtime/context.h" -#include "device/ascend/ascend_label_assign.h" -#include "device/ascend/ascend_stream_assign.h" -#include "device/ascend/ascend_memory_pool.h" +#include "runtime/device/ascend/ascend_label_assign.h" +#include "runtime/device/ascend/ascend_stream_assign.h" +#include "runtime/device/ascend/ascend_memory_pool.h" #include "framework/ge_runtime/model_runner.h" -#include "device/ascend/tasksink/task_generator.h" -#include "session/anf_runtime_algorithm.h" -#include "device/ascend/profiling/profiling_utils.h" -#include "kernel/tbe/tbe_utils.h" -#include "kernel/tbe/tbe_python_funcs.h" -#include "pre_activate/mem_reuse/mem_reuse_checker.h" -#include "device/ascend/ascend_memory_manager.h" +#include "runtime/device/ascend/tasksink/task_generator.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "runtime/device/ascend/profiling/profiling_utils.h" +#include "backend/kernel_compiler/tbe/tbe_utils.h" +#include "backend/kernel_compiler/tbe/tbe_python_funcs.h" +#include "backend/optimizer/mem_reuse/mem_reuse_checker.h" +#include "runtime/device/ascend/ascend_memory_manager.h" #include "debug/tensor_load.h" +using ge::model_runner::ModelRunner; 
using mindspore::device::ascend::ProfilingManager; using mindspore::device::ascend::ProfilingUtils; using mindspore::device::ascend::tasksink::TaskGenerator; @@ -90,9 +91,16 @@ std::string GetRankId() { AscendKernelRuntime::~AscendKernelRuntime() { graph_model_map_.clear(); } void AscendKernelRuntime::ClearGraphModelMap() { +#ifdef ENABLE_DATA_DUMP + for (auto &iter : graph_data_dumper_) { + MS_LOG(INFO) << "[DataDump] Unload data dumper:" << iter.first; + iter.second->UnloadDumpInfo(); + } + graph_data_dumper_.clear(); +#endif for (auto &iter : graph_model_map_) { MS_LOG(INFO) << "Ge UnloadModel " << iter.first; - auto ret = ge::model_runner::ModelRunner::Instance().UnloadModel(iter.first); + auto ret = ModelRunner::Instance().UnloadModel(iter.first); if (!ret) { MS_LOG(ERROR) << "UnloadModel failed"; } @@ -107,7 +115,7 @@ void AscendKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id) { return; } MS_LOG(DEBUG) << "Ge UnloadModel " << iter->first; - auto ret = ge::model_runner::ModelRunner::Instance().UnloadModel(iter->first); + auto ret = ModelRunner::Instance().UnloadModel(iter->first); if (!ret) { MS_LOG(ERROR) << "UnloadModel failed"; } @@ -159,6 +167,10 @@ bool AscendKernelRuntime::Init() { } #endif +#ifdef ENABLE_DATA_DUMP + DataDumpParser::GetInstance().ParseDumpConfig(); +#endif + // Start up profiling before rtSetDevice ret = ProfilingManager::GetInstance().StartupProfiling(device_id_); if (!ret) { @@ -299,15 +311,24 @@ bool AscendKernelRuntime::DumpData(mindspore::session::KernelGraph *graph) { namespace { void LoadOutput(mindspore::session::KernelGraph *graph, Debugger *debugger) { MS_EXCEPTION_IF_NULL(graph); + // trans_flag: "true" means tensor values will be transferred to host format, otherwise not. 
bool trans_flag = false; const auto &apply_kernels = graph->execution_order(); // for kernels, execution order starts from 1 int exec_order = 1; + auto debugger_ = mindspore::Debugger::GetInstance(); + DebugServices *debug_services = debugger_->debug_services(); + auto watchpoint_table = debug_services->GetWatchpointTable(); for (const auto &node : apply_kernels) { MS_EXCEPTION_IF_NULL(node); auto node_name = AnfAlgo::GetCNodeName(node); std::string kernel_name = node->fullname_with_scope(); auto output_size = AnfAlgo::GetOutputTensorNum(node); + if (debugger_->partial_memory()) { + if (!debug_services->IsWatchPoint(kernel_name, watchpoint_table)) { + continue; + } + } for (size_t j = 0; j < output_size; ++j) { auto addr = AnfAlgo::GetOutputAddr(node, j); auto type = AnfAlgo::GetOutputInferDataType(node, j); @@ -322,7 +343,8 @@ void LoadOutput(mindspore::session::KernelGraph *graph, Debugger *debugger) { (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes), [](size_t inner_item) { return SizeToInt(inner_item); }); } - auto ret = ascend_addr->LoadMemToHost(trans_flag, tensor_name, exec_order, format, int_shapes, type, j, debugger); + auto ret = + ascend_addr->LoadMemToHost(trans_flag, tensor_name, exec_order, format, int_shapes, type, j, debugger, false); if (!ret) { MS_LOG(ERROR) << "LoadMemToHost: flag:" << trans_flag << ", tensor_name:" << tensor_name << ", host_format:" << format << ".!"; @@ -334,6 +356,7 @@ void LoadOutput(mindspore::session::KernelGraph *graph, Debugger *debugger) { void LoadParameters(mindspore::session::KernelGraph *graph, Debugger *debugger) { MS_EXCEPTION_IF_NULL(graph); + // trans_flag: "true" means tensor values will be transferred to host format, otherwise not. 
bool trans_flag = false; const auto ¶meters = graph->inputs(); // for parameters, set its execution order to be 0; @@ -356,7 +379,8 @@ void LoadParameters(mindspore::session::KernelGraph *graph, Debugger *debugger) (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes), [](size_t inner_item) { return SizeToInt(inner_item); }); } - auto ret = ascend_addr->LoadMemToHost(trans_flag, tensor_name, exec_order, format, int_shapes, type, 0, debugger); + auto ret = + ascend_addr->LoadMemToHost(trans_flag, tensor_name, exec_order, format, int_shapes, type, 0, debugger, true); if (!ret) { MS_LOG(ERROR) << "LoadMemToHost Failed: flag:" << trans_flag << ", path:" << tensor_name << ", host_format:" << format << ".!"; @@ -438,7 +462,7 @@ bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) { << ", wait_active_stream_list size:" << wait_active_stream_list.size() << ", force_copy_stream_list size:" << force_copy_stream_list.size(); std::vector> empty_list; - std::shared_ptr model = std::make_shared( + auto model = std::make_shared( task_info_list, empty_list, empty_list, empty_list, empty_list, wait_active_stream_list, force_copy_stream_list, 0, 0, 0, 0, 0, 0, resource_manager.get_cur_stream_num(), label_assign_instance.GetLabelNum(NOT_NULL(graph)), resource_manager.get_cur_event_num(), 0); @@ -475,21 +499,45 @@ bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) { std::shared_ptr listener; MS_LOG(INFO) << "LoadDavinciModel mode_id:" << model_iter->first; - bool status = ge::model_runner::ModelRunner::Instance().LoadDavinciModel(device_id_, 0, model_iter->first, - model_iter->second, listener); + bool status = + ModelRunner::Instance().LoadDavinciModel(device_id_, 0, model_iter->first, model_iter->second, listener); if (!status) { MS_LOG(EXCEPTION) << "Load Task Failed"; } if (ProfilingManager::GetInstance().IsProfiling()) { - auto task_ids = ge::model_runner::ModelRunner::Instance().GetTaskIdList(model_iter->first); - auto 
stream_ids = ge::model_runner::ModelRunner::Instance().GetStreamIdList(model_iter->first); + auto task_ids = ModelRunner::Instance().GetTaskIdList(model_iter->first); + auto stream_ids = ModelRunner::Instance().GetStreamIdList(model_iter->first); ProfilingUtils::ReportProfilingData(task_ids, stream_ids, NOT_NULL(graph)); } + +#ifdef ENABLE_DATA_DUMP + LaunchDataDump(NOT_NULL(graph)); +#endif + if (!ModelRunner::Instance().LoadModelComplete(model_iter->first)) { + MS_LOG(ERROR) << "Call ge runtime LoadModelComplete failed"; + return false; + } return true; } +#ifdef ENABLE_DATA_DUMP +void AscendKernelRuntime::LaunchDataDump(NotNull graph) { + if (!DataDumpParser::GetInstance().DumpEnabled()) { + return; + } + auto runtime_info_map = ModelRunner::Instance().GetRuntimeInfoMap(graph->graph_id()); + auto data_dumper = std::make_shared(graph.get(), runtime_info_map); + MS_EXCEPTION_IF_NULL(data_dumper); + data_dumper->LoadDumpInfo(); + auto ret = graph_data_dumper_.try_emplace(graph->graph_id(), data_dumper); + if (!ret.second) { + MS_LOG(WARNING) << "[DataDump] Insert graphId:" << graph->graph_id() << " data dumper failed"; + } +} +#endif + void AscendKernelRuntime::DebugTaskIdName(GraphId graph_id) { - auto task_ids = ge::model_runner::ModelRunner::Instance().GetTaskIdList(graph_id); + auto task_ids = ModelRunner::Instance().GetTaskIdList(graph_id); auto graph_task_names = ProfilingUtils::graph_kernel_name(); auto iter = graph_task_names.find(graph_id); if (iter != graph_task_names.end()) { @@ -522,7 +570,7 @@ bool AscendKernelRuntime::RunTask(const session::KernelGraph *graph) { return false; } - bool status = ge::model_runner::ModelRunner::Instance().RunModel(graph->graph_id(), input_tensors, output_tensors); + bool status = ModelRunner::Instance().RunModel(graph->graph_id(), input_tensors, output_tensors); if (!status) { MS_LOG(ERROR) << "Run task failed"; DebugTaskIdName(graph->graph_id()); diff --git a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h 
b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h similarity index 86% rename from mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h rename to mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h index 69ba8b295a..4f1663d4d5 100644 --- a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h @@ -19,11 +19,15 @@ #include #include #include -#include "device/kernel_runtime.h" +#include "runtime/device/kernel_runtime.h" #include "runtime/context.h" #include "framework/ge_runtime/davinci_model.h" -#include "device/kernel_runtime_manager.h" -#include "session/session_basic.h" +#include "runtime/device/kernel_runtime_manager.h" +#include "backend/session/session_basic.h" +#ifdef ENABLE_DATA_DUMP +#include "debug/data_dump_parser.h" +#include "runtime/device/ascend/dump/data_dumper.h" +#endif using ge::model_runner::TaskInfo; using std::unordered_map; @@ -66,6 +70,10 @@ class AscendKernelRuntime : public KernelRuntime { bool initialized_{false}; unordered_map>> task_map_; unordered_map> graph_model_map_; +#ifdef ENABLE_DATA_DUMP + void LaunchDataDump(NotNull graph); + unordered_map> graph_data_dumper_; +#endif }; MS_REG_KERNEL_RUNTIME(kAscendDevice, AscendKernelRuntime); diff --git a/mindspore/ccsrc/device/ascend/ascend_label_assign.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_label_assign.cc similarity index 98% rename from mindspore/ccsrc/device/ascend/ascend_label_assign.cc rename to mindspore/ccsrc/runtime/device/ascend/ascend_label_assign.cc index 2db81a1725..035f4dd8e3 100644 --- a/mindspore/ccsrc/device/ascend/ascend_label_assign.cc +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_label_assign.cc @@ -17,8 +17,8 @@ #include #include #include -#include "device/ascend/ascend_label_assign.h" -#include "session/anf_runtime_algorithm.h" +#include "runtime/device/ascend/ascend_label_assign.h" +#include "backend/session/anf_runtime_algorithm.h" static constexpr uint32_t 
kLabelGotoLabelId = 1; static constexpr uint32_t kLabelSwitchLabelId = 2; diff --git a/mindspore/ccsrc/device/ascend/ascend_label_assign.h b/mindspore/ccsrc/runtime/device/ascend/ascend_label_assign.h similarity index 97% rename from mindspore/ccsrc/device/ascend/ascend_label_assign.h rename to mindspore/ccsrc/runtime/device/ascend/ascend_label_assign.h index 98055576eb..6b09f2940e 100644 --- a/mindspore/ccsrc/device/ascend/ascend_label_assign.h +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_label_assign.h @@ -19,7 +19,7 @@ #include #include -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" #include "utils/contract.h" namespace mindspore { diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc new file mode 100644 index 0000000000..f9da0850c6 --- /dev/null +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc @@ -0,0 +1,137 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include "runtime/device/ascend/ascend_memory_manager.h" +#include "runtime/device/ascend/ascend_memory_pool.h" +#include "utils/context/ms_context.h" +#include "runtime/mem.h" +namespace mindspore { +namespace device { +namespace ascend { +constexpr uint64_t kAscendDeviceMemGB = 30; +constexpr uint64_t kMemSizeGB = 30; +constexpr uint64_t kAscendDeviceMemSize = (kAscendDeviceMemGB << kMemSizeGB); + +void AscendMemoryManager::MallocDeviceMemory() { + auto context_mem = GetDeviceMemSizeFromContext(); + device_mem_size_ = context_mem == 0 ? kAscendDeviceMemSize : context_mem; + dynamic_mem_offset_ = device_mem_size_; + auto ret = rtMalloc(reinterpret_cast(&device_mem_base_), dynamic_mem_offset_, RT_MEMORY_HBM); + + if (ret != RT_ERROR_NONE) { + MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << dynamic_mem_offset_ << "] fail, ret[" << ret << "]"; + } + + AscendMemoryPool::GetInstance().set_device_mem_pool_base(device_mem_base_); + AscendMemoryPool::GetInstance().set_graph_dynamic_mem_offset(dynamic_mem_offset_); +} + +uint64_t AscendMemoryManager::GetDeviceMemSizeFromContext() { + auto context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context); + auto variable_memory_max_size = context->variable_memory_max_size(); + if (variable_memory_max_size == "0") { + return 0; + } + MS_LOG(INFO) << "context variable_memory_max_size:" << variable_memory_max_size; + auto pos = variable_memory_max_size.find('*'); + if (pos == std::string::npos) { + MS_LOG(EXCEPTION) << "Invalid variable_memory_max_size"; + } + auto gb_str = variable_memory_max_size.substr(0, pos); + auto gb_var = std::stoull(gb_str); + MS_LOG(INFO) << "variable_memory_max_size(GB):" << gb_var; + if (gb_var > kAscendDeviceMemGB || gb_var == 0) { + MS_LOG(EXCEPTION) << "Invalid allocate memory size:" << gb_var << " which should be in (0-30]GB"; + } + return gb_var << kMemSizeGB; +} + +void AscendMemoryManager::FreeDeviceMemory() { + if (device_mem_base_ != nullptr) { + auto ret = 
rtFree(device_mem_base_); + if (ret != RT_ERROR_NONE) { + MS_LOG(ERROR) << "rtFree mem size[" << device_mem_size_ << "] fail, ret[" << ret << "]"; + } + device_mem_base_ = nullptr; + } + if (device_mem_pool_base_ != nullptr) { + auto ret = rtFree(device_mem_pool_base_); + if (ret != RT_ERROR_NONE) { + MS_LOG(ERROR) << "rtFree mem size[" << device_mem_pool_size_ << "] fail, ret[" << ret << "]"; + } + device_mem_pool_base_ = nullptr; + } +} + +void AscendMemoryManager::ResetDynamicMemory() { + total_dynamic_size_ = 0; + dynamic_mem_offset_ = device_mem_size_; + AscendMemoryPool::GetInstance().set_graph_dynamic_mem_offset(dynamic_mem_offset_); +} + +void *AscendMemoryManager::MallocMemFromMemPool(size_t size) { + auto align_size = GetCommonAlignSize(size); + return AscendMemoryPool::GetInstance().AllocTensorMem(align_size); +} + +uint8_t *AscendMemoryManager::MallocStaticMem(size_t size, bool communication_mem) { + size_t align_size = 0; + if (communication_mem) { + align_size = GetCommunicationAlignSize(size); + } else { + align_size = GetCommonAlignSize(size); + } + if (communication_mem) { + // create protect area [kMemAlignSize -- data -- kMemAlignSize] + uint8_t *alloc_address = reinterpret_cast(AscendMemoryPool::GetInstance().AllocTensorMem(align_size)); + return alloc_address + kMemAlignSize; + } else { + return reinterpret_cast(AscendMemoryPool::GetInstance().AllocTensorMem(align_size)); + } +} + +uint8_t *AscendMemoryManager::MallocDynamicMem(size_t size, bool communication_mem) { + size_t align_size = 0; + if (communication_mem) { + align_size = GetCommunicationAlignSize(size); + } else { + align_size = GetCommonAlignSize(size); + } + if (dynamic_mem_offset_ < align_size) { + MS_LOG(EXCEPTION) << "Out of memory!!! 
total[" << device_mem_size_ << "] (dynamic[" << total_dynamic_size_ + << "]) malloc [" << align_size << "] failed!"; + } + auto new_offset = dynamic_mem_offset_ - align_size; + auto device_mem_pool_offset = AscendMemoryPool::GetInstance().device_mem_pool_offset(); + if (new_offset <= device_mem_pool_offset) { + MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "] (dynamic[" << total_dynamic_size_ + << "] memory pool[" << device_mem_pool_offset << "])" + << " malloc [" << align_size << "] failed!"; + } + total_dynamic_size_ += align_size; + dynamic_mem_offset_ = new_offset; + AscendMemoryPool::GetInstance().set_graph_dynamic_mem_offset(dynamic_mem_offset_); + if (communication_mem) { + // create protect area [kMemAlignSize -- data -- kMemAlignSize] + return device_mem_base_ + new_offset + kMemAlignSize; + } else { + return device_mem_base_ + new_offset; + } +} +} // namespace ascend +} // namespace device +} // namespace mindspore diff --git a/mindspore/ccsrc/device/ascend/ascend_memory_manager.h b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.h similarity index 84% rename from mindspore/ccsrc/device/ascend/ascend_memory_manager.h rename to mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.h index 7fdd8f553e..720f15be00 100644 --- a/mindspore/ccsrc/device/ascend/ascend_memory_manager.h +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_MANAGER_H_ #define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_MANAGER_H_ -#include "device/memory_manager.h" +#include "runtime/device/memory_manager.h" namespace mindspore { namespace device { namespace ascend { @@ -27,8 +27,13 @@ class AscendMemoryManager : public MemoryManager { void MallocDeviceMemory() override; void FreeDeviceMemory() override; + void ResetDynamicMemory() override; void *MallocMemFromMemPool(size_t size) override; + protected: + uint8_t *MallocStaticMem(size_t 
size, bool communication_mem) override; + uint8_t *MallocDynamicMem(size_t size, bool communication_mem) override; + private: uint8_t *device_mem_pool_base_{nullptr}; uint64_t device_mem_pool_size_{0}; diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.cc new file mode 100644 index 0000000000..fe71ba43fc --- /dev/null +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.cc @@ -0,0 +1,75 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "runtime/device/ascend/ascend_memory_pool.h" +#include "runtime/device/ascend/ascend_kernel_runtime.h" +#include "utils/log_adapter.h" + +namespace mindspore { +namespace device { +namespace ascend { +size_t AscendMemoryPool::AllocDeviceMem(size_t size, DeviceMemPtr *addr) { + if (size == 0) { + MS_LOG(EXCEPTION) << "Can not alloc memory size(0) in memory pool !"; + } + if (device_mem_pool_offset_ + size >= graph_dynamic_mem_offset_) { + MS_LOG(EXCEPTION) << "Failed to alloc memory pool memory, the current device_mem_pool_offset_ [" + << device_mem_pool_offset_ << "], current graph_dynamic_mem_offset_ " << graph_dynamic_mem_offset_ + << "], need memory size [" << size << "]"; + } + *addr = device_mem_pool_base_ + device_mem_pool_offset_; + device_mem_pool_offset_ += size; + if (*addr == nullptr) { + MS_LOG(EXCEPTION) << "Alloc device address is nullptr, failed to alloc memory pool memory!"; + } + return size; +} + +bool AscendMemoryPool::FreeDeviceMem(const DeviceMemPtr &addr) { + MS_EXCEPTION_IF_NULL(addr); + return true; +} + +size_t AscendMemoryPool::AlignMemorySize(size_t size) const { + if (size == 0) { + MS_LOG(EXCEPTION) << "The align memory size is a zero !"; + } + return size; +} + +void AscendMemoryPool::set_device_mem_pool_base(uint8_t *device_mem_pool_base) { + MS_EXCEPTION_IF_NULL(device_mem_pool_base); + device_mem_pool_base_ = device_mem_pool_base; +} + +void AscendMemoryPool::set_graph_dynamic_mem_offset(uint64_t graph_dynamic_mem_offset) { + graph_dynamic_mem_offset_ = graph_dynamic_mem_offset; +} + +uint64_t AscendMemoryPool::device_mem_pool_offset() const { return device_mem_pool_offset_; } + +size_t AscendMemoryPool::free_mem_size() { + if (graph_dynamic_mem_offset_ < device_mem_pool_offset_) { + MS_LOG(EXCEPTION) << "graph dynamic mem offset [" << graph_dynamic_mem_offset_ + << "] less than device mem pool offset [" << device_mem_pool_offset_ << "]!"; + } + return graph_dynamic_mem_offset_ - device_mem_pool_offset_; +} + 
+size_t AscendMemoryPool::total_mem_size() { return graph_dynamic_mem_offset_ == 0 ? 0 : graph_dynamic_mem_offset_ - 1; } +} // namespace ascend +} // namespace device +} // namespace mindspore diff --git a/mindspore/ccsrc/device/ascend/ascend_memory_pool.h b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.h similarity index 78% rename from mindspore/ccsrc/device/ascend/ascend_memory_pool.h rename to mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.h index 7fa3ebc23e..7a75198ab4 100644 --- a/mindspore/ccsrc/device/ascend/ascend_memory_pool.h +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.h @@ -18,7 +18,7 @@ #define MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_POOL_H_ #include -#include "pre_activate/mem_reuse/mem_dynamic_allocator.h" +#include "backend/optimizer/mem_reuse/mem_dynamic_allocator.h" namespace mindspore { namespace device { @@ -32,11 +32,9 @@ class AscendMemoryPool : public DynamicMemPoolBestFit { size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr) override; bool FreeDeviceMem(const DeviceMemPtr &addr) override; void set_device_mem_pool_base(uint8_t *device_mem_pool_base); - void set_device_mem_pool_size(uint64_t device_mem_pool_size) { - device_mem_pool_size_ = device_mem_pool_size; - free_mem_size_ = device_mem_pool_size_; - total_mem_size_ = free_mem_size_; - } + void set_graph_dynamic_mem_offset(uint64_t graph_dynamic_mem_offset); + + uint64_t device_mem_pool_offset() const; size_t free_mem_size() override; size_t total_mem_size() override; @@ -48,16 +46,12 @@ class AscendMemoryPool : public DynamicMemPoolBestFit { protected: // The real size by memory alloc aligned. size_t AlignMemorySize(size_t size) const override; - // Get the minimum memory unit size using for dynamic extend. 
- size_t mem_alloc_unit_size() const override; private: AscendMemoryPool() = default; - bool has_malloc_{false}; uint8_t *device_mem_pool_base_{nullptr}; - uint64_t device_mem_pool_size_{0}; - size_t free_mem_size_{0}; - size_t total_mem_size_{0}; + uint64_t device_mem_pool_offset_{0}; + uint64_t graph_dynamic_mem_offset_{0}; }; } // namespace ascend } // namespace device diff --git a/mindspore/ccsrc/device/ascend/ascend_stream_assign.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc similarity index 76% rename from mindspore/ccsrc/device/ascend/ascend_stream_assign.cc rename to mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc index 736d6203e9..7cf5b94d45 100644 --- a/mindspore/ccsrc/device/ascend/ascend_stream_assign.cc +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "device/ascend/ascend_stream_assign.h" +#include "runtime/device/ascend/ascend_stream_assign.h" #include #include @@ -22,10 +22,10 @@ #include "ir/manager.h" #include "utils/context/ms_context.h" #include "common/utils.h" -#include "session/anf_runtime_algorithm.h" -#include "device/kernel_adjust.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "runtime/device/kernel_adjust.h" #include "predict/generator/utils/ir_model_util.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/helper.h" #include "utils/utils.h" namespace mindspore { @@ -48,6 +48,12 @@ void AscendStreamAssign::AssignStream(const NotNull &graph_ptr) CheckResourceAssign(graph_ptr); MS_LOG(INFO) << "After finish stream assign"; + FindStreamRelations(graph_ptr); + PrintStreamRelations(); + GetStreamRelations(); + PrintStreamGroups(); + FindEventRelations(graph_ptr); + // Get info for D Model AscendResourceMng &resource_manager = AscendResourceMng::GetInstance(); generator::IRModelUtil::GetInstance().set_event_num(resource_manager.get_cur_event_num()); @@ -501,6 +507,8 @@ void 
AscendStreamAssign::InsertEventHcomDependCommon(const NotNull *group) { + auto group_size = group->size(); + if (group_size == 0) { + return false; + } + for (const auto &item : stream_groups_) { + if (item.size() < group->size()) { + continue; + } + + bool flag = true; + for (size_t i = 0; i < group_size; i++) { + if (item[i] != group->at(i)) { + flag = false; + break; + } + } + + if (flag) { + return true; + } else { + continue; + } + } + + return false; } + +void AscendStreamAssign::DFS(uint32_t start, std::vector *group) { + auto it = stream_relations_.find(start); + if (it == stream_relations_.end()) { + if (!IsVecExist(group)) { + stream_groups_.emplace_back(*group); + } else { + MS_LOG(WARNING) << "DFS should not print this log"; + } + return; + } + + vector active_streams = stream_relations_[start]; + + for (const auto &item : active_streams) { + group->emplace_back(item); + DFS(item, group); + group->pop_back(); + } +} + +void AscendStreamAssign::GetStreamRelations() { + for (const auto &start : need_first_active_streams_) { + vector group{start}; + DFS(start, &group); + } +} + +void AscendStreamAssign::FindStreamRelations(const NotNull &graph_ptr) { + AscendResourceMng &resource_manager = AscendResourceMng::GetInstance(); + auto stream_num = resource_manager.get_cur_stream_num(); + if (stream_num <= 1) { + return; + } + + auto exe_orders = graph_ptr->execution_order(); + for (size_t i = 0; i < exe_orders.size(); i++) { + auto cur_cnode = exe_orders[i]; + auto name = AnfAlgo::GetCNodeName(cur_cnode); + if (name != kStreamSwitchOpName && name != kStreamActiveOpName) { + continue; + } + + // support:streamswitch is begin of the stream + if (name == kStreamSwitchOpName) { + GetStreamSwitchStreamRelation(cur_cnode); + } + + if (name == kStreamActiveOpName) { + GetStreamActiveStreamRelation(graph_ptr, i); + } + } +} + +void AscendStreamAssign::GetStreamSwitchStreamRelation(const CNodePtr &node_ptr) { + MS_EXCEPTION_IF_NULL(node_ptr); + auto cur_stream_id = 
AnfAlgo::GetStreamId(node_ptr); + auto true_stream_id = AnfAlgo::GetNodeAttr(node_ptr, kAttrTrueBranchStream); + if (true_stream_id <= cur_stream_id) { + MS_LOG(ERROR) << "StreamSwitch self stream id " << cur_stream_id + << " is greater than true branch stream id:" << true_stream_id; + } + auto it = stream_relations_.find(cur_stream_id); + if (it == stream_relations_.end()) { + stream_relations_[cur_stream_id] = {true_stream_id}; + } else { + auto iter = + std::find(stream_relations_[cur_stream_id].begin(), stream_relations_[cur_stream_id].end(), true_stream_id); + if (iter == stream_relations_[cur_stream_id].end()) { + stream_relations_[cur_stream_id].emplace_back(true_stream_id); + } + } +} + +void AscendStreamAssign::GetStreamActiveStreamRelation(const NotNull &graph_ptr, size_t index) { + StreamActiveKind kind = GetStreamActiveKind(graph_ptr, index); + if (kind == kInvalid) { + MS_LOG(INFO) << "Invalid streamActive kind"; + return; + } + + auto orders = graph_ptr->execution_order(); + auto cur_cnode = orders[index]; + auto cur_stream_id = AnfAlgo::GetStreamId(cur_cnode); + auto active_list = AnfAlgo::GetNodeAttr>(cur_cnode, kAttrActiveStreamList); + if (kind == kHead) { + uint32_t active_current_node = GetStreamByActivedStream(cur_stream_id); + if (active_current_node == kInvalidStreamId) { + MS_LOG(EXCEPTION) << "No stream to active streamactive stream"; + } + + for (const auto &item : active_list) { + if (item <= active_current_node) { + MS_LOG(WARNING) << "Actived stream is less than activing stream"; + continue; + } + auto it = + std::find(stream_relations_[active_current_node].begin(), stream_relations_[active_current_node].end(), item); + if (it == stream_relations_[active_current_node].end()) { + stream_relations_[active_current_node].emplace_back(item); + } + } + } + + if (kind == kMiddle) { + for (const auto &stream : active_list) { + if (stream <= cur_stream_id) { + MS_LOG(INFO) << "MIDDLE StreamActive active stream is less than self stream, no need 
deal"; + } else { + MS_LOG(ERROR) << "MIDDLE StreamActive active stream is greater than self stream, should not be exit now"; + } + } + } + + if (kind == kTail) { + auto it = stream_relations_.find(cur_stream_id); + if (it == stream_relations_.end()) { + stream_relations_[cur_stream_id] = active_list; + } else { + for (const auto &stream : active_list) { + if (stream <= cur_stream_id) { + MS_LOG(WARNING) << "Actived stream is less than activing stream"; + continue; + } + auto iter = std::find(stream_relations_[cur_stream_id].begin(), stream_relations_[cur_stream_id].end(), stream); + if (iter == stream_relations_[cur_stream_id].end()) { + stream_relations_[cur_stream_id].emplace_back(stream); + } + } + } + } +} + +StreamActiveKind AscendStreamAssign::GetStreamActiveKind(const NotNull &graph_ptr, size_t index) { + auto exe_orders = graph_ptr->execution_order(); + if (index >= exe_orders.size()) { + MS_LOG(EXCEPTION) << "Invalid op index:" << index; + } + + auto cur_cnode = exe_orders[index]; + auto cur_stream_id = AnfAlgo::GetStreamId(cur_cnode); + if (AnfAlgo::GetCNodeName(cur_cnode) != kStreamActiveOpName) { + MS_LOG(EXCEPTION) << "Current node name is not StreamActive"; + } + + if (index == 0) { + return kInvalid; + } + + if (index == exe_orders.size() - 1) { + return kInvalid; + } + + uint32_t pre_stream_id = UINT32_MAX; + uint32_t next_stream_id = UINT32_MAX; + int32_t start = SizeToInt(index) - 1; + for (int32_t i = start; i >= 0; i--) { + auto cnode = exe_orders[IntToSize(i)]; + auto name = AnfAlgo::GetCNodeName(cnode); + if (name == kSendOpName || name == kRecvOpName) { + continue; + } + + pre_stream_id = AnfAlgo::GetStreamId(cnode); + break; + } + + for (size_t i = index + 1; i < exe_orders.size(); i++) { + auto cnode = exe_orders[i]; + auto name = AnfAlgo::GetCNodeName(cnode); + if (name == kSendOpName || name == kRecvOpName) { + continue; + } + + next_stream_id = AnfAlgo::GetStreamId(cnode); + break; + } + + // pre_stream_id = UINT32_MAX:means no node 
active current StreamActive + // next_stream_id = UINT32_MAX:means current StreamActive active no node + if (pre_stream_id == UINT32_MAX || next_stream_id == UINT32_MAX) { + return kInvalid; + } + + if (cur_stream_id == pre_stream_id && cur_stream_id == next_stream_id) { + return kMiddle; + } + + if (cur_stream_id == pre_stream_id) { + return kTail; + } + + if (cur_stream_id == next_stream_id) { + return kHead; + } + + return kInvalid; +} + +uint32_t AscendStreamAssign::GetStreamByActivedStream(uint32_t actived_stream_id) { + if (stream_relations_.empty()) { + return kInvalidStreamId; + } + + for (const auto &item : stream_relations_) { + auto it = std::find(item.second.begin(), item.second.end(), actived_stream_id); + if (it != item.second.end()) { + return item.first; + } + } + + return kInvalidStreamId; +} + +void AscendStreamAssign::PrintStreamRelations() { + MS_LOG(INFO) << "Stream relations size:" << stream_relations_.size(); + for (const auto &item : stream_relations_) { + MS_LOG(INFO) << "Stream:" << item.first; + for (const auto &stream : item.second) { + MS_LOG(INFO) << "--actived stream id:" << stream; + } + } +} + +void AscendStreamAssign::PrintStreamGroups() { + MS_LOG(INFO) << "Stream group size:" << stream_groups_.size(); + for (const auto &item : stream_groups_) { + MS_LOG(INFO) << "Group:"; + for (const auto &stream : item) { + MS_LOG(INFO) << "Stream id:" << stream; + } + } +} + +// section 11 +bool AscendStreamAssign::IsSatisfiedEvent(uint32_t send_stream_id, uint32_t recv_stream_id) const { + size_t send_group = 0; + size_t recv_group = 0; + bool send_flag = true; + bool recv_flag = true; + for (size_t i = 0; i < stream_groups_.size(); i++) { + auto group = stream_groups_[i]; + if (send_flag) { + auto it = std::find(group.begin(), group.end(), send_stream_id); + if (it != group.end()) { + send_group = i; + send_flag = false; + } + } + + if (recv_flag) { + auto it = std::find(group.begin(), group.end(), recv_stream_id); + if (it != group.end()) { 
+ recv_group = i; + recv_flag = false; + } + } + } + + if (!(send_flag || recv_flag)) { + return (send_group != recv_group); + } + + return false; +} + +void AscendStreamAssign::FindEventRelations(const NotNull &graph_ptr) { + AscendResourceMng &resource_manager = AscendResourceMng::GetInstance(); + auto event_nums = resource_manager.get_cur_event_num(); + if (event_nums == 0) { + return; + } + auto exe_orders = graph_ptr->execution_order(); + // find all event info + for (size_t i = 0; i < exe_orders.size(); i++) { + auto cur_cnode = exe_orders[i]; + auto name = AnfAlgo::GetCNodeName(cur_cnode); + if (name == kSendOpName) { + event_map_[cur_cnode] = {}; + } + + if (name == kRecvOpName) { + auto recv_event_id = AnfAlgo::GetNodeAttr(cur_cnode, kAttrEventId); + for (auto &item : event_map_) { + auto send_event_id = AnfAlgo::GetNodeAttr(item.first, kAttrEventId); + if (recv_event_id == send_event_id) { + item.second = cur_cnode; + break; + } + } + } + } + + // delete useless event info + auto begin = event_map_.begin(); + while (begin != event_map_.end()) { + auto send_stream_id = AnfAlgo::GetStreamId(begin->first); + auto recv_stream_id = AnfAlgo::GetStreamId(begin->second); + bool flag = IsSatisfiedEvent(send_stream_id, recv_stream_id); + if (!flag) { + begin = event_map_.erase(begin); + } else { + begin++; + } + } + + MS_LOG(INFO) << "Satisfied event info"; + for (const auto &item : event_map_) { + MS_LOG(INFO) << "Event_id:" << AnfAlgo::GetNodeAttr(item.first, kAttrEventId); + } +} + } // namespace ascend } // namespace device } // namespace mindspore diff --git a/mindspore/ccsrc/device/ascend/ascend_stream_assign.h b/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.h similarity index 83% rename from mindspore/ccsrc/device/ascend/ascend_stream_assign.h rename to mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.h index 625ab6ad6e..00fca60e8d 100644 --- a/mindspore/ccsrc/device/ascend/ascend_stream_assign.h +++ 
b/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.h @@ -28,7 +28,7 @@ #include "runtime/base.h" #include "runtime/rt_model.h" #include "runtime/stream.h" -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" #include "utils/contract.h" namespace mindspore { @@ -94,6 +94,7 @@ class AscendResourceMng { uint32_t cur_event_num_{0}; }; +enum StreamActiveKind { kInvalid = 0, kHead, kMiddle, kTail }; class AscendStreamAssign { public: static AscendStreamAssign &GetInstance() { @@ -109,6 +110,8 @@ class AscendStreamAssign { void GetWaitStreams(vector *wait_active_stream_list); CNodePtr CreateSendApplyKernel(const NotNull &graph_ptr, uint32_t event_id, uint32_t stream_id); CNodePtr CreateRecvApplyKernel(const NotNull &graph_ptr, uint32_t event_id, uint32_t stream_id); + const std::vector> &get_stream_group() const { return stream_groups_; } + const std::map &get_event_map() const { return event_map_; } private: AscendStreamAssign() = default; @@ -147,6 +150,20 @@ class AscendStreamAssign { const CNodePtr &node); void GetParallelStream(uint32_t cur_stream_id, uint32_t stream_acitve_id, std::vector *parallel_streams); + // function for memory resue + void GetStreamRelations(); + void DFS(uint32_t start, std::vector *group); + bool IsVecExist(std::vector *group); + void FindStreamRelations(const NotNull &graph_ptr); + void GetStreamSwitchStreamRelation(const CNodePtr &node_ptr); + void GetStreamActiveStreamRelation(const NotNull &graph_ptr, size_t index); + StreamActiveKind GetStreamActiveKind(const NotNull &graph_ptr, size_t index); + uint32_t GetStreamByActivedStream(uint32_t actived_stream_id); + void PrintStreamRelations(); + void PrintStreamGroups(); + void FindEventRelations(const NotNull &graph_ptr); + bool IsSatisfiedEvent(uint32_t send_stream_id, uint32_t recv_stream_id) const; + bool independent_stream_activated_{false}; bool hcom_stream_activated_{false}; std::map independent_stream_map_{}; @@ -154,6 +171,11 @@ class 
AscendStreamAssign { std::map common_stream_map_{}; std::set processed_streams_{}; std::vector need_first_active_streams_{}; + + // attr for memory copy reuse + std::map> stream_relations_{}; + std::vector> stream_groups_{}; + std::map event_map_; // new policy end }; } // namespace ascend diff --git a/mindspore/ccsrc/runtime/device/ascend/dump/data_dumper.cc b/mindspore/ccsrc/runtime/device/ascend/dump/data_dumper.cc new file mode 100644 index 0000000000..ab2c6b2748 --- /dev/null +++ b/mindspore/ccsrc/runtime/device/ascend/dump/data_dumper.cc @@ -0,0 +1,282 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifdef ENABLE_DATA_DUMP +#include "runtime/device/ascend/dump/data_dumper.h" + +#include +#include +#include +#include "utility" +#include "backend/session/anf_runtime_algorithm.h" +#include "runtime/mem.h" +#include "runtime/kernel.h" +#include "runtime/device/ascend/dump/ge_dump.h" +#include "proto/op_mapping_info.pb.h" +#include "utils/context/ms_context.h" +#include "debug/data_dump_parser.h" + +constexpr uint32_t kAicpuLoadFlag = 1; +constexpr uint32_t kAicpuUnloadFlag = 0; +constexpr uint32_t kTupleTaskId = 0; +constexpr uint32_t kTupleStreamId = 1; +constexpr uint32_t kTupleArgs = 2; +constexpr uint32_t kCurrentStepTensorIndex = 0; +constexpr uint32_t kCurrentEpochTensorIndex = 1; +constexpr uint32_t kStepsPerEpochTensorIndex = 2; + +namespace mindspore { +namespace device { +namespace ascend { +void DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull task); +void DumpKernelInput(const CNodePtr &kernel, void *args, NotNull task); +void RtLoadDumpData(const aicpu::dump::OpMappingInfo &dump_info, void **ptr); + +DataDumper::~DataDumper() { + ReleaseDevMem(&dev_load_mem_); + ReleaseDevMem(&dev_unload_mem_); +} + +void DataDumper::LoadDumpInfo() { + MS_LOG(INFO) << "[DataDump] LoadDumpInfo start"; + MS_EXCEPTION_IF_NULL(kernel_graph_); + aicpu::dump::OpMappingInfo dump_info; + SetOpMappingInfo(NOT_NULL(&dump_info)); + + auto kernels = kernel_graph_->execution_order(); + for (const auto &kernel : kernels) { + MS_EXCEPTION_IF_NULL(kernel); + if (!KernelNeedDump(kernel)) { + continue; + } + MS_LOG(INFO) << "[DataDump] LoadDumpInfo kernel:" << kernel->fullname_with_scope(); + dump_kernel_names_.emplace_back(kernel->fullname_with_scope()); + + aicpu::dump::Task task; + ConstructDumpTask(NOT_NULL(kernel), NOT_NULL(&task)); + MS_EXCEPTION_IF_NULL(dump_info.mutable_task()); + dump_info.mutable_task()->Add(std::move(task)); + } + RtLoadDumpData(dump_info, &dev_load_mem_); + load_flag_ = true; + MS_LOG(INFO) << "[DataDump] LoadDumpInfo end"; +} + +void 
DataDumper::SetOpMappingInfo(NotNull dump_info) const { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + MS_EXCEPTION_IF_NULL(kernel_graph_); + auto dump_path = DataDumpParser::GetInstance().GetDumpPath(); + if (!dump_path.has_value()) { + MS_LOG(EXCEPTION) << "Dump path invalid"; + } + auto device_id = context_ptr->device_id(); + dump_info->set_dump_path(dump_path.value() + "_" + std::to_string(device_id) + "/"); + MS_LOG(INFO) << "[DataDump] dump_path:" << dump_path.value(); + + dump_info->set_model_name(DataDumpParser::GetInstance().net_name() + "_" + std::to_string(kernel_graph_->graph_id())); + dump_info->set_dump_step(std::to_string(DataDumpParser::GetInstance().dump_step())); + dump_info->set_model_id(kernel_graph_->graph_id()); + dump_info->set_flag(kAicpuLoadFlag); + + const auto &input_ctrl_tensors = kernel_graph_->input_ctrl_tensors(); + if (input_ctrl_tensors == nullptr || input_ctrl_tensors->size() < 3) { + MS_LOG(INFO) << "[DataDump] Not data sink mode, input_ctrl_tensor"; + return; + } + const auto ¤t_step_tensor = input_ctrl_tensors->at(kCurrentStepTensorIndex); + const auto &currnet_epoch_tensor = input_ctrl_tensors->at(kCurrentEpochTensorIndex); + const auto &steps_per_epoch_tensor = input_ctrl_tensors->at(kStepsPerEpochTensorIndex); + + MS_EXCEPTION_IF_NULL(current_step_tensor); + MS_EXCEPTION_IF_NULL(currnet_epoch_tensor); + MS_EXCEPTION_IF_NULL(steps_per_epoch_tensor); + MS_EXCEPTION_IF_NULL(current_step_tensor->device_address()); + MS_EXCEPTION_IF_NULL(currnet_epoch_tensor->device_address()); + MS_EXCEPTION_IF_NULL(steps_per_epoch_tensor->device_address()); + + void *current_step = current_step_tensor->device_address()->ptr_; + void *current_epoch = currnet_epoch_tensor->device_address()->ptr_; + void *steps_per_epoch = steps_per_epoch_tensor->device_address()->ptr_; + + if (current_epoch != nullptr && current_step != nullptr && steps_per_epoch != nullptr) { + 
dump_info->set_step_id_addr(reinterpret_cast(current_epoch)); + dump_info->set_loop_cond_addr(reinterpret_cast(current_step)); + dump_info->set_iterations_per_loop_addr(reinterpret_cast(steps_per_epoch)); + } else { + MS_LOG(INFO) << "Invalid ctrl tensor device address"; + } +} + +bool DataDumper::KernelNeedDump(const CNodePtr &kernel) const { + if (AnfAlgo::GetKernelType(kernel) != TBE_KERNEL && AnfAlgo::GetKernelType(kernel) != AICPU_KERNEL && + AnfAlgo::GetKernelType(kernel) != AKG_KERNEL) { + return false; + } + MS_EXCEPTION_IF_NULL(kernel); + // dump all kernel if mode is set 0 in data_dump.json + return DataDumpParser::GetInstance().NeedDump(kernel->fullname_with_scope()); +} + +void DataDumper::UnloadDumpInfo() { + if (!load_flag_) { + MS_LOG(WARNING) << "Load not success, no need to unload"; + return; + } + MS_EXCEPTION_IF_NULL(kernel_graph_); + MS_LOG(INFO) << "[DataDump] UnloadDumpInfo start. graphId:" << kernel_graph_->graph_id(); + + aicpu::dump::OpMappingInfo op_mapping_info; + op_mapping_info.set_model_id(kernel_graph_->graph_id()); + op_mapping_info.set_flag(kAicpuUnloadFlag); + + for (const auto &kernel_name : dump_kernel_names_) { + aicpu::dump::Task task; + auto iter = runtime_info_map_.find(kernel_name); + if (iter == runtime_info_map_.end()) { + MS_LOG(EXCEPTION) << "[DataDump] kernel name not found in runtime_info_map"; + } + MS_EXCEPTION_IF_NULL(iter->second); + auto task_id = std::get(*iter->second); + task.set_task_id(task_id); + MS_EXCEPTION_IF_NULL(op_mapping_info.mutable_task()); + op_mapping_info.mutable_task()->Add(std::move(task)); + } + + RtLoadDumpData(op_mapping_info, &dev_unload_mem_); +} + +void DataDumper::ReleaseDevMem(void **ptr) const { + if (ptr == nullptr) { + return; + } + if (*ptr != nullptr) { + rtError_t rt_error = rtFree(*ptr); + if (rt_error != RT_ERROR_NONE) { + MS_LOG(ERROR) << "[DataDump] Call rtFree failed, ret:" << rt_error; + } + *ptr = nullptr; + } +} + +void DataDumper::ConstructDumpTask(NotNull kernel, NotNull 
dump_task) const { + dump_task->set_end_graph(false); + auto iter = runtime_info_map_.find(kernel->fullname_with_scope()); + if (iter == runtime_info_map_.end()) { + MS_LOG(EXCEPTION) << "[DataDump] kernel name not found in runtime_info_map"; + } + MS_EXCEPTION_IF_NULL(iter->second); + auto task_id = std::get(*iter->second); + auto stream_id = std::get(*iter->second); + auto args = std::get(*iter->second); + MS_LOG(INFO) << "[DataDump] Get runtime info task_id:" << task_id << " stream_id:" << stream_id; + + dump_task->set_task_id(task_id); + dump_task->set_stream_id(stream_id); + MS_EXCEPTION_IF_NULL(dump_task->mutable_op()); + dump_task->mutable_op()->set_op_name(kernel->fullname_with_scope()); + dump_task->mutable_op()->set_op_type(AnfAlgo::GetCNodeName(kernel.get())); + + DumpKernelOutput(kernel, args, dump_task); + DumpKernelInput(kernel, args, dump_task); +} + +void RtLoadDumpData(const aicpu::dump::OpMappingInfo &dump_info, void **ptr) { + std::string proto_str; + size_t proto_size = dump_info.ByteSizeLong(); + bool ret = dump_info.SerializeToString(&proto_str); + if (!ret || proto_size == 0) { + MS_LOG(EXCEPTION) << "[DataDump] Protobuf SerializeToString failed, proto size %zu."; + } + + rtError_t rt_ret = rtMalloc(ptr, proto_size, RT_MEMORY_HBM); + if (rt_ret != RT_ERROR_NONE) { + MS_LOG(EXCEPTION) << "[DataDump] Call rtMalloc failed"; + } + + if (ptr == nullptr) { + MS_LOG(ERROR) << "[DataDump] rtMalloc failed, ptr is nullptr"; + return; + } + rt_ret = rtMemcpy(*ptr, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE); + if (rt_ret != RT_ERROR_NONE) { + MS_LOG(EXCEPTION) << "[DataDump] Call rtMemcpy failed"; + } + + MS_LOG(INFO) << "[DataDump] rtDatadumpInfoLoad start"; + rt_ret = rtDatadumpInfoLoad(*ptr, proto_size); + if (rt_ret != RT_ERROR_NONE) { + MS_LOG(EXCEPTION) << "[DataDump] Call rtDatadumpInfoLoad failed"; + } +} + +void DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull task) { + MS_LOG(INFO) << "[DataDump] 
DumpKernelOutput start. Kernel:" << kernel->fullname_with_scope(); + auto input_size = AnfAlgo::GetInputTensorNum(kernel); + auto output_size = AnfAlgo::GetOutputTensorNum(kernel); + uint64_t offset = sizeof(void *) * input_size; + for (size_t i = 0; i < output_size; ++i) { + auto data_type = AnfAlgo::GetOutputDeviceDataType(kernel, i); + auto output_format = AnfAlgo::GetOutputFormat(kernel, i); + auto output_shape = AnfAlgo::GetOutputDeviceShape(kernel, i); + + aicpu::dump::Output output; + output.set_data_type(GetGeDataType(data_type)); + output.set_format(GetGeFormat(output_format, output_shape.size())); + MS_EXCEPTION_IF_NULL(output.mutable_shape()); + for (auto dim : output_shape) { + output.mutable_shape()->add_dim(dim); + } + output.set_original_output_format(GetGeFormat(output_format, output_shape.size())); + output.set_address(static_cast(reinterpret_cast(args)) + offset); + MS_EXCEPTION_IF_NULL(task->mutable_output()); + task->mutable_output()->Add(std::move(output)); + offset += sizeof(void *); + } +} + +void DumpKernelInput(const CNodePtr &kernel, void *args, NotNull task) { + MS_LOG(INFO) << "[DataDump] DumpKernelInput start. 
Kernel:" << kernel->fullname_with_scope(); + auto input_size = AnfAlgo::GetInputTensorNum(kernel); + uint64_t offset = 0; + for (size_t i = 0; i < input_size; ++i) { + aicpu::dump::Input input; + auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(kernel, i); + auto input_node = input_node_with_index.first; + auto input_index = input_node_with_index.second; + std::string output_format = AnfAlgo::GetOutputFormat(input_node, input_index); + auto output_type = AnfAlgo::GetOutputDeviceDataType(input_node, input_index); + if (output_type == kTypeUnknown) { + MS_LOG(WARNING) << "[DataDump] It is not suggested to use a lonely weight parameter as the output of graph"; + output_type = AnfAlgo::GetOutputInferDataType(input_node, input_index); + } + auto output_shape = AnfAlgo::GetOutputDeviceShape(input_node, input_index); + + input.set_data_type(GetGeDataType(output_type)); + input.set_format(GetGeFormat(output_format, output_shape.size())); + MS_EXCEPTION_IF_NULL(input.mutable_shape()); + for (auto dim : output_shape) { + input.mutable_shape()->add_dim(dim); + } + input.set_address(static_cast(reinterpret_cast(args)) + offset); + MS_EXCEPTION_IF_NULL(task->mutable_input()); + task->mutable_input()->Add(std::move(input)); + offset += sizeof(void *); + } +} +} // namespace ascend +} // namespace device +} // namespace mindspore +#endif diff --git a/mindspore/ccsrc/runtime/device/ascend/dump/data_dumper.h b/mindspore/ccsrc/runtime/device/ascend/dump/data_dumper.h new file mode 100644 index 0000000000..d99eb4db68 --- /dev/null +++ b/mindspore/ccsrc/runtime/device/ascend/dump/data_dumper.h @@ -0,0 +1,69 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_DATADUMP_H_ +#define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_DATADUMP_H_ +#ifdef ENABLE_DATA_DUMP +#include +#include +#include +#include +#include +#include "backend/session/kernel_graph.h" + +namespace aicpu { +namespace dump { +class OpMappingInfo; +class Task; +} // namespace dump +} // namespace aicpu +namespace mindspore { +namespace device { +namespace ascend { +// tuple(op_name, task_id, stream_id, args) +using RuntimeInfo = std::tuple; +class DataDumper { + public: + DataDumper(const session::KernelGraph *kernel_graph, + const std::map> &runtime_info_map) + : load_flag_(false), + dev_load_mem_(nullptr), + dev_unload_mem_(nullptr), + kernel_graph_(kernel_graph), + runtime_info_map_(runtime_info_map) {} + ~DataDumper(); + void LoadDumpInfo(); + + void UnloadDumpInfo(); + + private: + void ReleaseDevMem(void **ptr) const; + bool KernelNeedDump(const CNodePtr &kernel) const; + void SetOpMappingInfo(NotNull dump_info) const; + void ConstructDumpTask(NotNull kernel, NotNull dump_task) const; + + bool load_flag_; + void *dev_load_mem_; + void *dev_unload_mem_; + std::vector dump_kernel_names_; + const session::KernelGraph *kernel_graph_; + std::map> runtime_info_map_; +}; +} // namespace ascend +} // namespace device +} // namespace mindspore +#endif +#endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_DATADUMP_H_ diff --git a/mindspore/ccsrc/runtime/device/ascend/dump/ge_dump.h b/mindspore/ccsrc/runtime/device/ascend/dump/ge_dump.h new file mode 100644 index 
0000000000..eae70c4b0b --- /dev/null +++ b/mindspore/ccsrc/runtime/device/ascend/dump/ge_dump.h @@ -0,0 +1,120 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_GE_DUMP_H_ +#define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_GE_DUMP_H_ + +#include +#include +#include "proto/ge_dtype.pb.h" +#include "ir/dtype/type_id.h" +#include "utils/utils.h" + +namespace mindspore { +namespace device { +namespace ascend { +static ge::proto::DataType GetGeDataType(TypeId type_id) { + static const std::map data_type_map = { + {TypeId::kTypeUnknown, ge::proto::DT_UNDEFINED}, {TypeId::kNumberTypeFloat32, ge::proto::DT_FLOAT}, + {TypeId::kNumberTypeFloat16, ge::proto::DT_FLOAT16}, {TypeId::kNumberTypeInt8, ge::proto::DT_INT8}, + {TypeId::kNumberTypeUInt8, ge::proto::DT_UINT8}, {TypeId::kNumberTypeInt16, ge::proto::DT_INT16}, + {TypeId::kNumberTypeUInt16, ge::proto::DT_UINT16}, {TypeId::kNumberTypeInt32, ge::proto::DT_INT32}, + {TypeId::kNumberTypeInt64, ge::proto::DT_INT64}, {TypeId::kNumberTypeUInt32, ge::proto::DT_UINT32}, + {TypeId::kNumberTypeUInt64, ge::proto::DT_UINT64}, {TypeId::kNumberTypeBool, ge::proto::DT_BOOL}, + {TypeId::kNumberTypeFloat64, ge::proto::DT_DOUBLE}, + }; + MS_LOG(INFO) << "Vm origin type_id:" << type_id; + auto iter = data_type_map.find(type_id); + if (iter == data_type_map.end()) { + MS_LOG(EXCEPTION) << "Invalid data type:" << 
type_id; + } + return iter->second; +} + +enum GeFormat { + kFormat_NCHW = 0, // NCHW + kFormat_NHWC, // NHWC + kFormat_ND, // Nd Tensor + kFormat_NC1HWC0, // NC1HWC0 + kFormat_FRACTAL_Z, // FRACTAL_Z + kFormat_NC1C0HWPAD, + kFormat_NHWC1C0, + kFormat_FSR_NCHW, + kFormat_FRACTAL_DECONV, + kFormat_C1HWNC0, + kFormat_FRACTAL_DECONV_TRANSPOSE, + kFormat_FRACTAL_DECONV_SP_STRIDE_TRANS, + kFormat_NC1HWC0_C04, // NC1HWC0, C0 =4 + kFormat_FRACTAL_Z_C04, // FRACZ, C0 =4 + kFormat_CHWN, + kFormat_FRACTAL_DECONV_SP_STRIDE8_TRANS, + kFormat_HWCN, + kFormat_NC1KHKWHWC0, // KH,KW kernel h& kernel w maxpooling max output format + kFormat_BN_WEIGHT, + kFormat_FILTER_HWCK, // filter input tensor format + kFormat_HASHTABLE_LOOKUP_LOOKUPS = 20, + kFormat_HASHTABLE_LOOKUP_KEYS, + kFormat_HASHTABLE_LOOKUP_VALUE, + kFormat_HASHTABLE_LOOKUP_OUTPUT, + kFormat_HASHTABLE_LOOKUP_HITS = 24, + kFormat_C1HWNCoC0, + kFormat_MD, + kFormat_NDHWC, + kFormat_FRACTAL_ZZ, + kFormat_FRACTAL_NZ, + kFormat_NCDHW, + kFormat_DHWCN, // 3D filter input tensor format + kFormat_NDC1HWC0, + kFormat_FRACTAL_Z_3D, + kFormat_CN, + kFormat_NC, + kFormat_DHWNC, + kFormat_FRACTAL_Z_3D_TRANSPOSE, // 3D filter(transpose) input tensor format + kFormat_RESERVED, + kFormat_ALL +}; + +static GeFormat GetGeFormat(const std::string &format, size_t shape_size) { + static const std::map format_map = { + // default format: nchw, fractal_nz? 
+ {kOpFormat_DEFAULT, kFormat_NCHW}, + {kOpFormat_NC1KHKWHWC0, kFormat_NC1KHKWHWC0}, + {kOpFormat_ND, kFormat_ND}, + {kOpFormat_NCHW, kFormat_NCHW}, + {kOpFormat_NHWC, kFormat_NHWC}, + {kOpFormat_HWCN, kFormat_HWCN}, + {kOpFormat_NC1HWC0, kFormat_NC1HWC0}, + {kOpFormat_FRAC_Z, kFormat_FRACTAL_Z}, + {kOpFormat_FRAC_NZ, kFormat_FRACTAL_NZ}, + {kOpFormat_C1HWNCoC0, kFormat_C1HWNCoC0}, + {kOpFormat_NC1HWC0_C04, kFormat_NC1HWC0_C04}, + {kOpFormat_FRACTAL_Z_C04, kFormat_FRACTAL_Z_C04}, + {kOpFormat_NDHWC, kFormat_NDHWC}, + }; + MS_LOG(INFO) << "GetGeFormat format:" << format << " shape_size:" << shape_size; + if (format == kOpFormat_DEFAULT) { + return shape_size == 4 ? kFormat_NCHW : kFormat_ND; + } + auto iter = format_map.find(format); + if (iter == format_map.end()) { + MS_LOG(EXCEPTION) << "Invalid format:" << format; + } + return iter->second; +} +} // namespace ascend +} // namespace device +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_GE_DUMP_H_ diff --git a/mindspore/ccsrc/runtime/device/ascend/dump/proto/ge_dtype.proto b/mindspore/ccsrc/runtime/device/ascend/dump/proto/ge_dtype.proto new file mode 100644 index 0000000000..7c690524d9 --- /dev/null +++ b/mindspore/ccsrc/runtime/device/ascend/dump/proto/ge_dtype.proto @@ -0,0 +1,49 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +syntax = "proto3"; + +package ge.proto; + +enum DataType +{ + DT_UNDEFINED = 0; // Used to indicate a DataType field has not been set. + DT_FLOAT = 1; // float type + DT_FLOAT16 = 2; // fp16 type + DT_INT8 = 3; // int8 type + DT_UINT8 = 4; // uint8 type + DT_INT16 = 5; // int16 type + DT_UINT16 = 6; // uint16 type + DT_INT32 = 7; // + DT_INT64 = 8; // int64 type + DT_UINT32 = 9; // unsigned int32 + DT_UINT64 = 10; // unsigned int64 + DT_BOOL = 11; // bool type + DT_DOUBLE = 12; // double type + DT_STRING = 13; // string type + DT_DUAL_SUB_INT8 = 14; /**< dual output int8 type */ + DT_DUAL_SUB_UINT8 = 15; /**< dual output uint8 type */ + DT_COMPLEX64 = 16; // complex64 type + DT_COMPLEX128 = 17; // complex128 type + DT_QINT8 = 18; // qint8 type + DT_QINT16 = 19; // qint16 type + DT_QINT32 = 20; // qint32 type + DT_QUINT8 = 21; // quint8 type + DT_QUINT16 = 22; // quint16 type + DT_RESOURCE = 23; // resource type + DT_STRING_REF = 24; // string_ref type + DT_DUAL = 25; /**< dual output type */ +} \ No newline at end of file diff --git a/mindspore/ccsrc/runtime/device/ascend/dump/proto/op_mapping_info.proto b/mindspore/ccsrc/runtime/device/ascend/dump/proto/op_mapping_info.proto new file mode 100644 index 0000000000..d3377c655d --- /dev/null +++ b/mindspore/ccsrc/runtime/device/ascend/dump/proto/op_mapping_info.proto @@ -0,0 +1,78 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +syntax = "proto3"; +package aicpu.dump; + +message Shape { + repeated uint64 dim = 1; +} + +message Output { + int32 data_type = 1; + int32 format = 2; + Shape shape = 3; + uint64 address = 4; + string original_name = 5; + int32 original_output_index = 6; + int32 original_output_data_type = 7; + int32 original_output_format = 8; + uint64 size = 9; +}; + +message Input { + int32 data_type = 1; + int32 format = 2; + Shape shape = 3; + uint64 address = 4; + uint64 size = 5; +} + +message Op { + string op_name = 1; + string op_type = 2; +}; + +message Task { + uint32 task_id = 1; + uint32 stream_id = 2; + Op op = 3; + repeated Output output = 4; + bool end_graph = 5; + repeated Input input = 6; +}; + +message OpMappingInfo { + string dump_path = 1; + oneof model_name_param { + string model_name = 2; + } + oneof model_id_param { + uint32 model_id = 3; + } + oneof step_id { + uint64 step_id_addr = 4; + } + oneof iterations_per_loop { + uint64 iterations_per_loop_addr = 5; + } + oneof loop_cond { + uint64 loop_cond_addr = 6; + } + uint32 flag = 7; // 0x01 load, 0x00 unload + repeated Task task = 8; + string dump_step = 9; +}; diff --git a/mindspore/ccsrc/device/ascend/kernel_build_ascend.cc b/mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.cc similarity index 93% rename from mindspore/ccsrc/device/ascend/kernel_build_ascend.cc rename to mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.cc index bd0b436344..39cefcb020 100644 --- a/mindspore/ccsrc/device/ascend/kernel_build_ascend.cc +++ b/mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.cc @@ -14,26 +14,26 @@ * limitations under the License. 
*/ -#include "device/ascend/kernel_build_ascend.h" +#include "runtime/device/ascend/kernel_build_ascend.h" #include #include #include #include -#include "device/ascend/kernel_select_ascend.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "kernel/tbe/tbe_kernel_build.h" -#include "kernel/tbe/tbe_kernel_parallel_build.h" -#include "kernel/akg/ascend/akg_ascend_kernel_build.h" -#include "kernel/aicpu/aicpu_kernel_build.h" -#include "kernel/hccl/hccl_kernel_build.h" -#include "kernel/rts/rt_kernel_build.h" -#include "kernel/tbe/tbe_utils.h" -#include "kernel/common_utils.h" -#include "operator/ops.h" -#include "session/anf_runtime_algorithm.h" +#include "runtime/device/ascend/kernel_select_ascend.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_build.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h" +#include "backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.h" +#include "backend/kernel_compiler/aicpu/aicpu_kernel_build.h" +#include "backend/kernel_compiler/hccl/hccl_kernel_build.h" +#include "backend/kernel_compiler/rts/rt_kernel_build.h" +#include "backend/kernel_compiler/tbe/tbe_utils.h" +#include "backend/kernel_compiler/common_utils.h" +#include "frontend/operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" #include "./common.h" namespace mindspore { diff --git a/mindspore/ccsrc/device/ascend/kernel_build_ascend.h b/mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.h similarity index 97% rename from mindspore/ccsrc/device/ascend/kernel_build_ascend.h rename to mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.h index d987b6ce7a..0d2870eb0a 100644 --- a/mindspore/ccsrc/device/ascend/kernel_build_ascend.h +++ b/mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.h @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_DEVICE_ASCEND_KERNEL_BUILD_ASCEND_H_ #define 
MINDSPORE_CCSRC_DEVICE_ASCEND_KERNEL_BUILD_ASCEND_H_ -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc b/mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc similarity index 98% rename from mindspore/ccsrc/device/ascend/kernel_select_ascend.cc rename to mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc index cde79a18f7..e8fc6c7a98 100644 --- a/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc +++ b/mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "device/ascend/kernel_select_ascend.h" +#include "runtime/device/ascend/kernel_select_ascend.h" #include #include @@ -26,15 +26,15 @@ #include #include "common/utils.h" #include "debug/anf_ir_dump.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "ir/func_graph.h" #include "utils/context/ms_context.h" -#include "session/anf_runtime_algorithm.h" -#include "device/kernel_info.h" -#include "kernel/common_utils.h" -#include "kernel/kernel_query.h" -#include "kernel/oplib/oplib.h" -#include "kernel/kernel_build_info.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/common_utils.h" +#include "backend/kernel_compiler/kernel_query.h" +#include "backend/kernel_compiler/oplib/oplib.h" +#include "backend/kernel_compiler/kernel_build_info.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/ascend/kernel_select_ascend.h b/mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.h similarity index 96% rename from mindspore/ccsrc/device/ascend/kernel_select_ascend.h rename to mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.h index 7b7a7b9fb9..8a93b77cec 100644 --- a/mindspore/ccsrc/device/ascend/kernel_select_ascend.h +++ 
b/mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.h @@ -17,7 +17,7 @@ #ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_KERNEL_SELECT_ASCEND_ANFALGO_H_ #define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_KERNEL_SELECT_ASCEND_ANFALGO_H_ #include "ir/anf.h" -#include "kernel/kernel_build_info.h" +#include "backend/kernel_compiler/kernel_build_info.h" namespace mindspore { namespace device { namespace ascend { diff --git a/mindspore/ccsrc/device/ascend/kernel_select_graph_kernel.cc b/mindspore/ccsrc/runtime/device/ascend/kernel_select_graph_kernel.cc similarity index 98% rename from mindspore/ccsrc/device/ascend/kernel_select_graph_kernel.cc rename to mindspore/ccsrc/runtime/device/ascend/kernel_select_graph_kernel.cc index db31460d31..c76f96728f 100644 --- a/mindspore/ccsrc/device/ascend/kernel_select_graph_kernel.cc +++ b/mindspore/ccsrc/runtime/device/ascend/kernel_select_graph_kernel.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include "device/ascend/kernel_select_ascend.h" -#include "session/anf_runtime_algorithm.h" -#include "device/kernel_info.h" +#include "runtime/device/ascend/kernel_select_ascend.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "runtime/device/kernel_info.h" #include "ir/func_graph.h" -#include "kernel/common_utils.h" -#include "kernel/kernel_query.h" -#include "kernel/kernel_build_info.h" +#include "backend/kernel_compiler/common_utils.h" +#include "backend/kernel_compiler/kernel_query.h" +#include "backend/kernel_compiler/kernel_build_info.h" namespace mindspore { namespace device { @@ -362,8 +362,7 @@ void CheckFormatsAndDtypes(const CNodePtr &kernel_node, const std::vectorsecond) { - if (node_user.first->kernel_info() == nullptr || - node_user.first->kernel_info()->select_kernel_build_info() == nullptr) { + if (node_user.first->kernel_info() == nullptr || !node_user.first->kernel_info()->has_build_info()) { // maybe not a real kernel. 
continue; } diff --git a/mindspore/ccsrc/device/ascend/profiling/plugin_impl.cc b/mindspore/ccsrc/runtime/device/ascend/profiling/plugin_impl.cc similarity index 95% rename from mindspore/ccsrc/device/ascend/profiling/plugin_impl.cc rename to mindspore/ccsrc/runtime/device/ascend/profiling/plugin_impl.cc index 7790107aa1..4886c00a8e 100644 --- a/mindspore/ccsrc/device/ascend/profiling/plugin_impl.cc +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/plugin_impl.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "device/ascend/profiling/plugin_impl.h" +#include "runtime/device/ascend/profiling/plugin_impl.h" #include #include "utils/log_adapter.h" using std::string; diff --git a/mindspore/ccsrc/device/ascend/profiling/plugin_impl.h b/mindspore/ccsrc/runtime/device/ascend/profiling/plugin_impl.h similarity index 100% rename from mindspore/ccsrc/device/ascend/profiling/plugin_impl.h rename to mindspore/ccsrc/runtime/device/ascend/profiling/plugin_impl.h diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_engine_impl.cc b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_engine_impl.cc similarity index 89% rename from mindspore/ccsrc/device/ascend/profiling/profiling_engine_impl.cc rename to mindspore/ccsrc/runtime/device/ascend/profiling/profiling_engine_impl.cc index a393409334..1f35cba0f7 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_engine_impl.cc +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_engine_impl.cc @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "device/ascend/profiling/profiling_engine_impl.h" +#include "runtime/device/ascend/profiling/profiling_engine_impl.h" #include "utils/log_adapter.h" -#include "device/ascend/profiling/plugin_impl.h" +#include "runtime/device/ascend/profiling/plugin_impl.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_engine_impl.h b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_engine_impl.h similarity index 100% rename from mindspore/ccsrc/device/ascend/profiling/profiling_engine_impl.h rename to mindspore/ccsrc/runtime/device/ascend/profiling/profiling_engine_impl.h diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_manager.cc b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc similarity index 97% rename from mindspore/ccsrc/device/ascend/profiling/profiling_manager.cc rename to mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc index a2fe5b852d..6117fe5ecf 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_manager.cc +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc @@ -14,13 +14,13 @@ * limitations under the License. 
*/ -#include "device/ascend/profiling/profiling_manager.h" +#include "runtime/device/ascend/profiling/profiling_manager.h" #include #include #include "securec/include/securec.h" #include "./prof_mgr_core.h" -#include "device/ascend/profiling/plugin_impl.h" -#include "device/ascend/profiling/profiling_engine_impl.h" +#include "runtime/device/ascend/profiling/plugin_impl.h" +#include "runtime/device/ascend/profiling/profiling_engine_impl.h" #include "utils/log_adapter.h" #include "utils/context/ms_context.h" #include "common/utils.h" diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_manager.h b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.h similarity index 100% rename from mindspore/ccsrc/device/ascend/profiling/profiling_manager.h rename to mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.h diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_utils.cc similarity index 97% rename from mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc rename to mindspore/ccsrc/runtime/device/ascend/profiling/profiling_utils.cc index 17ac4c4530..5b1db6a404 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_utils.cc @@ -14,16 +14,16 @@ * limitations under the License. 
*/ -#include "device/ascend/profiling/reporter/graph_desc_reporter.h" -#include "device/ascend/profiling/profiling_utils.h" -#include "kernel/kernel.h" -#include "device/ascend/profiling/profiling_manager.h" -#include "session/anf_runtime_algorithm.h" +#include "runtime/device/ascend/profiling/reporter/graph_desc_reporter.h" +#include "runtime/device/ascend/profiling/profiling_utils.h" +#include "backend/kernel_compiler/kernel.h" +#include "runtime/device/ascend/profiling/profiling_manager.h" +#include "backend/session/anf_runtime_algorithm.h" #include "common/utils.h" #include "utils/utils.h" -#include "device/ascend/profiling/reporter/task_desc_reporter.h" +#include "runtime/device/ascend/profiling/reporter/task_desc_reporter.h" #include "utils/context/ms_context.h" -#include "device/ascend/profiling/reporter/point_reporter.h" +#include "runtime/device/ascend/profiling/reporter/point_reporter.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_utils.h similarity index 98% rename from mindspore/ccsrc/device/ascend/profiling/profiling_utils.h rename to mindspore/ccsrc/runtime/device/ascend/profiling/profiling_utils.h index a3c7739447..de8ff2ac39 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_utils.h @@ -22,9 +22,9 @@ #include #include #include -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" #include "utils/contract.h" -#include "device/ascend/profiling/reporter/profiling_desc.h" +#include "runtime/device/ascend/profiling/reporter/profiling_desc.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/desc_reporter.cc b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/desc_reporter.cc similarity index 94% rename from 
mindspore/ccsrc/device/ascend/profiling/reporter/desc_reporter.cc rename to mindspore/ccsrc/runtime/device/ascend/profiling/reporter/desc_reporter.cc index cf80c07ca9..87e2bbcb06 100644 --- a/mindspore/ccsrc/device/ascend/profiling/reporter/desc_reporter.cc +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/desc_reporter.cc @@ -15,8 +15,8 @@ */ #include -#include "device/ascend/profiling/reporter/desc_reporter.h" -#include "device/ascend/profiling/plugin_impl.h" +#include "runtime/device/ascend/profiling/reporter/desc_reporter.h" +#include "runtime/device/ascend/profiling/plugin_impl.h" #include "utils/log_adapter.h" constexpr size_t kReportMaxLen = 2048; diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/desc_reporter.h b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/desc_reporter.h similarity index 93% rename from mindspore/ccsrc/device/ascend/profiling/reporter/desc_reporter.h rename to mindspore/ccsrc/runtime/device/ascend/profiling/reporter/desc_reporter.h index c8e1b3ed62..f25c64ce05 100644 --- a/mindspore/ccsrc/device/ascend/profiling/reporter/desc_reporter.h +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/desc_reporter.h @@ -22,9 +22,9 @@ #include #include #include "toolchain/prof_reporter.h" -#include "device/ascend/profiling/reporter/profiling_desc.h" +#include "runtime/device/ascend/profiling/reporter/profiling_desc.h" #include "utils/contract.h" -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/graph_desc_reporter.cc b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/graph_desc_reporter.cc similarity index 95% rename from mindspore/ccsrc/device/ascend/profiling/reporter/graph_desc_reporter.cc rename to mindspore/ccsrc/runtime/device/ascend/profiling/reporter/graph_desc_reporter.cc index 1f2d1570bb..5c028986d4 100644 --- 
a/mindspore/ccsrc/device/ascend/profiling/reporter/graph_desc_reporter.cc +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/graph_desc_reporter.cc @@ -16,8 +16,8 @@ #include #include -#include "device/ascend/profiling/reporter/graph_desc_reporter.h" -#include "session/anf_runtime_algorithm.h" +#include "runtime/device/ascend/profiling/reporter/graph_desc_reporter.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/graph_desc_reporter.h b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/graph_desc_reporter.h similarity index 95% rename from mindspore/ccsrc/device/ascend/profiling/reporter/graph_desc_reporter.h rename to mindspore/ccsrc/runtime/device/ascend/profiling/reporter/graph_desc_reporter.h index 10f78092f2..531f122cde 100644 --- a/mindspore/ccsrc/device/ascend/profiling/reporter/graph_desc_reporter.h +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/graph_desc_reporter.h @@ -20,7 +20,7 @@ #include #include #include -#include "device/ascend/profiling/reporter/desc_reporter.h" +#include "runtime/device/ascend/profiling/reporter/desc_reporter.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/point_reporter.cc b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/point_reporter.cc similarity index 93% rename from mindspore/ccsrc/device/ascend/profiling/reporter/point_reporter.cc rename to mindspore/ccsrc/runtime/device/ascend/profiling/reporter/point_reporter.cc index 0024ab9c22..42a1b4c286 100644 --- a/mindspore/ccsrc/device/ascend/profiling/reporter/point_reporter.cc +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/point_reporter.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "device/ascend/profiling/reporter/point_reporter.h" +#include "runtime/device/ascend/profiling/reporter/point_reporter.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/point_reporter.h b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/point_reporter.h similarity index 95% rename from mindspore/ccsrc/device/ascend/profiling/reporter/point_reporter.h rename to mindspore/ccsrc/runtime/device/ascend/profiling/reporter/point_reporter.h index ae12672df6..c24535f4ec 100644 --- a/mindspore/ccsrc/device/ascend/profiling/reporter/point_reporter.h +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/point_reporter.h @@ -19,7 +19,7 @@ #include #include -#include "device/ascend/profiling/reporter/desc_reporter.h" +#include "runtime/device/ascend/profiling/reporter/desc_reporter.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/profiling_desc.cc b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/profiling_desc.cc similarity index 97% rename from mindspore/ccsrc/device/ascend/profiling/reporter/profiling_desc.cc rename to mindspore/ccsrc/runtime/device/ascend/profiling/reporter/profiling_desc.cc index 082cb81e42..4aec72472c 100644 --- a/mindspore/ccsrc/device/ascend/profiling/reporter/profiling_desc.cc +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/profiling_desc.cc @@ -17,7 +17,7 @@ #include #include #include -#include "device/ascend/profiling/reporter/profiling_desc.h" +#include "runtime/device/ascend/profiling/reporter/profiling_desc.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/profiling_desc.h b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/profiling_desc.h similarity index 100% rename from mindspore/ccsrc/device/ascend/profiling/reporter/profiling_desc.h rename to mindspore/ccsrc/runtime/device/ascend/profiling/reporter/profiling_desc.h 
diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/task_desc_reporter.cc b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/task_desc_reporter.cc similarity index 92% rename from mindspore/ccsrc/device/ascend/profiling/reporter/task_desc_reporter.cc rename to mindspore/ccsrc/runtime/device/ascend/profiling/reporter/task_desc_reporter.cc index 0bd66e31ef..26d722aa1a 100644 --- a/mindspore/ccsrc/device/ascend/profiling/reporter/task_desc_reporter.cc +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/task_desc_reporter.cc @@ -15,9 +15,9 @@ */ #include -#include "device/ascend/profiling/reporter/task_desc_reporter.h" -#include "session/anf_runtime_algorithm.h" -#include "kernel/ascend_kernel_mod.h" +#include "runtime/device/ascend/profiling/reporter/task_desc_reporter.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/ascend_kernel_mod.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/task_desc_reporter.h b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/task_desc_reporter.h similarity index 96% rename from mindspore/ccsrc/device/ascend/profiling/reporter/task_desc_reporter.h rename to mindspore/ccsrc/runtime/device/ascend/profiling/reporter/task_desc_reporter.h index 087c691a5f..51526735a9 100644 --- a/mindspore/ccsrc/device/ascend/profiling/reporter/task_desc_reporter.h +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/task_desc_reporter.h @@ -20,7 +20,7 @@ #include #include #include -#include "device/ascend/profiling/reporter/desc_reporter.h" +#include "runtime/device/ascend/profiling/reporter/desc_reporter.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/ascend/readme.md b/mindspore/ccsrc/runtime/device/ascend/readme.md similarity index 100% rename from mindspore/ccsrc/device/ascend/readme.md rename to mindspore/ccsrc/runtime/device/ascend/readme.md diff --git 
a/mindspore/ccsrc/device/ascend/tasksink/runtime_utils.cc b/mindspore/ccsrc/runtime/device/ascend/tasksink/runtime_utils.cc similarity index 98% rename from mindspore/ccsrc/device/ascend/tasksink/runtime_utils.cc rename to mindspore/ccsrc/runtime/device/ascend/tasksink/runtime_utils.cc index 3faeefb820..dba71edfd3 100644 --- a/mindspore/ccsrc/device/ascend/tasksink/runtime_utils.cc +++ b/mindspore/ccsrc/runtime/device/ascend/tasksink/runtime_utils.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "device/ascend/tasksink/runtime_utils.h" +#include "runtime/device/ascend/tasksink/runtime_utils.h" #include diff --git a/mindspore/ccsrc/device/ascend/tasksink/runtime_utils.h b/mindspore/ccsrc/runtime/device/ascend/tasksink/runtime_utils.h similarity index 100% rename from mindspore/ccsrc/device/ascend/tasksink/runtime_utils.h rename to mindspore/ccsrc/runtime/device/ascend/tasksink/runtime_utils.h diff --git a/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc b/mindspore/ccsrc/runtime/device/ascend/tasksink/task_generator.cc similarity index 96% rename from mindspore/ccsrc/device/ascend/tasksink/task_generator.cc rename to mindspore/ccsrc/runtime/device/ascend/tasksink/task_generator.cc index e026459ae9..5aeb932105 100644 --- a/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc +++ b/mindspore/ccsrc/runtime/device/ascend/tasksink/task_generator.cc @@ -14,14 +14,14 @@ * limitations under the License. 
*/ -#include "device/ascend/tasksink/task_generator.h" +#include "runtime/device/ascend/tasksink/task_generator.h" #include -#include "kernel/task_stream.h" +#include "backend/kernel_compiler/task_stream.h" #include "utils/context/ms_context.h" #include "common/utils.h" -#include "device/ascend/profiling/profiling_utils.h" -#include "device/ascend/profiling/profiling_manager.h" +#include "runtime/device/ascend/profiling/profiling_utils.h" +#include "runtime/device/ascend/profiling/profiling_manager.h" namespace mindspore { namespace device { @@ -127,6 +127,7 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i AddressPtrList kernel_outputs; auto kernel_mod = AnfAlgo::GetKernelMod(anf_node_ptr); MS_EXCEPTION_IF_NULL(kernel_mod); + kernel_mod->set_kernel_name(anf_node_ptr->fullname_with_scope()); if (AnfAlgo::GetCNodeName(anf_node_ptr) != kAtomicAddrCleanOpName) { for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(anf_node_ptr); ++i) { auto real_input_index = AnfAlgo::GetRealInputIndex(anf_node_ptr, i); diff --git a/mindspore/ccsrc/device/ascend/tasksink/task_generator.h b/mindspore/ccsrc/runtime/device/ascend/tasksink/task_generator.h similarity index 95% rename from mindspore/ccsrc/device/ascend/tasksink/task_generator.h rename to mindspore/ccsrc/runtime/device/ascend/tasksink/task_generator.h index ecd5889b04..134dec48b6 100644 --- a/mindspore/ccsrc/device/ascend/tasksink/task_generator.h +++ b/mindspore/ccsrc/runtime/device/ascend/tasksink/task_generator.h @@ -22,9 +22,9 @@ #include #include #include -#include "device/kernel_runtime.h" +#include "runtime/device/kernel_runtime.h" #include "ir/anf.h" -#include "kernel/ascend_kernel_mod.h" +#include "backend/kernel_compiler/ascend_kernel_mod.h" #include "framework/ge_runtime/task_info.h" namespace mindspore { diff --git a/mindspore/ccsrc/device/convert_tensor_utils.cc b/mindspore/ccsrc/runtime/device/convert_tensor_utils.cc similarity index 97% rename from 
mindspore/ccsrc/device/convert_tensor_utils.cc rename to mindspore/ccsrc/runtime/device/convert_tensor_utils.cc index bac72727c2..cfd9b0fbdf 100644 --- a/mindspore/ccsrc/device/convert_tensor_utils.cc +++ b/mindspore/ccsrc/runtime/device/convert_tensor_utils.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "device/convert_tensor_utils.h" +#include "runtime/device/convert_tensor_utils.h" #include namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/convert_tensor_utils.h b/mindspore/ccsrc/runtime/device/convert_tensor_utils.h similarity index 100% rename from mindspore/ccsrc/device/convert_tensor_utils.h rename to mindspore/ccsrc/runtime/device/convert_tensor_utils.h diff --git a/mindspore/ccsrc/device/cpu/cpu_device_address.cc b/mindspore/ccsrc/runtime/device/cpu/cpu_device_address.cc similarity index 95% rename from mindspore/ccsrc/device/cpu/cpu_device_address.cc rename to mindspore/ccsrc/runtime/device/cpu/cpu_device_address.cc index 09ab0da12b..92269233bd 100644 --- a/mindspore/ccsrc/device/cpu/cpu_device_address.cc +++ b/mindspore/ccsrc/runtime/device/cpu/cpu_device_address.cc @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "device/cpu/cpu_device_address.h" +#include "runtime/device/cpu/cpu_device_address.h" #include -#include "device/convert_tensor_utils.h" +#include "runtime/device/convert_tensor_utils.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/cpu/cpu_device_address.h b/mindspore/ccsrc/runtime/device/cpu/cpu_device_address.h similarity index 94% rename from mindspore/ccsrc/device/cpu/cpu_device_address.h rename to mindspore/ccsrc/runtime/device/cpu/cpu_device_address.h index a041567f47..63cf171fa2 100644 --- a/mindspore/ccsrc/device/cpu/cpu_device_address.h +++ b/mindspore/ccsrc/runtime/device/cpu/cpu_device_address.h @@ -18,7 +18,7 @@ #include #include -#include "device/device_address.h" +#include "runtime/device/device_address.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/cpu/cpu_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc similarity index 97% rename from mindspore/ccsrc/device/cpu/cpu_kernel_runtime.cc rename to mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc index f46d10ed82..d2e41a1fbd 100644 --- a/mindspore/ccsrc/device/cpu/cpu_kernel_runtime.cc +++ b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "device/cpu/cpu_kernel_runtime.h" +#include "runtime/device/cpu/cpu_kernel_runtime.h" #include #include #include @@ -22,15 +22,15 @@ #include #include #include -#include "kernel/kernel.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" #include "utils/context/ms_context.h" #include "utils/config_manager.h" #include "utils/profile.h" #include "common/utils.h" -#include "session/anf_runtime_algorithm.h" -#include "session/session_basic.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/session/session_basic.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/cpu/cpu_kernel_runtime.h b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.h similarity index 92% rename from mindspore/ccsrc/device/cpu/cpu_kernel_runtime.h rename to mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.h index 354d2922c2..a29f840bfd 100644 --- a/mindspore/ccsrc/device/cpu/cpu_kernel_runtime.h +++ b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.h @@ -21,11 +21,11 @@ #include #include #include -#include "device/kernel_runtime.h" -#include "session/kernel_graph.h" -#include "session/session_basic.h" -#include "device/cpu/cpu_resource_manager.h" -#include "session/anf_runtime_algorithm.h" +#include "runtime/device/kernel_runtime.h" +#include "backend/session/kernel_graph.h" +#include "backend/session/session_basic.h" +#include "runtime/device/cpu/cpu_resource_manager.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/any.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/cpu/cpu_resource_manager.cc b/mindspore/ccsrc/runtime/device/cpu/cpu_resource_manager.cc similarity index 97% rename from mindspore/ccsrc/device/cpu/cpu_resource_manager.cc rename to mindspore/ccsrc/runtime/device/cpu/cpu_resource_manager.cc index 
c69ef35305..c607260ab3 100644 --- a/mindspore/ccsrc/device/cpu/cpu_resource_manager.cc +++ b/mindspore/ccsrc/runtime/device/cpu/cpu_resource_manager.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "device/cpu/cpu_resource_manager.h" -#include "session/anf_runtime_algorithm.h" +#include "runtime/device/cpu/cpu_resource_manager.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/cpu/cpu_resource_manager.h b/mindspore/ccsrc/runtime/device/cpu/cpu_resource_manager.h similarity index 90% rename from mindspore/ccsrc/device/cpu/cpu_resource_manager.h rename to mindspore/ccsrc/runtime/device/cpu/cpu_resource_manager.h index d130241464..d251760dd2 100644 --- a/mindspore/ccsrc/device/cpu/cpu_resource_manager.h +++ b/mindspore/ccsrc/runtime/device/cpu/cpu_resource_manager.h @@ -18,10 +18,10 @@ #include #include -#include "session/kernel_graph.h" -#include "session/session_basic.h" -#include "device/device_address.h" -#include "device/cpu/cpu_simple_mem_plan.h" +#include "backend/session/kernel_graph.h" +#include "backend/session/session_basic.h" +#include "runtime/device/device_address.h" +#include "runtime/device/cpu/cpu_simple_mem_plan.h" namespace mindspore { namespace device { namespace cpu { diff --git a/mindspore/ccsrc/device/cpu/cpu_simple_mem_plan.cc b/mindspore/ccsrc/runtime/device/cpu/cpu_simple_mem_plan.cc similarity index 97% rename from mindspore/ccsrc/device/cpu/cpu_simple_mem_plan.cc rename to mindspore/ccsrc/runtime/device/cpu/cpu_simple_mem_plan.cc index e6cb6ee53a..7838e66984 100644 --- a/mindspore/ccsrc/device/cpu/cpu_simple_mem_plan.cc +++ b/mindspore/ccsrc/runtime/device/cpu/cpu_simple_mem_plan.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "device/cpu/cpu_simple_mem_plan.h" -#include "session/anf_runtime_algorithm.h" +#include "runtime/device/cpu/cpu_simple_mem_plan.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/cpu/cpu_simple_mem_plan.h b/mindspore/ccsrc/runtime/device/cpu/cpu_simple_mem_plan.h similarity index 94% rename from mindspore/ccsrc/device/cpu/cpu_simple_mem_plan.h rename to mindspore/ccsrc/runtime/device/cpu/cpu_simple_mem_plan.h index 7633ef3f45..123e29fbe5 100644 --- a/mindspore/ccsrc/device/cpu/cpu_simple_mem_plan.h +++ b/mindspore/ccsrc/runtime/device/cpu/cpu_simple_mem_plan.h @@ -18,8 +18,8 @@ #include #include -#include "session/kernel_graph.h" -#include "device/device_address.h" +#include "backend/session/kernel_graph.h" +#include "runtime/device/device_address.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/cpu/kernel_select_cpu.cc b/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.cc similarity index 98% rename from mindspore/ccsrc/device/cpu/kernel_select_cpu.cc rename to mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.cc index 9d72bcab89..9528e61ee9 100644 --- a/mindspore/ccsrc/device/cpu/kernel_select_cpu.cc +++ b/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.cc @@ -14,13 +14,13 @@ * limitations under the License. 
*/ -#include "device/cpu/kernel_select_cpu.h" +#include "runtime/device/cpu/kernel_select_cpu.h" #include #include #include -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/cpu/kernel_select_cpu.h b/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.h similarity index 100% rename from mindspore/ccsrc/device/cpu/kernel_select_cpu.h rename to mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.h diff --git a/mindspore/ccsrc/device/cpu/mpi/mpi_adapter.cc b/mindspore/ccsrc/runtime/device/cpu/mpi/mpi_adapter.cc similarity index 99% rename from mindspore/ccsrc/device/cpu/mpi/mpi_adapter.cc rename to mindspore/ccsrc/runtime/device/cpu/mpi/mpi_adapter.cc index 9b06c0a40a..c124523d59 100644 --- a/mindspore/ccsrc/device/cpu/mpi/mpi_adapter.cc +++ b/mindspore/ccsrc/runtime/device/cpu/mpi/mpi_adapter.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "device/cpu/mpi/mpi_adapter.h" +#include "runtime/device/cpu/mpi/mpi_adapter.h" #ifdef ENABLE_MPI #include #include diff --git a/mindspore/ccsrc/device/cpu/mpi/mpi_adapter.h b/mindspore/ccsrc/runtime/device/cpu/mpi/mpi_adapter.h similarity index 100% rename from mindspore/ccsrc/device/cpu/mpi/mpi_adapter.h rename to mindspore/ccsrc/runtime/device/cpu/mpi/mpi_adapter.h diff --git a/mindspore/ccsrc/device/cpu/readme.md b/mindspore/ccsrc/runtime/device/cpu/readme.md similarity index 100% rename from mindspore/ccsrc/device/cpu/readme.md rename to mindspore/ccsrc/runtime/device/cpu/readme.md diff --git a/mindspore/ccsrc/device/device_address.h b/mindspore/ccsrc/runtime/device/device_address.h similarity index 91% rename from mindspore/ccsrc/device/device_address.h rename to mindspore/ccsrc/runtime/device/device_address.h index 0447cc2539..32f5fcced9 100644 --- a/mindspore/ccsrc/device/device_address.h +++ b/mindspore/ccsrc/runtime/device/device_address.h @@ -21,8 +21,7 @@ #include #include #include "ir/dtype.h" - -using std::string; +#include "ir/device_sync.h" namespace mindspore { namespace device { @@ -34,6 +33,7 @@ class CPUKernelRuntime; namespace ascend { class AscendKernelRuntime; class AscendMemoryManager; +class DataDumper; namespace tasksink { class TaskGenerator; } // namespace tasksink @@ -50,20 +50,18 @@ namespace device { enum class DeviceAddressStatus { kInDevice, kInHost, kInDeviceToHost, kInHostToDevice }; enum class DeviceAddressType { kUnknown, kAscend, kCPU, kGPU }; -class DeviceAddress { +class DeviceAddress : public mindspore::DeviceSync { public: explicit DeviceAddress(void *ptr, size_t size) : ptr_(ptr), size_(size) {} explicit DeviceAddress(void *ptr, size_t size, const string &format, TypeId type_id) : ptr_(ptr), size_(size), format_(format), type_id_(type_id) {} virtual ~DeviceAddress() { ptr_ = nullptr; } - virtual bool SyncDeviceToHost(const std::vector &shape, size_t size, TypeId type, void *host_ptr) const = 0; - virtual bool 
SyncHostToDevice(const std::vector &shape, size_t size, TypeId type, - const void *host_ptr) const = 0; const void *GetPtr() const { return ptr_; } size_t GetSize() const { return size_; } std::string format() const { return format_; } TypeId type_id() const { return type_id_; } void set_host_shape(const std::vector &shape) { host_shape_ = shape; } + virtual void UpdateCommunicationAddress() {} virtual void set_status(DeviceAddressStatus status) {} virtual DeviceAddressStatus status() const { return DeviceAddressStatus::kInDevice; } virtual DeviceAddressType DeviceType() const { return DeviceAddressType::kUnknown; } @@ -89,6 +87,7 @@ class DeviceAddress { friend class mindspore::device::gpu::GPUMemoryManager; friend class mindspore::device::ascend::AscendKernelRuntime; friend class mindspore::device::ascend::AscendMemoryManager; + friend class mindspore::device::ascend::DataDumper; }; using DeviceAddressPtr = std::shared_ptr; diff --git a/mindspore/ccsrc/device/gpu/blocking_queue.cc b/mindspore/ccsrc/runtime/device/gpu/blocking_queue.cc similarity index 98% rename from mindspore/ccsrc/device/gpu/blocking_queue.cc rename to mindspore/ccsrc/runtime/device/gpu/blocking_queue.cc index 3b5e75f551..547c2fbe64 100644 --- a/mindspore/ccsrc/device/gpu/blocking_queue.cc +++ b/mindspore/ccsrc/runtime/device/gpu/blocking_queue.cc @@ -14,9 +14,9 @@ * limitations under the License. 
*/ -#include "device/gpu/blocking_queue.h" +#include "runtime/device/gpu/blocking_queue.h" #include -#include "device/gpu/gpu_common.h" +#include "runtime/device/gpu/gpu_common.h" #include "common/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/device/gpu/blocking_queue.h b/mindspore/ccsrc/runtime/device/gpu/blocking_queue.h similarity index 100% rename from mindspore/ccsrc/device/gpu/blocking_queue.h rename to mindspore/ccsrc/runtime/device/gpu/blocking_queue.h diff --git a/mindspore/ccsrc/device/gpu/cuda_common.h b/mindspore/ccsrc/runtime/device/gpu/cuda_common.h similarity index 97% rename from mindspore/ccsrc/device/gpu/cuda_common.h rename to mindspore/ccsrc/runtime/device/gpu/cuda_common.h index b79ba8bc28..2689fdbaca 100644 --- a/mindspore/ccsrc/device/gpu/cuda_common.h +++ b/mindspore/ccsrc/runtime/device/gpu/cuda_common.h @@ -18,7 +18,7 @@ #define MINDSPORE_CCSRC_DEVICE_GPU_CUDA_COMMON_H_ #include -#include "device/gpu/gpu_device_manager.h" +#include "runtime/device/gpu/gpu_device_manager.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/gpu/cuda_driver.cc b/mindspore/ccsrc/runtime/device/gpu/cuda_driver.cc similarity index 99% rename from mindspore/ccsrc/device/gpu/cuda_driver.cc rename to mindspore/ccsrc/runtime/device/gpu/cuda_driver.cc index 0dee53df64..1f5e5e3c22 100644 --- a/mindspore/ccsrc/device/gpu/cuda_driver.cc +++ b/mindspore/ccsrc/runtime/device/gpu/cuda_driver.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "device/gpu/cuda_driver.h" +#include "runtime/device/gpu/cuda_driver.h" #include #include "utils/log_adapter.h" #include "utils/convert_utils.h" diff --git a/mindspore/ccsrc/device/gpu/cuda_driver.h b/mindspore/ccsrc/runtime/device/gpu/cuda_driver.h similarity index 100% rename from mindspore/ccsrc/device/gpu/cuda_driver.h rename to mindspore/ccsrc/runtime/device/gpu/cuda_driver.h diff --git a/mindspore/ccsrc/device/gpu/distribution/collective_common.h b/mindspore/ccsrc/runtime/device/gpu/distribution/collective_common.h similarity index 71% rename from mindspore/ccsrc/device/gpu/distribution/collective_common.h rename to mindspore/ccsrc/runtime/device/gpu/distribution/collective_common.h index f9564a0c74..5373f21d70 100644 --- a/mindspore/ccsrc/device/gpu/distribution/collective_common.h +++ b/mindspore/ccsrc/runtime/device/gpu/distribution/collective_common.h @@ -23,16 +23,17 @@ namespace mindspore { namespace device { namespace gpu { -#define MAX_HOSTNAME_LEN 1024 -#define CHECK_RET(expression, result, message) \ - { \ - auto ret = (expression); \ - if (ret != result) { \ - std::ostringstream oss; \ - oss << "Error in file " << __FILE__ << " | Error on line " << __LINE__ << " | GPU collective Error " << message \ - << " | Error Number " << ret; \ - pybind11::pybind11_fail(oss.str()); \ - } \ +constexpr int MAX_HOSTNAME_LEN = 1024; +constexpr char NCCL_WORLD_GROUP[] = "nccl_world_group"; +#define CHECK_RET(expression, result, message) \ + { \ + auto ret = (expression); \ + if (ret != result) { \ + std::ostringstream oss; \ + oss << "Error in file " << __FILE__ << " | Error on line " << __LINE__ << " | GPU collective Error: " << message \ + << " | Error Number " << ret; \ + pybind11::pybind11_fail(oss.str()); \ + } \ } } // namespace gpu } // namespace device diff --git a/mindspore/ccsrc/device/gpu/distribution/collective_fake_init.cc b/mindspore/ccsrc/runtime/device/gpu/distribution/collective_fake_init.cc similarity index 93% rename from 
mindspore/ccsrc/device/gpu/distribution/collective_fake_init.cc rename to mindspore/ccsrc/runtime/device/gpu/distribution/collective_fake_init.cc index 06497a2e82..80793042fd 100644 --- a/mindspore/ccsrc/device/gpu/distribution/collective_fake_init.cc +++ b/mindspore/ccsrc/runtime/device/gpu/distribution/collective_fake_init.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "device/gpu/distribution/collective_fake_init.h" +#include "runtime/device/gpu/distribution/collective_fake_init.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/device/gpu/distribution/collective_fake_init.h b/mindspore/ccsrc/runtime/device/gpu/distribution/collective_fake_init.h similarity index 100% rename from mindspore/ccsrc/device/gpu/distribution/collective_fake_init.h rename to mindspore/ccsrc/runtime/device/gpu/distribution/collective_fake_init.h diff --git a/mindspore/ccsrc/device/gpu/distribution/collective_init.cc b/mindspore/ccsrc/runtime/device/gpu/distribution/collective_init.cc similarity index 97% rename from mindspore/ccsrc/device/gpu/distribution/collective_init.cc rename to mindspore/ccsrc/runtime/device/gpu/distribution/collective_init.cc index d7ab95bbe8..cba789b38d 100644 --- a/mindspore/ccsrc/device/gpu/distribution/collective_init.cc +++ b/mindspore/ccsrc/runtime/device/gpu/distribution/collective_init.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "device/gpu/distribution/collective_init.h" +#include "runtime/device/gpu/distribution/collective_init.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/device/gpu/distribution/collective_init.h b/mindspore/ccsrc/runtime/device/gpu/distribution/collective_init.h similarity index 84% rename from mindspore/ccsrc/device/gpu/distribution/collective_init.h rename to mindspore/ccsrc/runtime/device/gpu/distribution/collective_init.h index 424abcf470..464492d50f 100644 --- a/mindspore/ccsrc/device/gpu/distribution/collective_init.h +++ b/mindspore/ccsrc/runtime/device/gpu/distribution/collective_init.h @@ -18,6 +18,8 @@ #define MINDSPORE_CCSRC_DEVICE_GPU_DISTRIBUTION_COLLECTIVE_INIT_H_ #include +#include +#include namespace mindspore { namespace device { @@ -25,6 +27,10 @@ namespace gpu { using InitMPI = void (*)(); using InitNCCLComm = void (*)(); using GetLocalRankId = int (*)(); +using CreateCommGroupFunc = bool (*)(const std::string &, const std::vector &); +using GetRankIDByGroupFunc = int (*)(const std::string &); +using GetGroupSizeFunc = int (*)(const std::string &); +using DestroyGroupFunc = bool (*)(const std::string &); class CollectiveInitializer { public: diff --git a/mindspore/ccsrc/device/gpu/distribution/collective_wrapper.cc b/mindspore/ccsrc/runtime/device/gpu/distribution/collective_wrapper.cc similarity index 75% rename from mindspore/ccsrc/device/gpu/distribution/collective_wrapper.cc rename to mindspore/ccsrc/runtime/device/gpu/distribution/collective_wrapper.cc index 5fb0f74849..f427905afa 100644 --- a/mindspore/ccsrc/device/gpu/distribution/collective_wrapper.cc +++ b/mindspore/ccsrc/runtime/device/gpu/distribution/collective_wrapper.cc @@ -20,8 +20,9 @@ #include #include #include -#include "device/gpu/distribution/mpi_wrapper.h" -#include "device/gpu/distribution/nccl_wrapper.h" +#include +#include "runtime/device/gpu/distribution/mpi_wrapper.h" +#include 
"runtime/device/gpu/distribution/nccl_wrapper.h" #ifndef EXPORT_WRAPPER #define EXPORT_WRAPPER __attribute__((visibility("default"))) @@ -36,6 +37,22 @@ extern "C" EXPORT_WRAPPER int local_rank_id() { return MPIWrapper::instance().lo extern "C" EXPORT_WRAPPER void InitNCCLComm() { NCCLWrapper::instance().InitNCCLComm(); } +extern "C" EXPORT_WRAPPER bool CreateCommGroup(const std::string &group_name, const std::vector &ranks) { + return MPIWrapper::instance().CreateCommGroup(group_name, ranks); +} + +extern "C" EXPORT_WRAPPER int GetRankIDByGroup(const std::string &group_name) { + return MPIWrapper::instance().GetRankIDByGroup(group_name); +} + +extern "C" EXPORT_WRAPPER int GetGroupSize(const std::string &group_name) { + return MPIWrapper::instance().GetGroupSize(group_name); +} + +extern "C" EXPORT_WRAPPER bool DestroyGroup(const std::string &group_name) { + return MPIWrapper::instance().DestroyGroup(group_name); +} + extern "C" EXPORT_WRAPPER ncclResult_t AllReduce(const void *input_addr, void *output_addr, size_t count, ncclDataType_t data_type, ncclRedOp_t reduce_type, cudaStream_t stream) { diff --git a/mindspore/ccsrc/runtime/device/gpu/distribution/mpi_wrapper.cc b/mindspore/ccsrc/runtime/device/gpu/distribution/mpi_wrapper.cc new file mode 100644 index 0000000000..08ec320cab --- /dev/null +++ b/mindspore/ccsrc/runtime/device/gpu/distribution/mpi_wrapper.cc @@ -0,0 +1,156 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "runtime/device/gpu/distribution/mpi_wrapper.h" +#include +#include +#include +#include "runtime/device/gpu/distribution/nccl_wrapper.h" + +namespace mindspore { +namespace device { +namespace gpu { +MPIWrapper::MPIWrapper() : rank_id_(0), rank_size_(0), local_rank_id_(0) { Init(); } + +MPIWrapper::~MPIWrapper() { + int finalized; + MPI_Finalized(&finalized); + if (finalized == 0) { + MPI_Finalize(); + } +} + +MPIWrapper &MPIWrapper::instance() { + static MPIWrapper instance; + return instance; +} + +int MPIWrapper::local_rank_id() const { return local_rank_id_; } + +bool MPIWrapper::CreateCommGroup(const std::string &group_name, const std::vector &group_ranks) { + std::vector ranks(group_ranks.begin(), group_ranks.end()); + MPI_Group mpi_group; + CHECK_RET(MPI_Group_incl(world_group_, ranks.size(), ranks.data(), &mpi_group), MPI_SUCCESS, + "Failed to produce a new group from MPI_COMM_WORLD group for " + group_name); + SetGroupNameToMPIGroup(group_name, mpi_group); + + MPI_Comm mpi_group_comm; + CHECK_RET(MPI_Comm_create(MPI_COMM_WORLD, mpi_group, &mpi_group_comm), MPI_SUCCESS, + "Failed to create MPI communicator."); + if (mpi_group_comm == MPI_COMM_NULL) { + return false; + } + + ncclUniqueId group_unique_id; + if (rank_id_ == ranks[0]) { + group_unique_id = NCCLWrapper::instance().nccl_unique_id(); + } + MPI_Bcast(&group_unique_id, sizeof(ncclUniqueId), MPI_BYTE, ranks[0], mpi_group_comm); + + int group_rank[1]; + int global_rank[1] = {rank_id_}; + CHECK_RET(MPI_Group_translate_ranks(world_group_, 1, global_rank, mpi_group, group_rank), MPI_SUCCESS, + "Failed to translate global rank to group rank."); + if (group_rank[0] == MPI_UNDEFINED) { + return false; + } + + ncclComm_t nccl_group_comm; + NCCLWrapper::instance().InitNCCLComm(&nccl_group_comm, ranks.size(), group_unique_id, group_rank[0]); + 
NCCLWrapper::instance().SetGroupNameToNCCLComm(group_name, nccl_group_comm); + return true; +} + +int MPIWrapper::GetRankIDByGroup(const std::string &group_name) { + CHECK_RET(group_name_to_mpi_group_map_.count(group_name), 1, "Failed to get MPI group by group name " + group_name); + MPI_Group mpi_group = group_name_to_mpi_group_map_[group_name]; + int rank; + CHECK_RET(MPI_Group_rank(mpi_group, &rank), MPI_SUCCESS, "Failed to get rank id by group name." + group_name); + return rank; +} + +int MPIWrapper::GetGroupSize(const std::string &group_name) { + CHECK_RET(group_name_to_mpi_group_map_.count(group_name), 1, "Failed to get MPI group by group name" + group_name); + MPI_Group mpi_group = group_name_to_mpi_group_map_[group_name]; + int size; + CHECK_RET(MPI_Group_size(mpi_group, &size), MPI_SUCCESS, "Failed to get group size by group name." + group_name); + return size; +} + +bool MPIWrapper::DestroyGroup(const std::string &group_name) { + auto group_iter = group_name_to_mpi_group_map_.find(group_name); + if (group_iter == group_name_to_mpi_group_map_.end()) { + return false; + } + group_name_to_mpi_group_map_.erase(group_name); + MPI_Group mpi_group = group_iter->second; + CHECK_RET(MPI_Group_free(&mpi_group), MPI_SUCCESS, "Failed to free MPI group for " + group_name); + NCCLWrapper::instance().DestroyGroup(group_name); + return true; +} + +void MPIWrapper::Init() { + int initialized; + CHECK_RET(MPI_Initialized(&initialized), MPI_SUCCESS, "Failed to check mpi initialization status."); + if (initialized == 0) { + MPI_Init(nullptr, nullptr); + } + + CHECK_RET(MPI_Comm_rank(MPI_COMM_WORLD, &rank_id_), MPI_SUCCESS, "Failed to init mpi rank id."); + CHECK_RET(MPI_Comm_size(MPI_COMM_WORLD, &rank_size_), MPI_SUCCESS, "Failed to init mpi rank size."); + NCCLWrapper::instance().set_rank(rank_id_, rank_size_); + AssignLocalRankID(); + + CHECK_RET(MPI_Comm_group(MPI_COMM_WORLD, &world_group_), MPI_SUCCESS, "Failed to get group of MPI_COMM_WORLD"); + 
SetGroupNameToMPIGroup(NCCL_WORLD_GROUP, world_group_); + + ncclUniqueId unique_id; + if (rank_id_ == 0) { + unique_id = NCCLWrapper::instance().nccl_unique_id(); + } + CHECK_RET(MPI_Bcast(reinterpret_cast(&unique_id), sizeof(unique_id), MPI_BYTE, 0, MPI_COMM_WORLD), + MPI_SUCCESS, "Failed to broadcast nccl unique id."); + NCCLWrapper::instance().set_nccl_unique_id(unique_id); + return; +} + +void MPIWrapper::AssignLocalRankID() { + char host_name[MAX_HOSTNAME_LEN] = {0}; + CHECK_RET(gethostname(host_name, MAX_HOSTNAME_LEN), 0, "Getting host name failed."); + size_t host_hash = std::hash()(host_name); + + const int kRankSize = rank_size_; + size_t all_host_hashs[kRankSize]; + all_host_hashs[rank_id_] = host_hash; + CHECK_RET(MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, all_host_hashs, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD), + MPI_SUCCESS, "MPI_Allgather host hashs failed."); + for (int global_rank = 0; global_rank < kRankSize; global_rank++) { + if (global_rank == rank_id_) { + break; + } + if (all_host_hashs[global_rank] == all_host_hashs[rank_id_]) { + local_rank_id_++; + } + } + return; +} + +void MPIWrapper::SetGroupNameToMPIGroup(const std::string &group_name, const MPI_Group mpi_group) { + group_name_to_mpi_group_map_[group_name] = mpi_group; +} +} // namespace gpu +} // namespace device +} // namespace mindspore diff --git a/mindspore/ccsrc/device/gpu/distribution/mpi_wrapper.h b/mindspore/ccsrc/runtime/device/gpu/distribution/mpi_wrapper.h similarity index 69% rename from mindspore/ccsrc/device/gpu/distribution/mpi_wrapper.h rename to mindspore/ccsrc/runtime/device/gpu/distribution/mpi_wrapper.h index 6dfedea922..19d06b32d3 100644 --- a/mindspore/ccsrc/device/gpu/distribution/mpi_wrapper.h +++ b/mindspore/ccsrc/runtime/device/gpu/distribution/mpi_wrapper.h @@ -22,7 +22,10 @@ #include #include #include -#include "device/gpu/distribution/collective_common.h" +#include +#include +#include +#include "runtime/device/gpu/distribution/collective_common.h" 
namespace mindspore { namespace device { @@ -33,16 +36,23 @@ class MPIWrapper { MPIWrapper &operator=(const MPIWrapper &) = delete; static MPIWrapper &instance(); int local_rank_id() const; + bool CreateCommGroup(const std::string &group_name, const std::vector &ranks); + int GetRankIDByGroup(const std::string &group_name); + int GetGroupSize(const std::string &group_name); + bool DestroyGroup(const std::string &group_name); private: MPIWrapper(); ~MPIWrapper(); void Init(); - void AssignLocalRankId(); + void AssignLocalRankID(); + void SetGroupNameToMPIGroup(const std::string &group_name, const MPI_Group mpi_group); int rank_id_; int rank_size_; int local_rank_id_; + MPI_Group world_group_; + std::map group_name_to_mpi_group_map_; }; } // namespace gpu } // namespace device diff --git a/mindspore/ccsrc/device/gpu/distribution/nccl_wrapper.cc b/mindspore/ccsrc/runtime/device/gpu/distribution/nccl_wrapper.cc similarity index 52% rename from mindspore/ccsrc/device/gpu/distribution/nccl_wrapper.cc rename to mindspore/ccsrc/runtime/device/gpu/distribution/nccl_wrapper.cc index aa4756a69f..bcba538309 100644 --- a/mindspore/ccsrc/device/gpu/distribution/nccl_wrapper.cc +++ b/mindspore/ccsrc/runtime/device/gpu/distribution/nccl_wrapper.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "device/gpu/distribution/nccl_wrapper.h" +#include "runtime/device/gpu/distribution/nccl_wrapper.h" namespace mindspore { namespace device { @@ -40,21 +40,51 @@ void NCCLWrapper::set_rank(int rank_id, int rank_size) { void NCCLWrapper::InitNCCLComm() { CHECK_RET(ncclCommInitRank(&comm_, rank_size_, unique_id_, rank_id_), ncclSuccess, "Failed to init nccl communicator."); + group_to_comm_map_[NCCL_WORLD_GROUP] = comm_; +} + +void NCCLWrapper::InitNCCLComm(ncclComm_t *comm, int rank_size, ncclUniqueId unique_id, int rank) { + CHECK_RET(ncclCommInitRank(comm, rank_size, unique_id, rank), ncclSuccess, "Failed to init nccl communicator."); } ncclResult_t NCCLWrapper::AllReduce(const void *input_addr, void *output_addr, size_t count, ncclDataType_t data_type, - ncclRedOp_t reduce_type, cudaStream_t stream) { - return ncclAllReduce(input_addr, output_addr, count, data_type, reduce_type, comm_, stream); + ncclRedOp_t reduce_type, cudaStream_t stream, const std::string &group_name) { + CHECK_RET(group_to_comm_map_.count(group_name), 1, + "Failed to find NCCL communicator for AllReduce by the group name " + group_name); + ncclComm_t group_comm = group_to_comm_map_[group_name]; + return ncclAllReduce(input_addr, output_addr, count, data_type, reduce_type, group_comm, stream); } ncclResult_t NCCLWrapper::AllGather(const void *input_addr, void *output_addr, size_t count, ncclDataType_t data_type, - cudaStream_t stream) { - return ncclAllGather(input_addr, output_addr, count, data_type, comm_, stream); + cudaStream_t stream, const std::string &group_name) { + CHECK_RET(group_to_comm_map_.count(group_name), 1, + "Failed to find NCCL communicator for AllGather by the group name " + group_name); + ncclComm_t group_comm = group_to_comm_map_[group_name]; + return ncclAllGather(input_addr, output_addr, count, data_type, group_comm, stream); } ncclResult_t NCCLWrapper::ReduceScatter(const void *input_addr, void *output_addr, size_t count, - ncclDataType_t data_type, 
ncclRedOp_t reduce_type, cudaStream_t stream) { - return ncclReduceScatter(input_addr, output_addr, count, data_type, reduce_type, comm_, stream); + ncclDataType_t data_type, ncclRedOp_t reduce_type, cudaStream_t stream, + const std::string &group_name) { + CHECK_RET(group_to_comm_map_.count(group_name), 1, + "Failed to find NCCL communicator for ReduceScatter by the group name " + group_name); + ncclComm_t group_comm = group_to_comm_map_[group_name]; + return ncclReduceScatter(input_addr, output_addr, count, data_type, reduce_type, group_comm, stream); +} + +void NCCLWrapper::SetGroupNameToNCCLComm(const std::string &group_name, const ncclComm_t comm) { + group_to_comm_map_[group_name] = comm; +} + +void NCCLWrapper::DestroyGroup(const std::string &group_name) { + auto group_iter = group_to_comm_map_.find(group_name); + if (group_iter == group_to_comm_map_.end()) { + return; + } + group_to_comm_map_.erase(group_iter); + ncclComm_t group_comm = group_iter->second; + CHECK_RET(ncclCommDestroy(group_comm), ncclSuccess, "Failed to destroy NCCL communicator for " + group_name); + return; } } // namespace gpu } // namespace device diff --git a/mindspore/ccsrc/device/gpu/distribution/nccl_wrapper.h b/mindspore/ccsrc/runtime/device/gpu/distribution/nccl_wrapper.h similarity index 74% rename from mindspore/ccsrc/device/gpu/distribution/nccl_wrapper.h rename to mindspore/ccsrc/runtime/device/gpu/distribution/nccl_wrapper.h index 5df1e63bb8..9cea338c41 100644 --- a/mindspore/ccsrc/device/gpu/distribution/nccl_wrapper.h +++ b/mindspore/ccsrc/runtime/device/gpu/distribution/nccl_wrapper.h @@ -20,7 +20,9 @@ #include #include #include -#include "device/gpu/distribution/collective_common.h" +#include +#include +#include "runtime/device/gpu/distribution/collective_common.h" namespace mindspore { namespace device { @@ -34,12 +36,15 @@ class NCCLWrapper { void set_nccl_unique_id(ncclUniqueId unique_id); void set_rank(int rank_id, int rank_size); void InitNCCLComm(); + void 
InitNCCLComm(ncclComm_t *comm, int rank_size, ncclUniqueId unique_id, int rank); ncclResult_t AllReduce(const void *input_addr, void *output_addr, size_t count, ncclDataType_t datatype, - ncclRedOp_t op, cudaStream_t stream); + ncclRedOp_t op, cudaStream_t stream, const std::string &group_name = NCCL_WORLD_GROUP); ncclResult_t AllGather(const void *input_addr, void *output_addr, size_t count, ncclDataType_t datatype, - cudaStream_t stream); + cudaStream_t stream, const std::string &group_name = NCCL_WORLD_GROUP); ncclResult_t ReduceScatter(const void *input_addr, void *output_addr, size_t count, ncclDataType_t datatype, - ncclRedOp_t op, cudaStream_t stream); + ncclRedOp_t op, cudaStream_t stream, const std::string &group_name = NCCL_WORLD_GROUP); + void SetGroupNameToNCCLComm(const std::string &group_name, const ncclComm_t comm); + void DestroyGroup(const std::string &group_name); private: NCCLWrapper() : rank_id_(-1), rank_size_(0) {} @@ -50,6 +55,7 @@ class NCCLWrapper { int rank_size_; ncclUniqueId unique_id_; ncclComm_t comm_; + std::map group_to_comm_map_; }; } // namespace gpu } // namespace device diff --git a/mindspore/ccsrc/device/gpu/gpu_buffer_mgr.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_buffer_mgr.cc similarity index 99% rename from mindspore/ccsrc/device/gpu/gpu_buffer_mgr.cc rename to mindspore/ccsrc/runtime/device/gpu/gpu_buffer_mgr.cc index 621ba557e5..a1b1fa9b79 100644 --- a/mindspore/ccsrc/device/gpu/gpu_buffer_mgr.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_buffer_mgr.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "device/gpu/gpu_buffer_mgr.h" +#include "runtime/device/gpu/gpu_buffer_mgr.h" #include #include #include "utils/log_adapter.h" diff --git a/mindspore/ccsrc/device/gpu/gpu_buffer_mgr.h b/mindspore/ccsrc/runtime/device/gpu/gpu_buffer_mgr.h similarity index 98% rename from mindspore/ccsrc/device/gpu/gpu_buffer_mgr.h rename to mindspore/ccsrc/runtime/device/gpu/gpu_buffer_mgr.h index 5ce4a2cbdc..722a36c4ed 100644 --- a/mindspore/ccsrc/device/gpu/gpu_buffer_mgr.h +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_buffer_mgr.h @@ -25,7 +25,7 @@ #include #include #include -#include "device/gpu/blocking_queue.h" +#include "runtime/device/gpu/blocking_queue.h" #define EXPORT __attribute__((visibility("default"))) diff --git a/mindspore/ccsrc/device/gpu/gpu_common.h b/mindspore/ccsrc/runtime/device/gpu/gpu_common.h similarity index 100% rename from mindspore/ccsrc/device/gpu/gpu_common.h rename to mindspore/ccsrc/runtime/device/gpu/gpu_common.h diff --git a/mindspore/ccsrc/device/gpu/gpu_device_address.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc similarity index 93% rename from mindspore/ccsrc/device/gpu/gpu_device_address.cc rename to mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc index 401eb9f34e..a20a6a9a3c 100644 --- a/mindspore/ccsrc/device/gpu/gpu_device_address.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc @@ -14,11 +14,11 @@ * limitations under the License. 
*/ -#include "device/gpu/gpu_device_address.h" +#include "runtime/device/gpu/gpu_device_address.h" #include -#include "device/gpu/gpu_device_manager.h" +#include "runtime/device/gpu/gpu_device_manager.h" #include "utils/log_adapter.h" -#include "device/gpu/gpu_memory_allocator.h" +#include "runtime/device/gpu/gpu_memory_allocator.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/gpu/gpu_device_address.h b/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.h similarity index 95% rename from mindspore/ccsrc/device/gpu/gpu_device_address.h rename to mindspore/ccsrc/runtime/device/gpu/gpu_device_address.h index 4074cb6ce9..ade738deed 100644 --- a/mindspore/ccsrc/device/gpu/gpu_device_address.h +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.h @@ -19,7 +19,7 @@ #include #include -#include "device/device_address.h" +#include "runtime/device/device_address.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/gpu/gpu_device_manager.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_device_manager.cc similarity index 94% rename from mindspore/ccsrc/device/gpu/gpu_device_manager.cc rename to mindspore/ccsrc/runtime/device/gpu/gpu_device_manager.cc index 9f5f37c606..8f17fc20b5 100644 --- a/mindspore/ccsrc/device/gpu/gpu_device_manager.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_device_manager.cc @@ -14,11 +14,11 @@ * limitations under the License. 
*/ -#include "device/gpu/gpu_device_manager.h" -#include "device/gpu/gpu_common.h" +#include "runtime/device/gpu/gpu_device_manager.h" +#include "runtime/device/gpu/gpu_common.h" #include "utils/log_adapter.h" #include "utils/convert_utils.h" -#include "device/gpu/gpu_buffer_mgr.h" +#include "runtime/device/gpu/gpu_buffer_mgr.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/gpu/gpu_device_manager.h b/mindspore/ccsrc/runtime/device/gpu/gpu_device_manager.h similarity index 93% rename from mindspore/ccsrc/device/gpu/gpu_device_manager.h rename to mindspore/ccsrc/runtime/device/gpu/gpu_device_manager.h index b6b630181e..002806675c 100644 --- a/mindspore/ccsrc/device/gpu/gpu_device_manager.h +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_device_manager.h @@ -21,8 +21,8 @@ #include #include #include -#include "device/gpu/cuda_driver.h" -#include "device/gpu/gpu_memory_allocator.h" +#include "runtime/device/gpu/cuda_driver.h" +#include "runtime/device/gpu/gpu_memory_allocator.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_build.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.cc similarity index 85% rename from mindspore/ccsrc/device/gpu/gpu_kernel_build.cc rename to mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.cc index 19d2284510..9d88a205bc 100644 --- a/mindspore/ccsrc/device/gpu/gpu_kernel_build.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.cc @@ -13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "device/gpu/gpu_kernel_build.h" +#include "runtime/device/gpu/gpu_kernel_build.h" #include -#include "kernel/kernel.h" -#include "kernel/akg/akg_kernel_build.h" -#include "kernel/akg/gpu/akg_gpu_kernel_build.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "operator/ops.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/kernel_compiler/akg/akg_kernel_build.h" +#include "backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "frontend/operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace device { namespace gpu { diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_build.h b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.h similarity index 95% rename from mindspore/ccsrc/device/gpu/gpu_kernel_build.h rename to mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.h index 5770e4d3b1..831c4e9511 100644 --- a/mindspore/ccsrc/device/gpu/gpu_kernel_build.h +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.h @@ -17,7 +17,7 @@ #define MINDSPORE_CCSRC_DEVICE_GPU_GPUKERNELBUILD_H_ #include -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace device { namespace gpu { diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc similarity index 80% rename from mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc rename to mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc index ad0e093d7f..ddf73841b7 100644 --- a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc @@ -14,21 +14,21 @@ * limitations under the License. 
*/ -#include "device/gpu/gpu_kernel_runtime.h" -#include "device/gpu/gpu_device_address.h" -#include "device/gpu/cuda_driver.h" -#include "device/gpu/gpu_buffer_mgr.h" -#include "device/gpu/gpu_device_manager.h" -#include "device/gpu/gpu_memory_allocator.h" -#include "device/gpu/distribution/collective_init.h" +#include "runtime/device/gpu/gpu_kernel_runtime.h" +#include "runtime/device/gpu/gpu_device_address.h" +#include "runtime/device/gpu/cuda_driver.h" +#include "runtime/device/gpu/gpu_buffer_mgr.h" +#include "runtime/device/gpu/gpu_device_manager.h" +#include "runtime/device/gpu/gpu_memory_allocator.h" +#include "runtime/device/gpu/distribution/collective_init.h" #include "utils/convert_utils.h" #include "utils/context/ms_context.h" -#include "device/kernel_runtime_manager.h" -#include "device/gpu/gpu_common.h" +#include "runtime/device/kernel_runtime_manager.h" +#include "runtime/device/gpu/gpu_common.h" #include "common/utils.h" -#include "device/gpu/gpu_memory_manager.h" -#include "kernel/common_utils.h" -#include "device/gpu/gpu_memory_copy_manager.h" +#include "runtime/device/gpu/gpu_memory_manager.h" +#include "backend/kernel_compiler/common_utils.h" +#include "runtime/device/gpu/gpu_memory_copy_manager.h" namespace mindspore { namespace device { @@ -137,6 +137,7 @@ void GPUKernelRuntime::AssignMemory(session::KernelGraph *graph) { if (is_enable_dynamic_mem) { // Use the dynamic memory pool. 
InitKernelRefCount(graph); + InitMemorySwapInfo(graph); InitKernelOutputAddress(graph); } else { AssignDynamicMemory(graph); @@ -144,27 +145,24 @@ void GPUKernelRuntime::AssignMemory(session::KernelGraph *graph) { } bool GPUKernelRuntime::Run(session::KernelGraph *graph) { + struct timeval start_time, end_time; + (void)gettimeofday(&start_time, nullptr); bool ret = true; auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); bool is_enable_dynamic_mem = context_ptr->enable_dynamic_mem_pool(); bool is_enable_pynative_infer = context_ptr->enable_pynative_infer(); - auto iter = mem_swap_map_.find(graph); - if (iter == mem_swap_map_.end()) { - GPUMemCopyManagerPtr gpu_mem_copy_manager = std::make_shared(); - iter = mem_swap_map_.emplace(graph, std::make_shared(gpu_mem_copy_manager)).first; - } - mem_swap_manager_ = iter->second; - MS_EXCEPTION_IF_NULL(mem_swap_manager_); - struct timeval start_time, end_time; - (void)gettimeofday(&start_time, nullptr); if (is_enable_dynamic_mem && !is_enable_pynative_infer) { + auto graph_id = graph->graph_id(); + auto iter = mem_swap_map_.find(graph_id); + if (iter == mem_swap_map_.end()) { + MS_LOG(EXCEPTION) << "Find memory swap map failed."; + } + mem_swap_manager_ = iter->second; + MS_EXCEPTION_IF_NULL(mem_swap_manager_); while (!LaunchKernelDynamic(graph)) { - ClearKernelOutputAddress(graph); - if (!mem_swap_manager_->mem_swap_init()) { - mem_swap_manager_->Init(graph); - } - if (!mem_swap_manager_->RetreatSwapInfo()) { + MS_LOG(WARNING) << "Run out of memory and try memory swapping, it may take some time, please wait a moment."; + if (!UpdateMemorySwapInfo(graph)) { return false; } } @@ -197,6 +195,16 @@ void GPUKernelRuntime::InitKernelRefCount(const session::KernelGraph *graph) { mem_reuse_util_map_[graph_id] = mem_reuse_util_ptr; } +void GPUKernelRuntime::InitMemorySwapInfo(const session::KernelGraph *graph) { + MS_EXCEPTION_IF_NULL(graph); + GPUMemCopyManagerPtr gpu_mem_copy_manager = 
std::make_shared(); + MS_EXCEPTION_IF_NULL(gpu_mem_copy_manager); + MemSwapManagerPtr mem_swap_manager = std::make_shared(gpu_mem_copy_manager); + MS_EXCEPTION_IF_NULL(mem_swap_manager); + auto graph_id = graph->graph_id(); + mem_swap_map_[graph_id] = mem_swap_manager; +} + void GPUKernelRuntime::InitKernelOutputAddress(const session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(graph); auto &kernels = graph->execution_order(); @@ -227,7 +235,6 @@ void GPUKernelRuntime::ClearKernelOutputAddress(const session::KernelGraph *grap if (!AnfAlgo::OutputAddrExist(kernel, i)) { continue; } - auto device_address = AnfAlgo::GetMutableOutputAddr(kernel, i, false); if (device_address->ptr_) { mem_manager_->FreeMemFromMemPool(device_address); @@ -239,9 +246,12 @@ void GPUKernelRuntime::ClearKernelOutputAddress(const session::KernelGraph *grap bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(graph); - MS_EXCEPTION_IF_NULL(mem_swap_manager_); auto graph_id = graph->graph_id(); - auto mem_reuse_util_ptr = mem_reuse_util_map_[graph_id]; + auto iter = mem_reuse_util_map_.find(graph_id); + if (iter == mem_reuse_util_map_.end()) { + MS_LOG(EXCEPTION) << "Find memory reuse map failed."; + } + auto mem_reuse_util_ptr = iter->second; MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr); // Reset the reference count. 
mem_reuse_util_ptr->ResetDynamicUsedRefCount(); @@ -263,27 +273,14 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph) { MS_LOG(EXCEPTION) << "Launch kernel failed."; } FreeKernelDynamicRes(kernel, kernel_workspaces, graph_id); - - if (mem_swap_manager_->trigger_swap() && mem_swap_manager_->QueryKernelTriggerSwap(kernel)) { - CHECK_OP_RET_WITH_EXCEPT(SyncStream(), "SyncStream failed."); - if (!AddMemSwapTask(kernel)) { - return false; - } - } - - if (mem_swap_manager_->trigger_swap()) { - mem_swap_manager_->SyncMemCopyStream(SwapKind::kDeviceToHost); - } + UpdateMemorySwapTask(kernel); } - CHECK_OP_RET_WITH_EXCEPT(SyncStream(), "SyncStream failed."); - if (mem_swap_manager_->trigger_swap()) { - mem_swap_manager_->ClearSwapQueue(); - } + ClearSwapQueue(); return true; } -bool GPUKernelRuntime::AddMemSwapTask(const AnfNodePtr &kernel) { +bool GPUKernelRuntime::AddMemorySwapTask(const AnfNodePtr &kernel) { MS_EXCEPTION_IF_NULL(mem_swap_manager_); auto &mem_swap_info_list = mem_swap_manager_->QueryKernelMemSwapInfo(kernel); for (auto &mem_swap_info : mem_swap_info_list) { @@ -311,14 +308,92 @@ bool GPUKernelRuntime::AddMemSwapTask(const AnfNodePtr &kernel) { return true; } +bool GPUKernelRuntime::UpdateMemorySwapInfo(const session::KernelGraph *graph) { + MS_EXCEPTION_IF_NULL(mem_swap_manager_); + ClearKernelOutputAddress(graph); + if (!mem_swap_manager_->mem_swap_init()) { + mem_swap_manager_->Init(graph); + } + return mem_swap_manager_->RetreatSwapInfo(); +} + +bool GPUKernelRuntime::UpdateMemorySwapTask(const AnfNodePtr &kernel) { + MS_EXCEPTION_IF_NULL(mem_swap_manager_); + if (!mem_swap_manager_->trigger_swap()) { + return true; + } + if (mem_swap_manager_->QueryKernelTriggerSwap(kernel)) { + CHECK_OP_RET_WITH_EXCEPT(SyncStream(), "SyncStream failed."); + if (!AddMemorySwapTask(kernel)) { + return false; + } + } + CHECK_OP_RET_WITH_EXCEPT(mem_swap_manager_->SyncMemCopyStream(SwapKind::kDeviceToHost), "SyncCopyStream failed."); + return 
true; +} + +void GPUKernelRuntime::UpdateHostSwapQueue(const DeviceAddressPtr device_address) { + MS_EXCEPTION_IF_NULL(mem_swap_manager_); + if (!mem_swap_manager_->trigger_swap()) { + return; + } + while (auto device_address_swap_in = mem_swap_manager_->UpdateSwapQueue(SwapKind::kHostToDevice)) { + device_address_swap_in->set_status(DeviceAddressStatus::kInDevice); + } + auto status = device_address->status(); + switch (status) { + case DeviceAddressStatus::kInDevice: + break; + case DeviceAddressStatus::kInDeviceToHost: { + mem_swap_manager_->InsertSwapInBlackList(device_address->ptr_); + device_address->set_status(DeviceAddressStatus::kInDevice); + break; + } + case DeviceAddressStatus::kInHostToDevice: { + while (device_address->status() != DeviceAddressStatus::kInDevice) { + while (auto device_address_swap_in = mem_swap_manager_->UpdateSwapQueue(SwapKind::kHostToDevice)) { + device_address_swap_in->set_status(DeviceAddressStatus::kInDevice); + } + } + break; + } + case DeviceAddressStatus::kInHost: + MS_LOG(ERROR) << "Invaild device address status:" << status; + break; + default: + MS_LOG(EXCEPTION) << "Invaild device address status:" << status; + } +} + +void GPUKernelRuntime::UpdateDeviceSwapQueue() { + MS_EXCEPTION_IF_NULL(mem_swap_manager_); + if (!mem_swap_manager_->trigger_swap()) { + return; + } + while (auto device_address_swap_out = mem_swap_manager_->UpdateSwapQueue(SwapKind::kDeviceToHost)) { + if (!mem_swap_manager_->FindInSwapInBlackList(device_address_swap_out->ptr_) && device_address_swap_out->ptr_) { + device_address_swap_out->set_status(DeviceAddressStatus::kInHost); + mem_manager_->FreeMemFromMemPool(device_address_swap_out); + } + } +} + +void GPUKernelRuntime::ClearSwapQueue() { + MS_EXCEPTION_IF_NULL(mem_swap_manager_); + if (!mem_swap_manager_->trigger_swap()) { + return; + } + mem_swap_manager_->ClearSwapQueue(); +} + bool GPUKernelRuntime::AttemptMallocMem(const DeviceAddressPtr &device_address, size_t size) { 
MS_EXCEPTION_IF_NULL(mem_manager_); + MS_EXCEPTION_IF_NULL(mem_swap_manager_); auto ret = mem_manager_->MallocMemFromMemPool(device_address, size); if (!ret) { if (!mem_swap_manager_->trigger_swap()) { return false; } - mem_swap_manager_->SyncMemCopyStream(SwapKind::kDeviceToHost); while (auto device_address_swap_out = mem_swap_manager_->UpdateSwapQueue(SwapKind::kDeviceToHost)) { if (!mem_swap_manager_->FindInSwapInBlackList(device_address_swap_out->ptr_) && device_address_swap_out->ptr_) { @@ -326,7 +401,6 @@ bool GPUKernelRuntime::AttemptMallocMem(const DeviceAddressPtr &device_address, mem_manager_->FreeMemFromMemPool(device_address_swap_out); } } - ret = mem_manager_->MallocMemFromMemPool(device_address, size); if (!ret) { return false; @@ -337,12 +411,12 @@ bool GPUKernelRuntime::AttemptMallocMem(const DeviceAddressPtr &device_address, void *GPUKernelRuntime::AttemptMallocMem(size_t size) { MS_EXCEPTION_IF_NULL(mem_manager_); + MS_EXCEPTION_IF_NULL(mem_swap_manager_); auto device_ptr = mem_manager_->MallocMemFromMemPool(size); if (!device_ptr) { if (!mem_swap_manager_->trigger_swap()) { return nullptr; } - mem_swap_manager_->SyncMemCopyStream(SwapKind::kDeviceToHost); while (auto device_address_swap_out = mem_swap_manager_->UpdateSwapQueue(SwapKind::kDeviceToHost)) { if (!mem_swap_manager_->FindInSwapInBlackList(device_address_swap_out->ptr_) && device_address_swap_out->ptr_) { @@ -350,7 +424,6 @@ void *GPUKernelRuntime::AttemptMallocMem(size_t size) { mem_manager_->FreeMemFromMemPool(device_address_swap_out); } } - device_ptr = mem_manager_->MallocMemFromMemPool(size); if (!device_ptr) { return nullptr; @@ -377,40 +450,11 @@ bool GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod bool GPUKernelRuntime::AllocKernelInputDynamicRes(const mindspore::AnfNodePtr &kernel, AddressPtrList *kernel_inputs) { MS_EXCEPTION_IF_NULL(kernel); MS_EXCEPTION_IF_NULL(kernel_inputs); - MS_EXCEPTION_IF_NULL(mem_swap_manager_); for (size_t i = 0; i < 
AnfAlgo::GetInputTensorNum(kernel); ++i) { // Graph may be all nop nodes and not remove nop node, so this can not skip nop node. auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, i, false); MS_EXCEPTION_IF_NULL(device_address); - if (mem_swap_manager_->trigger_swap()) { - while (auto device_address_swap_in = mem_swap_manager_->UpdateSwapQueue(SwapKind::kHostToDevice)) { - device_address_swap_in->set_status(DeviceAddressStatus::kInDevice); - } - - auto status = device_address->status(); - switch (status) { - case DeviceAddressStatus::kInDevice: - break; - case DeviceAddressStatus::kInHost: - break; - case DeviceAddressStatus::kInDeviceToHost: { - mem_swap_manager_->InsertSwapInBlackList(device_address->ptr_); - device_address->set_status(DeviceAddressStatus::kInDevice); - break; - } - case DeviceAddressStatus::kInHostToDevice: { - while (device_address->status() != DeviceAddressStatus::kInDevice) { - while (auto device_address_swap_in = mem_swap_manager_->UpdateSwapQueue(SwapKind::kHostToDevice)) { - device_address_swap_in->set_status(DeviceAddressStatus::kInDevice); - } - } - break; - } - default: - MS_LOG(ERROR) << "Invaild device address status"; - return false; - } - } + UpdateHostSwapQueue(device_address); MS_EXCEPTION_IF_NULL(device_address->ptr_); kernel::AddressPtr input = std::make_shared(); MS_EXCEPTION_IF_NULL(input); @@ -426,16 +470,7 @@ bool GPUKernelRuntime::AllocKernelOutputDynamicRes(const mindspore::kernel::Kern AddressPtrList *kernel_outputs) { MS_EXCEPTION_IF_NULL(kernel); MS_EXCEPTION_IF_NULL(kernel_outputs); - MS_EXCEPTION_IF_NULL(mem_manager_); - MS_EXCEPTION_IF_NULL(mem_swap_manager_); - if (mem_swap_manager_->trigger_swap()) { - while (auto device_address_swap_out = mem_swap_manager_->UpdateSwapQueue(SwapKind::kDeviceToHost)) { - if (!mem_swap_manager_->FindInSwapInBlackList(device_address_swap_out->ptr_) && device_address_swap_out->ptr_) { - device_address_swap_out->set_status(DeviceAddressStatus::kInHost); - 
mem_manager_->FreeMemFromMemPool(device_address_swap_out); - } - } - } + UpdateDeviceSwapQueue(); auto output_sizes = kernel_mod.GetOutputSizeList(); for (size_t i = 0; i < output_sizes.size(); ++i) { auto device_address = AnfAlgo::GetMutableOutputAddr(kernel, i, false); diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.h b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.h similarity index 84% rename from mindspore/ccsrc/device/gpu/gpu_kernel_runtime.h rename to mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.h index ea3ab17160..2b1f8198ce 100644 --- a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.h +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.h @@ -22,9 +22,9 @@ #include #include #include -#include "device/kernel_runtime.h" -#include "device/kernel_runtime_manager.h" -#include "pre_activate/mem_reuse/mem_swap_manager.h" +#include "runtime/device/kernel_runtime.h" +#include "runtime/device/kernel_runtime_manager.h" +#include "backend/optimizer/mem_reuse/mem_swap_manager.h" namespace mindspore { namespace device { @@ -53,9 +53,9 @@ class GPUKernelRuntime : public KernelRuntime { // The related functions and members for using dynamic memory pool. 
void InitKernelRefCount(const session::KernelGraph *graph); void InitKernelOutputAddress(const session::KernelGraph *graph); + void InitMemorySwapInfo(const session::KernelGraph *graph); void ClearKernelOutputAddress(const session::KernelGraph *graph); bool LaunchKernelDynamic(const session::KernelGraph *graph); - bool AddMemSwapTask(const AnfNodePtr &kernel); bool AttemptMallocMem(const DeviceAddressPtr &device_address, size_t size); void *AttemptMallocMem(size_t size); bool AllocKernelDynamicRes(const mindspore::kernel::KernelMod &kernel_mod, const mindspore::AnfNodePtr &kernel, @@ -74,8 +74,14 @@ class GPUKernelRuntime : public KernelRuntime { std::vector size_list); void FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel, const AddressPtrList &kernel_workspaces, uint32_t graph_id); + bool AddMemorySwapTask(const AnfNodePtr &kernel); + bool UpdateMemorySwapInfo(const session::KernelGraph *graph); + bool UpdateMemorySwapTask(const AnfNodePtr &kernel); + void UpdateHostSwapQueue(const DeviceAddressPtr device_address); + void UpdateDeviceSwapQueue(); + void ClearSwapQueue(); std::unordered_map mem_reuse_util_map_; - std::unordered_map mem_swap_map_; + std::unordered_map mem_swap_map_; MemSwapManagerPtr mem_swap_manager_{nullptr}; }; MS_REG_KERNEL_RUNTIME(kGPUDevice, GPUKernelRuntime); diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_allocator.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_allocator.cc similarity index 95% rename from mindspore/ccsrc/device/gpu/gpu_memory_allocator.cc rename to mindspore/ccsrc/runtime/device/gpu/gpu_memory_allocator.cc index 9137945661..e2395bbaf2 100644 --- a/mindspore/ccsrc/device/gpu/gpu_memory_allocator.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_allocator.cc @@ -15,8 +15,8 @@ */ #include -#include "device/gpu/gpu_memory_allocator.h" -#include "device/gpu/cuda_driver.h" +#include "runtime/device/gpu/gpu_memory_allocator.h" +#include "runtime/device/gpu/cuda_driver.h" #include "utils/log_adapter.h" #include 
"utils/context/ms_context.h" #include "utils/convert_utils_base.h" diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_allocator.h b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_allocator.h similarity index 91% rename from mindspore/ccsrc/device/gpu/gpu_memory_allocator.h rename to mindspore/ccsrc/runtime/device/gpu/gpu_memory_allocator.h index 90d7791057..4b6eaa4e14 100644 --- a/mindspore/ccsrc/device/gpu/gpu_memory_allocator.h +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_allocator.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_DEVICE_GPU_GPU_MEMORY_ALLOCATOR_H_ #include -#include "device/gpu/cuda_driver.h" -#include "pre_activate/mem_reuse/mem_dynamic_allocator.h" +#include "runtime/device/gpu/cuda_driver.h" +#include "backend/optimizer/mem_reuse/mem_dynamic_allocator.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_copy_manager.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_copy_manager.cc similarity index 96% rename from mindspore/ccsrc/device/gpu/gpu_memory_copy_manager.cc rename to mindspore/ccsrc/runtime/device/gpu/gpu_memory_copy_manager.cc index 80206f309d..0406c0f151 100644 --- a/mindspore/ccsrc/device/gpu/gpu_memory_copy_manager.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_copy_manager.cc @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#include "device/gpu/gpu_memory_copy_manager.h" -#include "device/gpu/gpu_common.h" -#include "device/gpu/gpu_device_manager.h" -#include "session/anf_runtime_algorithm.h" +#include "runtime/device/gpu/gpu_memory_copy_manager.h" +#include "runtime/device/gpu/gpu_common.h" +#include "runtime/device/gpu/gpu_device_manager.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_copy_manager.h b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_copy_manager.h similarity index 91% rename from mindspore/ccsrc/device/gpu/gpu_memory_copy_manager.h rename to mindspore/ccsrc/runtime/device/gpu/gpu_memory_copy_manager.h index 36ff273015..dc99b7f7d0 100644 --- a/mindspore/ccsrc/device/gpu/gpu_memory_copy_manager.h +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_copy_manager.h @@ -20,10 +20,10 @@ #include #include #include -#include "pre_activate/mem_reuse/mem_copy_manager.h" -#include "device/device_address.h" -#include "device/gpu/cuda_driver.h" -#include "kernel/kernel.h" +#include "backend/optimizer/mem_reuse/mem_copy_manager.h" +#include "runtime/device/device_address.h" +#include "runtime/device/gpu/cuda_driver.h" +#include "backend/kernel_compiler/kernel.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_manager.cc similarity index 97% rename from mindspore/ccsrc/device/gpu/gpu_memory_manager.cc rename to mindspore/ccsrc/runtime/device/gpu/gpu_memory_manager.cc index 9a63921add..ffa07eea0d 100644 --- a/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_manager.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "device/gpu/gpu_memory_manager.h" -#include "device/gpu/gpu_memory_allocator.h" +#include "runtime/device/gpu/gpu_memory_manager.h" +#include "runtime/device/gpu/gpu_memory_allocator.h" #include "utils/context/ms_context.h" #include "utils/convert_utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_manager.h b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_manager.h similarity index 97% rename from mindspore/ccsrc/device/gpu/gpu_memory_manager.h rename to mindspore/ccsrc/runtime/device/gpu/gpu_memory_manager.h index c79fb9cc22..533116cefc 100644 --- a/mindspore/ccsrc/device/gpu/gpu_memory_manager.h +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_manager.h @@ -17,7 +17,7 @@ #ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_GPU_GPU_MEMORY_MANAGER_H_ #define MINDSPORE_MINDSPORE_CCSRC_DEVICE_GPU_GPU_MEMORY_MANAGER_H_ #include -#include "device/memory_manager.h" +#include "runtime/device/memory_manager.h" namespace mindspore { namespace device { namespace gpu { diff --git a/mindspore/ccsrc/device/gpu/gpu_stream_assign.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_stream_assign.cc similarity index 97% rename from mindspore/ccsrc/device/gpu/gpu_stream_assign.cc rename to mindspore/ccsrc/runtime/device/gpu/gpu_stream_assign.cc index 42cdcf29ec..78915f10d7 100644 --- a/mindspore/ccsrc/device/gpu/gpu_stream_assign.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_stream_assign.cc @@ -14,14 +14,14 @@ * limitations under the License. 
*/ -#include "device/gpu/gpu_stream_assign.h" +#include "runtime/device/gpu/gpu_stream_assign.h" #include #include #include #include -#include "device/gpu/gpu_common.h" -#include "device/gpu/kernel_info_setter.h" -#include "device/gpu/gpu_device_manager.h" +#include "runtime/device/gpu/gpu_common.h" +#include "runtime/device/gpu/kernel_info_setter.h" +#include "runtime/device/gpu/gpu_device_manager.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/gpu/gpu_stream_assign.h b/mindspore/ccsrc/runtime/device/gpu/gpu_stream_assign.h similarity index 97% rename from mindspore/ccsrc/device/gpu/gpu_stream_assign.h rename to mindspore/ccsrc/runtime/device/gpu/gpu_stream_assign.h index f8041878b2..f22ce8fe38 100644 --- a/mindspore/ccsrc/device/gpu/gpu_stream_assign.h +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_stream_assign.h @@ -20,8 +20,8 @@ #include #include #include -#include "session/kernel_graph.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/kernel_graph.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/gpu/kernel_info_setter.cc b/mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc similarity index 94% rename from mindspore/ccsrc/device/gpu/kernel_info_setter.cc rename to mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc index 42e76e2483..4326987784 100644 --- a/mindspore/ccsrc/device/gpu/kernel_info_setter.cc +++ b/mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc @@ -14,18 +14,18 @@ * limitations under the License. 
*/ -#include "device/gpu/kernel_info_setter.h" +#include "runtime/device/gpu/kernel_info_setter.h" #include #include -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" #include "utils/utils.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/kernel_build_info.h" -#include "session/anf_runtime_algorithm.h" -#include "kernel/common_utils.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/common_utils.h" #include "common/utils.h" -#include "kernel/oplib/oplib.h" -#include "kernel/oplib/opinfo.h" +#include "backend/kernel_compiler/oplib/oplib.h" +#include "backend/kernel_compiler/oplib/opinfo.h" namespace mindspore { namespace device { @@ -88,10 +88,11 @@ std::string SupportedTypeList(const CNodePtr &kernel_node) { supported_akg_type_list = supported_akg_type_list + mindspore::kernel::TypeId2String(type); } supported_type_lists = supported_type_lists + supported_akg_type_list + "], out["; + supported_akg_type_list.clear(); for (auto type : supported_akg_type_out) { supported_akg_type_list = supported_akg_type_list + mindspore::kernel::TypeId2String(type); } - supported_type_lists += "]; "; + supported_type_lists = supported_type_lists + supported_akg_type_list + "]; "; } return supported_type_lists; } diff --git a/mindspore/ccsrc/device/gpu/kernel_info_setter.h b/mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.h similarity index 100% rename from mindspore/ccsrc/device/gpu/kernel_info_setter.h rename to mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.h diff --git a/mindspore/ccsrc/device/gpu/mpi/mpi_initializer.cc b/mindspore/ccsrc/runtime/device/gpu/mpi/mpi_initializer.cc similarity index 97% rename from mindspore/ccsrc/device/gpu/mpi/mpi_initializer.cc rename to mindspore/ccsrc/runtime/device/gpu/mpi/mpi_initializer.cc index bcad74e5b5..4605a0eb4e 100644 --- 
a/mindspore/ccsrc/device/gpu/mpi/mpi_initializer.cc +++ b/mindspore/ccsrc/runtime/device/gpu/mpi/mpi_initializer.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "device/gpu/mpi/mpi_initializer.h" +#include "runtime/device/gpu/mpi/mpi_initializer.h" #include #include diff --git a/mindspore/ccsrc/device/gpu/mpi/mpi_initializer.h b/mindspore/ccsrc/runtime/device/gpu/mpi/mpi_initializer.h similarity index 100% rename from mindspore/ccsrc/device/gpu/mpi/mpi_initializer.h rename to mindspore/ccsrc/runtime/device/gpu/mpi/mpi_initializer.h diff --git a/mindspore/ccsrc/device/gpu/readme.md b/mindspore/ccsrc/runtime/device/gpu/readme.md similarity index 100% rename from mindspore/ccsrc/device/gpu/readme.md rename to mindspore/ccsrc/runtime/device/gpu/readme.md diff --git a/mindspore/ccsrc/device/kernel_adjust.cc b/mindspore/ccsrc/runtime/device/kernel_adjust.cc similarity index 96% rename from mindspore/ccsrc/device/kernel_adjust.cc rename to mindspore/ccsrc/runtime/device/kernel_adjust.cc index fd0a8eb967..bb1f7f723e 100644 --- a/mindspore/ccsrc/device/kernel_adjust.cc +++ b/mindspore/ccsrc/runtime/device/kernel_adjust.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "device/kernel_adjust.h" +#include "runtime/device/kernel_adjust.h" #include #include @@ -23,17 +23,18 @@ #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/context/ms_context.h" #include "common/trans.h" #include "utils/config_manager.h" #include "common/utils.h" -#include "kernel/kernel_build_info.h" +#include "backend/kernel_compiler/kernel_build_info.h" #include "utils/utils.h" -#include "device/ascend/profiling/profiling_manager.h" -#include "device/ascend/kernel_select_ascend.h" +#include "runtime/device/ascend/profiling/profiling_manager.h" +#include "runtime/device/ascend/kernel_select_ascend.h" #include "runtime/base.h" -#include "device/ascend/ascend_stream_assign.h" +#include "runtime/device/ascend/ascend_stream_assign.h" + namespace mindspore { namespace device { using device::ascend::ProfilingUtils; @@ -117,6 +118,7 @@ void KernelAdjust::InsertSwitchLoop(const std::shared_ptr std::vector *mute_inputs = kernel_graph_ptr->MutableInputs(); MS_EXCEPTION_IF_NULL(mute_inputs); mute_inputs->push_back(switch_loop_input[kLoopCountParamName]); + mute_inputs->push_back(switch_loop_input[kEpochParamName]); mute_inputs->push_back(switch_loop_input[kIterLoopParamName]); mute_inputs->push_back(switch_loop_input[kZeroParamName]); mute_inputs->push_back(switch_loop_input[kOneParamName]); @@ -316,6 +318,13 @@ void KernelAdjust::CreateSwitchOpParameters(const std::shared_ptrset_abstract(paremeter_abstract_ptr); ParameterPtr one_new = kernel_graph_ptr->NewParameter(one); (*switch_loop_input)[kOneParamName] = one_new; + + ParameterPtr epoch = std::make_shared(kernel_graph_ptr); + MS_EXCEPTION_IF_NULL(epoch); + epoch->set_name(kEpochParamName); + epoch->set_abstract(paremeter_abstract_ptr); + ParameterPtr epoch_new = kernel_graph_ptr->NewParameter(epoch); + (*switch_loop_input)[kEpochParamName] = epoch_new; } kernel::KernelBuildInfo::KernelBuildInfoBuilder 
KernelAdjust::CreateMngKernelBuilder( @@ -510,6 +519,14 @@ void KernelAdjust::LoadSwitchInputs(std::vector *inputs) { *val = 0; inputs->push_back(loop_count_tensor); + // Epoch in device + tensor::TensorPtr epoch_tensor = std::make_shared(kInt32->type_id(), shp); + MS_EXCEPTION_IF_NULL(epoch_tensor); + val = static_cast(epoch_tensor->data_c()); + MS_EXCEPTION_IF_NULL(val); + *val = 0; + inputs->push_back(epoch_tensor); + tensor::TensorPtr iter_loop_tensor = std::make_shared(kInt32->type_id(), shp); MS_EXCEPTION_IF_NULL(iter_loop_tensor); val = static_cast(iter_loop_tensor->data_c()); @@ -531,6 +548,7 @@ void KernelAdjust::LoadSwitchInputs(std::vector *inputs) { MS_EXCEPTION_IF_NULL(val); *val = 1; inputs->push_back(one_tensor); + MS_LOG(INFO) << "---------------- LoadSwitchInputs End--"; } diff --git a/mindspore/ccsrc/device/kernel_adjust.h b/mindspore/ccsrc/runtime/device/kernel_adjust.h similarity index 92% rename from mindspore/ccsrc/device/kernel_adjust.h rename to mindspore/ccsrc/runtime/device/kernel_adjust.h index bf3ba2acb2..dbd6f226af 100644 --- a/mindspore/ccsrc/device/kernel_adjust.h +++ b/mindspore/ccsrc/runtime/device/kernel_adjust.h @@ -23,12 +23,12 @@ #include #include #include "ir/anf.h" -#include "session/kernel_graph.h" -#include "kernel/kernel_build_info.h" -#include "session/session_context.h" +#include "backend/session/kernel_graph.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "backend/session/session_context.h" #include "ir/tensor.h" -#include "device/ascend/profiling/profiling_utils.h" -#include "device/kernel_info.h" +#include "runtime/device/ascend/profiling/profiling_utils.h" +#include "runtime/device/kernel_info.h" using mindspore::device::ascend::ProfilingTraceInfo; using mindspore::device::ascend::ProfilingUtils; @@ -37,6 +37,7 @@ constexpr auto kLoopCountParamName = "loop_count"; constexpr auto kIterLoopParamName = "iter_loop"; constexpr auto kZeroParamName = "zero"; constexpr auto kOneParamName = "one"; 
+constexpr auto kEpochParamName = "loop_epoch"; constexpr auto kStreamNeedActivedFirst = "stream_need_active_first"; constexpr uint32_t kSecondStreamSwitchLabel = 2; diff --git a/mindspore/ccsrc/device/kernel_info.cc b/mindspore/ccsrc/runtime/device/kernel_info.cc similarity index 99% rename from mindspore/ccsrc/device/kernel_info.cc rename to mindspore/ccsrc/runtime/device/kernel_info.cc index 59c9b0f411..692532e70b 100644 --- a/mindspore/ccsrc/device/kernel_info.cc +++ b/mindspore/ccsrc/runtime/device/kernel_info.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "device/kernel_info.h" +#include "runtime/device/kernel_info.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/kernel_info.h b/mindspore/ccsrc/runtime/device/kernel_info.h similarity index 91% rename from mindspore/ccsrc/device/kernel_info.h rename to mindspore/ccsrc/runtime/device/kernel_info.h index 84cfaa0fa3..baded9d9a3 100644 --- a/mindspore/ccsrc/device/kernel_info.h +++ b/mindspore/ccsrc/runtime/device/kernel_info.h @@ -19,15 +19,16 @@ #include #include -#include "kernel/kernel_build_info.h" -#include "device/ascend/ascend_device_address.h" -#include "kernel/kernel.h" +#include "ir/kernel_info_dev.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "runtime/device/ascend/ascend_device_address.h" +#include "backend/kernel_compiler/kernel.h" namespace mindspore { const uint32_t kInvalidGraphId = UINT32_MAX; const uint32_t kInvalidDistincLabel = UINT32_MAX; namespace device { -class KernelInfo { +class KernelInfo : public KernelInfoDevice { public: KernelInfo() { kernel_mod_ = nullptr; @@ -41,6 +42,7 @@ class KernelInfo { } virtual ~KernelInfo() = default; + bool has_build_info() const override { return select_kernel_build_info() != nullptr; } const kernel::KernelBuildInfo *select_kernel_build_info() const; kernel::KernelBuildInfoPtr GetMutableSelectKernelBuildInfo() const; void set_select_kernel_build_info(const 
kernel::KernelBuildInfoPtr &select_kernel_build_info) { diff --git a/mindspore/ccsrc/device/kernel_runtime.cc b/mindspore/ccsrc/runtime/device/kernel_runtime.cc similarity index 96% rename from mindspore/ccsrc/device/kernel_runtime.cc rename to mindspore/ccsrc/runtime/device/kernel_runtime.cc index 27cf1dfc92..3de9af8c23 100644 --- a/mindspore/ccsrc/device/kernel_runtime.cc +++ b/mindspore/ccsrc/runtime/device/kernel_runtime.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "device/kernel_runtime.h" +#include "runtime/device/kernel_runtime.h" #include #include #include @@ -23,12 +23,12 @@ #include "common/trans.h" #include "utils/utils.h" #include "utils/context/ms_context.h" -#include "operator/ops.h" -#include "pipeline/parse/python_adapter.h" -#include "session/kernel_graph.h" -#include "session/anf_runtime_algorithm.h" -#include "kernel/common_utils.h" -#include "kernel/oplib/oplib.h" +#include "frontend/operator/ops.h" +#include "pipeline/jit/parse/python_adapter.h" +#include "backend/session/kernel_graph.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/common_utils.h" +#include "backend/kernel_compiler/oplib/oplib.h" #include "ir/value.h" using mindspore::kernel::Address; using mindspore::kernel::AddressPtr; @@ -214,8 +214,10 @@ void KernelRuntime::RunOpAssignInputMemory(const std::vector auto output_size = AnfAlgo::GetOutputTensorNum(item); for (size_t index = 0; index < output_size; index++) { MS_EXCEPTION_IF_NULL(input_tensors[input_index]); - if (input_tensors[input_index]->device_address().get() != nullptr) { - AnfAlgo::SetOutputAddr(input_tensors[input_index]->device_address(), index, item.get()); + auto output_address = + std::dynamic_pointer_cast(input_tensors[input_index]->device_address()); + if (output_address != nullptr) { + AnfAlgo::SetOutputAddr(output_address, index, item.get()); continue; } TypeId output_type_id = AnfAlgo::GetOutputDeviceDataType(item, index); @@ -292,6 +294,7 @@ void 
KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(mem_manager_); auto graph_inputs = graph->inputs(); auto graph_valid_input = graph->valid_inputs(); + graph_inputs.insert(graph_inputs.end(), graph->child_graph_result().begin(), graph->child_graph_result().end()); std::vector need_alloc_nodes; for (size_t i = 0; i < graph_inputs.size(); ++i) { auto item = graph_inputs[i]; @@ -431,6 +434,10 @@ void KernelRuntime::AssignCommunicationNodeOutputMem(int flag, const AnfNodePtr std::string output_format = AnfAlgo::GetOutputFormat(node, j); auto output_type = AnfAlgo::GetOutputDeviceDataType(node, j); auto address = CreateDeviceAddress(output_ptr, output_sizes[j], output_format, output_type); + MS_EXCEPTION_IF_NULL(address); + if (AnfAlgo::IsCommunicationOp(node) && context_ptr->enable_hccl()) { + address->UpdateCommunicationAddress(); + } AnfAlgo::SetOutputAddr(address, j, node.get()); output_ptr += align_size_list[j]; } @@ -480,6 +487,8 @@ void KernelRuntime::AssignCommunicationNodeInputMem(const AnfNodePtr &node) { } void KernelRuntime::AssignNodeOutputMem(int flag, const AnfNodePtr &node, int index) { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); MS_EXCEPTION_IF_NULL(node); MS_EXCEPTION_IF_NULL(mem_manager_); if (AnfAlgo::IsGetNext(NOT_NULL(node)) && flag == kReuseDynamicMem) { @@ -509,7 +518,11 @@ void KernelRuntime::AssignNodeOutputMem(int flag, const AnfNodePtr &node, int in std::string output_format = AnfAlgo::GetOutputFormat(node, i); auto output_type = AnfAlgo::GetOutputDeviceDataType(node, i); auto device_address = CreateDeviceAddress(ptr, output_sizes[i], output_format, output_type); + MS_EXCEPTION_IF_NULL(device_address); device_address->set_host_shape(trans::GetRuntimePaddingShape(node, i)); + if (AnfAlgo::IsCommunicationOp(node) && context_ptr->enable_hccl()) { + device_address->UpdateCommunicationAddress(); + } AnfAlgo::SetOutputAddr(device_address, i, node.get()); } } diff 
--git a/mindspore/ccsrc/device/kernel_runtime.h b/mindspore/ccsrc/runtime/device/kernel_runtime.h similarity index 95% rename from mindspore/ccsrc/device/kernel_runtime.h rename to mindspore/ccsrc/runtime/device/kernel_runtime.h index 8c6a5eb19b..8320355b82 100644 --- a/mindspore/ccsrc/device/kernel_runtime.h +++ b/mindspore/ccsrc/runtime/device/kernel_runtime.h @@ -21,7 +21,7 @@ #include #include -#include "device/device_address.h" +#include "runtime/device/device_address.h" #include "ir/tensor.h" #include "predict/generator/utils/ir_model_util.h" #ifdef ENABLE_DUMP_E2E @@ -30,11 +30,11 @@ #ifdef ENABLE_DEBUGGER #include "debug/debugger/debugger.h" #endif -#include "session/kernel_graph.h" -#include "session/anf_runtime_algorithm.h" -#include "kernel/kernel.h" +#include "backend/session/kernel_graph.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/kernel.h" #include "utils/context/ms_context.h" -#include "device/memory_manager.h" +#include "runtime/device/memory_manager.h" using mindspore::tensor::Tensor; using std::vector; diff --git a/mindspore/ccsrc/device/kernel_runtime_manager.cc b/mindspore/ccsrc/runtime/device/kernel_runtime_manager.cc similarity index 98% rename from mindspore/ccsrc/device/kernel_runtime_manager.cc rename to mindspore/ccsrc/runtime/device/kernel_runtime_manager.cc index 29d74762b4..626259f9ce 100644 --- a/mindspore/ccsrc/device/kernel_runtime_manager.cc +++ b/mindspore/ccsrc/runtime/device/kernel_runtime_manager.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "device/kernel_runtime_manager.h" +#include "runtime/device/kernel_runtime_manager.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/device/kernel_runtime_manager.h b/mindspore/ccsrc/runtime/device/kernel_runtime_manager.h similarity index 98% rename from mindspore/ccsrc/device/kernel_runtime_manager.h rename to mindspore/ccsrc/runtime/device/kernel_runtime_manager.h index 89b45ff5f8..7fcb40ae67 100644 --- a/mindspore/ccsrc/device/kernel_runtime_manager.h +++ b/mindspore/ccsrc/runtime/device/kernel_runtime_manager.h @@ -23,7 +23,7 @@ #include #include #include "common/utils.h" -#include "device/kernel_runtime.h" +#include "runtime/device/kernel_runtime.h" namespace mindspore { namespace device { using KernelRuntimeCreator = std::function()>; diff --git a/mindspore/ccsrc/device/memory_manager.cc b/mindspore/ccsrc/runtime/device/memory_manager.cc similarity index 91% rename from mindspore/ccsrc/device/memory_manager.cc rename to mindspore/ccsrc/runtime/device/memory_manager.cc index 5efbcd8a36..563d5f0f50 100644 --- a/mindspore/ccsrc/device/memory_manager.cc +++ b/mindspore/ccsrc/runtime/device/memory_manager.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "device/memory_manager.h" -#include "session/anf_runtime_algorithm.h" +#include "runtime/device/memory_manager.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/context/ms_context.h" using mindspore::memreuse::BestFitMemReuse; using mindspore::memreuse::MemReuseUtilPtr; @@ -99,6 +99,11 @@ uint8_t *MemoryManager::MallocStaticMem(size_t size, bool communication_mem) { } else { align_size = GetCommonAlignSize(size); } + + MS_LOG(INFO) << "Malloc Memory for Static: total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ + << "] static[" << total_static_size_ << "])" + << " malloc [" << align_size << "] communication_mem: " << communication_mem; + if (static_mem_offset_ < align_size) { MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ << "] static[" << total_static_size_ << "])" @@ -126,6 +131,11 @@ uint8_t *MemoryManager::MallocDynamicMem(size_t size, bool communication_mem) { } else { align_size = GetCommonAlignSize(size); } + + MS_LOG(INFO) << "Malloc Memory for Dynamic: total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ + << "] static[" << total_static_size_ << "])" + << " malloc [" << align_size << "] communication_mem: " << communication_mem; + uint64_t offset = dynamic_mem_offset_; auto new_offset = dynamic_mem_offset_ + align_size; if (new_offset > static_mem_offset_) { diff --git a/mindspore/ccsrc/device/memory_manager.h b/mindspore/ccsrc/runtime/device/memory_manager.h similarity index 94% rename from mindspore/ccsrc/device/memory_manager.h rename to mindspore/ccsrc/runtime/device/memory_manager.h index be250e0f3f..3c6fb1b39a 100644 --- a/mindspore/ccsrc/device/memory_manager.h +++ b/mindspore/ccsrc/runtime/device/memory_manager.h @@ -18,8 +18,8 @@ #define MINDSPORE_MINDSPORE_CCSRC_DEVICE_MEMORY_MANAGER_H_ #include #include -#include "pre_activate/mem_reuse/mem_reuse.h" -#include "pre_activate/mem_reuse/mem_reuse_allocator.h" +#include 
"backend/optimizer/mem_reuse/mem_reuse.h" +#include "backend/optimizer/mem_reuse/mem_reuse_allocator.h" namespace mindspore { namespace device { const int kStaticMem = 0; @@ -36,7 +36,7 @@ class MemoryManager { virtual void MallocDeviceMemory() = 0; virtual void FreeDeviceMemory() = 0; - void ResetDynamicMemory() { + virtual void ResetDynamicMemory() { total_dynamic_size_ = 0; dynamic_mem_offset_ = 0; } diff --git a/mindspore/ccsrc/transform/CMakeLists.txt b/mindspore/ccsrc/transform/graph_ir/CMakeLists.txt similarity index 61% rename from mindspore/ccsrc/transform/CMakeLists.txt rename to mindspore/ccsrc/transform/graph_ir/CMakeLists.txt index c783cc0060..3f062609d5 100644 --- a/mindspore/ccsrc/transform/CMakeLists.txt +++ b/mindspore/ccsrc/transform/graph_ir/CMakeLists.txt @@ -1,9 +1,9 @@ if (ENABLE_GE OR ENABLE_D) file(GLOB_RECURSE _TRANSFORM_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") set_property(SOURCE ${_TRANSFORM_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_GE_ADPT) - add_library(_mindspore_transform_obj OBJECT ${_TRANSFORM_SRC_LIST}) + add_library(_mindspore_transform_graph_ir_obj OBJECT ${_TRANSFORM_SRC_LIST}) if (NOT ENABLE_GE) - target_compile_definitions(_mindspore_transform_obj PRIVATE NO_GE_CLIENT) + target_compile_definitions(_mindspore_transform_graph_ir_obj PRIVATE NO_GE_CLIENT) endif() endif () diff --git a/mindspore/ccsrc/transform/all_ops.h b/mindspore/ccsrc/transform/graph_ir/all_ops.h similarity index 100% rename from mindspore/ccsrc/transform/all_ops.h rename to mindspore/ccsrc/transform/graph_ir/all_ops.h diff --git a/mindspore/ccsrc/transform/convert.cc b/mindspore/ccsrc/transform/graph_ir/convert.cc similarity index 92% rename from mindspore/ccsrc/transform/convert.cc rename to mindspore/ccsrc/transform/graph_ir/convert.cc index f88e31fcd2..7419dd2cc9 100644 --- a/mindspore/ccsrc/transform/convert.cc +++ b/mindspore/ccsrc/transform/graph_ir/convert.cc @@ -14,20 +14,21 @@ * limitations under the 
License. */ -#include "transform/convert.h" +#include "transform/graph_ir/convert.h" #include #include #include #include "utils/utils.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "utils/log_adapter.h" #include "utils/graph_utils.h" #include "utils/symbolic.h" #include "utils/config_manager.h" #include "utils/convert_utils.h" #include "./common.h" +#include "utils/context/ms_context.h" namespace mindspore { namespace transform { @@ -206,6 +207,7 @@ const char kNameRange[] = "Range"; const char kNameSquareSumAll[] = "SquareSumAll"; const char kNameAscendQuant[] = "AscendQuant"; const char kNameAscendDequant[] = "AscendDequant"; +const char kNameCase[] = "Case"; // -----------------OpAdapter initialization-------------- std::unordered_map &DfGraphConvertor::get_adpt_map() { @@ -378,7 +380,7 @@ std::unordered_map &DfGraphConvertor::get_adpt_ma {string(kNameBiasAdd), ADPT_DESC(BiasAdd)}, {prim::kPrimRelu->name(), ADPT_DESC(Relu)}, - {prim::kPrimMatMul->name(), ADPT_DESC(MatMul)}, + {prim::kPrimMatMul->name(), ADPT_DESC(MatMulV2)}, {string(kNameConst), ADPT_DESC(Constant, Const)}, {string(kNameSoftmax), ADPT_DESC(SoftmaxV2)}, @@ -413,7 +415,8 @@ std::unordered_map &DfGraphConvertor::get_adpt_ma {string(kNameRange), ADPT_DESC(RangeD)}, {string(kNameSquareSumAll), ADPT_DESC(SquareSumAll)}, {string(kNameAscendQuant), ADPT_DESC(AscendQuant)}, - {string(kNameAscendDequant), ADPT_DESC(AscendDequant)}}; + {string(kNameAscendDequant), ADPT_DESC(AscendDequant)}, + {string(kNameCase), ADPT_DESC(Case)}}; #ifdef ENABLE_GE adpt_map[string(kNamePrint)] = ADPT_DESC(Print); adpt_map[string(kNameApplyAdam)] = ADPT_DESC(ApplyAdamD); @@ -435,13 +438,32 @@ PrimType GetCNodeFuncType(const CNodePtr cnode) { return kPrimTypeUnknown; } +bool IsCaseNode(const CNodePtr node) { + if (!node->inputs().empty() && node->input(0)->isa() && + GetCNodeFuncName(node->input(0)->cast()) == "switch_layer") { + return true; + } + return false; +} + +std::string 
GetCNodeTargetFuncName(const CNodePtr cnode) { + if (IsCaseNode(cnode)) { + return string(kNameCase); + } + auto name = GetCNodeFuncName(cnode); + if (name == "switch_layer") { + name = ""; + } + return name; +} + OpAdapterPtr DfGraphConvertor::FindAdapter(const AnfNodePtr node, bool train) { if (node->isa()) { auto cnode = node->cast(); std::string name = kNameCustomOp; if (!IsCustomCNode(cnode)) { - name = GetCNodeFuncName(cnode); + name = GetCNodeTargetFuncName(cnode); } auto it_adpt = get_adpt_map().find(name); @@ -959,7 +981,7 @@ void DfGraphConvertor::TraceOutput(const AnfNodePtr node) { auto c = anf_out->cast(); std::string name = ""; if (anf_out->isa()) { - name = GetCNodeFuncName(c); + name = GetCNodeTargetFuncName(c); } if (name == "make_tuple") { @@ -1031,6 +1053,99 @@ void SetupDatasetIterGetNextNode(const OperatorPtr &op) { return; } +void DfGraphConvertor::SetSubgraph(AnfNodePtr node) { + if (!node->isa()) { + return; + } + auto cnode = node->cast(); + if (!IsCaseNode(cnode)) { + return; + } + std::vector case_inputs; + for (size_t i = 1; i < cnode->inputs().size(); i++) { + case_inputs.emplace_back(cnode->input(i)); + } + std::shared_ptr> branches = std::make_shared>(); + auto bnode = cnode->input(0)->cast()->input(2)->cast(); + + for (size_t i = 1; i < bnode->inputs().size(); i++) { + auto branch_node = bnode->input(i)->cast(); + for (size_t j = 2; j < branch_node->inputs().size(); j++) { + if (std::find(case_inputs.begin(), case_inputs.end(), branch_node->input(j)) == case_inputs.end()) { + case_inputs.emplace_back(branch_node->input(j)); + } + } + } + + for (size_t i = 1; i < bnode->inputs().size(); i++) { + ProcessSubgraph(bnode->input(i), case_inputs); + } + + for (size_t i = 1; i < bnode->inputs().size(); i++) { + branches->emplace_back(branches_map_[bnode->input(i).get()]); + } + + if (op_cache_.find(node.get()) == op_cache_.end()) { + return; + } + + OpAdapterPtr adpt = FindAdapter(node, training_); + if (nullptr == adpt) { + MS_LOG(DEBUG) << 
"Not found adapter"; + return; + } + + OperatorPtr op = Convert(node); + adpt->setSubgraph(op, 0, branches); + return; +} + +void DfGraphConvertor::GetCaseNodeInput(const CNodePtr node, const CNodePtr input_node) { + std::vector case_inputs; + for (size_t i = 1; i < node->inputs().size(); i++) { + case_inputs.emplace_back(node->input(i)); + } + std::shared_ptr> branches = std::make_shared>(); + auto bnode = input_node->input(2)->cast(); + + for (size_t i = 1; i < bnode->inputs().size(); i++) { + auto branch_node = bnode->input(i)->cast(); + for (size_t j = 2; j < branch_node->inputs().size(); j++) { + if (std::find(case_inputs.begin(), case_inputs.end(), branch_node->input(j)) == case_inputs.end()) { + case_inputs.emplace_back(branch_node->input(j)); + } + } + } + + const size_t case_index = 1; + const size_t make_tuple_index = 2; + + AnfNodePtr case_index_iter = input_node->input(case_index); + AnfNodePtr make_tuple_iter = input_node->input(make_tuple_index); + auto make_tuple_node = make_tuple_iter->cast(); + std::shared_ptr> tuple_items = std::make_shared>(); + + for (size_t i = 0; i < case_inputs.size(); i++) { + auto item = case_inputs[i]; + auto op = Convert(item); + if (op != nullptr) { + tuple_items->emplace_back(OutHandler(op, "")); + } else if (out_handle_cache_.find(item.get()) != out_handle_cache_.end()) { + tuple_items->push_back(out_handle_cache_[item.get()]); + } else { + MS_LOG(WARNING) << "This anf node is not supported as a case input: " << item->ToString(); + continue; + } + } + + tuple_out_handle_cache_[make_tuple_node.get()] = tuple_items; + + std::shared_ptr> case_input_items = std::make_shared>(); + case_input_items->emplace_back(case_index_iter); + case_input_items->emplace_back(make_tuple_iter); + case_input_handle_cache_[node.get()] = case_input_items; +} + DfGraphConvertor &DfGraphConvertor::BuildGraph() { SetupDatasetIterGetNextNode(dataset_iter_getnext_); @@ -1038,6 +1153,16 @@ DfGraphConvertor &DfGraphConvertor::BuildGraph() { return 
*this; } + // Case node set input. + std::vector nodes = ::mindspore::TopoSort(anf_graph_->get_return()); + for (auto &it : nodes) { + if (it->isa() && IsCaseNode(it->cast())) { + auto node = it->cast(); + auto input_node = node->input(0)->cast(); + GetCaseNodeInput(node, input_node); + } + } + // update tuple_out_handle_cache_ for (auto it : tuple_out_handle_cache_) { std::size_t len = it.second->size(); @@ -1058,10 +1183,11 @@ DfGraphConvertor &DfGraphConvertor::BuildGraph() { // set up dependices MS_LOG(DEBUG) << "set up dependices"; - std::vector nodes = ::mindspore::TopoSort(anf_graph_->get_return()); + nodes = ::mindspore::TopoSort(anf_graph_->get_return()); for (auto &it : nodes) { SetNodeInput(it); SetOpControlInput(it); + SetSubgraph(it); UpdateOpDesc(it); } @@ -1077,6 +1203,18 @@ DfGraphConvertor &DfGraphConvertor::BuildGraph() { inputs.push_back(*dataset_iter_getnext_); } else { auto params = anf_graph_->parameters(); + if (use_inputs_) { + params = inputs_; + auto anf_params = anf_graph_->parameters(); + for (size_t i = 0; i < params.size(); i++) { + for (size_t j = 0; j < anf_params.size(); j++) { + if (params[i]->ToString() == anf_params[j]->ToString()) { + params[i] = anf_params[j]; + } + } + } + } + int index = 0; for (auto &it : params) { auto name = std::static_pointer_cast(it)->name(); @@ -1187,10 +1325,21 @@ const std::vector trans_var_list = {string(kNameAssign), string(kNa void DfGraphConvertor::SetOpInput(const OpAdapterPtr &adpt, const CNodePtr &node) { OperatorPtr src = Convert(node); + int case_flag = 0; auto &inputs = node->inputs(); - for (size_t i = 1; i < inputs.size(); i++) { + size_t input_size = inputs.size(); + if (case_input_handle_cache_.find(node.get()) != case_input_handle_cache_.end()) { + case_flag = 1; + input_size = case_input_handle_cache_[node.get()]->size() + 1; + } + + for (size_t i = 1; i < input_size; i++) { auto pred = inputs[i]; - while (pred->isa() && GetCNodeFuncName(pred->cast()) == "Depend") { + if (case_flag != 
0) { + pred = case_input_handle_cache_[node.get()]->at(i - 1); + } + + while (pred->isa() && GetCNodeTargetFuncName(pred->cast()) == "Depend") { pred = pred->cast()->input(1); } // skip the None input @@ -1198,7 +1347,7 @@ void DfGraphConvertor::SetOpInput(const OpAdapterPtr &adpt, const CNodePtr &node continue; } // transform "Const" op to "Variable" op when the next node is "Assign" op. - std::string c_name = GetCNodeFuncName(node); + std::string c_name = GetCNodeTargetFuncName(node); auto pos = std::find(trans_var_list.begin(), trans_var_list.end(), c_name); if (!training_ && pos != trans_var_list.end() && pred->isa()) { std::string name = std::static_pointer_cast(pred)->name(); @@ -1222,7 +1371,7 @@ void DfGraphConvertor::SetOpInput(const OpAdapterPtr &adpt, const CNodePtr &node if (it != out_handle_cache_.end()) { int ret = adpt->setInput(src, SizeToInt(i), it->second); if (ret == 0) { - if (pred->isa() && GetCNodeFuncName(pred->cast()) == "tuple_getitem") { + if (pred->isa() && GetCNodeTargetFuncName(pred->cast()) == "tuple_getitem") { compute_sout_ << op_draw_name_[pred->cast()->input(1).get()] << " -> " << op_draw_name_[node.get()] << ":" << i << endl; } else if (pred->isa()) { @@ -1280,6 +1429,23 @@ void DfGraphConvertor::SetNodeInput(const AnfNodePtr node) { DfGraphConvertor::SetOpInput(adpt, cnode); } +void DfGraphConvertor::ProcessSubgraph(AnfNodePtr node, const std::vector &inputs) { + if (!node->isa() || GetCNodeFuncName(node->cast()) != "Partial") { + return; + } + auto graph_node = node->cast()->input(1)->cast(); + FuncGraphPtr anf_graph = graph_node->value()->cast(); + DfGraphConvertor convertor(anf_graph); + convertor.use_inputs_ = true; + convertor.inputs_ = inputs; + (void)convertor.ConvertAllNode().BuildGraph(); + std::string name = graph_node->ToString() + "_ge_graph.dot"; + if (MsContext::GetInstance()->save_graphs_flag()) { + convertor.DrawComputeGraph(name); + } + branches_map_[node.get()] = *(convertor.df_graph_); +} + // Update GE op's 
shape and type info void DfGraphConvertor::UpdateOpDesc(const AnfNodePtr node) { if (nullptr == node || !node->isa()) { @@ -1350,6 +1516,7 @@ void DfGraphConvertor::ConvertMakeTuple(const CNodePtr node) { } } + MS_LOG(WARNING) << "ConvertMakeTuple: " << node.get() << " " << tuple_items->size(); tuple_out_handle_cache_[node.get()] = tuple_items; } @@ -1713,6 +1880,14 @@ bool DfGraphConvertor::CheckCNode(const std::string &name, const CNodePtr node) return false; } + if (name == "" && GetCNodeFuncName(node) == "switch_layer") { + return false; + } + + if (name == "Partial") { + return false; + } + // make_tuple is used for a dynamic_input, convert it to a vector of OutHandlers if (name == "make_tuple") { ConvertMakeTuple(node); @@ -1734,7 +1909,7 @@ bool DfGraphConvertor::CheckCNode(const std::string &name, const CNodePtr node) } OperatorPtr DfGraphConvertor::ConvertCNode(const CNodePtr node) { - std::string name = GetCNodeFuncName(node); + std::string name = GetCNodeTargetFuncName(node); if (!CheckCNode(name, node)) { return nullptr; } @@ -1881,7 +2056,7 @@ void DfGraphConvertor::DrawCNode(const CNodePtr node, const OpAdapterPtr adpt) { } compute_sout_ << "\"" << node->ToString() - << ":" << GetCNodeFuncName(node) << "\"" << endl; + << ":" << GetCNodeTargetFuncName(node) << "\"" << endl; // print attrs' values auto atts = adpt->GetAttrsFromDrawGraph(); diff --git a/mindspore/ccsrc/transform/convert.h b/mindspore/ccsrc/transform/graph_ir/convert.h similarity index 94% rename from mindspore/ccsrc/transform/convert.h rename to mindspore/ccsrc/transform/graph_ir/convert.h index 2f6c9bb0ad..6fa27831bf 100644 --- a/mindspore/ccsrc/transform/convert.h +++ b/mindspore/ccsrc/transform/graph_ir/convert.h @@ -31,11 +31,11 @@ #include "ir/anf.h" #include "ir/func_graph.h" -#include "transform/util.h" +#include "transform/graph_ir/util.h" #include "ir/tensor.h" -#include "transform/df_graph_manager.h" +#include "transform/graph_ir/df_graph_manager.h" #include 
"utils/config_manager.h" -#include "transform/op_declare.h" +#include "transform/graph_ir/op_declare.h" #include "graph/operator_reg.h" #ifdef OPEN_SOURCE #include "ge/client/ge_api.h" @@ -201,6 +201,7 @@ class DfGraphConvertor { OperatorPtr ConvertParameter(AnfNodePtr node); Status TryConvertValueNodeToMultiConst(const ValueNodePtr node); OperatorPtr ConvertValueNode(ValueNodePtr node); + void GetCaseNodeInput(const CNodePtr node, const CNodePtr input_node); void ConvertTupleGetItem(const CNodePtr node); void GetDependOnParameterUse(const CNodePtr &node, const AnfNodePtr &src_node, const AnfNodePtr &dest_node, const std::shared_ptr> &src_ops_list, @@ -217,6 +218,8 @@ class DfGraphConvertor { void SetNodeInput(AnfNodePtr node); void SetOpControlInput(const AnfNodePtr node); void UpdateOpDesc(AnfNodePtr node); + void SetSubgraph(AnfNodePtr node); + void ProcessSubgraph(AnfNodePtr node, const std::vector &inputs); void BuildSaveCheckpointGraph(); void DrawCNode(const CNodePtr node, const OpAdapterPtr adpt); void UpdateDataOpDesc(const AnfNodePtr &it, const OperatorPtr &op) const; @@ -228,22 +231,26 @@ class DfGraphConvertor { std::shared_ptr save_ckp_graph_{nullptr}; std::shared_ptr restore_ckp_graph_{nullptr}; std::shared_ptr broadcast_graph_{nullptr}; + std::unordered_map branches_map_; std::unordered_map op_cache_; std::unordered_map> control_depend_cache_; /* record "tuple_getitem"<->"out_handler" mapping */ std::unordered_map out_handle_cache_; /* record "make_tuple"<->"out_handler vector" mapping */ std::unordered_map>> tuple_out_handle_cache_; + std::unordered_map>> case_input_handle_cache_; std::unordered_map params_; std::unordered_map vars_; std::vector> graph_outputs_; std::vector graph_const_inputs_; std::vector init_ops_; std::vector broadcast_ops_; + std::vector inputs_; OperatorPtr dataset_iter_getnext_; Status error_ = SUCCESS; bool training_ = false; bool distribute_ = false; + bool use_inputs_ = false; }; } // namespace transform } // namespace 
mindspore diff --git a/mindspore/ccsrc/transform/df_graph_manager.cc b/mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc similarity index 97% rename from mindspore/ccsrc/transform/df_graph_manager.cc rename to mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc index f62c386587..29985d6784 100644 --- a/mindspore/ccsrc/transform/df_graph_manager.cc +++ b/mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "transform/df_graph_manager.h" +#include "transform/graph_ir/df_graph_manager.h" #include #include @@ -22,8 +22,8 @@ #include #include "securec/include/securec.h" -#include "pipeline/parse/python_adapter.h" -#include "pipeline/pipeline.h" +#include "pipeline/jit/parse/python_adapter.h" +#include "pipeline/jit/pipeline.h" #include "utils/config_manager.h" #ifndef NO_DLIB #include "tdt/tsd_client.h" diff --git a/mindspore/ccsrc/transform/df_graph_manager.h b/mindspore/ccsrc/transform/graph_ir/df_graph_manager.h similarity index 98% rename from mindspore/ccsrc/transform/df_graph_manager.h rename to mindspore/ccsrc/transform/graph_ir/df_graph_manager.h index 2ca43d1f07..8a574b7a04 100644 --- a/mindspore/ccsrc/transform/df_graph_manager.h +++ b/mindspore/ccsrc/transform/graph_ir/df_graph_manager.h @@ -23,7 +23,7 @@ #include #include #include -#include "transform/types.h" +#include "transform/graph_ir/types.h" #include "ir/anf.h" namespace mindspore { diff --git a/mindspore/ccsrc/transform/graph_builder.cc b/mindspore/ccsrc/transform/graph_ir/graph_builder.cc similarity index 97% rename from mindspore/ccsrc/transform/graph_builder.cc rename to mindspore/ccsrc/transform/graph_ir/graph_builder.cc index 785c5c7f3a..6ee45feef8 100644 --- a/mindspore/ccsrc/transform/graph_builder.cc +++ b/mindspore/ccsrc/transform/graph_ir/graph_builder.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "transform/graph_builder.h" +#include "transform/graph_ir/graph_builder.h" #include #include diff --git a/mindspore/ccsrc/transform/graph_builder.h b/mindspore/ccsrc/transform/graph_ir/graph_builder.h similarity index 92% rename from mindspore/ccsrc/transform/graph_builder.h rename to mindspore/ccsrc/transform/graph_ir/graph_builder.h index 3d959f5a85..5162674242 100644 --- a/mindspore/ccsrc/transform/graph_builder.h +++ b/mindspore/ccsrc/transform/graph_ir/graph_builder.h @@ -22,8 +22,8 @@ #include #include #include -#include "transform/types.h" -#include "transform/convert.h" +#include "transform/graph_ir/types.h" +#include "transform/graph_ir/convert.h" namespace mindspore { namespace transform { diff --git a/mindspore/ccsrc/transform/graph_runner.cc b/mindspore/ccsrc/transform/graph_ir/graph_runner.cc similarity index 99% rename from mindspore/ccsrc/transform/graph_runner.cc rename to mindspore/ccsrc/transform/graph_ir/graph_runner.cc index 52d0d8e17f..d20c49a381 100644 --- a/mindspore/ccsrc/transform/graph_runner.cc +++ b/mindspore/ccsrc/transform/graph_ir/graph_runner.cc @@ -14,7 +14,7 @@ * Limitations under the License. 
*/ -#include "transform/graph_runner.h" +#include "transform/graph_ir/graph_runner.h" #include #include #include diff --git a/mindspore/ccsrc/transform/graph_runner.h b/mindspore/ccsrc/transform/graph_ir/graph_runner.h similarity index 93% rename from mindspore/ccsrc/transform/graph_runner.h rename to mindspore/ccsrc/transform/graph_ir/graph_runner.h index 30769c8310..92db9e1413 100644 --- a/mindspore/ccsrc/transform/graph_runner.h +++ b/mindspore/ccsrc/transform/graph_ir/graph_runner.h @@ -23,10 +23,10 @@ #include #include -#include "transform/types.h" -#include "transform/util.h" +#include "transform/graph_ir/types.h" +#include "transform/graph_ir/util.h" #include "ir/tensor.h" -#include "transform/df_graph_manager.h" +#include "transform/graph_ir/df_graph_manager.h" namespace mindspore { namespace transform { diff --git a/mindspore/ccsrc/transform/op_adapter.h b/mindspore/ccsrc/transform/graph_ir/op_adapter.h similarity index 96% rename from mindspore/ccsrc/transform/op_adapter.h rename to mindspore/ccsrc/transform/graph_ir/op_adapter.h index ae678606a4..358cbd20a1 100644 --- a/mindspore/ccsrc/transform/op_adapter.h +++ b/mindspore/ccsrc/transform/graph_ir/op_adapter.h @@ -22,7 +22,7 @@ #include #include -#include "transform/op_adapter_util.h" +#include "transform/graph_ir/op_adapter_util.h" #include "utils/utils.h" namespace mindspore { namespace transform { @@ -164,6 +164,25 @@ class OpAdapter : public BaseOpAdapter { const std::unordered_map &getInputAttrMap() override { return input_attr_map_; } const std::unordered_map &getDynInputMap() override { return dyn_input_map_; } const std::unordered_map &getOutputMap() override { return output_map_; } + const std::unordered_map &getDynSubgraphMap() override { return dyn_subgraph_map_; } + + Status SetOpSubgraphFunc(const OperatorPtr &op, int index, std::shared_ptr> branches) { + MS_EXCEPTION_IF_NULL(op); + auto it = dyn_subgraph_map_.find(index); + if (it != dyn_subgraph_map_.end()) { + auto size = 
branches->size(); + it->second.create_dyn_subgraph(op, static_cast(size)); + for (size_t i = 0; i < size; i++) { + it->second.set_subgraph(op, static_cast(i), std::make_shared((*branches)[i])); + } + return SUCCESS; + } + return NOT_FOUND; + } + + int setSubgraph(const OperatorPtr &op, int index, std::shared_ptr> branches) override { + return static_cast(SetOpSubgraphFunc(op, index, branches)); + } Status SetCustomOpInput(const CusOperatorPtr &op, int index, const OperatorPtr &input) { MS_EXCEPTION_IF_NULL(op); @@ -855,6 +874,7 @@ class OpAdapter : public BaseOpAdapter { static const std::unordered_map dyn_input_map_; static const std::unordered_map output_map_; static const std::unordered_map dyn_output_map_; + static const std::unordered_map dyn_subgraph_map_; static const std::unordered_map attr_map_; static const std::unordered_map enum_map_; // convert input from anf graph to Attr in Operators @@ -874,6 +894,8 @@ const std::unordered_map OpAdapter::output_map_; template const std::unordered_map OpAdapter::dyn_output_map_; template +const std::unordered_map OpAdapter::dyn_subgraph_map_; +template const std::unordered_map OpAdapter::attr_map_; template const std::unordered_map OpAdapter::enum_map_; diff --git a/mindspore/ccsrc/transform/op_adapter_base.h b/mindspore/ccsrc/transform/graph_ir/op_adapter_base.h similarity index 90% rename from mindspore/ccsrc/transform/op_adapter_base.h rename to mindspore/ccsrc/transform/graph_ir/op_adapter_base.h index 01f96e251d..77e28dda94 100644 --- a/mindspore/ccsrc/transform/op_adapter_base.h +++ b/mindspore/ccsrc/transform/graph_ir/op_adapter_base.h @@ -24,12 +24,11 @@ #include #include -#include "transform/util.h" +#include "transform/graph_ir/util.h" #include "ir/anf.h" #include "ir/primitive.h" #include "ir/value.h" -#include "transform/types.h" - +#include "transform/graph_ir/types.h" #ifdef ENABLE_GE #ifdef OPEN_SOURCE #include "graph/types.h" @@ -43,7 +42,7 @@ #include "external/ge/ge_api.h" #endif #include 
"graph/tensor.h" -#include "transform/all_ops.h" +#include "transform/graph_ir/all_ops.h" namespace ge { class CustomOperator : public Operator { @@ -88,6 +87,8 @@ using DynInputOpFunc = std::function; using UpdateOutputDescFunc = std::function; using CreateDynOutputOpFunc = std::function; +using CreateDynSubGraphFunc = std::function; +using DynSubGraphFunc = std::function; struct AttrDesc { std::string name; @@ -108,6 +109,12 @@ struct DynInputDesc { DynInputHandleFunc set_handle; }; +struct DynSubGraphDesc { + std::string name; + CreateDynSubGraphFunc create_dyn_subgraph; + DynSubGraphFunc set_subgraph; +}; + struct OutputDesc { std::string name; UpdateOutputDescFunc update_out_desc; @@ -123,6 +130,7 @@ class BaseOpAdapter { virtual ~BaseOpAdapter() {} virtual OperatorPtr generate(const AnfNodePtr &anf) = 0; virtual OperatorPtr generate(const std::string &type) { return std::make_shared(type); } + virtual int setSubgraph(const OperatorPtr &op, int index, std::shared_ptr> branches) = 0; virtual int setInput(const OperatorPtr &op, int index, const OperatorPtr &input) = 0; virtual int setInput(const OperatorPtr &op, int index, const OutHandler &handle) = 0; virtual int setInput(const OperatorPtr &op, int index, @@ -146,6 +154,7 @@ class BaseOpAdapter { virtual const std::unordered_map &getInputAttrMap() = 0; virtual const std::unordered_map &getDynInputMap() = 0; virtual const std::unordered_map &getOutputMap() = 0; + virtual const std::unordered_map &getDynSubgraphMap() = 0; void AddAttrToDrawGraph(const std::string &attr_str) { attrs_vec_.push_back(attr_str); } const std::vector &GetAttrsFromDrawGraph() const { return attrs_vec_; } void clearAttrVect() { attrs_vec_.clear(); } diff --git a/mindspore/ccsrc/transform/op_adapter_util.cc b/mindspore/ccsrc/transform/graph_ir/op_adapter_util.cc similarity index 99% rename from mindspore/ccsrc/transform/op_adapter_util.cc rename to mindspore/ccsrc/transform/graph_ir/op_adapter_util.cc index cae43c13dc..78f1f263de 100644 
--- a/mindspore/ccsrc/transform/op_adapter_util.cc +++ b/mindspore/ccsrc/transform/graph_ir/op_adapter_util.cc @@ -14,14 +14,14 @@ * limitations under the License. */ -#include "transform/op_adapter_util.h" +#include "transform/graph_ir/op_adapter_util.h" #include #include #include #include "utils/utils.h" -#include "transform/op_adapter_base.h" +#include "transform/graph_ir/op_adapter_base.h" namespace mindspore { namespace transform { diff --git a/mindspore/ccsrc/transform/op_adapter_util.h b/mindspore/ccsrc/transform/graph_ir/op_adapter_util.h similarity index 98% rename from mindspore/ccsrc/transform/op_adapter_util.h rename to mindspore/ccsrc/transform/graph_ir/op_adapter_util.h index fcabc732d5..0a0d745ba2 100644 --- a/mindspore/ccsrc/transform/op_adapter_util.h +++ b/mindspore/ccsrc/transform/graph_ir/op_adapter_util.h @@ -20,7 +20,7 @@ #include #include -#include "transform/op_adapter_base.h" +#include "transform/graph_ir/op_adapter_base.h" namespace mindspore { namespace transform { diff --git a/mindspore/ccsrc/transform/graph_ir/op_declare.cc b/mindspore/ccsrc/transform/graph_ir/op_declare.cc new file mode 100644 index 0000000000..e3751e0c92 --- /dev/null +++ b/mindspore/ccsrc/transform/graph_ir/op_declare.cc @@ -0,0 +1,1330 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "transform/graph_ir/op_declare.h" + +#include + +#include "transform/graph_ir/all_ops.h" +#include "utils/utils.h" + +namespace mindspore { +namespace transform { +#define INPUT_MAP(T) \ + template <> \ + const std::unordered_map OpAdapter::input_map_ +#define EMPTY_INPUT_MAP std::unordered_map() +#define INPUT_DESC(name) \ + { \ +#name, \ + [](const OperatorPtr op, const OperatorPtr input) { \ + auto p = std::static_pointer_cast(op); \ + (void)p->set_input_##name(*input); \ + }, \ + [](const OperatorPtr op, const OutHandler& handle) { \ + auto p = std::static_pointer_cast(op); \ + (void)p->set_input_##name(*(handle.op), handle.out); \ + }, \ + [](const OperatorPtr op, const GeTensorDesc desc) { \ + auto p = std::static_pointer_cast(op); \ + (void)p->update_input_desc_##name(desc); \ + } \ + } + +#define DYN_INPUT_MAP(T) \ + template <> \ + const std::unordered_map OpAdapter::dyn_input_map_ +#define DYN_INPUT_DESC(name) \ + { \ +#name, \ + [](const OperatorPtr op, unsigned int num) { \ + auto p = std::static_pointer_cast(op); \ + (void)p->create_dynamic_input_##name(num); \ + }, \ + [](const OperatorPtr op, unsigned int index, const OperatorPtr input) { \ + auto p = std::static_pointer_cast(op); \ + (void)p->set_dynamic_input_##name(index, *input); \ + }, \ + [](const OperatorPtr op, unsigned int index, const OutHandler& handle) { \ + auto p = std::static_pointer_cast(op); \ + (void)p->set_dynamic_input_##name(index, *(handle.op), handle.out); \ + } \ + } + +#define DYN_SUBGRAPH_MAP(T) \ + template <> \ + const std::unordered_map OpAdapter::dyn_subgraph_map_ +#define DYN_SUBGRAPH_DESC(name) \ + { \ +#name, \ + [](const OperatorPtr op, unsigned int num) { \ + auto p = std::static_pointer_cast(op); \ + (void)p->create_dynamic_subgraph_##name(num); \ + }, \ + [](const OperatorPtr op, unsigned int index, const DfGraphPtr graph) { \ + auto p = std::static_pointer_cast(op); \ + (void)p->set_dynamic_subgraph_builder_##name(index, [graph](){return 
*graph;}); \ + } \ + } + +#define ATTR_MAP(T) \ + template <> \ + const std::unordered_map OpAdapter::attr_map_ +#define EMPTY_ATTR_MAP std::unordered_map() +#define ATTR_DESC(name, ...) \ + { \ +#name, \ + [](const OperatorPtr op, const ValuePtr& value) { \ + auto p = std::static_pointer_cast(op); \ + (void)p->set_attr_##name(ConvertAny(value, __VA_ARGS__)); \ + } \ + } + +#define INPUT_ATTR_MAP(T) \ + template <> \ + const std::unordered_map OpAdapter::input_attr_map_ + +#define OUTPUT_MAP(T) \ + template <> \ + const std::unordered_map OpAdapter::output_map_ +#define OUTPUT_DESC(name) \ + { \ +#name, \ + [](const OperatorPtr op, const GeTensorDesc desc) { \ + auto p = std::static_pointer_cast(op); \ + (void)p->update_output_desc_##name(desc); \ + } \ + } + +#define DYN_OUTPUT_MAP(T) \ + template <> \ + const std::unordered_map OpAdapter::dyn_output_map_ + +#define DYN_OUTPUT_DESC(name) \ + { \ +#name, \ + [](const OperatorPtr op, unsigned int num) { \ + auto p = std::static_pointer_cast(op); \ + (void)p->create_dynamic_output_##name(num); \ + } \ + } + +template <> +std::unordered_map> OpAdapter::cus_input_map_{}; +template <> +std::unordered_map> OpAdapter::cus_output_map_{}; + +// --------------specialization for each operator---------- +// const +INPUT_MAP(Const) = EMPTY_INPUT_MAP; +ATTR_MAP(Const) = {{"value", ATTR_DESC(value, AnyTraits())}}; +OUTPUT_MAP(Const) = {{0, OUTPUT_DESC(y)}}; + +// Assign +INPUT_MAP(Assign) = {{1, INPUT_DESC(ref)}, {2, INPUT_DESC(value)}}; +ATTR_MAP(Assign) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Assign) = {{0, OUTPUT_DESC(ref)}}; + +// Constant +INPUT_MAP(Constant) = EMPTY_INPUT_MAP; +ATTR_MAP(Constant) = {{"value", ATTR_DESC(value, AnyTraits())}}; +OUTPUT_MAP(Constant) = {{0, OUTPUT_DESC(y)}}; + +// ApplyMomentumD +INPUT_MAP(ApplyMomentumD) = { + {1, INPUT_DESC(var)}, {2, INPUT_DESC(accum)}, {3, INPUT_DESC(lr)}, {4, INPUT_DESC(grad)}, {5, INPUT_DESC(momentum)}}; +ATTR_MAP(ApplyMomentumD) = {{"use_nesterov", ATTR_DESC(use_nesterov, 
AnyTraits())}, + {"use_locking", ATTR_DESC(use_locking, AnyTraits())}}; +OUTPUT_MAP(ApplyMomentumD) = {{0, OUTPUT_DESC(var)}, {1, OUTPUT_DESC(accum)}}; + +// ScalarSummary +INPUT_MAP(Summary) = {{2, INPUT_DESC(x)}}; +ATTR_MAP(Summary) = EMPTY_ATTR_MAP; + +// Data +INPUT_MAP(Data) = EMPTY_INPUT_MAP; +ATTR_MAP(Data) = EMPTY_ATTR_MAP; + +// BatchNorm +INPUT_MAP(BatchNorm) = {{1, INPUT_DESC(x)}, + {2, INPUT_DESC(scale)}, + {3, INPUT_DESC(offset)}, + {4, INPUT_DESC(mean)}, + {5, INPUT_DESC(variance)}}; +ATTR_MAP(BatchNorm) = {{"data_format", ATTR_DESC(data_format, AnyTraits())}, + {"epsilon", ATTR_DESC(epsilon, AnyTraits())}, + {"is_training", ATTR_DESC(is_training, AnyTraits())}}; +OUTPUT_MAP(BatchNorm) = {{0, OUTPUT_DESC(y)}, + {1, OUTPUT_DESC(batch_mean)}, + {2, OUTPUT_DESC(batch_variance)}, + {3, OUTPUT_DESC(reserve_space_1)}, + {4, OUTPUT_DESC(reserve_space_2)}}; + +// BatchNormGrad +INPUT_MAP(BatchNormGrad) = {{1, INPUT_DESC(y_backprop)}, + {2, INPUT_DESC(x)}, + {3, INPUT_DESC(scale)}, + {4, INPUT_DESC(reserve_space_1)}, + {5, INPUT_DESC(reserve_space_2)}}; +ATTR_MAP(BatchNormGrad) = {{"data_format", ATTR_DESC(data_format, AnyTraits())}, + {"epsilon", ATTR_DESC(epsilon, AnyTraits())}, + {"is_training", ATTR_DESC(is_training, AnyTraits())}}; +OUTPUT_MAP(BatchNormGrad) = {{0, OUTPUT_DESC(x_backprop)}, + {1, OUTPUT_DESC(scale_backprop)}, + {2, OUTPUT_DESC(offset_backprop)}, + {3, OUTPUT_DESC(reserve_space_4)}, + {4, OUTPUT_DESC(reserve_space_5)}}; + +// Relu +INPUT_MAP(Relu) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Relu) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Relu) = {{0, OUTPUT_DESC(y)}}; + +// Elu +INPUT_MAP(Elu) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Elu) = {{"alpha", ATTR_DESC(alpha, AnyTraits())}}; +OUTPUT_MAP(Elu) = {{0, OUTPUT_DESC(y)}}; + +// EluGrad +INPUT_MAP(EluGrad) = {{1, INPUT_DESC(grads)}, {2, INPUT_DESC(activations)}}; +ATTR_MAP(EluGrad) = EMPTY_ATTR_MAP; +OUTPUT_MAP(EluGrad) = {{0, OUTPUT_DESC(y)}}; + +// PRelu +INPUT_MAP(PRelu) = {{1, INPUT_DESC(x)}, {2, 
INPUT_DESC(weight)}}; +ATTR_MAP(PRelu) = EMPTY_ATTR_MAP; +OUTPUT_MAP(PRelu) = {{0, OUTPUT_DESC(y)}}; + +// PReluGrad +INPUT_MAP(PReluGrad) = {{1, INPUT_DESC(grads)}, {2, INPUT_DESC(features)}, {3, INPUT_DESC(weights)}}; +ATTR_MAP(PReluGrad) = EMPTY_ATTR_MAP; +OUTPUT_MAP(PReluGrad) = {{0, OUTPUT_DESC(dx)}, {1, OUTPUT_DESC(da)}}; + +// Sigmoid +INPUT_MAP(Sigmoid) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Sigmoid) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Sigmoid) = {{0, OUTPUT_DESC(y)}}; + +// SigmoidGrad +INPUT_MAP(SigmoidGrad) = {{1, INPUT_DESC(y)}, {2, INPUT_DESC(dy)}}; +ATTR_MAP(SigmoidGrad) = EMPTY_ATTR_MAP; +OUTPUT_MAP(SigmoidGrad) = {{0, OUTPUT_DESC(z)}}; + +// L2NormalizeGrad +INPUT_MAP(L2NormalizeGrad) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(y)}, {3, INPUT_DESC(dy)}}; +ATTR_MAP(L2NormalizeGrad) = { + {"axis", ATTR_DESC(dim, AnyTraits>(), AnyTraits>())}, + {"epsilon", ATTR_DESC(eps, AnyTraits())}}; +OUTPUT_MAP(L2NormalizeGrad) = {{0, OUTPUT_DESC(dx)}}; + +// LarsV2Update +INPUT_MAP(LarsV2Update) = {{1, INPUT_DESC(w)}, + {2, INPUT_DESC(g)}, + {3, INPUT_DESC(w_square_sum)}, + {4, INPUT_DESC(g_square_sum)}, + {5, INPUT_DESC(weight_decay)}, + {6, INPUT_DESC(learning_rate)}}; +ATTR_MAP(LarsV2Update) = {{"epsilon", ATTR_DESC(epsilon, AnyTraits())}, + {"hyperpara", ATTR_DESC(hyperpara, AnyTraits())}, + {"use_clip", ATTR_DESC(use_clip, AnyTraits())}}; +OUTPUT_MAP(LarsV2Update) = {{0, OUTPUT_DESC(g_new)}}; + +// L2Normalize +INPUT_MAP(L2Normalize) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(L2Normalize) = { + {"axis", ATTR_DESC(axis, AnyTraits>(), AnyTraits>())}, + {"epsilon", ATTR_DESC(eps, AnyTraits())}}; +OUTPUT_MAP(L2Normalize) = {{0, OUTPUT_DESC(y)}}; + +// CumsumD +INPUT_MAP(CumsumD) = {{1, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(CumsumD) = {{2, ATTR_DESC(axis, AnyTraits())}}; +ATTR_MAP(CumsumD) = {{"exclusive", ATTR_DESC(exclusive, AnyTraits())}, + {"reverse", ATTR_DESC(reverse, AnyTraits())}}; +OUTPUT_MAP(CumsumD) = {{0, OUTPUT_DESC(y)}}; + +// SoftmaxV2 +INPUT_MAP(SoftmaxV2) = {{1, 
INPUT_DESC(x)}}; +ATTR_MAP(SoftmaxV2) = { + {"axis", ATTR_DESC(axes, AnyTraits>(), AnyTraits>())}, +}; +OUTPUT_MAP(SoftmaxV2) = {{0, OUTPUT_DESC(y)}}; + +// SoftmaxGrad +INPUT_MAP(SoftmaxGrad) = {{1, INPUT_DESC(softmax)}, {2, INPUT_DESC(grad_softmax)}}; +OUTPUT_MAP(SoftmaxGrad) = {{0, OUTPUT_DESC(grad_x)}}; +ATTR_MAP(SoftmaxGrad) = EMPTY_ATTR_MAP; + +// Flatten +INPUT_MAP(Flatten) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Flatten) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Flatten) = {{0, OUTPUT_DESC(y)}}; + +// add +INPUT_MAP(Add) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(Add) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Add) = {{0, OUTPUT_DESC(y)}}; + +// GatherV2 +INPUT_MAP(GatherV2) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(indices)}, {3, INPUT_DESC(axis)}}; +ATTR_MAP(GatherV2) = EMPTY_ATTR_MAP; +OUTPUT_MAP(GatherV2) = {{0, OUTPUT_DESC(y)}}; + +// ReduceSumD +INPUT_MAP(ReduceSumD) = {{1, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(ReduceSumD) = { + {2, ATTR_DESC(axes, AnyTraits>(), AnyTraits>())}}; +ATTR_MAP(ReduceSumD) = {{"keep_dims", ATTR_DESC(keep_dims, AnyTraits())}}; +OUTPUT_MAP(ReduceSumD) = {{0, OUTPUT_DESC(y)}}; + +// ReduceProdD +INPUT_MAP(ReduceProdD) = {{1, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(ReduceProdD) = { + {2, ATTR_DESC(axes, AnyTraits>(), AnyTraits>())}}; +ATTR_MAP(ReduceProdD) = {{"keep_dims", ATTR_DESC(keep_dims, AnyTraits())}}; +OUTPUT_MAP(ReduceProdD) = {{0, OUTPUT_DESC(y)}}; + +// CumprodD +INPUT_MAP(CumprodD) = {{1, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(CumprodD) = {{2, ATTR_DESC(axis, AnyTraits())}}; +ATTR_MAP(CumprodD) = {{"exclusive", ATTR_DESC(exclusive, AnyTraits())}, + {"reverse", ATTR_DESC(reverse, AnyTraits())}}; +OUTPUT_MAP(CumprodD) = {{0, OUTPUT_DESC(y)}}; + +// SoftmaxCrossEntropyWithLogits +INPUT_MAP(SoftmaxCrossEntropyWithLogits) = {{1, INPUT_DESC(features)}, {2, INPUT_DESC(labels)}}; +ATTR_MAP(SoftmaxCrossEntropyWithLogits) = EMPTY_ATTR_MAP; +OUTPUT_MAP(SoftmaxCrossEntropyWithLogits) = {{0, OUTPUT_DESC(loss)}, {1, OUTPUT_DESC(backprop)}}; + +// MeanGrad 
+INPUT_MAP(MeanGrad) = {{1, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(MeanGrad) = {{2, ATTR_DESC(mean_grad_output_shape_value, kOpFormat_NHWC, + AnyTraits>(), AnyTraits())}}; +ATTR_MAP(MeanGrad) = {{"mode", ATTR_DESC(mode, AnyTraits())}}; + +INPUT_MAP(SliceD) = {{1, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(SliceD) = {{2, ATTR_DESC(offsets, AnyTraits(), AnyTraits>())}, + {3, ATTR_DESC(size, AnyTraits(), AnyTraits>())}}; +ATTR_MAP(SliceD) = EMPTY_ATTR_MAP; +OUTPUT_MAP(SliceD) = {{0, OUTPUT_DESC(y)}}; + +// MaxPool +INPUT_MAP(MaxPool) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(MaxPool) = {{"ksize", ATTR_DESC(ksize, AnyTraits(), AnyTraits>())}, + {"strides", ATTR_DESC(strides, AnyTraits(), AnyTraits>())}, + {"padding", ATTR_DESC(padding, AnyTraits())}, + {"data_format", ATTR_DESC(data_format, AnyTraits())}}; +OUTPUT_MAP(MaxPool) = {{0, OUTPUT_DESC(y)}}; + +// AvgPool +INPUT_MAP(AvgPool) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(AvgPool) = {{"ksize", ATTR_DESC(ksize, AnyTraits(), AnyTraits>())}, + {"strides", ATTR_DESC(strides, AnyTraits(), AnyTraits>())}, + {"padding", ATTR_DESC(padding, AnyTraits())}, + {"data_format", ATTR_DESC(data_format, AnyTraits())}}; +OUTPUT_MAP(AvgPool) = {{0, OUTPUT_DESC(y)}}; + +// GreaterEqual +INPUT_MAP(GreaterEqual) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(GreaterEqual) = EMPTY_ATTR_MAP; +OUTPUT_MAP(GreaterEqual) = {{0, OUTPUT_DESC(y)}}; + +// AssignAdd +INPUT_MAP(AssignAdd) = {{1, INPUT_DESC(ref)}, {2, INPUT_DESC(value)}}; +ATTR_MAP(AssignAdd) = EMPTY_ATTR_MAP; +OUTPUT_MAP(AssignAdd) = {{0, OUTPUT_DESC(ref)}}; + +// AssignSub +INPUT_MAP(AssignSub) = {{1, INPUT_DESC(var)}, {2, INPUT_DESC(value)}}; +ATTR_MAP(AssignSub) = EMPTY_ATTR_MAP; +OUTPUT_MAP(AssignSub) = {{0, OUTPUT_DESC(var)}}; + +// Cos +INPUT_MAP(Cos) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Cos) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Cos) = {{0, OUTPUT_DESC(y)}}; + +// Acos +INPUT_MAP(Acos) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Acos) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Acos) = {{0, OUTPUT_DESC(y)}}; + +// AcosGrad 
+INPUT_MAP(AcosGrad) = {{1, INPUT_DESC(y)}, {2, INPUT_DESC(dy)}}; +ATTR_MAP(AcosGrad) = EMPTY_ATTR_MAP; +OUTPUT_MAP(AcosGrad) = {{0, OUTPUT_DESC(z)}}; + +// Acosh +INPUT_MAP(Acosh) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Acosh) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Acosh) = {{0, OUTPUT_DESC(y)}}; + +// AcoshGrad +INPUT_MAP(AcoshGrad) = {{1, INPUT_DESC(y)}, {2, INPUT_DESC(dy)}}; +ATTR_MAP(AcoshGrad) = EMPTY_ATTR_MAP; +OUTPUT_MAP(AcoshGrad) = {{0, OUTPUT_DESC(z)}}; + +// Floor +INPUT_MAP(Floor) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Floor) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Floor) = {{0, OUTPUT_DESC(y)}}; + +// FloorDiv +INPUT_MAP(FloorDiv) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(FloorDiv) = EMPTY_ATTR_MAP; +OUTPUT_MAP(FloorDiv) = {{0, OUTPUT_DESC(y)}}; + +// FloorMod +INPUT_MAP(FloorMod) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(FloorMod) = EMPTY_ATTR_MAP; +OUTPUT_MAP(FloorMod) = {{0, OUTPUT_DESC(y)}}; + +// Sin +INPUT_MAP(Sin) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Sin) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Sin) = {{0, OUTPUT_DESC(y)}}; + +// Exp +INPUT_MAP(Exp) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Exp) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Exp) = {{0, OUTPUT_DESC(y)}}; + +// BoundingBoxEncode +INPUT_MAP(BoundingBoxEncode) = { + {1, INPUT_DESC(anchor_box)}, + {2, INPUT_DESC(ground_truth_box)}, +}; +ATTR_MAP(BoundingBoxEncode) = { + {"means", ATTR_DESC(means, AnyTraits>(), AnyTraits())}, + {"stds", ATTR_DESC(stds, AnyTraits>(), AnyTraits())}, +}; +OUTPUT_MAP(BoundingBoxEncode) = {{0, OUTPUT_DESC(delats)}}; + +// BoundingBoxDecode +INPUT_MAP(BoundingBoxDecode) = { + {1, INPUT_DESC(rois)}, + {2, INPUT_DESC(deltas)}, +}; +ATTR_MAP(BoundingBoxDecode) = { + {"means", ATTR_DESC(means, AnyTraits>(), AnyTraits())}, + {"stds", ATTR_DESC(stds, AnyTraits>(), AnyTraits())}, + {"max_shape", ATTR_DESC(max_shape, AnyTraits>(), AnyTraits>())}, + {"wh_ratio_clip", ATTR_DESC(wh_ratio_clip, AnyTraits())}, +}; +OUTPUT_MAP(BoundingBoxDecode) = {{0, OUTPUT_DESC(bboxes)}}; + +// TopK +INPUT_MAP(TopK) = 
{{1, INPUT_DESC(x)}, {2, INPUT_DESC(k)}}; +ATTR_MAP(TopK) = {{"sorted", ATTR_DESC(sorted, AnyTraits())}}; +OUTPUT_MAP(TopK) = {{0, OUTPUT_DESC(values)}, {1, OUTPUT_DESC(indices)}}; + +// Multiply +INPUT_MAP(Multiply) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(y)}}; +ATTR_MAP(Multiply) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Multiply) = {{0, OUTPUT_DESC(z)}}; + +// TileD +INPUT_MAP(TileD) = {{1, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(TileD) = {{2, ATTR_DESC(multiples, AnyTraits(), AnyTraits>())}}; +ATTR_MAP(TileD) = EMPTY_ATTR_MAP; +OUTPUT_MAP(TileD) = {{0, OUTPUT_DESC(y)}}; + +// OneHot +INPUT_MAP(OneHot) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(depth)}, {3, INPUT_DESC(on_value)}, {4, INPUT_DESC(off_value)}}; +ATTR_MAP(OneHot) = {{"axis", ATTR_DESC(axis, AnyTraits())}}; +OUTPUT_MAP(OneHot) = {{0, OUTPUT_DESC(y)}}; + +// GatherV2D +INPUT_MAP(GatherV2D) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(indices)}}; +INPUT_ATTR_MAP(GatherV2D) = {{3, ATTR_DESC(axis, AnyTraits())}}; +ATTR_MAP(GatherV2D) = EMPTY_ATTR_MAP; +OUTPUT_MAP(GatherV2D) = {{0, OUTPUT_DESC(y)}}; + +// Reshape +INPUT_MAP(Reshape) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(shape)}}; +ATTR_MAP(Reshape) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Reshape) = {{0, OUTPUT_DESC(y)}}; + +// TransShape +INPUT_MAP(TransShape) = {{1, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(TransShape) = {{2, ATTR_DESC(outShape, AnyTraits(), AnyTraits>())}}; +ATTR_MAP(TransShape) = EMPTY_ATTR_MAP; +OUTPUT_MAP(TransShape) = {{0, OUTPUT_DESC(y)}}; + +// BiasAdd +INPUT_MAP(BiasAdd) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(bias)}}; +ATTR_MAP(BiasAdd) = {{"data_format", ATTR_DESC(data_format, AnyTraits())}}; +OUTPUT_MAP(BiasAdd) = {{0, OUTPUT_DESC(y)}}; + +// Iou +INPUT_MAP(Iou) = {{1, INPUT_DESC(bboxes)}, {2, INPUT_DESC(gtboxes)}}; +ATTR_MAP(Iou) = {{"mode", ATTR_DESC(mode, AnyTraits())}}; +OUTPUT_MAP(Iou) = {{0, OUTPUT_DESC(overlap)}}; + +// ResizeNearestNeighborV2D +INPUT_MAP(ResizeNearestNeighborV2D) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(ResizeNearestNeighborV2D) = { + {"size", ATTR_DESC(size, 
AnyTraits>(), AnyTraits>())}, + {"align_corners", ATTR_DESC(align_corners, AnyTraits())}}; +OUTPUT_MAP(ResizeNearestNeighborV2D) = {{0, OUTPUT_DESC(y)}}; + +// ResizeNearestNeighborV2Grad +INPUT_MAP(ResizeNearestNeighborV2Grad) = {{1, INPUT_DESC(grads)}, {2, INPUT_DESC(size)}}; +ATTR_MAP(ResizeNearestNeighborV2Grad) = {{"align_corners", ATTR_DESC(align_corners, AnyTraits())}}; +OUTPUT_MAP(ResizeNearestNeighborV2Grad) = {{0, OUTPUT_DESC(y)}}; + +// ApplyAdam +INPUT_MAP(ApplyAdam) = {{1, INPUT_DESC(var)}, {2, INPUT_DESC(m)}, {3, INPUT_DESC(v)}, + {4, INPUT_DESC(beta1_power)}, {5, INPUT_DESC(beta2_power)}, {6, INPUT_DESC(lr)}, + {7, INPUT_DESC(beta1)}, {8, INPUT_DESC(beta2)}, {9, INPUT_DESC(epsilon)}, + {10, INPUT_DESC(grad)}}; +ATTR_MAP(ApplyAdam) = {{"use_locking", ATTR_DESC(use_locking, AnyTraits())}, + {"use_nesterov", ATTR_DESC(use_nesterov, AnyTraits())}}; +OUTPUT_MAP(ApplyAdam) = {{0, OUTPUT_DESC(var)}}; + +// ApplyAdamD +INPUT_MAP(ApplyAdamD) = {{1, INPUT_DESC(var)}, {2, INPUT_DESC(m)}, {3, INPUT_DESC(v)}, + {4, INPUT_DESC(beta1_power)}, {5, INPUT_DESC(beta2_power)}, {6, INPUT_DESC(lr)}, + {7, INPUT_DESC(beta1)}, {8, INPUT_DESC(beta2)}, {9, INPUT_DESC(epsilon)}, + {10, INPUT_DESC(grad)}}; +ATTR_MAP(ApplyAdamD) = {{"use_locking", ATTR_DESC(use_locking, AnyTraits())}, + {"use_nesterov", ATTR_DESC(use_nesterov, AnyTraits())}}; +OUTPUT_MAP(ApplyAdamD) = {{0, OUTPUT_DESC(var)}, {1, OUTPUT_DESC(m)}, {2, OUTPUT_DESC(v)}}; + +// Relu6 +INPUT_MAP(Relu6) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Relu6) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Relu6) = {{0, OUTPUT_DESC(y)}}; + +// Relu6Grad +INPUT_MAP(Relu6Grad) = {{1, INPUT_DESC(gradients)}, {2, INPUT_DESC(features)}}; +ATTR_MAP(Relu6Grad) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Relu6Grad) = {{0, OUTPUT_DESC(backprops)}}; + +// ResizeBilinearV2Grad +INPUT_MAP(ResizeBilinearV2Grad) = {{1, INPUT_DESC(grads)}, {2, INPUT_DESC(original_image)}}; +ATTR_MAP(ResizeBilinearV2Grad) = {{"align_corners", ATTR_DESC(align_corners, AnyTraits())}}; 
+OUTPUT_MAP(ResizeBilinearV2Grad) = {{0, OUTPUT_DESC(y)}}; + +// ResizeBilinearV2D +INPUT_MAP(ResizeBilinearV2D) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(ResizeBilinearV2D) = { + {"size", ATTR_DESC(size, AnyTraits>(), AnyTraits>())}, + {"align_corners", ATTR_DESC(align_corners, AnyTraits())}}; +OUTPUT_MAP(ResizeBilinearV2D) = {{0, OUTPUT_DESC(y)}}; + +// ZerosLike +INPUT_MAP(ZerosLike) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(ZerosLike) = EMPTY_ATTR_MAP; +OUTPUT_MAP(ZerosLike) = {{0, OUTPUT_DESC(y)}}; + +// OnesLike +INPUT_MAP(OnesLike) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(OnesLike) = EMPTY_ATTR_MAP; +OUTPUT_MAP(OnesLike) = {{0, OUTPUT_DESC(y)}}; + +// NMSWithMask +INPUT_MAP(NMSWithMask) = {{1, INPUT_DESC(box_scores)}}; +ATTR_MAP(NMSWithMask) = {{"iou_threshold", ATTR_DESC(iou_threshold, AnyTraits())}}; +OUTPUT_MAP(NMSWithMask) = { + {0, OUTPUT_DESC(selected_boxes)}, {1, OUTPUT_DESC(selected_idx)}, {2, OUTPUT_DESC(selected_mask)}}; + +// Unpack +INPUT_MAP(Unpack) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Unpack) = {{"axis", ATTR_DESC(axis, AnyTraits())}, {"num", ATTR_DESC(num, AnyTraits())}}; +DYN_OUTPUT_MAP(Unpack) = {{0, DYN_OUTPUT_DESC(y)}}; + +// TensorScatterUpdate +INPUT_MAP(TensorScatterUpdate) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(indices)}, {3, INPUT_DESC(updates)}}; +ATTR_MAP(TensorScatterUpdate) = EMPTY_ATTR_MAP; +OUTPUT_MAP(TensorScatterUpdate) = {{0, OUTPUT_DESC(y)}}; + +// ScatterUpdate +INPUT_MAP(ScatterUpdate) = {{1, INPUT_DESC(var)}, {2, INPUT_DESC(indices)}, {3, INPUT_DESC(updates)}}; +ATTR_MAP(ScatterUpdate) = {{"use_locking", ATTR_DESC(use_locking, AnyTraits())}}; +OUTPUT_MAP(ScatterUpdate) = {{0, OUTPUT_DESC(var)}}; + +// ScatterNdUpdate +INPUT_MAP(ScatterNdUpdate) = {{1, INPUT_DESC(var)}, {2, INPUT_DESC(indices)}, {3, INPUT_DESC(updates)}}; +ATTR_MAP(ScatterNdUpdate) = {{"use_locking", ATTR_DESC(use_locking, AnyTraits())}}; +OUTPUT_MAP(ScatterNdUpdate) = {{0, OUTPUT_DESC(var)}}; + +// ScatterMax +INPUT_MAP(ScatterMax) = {{1, INPUT_DESC(var)}, {2, INPUT_DESC(indices)}, 
{3, INPUT_DESC(updates)}}; +ATTR_MAP(ScatterMax) = {{"use_locking", ATTR_DESC(use_locking, AnyTraits())}}; +OUTPUT_MAP(ScatterMax) = {{0, OUTPUT_DESC(var)}}; + +// CheckValid +INPUT_MAP(CheckValid) = {{1, INPUT_DESC(bbox_tensor)}, {2, INPUT_DESC(img_metas)}}; +ATTR_MAP(CheckValid) = EMPTY_ATTR_MAP; +OUTPUT_MAP(CheckValid) = {{0, OUTPUT_DESC(valid_tensor)}}; + +// SmoothL1Loss +INPUT_MAP(SmoothL1Loss) = {{1, INPUT_DESC(predict)}, {2, INPUT_DESC(label)}}; +ATTR_MAP(SmoothL1Loss) = {{"sigma", ATTR_DESC(sigma, AnyTraits())}}; +OUTPUT_MAP(SmoothL1Loss) = {{0, OUTPUT_DESC(loss)}}; + +// SmoothL1LossGrad +INPUT_MAP(SmoothL1LossGrad) = {{1, INPUT_DESC(predict)}, {2, INPUT_DESC(label)}, {3, INPUT_DESC(dout)}}; +ATTR_MAP(SmoothL1LossGrad) = {{"sigma", ATTR_DESC(sigma, AnyTraits())}}; +OUTPUT_MAP(SmoothL1LossGrad) = {{0, OUTPUT_DESC(gradient)}}; + +// SigmoidCrossEntropyWithLogits +INPUT_MAP(SigmoidCrossEntropyWithLogits) = {{1, INPUT_DESC(predict)}, {2, INPUT_DESC(target)}}; +ATTR_MAP(SigmoidCrossEntropyWithLogits) = EMPTY_ATTR_MAP; +OUTPUT_MAP(SigmoidCrossEntropyWithLogits) = {{0, OUTPUT_DESC(loss)}}; + +// SigmoidCrossEntropyWithLogitsGrad +INPUT_MAP(SigmoidCrossEntropyWithLogitsGrad) = { + {1, INPUT_DESC(predict)}, {2, INPUT_DESC(target)}, {3, INPUT_DESC(dout)}}; +ATTR_MAP(SigmoidCrossEntropyWithLogitsGrad) = EMPTY_ATTR_MAP; +OUTPUT_MAP(SigmoidCrossEntropyWithLogitsGrad) = {{0, OUTPUT_DESC(gradient)}}; + +// ScatterNdD +INPUT_MAP(ScatterNdD) = {{1, INPUT_DESC(indices)}, {2, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(ScatterNdD) = { + {3, ATTR_DESC(shape, AnyTraits>(), AnyTraits>())}}; +ATTR_MAP(ScatterNdD) = EMPTY_ATTR_MAP; +OUTPUT_MAP(ScatterNdD) = {{0, OUTPUT_DESC(y)}}; + +// PadD +INPUT_MAP(PadD) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(PadD) = {{"paddings", ATTR_DESC(paddings, AnyTraits>>())}}; +OUTPUT_MAP(PadD) = {{0, OUTPUT_DESC(y)}}; + +// MirrorPad +INPUT_MAP(MirrorPad) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(paddings)}}; +ATTR_MAP(MirrorPad) = {{"mode", ATTR_DESC(mode, 
AnyTraits())}}; +OUTPUT_MAP(MirrorPad) = {{0, OUTPUT_DESC(y)}}; + +// MirrorPadGrad +INPUT_MAP(MirrorPadGrad) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(paddings)}}; +ATTR_MAP(MirrorPadGrad) = {{"mode", ATTR_DESC(mode, AnyTraits())}}; +OUTPUT_MAP(MirrorPadGrad) = {{0, OUTPUT_DESC(y)}}; + +// GatherNd +INPUT_MAP(GatherNd) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(indices)}}; +ATTR_MAP(GatherNd) = EMPTY_ATTR_MAP; +OUTPUT_MAP(GatherNd) = {{0, OUTPUT_DESC(y)}}; + +// ROIAlign +INPUT_MAP(ROIAlign) = {{1, INPUT_DESC(features)}, {2, INPUT_DESC(rois)}}; +OUTPUT_MAP(ROIAlign) = {{0, OUTPUT_DESC(y)}}; +ATTR_MAP(ROIAlign) = {{"pooled_height", ATTR_DESC(pooled_height, AnyTraits())}, + {"pooled_width", ATTR_DESC(pooled_width, AnyTraits())}, + {"spatial_scale", ATTR_DESC(spatial_scale, AnyTraits())}, + {"sample_num", ATTR_DESC(sample_num, AnyTraits())}, + {"roi_end_mode", ATTR_DESC(roi_end_mode, AnyTraits())}}; + +// ROIAlignGrad +INPUT_MAP(ROIAlignGrad) = {{1, INPUT_DESC(ydiff)}, {2, INPUT_DESC(rois)}}; +OUTPUT_MAP(ROIAlignGrad) = {{0, OUTPUT_DESC(xdiff)}}; +ATTR_MAP(ROIAlignGrad) = { + {"xdiff_shape", ATTR_DESC(xdiff_shape, AnyTraits>(), AnyTraits>())}, + {"pooled_height", ATTR_DESC(pooled_height, AnyTraits())}, + {"pooled_width", ATTR_DESC(pooled_width, AnyTraits())}, + {"spatial_scale", ATTR_DESC(spatial_scale, AnyTraits())}, + {"sample_num", ATTR_DESC(sample_num, AnyTraits())}}; + +// ArgMaxD +INPUT_MAP(ArgMaxD) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(ArgMaxD) = {{"axis", ATTR_DESC(dimension, AnyTraits())}, + {"output_type", ATTR_DESC(dtype, AnyTraits())}}; +OUTPUT_MAP(ArgMaxD) = {{0, OUTPUT_DESC(y)}}; + +// ArgMinD +INPUT_MAP(ArgMinD) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(ArgMinD) = {{"axis", ATTR_DESC(dimension, AnyTraits())}, + {"output_type", ATTR_DESC(dtype, AnyTraits())}}; +OUTPUT_MAP(ArgMinD) = {{0, OUTPUT_DESC(y)}}; + +// ArgMaxWithValue +INPUT_MAP(ArgMaxWithValue) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(ArgMaxWithValue) = {{"axis", ATTR_DESC(dimension, AnyTraits())}, + {"keep_dims", 
ATTR_DESC(keep_dims, AnyTraits())}}; +OUTPUT_MAP(ArgMaxWithValue) = {{0, OUTPUT_DESC(indice)}, {1, OUTPUT_DESC(values)}}; + +// ArgMinWithValue +INPUT_MAP(ArgMinWithValue) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(ArgMinWithValue) = {{"axis", ATTR_DESC(dimension, AnyTraits())}, + {"keep_dims", ATTR_DESC(keep_dims, AnyTraits())}}; +OUTPUT_MAP(ArgMinWithValue) = {{0, OUTPUT_DESC(indice)}, {1, OUTPUT_DESC(values)}}; + +// ReduceAllD +INPUT_MAP(ReduceAllD) = {{1, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(ReduceAllD) = { + {2, ATTR_DESC(axes, AnyTraits>(), AnyTraits>())}}; +ATTR_MAP(ReduceAllD) = {{"keep_dims", ATTR_DESC(keep_dims, AnyTraits())}}; +OUTPUT_MAP(ReduceAllD) = {{0, OUTPUT_DESC(y)}}; + +// ReduceMeanD +INPUT_MAP(ReduceMeanD) = {{1, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(ReduceMeanD) = { + {2, ATTR_DESC(axes, AnyTraits>(), AnyTraits>())}}; +ATTR_MAP(ReduceMeanD) = {{"keep_dims", ATTR_DESC(keep_dims, AnyTraits())}}; +OUTPUT_MAP(ReduceMeanD) = {{0, OUTPUT_DESC(y)}}; + +// HCOMAllreduce +INPUT_MAP(HcomAllReduce) = {{1, INPUT_DESC(x)}}; +OUTPUT_MAP(HcomAllReduce) = {{0, OUTPUT_DESC(y)}}; +ATTR_MAP(HcomAllReduce) = {{"op", ATTR_DESC(reduction, AnyTraits())}, + {"group", ATTR_DESC(group, AnyTraits())}, + {"fusion", ATTR_DESC(fusion, AnyTraits())}}; + +// HCOMBraodcast +INPUT_MAP(HcomBroadcast) = EMPTY_INPUT_MAP; +DYN_INPUT_MAP(HcomBroadcast) = {{1, DYN_INPUT_DESC(x)}}; +DYN_OUTPUT_MAP(HcomBroadcast) = {{0, DYN_OUTPUT_DESC(y)}}; +ATTR_MAP(HcomBroadcast) = {{"root_rank", ATTR_DESC(root_rank, AnyTraits())}, + {"group", ATTR_DESC(group, AnyTraits())}}; + +// HCOMAllreduce +INPUT_MAP(HcomAllGather) = {{1, INPUT_DESC(x)}}; +OUTPUT_MAP(HcomAllGather) = {{0, OUTPUT_DESC(y)}}; +ATTR_MAP(HcomAllGather) = {{"group", ATTR_DESC(group, AnyTraits())}, + {"rank_size", ATTR_DESC(rank_size, AnyTraits())}}; + +// HCOMReduceScatter +INPUT_MAP(HcomReduceScatter) = {{1, INPUT_DESC(x)}}; +OUTPUT_MAP(HcomReduceScatter) = {{0, OUTPUT_DESC(y)}}; +ATTR_MAP(HcomReduceScatter) = {{"group", ATTR_DESC(group, 
AnyTraits())}, + {"op", ATTR_DESC(reduction, AnyTraits())}, + {"rank_size", ATTR_DESC(rank_size, AnyTraits())}}; + +// Variable +INPUT_MAP(Variable) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Variable) = EMPTY_ATTR_MAP; + +// ReluGrad +INPUT_MAP(ReluGrad) = {{1, INPUT_DESC(gradients)}, {2, INPUT_DESC(features)}}; +ATTR_MAP(ReluGrad) = EMPTY_ATTR_MAP; +OUTPUT_MAP(ReluGrad) = {{0, OUTPUT_DESC(backprops)}}; + +// BiasAddGrad +INPUT_MAP(BiasAddGrad) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(BiasAddGrad) = {{"data_format", ATTR_DESC(data_format, AnyTraits())}}; +OUTPUT_MAP(BiasAddGrad) = {{0, OUTPUT_DESC(y)}}; + +// MaxPoolGrad +INPUT_MAP(MaxPoolGrad) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}, {3, INPUT_DESC(grad)}}; +ATTR_MAP(MaxPoolGrad) = {{"ksize", ATTR_DESC(ksize, AnyTraits(), AnyTraits>())}, + {"strides", ATTR_DESC(strides, AnyTraits(), AnyTraits>())}, + {"padding", ATTR_DESC(padding, AnyTraits())}, + {"data_format", ATTR_DESC(data_format, AnyTraits())}}; +OUTPUT_MAP(MaxPoolGrad) = {{0, OUTPUT_DESC(y)}}; + +// avgpoolgrad +INPUT_MAP(AvgPoolGrad) = {{1, INPUT_DESC(orig_input_shape)}, {2, INPUT_DESC(input_grad)}}; +ATTR_MAP(AvgPoolGrad) = {{"ksize", ATTR_DESC(ksize, AnyTraits(), AnyTraits>())}, + {"strides", ATTR_DESC(strides, AnyTraits(), AnyTraits>())}, + {"padding", ATTR_DESC(padding, AnyTraits())}, + {"data_format", ATTR_DESC(data_format, AnyTraits())}}; +OUTPUT_MAP(AvgPoolGrad) = {{0, OUTPUT_DESC(out_grad)}}; + +// MaxPoolWithArgmax +INPUT_MAP(MaxPoolWithArgmax) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(MaxPoolWithArgmax) = {{"ksize", ATTR_DESC(ksize, AnyTraits(), AnyTraits>())}, + {"strides", ATTR_DESC(strides, AnyTraits(), AnyTraits>())}, + {"padding", ATTR_DESC(padding, AnyTraits())}}; +OUTPUT_MAP(MaxPoolWithArgmax) = {{0, OUTPUT_DESC(y)}, {1, OUTPUT_DESC(argmax)}}; + +// MaxPoolGradWithArgmax +INPUT_MAP(MaxPoolGradWithArgmax) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(grad)}, {3, INPUT_DESC(argmax)}}; +ATTR_MAP(MaxPoolGradWithArgmax) = {{"ksize", ATTR_DESC(ksize, AnyTraits(), 
AnyTraits>())}, + {"strides", ATTR_DESC(strides, AnyTraits(), AnyTraits>())}, + {"padding", ATTR_DESC(padding, AnyTraits())}}; +OUTPUT_MAP(MaxPoolGradWithArgmax) = {{0, OUTPUT_DESC(y)}}; + +// ExtractImagePatches +INPUT_MAP(ExtractImagePatches) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(ExtractImagePatches) = {{"ksizes", ATTR_DESC(ksizes, AnyTraits(), AnyTraits>())}, + {"strides", ATTR_DESC(strides, AnyTraits(), AnyTraits>())}, + {"rates", ATTR_DESC(rates, AnyTraits(), AnyTraits>())}, + {"padding", ATTR_DESC(padding, AnyTraits())}}; +OUTPUT_MAP(ExtractImagePatches) = {{0, OUTPUT_DESC(y)}}; + +// Conv2D +INPUT_MAP(Conv2D) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(filter)}}; +ATTR_MAP(Conv2D) = { + {"stride", ATTR_DESC(strides, AnyTraits>(), AnyTraits>())}, + {"pad_list", ATTR_DESC(pads, AnyTraits>(), AnyTraits>())}, + {"dilation", ATTR_DESC(dilations, AnyTraits>(), AnyTraits>())}, + {"data_format", ATTR_DESC(data_format, AnyTraits())}, + {"group", ATTR_DESC(groups, AnyTraits())}, +}; +OUTPUT_MAP(Conv2D) = {{0, OUTPUT_DESC(y)}}; + +// Conv2DBackpropInputD +INPUT_MAP(Conv2DBackpropInputD) = {{1, INPUT_DESC(out_backprop)}, {2, INPUT_DESC(filter)}}; +INPUT_ATTR_MAP(Conv2DBackpropInputD) = { + {3, ATTR_DESC(input_size, AnyTraits>(), AnyTraits>())}}; +ATTR_MAP(Conv2DBackpropInputD) = { + {"pad_list", ATTR_DESC(pads, AnyTraits>(), AnyTraits>())}, + {"stride", ATTR_DESC(strides, "pad", AnyTraits>())}, + {"dilation", ATTR_DESC(dilations, AnyTraits>(), AnyTraits>())}, + {"data_format", ATTR_DESC(data_format, AnyTraits())}, + {"group", ATTR_DESC(groups, AnyTraits())}, +}; +OUTPUT_MAP(Conv2DBackpropInputD) = {{0, OUTPUT_DESC(y)}}; + +// Conv2DBackpropFilterD +INPUT_MAP(Conv2DBackpropFilterD) = {{1, INPUT_DESC(out_backprop)}, {2, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(Conv2DBackpropFilterD) = { + {3, ATTR_DESC(filter_size, AnyTraits>(), AnyTraits>())}}; +ATTR_MAP(Conv2DBackpropFilterD) = { + {"pad_list", ATTR_DESC(pads, AnyTraits>(), AnyTraits>())}, + {"stride", ATTR_DESC(strides, "pad", 
AnyTraits>())}, + {"dilation", ATTR_DESC(dilations, AnyTraits>(), AnyTraits>())}, + {"data_format", ATTR_DESC(data_format, AnyTraits())}, + {"group", ATTR_DESC(groups, AnyTraits())}, +}; +OUTPUT_MAP(Conv2DBackpropFilterD) = {{0, OUTPUT_DESC(y)}}; + +// DepthwiseConv2D +INPUT_MAP(DepthwiseConv2D) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(filter)}}; +ATTR_MAP(DepthwiseConv2D) = { + {"stride", ATTR_DESC(strides, AnyTraits>(), AnyTraits>())}, + {"pads", ATTR_DESC(pads, AnyTraits>(), AnyTraits>())}, + {"dilation", ATTR_DESC(dilations, AnyTraits>(), AnyTraits>())}, + {"data_format", ATTR_DESC(data_format, AnyTraits())}, +}; +OUTPUT_MAP(DepthwiseConv2D) = {{0, OUTPUT_DESC(y)}}; + +// DepthwiseConv2DBackpropInputD +INPUT_MAP(DepthwiseConv2DBackpropInputD) = {{2, INPUT_DESC(filter)}, {3, INPUT_DESC(out_backprop)}}; +INPUT_ATTR_MAP(DepthwiseConv2DBackpropInputD) = { + {1, ATTR_DESC(input_size, AnyTraits>(), AnyTraits>())}}; +ATTR_MAP(DepthwiseConv2DBackpropInputD) = { + {"stride", ATTR_DESC(strides, AnyTraits>(), AnyTraits>())}, + {"pads", ATTR_DESC(pads, AnyTraits>(), AnyTraits>())}, + {"dilation", ATTR_DESC(dilations, AnyTraits>(), AnyTraits>())}, +}; +OUTPUT_MAP(DepthwiseConv2DBackpropInputD) = {{0, OUTPUT_DESC(input_grad)}}; + +// DepthwiseConv2DBackpropFilterD +INPUT_MAP(DepthwiseConv2DBackpropFilterD) = {{1, INPUT_DESC(input)}, {3, INPUT_DESC(out_backprop)}}; +INPUT_ATTR_MAP(DepthwiseConv2DBackpropFilterD) = { + {2, ATTR_DESC(filter_size, AnyTraits>(), AnyTraits>())}}; +ATTR_MAP(DepthwiseConv2DBackpropFilterD) = { + {"stride", ATTR_DESC(strides, AnyTraits>(), AnyTraits>())}, + {"pads", ATTR_DESC(pads, AnyTraits>(), AnyTraits>())}, + {"dilation", ATTR_DESC(dilations, AnyTraits>(), AnyTraits>())}, +}; +OUTPUT_MAP(DepthwiseConv2DBackpropFilterD) = {{0, OUTPUT_DESC(filter_grad)}}; + +// MatMulV2 +INPUT_MAP(MatMulV2) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(MatMulV2) = {{"transpose_a", ATTR_DESC(transpose_x1, AnyTraits())}, + {"transpose_b", 
ATTR_DESC(transpose_x2, AnyTraits())}}; +OUTPUT_MAP(MatMulV2) = {{0, OUTPUT_DESC(y)}}; + +// Merge +INPUT_MAP(Merge) = EMPTY_INPUT_MAP; +DYN_INPUT_MAP(Merge) = {{1, DYN_INPUT_DESC(x)}}; +ATTR_MAP(Merge) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Merge) = {{0, OUTPUT_DESC(y)}, {1, OUTPUT_DESC(value_index)}}; + +// Switch +INPUT_MAP(Switch) = {{1, INPUT_DESC(data)}, {2, INPUT_DESC(pred)}}; +OUTPUT_MAP(Switch) = {{0, OUTPUT_DESC(output_false)}, {1, OUTPUT_DESC(output_true)}}; +ATTR_MAP(Switch) = EMPTY_ATTR_MAP; + +// AddN +INPUT_MAP(AddN) = EMPTY_INPUT_MAP; +DYN_INPUT_MAP(AddN) = {{1, DYN_INPUT_DESC(x)}}; +ATTR_MAP(AddN) = {{"n", ATTR_DESC(N, AnyTraits())}}; +OUTPUT_MAP(AddN) = {{0, OUTPUT_DESC(y)}}; + +// Mul +INPUT_MAP(Mul) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(Mul) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Mul) = {{0, OUTPUT_DESC(y)}}; + +// RealDiv +INPUT_MAP(RealDiv) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(RealDiv) = EMPTY_ATTR_MAP; +OUTPUT_MAP(RealDiv) = {{0, OUTPUT_DESC(y)}}; + +// Cast +INPUT_MAP(Cast) = {{1, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(Cast) = {{2, ATTR_DESC(dst_type, AnyTraits())}}; +ATTR_MAP(Cast) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Cast) = {{0, OUTPUT_DESC(y)}}; + +// Case +INPUT_MAP(Case) = {{1, INPUT_DESC(branch_index)}}; +DYN_INPUT_MAP(Case) = {{2, DYN_INPUT_DESC(input)}}; +ATTR_MAP(Case) = EMPTY_ATTR_MAP; +DYN_OUTPUT_MAP(Case) = {{0, DYN_OUTPUT_DESC(output)}}; +DYN_SUBGRAPH_MAP(Case) = {{0, DYN_SUBGRAPH_DESC(branches)}}; + +// Reciprocal +INPUT_MAP(Reciprocal) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Reciprocal) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Reciprocal) = {{0, OUTPUT_DESC(y)}}; + +// Sub +INPUT_MAP(Sub) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(Sub) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Sub) = {{0, OUTPUT_DESC(y)}}; + +// SplitD +INPUT_MAP(SplitD) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(SplitD) = {{"axis", ATTR_DESC(split_dim, AnyTraits())}, + {"output_num", ATTR_DESC(num_split, AnyTraits())}}; +DYN_OUTPUT_MAP(SplitD) = {{0, DYN_OUTPUT_DESC(y)}}; + 
+// Range +INPUT_MAP(RangeD) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(RangeD) = {{"start", ATTR_DESC(start, AnyTraits())}, + {"limit", ATTR_DESC(limit, AnyTraits())}, + {"delta", ATTR_DESC(delta, AnyTraits())}}; +OUTPUT_MAP(RangeD) = {{0, OUTPUT_DESC(y)}}; + +// Neg +INPUT_MAP(Neg) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Neg) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Neg) = {{0, OUTPUT_DESC(y)}}; + +// Transpose +INPUT_MAP(TransposeD) = {{1, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(TransposeD) = {{2, ATTR_DESC(perm, AnyTraits(), AnyTraits>())}}; +ATTR_MAP(TransposeD) = EMPTY_ATTR_MAP; +// Do not set Transpose operator output descriptor + +// DropOutGenMask +INPUT_MAP(DropOutGenMask) = {{1, INPUT_DESC(shape)}, {2, INPUT_DESC(prob)}}; +ATTR_MAP(DropOutGenMask) = {{"Seed0", ATTR_DESC(seed, AnyTraits())}, + {"Seed1", ATTR_DESC(seed2, AnyTraits())}}; +OUTPUT_MAP(DropOutGenMask) = {{0, OUTPUT_DESC(y)}}; + +// Pack +INPUT_MAP(Pack) = EMPTY_INPUT_MAP; +DYN_INPUT_MAP(Pack) = {{1, DYN_INPUT_DESC(x)}}; +ATTR_MAP(Pack) = {{"num", ATTR_DESC(N, AnyTraits())}, {"axis", ATTR_DESC(axis, AnyTraits())}}; +OUTPUT_MAP(Pack) = {{0, OUTPUT_DESC(y)}}; + +// ConcatD +INPUT_MAP(ConcatD) = EMPTY_INPUT_MAP; +DYN_INPUT_MAP(ConcatD) = {{1, DYN_INPUT_DESC(x)}}; +ATTR_MAP(ConcatD) = { + {"axis", ATTR_DESC(concat_dim, AnyTraits())}, + {"inputNums", ATTR_DESC(N, AnyTraits())}, +}; +OUTPUT_MAP(ConcatD) = {{0, OUTPUT_DESC(y)}}; + +// Less +INPUT_MAP(Less) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(Less) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Less) = {{0, OUTPUT_DESC(y)}}; + +// Rsqrt +INPUT_MAP(Rsqrt) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Rsqrt) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Rsqrt) = {{0, OUTPUT_DESC(y)}}; + +// Sqrt +INPUT_MAP(Sqrt) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Sqrt) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Sqrt) = {{0, OUTPUT_DESC(y)}}; + +// Square +INPUT_MAP(Square) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Square) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Square) = {{0, OUTPUT_DESC(y)}}; + +// SquareSumAll +INPUT_MAP(SquareSumAll) = {{1, 
INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(SquareSumAll) = EMPTY_ATTR_MAP; +OUTPUT_MAP(SquareSumAll) = {{0, OUTPUT_DESC(y1)}, {1, OUTPUT_DESC(y2)}}; + +// Tanh +INPUT_MAP(Tanh) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Tanh) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Tanh) = {{0, OUTPUT_DESC(y)}}; + +// TanhGrad +INPUT_MAP(TanhGrad) = {{1, INPUT_DESC(y)}, {2, INPUT_DESC(dy)}}; +ATTR_MAP(TanhGrad) = EMPTY_ATTR_MAP; +OUTPUT_MAP(TanhGrad) = {{0, OUTPUT_DESC(z)}}; + +// ReduceMinD +INPUT_MAP(ReduceMinD) = {{1, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(ReduceMinD) = { + {2, ATTR_DESC(axes, AnyTraits>(), AnyTraits>())}}; +ATTR_MAP(ReduceMinD) = {{"keep_dims", ATTR_DESC(keep_dims, AnyTraits())}}; +OUTPUT_MAP(ReduceMinD) = {{0, OUTPUT_DESC(y)}}; + +// ReduceMaxD +INPUT_MAP(ReduceMaxD) = {{1, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(ReduceMaxD) = { + {2, ATTR_DESC(axes, AnyTraits>(), AnyTraits>())}}; +ATTR_MAP(ReduceMaxD) = {{"keep_dims", ATTR_DESC(keep_dims, AnyTraits())}}; +OUTPUT_MAP(ReduceMaxD) = {{0, OUTPUT_DESC(y)}}; + +// Maximum +INPUT_MAP(Maximum) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(Maximum) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Maximum) = {{0, OUTPUT_DESC(y)}}; + +// Minimum +INPUT_MAP(Minimum) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(Minimum) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Minimum) = {{0, OUTPUT_DESC(y)}}; + +// MaximumGrad +INPUT_MAP(MaximumGrad) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}, {3, INPUT_DESC(grads)}}; +ATTR_MAP(MaximumGrad) = {{"grad_x", ATTR_DESC(grad_x, AnyTraits())}, + {"grad_y", ATTR_DESC(grad_y, AnyTraits())}}; +OUTPUT_MAP(MaximumGrad) = {{0, OUTPUT_DESC(y1)}, {1, OUTPUT_DESC(y2)}}; + +// MinimumGrad +INPUT_MAP(MinimumGrad) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}, {3, INPUT_DESC(grads)}}; +ATTR_MAP(MinimumGrad) = {{"grad_x", ATTR_DESC(grad_x, AnyTraits())}, + {"grad_y", ATTR_DESC(grad_y, AnyTraits())}}; +OUTPUT_MAP(MinimumGrad) = {{0, OUTPUT_DESC(y1)}, {1, OUTPUT_DESC(y2)}}; + +// Pow +INPUT_MAP(Pow) = { + {1, INPUT_DESC(x1)}, + {2, 
INPUT_DESC(x2)}, +}; +ATTR_MAP(Pow) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Pow) = {{0, OUTPUT_DESC(y)}}; + +// Equal +INPUT_MAP(Equal) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(Equal) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Equal) = {{0, OUTPUT_DESC(y)}}; + +// NotEqual +INPUT_MAP(NotEqual) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(NotEqual) = EMPTY_ATTR_MAP; +OUTPUT_MAP(NotEqual) = {{0, OUTPUT_DESC(y)}}; + +// Log +INPUT_MAP(Log) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Log) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Log) = {{0, OUTPUT_DESC(y)}}; + +// LogicalAnd +INPUT_MAP(LogicalAnd) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(LogicalAnd) = EMPTY_ATTR_MAP; +OUTPUT_MAP(LogicalAnd) = {{0, OUTPUT_DESC(y)}}; + +// LogicalOr +INPUT_MAP(LogicalOr) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(LogicalOr) = EMPTY_ATTR_MAP; +OUTPUT_MAP(LogicalOr) = {{0, OUTPUT_DESC(y)}}; + +// LogicalNot +INPUT_MAP(LogicalNot) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(LogicalNot) = EMPTY_ATTR_MAP; +OUTPUT_MAP(LogicalNot) = {{0, OUTPUT_DESC(y)}}; + +// Greater +INPUT_MAP(Greater) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(Greater) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Greater) = {{0, OUTPUT_DESC(y)}}; + +// LogSoftmaxGrad +INPUT_MAP(LogSoftmaxGrad) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(grad)}}; +ATTR_MAP(LogSoftmaxGrad) = { + {"axis", ATTR_DESC(axis, AnyTraits>(), AnyTraits>())}}; +OUTPUT_MAP(LogSoftmaxGrad) = {{0, OUTPUT_DESC(y)}}; + +// Select +INPUT_MAP(Select) = {{1, INPUT_DESC(condition)}, {2, INPUT_DESC(x1)}, {3, INPUT_DESC(x2)}}; +ATTR_MAP(Select) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Select) = {{0, OUTPUT_DESC(y)}}; + +// LessEqual +INPUT_MAP(LessEqual) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(LessEqual) = EMPTY_ATTR_MAP; +OUTPUT_MAP(LessEqual) = {{0, OUTPUT_DESC(y)}}; + +// LogSoftmaxV2 +INPUT_MAP(LogSoftmaxV2) = {{1, INPUT_DESC(logits)}}; +ATTR_MAP(LogSoftmaxV2) = { + {"axis", ATTR_DESC(axes, AnyTraits>(), AnyTraits>())}}; +OUTPUT_MAP(LogSoftmaxV2) = {{0, 
OUTPUT_DESC(logsoftmax)}}; + +// RandomChoiceWithMask +INPUT_MAP(RandomChoiceWithMask) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(RandomChoiceWithMask) = {{"count", ATTR_DESC(count, AnyTraits())}, + {"seed", ATTR_DESC(seed, AnyTraits())}, + {"seed2", ATTR_DESC(seed2, AnyTraits())}}; +OUTPUT_MAP(RandomChoiceWithMask) = {{0, OUTPUT_DESC(y)}, {1, OUTPUT_DESC(mask)}}; + +// TruncatedNormal +INPUT_MAP(TruncatedNormal) = {{1, INPUT_DESC(shape)}}; +ATTR_MAP(TruncatedNormal) = {{"seed", ATTR_DESC(seed, AnyTraits())}, + {"seed2", ATTR_DESC(seed2, AnyTraits())}}; +OUTPUT_MAP(TruncatedNormal) = {{0, OUTPUT_DESC(y)}}; + +// StridedSliceGrad +INPUT_MAP(StridedSliceGrad) = { + {1, INPUT_DESC(dy)}, {2, INPUT_DESC(shape)}, {3, INPUT_DESC(begin)}, {4, INPUT_DESC(end)}, {5, INPUT_DESC(strides)}}; +ATTR_MAP(StridedSliceGrad) = {{"begin_mask", ATTR_DESC(begin_mask, AnyTraits())}, + {"end_mask", ATTR_DESC(end_mask, AnyTraits())}, + {"ellipsis_mask", ATTR_DESC(ellipsis_mask, AnyTraits())}, + {"new_axis_mask", ATTR_DESC(new_axis_mask, AnyTraits())}, + {"shrink_axis_mask", ATTR_DESC(shrink_axis_mask, AnyTraits())}}; +OUTPUT_MAP(StridedSliceGrad) = {{0, OUTPUT_DESC(output)}}; + +// Gelu +INPUT_MAP(Gelu) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Gelu) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Gelu) = {{0, OUTPUT_DESC(y)}}; + +// GeluGrad +INPUT_MAP(GeluGrad) = {{1, INPUT_DESC(dy)}, {2, INPUT_DESC(x)}, {3, INPUT_DESC(y)}}; +ATTR_MAP(GeluGrad) = EMPTY_ATTR_MAP; +OUTPUT_MAP(GeluGrad) = {{0, OUTPUT_DESC(z)}}; + +// StridedSlice +INPUT_MAP(StridedSlice) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(begin)}, {3, INPUT_DESC(end)}, {4, INPUT_DESC(strides)}}; +ATTR_MAP(StridedSlice) = {{"begin_mask", ATTR_DESC(begin_mask, AnyTraits())}, + {"end_mask", ATTR_DESC(end_mask, AnyTraits())}, + {"ellipsis_mask", ATTR_DESC(ellipsis_mask, AnyTraits())}, + {"new_axis_mask", ATTR_DESC(new_axis_mask, AnyTraits())}, + {"shrink_axis_mask", ATTR_DESC(shrink_axis_mask, AnyTraits())}}; +OUTPUT_MAP(StridedSlice) = {{0, OUTPUT_DESC(y)}}; + +// 
UnsortedSegmentSum +INPUT_MAP(UnsortedSegmentSumD) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(segment_ids)}}; +INPUT_ATTR_MAP(UnsortedSegmentSumD) = {{3, ATTR_DESC(num_segments, AnyTraits())}}; +ATTR_MAP(UnsortedSegmentSumD) = EMPTY_ATTR_MAP; +OUTPUT_MAP(UnsortedSegmentSumD) = {{0, OUTPUT_DESC(y)}}; + +// UnsortedSegmentMin +INPUT_MAP(UnsortedSegmentMin) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(segment_ids)}, {3, INPUT_DESC(num_segments)}}; +ATTR_MAP(UnsortedSegmentMin) = EMPTY_ATTR_MAP; +OUTPUT_MAP(UnsortedSegmentMin) = {{0, OUTPUT_DESC(y)}}; + +// ExpandDims +INPUT_MAP(ExpandDims) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(axis)}}; +ATTR_MAP(ExpandDims) = EMPTY_ATTR_MAP; +OUTPUT_MAP(ExpandDims) = {{0, OUTPUT_DESC(y)}}; + +// Squeeze +INPUT_MAP(Squeeze) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Squeeze) = {{"axis", ATTR_DESC(axis, AnyTraits(), AnyTraits>())}}; +OUTPUT_MAP(Squeeze) = {{0, OUTPUT_DESC(y)}}; + +// SGD +INPUT_MAP(SGD) = {{1, INPUT_DESC(parameters)}, {2, INPUT_DESC(gradient)}, {3, INPUT_DESC(learning_rate)}, + {4, INPUT_DESC(accum)}, {5, INPUT_DESC(momentum)}, {6, INPUT_DESC(stat)}}; +ATTR_MAP(SGD) = {{"dampening", ATTR_DESC(dampening, AnyTraits())}, + {"weight_decay", ATTR_DESC(weight_decay, AnyTraits())}, + {"nesterov", ATTR_DESC(nesterov, AnyTraits())}}; +OUTPUT_MAP(SGD) = {{0, OUTPUT_DESC(parameters)}}; + +// LayerNorm +INPUT_MAP(LayerNorm) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(gamma)}, {3, INPUT_DESC(beta)}}; +ATTR_MAP(LayerNorm) = {{"begin_norm_axis", ATTR_DESC(begin_norm_axis, AnyTraits())}, + {"begin_params_axis", ATTR_DESC(begin_params_axis, AnyTraits())}, + {"epsilon", ATTR_DESC(epsilon, AnyTraits())}}; +OUTPUT_MAP(LayerNorm) = {{0, OUTPUT_DESC(y)}, {1, OUTPUT_DESC(mean)}, {2, OUTPUT_DESC(variance)}}; + +// LayerNormGrad +INPUT_MAP(LayerNormGrad) = { + {1, INPUT_DESC(x)}, {2, INPUT_DESC(dy)}, {3, INPUT_DESC(variance)}, {4, INPUT_DESC(mean)}, {5, INPUT_DESC(gamma)}}; +ATTR_MAP(LayerNormGrad) = EMPTY_ATTR_MAP; +OUTPUT_MAP(LayerNormGrad) = {{0, OUTPUT_DESC(pd_x)}, 
{1, OUTPUT_DESC(pd_gamma)}, {2, OUTPUT_DESC(pd_beta)}}; + +// BatchMatMul +INPUT_MAP(BatchMatMul) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(BatchMatMul) = {{"transpose_x1", ATTR_DESC(adj_x1, AnyTraits())}, + {"transpose_x2", ATTR_DESC(adj_x2, AnyTraits())}}; +OUTPUT_MAP(BatchMatMul) = {{0, OUTPUT_DESC(y)}}; + +// DropoutDoMask +INPUT_MAP(DropOutDoMask) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(mask)}, {3, INPUT_DESC(keep_prob)}}; +ATTR_MAP(DropOutDoMask) = EMPTY_ATTR_MAP; +OUTPUT_MAP(DropOutDoMask) = {{0, OUTPUT_DESC(y)}}; + +// NPUGetFloatStatus +INPUT_MAP(NPUGetFloatStatus) = {{1, INPUT_DESC(addr)}}; +OUTPUT_MAP(NPUGetFloatStatus) = {{0, OUTPUT_DESC(data)}}; +ATTR_MAP(NPUGetFloatStatus) = EMPTY_ATTR_MAP; + +// NPUAllocFloatStatus +INPUT_MAP(NPUAllocFloatStatus) = EMPTY_INPUT_MAP; +ATTR_MAP(NPUAllocFloatStatus) = EMPTY_ATTR_MAP; +OUTPUT_MAP(NPUAllocFloatStatus) = {{0, OUTPUT_DESC(data)}}; + +// NPUClearFloatStatus +INPUT_MAP(NPUClearFloatStatus) = {{1, INPUT_DESC(addr)}}; +OUTPUT_MAP(NPUClearFloatStatus) = {{0, OUTPUT_DESC(data)}}; +ATTR_MAP(NPUClearFloatStatus) = EMPTY_ATTR_MAP; + +// Abs +INPUT_MAP(Abs) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Abs) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Abs) = {{0, OUTPUT_DESC(y)}}; + +// AbsGrad +INPUT_MAP(AbsGrad) = {{1, INPUT_DESC(y)}, {2, INPUT_DESC(dy)}}; +ATTR_MAP(AbsGrad) = EMPTY_ATTR_MAP; +OUTPUT_MAP(AbsGrad) = {{0, OUTPUT_DESC(z)}}; + +// BinaryCrossEntropy +INPUT_MAP(BinaryCrossEntropy) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(y)}, {3, INPUT_DESC(weight)}}; +ATTR_MAP(BinaryCrossEntropy) = {{"reduction", ATTR_DESC(reduction, AnyTraits())}}; +OUTPUT_MAP(BinaryCrossEntropy) = {{0, OUTPUT_DESC(output)}}; + +// BinaryCrossEntropyGrad +INPUT_MAP(BinaryCrossEntropyGrad) = { + {1, INPUT_DESC(x)}, {2, INPUT_DESC(y)}, {3, INPUT_DESC(grad_output)}, {4, INPUT_DESC(weight)}}; +ATTR_MAP(BinaryCrossEntropyGrad) = {{"reduction", ATTR_DESC(reduction, AnyTraits())}}; +OUTPUT_MAP(BinaryCrossEntropyGrad) = {{0, OUTPUT_DESC(output)}}; + +// 
SparseApplyAdagradD +INPUT_MAP(SparseApplyAdagradD) = { + {1, INPUT_DESC(var)}, {2, INPUT_DESC(accum)}, {3, INPUT_DESC(grad)}, {4, INPUT_DESC(indices)}}; +ATTR_MAP(SparseApplyAdagradD) = {{"lr", ATTR_DESC(lr, AnyTraits())}, + {"use_locking", ATTR_DESC(use_locking, AnyTraits())}}; +OUTPUT_MAP(SparseApplyAdagradD) = {{0, OUTPUT_DESC(var)}}; + +// ApplyProximalAdagradD +INPUT_MAP(ApplyProximalAdagradD) = {{1, INPUT_DESC(var)}, {2, INPUT_DESC(accum)}, {3, INPUT_DESC(lr)}, + {4, INPUT_DESC(l1)}, {5, INPUT_DESC(l2)}, {6, INPUT_DESC(grad)}}; +ATTR_MAP(ApplyProximalAdagradD) = {{"use_locking", ATTR_DESC(use_locking, AnyTraits())}}; +OUTPUT_MAP(ApplyProximalAdagradD) = {{0, OUTPUT_DESC(var)}, {1, OUTPUT_DESC(accum)}}; + +// SparseApplyFtrlD +INPUT_MAP(SparseApplyFtrlD) = {{1, INPUT_DESC(var)}, + {2, INPUT_DESC(accum)}, + {3, INPUT_DESC(linear)}, + {4, INPUT_DESC(grad)}, + {5, INPUT_DESC(indices)}}; +ATTR_MAP(SparseApplyFtrlD) = {{"use_locking", ATTR_DESC(use_locking, AnyTraits())}, + {"lr", ATTR_DESC(lr, AnyTraits())}, + {"l1", ATTR_DESC(l1, AnyTraits())}, + {"l2", ATTR_DESC(l2, AnyTraits())}, + {"lr_power", ATTR_DESC(lr_power, AnyTraits())}}; +OUTPUT_MAP(SparseApplyFtrlD) = {{0, OUTPUT_DESC(var)}}; + +// SpaceToDepth +INPUT_MAP(SpaceToDepth) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(SpaceToDepth) = {{"block_size", ATTR_DESC(block_size, AnyTraits())}}; +OUTPUT_MAP(SpaceToDepth) = {{0, OUTPUT_DESC(y)}}; + +// DepthToSpace +INPUT_MAP(DepthToSpace) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(DepthToSpace) = {{"block_size", ATTR_DESC(block_size, AnyTraits())}}; +OUTPUT_MAP(DepthToSpace) = {{0, OUTPUT_DESC(y)}}; + +// Sign +INPUT_MAP(Sign) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Sign) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Sign) = {{0, OUTPUT_DESC(y)}}; + +// Round +INPUT_MAP(Round) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Round) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Round) = {{0, OUTPUT_DESC(y)}}; + +// ApplyFtrlD +INPUT_MAP(ApplyFtrlD) = {{1, INPUT_DESC(var)}, {2, INPUT_DESC(accum)}, {3, INPUT_DESC(linear)}, + {4, 
INPUT_DESC(grad)}, {5, INPUT_DESC(lr)}, {6, INPUT_DESC(l1)}, + {7, INPUT_DESC(l2)}, {8, INPUT_DESC(lr_power)}}; +ATTR_MAP(ApplyFtrlD) = {{"use_locking", ATTR_DESC(use_locking, AnyTraits())}}; +OUTPUT_MAP(ApplyFtrlD) = {{0, OUTPUT_DESC(var)}, {1, OUTPUT_DESC(accum)}, {2, OUTPUT_DESC(linear)}}; + +// Diag +INPUT_MAP(Diag) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Diag) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Diag) = {{0, OUTPUT_DESC(y)}}; + +// DiagPart +INPUT_MAP(DiagPart) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(DiagPart) = EMPTY_ATTR_MAP; +OUTPUT_MAP(DiagPart) = {{0, OUTPUT_DESC(y)}}; + +// SpaceToBatchD +INPUT_MAP(SpaceToBatchD) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(SpaceToBatchD) = { + {"block_size", ATTR_DESC(block_size, AnyTraits())}, + {"paddings", ATTR_DESC(paddings, AnyTraits>>(), AnyTraits>())}}; +OUTPUT_MAP(SpaceToBatchD) = {{0, OUTPUT_DESC(y)}}; + +// BatchToSpaceD +INPUT_MAP(BatchToSpaceD) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(BatchToSpaceD) = { + {"block_size", ATTR_DESC(block_size, AnyTraits())}, + {"crops", ATTR_DESC(crops, AnyTraits>>(), AnyTraits>())}}; +OUTPUT_MAP(BatchToSpaceD) = {{0, OUTPUT_DESC(y)}}; + +// Atan2 +INPUT_MAP(Atan2) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(Atan2) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Atan2) = {{0, OUTPUT_DESC(y)}}; + +// ApplyRMSPropD +INPUT_MAP(ApplyRMSPropD) = { + {1, INPUT_DESC(var)}, {2, INPUT_DESC(ms)}, {3, INPUT_DESC(mom)}, {4, INPUT_DESC(lr)}, {5, INPUT_DESC(grad)}}; +INPUT_ATTR_MAP(ApplyRMSPropD) = {{6, ATTR_DESC(rho, AnyTraits())}, + {7, ATTR_DESC(momentum, AnyTraits())}, + {8, ATTR_DESC(epsilon, AnyTraits())}}; +ATTR_MAP(ApplyRMSPropD) = {{"use_locking", ATTR_DESC(use_locking, AnyTraits())}}; +OUTPUT_MAP(ApplyRMSPropD) = {{0, OUTPUT_DESC(var)}}; + +// ApplyCenteredRMSProp +INPUT_MAP(ApplyCenteredRMSProp) = {{1, INPUT_DESC(var)}, {2, INPUT_DESC(mg)}, {3, INPUT_DESC(ms)}, + {4, INPUT_DESC(mom)}, {5, INPUT_DESC(grad)}, {6, INPUT_DESC(lr)}, + {7, INPUT_DESC(rho)}, {8, INPUT_DESC(momentum)}, {9, INPUT_DESC(epsilon)}}; 
+ATTR_MAP(ApplyCenteredRMSProp) = {{"use_locking", ATTR_DESC(use_locking, AnyTraits())}}; +OUTPUT_MAP(ApplyCenteredRMSProp) = {{0, OUTPUT_DESC(var)}}; + +// L2Loss +INPUT_MAP(L2Loss) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(L2Loss) = EMPTY_ATTR_MAP; +OUTPUT_MAP(L2Loss) = {{0, OUTPUT_DESC(y)}}; + +// CTCLoss +INPUT_MAP(CTCLoss) = {{1, INPUT_DESC(inputs)}, + {2, INPUT_DESC(labels_indices)}, + {3, INPUT_DESC(labels_values)}, + {4, INPUT_DESC(sequence_length)}}; +ATTR_MAP(CTCLoss) = { + {"preprocess_collapse_repeated", ATTR_DESC(preprocess_collapse_repeated, AnyTraits())}, + {"ctc_merge_repeated", ATTR_DESC(ctc_merge_repeated, AnyTraits())}, + {"ignore_longer_outputs_than_inputs", ATTR_DESC(ignore_longer_outputs_than_inputs, AnyTraits())}}; +OUTPUT_MAP(CTCLoss) = {{0, OUTPUT_DESC(loss)}, {1, OUTPUT_DESC(gradient)}}; + +// AscendQuant +INPUT_MAP(AscendQuant) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(AscendQuant) = {{"scale", ATTR_DESC(scale, AnyTraits())}, + {"offset", ATTR_DESC(offset, AnyTraits())}, + {"sqrt_mode", ATTR_DESC(sqrt_mode, AnyTraits())}, + {"round_mode", ATTR_DESC(round_mode, AnyTraits())}}; +OUTPUT_MAP(AscendQuant) = {{0, OUTPUT_DESC(y)}}; + +// AscendDequant +INPUT_MAP(AscendDequant) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(deq_scale)}}; +ATTR_MAP(AscendDequant) = {{"sqrt_mode", ATTR_DESC(sqrt_mode, AnyTraits())}, + {"relu_flag", ATTR_DESC(relu_flag, AnyTraits())}}; +OUTPUT_MAP(AscendDequant) = {{0, OUTPUT_DESC(y)}}; +#ifdef ENABLE_GE +// Print +INPUT_MAP(Print) = EMPTY_INPUT_MAP; +DYN_INPUT_MAP(Print) = {{1, DYN_INPUT_DESC(x)}}; +ATTR_MAP(Print) = EMPTY_ATTR_MAP; +#endif +} // namespace transform +} // namespace mindspore diff --git a/mindspore/ccsrc/transform/op_declare.h b/mindspore/ccsrc/transform/graph_ir/op_declare.h similarity index 97% rename from mindspore/ccsrc/transform/op_declare.h rename to mindspore/ccsrc/transform/graph_ir/op_declare.h index baa819f71f..e493ea0e52 100755 --- a/mindspore/ccsrc/transform/op_declare.h +++ 
b/mindspore/ccsrc/transform/graph_ir/op_declare.h @@ -19,7 +19,7 @@ #include #include -#include "transform/op_adapter.h" +#include "transform/graph_ir/op_adapter.h" namespace mindspore { namespace transform { @@ -46,6 +46,10 @@ namespace transform { template <> \ const std::unordered_map OpAdapter::dyn_input_map_; +#define DECLARE_OP_USE_DYN_SUBGRAPH(T) \ + template <> \ + const std::unordered_map OpAdapter::dyn_subgraph_map_; + #define DECLARE_OP_USE_DYN_OUTPUT(T) \ template <> \ const std::unordered_map OpAdapter::dyn_output_map_; @@ -235,6 +239,10 @@ DECLARE_OP_USE_OUTPUT(RealDiv) DECLARE_OP_ADAPTER(Cast) DECLARE_OP_USE_INPUT_ATTR(Cast) DECLARE_OP_USE_OUTPUT(Cast) +DECLARE_OP_ADAPTER(Case) +DECLARE_OP_USE_DYN_INPUT(Case) +DECLARE_OP_USE_DYN_SUBGRAPH(Case) +DECLARE_OP_USE_DYN_OUTPUT(Case) DECLARE_OP_ADAPTER(Reciprocal) DECLARE_OP_USE_OUTPUT(Reciprocal) DECLARE_OP_ADAPTER(Neg) @@ -313,8 +321,8 @@ DECLARE_OP_ADAPTER(NPUAllocFloatStatus) DECLARE_OP_USE_OUTPUT(NPUAllocFloatStatus) DECLARE_OP_ADAPTER(NPUClearFloatStatus) DECLARE_OP_USE_OUTPUT(NPUClearFloatStatus) -DECLARE_OP_ADAPTER(MatMul) -DECLARE_OP_USE_OUTPUT(MatMul) +DECLARE_OP_ADAPTER(MatMulV2) +DECLARE_OP_USE_OUTPUT(MatMulV2) DECLARE_OP_ADAPTER(SoftmaxCrossEntropyWithLogits) DECLARE_OP_USE_OUTPUT(SoftmaxCrossEntropyWithLogits) diff --git a/mindspore/ccsrc/transform/types.h b/mindspore/ccsrc/transform/graph_ir/types.h similarity index 100% rename from mindspore/ccsrc/transform/types.h rename to mindspore/ccsrc/transform/graph_ir/types.h diff --git a/mindspore/ccsrc/transform/util.cc b/mindspore/ccsrc/transform/graph_ir/util.cc similarity index 99% rename from mindspore/ccsrc/transform/util.cc rename to mindspore/ccsrc/transform/graph_ir/util.cc index b848ec117b..6ae665d69f 100644 --- a/mindspore/ccsrc/transform/util.cc +++ b/mindspore/ccsrc/transform/graph_ir/util.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "transform/util.h" +#include "transform/graph_ir/util.h" #include #include diff --git a/mindspore/ccsrc/transform/util.h b/mindspore/ccsrc/transform/graph_ir/util.h similarity index 99% rename from mindspore/ccsrc/transform/util.h rename to mindspore/ccsrc/transform/graph_ir/util.h index 5d8db26ad1..32d4242c4f 100644 --- a/mindspore/ccsrc/transform/util.h +++ b/mindspore/ccsrc/transform/graph_ir/util.h @@ -25,7 +25,7 @@ #include "ir/anf.h" #include "ir/dtype.h" #include "ir/tensor.h" -#include "transform/types.h" +#include "transform/graph_ir/types.h" #include "graph/tensor.h" diff --git a/mindspore/ccsrc/onnx/CMakeLists.txt b/mindspore/ccsrc/transform/onnx/CMakeLists.txt similarity index 72% rename from mindspore/ccsrc/onnx/CMakeLists.txt rename to mindspore/ccsrc/transform/onnx/CMakeLists.txt index a65ea6d450..0d2f6c947b 100644 --- a/mindspore/ccsrc/onnx/CMakeLists.txt +++ b/mindspore/ccsrc/transform/onnx/CMakeLists.txt @@ -1,3 +1,3 @@ file(GLOB_RECURSE _ONNX_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") set_property(SOURCE ${_ONNX_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_ONNX) -add_library(_mindspore_onnx_obj OBJECT ${_ONNX_SRC_FILES}) +add_library(_mindspore_transform_onnx_obj OBJECT ${_ONNX_SRC_FILES}) diff --git a/mindspore/ccsrc/onnx/ir_exporter.cc b/mindspore/ccsrc/transform/onnx/ir_exporter.cc similarity index 98% rename from mindspore/ccsrc/onnx/ir_exporter.cc rename to mindspore/ccsrc/transform/onnx/ir_exporter.cc index 2f02f483f5..78858eea8a 100644 --- a/mindspore/ccsrc/onnx/ir_exporter.cc +++ b/mindspore/ccsrc/transform/onnx/ir_exporter.cc @@ -23,10 +23,10 @@ #include #include -#include "ir/tensor_py.h" -#include "ir/param_value_py.h" +#include "ir/tensor.h" +#include "ir/param_value.h" #include "debug/anf_ir_utils.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "proto/onnx.pb.h" namespace mindspore { @@ -187,13 +187,9 @@ void IrExportBuilder::BuildParameters(const 
FuncGraphPtr &func_graph, onnx::Grap onnx::TensorProto *initializer_proto = graph_proto->add_initializer(); initializer_proto->set_name(param_name); SetParamToTensorProto(param, initializer_proto); - auto param_value = std::dynamic_pointer_cast(param->default_param()); - py::object obj = param_value->value(); - py::object data = obj.attr("data"); - if (py::isinstance(data)) { - auto method = data.attr("asnumpy"); - py::array npy_data = method(); - initializer_proto->set_raw_data(npy_data.request(true).ptr, static_cast(npy_data.nbytes())); + auto tensor = std::dynamic_pointer_cast(param->default_param()->value()); + if (tensor) { + initializer_proto->set_raw_data(tensor->data_c(), tensor->data().nbytes()); } } } diff --git a/mindspore/ccsrc/onnx/onnx_exporter.cc b/mindspore/ccsrc/transform/onnx/onnx_exporter.cc similarity index 99% rename from mindspore/ccsrc/onnx/onnx_exporter.cc rename to mindspore/ccsrc/transform/onnx/onnx_exporter.cc index 65a841246b..f69fb81a7e 100644 --- a/mindspore/ccsrc/onnx/onnx_exporter.cc +++ b/mindspore/ccsrc/transform/onnx/onnx_exporter.cc @@ -25,9 +25,9 @@ #include "debug/anf_ir_utils.h" #include "proto/onnx.pb.h" -#include "operator/ops.h" -#include "ir/param_value_py.h" -#include "ir/tensor_py.h" +#include "frontend/operator/ops.h" +#include "ir/tensor.h" +#include "ir/param_value.h" namespace mindspore { enum OpMergeMode { @@ -449,13 +449,9 @@ void OnnxExporter::ExportParameters(const FuncGraphPtr &func_graph, onnx::GraphP initializer_proto->set_name(param_ptr->ToString()); SetTensorProtoInfo(param_ptr, initializer_proto); // set value for initializer - auto param_value = std::dynamic_pointer_cast(param_ptr->default_param()); - py::object obj = param_value->value(); - py::object data = obj.attr("data"); - if (py::isinstance(data)) { - auto method = data.attr("asnumpy"); - py::array npy_data = method(); - initializer_proto->set_raw_data(npy_data.request(true).ptr, static_cast(npy_data.nbytes())); + auto tensor = 
std::dynamic_pointer_cast(param_ptr->default_param()->value()); + if (tensor) { + initializer_proto->set_raw_data(tensor->data_c(), tensor->data().nbytes()); } } } diff --git a/mindspore/ccsrc/utils/anf_ir.proto b/mindspore/ccsrc/utils/anf_ir.proto index 145751e7f0..2ea0511fa8 100644 --- a/mindspore/ccsrc/utils/anf_ir.proto +++ b/mindspore/ccsrc/utils/anf_ir.proto @@ -227,6 +227,9 @@ message NodeProto { // other fields for debug optional uint64 output_i = 7; + + // The full_name_with_scope of CNode + optional string full_name = 8; } // Models diff --git a/mindspore/ccsrc/utils/callbacks.cc b/mindspore/ccsrc/utils/callbacks.cc index 427cc5e568..ceb95d5c8c 100644 --- a/mindspore/ccsrc/utils/callbacks.cc +++ b/mindspore/ccsrc/utils/callbacks.cc @@ -20,8 +20,8 @@ #include #include #include "pybind11/pybind11.h" -#include "pipeline/parse/data_converter.h" -#include "pipeline/parse/python_adapter.h" +#include "pipeline/jit/parse/data_converter.h" +#include "pipeline/jit/parse/python_adapter.h" #include "utils/visible.h" namespace mindspore { diff --git a/mindspore/ccsrc/utils/callbacks_ge.cc b/mindspore/ccsrc/utils/callbacks_ge.cc index 3174ec4b15..6001b295ad 100644 --- a/mindspore/ccsrc/utils/callbacks_ge.cc +++ b/mindspore/ccsrc/utils/callbacks_ge.cc @@ -16,11 +16,11 @@ #include "utils/callbacks_ge.h" #include "pybind11/pybind11.h" -#include "ir/param_value_py.h" -#include "transform/df_graph_manager.h" -#include "transform/util.h" -#include "pipeline/parse/data_converter.h" -#include "pipeline/parse/python_adapter.h" +#include "ir/param_value.h" +#include "transform/graph_ir/df_graph_manager.h" +#include "transform/graph_ir/util.h" +#include "pipeline/jit/parse/data_converter.h" +#include "pipeline/jit/parse/python_adapter.h" #include "utils/visible.h" namespace mindspore { @@ -50,13 +50,10 @@ bool GetParameterShape(const FuncGraphPtr &graph, const std::string ¶m_name, return false; } if (param_node->name() == param_name) { - py::object parameter; + TensorPtr tensor; 
if (param_node->has_default()) { - auto param_value = std::dynamic_pointer_cast(param_node->default_param()); - parameter = param_value->value(); + tensor = std::dynamic_pointer_cast(param_node->default_param()->value()); } - ValuePtr value = parse::data_converter::PyDataToValue(parameter); - TensorPtr tensor = std::dynamic_pointer_cast(value); if (tensor == nullptr) { shape->push_back(ONE_SHAPE); } else { diff --git a/mindspore/ccsrc/utils/callbacks_ge.h b/mindspore/ccsrc/utils/callbacks_ge.h index 9735c3000a..f0ef583aaa 100644 --- a/mindspore/ccsrc/utils/callbacks_ge.h +++ b/mindspore/ccsrc/utils/callbacks_ge.h @@ -20,8 +20,8 @@ #include #include #include -#include "transform/types.h" -#include "transform/util.h" +#include "transform/graph_ir/types.h" +#include "transform/graph_ir/util.h" #include "ir/tensor.h" namespace mindspore { diff --git a/mindspore/ccsrc/utils/comm_manager.cc b/mindspore/ccsrc/utils/comm_manager.cc index 70adfb7467..de165c4aac 100644 --- a/mindspore/ccsrc/utils/comm_manager.cc +++ b/mindspore/ccsrc/utils/comm_manager.cc @@ -16,17 +16,27 @@ #include "utils/comm_manager.h" #include "utils/convert_utils.h" + #ifndef NO_DLIB #include "hccl/hcom.h" #endif +#if defined(ENABLE_GPU) +#include "runtime/device/gpu/distribution/collective_init.h" +using CollectiveInitializer = mindspore::device::gpu::CollectiveInitializer; +using CreateCommGroupFunc = mindspore::device::gpu::CreateCommGroupFunc; +using GetRankIDByGroupFunc = mindspore::device::gpu::GetRankIDByGroupFunc; +using GetGroupSizeFunc = mindspore::device::gpu::GetGroupSizeFunc; +using DestroyGroupFunc = mindspore::device::gpu::DestroyGroupFunc; +#endif + namespace mindspore { +#ifndef NO_DLIB CommManager &CommManager::GetInstance() noexcept { static CommManager instance("hccl"); return instance; } -#ifndef NO_DLIB #define HCCL_RUN_CHECK(op_name, group, op) \ do { \ auto hccl_result = (op); \ @@ -79,7 +89,79 @@ bool CommManager::DestroyGroup(const string &group) const { 
HCCL_RUN_CHECK(string("destroy communicate group"), group, hcom_destroy_group(group.c_str())); return true; } +#elif defined(ENABLE_GPU) +CommManager &CommManager::GetInstance() noexcept { + static CommManager instance("nccl"); + return instance; +} + +bool CommManager::CreateGroupSync(const string &group, const vector &rank_id_list) const { + const void *collective_handle_ = CollectiveInitializer::instance().collective_handle(); + if (!collective_handle_) { + MS_LOG(EXCEPTION) << "GPU collective handle is not initialized."; + } + MS_LOG(INFO) << "Create communication group " << group << " by rank id list " << rank_id_list; + auto create_comm_group_funcptr = + reinterpret_cast(dlsym(const_cast(collective_handle_), "CreateCommGroup")); + MS_EXCEPTION_IF_NULL(create_comm_group_funcptr); + bool ret = (*create_comm_group_funcptr)(group, rank_id_list); + if (!ret) { + MS_LOG(ERROR) << "Creating group " << group << "for rank id list" << rank_id_list << "failed."; + return ret; + } + return ret; +} + +bool CommManager::GetRankID(const string &group, unsigned int *rank_id) const { + const void *collective_handle_ = CollectiveInitializer::instance().collective_handle(); + if (!collective_handle_) { + MS_LOG(EXCEPTION) << "GPU collective handle is not initialized."; + } + auto get_rank_id_funcptr = + reinterpret_cast(dlsym(const_cast(collective_handle_), "GetRankIDByGroup")); + MS_EXCEPTION_IF_NULL(get_rank_id_funcptr); + int rank = (*get_rank_id_funcptr)(group); + *rank_id = static_cast(rank); + MS_LOG(INFO) << "This process rank id is " << *rank_id << " in group " << group; + return true; +} + +bool CommManager::GetRankSize(const string &group, unsigned int *rank_size) const { + const void *collective_handle_ = CollectiveInitializer::instance().collective_handle(); + if (!collective_handle_) { + MS_LOG(EXCEPTION) << "GPU collective handle is not initialized."; + } + auto get_group_size_funcptr = + reinterpret_cast(dlsym(const_cast(collective_handle_), "GetGroupSize")); + 
MS_EXCEPTION_IF_NULL(get_group_size_funcptr); + int size = (*get_group_size_funcptr)(group); + *rank_size = static_cast(size); + MS_LOG(INFO) << "Group " << group << " size is " << *rank_size; + return true; +} + +bool CommManager::DestroyGroup(const string &group) const { + const void *collective_handle_ = CollectiveInitializer::instance().collective_handle(); + if (!collective_handle_) { + MS_LOG(EXCEPTION) << "GPU collective handle is not initialized."; + } + auto destroy_group_funcptr = + reinterpret_cast(dlsym(const_cast(collective_handle_), "DestroyGroup")); + MS_EXCEPTION_IF_NULL(destroy_group_funcptr); + + bool ret = (*destroy_group_funcptr)(group); + if (!ret) { + MS_LOG(ERROR) << "Destroying group " << group << " failed."; + return ret; + } + return ret; +} #else +CommManager &CommManager::GetInstance() noexcept { + static CommManager instance("hccl"); + return instance; +} + bool CommManager::CreateGroupSync(const string &, const vector &) const { return true; } bool CommManager::GetRankID(const string &group, unsigned int *rank_id) const { return true; } diff --git a/mindspore/ccsrc/utils/context/ms_context.cc b/mindspore/ccsrc/utils/context/ms_context.cc index 2f2471f460..d6381ec7e8 100644 --- a/mindspore/ccsrc/utils/context/ms_context.cc +++ b/mindspore/ccsrc/utils/context/ms_context.cc @@ -27,9 +27,10 @@ #include "tdt/data_common.h" #endif #ifdef ENABLE_GE -#include "transform/df_graph_manager.h" +#include "transform/graph_ir/df_graph_manager.h" #endif #include "ir/tensor.h" +#include "common/utils.h" namespace mindspore { #ifdef ENABLE_GE @@ -89,7 +90,7 @@ MsContext::MsContext(const std::string &policy, const std::string &target) { max_device_memory_ = kDefaultMaxDeviceMemory; print_file_path_ = ""; enable_graph_kernel_ = false; - enable_sparse_flag_ = false; + enable_sparse_ = false; } std::shared_ptr MsContext::GetInstance() { @@ -168,6 +169,11 @@ bool MsContext::OpenTsd() { return true; } + auto role = common::GetEnv("MS_ROLE"); + if 
(strcmp(role.c_str(), "MS_SCHED") == 0 || strcmp(role.c_str(), "MS_PSERVER") == 0) { + return true; + } + unsigned int device_id; unsigned int rank_size = 1; diff --git a/mindspore/ccsrc/utils/context/ms_context.h b/mindspore/ccsrc/utils/context/ms_context.h index 3bca16f8ee..19205cccb8 100644 --- a/mindspore/ccsrc/utils/context/ms_context.h +++ b/mindspore/ccsrc/utils/context/ms_context.h @@ -161,8 +161,8 @@ class MsContext { void set_enable_graph_kernel(bool enable_graph_kernel) { enable_graph_kernel_ = enable_graph_kernel; } bool enable_graph_kernel() const { return enable_graph_kernel_; } - bool enable_sparse_flag() const { return enable_sparse_flag_; } - void set_enable_sparse_flag(bool enable_sparse_flag) { enable_sparse_flag_ = enable_sparse_flag; } + bool enable_sparse() const { return enable_sparse_; } + void set_enable_sparse(bool enable_sparse) { enable_sparse_ = enable_sparse; } private: MsContext(const std::string &backend_policy, const std::string &target); @@ -207,7 +207,7 @@ class MsContext { float max_device_memory_; std::string print_file_path_; bool enable_graph_kernel_; - bool enable_sparse_flag_; + bool enable_sparse_; }; } // namespace mindspore diff --git a/mindspore/ccsrc/utils/convert_utils.cc b/mindspore/ccsrc/utils/convert_utils.cc index 8cb071b769..b1847d1df5 100644 --- a/mindspore/ccsrc/utils/convert_utils.cc +++ b/mindspore/ccsrc/utils/convert_utils.cc @@ -25,12 +25,12 @@ #include #include "pybind11/pybind11.h" -#include "pipeline/static_analysis/abstract_value.h" -#include "pipeline/parse/parse.h" -#include "pipeline/parse/parse_base.h" +#include "abstract/abstract_value.h" +#include "pipeline/jit/parse/parse.h" +#include "pipeline/jit/parse/parse_base.h" #include "ir/value.h" #include "ir/tensor.h" -#include "ir/param_value_py.h" +#include "ir/param_value.h" #include "utils/base_ref_extends.h" namespace mindspore { @@ -230,6 +230,20 @@ bool ValueToBool(const ValuePtr &v, bool *value) { return true; } +bool BaseRefToInt(const ValuePtr 
&v, int *value) { + MS_EXCEPTION_IF_NULL(v); + if (v->isa()) { + auto tensor = v->cast(); + (void)tensor->data_sync(); + int *tensor_data = static_cast(tensor->data_c()); + auto vb = tensor_data[0]; + *value = vb; + return true; + } + MS_LOG(ERROR) << "Index must be tensor type."; + return false; +} + bool BaseRefToBool(const BaseRef &v, bool *value) { if (utils::isa(v)) { return ValueToBool(utils::cast(v), value); @@ -435,8 +449,8 @@ bool IsGraphOutputValueNodeOrParameter(const AnfNodePtr &output, const py::tuple if (!param->has_default()) { MS_LOG(EXCEPTION) << "Can not determine value of Parameter " << index << " (" << param->name() << ")"; } - auto param_value = std::dynamic_pointer_cast(param->default_param()); - *ret_val = param_value->value().attr("data"); + auto tensor = param->default_param()->value(); + *ret_val = py::cast(tensor); } return true; } diff --git a/mindspore/ccsrc/utils/convert_utils.h b/mindspore/ccsrc/utils/convert_utils.h index 40c3e88c5c..d4ecbf4408 100644 --- a/mindspore/ccsrc/utils/convert_utils.h +++ b/mindspore/ccsrc/utils/convert_utils.h @@ -28,7 +28,7 @@ #include "utils/convert_utils_base.h" #include "utils/any.h" #include "utils/base_ref.h" -#include "ir/base.h" +#include "base/base.h" #include "ir/anf.h" namespace py = pybind11; @@ -42,6 +42,7 @@ using TensorPtr = std::shared_ptr; py::object AnyToPyData(const Any &value); py::object BaseRefToPyData(const BaseRef &value); bool BaseRefToBool(const BaseRef &in, bool *out); +bool BaseRefToInt(const ValuePtr &v, int *value); bool ValueToBool(const ValuePtr &in, bool *out); py::object ValuePtrToPyData(const ValuePtr &value); diff --git a/mindspore/ccsrc/utils/graph_utils.h b/mindspore/ccsrc/utils/graph_utils.h index 93edda3e34..2a9240ac84 100644 --- a/mindspore/ccsrc/utils/graph_utils.h +++ b/mindspore/ccsrc/utils/graph_utils.h @@ -29,7 +29,7 @@ #include #include "ir/anf.h" -#include "ir/primitive_base.h" +#include "ir/primitive.h" #include "ir/scalar.h" #include "ir/tensor.h" #include 
"debug/label.h" diff --git a/mindspore/ccsrc/utils/graph_utils_extends.cc b/mindspore/ccsrc/utils/graph_utils_extends.cc index 0740c24236..852dd0e3f2 100644 --- a/mindspore/ccsrc/utils/graph_utils_extends.cc +++ b/mindspore/ccsrc/utils/graph_utils_extends.cc @@ -31,8 +31,8 @@ #include "debug/label.h" #include "utils/log_adapter.h" #include "common/utils.h" -#include "pipeline/parse/function_block.h" -#include "pipeline/parse/python_adapter.h" +#include "pipeline/jit/parse/function_block.h" +#include "pipeline/jit/parse/python_adapter.h" namespace mindspore { namespace { diff --git a/mindspore/ccsrc/utils/load_onnx/anf_converter.cc b/mindspore/ccsrc/utils/load_onnx/anf_converter.cc index ad87d6ae8f..9e8e51a46b 100644 --- a/mindspore/ccsrc/utils/load_onnx/anf_converter.cc +++ b/mindspore/ccsrc/utils/load_onnx/anf_converter.cc @@ -60,6 +60,9 @@ int AnfConverter::ValidateFileStr(const std::string &modelFile, std::string file bool AnfConverter::ReadOnnxFromBinary(const std::string &modelFile, google::protobuf::Message *onnx_model) { std::unique_ptr onnx_file(new (std::nothrow) char[PATH_MAX]{0}); int fd = open(onnx_file.get(), O_RDONLY); + if (fd < 0) { + MS_LOG(EXCEPTION) << "failed to open file"; + } google::protobuf::io::FileInputStream input(fd); google::protobuf::io::CodedInputStream code_input(&input); code_input.SetTotalBytesLimit(INT_MAX, 536870912); @@ -85,7 +88,7 @@ std::shared_ptr AnfConverter::RunAnfConverter(const std::string &file MS_LOG(ERROR) << "Trans data not support input format!"; } else { modelFile = flagItem.substr(pos + 1); - std::cout << "input protobuf file path is: " << flagItem.substr(pos + 1) << std::endl; + std::cout << "input protobuf file path is: " << modelFile << std::endl; } if (ValidateFileStr(modelFile, ".pb") != 0) { diff --git a/mindspore/ccsrc/utils/load_onnx/anf_model_parser.cc b/mindspore/ccsrc/utils/load_onnx/anf_model_parser.cc index c3dfa5194f..fa1137e3f6 100644 --- a/mindspore/ccsrc/utils/load_onnx/anf_model_parser.cc +++ 
b/mindspore/ccsrc/utils/load_onnx/anf_model_parser.cc @@ -22,14 +22,12 @@ #include #include "google/protobuf/io/zero_copy_stream_impl.h" #include "ir/tensor.h" -#include "ir/tensor_py.h" -#include "ir/param_value_py.h" -#include "operator/ops.h" -#include "pipeline/static_analysis/abstract_value.h" +#include "ir/param_value.h" +#include "frontend/operator/ops.h" +#include "abstract/abstract_value.h" #include "proto/onnx.pb.h" #include "utils/log_adapter.h" -using mindspore::tensor::TensorPy; using std::string; namespace mindspore { @@ -121,13 +119,15 @@ bool MSANFModelParser::BuildParameterForFuncGraph(const ParameterPtr &node, cons std::string initial_data = initialize_proto.raw_data(); auto *tensor_data_buf = reinterpret_cast(tensor_info->data_c()); MS_EXCEPTION_IF_NULL(tensor_data_buf); - memcpy_s(tensor_data_buf, tensor_info->data().nbytes(), initial_data.data(), initial_data.size()); + auto ret = memcpy_s(tensor_data_buf, tensor_info->data().nbytes(), initial_data.data(), initial_data.size()); + if (ret != 0) { + MS_LOG(EXCEPTION) << "memcpy_s error, errorno" << ret; + } - py::array array_data = TensorPy::AsNumpy(*tensor_info); - ParamValuePyPtr para_value_ptr = std::make_shared(); - MS_EXCEPTION_IF_NULL(para_value_ptr); - para_value_ptr->set_value(array_data); - node->set_default_param(para_value_ptr); + auto param_value = std::make_shared(); + MS_EXCEPTION_IF_NULL(param_value); + param_value->set_value(tensor_info); + node->set_default_param(param_value); } anfnode_build_map_[value_proto.name()] = node; return true; @@ -252,7 +252,11 @@ bool MSANFModelParser::ObtainValueNodeInTensorForm(const std::string &value_node tensor::TensorPtr tensor_info = std::make_shared(kDefaultValueSwitchMap[attr_tensor_type], shape); const std::string &tensor_buf = attr_tensor.raw_data(); auto *tensor_data_buf = reinterpret_cast(tensor_info->data_c()); - memcpy_s(tensor_data_buf, tensor_info->data().nbytes(), tensor_buf.data(), tensor_buf.size()); + auto ret = 
memcpy_s(tensor_data_buf, tensor_info->data().nbytes(), tensor_buf.data(), tensor_buf.size()); + if (ret != 0) { + MS_LOG(EXCEPTION) << "memcpy_s error, errorno" << ret; + } + auto new_value_node = NewValueNode(MakeValue(tensor_info)); MS_EXCEPTION_IF_NULL(new_value_node); auto tensor_abstract = tensor_info->ToAbstract(); @@ -339,7 +343,6 @@ bool MSANFModelParser::GetAttrValueForValueNode(const std::string &ref_attr_name MS_LOG(ERROR) << "parse ValueNode value don't support input of ref_attr_name"; return false; } - return true; } bool MSANFModelParser::BuildValueNodeForFuncGraph(const onnx::NodeProto &node_proto) { diff --git a/mindspore/ccsrc/utils/load_onnx/anf_model_parser.h b/mindspore/ccsrc/utils/load_onnx/anf_model_parser.h index 11b9cd101f..58fbd1bc70 100644 --- a/mindspore/ccsrc/utils/load_onnx/anf_model_parser.h +++ b/mindspore/ccsrc/utils/load_onnx/anf_model_parser.h @@ -32,7 +32,7 @@ using uint64 = uint64_t; using float16 = Eigen::half; class MSANFModelParser { public: - MSANFModelParser() = default; + MSANFModelParser() : producer_name_(""), model_version_(0), ir_version_(0) {} ~MSANFModelParser() = default; FuncGraphPtr Parse(const onnx::ModelProto &model_proto); diff --git a/mindspore/ccsrc/utils/log_adapter.cc b/mindspore/ccsrc/utils/log_adapter.cc index 3588754dae..702deefcb4 100644 --- a/mindspore/ccsrc/utils/log_adapter.cc +++ b/mindspore/ccsrc/utils/log_adapter.cc @@ -18,7 +18,6 @@ #include #include -#include "pybind11/pybind11.h" #include "debug/trace.h" // namespace to support utils module definition @@ -158,6 +157,7 @@ static std::string ExceptionTypeToString(ExceptionType type) { static const char *GetSubModuleName(SubModuleId module_id) { static const char *sub_module_names[NUM_SUBMODUES] = { "UNKNOWN", // SM_UNKNOWN + "BASE", // SM_BASE "ANALYZER", // SM_ANALYZER "COMMON", // SM_COMMON "DEBUG", // SM_DEBUG @@ -176,7 +176,8 @@ static const char *GetSubModuleName(SubModuleId module_id) { "PYNATIVE", // SM_PYNATIVE "SESSION", // SM_SESSION 
"UTILS", // SM_UTILS - "VM" // SM_VM + "VM", // SM_VM + "ABSTRACT" // SM_ABSTRACT }; return sub_module_names[module_id % NUM_SUBMODUES]; @@ -219,16 +220,10 @@ void LogWriter::operator^(const LogStream &stream) const { trace::TraceGraphEval(); trace::GetEvalStackInfo(oss); - if (exception_type_ == IndexError) { - throw pybind11::index_error(oss.str()); + if (exception_handler_ != nullptr) { + exception_handler_(exception_type_, oss.str()); } - if (exception_type_ == ValueError) { - throw pybind11::value_error(oss.str()); - } - if (exception_type_ == TypeError) { - throw pybind11::type_error(oss.str()); - } - pybind11::pybind11_fail(oss.str()); + throw std::runtime_error(oss.str()); } static std::string GetEnv(const std::string &envvar) { diff --git a/mindspore/ccsrc/utils/log_adapter.h b/mindspore/ccsrc/utils/log_adapter.h index dfd463ee1d..a0e9bfc6d6 100644 --- a/mindspore/ccsrc/utils/log_adapter.h +++ b/mindspore/ccsrc/utils/log_adapter.h @@ -22,6 +22,7 @@ #include #include #include +#include #include "./overload.h" #include "./securec.h" #ifdef USE_GLOG @@ -99,6 +100,7 @@ enum MsLogLevel : int { DEBUG = 0, INFO, WARNING, ERROR, EXCEPTION }; enum SubModuleId : int { SM_UNKNOWN = 0, // unknown submodule + SM_BASE, // base SM_ANALYZER, // static analyzer SM_COMMON, // common SM_DEBUG, // debug @@ -118,6 +120,7 @@ enum SubModuleId : int { SM_SESSION, // session SM_UTILS, // utils SM_VM, // VM + SM_ABSTRACT, // abstract NUM_SUBMODUES // number of submodules }; @@ -133,6 +136,8 @@ extern int g_ms_submodule_log_levels[] __attribute__((visibility("default"))); class LogWriter { public: + using ExceptionHandler = std::function; + LogWriter(const LocationInfo &location, MsLogLevel log_level, SubModuleId submodule, ExceptionType excp_type = NoExceptionType) : location_(location), log_level_(log_level), submodule_(submodule), exception_type_(excp_type) {} @@ -141,6 +146,8 @@ class LogWriter { void operator<(const LogStream &stream) const noexcept 
__attribute__((visibility("default"))); void operator^(const LogStream &stream) const __attribute__((noreturn, visibility("default"))); + static void set_exception_handler(ExceptionHandler exception_handler) { exception_handler_ = exception_handler; } + private: void OutputLog(const std::ostringstream &msg) const; @@ -148,6 +155,8 @@ class LogWriter { MsLogLevel log_level_; SubModuleId submodule_; ExceptionType exception_type_; + + inline static ExceptionHandler exception_handler_ = nullptr; }; #define MSLOG_IF(level, condition, excp_type) \ diff --git a/mindspore/ccsrc/ir/param_value_py.h b/mindspore/ccsrc/utils/log_adapter_py.cc similarity index 54% rename from mindspore/ccsrc/ir/param_value_py.h rename to mindspore/ccsrc/utils/log_adapter_py.cc index a03e34ac6e..c4793b960b 100644 --- a/mindspore/ccsrc/ir/param_value_py.h +++ b/mindspore/ccsrc/utils/log_adapter_py.cc @@ -14,30 +14,33 @@ * limitations under the License. */ -#ifndef MINDSPORE_CCSRC_IR_PARAM_VALUE_PY_H_ -#define MINDSPORE_CCSRC_IR_PARAM_VALUE_PY_H_ +#include "utils/log_adapter.h" -#include - -#include "ir/anf.h" +#include #include "pybind11/pybind11.h" -namespace mindspore { namespace py = pybind11; - -class ParamValuePy : public ParamValue { +namespace mindspore { +class PyExceptionInitializer { public: - ParamValuePy() : value_(py::none()) {} - explicit ParamValuePy(const py::object &value) : value_(value) {} - ~ParamValuePy() override = default; + PyExceptionInitializer() { mindspore::LogWriter::set_exception_handler(HandleExceptionPy); } - py::object value() { return value_; } - void set_value(const py::object &obj) { value_ = obj; } + ~PyExceptionInitializer() = default; private: - py::object value_; + static void HandleExceptionPy(ExceptionType exception_type, const std::string &str) { + if (exception_type == IndexError) { + throw py::index_error(str); + } + if (exception_type == ValueError) { + throw py::value_error(str); + } + if (exception_type == TypeError) { + throw py::type_error(str); + 
} + py::pybind11_fail(str); + } }; -using ParamValuePyPtr = std::shared_ptr; +static PyExceptionInitializer py_exception_initializer; } // namespace mindspore -#endif // MINDSPORE_CCSRC_IR_PARAM_VALUE_PY_H_ diff --git a/mindspore/ccsrc/utils/primitive_utils.cc b/mindspore/ccsrc/utils/primitive_utils.cc index 97fa954e12..490e2517a9 100644 --- a/mindspore/ccsrc/utils/primitive_utils.cc +++ b/mindspore/ccsrc/utils/primitive_utils.cc @@ -15,7 +15,7 @@ */ #include "utils/primitive_utils.h" -#include "pipeline/parse/python_adapter.h" +#include "pipeline/jit/parse/python_adapter.h" #include "utils/log_adapter.h" #include "common/utils.h" diff --git a/mindspore/ccsrc/utils/symbolic.h b/mindspore/ccsrc/utils/symbolic.h index 1b7a212610..ca68b2c877 100644 --- a/mindspore/ccsrc/utils/symbolic.h +++ b/mindspore/ccsrc/utils/symbolic.h @@ -26,7 +26,7 @@ #include #include "ir/anf.h" -#include "pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" #include "utils/any.h" namespace mindspore { diff --git a/mindspore/ccsrc/utils/tensorprint_utils.cc b/mindspore/ccsrc/utils/tensorprint_utils.cc index ee53345f31..08cd4e4291 100644 --- a/mindspore/ccsrc/utils/tensorprint_utils.cc +++ b/mindspore/ccsrc/utils/tensorprint_utils.cc @@ -21,7 +21,7 @@ #include #include #include "ir/tensor.h" -#include "device/convert_tensor_utils.h" +#include "runtime/device/convert_tensor_utils.h" #include "./securec.h" #ifndef NO_DLIB #include "tdt/tsd_client.h" @@ -256,6 +256,7 @@ bool SaveDataItem2File(const std::vector &items, const std::strin if (!print.SerializeToOstream(output)) { MS_LOG(ERROR) << "Save print file:" << print_file_path << " fail."; ret_end_thread = true; + break; } print.Clear(); } diff --git a/mindspore/ccsrc/utils/utils.h b/mindspore/ccsrc/utils/utils.h index e28adb6e21..3e82aaff2d 100644 --- a/mindspore/ccsrc/utils/utils.h +++ b/mindspore/ccsrc/utils/utils.h @@ -176,6 +176,10 @@ constexpr auto kApplyAdamWithAmsgradOpName = "ApplyAdamWithAmsgrad"; constexpr 
auto kTensorMoveOpName = "TensorMove"; constexpr auto kTensorScatterUpdateOpName = "TensorScatterUpdate"; constexpr auto kScatterNdUpdateOpName = "ScatterNdUpdate"; +constexpr auto kPushOpName = "Push"; +constexpr auto kPullOpName = "Pull"; +constexpr auto kEmbeddingLookupOpName = "EmbeddingLookup"; +constexpr auto kEmbeddingLookupProxyOpName = "EmbeddingLookupProxy"; // attr key name constexpr auto kAttrInputNames = "input_names"; @@ -236,9 +240,12 @@ constexpr auto kAttrOutputNum = "output_num"; constexpr auto kAttrSizeSplits = "size_splits"; constexpr auto kAttrOutputDefault = "output_default"; constexpr auto kAttrPrimitiveTarget = "primitive_target"; +constexpr auto kAttrUseLocking = "use_locking"; constexpr auto kAttrReduceScatterFlag = "reduce_scatter_flag"; constexpr auto kAttrOffset = "offset"; -constexpr auto kAttrUseLocking = "use_locking"; +constexpr auto kAttrPsKey = "ps_key"; +constexpr auto kAttrOptimizerType = "optim_type"; +constexpr auto kAttrChildGraph = "child_graph"; // attr value constexpr auto kValueTargetSwitch = "target_switch"; @@ -262,6 +269,7 @@ constexpr auto kAnfPartialFuncGraphIndex = 1; constexpr auto kRealInputNodeIndexInTupleGetItem = 1; constexpr auto kInputNodeOutputIndexInTupleGetItem = 2; constexpr auto kTupleGetItemInputSize = 3; +constexpr auto kSwitchInputSize = 4; // index define of control depend constexpr auto kControlDependPriorIndex = 1; constexpr auto kControlDependBehindIndex = 2; @@ -290,12 +298,24 @@ const std::set kOpFormatList = { kOpFormat_NC1HWC0_C04, kOpFormat_FRACTAL_Z_C04, kOpFormat_NDHWC}; const std::set kDefaultCompatibleFormat = {kOpFormat_ND, kOpFormat_NCHW, kOpFormat_NHWC, kOpFormat_HWCN}; const std::set kOptOperatorSet = { - kMomentumOpName, kApplyMomentumOpName, kApplyAdadeltaOpName, - kApplyAdagradOpName, kApplyAdagradDAName, kApplyAdamOpName, - kApplyAdaMaxOpName, kApplyAddSignOpName, kApplyCenteredRMSPOpName, - kApplyFtrlOpName, kApplyFtrlV2OpName, kApplyGradientDescentOpName, - 
kApplyPowerSignOpName, kApplyProximalAdagradOpName, kApplyProximalGradientDescentOpName, + kMomentumOpName, + kApplyMomentumOpName, + kApplyAdadeltaOpName, + kApplyAdagradOpName, + kApplyAdagradDAName, + kApplyAdamOpName, + kApplyAdaMaxOpName, + kApplyAddSignOpName, + kApplyCenteredRMSPOpName, + kApplyFtrlOpName, + kApplyFtrlV2OpName, + kApplyGradientDescentOpName, + kApplyPowerSignOpName, + kApplyProximalAdagradOpName, + kApplyProximalGradientDescentOpName, kApplyRMSPropOpName, + kPushOpName, + kPullOpName, }; const std::set kHWSpecialFormatSet = {kOpFormat_FRAC_Z, kOpFormat_NC1KHKWHWC0, kOpFormat_NC1HWC0, diff --git a/mindspore/ccsrc/vm/backend.cc b/mindspore/ccsrc/vm/backend.cc index 47bc69bbbb..0290ee57fc 100644 --- a/mindspore/ccsrc/vm/backend.cc +++ b/mindspore/ccsrc/vm/backend.cc @@ -23,7 +23,7 @@ #include "utils/callbacks.h" #include "utils/graph_utils.h" #include "utils/base_ref_extends.h" -#include "session/session_factory.h" +#include "backend/session/session_factory.h" #include "common/utils.h" #ifdef ENABLE_GE #include "utils/callbacks_ge.h" @@ -32,6 +32,7 @@ namespace mindspore { namespace compile { bool Backend::GetCond(const BaseRef &c, bool *const value) { return BaseRefToBool(c, value); } +bool Backend::GetIndex(const BaseRef &c, int *const value) { return BaseRefToInt(utils::cast(c), value); } LinConvertResult MsBackend::GetMultiGraphRun(const FuncGraphPtr &g) { // multi_graph merge to one, big graph have paramters in begin and only have one output diff --git a/mindspore/ccsrc/vm/backend.h b/mindspore/ccsrc/vm/backend.h index 3a93cf930f..208c4010fb 100644 --- a/mindspore/ccsrc/vm/backend.h +++ b/mindspore/ccsrc/vm/backend.h @@ -26,7 +26,7 @@ #include "ir/anf.h" #include "vm/segment_runner.h" #include "vm/vm.h" -#include "session/session_basic.h" +#include "backend/session/session_basic.h" namespace mindspore { namespace compile { @@ -46,6 +46,7 @@ class Backend { virtual void SimulateRun(FinalVMPtr, FuncGraphPtr) {} virtual SwitchCondStatus 
SetSimuCond(const BaseRef &, bool) { return kCondOk; } virtual bool GetCond(const BaseRef &c, bool *value); + virtual bool GetIndex(const BaseRef &c, int *value); virtual void SetSwitchGraph() {} virtual void SetSwitchActive(const BaseRef &, bool) {} virtual void RecallGraphInput(const FuncGraphPtr &, const VectorRef &, const BaseRef &) {} diff --git a/mindspore/ccsrc/vm/segment_runner.cc b/mindspore/ccsrc/vm/segment_runner.cc index db27506134..540b77bcaf 100644 --- a/mindspore/ccsrc/vm/segment_runner.cc +++ b/mindspore/ccsrc/vm/segment_runner.cc @@ -31,7 +31,7 @@ #include "utils/utils.h" #include "ir/manager.h" #include "ir/func_graph_cloner.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { const char kMsConvert[] = "ms"; diff --git a/mindspore/ccsrc/vm/transform.cc b/mindspore/ccsrc/vm/transform.cc index 80d2fc9df9..2cf6ead813 100644 --- a/mindspore/ccsrc/vm/transform.cc +++ b/mindspore/ccsrc/vm/transform.cc @@ -26,9 +26,9 @@ #include #include -#include "pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" #ifdef ENABLE_GE -#include "transform/convert.h" +#include "transform/graph_ir/convert.h" #endif #include "utils/graph_utils.h" #include "utils/context/ms_context.h" @@ -46,8 +46,9 @@ using TypedPrimitiveAbstractClosurePtr = std::shared_ptr nonlinear_ops = {prim::kPrimReturn, prim::kPrimPartial, prim::kPrimSwitch, prim::kPrimMakeTuple, prim::kPrimBpropCut}; const std::vector &GetMsNonlinearOps() { - static const std::vector ms_nonlinear_ops = {prim::kPrimReturn, prim::kPrimPartial, prim::kPrimSwitch, - prim::kPrimBpropCut}; + static const std::vector ms_nonlinear_ops = {prim::kPrimReturn, prim::kPrimPartial, + prim::kPrimSwitch, prim::kPrimMakeTuple, + prim::kPrimBpropCut, prim::kPrimSwitchLayer}; return ms_nonlinear_ops; } @@ -187,6 +188,29 @@ std::vector SplitSort(const FuncGraphPtr &graph, const std::string & std::reverse(result.begin(), result.end()); return result; } + +bool IsSubGraph(const 
AnfNodePtr &node) { + MS_EXCEPTION_IF_NULL(node); + if (node->isa()) { + auto cnode = node->cast(); + auto &inputs = cnode->inputs(); + if (inputs.empty()) { + MS_LOG(EXCEPTION) << "Inputs of apply node is empty"; + } + + AnfNodePtr fn = inputs[0]; + if (!IsValueNode(fn)) { + return false; + } + auto node_prim = GetValueNode(fn); + if (node_prim->name() == prim::kPrimPartial->name()) { + return true; + } + } else if (IsValueNode(node)) { + return true; + } + return false; +} } // namespace CompileGraph::CompileGraph(const BackendPtr &backend, const std::vector &cut_list) @@ -214,7 +238,6 @@ bool CompileGraph::IsCut(const AnfNodePtr &node) { } AnfNodePtr fn = inputs[0]; - MS_EXCEPTION_IF_NULL(fn); if (IsValueNode(fn)) { auto fg = GetValueNode(fn); if (fg->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) { @@ -235,6 +258,15 @@ bool CompileGraph::IsCut(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(ms_context); ms_context->set_enable_pynative_hook(true); } + + if (backend_->name() == kMsConvert && prim->name() == prim::kPrimMakeTuple->name()) { + if (inputs.size() < 2) { + return false; + } + auto ret = IsSubGraph(inputs[1]); + return ret; + } + return true; } } @@ -466,6 +498,8 @@ int CompileGraph::InterpretNode(const FuncGraphPtr &graph, const CNodePtr &node) } else if (IsPrimitive(fn, prim::kPrimSwitch)) { AddSwitch(node); AddSinkSwitch(node); + } else if (IsPrimitive(fn, prim::kPrimSwitchLayer)) { + AddSwitchLayer(node); } else if (IsPrimitive(fn, prim::kPrimMakeTuple)) { AddMakeTuple(node); } else { @@ -622,6 +656,17 @@ void CompileGraph::AddSwitch(const CNodePtr &node) { AddInst(Instruction::kSwitch, args); } +void CompileGraph::AddSwitchLayer(const CNodePtr &node) { + auto inputs = node->inputs(); + if (inputs.size() != 3) { + MS_LOG(EXCEPTION) << "Switch layer must have index and branches."; + } + VectorRef args; + args.emplace_back(Ref(inputs[1])); + args.emplace_back(Ref(inputs[2])); + AddInst(Instruction::kSwitchLayer, args); +} + void CompileGraph::AddReturn(const 
CNodePtr &node) { VectorRef args; if (backend_->simu_flag()) { diff --git a/mindspore/ccsrc/vm/transform.h b/mindspore/ccsrc/vm/transform.h index a02478fc1b..d08a24d188 100644 --- a/mindspore/ccsrc/vm/transform.h +++ b/mindspore/ccsrc/vm/transform.h @@ -28,7 +28,7 @@ #include "vm/vm.h" #include "ir/anf.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "vm/segment_runner.h" #include "vm/backend.h" @@ -90,6 +90,7 @@ class CompileGraph { void AddPartial(const CNodePtr &node); void AddMakeTuple(const CNodePtr &node); void AddSwitch(const CNodePtr &node); + void AddSwitchLayer(const CNodePtr &node); void AddReturn(const CNodePtr &node); void AddPrimitive(const CNodePtr &node, const PrimitivePtr &prim); void AddInput(const AnfNodePtr &node); diff --git a/mindspore/ccsrc/vm/vm.cc b/mindspore/ccsrc/vm/vm.cc index c73d41df6c..baa5b0ea11 100644 --- a/mindspore/ccsrc/vm/vm.cc +++ b/mindspore/ccsrc/vm/vm.cc @@ -23,7 +23,7 @@ #include "vm/vmimpl.h" #include "vm/backend.h" #include "vm/transform.h" -#include "pipeline/parse/data_converter.h" +#include "pipeline/jit/parse/data_converter.h" #include "utils/base_ref_extends.h" namespace mindspore { @@ -480,6 +480,36 @@ void FinalVM::InstSwitch(const VectorRef &args) { MS_LOG(DEBUG) << "End"; } +void FinalVM::InstSwitchLayer(const VectorRef &args) { + MS_LOG(DEBUG) << "Start"; + const size_t args_size = 2; + if (args.size() != args_size) { + MS_LOG(ERROR) << __FUNCTION__ << " requires " << args_size << " parameters, while the input size is " << args.size() + << "."; + return; + } + + int idx = utils::cast(args[0]); + VectorRef branches = utils::cast(Ref(utils::cast(args[1]))); + int size = static_cast(branches.size()); + + BaseRef index = Ref(idx); + int idx_value = 0; + if (!backend_->GetIndex(index, &idx_value)) { + MS_LOG(EXCEPTION) << "Not supported type to be casted to int."; + } + if (idx_value < 0) { + // Add support negative index range [-size, -1]. 
+ idx_value += size; + } + if (idx_value < 0 || idx_value >= size) { + MS_LOG(EXCEPTION) << __FUNCTION__ << " given index " << idx_value << " out of range. Please make sure the value " + << "of index in [" << -size << ", " << size << "), and the type is int32."; + } + Push(branches[idx_value]); + MS_LOG(DEBUG) << "End"; +} + void FinalVM::InstTuple(const VectorRef &args) { MS_LOG(DEBUG) << "Start"; VectorRef tuple; @@ -618,57 +648,8 @@ void FinalVM::SyncData(const py::object &arg) { BaseRef FinalVM::RunHook(const PrimitivePtr &prim, const VectorRef &args) { MS_LOG(DEBUG) << "input for operation:"; - auto prim_py = dyn_cast(prim); - std::size_t args_size = args.size(); - auto py_args = py::tuple(args_size); - size_t i = 0; - for (auto &arg : args) { - py_args[i] = BaseRefToPyData(arg); - MS_LOG(DEBUG) << "arg: " << i << ":"; - i++; - } - // Hook operator for execute cell custom bprop function - py::object obj; - bool is_bprop = prim->HasAttr("bprop"); - if (is_bprop) { - SyncData(py_args); - py::function fn_bprop = prim_py->hook(); - obj = fn_bprop(*py_args); - return obj; - } - // Sync gradient data from device to host - SyncData(py_args[2]); - bool is_cell = prim->HasAttr("cell_hook"); - if (is_cell) { - // Hook operator for execute cell hook function - std::string cell_id = GetValue(prim->GetAttr("cell_id")); - if (_hook_grad.find(cell_id) != _hook_grad.end()) { - std::size_t hook_args_size = 3; - auto hook_args = py::tuple(hook_args_size); - hook_args[0] = cell_id; - hook_args[1] = py::make_tuple(_hook_grad[cell_id]); - hook_args[2] = py::make_tuple(py_args[2]); - py::function fn_hook = prim_py->hook(); - obj = fn_hook(*hook_args); - if (py::isinstance(obj)) { - obj = py_args[2]; - } - _hook_grad.erase(cell_id); - } else { - _hook_grad[cell_id] = py_args[2]; - obj = py_args[2]; - } - } else { - // Hook operator for execute variable hook function - py::function fn_hook = prim_py->hook(); - obj = fn_hook(py::make_tuple(py_args[2])); - if (py::isinstance(obj)) { - 
obj = py_args[2]; - } - } - obj = py::make_tuple(obj); - return obj; + MS_EXCEPTION_IF_NULL(prim); + return prim->RunHookFunction(args); } - } // namespace compile } // namespace mindspore diff --git a/mindspore/ccsrc/vm/vm.h b/mindspore/ccsrc/vm/vm.h index 6a078c9baf..02a1ad4ddb 100644 --- a/mindspore/ccsrc/vm/vm.h +++ b/mindspore/ccsrc/vm/vm.h @@ -51,15 +51,17 @@ enum Instruction { kPush, kPrim, kGraph, - kPadStack + kPadStack, + kSwitchLayer }; using InstType = std::pair; using InstSet = std::vector; using InstFunctionMap = std::map>; -const std::vector inst_str{"call", "tail_call", "return", "partial", "switch", "switch_return", "tuple", - "input", "external", "push", "primitive", "graph", "pad_stack"}; +const std::vector inst_str{"call", "tail_call", "return", "partial", "switch", + "switch_return", "tuple", "input", "external", "push", + "primitive", "graph", "pad_stack", "switch_layer"}; class StructPartial : public Base { public: // Initialize StructPartial. @@ -114,6 +116,7 @@ class FinalVM { void InstExternal(const VectorRef &args); void InstPushPrim(const VectorRef &args); void InstSwitchReturn(const VectorRef &args); + void InstSwitchLayer(const VectorRef &args); void set_insts(const InstSet &value) { insts_ = value; } BaseRef RunHook(const PrimitivePtr &prim, const VectorRef &arg); @@ -157,8 +160,7 @@ class FinalVM { {Instruction::kExternal, [this](const VectorRef &args) { InstExternal(args); }}, {Instruction::kPrim, [this](const VectorRef &args) { InstPushPrim(args); }}, {Instruction::kSwitchReturn, [this](const VectorRef &args) { InstSwitchReturn(args); }}, - }; - std::map _hook_grad; + {Instruction::kSwitchLayer, [this](const VectorRef &args) { InstSwitchLayer(args); }}}; }; using FinalVMPtr = std::shared_ptr; diff --git a/mindspore/ccsrc/vm/vmimpl.cc b/mindspore/ccsrc/vm/vmimpl.cc index 51b2c9b3d5..2aebf8ad0d 100644 --- a/mindspore/ccsrc/vm/vmimpl.cc +++ b/mindspore/ccsrc/vm/vmimpl.cc @@ -27,10 +27,10 @@ #include #include "ir/tensor.h" -#include 
"operator/ops.h" +#include "frontend/operator/ops.h" #include "ir/manager.h" #include "ir/func_graph_cloner.h" -#include "ir/primitive.h" +#include "ir/primitive_py.h" #include "utils/convert_utils.h" #include "utils/primitive_utils.h" #include "debug/draw.h" diff --git a/mindspore/common/parameter.py b/mindspore/common/parameter.py index 571cc9cb40..1605ee4bc5 100644 --- a/mindspore/common/parameter.py +++ b/mindspore/common/parameter.py @@ -17,11 +17,11 @@ import numbers from copy import copy from mindspore import context +from .._c_expression import ParamValue from . import dtype as mstype from .initializer import initializer, Initializer from .tensor import Tensor, MetaTensor from .._checkparam import _check_str_by_regular -from ..parallel._utils import _set_clone_info, _CloneInfo from ..parallel._tensor import _get_slice_index __all__ = ['Parameter', 'ParameterTuple'] @@ -51,34 +51,33 @@ class Parameter: requires_grad (bool): True if the parameter requires gradient. Default: True. layerwise_parallel (bool): A kind of model parallel mode. When layerwise_parallel is true in paralle mode, broadcast and gradients communication would not be applied on parameters. Default: False. - sparse_grad (str): Set if the parameter's gradient is sparse. Default: empty. - has_indexed_slices (bool): Set if the parameter's gradient is indexed_slices. Default: false. 
""" - def __init__(self, default_input, name, requires_grad=True, layerwise_parallel=False, - sparse_grad="", has_indexed_slices_grad=False): + def __init__(self, default_input, name, requires_grad=True, layerwise_parallel=False): + self._value = ParamValue() self.set_parameter_data(default_input) self.name = name self.requires_grad = requires_grad self.layerwise_parallel = layerwise_parallel - self.sparse_grad = sparse_grad - self.has_indexed_slices_grad = has_indexed_slices_grad self._is_init = False self._sliced = False - self.clone_info = _CloneInfo() + self.is_param_ps = False if context.get_context("mode") == context.PYNATIVE_MODE: self.init_data() def __repr__(self): format_str = 'Parameter (name={name})' - return format_str.format(name=self._name) + return format_str.format(name=self._value.name) def __parameter__(self): """For parse check.""" + def set_param_ps(self): + self.is_param_ps = True + @property def name(self): """Get the name of the parameter.""" - return self._name + return self._value.name @name.setter def name(self, name_): @@ -100,7 +99,7 @@ class Parameter: format(name_, PARAMETER_NAME_PREFIX_MAX_LEN)) else: raise ValueError("The type of the name should be `str` or `None`.") - self._name = name_ + self._value.name = name_ @property def sliced(self): @@ -140,7 +139,9 @@ class Parameter: """ _check_str_by_regular(prefix) x = copy(self) - x.name = prefix + '.' + x.name + # pylint: disable=protected-access + x._value = self._value.clone() + x._value.name = prefix + '.' 
+ self._value.name x.is_init = False if init != 'same': shape = self.default_input.shape @@ -152,57 +153,41 @@ class Parameter: x.init_data() else: x.default_input = initializer(init, shape=shape, dtype=dtype) - - x.clone_info = copy(self.clone_info) - _set_clone_info(self.clone_info, x.clone_info) return x @property def layerwise_parallel(self): - return self._layerwise_parallel + return self._value.layerwise_parallel @layerwise_parallel.setter def layerwise_parallel(self, value=True): if not isinstance(value, bool): raise TypeError("`layerwise_parallel` parameter must be bool type") - self._layerwise_parallel = value + self._value.layerwise_parallel = value @property def requires_grad(self): """Return whether the parameter requires gradient.""" - return self._requires_grad + return self._value.requires_grad @requires_grad.setter def requires_grad(self, value=True): if not isinstance(value, bool): raise TypeError("`requires_grad` parameter must be bool type") - self._requires_grad = value + self._value.requires_grad = value @property - def sparse_grad(self): - """Return whether the parameter's gradient is sparse.""" - return self._sparse_grad - - @sparse_grad.setter - def sparse_grad(self, value=""): - if not isinstance(value, str): - raise TypeError("`sparse_grad` parameter must be str type") - self._sparse_grad = value + def data(self): + return self.default_input @property - def has_indexed_slices_grad(self): - """Return whether the parameter's gradient is indexed_slices.""" - return self._has_indexed_slices_grad - - @has_indexed_slices_grad.setter - def has_indexed_slices_grad(self, value=False): - if not isinstance(value, bool): - raise TypeError("`has_indexed_slices_grad` parameter must be bool type") - self._has_indexed_slices_grad = value + def default_input(self): + return self._data - @property - def data(self): - return self.default_input + @default_input.setter + def default_input(self, data): + self._data = data + self._value.data = data def 
__add__(self, other): return self.default_input + other @@ -223,11 +208,12 @@ class Parameter: def set_parameter_data(self, data): """Set `default_input` of current `Parameter`.""" + self.init_mode = None if isinstance(data, bool): raise ValueError('Parameter data can not be `bool`') if isinstance(data, Tensor): # make a copy of Tensor to init the parameter - data = Tensor(data.asnumpy().copy()) + data = Tensor(data.asnumpy()) data.init_flag = False elif isinstance(data, Initializer): self.init_mode = data @@ -242,7 +228,6 @@ class Parameter: self.default_input = data - def init_data(self, layout=None, set_sliced=False): """ Init data of the parameter. @@ -256,7 +241,7 @@ class Parameter: set_sliced (bool): True if should set parameter sliced after init the data of initializer. Default: False. """ - if not isinstance(self.default_input, MetaTensor): + if self.init_mode is None: return if layout is not None: if not isinstance(layout, list): diff --git a/mindspore/common/tensor.py b/mindspore/common/tensor.py index 043ab4f6cf..64a8eb4637 100644 --- a/mindspore/common/tensor.py +++ b/mindspore/common/tensor.py @@ -73,7 +73,6 @@ class Tensor(Tensor_): else: Tensor_.__init__(self, input_data, dtype) self._virtual_flag = False - self._init_flag = False def __repr__(self): return str(self.__str__()) @@ -182,6 +181,9 @@ class Tensor(Tensor_): def __imod__(self, other): return self.__mod__(other) + def __pow__(self, other): + return tensor_operator_registry.get('__pow__')(self, other) + def __floordiv__(self, other): return tensor_operator_registry.get('__floordiv__')(self, other) @@ -205,19 +207,6 @@ class Tensor(Tensor_): raise TypeError("virtual_flag must be bool.") self._virtual_flag = value - @property - def init_flag(self): - """whether the tensor is init.""" - return self._init_flag - - @init_flag.setter - def init_flag(self, value): - """Set the tensor is init_flag.""" - if not isinstance(value, bool): - raise TypeError("init_flag must be bool.") - 
self.set_init_flag(value) - self._init_flag = value - class IndexedSlices: def __init__(self, indices, values, dense_shape): diff --git a/mindspore/communication/_comm_helper.py b/mindspore/communication/_comm_helper.py index 508aa2e7a9..5e1f7d06e7 100644 --- a/mindspore/communication/_comm_helper.py +++ b/mindspore/communication/_comm_helper.py @@ -14,7 +14,7 @@ # ============================================================================ """comm_helper""" - +import os from ._hccl_management import load_lib as hccl_load_lib _HCCL_AVAILABLE = False @@ -44,7 +44,7 @@ else: HCCL_WORLD_COMM_GROUP = "hccl_world_group" NCCL_WORLD_COMM_GROUP = "nccl_world_group" - +MS_ROLE = os.getenv("MS_ROLE") class Backend: """ @@ -152,6 +152,9 @@ def _get_rank_helper(group, backend): Integer. The local rank id of the calling process. """ rank_id = None + if MS_ROLE in ("MS_PSERVER", "MS_SCHED"): + rank_id = 0 + return rank_id if backend == Backend.HCCL: if group == HCCL_WORLD_COMM_GROUP: rank_id = hccl.get_rank_id() @@ -211,6 +214,9 @@ def _get_size_helper(group, backend): Integer. The rank size of specified group. """ size = None + if MS_ROLE in ("MS_PSERVER", "MS_SCHED"): + size = 1 + return size if backend == Backend.HCCL: if group == HCCL_WORLD_COMM_GROUP: size = hccl.get_rank_size() diff --git a/mindspore/communication/management.py b/mindspore/communication/management.py index 1cd60fe2e5..3fb4e7b947 100755 --- a/mindspore/communication/management.py +++ b/mindspore/communication/management.py @@ -13,6 +13,7 @@ # limitations under the License. 
# ============================================================================ """Communication management API""" +import os from mindspore.parallel._auto_parallel_context import auto_parallel_context from ._comm_helper import Backend, _get_rank_helper, _get_size_helper, \ _get_world_rank_from_group_rank_helper, _get_group_rank_from_world_rank_helper, \ @@ -28,6 +29,7 @@ __all__ = ["init", "release", "get_rank", "get_local_rank", "get_group_size", DEFAULT_WORLD_COMM_GROUP = HCCL_WORLD_COMM_GROUP DEFAULT_BACKEND = Backend("hccl") +MS_ROLE = os.getenv("MS_ROLE") def _get_group(group): @@ -58,6 +60,8 @@ def init(backend_name="hccl"): TypeError: If backend name is not a string. RuntimeError: If backend is invalid or distributed init fails. """ + if MS_ROLE in ("MS_PSERVER", "MS_SCHED"): + return if not isinstance(backend_name, str): raise TypeError("Backend name must be a string, but got {}".format(type(backend_name))) diff --git a/mindspore/context.py b/mindspore/context.py index b5be6c3213..0de6084caf 100644 --- a/mindspore/context.py +++ b/mindspore/context.py @@ -17,6 +17,7 @@ The context of mindspore, used to configure the current execution environment, including execution mode, execution backend and other feature switches. """ import os +import time import threading from collections import namedtuple from types import FunctionType @@ -55,12 +56,20 @@ def _make_directory(path): os.makedirs(path) real_path = path except PermissionError as e: - logger.error( - f"No write permission on the directory `{path}, error = {e}") + logger.error(f"No write permission on the directory `{path}, error = {e}") raise ValueError(f"No write permission on the directory `{path}`.") return real_path +def _get_print_file_name(file_name): + """Add timestamp suffix to file name. Rename the file name: file_name + "." + time(seconds).""" + time_second = str(int(time.time())) + file_name = file_name + "." 
+ time_second + if os.path.exists(file_name): + ValueError("This file {} already exists.".format(file_name)) + return file_name + + class _ThreadLocalInfo(threading.local): """ Thread local Info used for store thread local attributes. @@ -209,6 +218,8 @@ class _Context: success = self._context_handle.set_device_target(target) if not success: raise ValueError("Target device name is invalid!!!") + if self.enable_debug_runtime and self.device_target == "CPU": + self.set_backend_policy("vm") @property def device_id(self): @@ -355,14 +366,6 @@ class _Context: def check_bprop(self, check_bprop_flag): self._context_handle.set_check_bprop_flag(check_bprop_flag) - @property - def enable_sparse(self): - return self._context_handle.get_enable_sparse_flag() - - @enable_sparse.setter - def enable_sparse(self, enable_sparse_flag): - self._context_handle.set_enable_sparse_flag(enable_sparse_flag) - @property def max_device_memory(self): return self._context_handle.get_max_device_memory() @@ -381,9 +384,28 @@ class _Context: return None @print_file_path.setter - def print_file_path(self, file): - self._context_handle.set_print_file_path(file) + def print_file_path(self, file_path): + """Add timestamp suffix to file name. 
Sets print file path.""" + print_file_path = os.path.realpath(file_path) + if os.path.isdir(print_file_path): + raise IOError("Print_file_path should be file path, but got {}.".format(file_path)) + + if os.path.exists(print_file_path): + _path, _file_name = os.path.split(print_file_path) + path = _make_directory(_path) + file_name = _get_print_file_name(_file_name) + full_file_name = os.path.join(path, file_name) + else: + full_file_name = print_file_path + self._context_handle.set_print_file_path(full_file_name) + + @property + def enable_sparse(self): + return self._context_handle.get_enable_sparse() + @enable_sparse.setter + def enable_sparse(self, enable_sparse): + self._context_handle.set_enable_sparse(enable_sparse) def check_input_format(x): import re @@ -575,8 +597,9 @@ def set_context(**kwargs): max_device_memory (str): Sets the maximum memory available for device, currently only supported on GPU. The format is "xxGB". Default: "1024GB". print_file_path (str): The path of print data to save. If this parameter is set, print data is saved to - a file by default, and turn off printing to the screen. - enable_sparse (bool): Whether to enable sparse feature. Default: False. + a file by default, and turn off printing to the screen. If the file already exists, add a timestamp + suffix to the file. + enable_sparse (bool): Whether to enable sparsity feature. Default: False. Raises: ValueError: If input key is not an attribute in context. 
diff --git a/mindspore/core/abstract/CMakeLists.txt b/mindspore/core/abstract/CMakeLists.txt new file mode 100644 index 0000000000..fa331776b3 --- /dev/null +++ b/mindspore/core/abstract/CMakeLists.txt @@ -0,0 +1,3 @@ +file(GLOB_RECURSE _ABSTRACT_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") +set_property(SOURCE ${_ABSTRACT_ALL_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_ABSTRACT) +add_library(_mindspore_abstract_obj OBJECT ${_ABSTRACT_ALL_SRC_FILES}) diff --git a/mindspore/ccsrc/pipeline/static_analysis/abstract_value.cc b/mindspore/core/abstract/abstract_value.cc similarity index 95% rename from mindspore/ccsrc/pipeline/static_analysis/abstract_value.cc rename to mindspore/core/abstract/abstract_value.cc index b59545e5ae..7bef3829a6 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/abstract_value.cc +++ b/mindspore/core/abstract/abstract_value.cc @@ -16,13 +16,12 @@ * limitations under the License. */ -#include "pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" #include #include "utils/symbolic.h" -#include "pipeline/static_analysis/static_analysis.h" -#include "pipeline/static_analysis/utils.h" +#include "abstract/utils.h" namespace mindspore { namespace abstract { @@ -55,7 +54,6 @@ ValuePtr AbstractBase::BuildValue() const { AbstractBasePtr AbstractBase::Broaden() const { AbstractBasePtr clone = Clone(); clone->set_value(kAnyValue); - clone->set_sparse_grad(sparse_grad_); return clone; } @@ -68,8 +66,7 @@ std::string AbstractBase::ToString() const { MS_EXCEPTION_IF_NULL(type_); MS_EXCEPTION_IF_NULL(shape_); buffer << type_name() << "(" - << "Type: " << type_->ToString() << " Value: " << value << " Shape: " << shape_->ToString() - << " sparse_grad: " << sparse_grad_ << " has_indexed_slices_grad: " << has_indexed_slices_grad_ << ")"; + << "Type: " << type_->ToString() << " Value: " << value << " Shape: " << shape_->ToString() << ")"; return buffer.str(); } @@ -78,25 +75,16 @@ 
AbstractBasePtr AbstractScalar::Broaden() const { return AbstractBase::Broaden() AbstractBasePtr AbstractScalar::Join(const AbstractBasePtr &other) { MS_EXCEPTION_IF_NULL(other); if (*this == *other) { - auto ret = shared_from_base(); - ret->set_sparse_grad(sparse_grad()); - ret->set_has_indexed_slices_grad(has_indexed_slices_grad()); - return ret; + return shared_from_base(); } auto value_self = GetValueTrack(); MS_EXCEPTION_IF_NULL(value_self); ValuePtr res_value = ValueJoin(value_self, other->GetValueTrack()); TypePtr res_type = TypeJoin(GetTypeTrack(), other->GetTypeTrack()); if (res_value == value_self) { - auto ret = shared_from_base(); - ret->set_sparse_grad(sparse_grad()); - ret->set_has_indexed_slices_grad(has_indexed_slices_grad()); - return ret; + return shared_from_base(); } - auto ret = std::make_shared(res_value, res_type); - ret->set_sparse_grad(sparse_grad()); - ret->set_has_indexed_slices_grad(has_indexed_slices_grad()); - return ret; + return std::make_shared(res_value, res_type); } AbstractBasePtr AbstractType::Clone() const { @@ -452,16 +440,11 @@ AbstractBasePtr AbstractTensor::Join(const AbstractBasePtr &other) { MS_LOG(EXCEPTION) << "Join failed as type mismatch, this: " << ToString() << ", other: " << other->ToString(); } if (*this == *other) { - if (sparse_grad() == other->sparse_grad()) { - return shared_from_base(); - } + return shared_from_base(); } auto element = element_->Join(other_tensor->element_); auto shape = ShapeJoin(this->shape(), other_tensor->shape()); - auto ret = std::make_shared(element, shape); - ret->set_sparse_grad(sparse_grad()); - ret->set_has_indexed_slices_grad(has_indexed_slices_grad()); - return ret; + return std::make_shared(element, shape); } bool AbstractTensor::operator==(const AbstractTensor &other) const { @@ -501,8 +484,6 @@ AbstractBasePtr AbstractTensor::Clone() const { ShapePtr shp = shape(); clone->set_shape(shp->Clone()); clone->set_value(GetValueTrack()); - clone->set_sparse_grad(sparse_grad()); - 
clone->set_has_indexed_slices_grad(has_indexed_slices_grad()); return clone; } @@ -512,8 +493,6 @@ AbstractBasePtr AbstractTensor::Broaden() const { auto shp = shape(); broaden->set_shape(shp->Clone()); broaden->set_value(kAnyValue); - broaden->set_sparse_grad(sparse_grad()); - broaden->set_has_indexed_slices_grad(has_indexed_slices_grad()); return broaden; } @@ -524,8 +503,6 @@ AbstractBasePtr AbstractTensor::BroadenWithShape() const { shp->Broaden(); broaden->set_shape(shp); broaden->set_value(kAnyValue); - broaden->set_sparse_grad(sparse_grad()); - broaden->set_has_indexed_slices_grad(has_indexed_slices_grad()); return broaden; } @@ -538,8 +515,7 @@ std::string AbstractTensor::ToString() const { MS_EXCEPTION_IF_NULL(value_track); buffer << type_name() << "(" << "shape: " << shape_track->ToString() << ", element: " << element_->ToString() - << ", value_ptr: " << value_track << ", value: " << value_track->ToString() << " sparse_grad " << sparse_grad() - << " has_indexed_slices_grad " << has_indexed_slices_grad() << ")"; + << ", value_ptr: " << value_track << ", value: " << value_track->ToString() << ")"; return buffer.str(); } @@ -838,7 +814,8 @@ bool AbstractRef::operator==(const AbstractBase &other) const { AbstractBasePtr AbstractRef::Join(const AbstractBasePtr &other) { auto other_ref = other->cast(); if (other_ref == nullptr) { - MS_LOG(EXCEPTION) << "Join failed as type mismatch, this: " << ToString() << ", other: " << other->ToString(); + auto new_ref = ref_->Join(other); + return std::make_shared(ref_key_, new_ref, ref_origin_); } if (*this == *other) { return shared_from_base(); diff --git a/mindspore/ccsrc/pipeline/static_analysis/abstract_value.h b/mindspore/core/abstract/abstract_value.h similarity index 97% rename from mindspore/ccsrc/pipeline/static_analysis/abstract_value.h rename to mindspore/core/abstract/abstract_value.h index 3981a6eb23..d922f93e70 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/abstract_value.h +++ 
b/mindspore/core/abstract/abstract_value.h @@ -16,8 +16,8 @@ * limitations under the License. */ -#ifndef PIPELINE_STATIC_ANALYSIS_ABSTRACT_VALUE_H_ -#define PIPELINE_STATIC_ANALYSIS_ABSTRACT_VALUE_H_ +#ifndef MINDSPORE_CCSRC_ABSTRACT_ABSTRACT_VALUE_H_ +#define MINDSPORE_CCSRC_ABSTRACT_ABSTRACT_VALUE_H_ #include #include @@ -27,11 +27,11 @@ #include "utils/log_adapter.h" #include "utils/hashing.h" -#include "ir/base.h" +#include "base/base.h" #include "ir/dtype.h" #include "ir/value.h" #include "ir/tensor.h" -#include "pipeline/static_analysis/dshape.h" +#include "abstract/dshape.h" namespace mindspore { namespace abstract { @@ -44,7 +44,7 @@ class AbstractBase : public Base { public: explicit AbstractBase(const ValuePtr &value = nullptr, const TypePtr &type = kAnyType, const BaseShapePtr &shape = kNoShape) - : value_(value), type_(type), shape_(shape), sparse_grad_(""), has_indexed_slices_grad_(false) {} + : value_(value), type_(type), shape_(shape) {} ~AbstractBase() override = default; MS_DECLARE_PARENT(AbstractBase, Base) @@ -53,17 +53,11 @@ class AbstractBase : public Base { virtual bool operator==(const AbstractBase &other) const; void set_value(const ValuePtr &value) { value_ = value; } - void set_sparse_grad(const std::string &sparse_grad) { sparse_grad_ = sparse_grad; } - void set_has_indexed_slices_grad(const bool &has_indexed_slices_grad) { - has_indexed_slices_grad_ = has_indexed_slices_grad; - } void set_type(const TypePtr &type) { type_ = type; } void set_shape(const BaseShapePtr &shape) { shape_ = shape; } void set_value_desc(const std::string &desc) { value_desc_ = desc; } const std::string &value_desc() const { return value_desc_; } ValuePtr GetValueTrack() const { return value_; } - const std::string &sparse_grad() const { return sparse_grad_; } - const bool &has_indexed_slices_grad() const { return has_indexed_slices_grad_; } TypePtr GetTypeTrack() const { return type_; } BaseShapePtr GetShapeTrack() const { return shape_; } @@ -91,8 +85,6 @@ 
class AbstractBase : public Base { TypePtr type_; BaseShapePtr shape_; std::string value_desc_; // store initial value description for error report - std::string sparse_grad_; - bool has_indexed_slices_grad_; }; class AbstractScalar : public AbstractBase { @@ -631,4 +623,4 @@ class AbstractIndexedSlices : public AbstractUndetermined { }; } // namespace abstract } // namespace mindspore -#endif // PIPELINE_STATIC_ANALYSIS_ABSTRACT_VALUE_H_ +#endif // MINDSPORE_CCSRC_ABSTRACT_ABSTRACT_VALUE_H_ diff --git a/mindspore/ccsrc/pipeline/static_analysis/analysis_context.cc b/mindspore/core/abstract/analysis_context.cc similarity index 99% rename from mindspore/ccsrc/pipeline/static_analysis/analysis_context.cc rename to mindspore/core/abstract/analysis_context.cc index 4a43b14168..1ae6125838 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/analysis_context.cc +++ b/mindspore/core/abstract/analysis_context.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "pipeline/static_analysis/analysis_context.h" +#include "abstract/analysis_context.h" #include diff --git a/mindspore/ccsrc/pipeline/static_analysis/analysis_context.h b/mindspore/core/abstract/analysis_context.h similarity index 93% rename from mindspore/ccsrc/pipeline/static_analysis/analysis_context.h rename to mindspore/core/abstract/analysis_context.h index c0b3403702..c0293d7e91 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/analysis_context.h +++ b/mindspore/core/abstract/analysis_context.h @@ -16,14 +16,14 @@ * limitations under the License. 
*/ -#ifndef PIPELINE_STATIC_ANALYSIS_ANALYSIS_CONTEXT_H_ -#define PIPELINE_STATIC_ANALYSIS_ANALYSIS_CONTEXT_H_ +#ifndef MINDSPORE_CCSRC_ABSTRACT_ANALYSIS_CONTEXT_H_ +#define MINDSPORE_CCSRC_ABSTRACT_ANALYSIS_CONTEXT_H_ #include #include #include -#include "pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" #include "ir/meta_func_graph.h" namespace mindspore { @@ -85,4 +85,4 @@ struct ContextEqual { extern const AnalysisContextPtr kDummyAnalysisContext; } // namespace abstract } // namespace mindspore -#endif // PIPELINE_STATIC_ANALYSIS_ANALYSIS_CONTEXT_H_ +#endif // MINDSPORE_CCSRC_ABSTRACT_ANALYSIS_CONTEXT_H_ diff --git a/mindspore/ccsrc/pipeline/static_analysis/dshape.cc b/mindspore/core/abstract/dshape.cc similarity index 98% rename from mindspore/ccsrc/pipeline/static_analysis/dshape.cc rename to mindspore/core/abstract/dshape.cc index 183ec772ff..74ea1ff7bf 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/dshape.cc +++ b/mindspore/core/abstract/dshape.cc @@ -16,7 +16,7 @@ * limitations under the License. */ -#include "pipeline/static_analysis/dshape.h" +#include "abstract/dshape.h" #include #include diff --git a/mindspore/ccsrc/pipeline/static_analysis/dshape.h b/mindspore/core/abstract/dshape.h similarity index 96% rename from mindspore/ccsrc/pipeline/static_analysis/dshape.h rename to mindspore/core/abstract/dshape.h index 3e850e309b..b9b8e93292 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/dshape.h +++ b/mindspore/core/abstract/dshape.h @@ -16,8 +16,8 @@ * limitations under the License. 
*/ -#ifndef PIPELINE_STATIC_ANALYSIS_DSHAPE_H_ -#define PIPELINE_STATIC_ANALYSIS_DSHAPE_H_ +#ifndef MINDSPORE_CCSRC_ABSTRACT_DSHAPE_H_ +#define MINDSPORE_CCSRC_ABSTRACT_DSHAPE_H_ #include #include @@ -27,7 +27,7 @@ #include #include "utils/log_adapter.h" -#include "ir/base.h" +#include "base/base.h" namespace mindspore { namespace abstract { @@ -132,4 +132,4 @@ using ListShapePtr = std::shared_ptr; } // namespace abstract } // namespace mindspore -#endif // PIPELINE_STATIC_ANALYSIS_DSHAPE_H_ +#endif // MINDSPORE_CCSRC_ABSTRACT_DSHAPE_H_ diff --git a/mindspore/ccsrc/pipeline/static_analysis/param_validator.cc b/mindspore/core/abstract/param_validator.cc similarity index 98% rename from mindspore/ccsrc/pipeline/static_analysis/param_validator.cc rename to mindspore/core/abstract/param_validator.cc index 2cbd33c162..69fe88b4a3 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/param_validator.cc +++ b/mindspore/core/abstract/param_validator.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include "pipeline/static_analysis/param_validator.h" +#include "abstract/param_validator.h" #include #include #include #include "utils/symbolic.h" -#include "pipeline/static_analysis/utils.h" +#include "abstract/utils.h" namespace mindspore { namespace abstract { diff --git a/mindspore/ccsrc/pipeline/static_analysis/param_validator.h b/mindspore/core/abstract/param_validator.h similarity index 93% rename from mindspore/ccsrc/pipeline/static_analysis/param_validator.h rename to mindspore/core/abstract/param_validator.h index daa436d66d..434235abda 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/param_validator.h +++ b/mindspore/core/abstract/param_validator.h @@ -14,15 +14,15 @@ * limitations under the License. 
*/ -#ifndef PIPELINE_STATIC_ANALYSIS_PARAM_VALIDATOR_H_ -#define PIPELINE_STATIC_ANALYSIS_PARAM_VALIDATOR_H_ +#ifndef MINDSPORE_CCSRC_ABSTRACT_PARAM_VALIDATOR_H_ +#define MINDSPORE_CCSRC_ABSTRACT_PARAM_VALIDATOR_H_ #include #include #include #include -#include "pipeline/static_analysis/abstract_value.h" -#include "pipeline/static_analysis/utils.h" +#include "abstract/abstract_value.h" +#include "abstract/utils.h" #include "utils/any.h" #include "ir/primitive.h" @@ -97,4 +97,4 @@ void CheckArgsSpec(const AbstractBasePtrList &args_list) { } // namespace abstract } // namespace mindspore -#endif // PIPELINE_STATIC_ANALYSIS_PARAM_VALIDATOR_H_ +#endif // MINDSPORE_CCSRC_ABSTRACT_PARAM_VALIDATOR_H_ diff --git a/mindspore/ccsrc/pipeline/static_analysis/utils.cc b/mindspore/core/abstract/utils.cc similarity index 98% rename from mindspore/ccsrc/pipeline/static_analysis/utils.cc rename to mindspore/core/abstract/utils.cc index 4c399f6ffc..16497c74a9 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/utils.cc +++ b/mindspore/core/abstract/utils.cc @@ -16,13 +16,13 @@ * limitations under the License. */ -#include "pipeline/static_analysis/utils.h" +#include "abstract/utils.h" #include #include #include #include "utils/symbolic.h" -#include "pipeline/static_analysis/param_validator.h" +#include "abstract/param_validator.h" namespace mindspore { namespace abstract { diff --git a/mindspore/ccsrc/pipeline/static_analysis/utils.h b/mindspore/core/abstract/utils.h similarity index 90% rename from mindspore/ccsrc/pipeline/static_analysis/utils.h rename to mindspore/core/abstract/utils.h index 6a709ea99c..be38ae860d 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/utils.h +++ b/mindspore/core/abstract/utils.h @@ -16,18 +16,17 @@ * limitations under the License. 
*/ -#ifndef PIPELINE_STATIC_ANALYSIS_UTILS_H_ -#define PIPELINE_STATIC_ANALYSIS_UTILS_H_ +#ifndef MINDSPORE_CCSRC_ABSTRACT_UTILS_H_ +#define MINDSPORE_CCSRC_ABSTRACT_UTILS_H_ #include #include #include #include -#include "pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" #include "utils/any.h" #include "utils/misc.h" #include "utils/convert_utils.h" -#include "ir/primitive.h" namespace mindspore { namespace abstract { @@ -54,4 +53,4 @@ int GetPositiveAxis(int axis_value, size_t increment); ShapePtr GetBroadcastShape(const std::string &op, const AbstractTensorPtr &tensor_x, const AbstractTensorPtr &tensor_y); } // namespace abstract } // namespace mindspore -#endif // PIPELINE_STATIC_ANALYSIS_UTILS_H_ +#endif // MINDSPORE_CCSRC_ABSTRACT_UTILS_H_ diff --git a/mindspore/core/base/CMakeLists.txt b/mindspore/core/base/CMakeLists.txt new file mode 100644 index 0000000000..d65b91a824 --- /dev/null +++ b/mindspore/core/base/CMakeLists.txt @@ -0,0 +1,3 @@ +file(GLOB_RECURSE _BASE_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") +set_property(SOURCE ${_BASE_ALL_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_BASE) +add_library(_mindspore_base_obj OBJECT ${_BASE_ALL_SRC_FILES}) diff --git a/mindspore/ccsrc/ir/base.cc b/mindspore/core/base/base.cc similarity index 98% rename from mindspore/ccsrc/ir/base.cc rename to mindspore/core/base/base.cc index 7a03269ad8..07ed252e96 100644 --- a/mindspore/ccsrc/ir/base.cc +++ b/mindspore/core/base/base.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ir/base.h" +#include "base/base.h" #include #include #include diff --git a/mindspore/ccsrc/ir/base.h b/mindspore/core/base/base.h similarity index 97% rename from mindspore/ccsrc/ir/base.h rename to mindspore/core/base/base.h index 7dc4145837..8e1a447c0d 100644 --- a/mindspore/ccsrc/ir/base.h +++ b/mindspore/core/base/base.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef MINDSPORE_CCSRC_IR_BASE_H_ -#define MINDSPORE_CCSRC_IR_BASE_H_ +#ifndef MINDSPORE_CCSRC_BASE_BASE_H_ +#define MINDSPORE_CCSRC_BASE_BASE_H_ #include #include @@ -149,4 +149,4 @@ struct MS_EXPORT TypeIdManager { }; } // namespace mindspore -#endif // MINDSPORE_CCSRC_IR_BASE_H_ +#endif // MINDSPORE_CCSRC_BASE_BASE_H_ diff --git a/mindspore/ccsrc/ir/CMakeLists.txt b/mindspore/core/ir/CMakeLists.txt similarity index 100% rename from mindspore/ccsrc/ir/CMakeLists.txt rename to mindspore/core/ir/CMakeLists.txt diff --git a/mindspore/ccsrc/ir/anf.cc b/mindspore/core/ir/anf.cc similarity index 99% rename from mindspore/ccsrc/ir/anf.cc rename to mindspore/core/ir/anf.cc index 4c1d2bf50d..0d96ddf263 100644 --- a/mindspore/ccsrc/ir/anf.cc +++ b/mindspore/core/ir/anf.cc @@ -24,9 +24,9 @@ #include #include "ir/func_graph.h" -#include "ir/primitive_base.h" +#include "ir/primitive.h" #include "utils/context/ms_context.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { // namespace to support intermediate representation definition diff --git a/mindspore/ccsrc/ir/anf.h b/mindspore/core/ir/anf.h similarity index 98% rename from mindspore/ccsrc/ir/anf.h rename to mindspore/core/ir/anf.h index 8a44627885..c1a28d57f1 100644 --- a/mindspore/ccsrc/ir/anf.h +++ b/mindspore/core/ir/anf.h @@ -26,9 +26,10 @@ #include #include -#include "ir/base.h" -#include "debug/info.h" +#include "base/base.h" +#include "ir/kernel_info_dev.h" #include "ir/scope.h" +#include "debug/info.h" // A MindSpore ANF IR defined here. 
// with BNF followed: @@ -71,19 +72,9 @@ class BaseRef; class Var; using VarPtr = std::shared_ptr; -namespace device { -class KernelInfo; -} // namespace device -using KernelInfoDevice = device::KernelInfo; -using KernelInfoDevicePtr = std::shared_ptr; - class AnfVisitor; -class ParamValue { - public: - ParamValue() = default; - virtual ~ParamValue() = default; -}; +class ParamValue; using ParamValuePtr = std::shared_ptr; // AnfNode is the basic class of the IR definition derived from Base. diff --git a/mindspore/ccsrc/ir/anf_extends.cc b/mindspore/core/ir/anf_extends.cc similarity index 97% rename from mindspore/ccsrc/ir/anf_extends.cc rename to mindspore/core/ir/anf_extends.cc index 432ffdb606..b70a660aae 100644 --- a/mindspore/ccsrc/ir/anf_extends.cc +++ b/mindspore/core/ir/anf_extends.cc @@ -22,9 +22,9 @@ #include #include "ir/visitor.h" -#include "pipeline/static_analysis/static_analysis.h" -#include "operator/ops.h" -#include "parallel/ops_info/ops_utils.h" +#include "ir/func_graph.h" +#include "frontend/operator/ops.h" +#include "frontend/parallel/ops_info/ops_utils.h" #include "debug/label.h" namespace mindspore { diff --git a/mindspore/core/ir/anf_py.cc b/mindspore/core/ir/anf_py.cc new file mode 100644 index 0000000000..d033dfff5a --- /dev/null +++ b/mindspore/core/ir/anf_py.cc @@ -0,0 +1,28 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include "ir/anf.h" + +#include "pybind_api/api_register.h" + +namespace mindspore { +// Define python 'RefKey' class. +REGISTER_PYBIND_DEFINE(CNode, ([](const pybind11::module *m) { + (void)py::class_(*m, "CNode") + .def("expanded_str", (std::string(CNode::*)(int) const) & CNode::DebugString, + "Get CNode string representation with specified expansion level."); + })); +} // namespace mindspore diff --git a/mindspore/core/ir/device_sync.h b/mindspore/core/ir/device_sync.h new file mode 100644 index 0000000000..a6bbe92233 --- /dev/null +++ b/mindspore/core/ir/device_sync.h @@ -0,0 +1,38 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_IR_DEVICE_SYNC_H_ +#define MINDSPORE_CCSRC_IR_DEVICE_SYNC_H_ + +#include +#include +#include + +#include "ir/dtype/type.h" + +using std::string; + +namespace mindspore { +// Interface for data synchornize between device and host. 
+class DeviceSync { + public: + virtual bool SyncDeviceToHost(const std::vector &shape, size_t size, TypeId type, void *host_ptr) const = 0; + virtual bool SyncHostToDevice(const std::vector &shape, size_t size, TypeId type, + const void *host_ptr) const = 0; +}; +using DeviceSyncPtr = std::shared_ptr; +} // namespace mindspore +#endif // MINDSPORE_CCSRC_IR_DEVICE_SYNC_H_ diff --git a/mindspore/ccsrc/ir/dtype.cc b/mindspore/core/ir/dtype.cc similarity index 100% rename from mindspore/ccsrc/ir/dtype.cc rename to mindspore/core/ir/dtype.cc diff --git a/mindspore/ccsrc/ir/dtype.h b/mindspore/core/ir/dtype.h similarity index 99% rename from mindspore/ccsrc/ir/dtype.h rename to mindspore/core/ir/dtype.h index f10c56e659..dc277c031c 100644 --- a/mindspore/ccsrc/ir/dtype.h +++ b/mindspore/core/ir/dtype.h @@ -28,7 +28,7 @@ #include #include #include -#include "ir/base.h" +#include "base/base.h" #include "ir/named.h" #include "ir/dtype/type.h" diff --git a/mindspore/ccsrc/ir/dtype/container.cc b/mindspore/core/ir/dtype/container.cc similarity index 100% rename from mindspore/ccsrc/ir/dtype/container.cc rename to mindspore/core/ir/dtype/container.cc diff --git a/mindspore/ccsrc/ir/dtype/container.h b/mindspore/core/ir/dtype/container.h similarity index 99% rename from mindspore/ccsrc/ir/dtype/container.h rename to mindspore/core/ir/dtype/container.h index 0612d24c4d..29579fe73c 100644 --- a/mindspore/ccsrc/ir/dtype/container.h +++ b/mindspore/core/ir/dtype/container.h @@ -29,7 +29,7 @@ #include #include #include -#include "ir/base.h" +#include "base/base.h" #include "ir/named.h" #include "ir/dtype/type.h" diff --git a/mindspore/ccsrc/ir/dtype/empty.cc b/mindspore/core/ir/dtype/empty.cc similarity index 100% rename from mindspore/ccsrc/ir/dtype/empty.cc rename to mindspore/core/ir/dtype/empty.cc diff --git a/mindspore/ccsrc/ir/dtype/empty.h b/mindspore/core/ir/dtype/empty.h similarity index 99% rename from mindspore/ccsrc/ir/dtype/empty.h rename to 
mindspore/core/ir/dtype/empty.h index e3b46ec7d9..e6149a1fce 100644 --- a/mindspore/ccsrc/ir/dtype/empty.h +++ b/mindspore/core/ir/dtype/empty.h @@ -29,7 +29,7 @@ #include #include #include -#include "ir/base.h" +#include "base/base.h" #include "ir/named.h" #include "ir/dtype/type.h" diff --git a/mindspore/ccsrc/ir/dtype/number.cc b/mindspore/core/ir/dtype/number.cc similarity index 100% rename from mindspore/ccsrc/ir/dtype/number.cc rename to mindspore/core/ir/dtype/number.cc diff --git a/mindspore/ccsrc/ir/dtype/number.h b/mindspore/core/ir/dtype/number.h similarity index 99% rename from mindspore/ccsrc/ir/dtype/number.h rename to mindspore/core/ir/dtype/number.h index f8a746f8d6..8997ddc4df 100644 --- a/mindspore/ccsrc/ir/dtype/number.h +++ b/mindspore/core/ir/dtype/number.h @@ -29,7 +29,7 @@ #include #include #include -#include "ir/base.h" +#include "base/base.h" #include "ir/named.h" #include "ir/dtype/type.h" diff --git a/mindspore/ccsrc/ir/dtype/ref.cc b/mindspore/core/ir/dtype/ref.cc similarity index 100% rename from mindspore/ccsrc/ir/dtype/ref.cc rename to mindspore/core/ir/dtype/ref.cc diff --git a/mindspore/ccsrc/ir/dtype/ref.h b/mindspore/core/ir/dtype/ref.h similarity index 98% rename from mindspore/ccsrc/ir/dtype/ref.h rename to mindspore/core/ir/dtype/ref.h index 7d8159289f..e798d72af5 100644 --- a/mindspore/ccsrc/ir/dtype/ref.h +++ b/mindspore/core/ir/dtype/ref.h @@ -29,7 +29,7 @@ #include #include #include -#include "ir/base.h" +#include "base/base.h" #include "ir/named.h" #include "ir/dtype/type.h" diff --git a/mindspore/ccsrc/ir/dtype/type.cc b/mindspore/core/ir/dtype/type.cc similarity index 100% rename from mindspore/ccsrc/ir/dtype/type.cc rename to mindspore/core/ir/dtype/type.cc diff --git a/mindspore/ccsrc/ir/dtype/type.h b/mindspore/core/ir/dtype/type.h similarity index 99% rename from mindspore/ccsrc/ir/dtype/type.h rename to mindspore/core/ir/dtype/type.h index cba0d17fce..2e38e8ffb6 100644 --- a/mindspore/ccsrc/ir/dtype/type.h +++ 
b/mindspore/core/ir/dtype/type.h @@ -32,7 +32,7 @@ #include #include -#include "ir/base.h" +#include "base/base.h" #include "ir/named.h" #include "ir/dtype/type_id.h" diff --git a/mindspore/ccsrc/ir/dtype/type_extends.cc b/mindspore/core/ir/dtype/type_extends.cc similarity index 93% rename from mindspore/ccsrc/ir/dtype/type_extends.cc rename to mindspore/core/ir/dtype/type_extends.cc index a77a6a9cba..771a460c17 100644 --- a/mindspore/ccsrc/ir/dtype/type_extends.cc +++ b/mindspore/core/ir/dtype/type_extends.cc @@ -15,7 +15,7 @@ */ #include "ir/dtype/type.h" -#include "pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" namespace mindspore { abstract::AbstractBasePtr Type::ToAbstract() { diff --git a/mindspore/ccsrc/ir/dtype/type_id.h b/mindspore/core/ir/dtype/type_id.h similarity index 88% rename from mindspore/ccsrc/ir/dtype/type_id.h rename to mindspore/core/ir/dtype/type_id.h index a711779e91..6fb2a354c1 100644 --- a/mindspore/ccsrc/ir/dtype/type_id.h +++ b/mindspore/core/ir/dtype/type_id.h @@ -86,8 +86,8 @@ enum TypeId : int { // TypeId name map // const std::unordered_map type_name_map = { - {kNumberTypeBool, "Bool"}, {kNumberTypeInt8, "Int8"}, {kNumberTypeUInt8, "UInt8"}, - {kNumberTypeInt16, "Int16"}, {kNumberTypeInt32, "Int32"}, {kNumberTypeInt64, "Int64"}, - {kNumberTypeFloat16, "Float16"}, {kNumberTypeFloat32, "Float32"}, {kNumberTypeFloat64, "Float64"}}; + {kNumberTypeBool, "bool_"}, {kNumberTypeInt8, "int8"}, {kNumberTypeUInt8, "uint8"}, + {kNumberTypeInt16, "int16"}, {kNumberTypeInt32, "int32"}, {kNumberTypeInt64, "int64"}, + {kNumberTypeFloat16, "float16"}, {kNumberTypeFloat32, "float32"}, {kNumberTypeFloat64, "float64"}}; } // namespace mindspore #endif // MINDSPORE_CCSRC_IR_DTYPE_TYPE_ID_H_ diff --git a/mindspore/ccsrc/ir/dtype_extends.cc b/mindspore/core/ir/dtype_extends.cc similarity index 69% rename from mindspore/ccsrc/ir/dtype_extends.cc rename to mindspore/core/ir/dtype_extends.cc index 732872cb4f..099748217e 
100644 --- a/mindspore/ccsrc/ir/dtype_extends.cc +++ b/mindspore/core/ir/dtype_extends.cc @@ -19,9 +19,7 @@ #include #include #include "utils/log_adapter.h" -#include "pipeline/static_analysis/abstract_value.h" -#include "pybind_api/api_register.h" -#include "pybind_api/export_flags.h" +#include "abstract/abstract_value.h" namespace mindspore { TypePtr TypeAnything::DeepCopy() const { return kAnyType; } @@ -425,134 +423,6 @@ bool IsSubType(TypePtr const &t1, TypePtr const &t2) { } } -REGISTER_PYBIND_DEFINE( - typing, ([](py::module *const m) { - auto m_sub = m->def_submodule("typing", "submodule for dtype"); - py::enum_(m_sub, "TypeId"); - (void)m_sub.def("is_subclass", &IsIdentidityOrSubclass, "is equal or subclass"); - (void)m_sub.def("load_type", &TypeIdToType, "load type"); - (void)m_sub.def( - "dump_type", [](const TypePtr &t) { return t->type_id(); }, "dump type"); - (void)m_sub.def("str_to_type", &StringToType, "string to typeptr"); - (void)py::class_>(m_sub, "Type") - .def_readonly(PYTHON_DTYPE_FLAG, &mindspore::Type::parse_info_) - .def("__eq__", - [](const TypePtr &t1, const TypePtr &t2) { - if (t1 != nullptr && t2 != nullptr) { - return *t1 == *t2; - } - return false; - }) - .def("__hash__", &Type::hash) - .def("__str__", &Type::ToString) - .def("__repr__", &Type::ReprString) - .def("__deepcopy__", [](const TypePtr &t, py::dict) { - if (t == nullptr) { - return static_cast(nullptr); - } - return t->DeepCopy(); - }); - (void)py::class_>(m_sub, "Number").def(py::init()); - (void)py::class_>(m_sub, "Bool") - .def(py::init()) - .def(py::pickle( - [](const Bool &) { // __getstate__ - return py::make_tuple(); - }, - [](const py::tuple &) { // __setstate__ - return std::make_shared(); - })); - (void)py::class_>(m_sub, "Int") - .def(py::init()) - .def(py::init(), py::arg("nbits")) - .def(py::pickle( - [](const Int &t) { // __getstate__ - /* Return a tuple that fully encodes the state of the object */ - return py::make_tuple(py::int_(t.nbits())); - }, - [](const 
py::tuple &t) { // __setstate__ - if (t.size() != 1) { - throw std::runtime_error("Invalid state!"); - } - /* Create a new C++ instance */ - Int data(t[0].cast()); - return data; - })); - (void)py::class_>(m_sub, "UInt") - .def(py::init()) - .def(py::init(), py::arg("nbits")) - .def(py::pickle( - [](const UInt &t) { // __getstate__ - /* Return a tuple that fully encodes the state of the object */ - return py::make_tuple(py::int_(t.nbits())); - }, - [](const py::tuple &t) { // __setstate__ - if (t.size() != 1) { - throw std::runtime_error("Invalid state!"); - } - /* Create a new C++ instance */ - UInt data(t[0].cast()); - return data; - })); - (void)py::class_>(m_sub, "Float") - .def(py::init()) - .def(py::init(), py::arg("nbits")) - .def(py::pickle( - [](const Float &t) { // __getstate__ - /* Return a tuple that fully encodes the state of the object */ - return py::make_tuple(py::int_(t.nbits())); - }, - [](const py::tuple &t) { // __setstate__ - if (t.size() != 1) { - throw std::runtime_error("Invalid state!"); - } - /* Create a new C++ instance */ - Float data(t[0].cast()); - return data; - })); - (void)py::class_>(m_sub, "List") - .def(py::init()) - .def(py::init>(), py::arg("elements")); - (void)py::class_>(m_sub, "Tuple") - .def(py::init()) - .def(py::init>(), py::arg("elements")); - (void)py::class_>(m_sub, "TensorType") - .def(py::init()) - .def(py::init(), py::arg("element")) - .def("element_type", &TensorType::element) - .def(py::pickle( - [](const TensorType &t) { // __getstate__ - /* Return a tuple that fully encodes the state of the object */ - return py::make_tuple(py::int_(static_cast(t.element()->type_id()))); - }, - [](const py::tuple &t) { // __setstate__ - if (t.size() != 1) { - throw std::runtime_error("Invalid state!"); - } - /* Create a new C++ instance */ - TensorType data(TypeIdToType(TypeId(static_cast(t[0].cast())))); - return data; - })); - (void)py::class_>(m_sub, "IndexedSlicesType") - .def(py::init()); - (void)py::class_>(m_sub, 
"UndeterminedType") - .def(py::init()); - (void)py::class_>(m_sub, "Function") - .def(py::init()) - .def(py::init, TypePtr>(), py::arg("args"), py::arg("retval")); - (void)py::class_>(m_sub, "Class").def(py::init()); - (void)py::class_>(m_sub, "SymbolicKeyType").def(py::init()); - (void)py::class_>(m_sub, "EnvType").def(py::init()); - (void)py::class_>(m_sub, "TypeNone").def(py::init()); - (void)py::class_>(m_sub, "TypeType").def(py::init()); - (void)py::class_>(m_sub, "String").def(py::init()); - (void)py::class_>(m_sub, "RefKeyType").def(py::init()); - (void)py::class_>(m_sub, "RefType").def(py::init()); - (void)py::class_>(m_sub, "TypeAnything").def(py::init()); - (void)py::class_>(m_sub, "Slice").def(py::init()); - (void)py::class_>(m_sub, "TypeEllipsis").def(py::init()); - })); - const TypePtr kTypeExternal = std::make_shared(); const TypePtr kTypeEnv = std::make_shared(); const TypePtr kTypeType = std::make_shared(); diff --git a/mindspore/core/ir/dtype_py.cc b/mindspore/core/ir/dtype_py.cc new file mode 100644 index 0000000000..66bd8ba5f6 --- /dev/null +++ b/mindspore/core/ir/dtype_py.cc @@ -0,0 +1,155 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/dtype.h" +#include +#include +#include +#include "utils/log_adapter.h" +#include "abstract/abstract_value.h" +#include "pybind_api/api_register.h" +#include "pybind_api/export_flags.h" + +namespace mindspore { +// Define python wrapper to handle data types. +REGISTER_PYBIND_DEFINE( + typing, ([](py::module *const m) { + auto m_sub = m->def_submodule("typing", "submodule for dtype"); + py::enum_(m_sub, "TypeId"); + (void)m_sub.def("is_subclass", &IsIdentidityOrSubclass, "is equal or subclass"); + (void)m_sub.def("load_type", &TypeIdToType, "load type"); + (void)m_sub.def( + "dump_type", [](const TypePtr &t) { return t->type_id(); }, "dump type"); + (void)m_sub.def("str_to_type", &StringToType, "string to typeptr"); + (void)py::class_>(m_sub, "Type") + .def_readonly(PYTHON_DTYPE_FLAG, &mindspore::Type::parse_info_) + .def("__eq__", + [](const TypePtr &t1, const TypePtr &t2) { + if (t1 != nullptr && t2 != nullptr) { + return *t1 == *t2; + } + return false; + }) + .def("__hash__", &Type::hash) + .def("__str__", &Type::ToString) + .def("__repr__", &Type::ReprString) + .def("__deepcopy__", [](const TypePtr &t, py::dict) { + if (t == nullptr) { + return static_cast(nullptr); + } + return t->DeepCopy(); + }); + (void)py::class_>(m_sub, "Number").def(py::init()); + (void)py::class_>(m_sub, "Bool") + .def(py::init()) + .def(py::pickle( + [](const Bool &) { // __getstate__ + return py::make_tuple(); + }, + [](const py::tuple &) { // __setstate__ + return std::make_shared(); + })); + (void)py::class_>(m_sub, "Int") + .def(py::init()) + .def(py::init(), py::arg("nbits")) + .def(py::pickle( + [](const Int &t) { // __getstate__ + /* Return a tuple that fully encodes the state of the object */ + return py::make_tuple(py::int_(t.nbits())); + }, + [](const py::tuple &t) { // __setstate__ + if (t.size() != 1) { + throw std::runtime_error("Invalid state!"); + } + /* Create a new C++ instance */ + Int data(t[0].cast()); + return data; + })); + 
(void)py::class_>(m_sub, "UInt") + .def(py::init()) + .def(py::init(), py::arg("nbits")) + .def(py::pickle( + [](const UInt &t) { // __getstate__ + /* Return a tuple that fully encodes the state of the object */ + return py::make_tuple(py::int_(t.nbits())); + }, + [](const py::tuple &t) { // __setstate__ + if (t.size() != 1) { + throw std::runtime_error("Invalid state!"); + } + /* Create a new C++ instance */ + UInt data(t[0].cast()); + return data; + })); + (void)py::class_>(m_sub, "Float") + .def(py::init()) + .def(py::init(), py::arg("nbits")) + .def(py::pickle( + [](const Float &t) { // __getstate__ + /* Return a tuple that fully encodes the state of the object */ + return py::make_tuple(py::int_(t.nbits())); + }, + [](const py::tuple &t) { // __setstate__ + if (t.size() != 1) { + throw std::runtime_error("Invalid state!"); + } + /* Create a new C++ instance */ + Float data(t[0].cast()); + return data; + })); + (void)py::class_>(m_sub, "List") + .def(py::init()) + .def(py::init>(), py::arg("elements")); + (void)py::class_>(m_sub, "Tuple") + .def(py::init()) + .def(py::init>(), py::arg("elements")); + (void)py::class_>(m_sub, "TensorType") + .def(py::init()) + .def(py::init(), py::arg("element")) + .def("element_type", &TensorType::element) + .def(py::pickle( + [](const TensorType &t) { // __getstate__ + /* Return a tuple that fully encodes the state of the object */ + return py::make_tuple(py::int_(static_cast(t.element()->type_id()))); + }, + [](const py::tuple &t) { // __setstate__ + if (t.size() != 1) { + throw std::runtime_error("Invalid state!"); + } + /* Create a new C++ instance */ + TensorType data(TypeIdToType(TypeId(static_cast(t[0].cast())))); + return data; + })); + (void)py::class_>(m_sub, "IndexedSlicesType") + .def(py::init()); + (void)py::class_>(m_sub, "UndeterminedType") + .def(py::init()); + (void)py::class_>(m_sub, "Function") + .def(py::init()) + .def(py::init, TypePtr>(), py::arg("args"), py::arg("retval")); + (void)py::class_>(m_sub, 
"Class").def(py::init()); + (void)py::class_>(m_sub, "SymbolicKeyType").def(py::init()); + (void)py::class_>(m_sub, "EnvType").def(py::init()); + (void)py::class_>(m_sub, "TypeNone").def(py::init()); + (void)py::class_>(m_sub, "TypeType").def(py::init()); + (void)py::class_>(m_sub, "String").def(py::init()); + (void)py::class_>(m_sub, "RefKeyType").def(py::init()); + (void)py::class_>(m_sub, "RefType").def(py::init()); + (void)py::class_>(m_sub, "TypeAnything").def(py::init()); + (void)py::class_>(m_sub, "Slice").def(py::init()); + (void)py::class_>(m_sub, "TypeEllipsis").def(py::init()); + })); +} // namespace mindspore diff --git a/mindspore/ccsrc/ir/func_graph.cc b/mindspore/core/ir/func_graph.cc similarity index 99% rename from mindspore/ccsrc/ir/func_graph.cc rename to mindspore/core/ir/func_graph.cc index 4e01e9003f..fabdd3e7d3 100644 --- a/mindspore/ccsrc/ir/func_graph.cc +++ b/mindspore/core/ir/func_graph.cc @@ -24,8 +24,7 @@ #include "debug/trace.h" #include "ir/manager.h" -#include "operator/ops.h" -#include "pybind_api/export_flags.h" +#include "frontend/operator/ops.h" #include "utils/ordered_set.h" #include "utils/convert_utils_base.h" @@ -45,7 +44,8 @@ FuncGraph::FuncGraph() hyper_param_count_(0), is_generated_(false), return_(nullptr), - manager_(std::weak_ptr()) { + manager_(std::weak_ptr()), + stub_(false) { debug_info_ = std::make_shared(); } diff --git a/mindspore/ccsrc/ir/func_graph.h b/mindspore/core/ir/func_graph.h similarity index 99% rename from mindspore/ccsrc/ir/func_graph.h rename to mindspore/core/ir/func_graph.h index b1be892a53..712c75b431 100644 --- a/mindspore/ccsrc/ir/func_graph.h +++ b/mindspore/core/ir/func_graph.h @@ -149,7 +149,6 @@ class FuncGraph : public FuncGraphBase { // get the graph's abstract abstract::AbstractFunctionPtr abstract(); - abstract::AbstractBasePtr MakeAbstractClosure(const abstract::AnalysisContextPtr &context); // return the graph's output, or nullptr if not yet deduced AnfNodePtr output() const; @@ -344,6 
+343,9 @@ class FuncGraph : public FuncGraphBase { void SetEffectDepends(const std::vector &depend_inputs); bool HasEffect(const CNodePtr &cnode); + bool stub() const { return stub_; } + void set_stub(bool stub) { stub_ = stub; } + private: // graph is manipulated by manager and others friend FuncGraphManager; @@ -402,6 +404,7 @@ class FuncGraph : public FuncGraphBase { // CNode order which relates to origin code order std::list order_; + bool stub_; }; inline CNodePtr NewCNode(const std::vector &inputs, const FuncGraphPtr &fg) { diff --git a/mindspore/ccsrc/ir/func_graph_cloner.cc b/mindspore/core/ir/func_graph_cloner.cc similarity index 97% rename from mindspore/ccsrc/ir/func_graph_cloner.cc rename to mindspore/core/ir/func_graph_cloner.cc index 4a0c69d99a..0857770cad 100644 --- a/mindspore/ccsrc/ir/func_graph_cloner.cc +++ b/mindspore/core/ir/func_graph_cloner.cc @@ -19,8 +19,8 @@ #include #include "ir/manager.h" -#include "ir/param_value_py.h" -#include "operator/ops.h" +#include "ir/param_value.h" +#include "frontend/operator/ops.h" #include "utils/convert_utils_base.h" #include "utils/log_adapter.h" #include "utils/profile.h" @@ -71,9 +71,8 @@ void Cloner::CloneParameter(const AnfNodePtr &node, const FuncGraphPtr &target, new_param->set_abstract(old_param->abstract()); new_param->set_name(old_param->name()); if (old_param->has_default()) { - auto param_value = std::dynamic_pointer_cast(old_param->default_param()); - auto param_value_new = std::make_shared(param_value->value()); - new_param->set_default_param(param_value_new); + // Default parameter can be shared since it is readonly. + new_param->set_default_param(old_param->default_param()); } ScopePtr scope = (node->scope() != kDefaultScope) ? 
node->scope() : this->scope(); new_param->set_scope(scope); @@ -219,6 +218,7 @@ void Cloner::SetFuncGraphInfo(const FuncGraphPtr &func_graph, FuncGraphPtr *cons (*target_func_graph)->set_kwonlyargs_count(func_graph->kwonlyargs_count()); (*target_func_graph)->set_hyper_param_count(func_graph->hyper_param_count()); (*target_func_graph)->set_is_generate(func_graph->is_generated()); + (*target_func_graph)->set_stub(func_graph->stub()); TraceManager::EndTrace(); } @@ -253,9 +253,8 @@ void Cloner::CloneParameter(const ParameterPtr ¶m, const AnfNodePtr &node) { if (node->isa()) { ParameterPtr old_param = dyn_cast(node); if (old_param->has_default()) { - auto param_value = std::dynamic_pointer_cast(old_param->default_param()); - auto param_value_new = std::make_shared(param_value->value()); - param->set_default_param(param_value_new); + // Default parameter can be shared since it is readonly. + param->set_default_param(old_param->default_param()); } param->set_name(old_param->name()); } @@ -631,6 +630,7 @@ FuncGraphPtr TransformableClone(const FuncGraphPtr &func_graph, const TraceInfoP new_func_graph->set_kwonlyargs_count(func_graph->kwonlyargs_count()); new_func_graph->set_hyper_param_count(func_graph->hyper_param_count()); new_func_graph->set_is_generate(func_graph->is_generated()); + new_func_graph->set_stub(func_graph->stub()); for (auto &item : func_graph->parameter_default_value()) { new_func_graph->set_param_default_value(item.first, cloner[item.second]); } diff --git a/mindspore/ccsrc/ir/func_graph_cloner.h b/mindspore/core/ir/func_graph_cloner.h similarity index 100% rename from mindspore/ccsrc/ir/func_graph_cloner.h rename to mindspore/core/ir/func_graph_cloner.h diff --git a/mindspore/ccsrc/ir/func_graph_extends.cc b/mindspore/core/ir/func_graph_extends.cc similarity index 96% rename from mindspore/ccsrc/ir/func_graph_extends.cc rename to mindspore/core/ir/func_graph_extends.cc index ad7aa6ee0c..579409b05e 100644 --- a/mindspore/ccsrc/ir/func_graph_extends.cc 
+++ b/mindspore/core/ir/func_graph_extends.cc @@ -22,12 +22,9 @@ #include "ir/manager.h" #include "ir/func_graph_cloner.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "utils/ordered_set.h" -#include "pipeline/static_analysis/abstract_value.h" -#include "pipeline/static_analysis/static_analysis.h" -#include "pipeline/static_analysis/abstract_function.h" - +#include "abstract/abstract_value.h" #include "debug/anf_ir_dump.h" #include "debug/trace.h" #include "debug/draw.h" @@ -60,14 +57,6 @@ AbstractFunctionPtr FuncGraph::abstract() { return std::make_shared(args_spec_list, output()->abstract()); } -abstract::AbstractBasePtr FuncGraph::MakeAbstractClosure(const abstract::AnalysisContextPtr &context) { - AnalysisContextPtr temp_context = context; - if (temp_context == nullptr) { - temp_context = abstract::AnalysisContext::DummyContext(); - } - return std::make_shared(shared_from_base(), temp_context); -} - void FuncGraph::set_output(const AnfNodePtr &value, bool force_new_ret) { if (force_new_ret || return_ == nullptr) { std::vector params({NewValueNode(prim::kPrimReturn), value}); diff --git a/mindspore/core/ir/func_graph_py.cc b/mindspore/core/ir/func_graph_py.cc new file mode 100644 index 0000000000..cff25b5aa1 --- /dev/null +++ b/mindspore/core/ir/func_graph_py.cc @@ -0,0 +1,35 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include "ir/meta_func_graph.h" +#include "ir/func_graph.h" + +#include "pybind_api/api_register.h" +#include "pybind_api/export_flags.h" + +namespace mindspore { +REGISTER_PYBIND_DEFINE(FuncGraph, ([](const pybind11::module *m) { + // Define python "MetaFuncGraph_" class + (void)py::class_>(*m, "MetaFuncGraph_") + .def_readonly(PYTHON_METAFUNCGRAPH_FLAG, &MetaFuncGraph::parse_info_) + .def(py::init()); + // Define python "FuncGraph" class + (void)py::class_(*m, "FuncGraph") + .def(py::init()) + .def("str", &FuncGraph::ToString, "Get FuncGraph string representation.") + .def("get_return", &FuncGraph::get_return, "Get return node of FuncGraph"); + })); +} // namespace mindspore diff --git a/mindspore/core/ir/kernel_info_dev.h b/mindspore/core/ir/kernel_info_dev.h new file mode 100644 index 0000000000..87c717bdcb --- /dev/null +++ b/mindspore/core/ir/kernel_info_dev.h @@ -0,0 +1,32 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_IR_KERNEL_INFO_DEV_H_ +#define MINDSPORE_CCSRC_IR_KERNEL_INFO_DEV_H_ + +#include + +namespace mindspore { +// Interface for device kernel program information. +class KernelInfoDevice { + public: + // If kernel program was built and build info is set. 
+ virtual bool has_build_info() const = 0; +}; +using KernelInfoDevicePtr = std::shared_ptr; +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_IR_KERNEL_INFO_DEV_H_ diff --git a/mindspore/ccsrc/ir/lite/param_value_lite.h b/mindspore/core/ir/lite/param_value_lite.h similarity index 97% rename from mindspore/ccsrc/ir/lite/param_value_lite.h rename to mindspore/core/ir/lite/param_value_lite.h index 2b249cfa4f..1da9b915c2 100644 --- a/mindspore/ccsrc/ir/lite/param_value_lite.h +++ b/mindspore/core/ir/lite/param_value_lite.h @@ -19,7 +19,7 @@ #include -#include "ir/anf.h" +#include "ir/param_value.h" namespace mindspore { class ParamValueLite : public ParamValue { diff --git a/mindspore/ccsrc/ir/lite/tensor.cc b/mindspore/core/ir/lite/tensor.cc similarity index 100% rename from mindspore/ccsrc/ir/lite/tensor.cc rename to mindspore/core/ir/lite/tensor.cc diff --git a/mindspore/ccsrc/ir/lite/tensor.h b/mindspore/core/ir/lite/tensor.h similarity index 100% rename from mindspore/ccsrc/ir/lite/tensor.h rename to mindspore/core/ir/lite/tensor.h diff --git a/mindspore/ccsrc/ir/manager.cc b/mindspore/core/ir/manager.cc similarity index 99% rename from mindspore/ccsrc/ir/manager.cc rename to mindspore/core/ir/manager.cc index cf56500aea..00c39679cd 100644 --- a/mindspore/ccsrc/ir/manager.cc +++ b/mindspore/core/ir/manager.cc @@ -26,7 +26,7 @@ #include "ir/func_graph.h" #include "utils/profile.h" #include "utils/convert_utils_base.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { diff --git a/mindspore/ccsrc/ir/manager.h b/mindspore/core/ir/manager.h similarity index 100% rename from mindspore/ccsrc/ir/manager.h rename to mindspore/core/ir/manager.h diff --git a/mindspore/ccsrc/ir/meta_func_graph.cc b/mindspore/core/ir/meta_func_graph.cc similarity index 75% rename from mindspore/ccsrc/ir/meta_func_graph.cc rename to mindspore/core/ir/meta_func_graph.cc index 3b2704613a..c0cf9d4d2f 100644 --- a/mindspore/ccsrc/ir/meta_func_graph.cc +++ 
b/mindspore/core/ir/meta_func_graph.cc @@ -17,22 +17,9 @@ */ #include "ir/meta_func_graph.h" -#include "pipeline/static_analysis/static_analysis.h" -#include "pipeline/static_analysis/abstract_function.h" // namespace to support intermediate representation definition namespace mindspore { -abstract::AbstractBasePtr MetaFuncGraph::MakeAbstractClosure(const AnfNodePtr &anf_node) { - abstract::MetaFuncGraphAbstractClosurePtr meta_func_graph_fn; - if (anf_node == nullptr) { - meta_func_graph_fn = std::make_shared(shared_from_base()); - } else { - meta_func_graph_fn = - std::make_shared(shared_from_base(), anf_node->scope()); - } - return meta_func_graph_fn; -} - FuncGraphPtr MetaFuncGraph::GenerateFuncGraph(const abstract::AbstractBasePtrList &args_spec_list) { TypePtrList types; (void)std::transform(args_spec_list.begin(), args_spec_list.end(), std::back_inserter(types), diff --git a/mindspore/ccsrc/ir/meta_func_graph.h b/mindspore/core/ir/meta_func_graph.h similarity index 94% rename from mindspore/ccsrc/ir/meta_func_graph.h rename to mindspore/core/ir/meta_func_graph.h index f63f812f9e..933c3f700d 100644 --- a/mindspore/ccsrc/ir/meta_func_graph.h +++ b/mindspore/core/ir/meta_func_graph.h @@ -26,15 +26,11 @@ #include #include -#include "pybind11/pybind11.h" - #include "ir/dtype.h" #include "ir/anf.h" #include "ir/func_graph.h" #include "ir/signature.h" -#include "pipeline/static_analysis/abstract_value.h" - -namespace py = pybind11; +#include "abstract/abstract_value.h" namespace mindspore { // namespace to support intermediate representation definition @@ -48,7 +44,6 @@ class MetaFuncGraph : public FuncGraphBase { ~MetaFuncGraph() override = default; MS_DECLARE_PARENT(MetaFuncGraph, FuncGraphBase); - abstract::AbstractBasePtr MakeAbstractClosure(const AnfNodePtr &anf_node); // Return normalized versions of the arguments. // By default, this returns args unchanged. 
virtual abstract::AbstractBasePtrList NormalizeArgs(const abstract::AbstractBasePtrList &args_spec_list) const { diff --git a/mindspore/ccsrc/ir/meta_tensor.cc b/mindspore/core/ir/meta_tensor.cc similarity index 100% rename from mindspore/ccsrc/ir/meta_tensor.cc rename to mindspore/core/ir/meta_tensor.cc diff --git a/mindspore/ccsrc/ir/meta_tensor.h b/mindspore/core/ir/meta_tensor.h similarity index 99% rename from mindspore/ccsrc/ir/meta_tensor.h rename to mindspore/core/ir/meta_tensor.h index a8c07d6992..00106215e8 100644 --- a/mindspore/ccsrc/ir/meta_tensor.h +++ b/mindspore/core/ir/meta_tensor.h @@ -22,7 +22,7 @@ #include #include -#include "ir/base.h" +#include "base/base.h" #include "ir/dtype.h" #include "utils/convert_utils.h" #include "utils/hashing.h" diff --git a/mindspore/ccsrc/ir/meta_tensor_extends.cc b/mindspore/core/ir/meta_tensor_extends.cc similarity index 96% rename from mindspore/ccsrc/ir/meta_tensor_extends.cc rename to mindspore/core/ir/meta_tensor_extends.cc index 87f1db95e5..d73aa19374 100644 --- a/mindspore/ccsrc/ir/meta_tensor_extends.cc +++ b/mindspore/core/ir/meta_tensor_extends.cc @@ -22,7 +22,7 @@ #include #include -#include "pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" namespace mindspore { namespace tensor { diff --git a/mindspore/ccsrc/ir/named.cc b/mindspore/core/ir/named.cc similarity index 96% rename from mindspore/ccsrc/ir/named.cc rename to mindspore/core/ir/named.cc index 9e1a7968b8..802f0c8693 100644 --- a/mindspore/ccsrc/ir/named.cc +++ b/mindspore/core/ir/named.cc @@ -15,7 +15,7 @@ */ #include "ir/named.h" -#include "pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" namespace mindspore { bool Named::operator==(const Value &other) const { diff --git a/mindspore/ccsrc/ir/named.h b/mindspore/core/ir/named.h similarity index 100% rename from mindspore/ccsrc/ir/named.h rename to mindspore/core/ir/named.h diff --git a/mindspore/ccsrc/ir/optimizer_caller.h 
b/mindspore/core/ir/optimizer_caller.h similarity index 100% rename from mindspore/ccsrc/ir/optimizer_caller.h rename to mindspore/core/ir/optimizer_caller.h diff --git a/mindspore/core/ir/param_value.h b/mindspore/core/ir/param_value.h new file mode 100644 index 0000000000..00b79ae91c --- /dev/null +++ b/mindspore/core/ir/param_value.h @@ -0,0 +1,95 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_IR_PARAM_VALUE_H_ +#define MINDSPORE_CCSRC_IR_PARAM_VALUE_H_ + +#include +#include +#include +#include +#include "ir/anf.h" +#include "ir/tensor.h" + +namespace mindspore { + +class ParamValue { + public: + ParamValue() {} + + ParamValue(const ParamValue &other) = default; + + ~ParamValue() = default; + + tensor::MetaTensorPtr value() const { return value_; } + void set_value(const tensor::MetaTensorPtr &value) { value_ = value; } + + const std::string &name() const { return name_; } + void set_name(const std::string &name) { name_ = name; } + + const std::string &sparse_grad() const { return sparse_grad_; } + void set_sparse_grad(const std::string &sparse_grad) { sparse_grad_ = sparse_grad; } + + bool requires_grad() const { return requires_grad_; } + void set_requires_grad(bool requires_grad) { requires_grad_ = requires_grad; } + + bool layerwise_parallel() const { return layerwise_parallel_; } + void set_layerwise_parallel(bool layerwise_parallel) { layerwise_parallel_ = 
layerwise_parallel; } + + bool has_indexed_slices_grad() const { return has_indexed_slices_grad_; } + void set_has_indexed_slices_grad(bool b) { has_indexed_slices_grad_ = b; } + + // Whether the parameter clone from other parameter. + bool cloned() const { return cloned_; } + + // Whether the parameter is cloned. + bool be_cloned() const { return be_cloned_; } + + // If the parameter is cloned, generate one index per clone. + const std::vector &be_cloned_index() const { return be_cloned_index_; } + + // If the parameter clone from other parameter, it has a unique index. + int32_t cloned_index() const { return cloned_index_; } + + // Make a cloned parameter and update clone info. + ParamValuePtr Clone() { + static std::atomic parameter_cloned_index{1}; + int32_t index = parameter_cloned_index.fetch_add(1, std::memory_order_relaxed); + auto clone = std::make_shared(*this); + clone->be_cloned_ = false; + clone->cloned_ = true; + clone->be_cloned_index_ = {}; + clone->cloned_index_ = index; + this->be_cloned_ = true; + this->be_cloned_index_.push_back(index); + return clone; + } + + private: + tensor::MetaTensorPtr value_; + std::string name_{"Parameter"}; + std::string sparse_grad_; + bool requires_grad_{true}; + bool layerwise_parallel_{false}; + bool has_indexed_slices_grad_{false}; + bool be_cloned_{false}; + bool cloned_{false}; + std::vector be_cloned_index_; + int32_t cloned_index_{0}; +}; + +} // namespace mindspore +#endif // MINDSPORE_CCSRC_IR_PARAM_VALUE_H_ diff --git a/mindspore/core/ir/param_value_py.cc b/mindspore/core/ir/param_value_py.cc new file mode 100644 index 0000000000..fb4b313c22 --- /dev/null +++ b/mindspore/core/ir/param_value_py.cc @@ -0,0 +1,55 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "ir/param_value.h" +#include "pybind11/pybind11.h" +#include "pybind_api/api_register.h" + +namespace mindspore { +namespace py = pybind11; + +REGISTER_PYBIND_DEFINE(ParamValue, ([](const py::module *m) { + (void)py::class_(*m, "ParamValue") + .def(py::init()) + .def("clone", &ParamValue::Clone) + .def_property("data", &ParamValue::value, &ParamValue::set_value) + .def_property("name", &ParamValue::name, &ParamValue::set_name) + .def_property("requires_grad", &ParamValue::requires_grad, &ParamValue::set_requires_grad) + .def_property("layerwise_parallel", &ParamValue::layerwise_parallel, + &ParamValue::set_layerwise_parallel) + .def_property("has_indexed_slices_grad", &ParamValue::has_indexed_slices_grad, + &ParamValue::set_has_indexed_slices_grad) + .def_property("sparse_grad", &ParamValue::sparse_grad, &ParamValue::set_sparse_grad) + .def(py::pickle( + [](const ParamValue &p) { // __getstate__ + return py::make_tuple(py::cast(p.value()), p.name(), p.requires_grad(), + p.layerwise_parallel(), p.has_indexed_slices_grad(), + p.sparse_grad()); + }, + [](const py::tuple &t) { // __setstate__ + if (t.size() != 6) { + std::runtime_error("Invalid state for ParamValue!"); + } + ParamValuePtr p = std::make_shared(); + p->set_value(t[0].cast()); + p->set_name(t[1].cast()); + p->set_requires_grad(t[2].cast()); + p->set_layerwise_parallel(t[3].cast()); + p->set_has_indexed_slices_grad(t[4].cast()); + p->set_sparse_grad(t[5].cast()); + return p; + })); + })); +} // namespace mindspore diff --git a/mindspore/ccsrc/ir/pattern_matcher.h 
b/mindspore/core/ir/pattern_matcher.h similarity index 99% rename from mindspore/ccsrc/ir/pattern_matcher.h rename to mindspore/core/ir/pattern_matcher.h index 6605b9ce4c..94ba4a381a 100644 --- a/mindspore/ccsrc/ir/pattern_matcher.h +++ b/mindspore/core/ir/pattern_matcher.h @@ -21,7 +21,7 @@ #include #include "ir/anf.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { diff --git a/mindspore/ccsrc/ir/primitive_base.cc b/mindspore/core/ir/primitive.cc similarity index 95% rename from mindspore/ccsrc/ir/primitive_base.cc rename to mindspore/core/ir/primitive.cc index 864427fe13..352c0f31ae 100644 --- a/mindspore/ccsrc/ir/primitive_base.cc +++ b/mindspore/core/ir/primitive.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ir/primitive_base.h" +#include "ir/primitive.h" #include diff --git a/mindspore/ccsrc/ir/primitive_base.h b/mindspore/core/ir/primitive.h similarity index 90% rename from mindspore/ccsrc/ir/primitive_base.h rename to mindspore/core/ir/primitive.h index b34c43d00e..5471b58063 100644 --- a/mindspore/ccsrc/ir/primitive_base.h +++ b/mindspore/core/ir/primitive.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef MINDSPORE_CCSRC_IR_PRIMITIVE_BASE_H_ -#define MINDSPORE_CCSRC_IR_PRIMITIVE_BASE_H_ +#ifndef MINDSPORE_CCSRC_IR_PRIMITIVE_H_ +#define MINDSPORE_CCSRC_IR_PRIMITIVE_H_ #include #include @@ -24,9 +24,9 @@ #include #include "ir/dtype/type.h" -#include "pybind11/pybind11.h" - -namespace py = pybind11; +#include "abstract/abstract_value.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "utils/base_ref_extends.h" namespace mindspore { // Supported meta type @@ -114,6 +114,8 @@ class Primitive : public Named { void set_has_signature(bool has_signature) { has_signature_ = has_signature; } bool has_signature() const { return has_signature_; } bool is_base() const { return is_base_; } + virtual BaseRef RunHookFunction(const VectorRef &args) const { MS_LOG(EXCEPTION) << "call a empty function!"; } + virtual void CopyHookFunction(const PrimitivePtr &primitive) { MS_LOG(EXCEPTION) << "call a empty function!"; } protected: std::unordered_map attrs_; @@ -147,4 +149,4 @@ struct PrimitiveHasher { } }; } // namespace mindspore -#endif // MINDSPORE_CCSRC_IR_PRIMITIVE_BASE_H_ +#endif // MINDSPORE_CCSRC_IR_PRIMITIVE_H_ diff --git a/mindspore/ccsrc/ir/primitive.cc b/mindspore/core/ir/primitive_py.cc similarity index 60% rename from mindspore/ccsrc/ir/primitive.cc rename to mindspore/core/ir/primitive_py.cc index 6ec27c2567..1a97487ddc 100644 --- a/mindspore/ccsrc/ir/primitive.cc +++ b/mindspore/core/ir/primitive_py.cc @@ -14,33 +14,55 @@ * limitations under the License. 
*/ -#include "ir/primitive.h" +#include "ir/primitive_py.h" #include #include #include "ir/signature.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "./common.h" -#include "pipeline/parse/python_adapter.h" -#include "pipeline/parse/data_converter.h" +#include "pipeline/jit/parse/python_adapter.h" +#include "pipeline/jit/parse/data_converter.h" #include "pybind11/pytypes.h" #include "utils/convert_utils_base.h" #include "utils/primitive_utils.h" - +#include "utils/base_ref_py.h" #include "pybind_api/api_register.h" #include "pybind_api/export_flags.h" namespace mindspore { +namespace { +constexpr auto kBpropAttrName = "bprop"; +constexpr auto kCellHookAttrName = "cell_hook"; +constexpr auto kCellIDAttrName = "cell_id"; +void SyncData(const py::object &arg) { + if (py::isinstance(arg)) { + py::tuple arg_list = py::cast(arg); + for (size_t i = 0; i < arg_list.size(); i++) { + SyncData(arg_list[i]); + } + } + if (py::isinstance(arg)) { + auto tensor = py::cast(arg); + (void)tensor->data_sync(); + } +} +} // namespace +std::map PrimitivePy::hook_grad_; +static ValuePtr PyArgToValue(const py::object &arg) { + if (py::isinstance(arg) && + py::cast(arg) == SignatureEnumKind::kKindEmptyDefaultValue) { + return nullptr; + } + return parse::data_converter::PyDataToValue(arg); +} + void PrimitivePy::set_signatures( std::vector> signatures) { signatures_.clear(); for (auto &signature : signatures) { - std::string name; - SignatureEnumRW rw; - SignatureEnumKind kind; - py::object default_value; - SignatureEnumDType dtype; - std::tie(name, rw, kind, default_value, dtype) = signature; - signatures_.emplace_back(Signature(name, rw, kind, default_value, dtype)); + auto [name, rw, kind, arg_default, dtype] = signature; + auto default_value = PyArgToValue(arg_default); + signatures_.emplace_back(name, rw, kind, default_value, dtype); } set_has_signature(true); } @@ -56,6 +78,51 @@ py::function PrimitivePy::GetBpropFunction() { } } +BaseRef 
PrimitivePy::RunHookFunction(const VectorRef &args) const { + auto py_args = py::tuple(args.size()); + size_t i = 0; + for (auto &arg : args) { + py_args[i] = BaseRefToPyData(arg); + MS_LOG(DEBUG) << "arg:" << i << ":"; + i++; + } + py::object obj; + bool is_bprop = this->HasAttr(kBpropAttrName); + if (is_bprop) { + SyncData(py_args); + obj = hook_(*py_args); + return std::make_shared(obj); + } + SyncData(py_args[2]); + bool is_cell = this->HasAttr(kCellHookAttrName); + if (is_cell) { + auto cell_id = GetValue(this->GetAttr(kCellIDAttrName)); + auto iter = hook_grad_.find(cell_id); + if (iter != hook_grad_.end()) { + auto hook_args = py::tuple(3); + hook_args[0] = cell_id; + hook_args[1] = py::make_tuple(iter->second); + hook_args[2] = py::make_tuple(py_args[2]); + obj = hook_(*hook_args); + if (py::isinstance(obj)) { + obj = py_args[2]; + } + hook_grad_.erase(cell_id); + } else { + hook_grad_[cell_id] = py_args[2]; + obj = py_args[2]; + } + } else { + // Hook operator for execute variable hook function + obj = hook_(py::make_tuple(py_args[2])); + if (py::isinstance(obj)) { + obj = py_args[2]; + } + } + obj = py::make_tuple(obj); + return std::make_shared(obj); +} + py::function PrimitivePy::GetComputeFunction() { static const char *const compute_func_name = "vm_impl"; @@ -99,6 +166,16 @@ py::dict PrimitivePy::GetAttrDict() { return attr_dict; } +void PrimitivePy::CopyHookFunction(const PrimitivePtr &primitive) { + MS_EXCEPTION_IF_NULL(primitive); + if (!primitive->isa()) { + MS_LOG(EXCEPTION) << "Cannot copy a primtive which is not python primitive hook function to python primitive!"; + } + auto primitive_py = primitive->cast(); + MS_EXCEPTION_IF_NULL(primitive_py); + this->set_hook(primitive_py->hook()); +} + REGISTER_PYBIND_DEFINE(Primitive_, ([](const py::module *m) { (void)py::enum_(*m, "prim_type", py::arithmetic()) .value("unknown", PrimType::kPrimTypeUnknown) diff --git a/mindspore/ccsrc/ir/primitive.h b/mindspore/core/ir/primitive_py.h similarity index 78% 
rename from mindspore/ccsrc/ir/primitive.h rename to mindspore/core/ir/primitive_py.h index 257302c0c4..2dc45ac341 100644 --- a/mindspore/ccsrc/ir/primitive.h +++ b/mindspore/core/ir/primitive_py.h @@ -14,22 +14,25 @@ * limitations under the License. */ -#ifndef MINDSPORE_CCSRC_IR_PRIMITIVE_H_ -#define MINDSPORE_CCSRC_IR_PRIMITIVE_H_ +#ifndef MINDSPORE_CCSRC_IR_PRIMITIVE_PY_H_ +#define MINDSPORE_CCSRC_IR_PRIMITIVE_PY_H_ #include #include #include #include #include +#include -#include "pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" #include "utils/misc.h" +#include "pybind11/pybind11.h" #include "utils/log_adapter.h" -#include "ir/primitive_base.h" +#include "ir/primitive.h" #include "ir/signature.h" -#include "parallel/ops_info/operator_info.h" +#include "frontend/parallel/ops_info/operator_info.h" +namespace py = pybind11; namespace mindspore { class PrimitivePy : public Primitive { public: @@ -46,12 +49,14 @@ class PrimitivePy : public Primitive { const std::vector &signatures() const { return signatures_; } + void CopyHookFunction(const PrimitivePtr &primitive) override; + void AddPyAttr(const py::str &name, const py::object &obj); py::dict GetAttrDict(); void set_hook(const py::function &hook) { hook_ = hook; } py::function hook() const { return hook_; } - + BaseRef RunHookFunction(const VectorRef &args) const override; const bool parse_info_ = true; const py::object &GetPyObj() const { return python_obj_; } bool is_tuple_input_ = false; @@ -60,8 +65,9 @@ class PrimitivePy : public Primitive { py::object python_obj_; py::function hook_; std::vector signatures_; + static std::map hook_grad_; }; using PrimitivePyPtr = std::shared_ptr; } // namespace mindspore -#endif // MINDSPORE_CCSRC_IR_PRIMITIVE_H_ +#endif // MINDSPORE_CCSRC_IR_PRIMITIVE_PY_H_ diff --git a/mindspore/ccsrc/ir/scalar.h b/mindspore/core/ir/scalar.h similarity index 99% rename from mindspore/ccsrc/ir/scalar.h rename to mindspore/core/ir/scalar.h index 
e8e29fb2f9..adae8c65f9 100644 --- a/mindspore/ccsrc/ir/scalar.h +++ b/mindspore/core/ir/scalar.h @@ -27,7 +27,7 @@ #include #include -#include "ir/base.h" +#include "base/base.h" #include "ir/dtype.h" #include "ir/dtype/number.h" diff --git a/mindspore/ccsrc/ir/scope.cc b/mindspore/core/ir/scope.cc similarity index 100% rename from mindspore/ccsrc/ir/scope.cc rename to mindspore/core/ir/scope.cc diff --git a/mindspore/ccsrc/ir/scope.h b/mindspore/core/ir/scope.h similarity index 100% rename from mindspore/ccsrc/ir/scope.h rename to mindspore/core/ir/scope.h diff --git a/mindspore/ccsrc/ir/signature.h b/mindspore/core/ir/signature.h similarity index 85% rename from mindspore/ccsrc/ir/signature.h rename to mindspore/core/ir/signature.h index 48be7e0f31..e9a5a2e1ca 100644 --- a/mindspore/ccsrc/ir/signature.h +++ b/mindspore/core/ir/signature.h @@ -16,14 +16,11 @@ #ifndef MINDSPORE_CCSRC_IR_SIGNATURE_H_ #define MINDSPORE_CCSRC_IR_SIGNATURE_H_ + #include #include - -#include "pybind11/operators.h" #include "ir/value.h" -namespace py = pybind11; - namespace mindspore { // Input signature, support type enum SignatureEnumRW { @@ -62,8 +59,10 @@ struct Signature { ValuePtr default_value; // nullptr for no default value SignatureEnumDType dtype; Signature(const std::string &arg_name, const SignatureEnumRW &rw_tag, const SignatureEnumKind &arg_kind, - const py::object &arg_default, const SignatureEnumDType &arg_dtype); - Signature(const std::string &arg_name, const SignatureEnumRW &rw_tag, const SignatureEnumKind &arg_kind); + const ValuePtr &arg_default, const SignatureEnumDType &arg_dtype) + : name(arg_name), rw(rw_tag), kind(arg_kind), default_value(arg_default), dtype(arg_dtype) {} + Signature(const std::string &arg_name, const SignatureEnumRW &rw_tag, const SignatureEnumKind &arg_kind) + : Signature(arg_name, rw_tag, arg_kind, nullptr, SignatureEnumDType::kDTypeEmptyDefaultValue) {} }; } // namespace mindspore diff --git a/mindspore/ccsrc/ir/signature.cc 
b/mindspore/core/ir/signature_py.cc similarity index 76% rename from mindspore/ccsrc/ir/signature.cc rename to mindspore/core/ir/signature_py.cc index 8f312d5b98..f513df8533 100644 --- a/mindspore/ccsrc/ir/signature.cc +++ b/mindspore/core/ir/signature_py.cc @@ -15,30 +15,14 @@ */ #include "ir/signature.h" - #include "pybind11/operators.h" #include "pybind_api/api_register.h" -#include "pipeline/parse/data_converter.h" - -namespace mindspore { -Signature::Signature(const std::string &arg_name, const SignatureEnumRW &rw_tag, const SignatureEnumKind &arg_kind, - const py::object &arg_default, const SignatureEnumDType &arg_dtype) - : name(arg_name), rw(rw_tag), kind(arg_kind), dtype(arg_dtype) { - if (py::isinstance(arg_default) && - py::cast(arg_default) == SignatureEnumKind::kKindEmptyDefaultValue) { - default_value = nullptr; - } else { - default_value = parse::data_converter::PyDataToValue(arg_default); - } -} +#include "pipeline/jit/parse/data_converter.h" -Signature::Signature(const std::string &arg_name, const SignatureEnumRW &rw_tag, const SignatureEnumKind &arg_kind) - : name(arg_name), - rw(rw_tag), - kind(arg_kind), - default_value(nullptr), - dtype(SignatureEnumDType::kDTypeEmptyDefaultValue) {} +namespace py = pybind11; +namespace mindspore { +// Bind SignatureEnumRW as a python class. 
REGISTER_PYBIND_DEFINE(SignatureEnumRW, ([](const py::module *m) { (void)py::enum_(*m, "signature_rw", py::arithmetic()) .value("RW_READ", SignatureEnumRW::kRWRead) diff --git a/mindspore/ccsrc/ir/tensor.cc b/mindspore/core/ir/tensor.cc similarity index 69% rename from mindspore/ccsrc/ir/tensor.cc rename to mindspore/core/ir/tensor.cc index c06ba2a820..c04c2cca96 100644 --- a/mindspore/ccsrc/ir/tensor.cc +++ b/mindspore/core/ir/tensor.cc @@ -23,12 +23,22 @@ #include #include #include +#include +#include +#include +#include -#include "device/device_address.h" -#include "pipeline/static_analysis/abstract_value.h" +#include "runtime/device/device_address.h" +#include "abstract/abstract_value.h" namespace mindspore { namespace tensor { +constexpr auto kEllipsis = "..."; +constexpr auto kThreshold = 6; + +constexpr auto kThreshold1DFloat = kThreshold * 2; +constexpr auto kThreshold1DInt = kThreshold * 4; +constexpr auto kThreshold1DBool = kThreshold * 2; static std::string MakeId() { // Use atomic to make id generator thread safe. @@ -115,6 +125,7 @@ template class TensorDataImpl : public TensorData { public: explicit TensorDataImpl(const std::vector &shape) : ndim_(shape.size()), data_size_(SizeOf(shape)) {} + ~TensorDataImpl() = default; TensorDataImpl(const std::vector &shape, void *data, size_t data_len) : ndim_(shape.size()), data_size_(SizeOf(shape)), data_(CopyData(shape, data, data_len)) {} @@ -144,8 +155,8 @@ class TensorDataImpl : public TensorData { // Prevent null pointer for empty shape. return empty_data.data(); } + // Lazy allocation. if (data_.empty()) { - // Lazy allocation. 
data_.resize(data_size_); } return data_.data(); @@ -159,24 +170,140 @@ class TensorDataImpl : public TensorData { return false; } - std::string ToString() const override { + std::string ToString(const TypeId type, const std::vector &shape) const override { + constexpr auto valid = + std::is_same::value || std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value || std::is_same::value; + static_assert(valid, "Type is invalid"); + if (data_size_ == 0) { + return ""; + } + if (data_.empty()) { + return ""; + } + std::ostringstream ss; - ss << '['; - for (auto value : data_) { - ss << value << ','; + if (data_size_ == 1 && ndim_ == 0) { // Scalar + OutputDataString(ss, type, 0, 0, 1); + return ss.str(); } - ss << ']'; + ssize_t cursor = 0; + SummaryStringRecursive(ss, type, shape, &cursor, 0); return ss.str(); } private: + void OutputDataString(std::ostringstream &ss, const TypeId type, ssize_t cursor, ssize_t start, ssize_t end) const { + bool isScalar = ndim_ == 0 && end - start == 1; + int linefeedThreshold; + constexpr auto isFloat = + std::is_same::value || std::is_same::value || std::is_same::value; + for (ssize_t i = start; i < end && (cursor + i) < static_cast(data_size_); i++) { + const auto value = data_[cursor + i]; + if constexpr (isFloat) { + if (isScalar) { + ss << value; + } else { + ss << std::setw(15) << std::setprecision(8) << std::setiosflags(std::ios::scientific | std::ios::right) + << value; + } + linefeedThreshold = kThreshold1DFloat; + } else if (type == kNumberTypeBool) { + if (isScalar) { + ss << (value == 0 ? "False" : "True"); + } else { + ss << std::setw(5) << std::setiosflags(std::ios::right) << (value == 0 ? 
"False" : "True"); + } + linefeedThreshold = kThreshold1DBool; + } else { + constexpr auto isSigned = std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value; + if constexpr (isSigned) { + if (!isScalar && static_cast(value) >= 0) { + ss << ' '; + } + } + if constexpr (std::is_same::value) { + ss << static_cast(value); + } else if constexpr (std::is_same::value) { + ss << static_cast(value); + } else { + ss << value; + } + linefeedThreshold = kThreshold1DInt; + } + if (!isScalar && i != end - 1) { + ss << ' '; + } + if (!isScalar && ndim_ == 1 && (i + 1) % linefeedThreshold == 0) { + // Add a line feed every {threshold of type} for 1D tensor. + ss << '\n' << ' '; + } + } + } + + void SummaryStringRecursive(std::ostringstream &ss, const TypeId type, const std::vector &shape, ssize_t *cursor, + ssize_t depth) const { + if (depth >= static_cast(ndim_)) { + return; + } + ss << '['; + if (depth == static_cast(ndim_) - 1) { // Bottom dimension + ssize_t num = shape[depth]; + if (num > kThreshold && ndim_ > 1) { + OutputDataString(ss, type, *cursor, 0, kThreshold / 2); + ss << ' ' << kEllipsis << ' '; + OutputDataString(ss, type, *cursor, num - kThreshold / 2, num); + } else { + OutputDataString(ss, type, *cursor, 0, num); + } + *cursor += num; + } else { // Middle dimension + ssize_t num = shape[depth]; + // Handle the first half. + for (ssize_t i = 0; i < std::min(static_cast(kThreshold / 2), num); i++) { + if (i > 0) { + ss << '\n'; + ss << std::setw(depth + 1) << ' '; // Add the indent. + } + SummaryStringRecursive(ss, type, shape, cursor, depth + 1); + } + // Handle the ignored part. + if (num > kThreshold) { + ss << '\n'; + ss << std::setw(depth + 1) << ' '; // Add the indent. + ss << kEllipsis; + // Ignored at this layer. + ssize_t ignored = shape[depth + 1]; + for (ssize_t i = depth + 2; i < static_cast(ndim_); i++) { + ignored *= shape[i]; + } + // Multiple with ignored layers number. 
+ ignored *= num - kThreshold; + + *cursor += ignored; + } + // Handle the second half. + if (num > kThreshold / 2) { + for (ssize_t i = num - kThreshold / 2; i < num; i++) { + ss << '\n'; + ss << std::setw(depth + 1) << ' '; // Add the indent. + SummaryStringRecursive(ss, type, shape, cursor, depth + 1); + } + } + } + ss << ']'; + } + size_t ndim_{0}; size_t data_size_{0}; std::vector data_; }; template -TensorDataPtr MakeTensorData(TypeId data_type, const std::vector &shape, Args... args) { +TensorDataPtr MakeTensorData(TypeId data_type, const std::vector &shape, const Args... args) { switch (data_type) { case kNumberTypeBool: case kNumberTypeUInt8: @@ -213,7 +340,7 @@ Tensor::Tensor(const Tensor &tensor) data_(tensor.data_), dirty_(tensor.dirty_), id_(tensor.id_), - device_address_(tensor.device_address_) {} + device_sync_(tensor.device_sync_) {} Tensor::Tensor(const Tensor &tensor, TypeId data_type) : MetaTensor(data_type, tensor.shape_), @@ -221,7 +348,7 @@ Tensor::Tensor(const Tensor &tensor, TypeId data_type) data_(MakeTensorData(data_type, tensor.shape_, tensor.data_->data(), tensor.data_type_)), dirty_(tensor.dirty_), id_(tensor.id_), - device_address_(tensor.device_address_) {} + device_sync_(tensor.device_sync_) {} Tensor::Tensor(TypeId data_type, const std::vector &shape, TensorDataPtr data) : MetaTensor(data_type, shape), data_(std::move(data)), id_(MakeId()) {} @@ -266,10 +393,10 @@ bool Tensor::ValueEqual(const Tensor &tensor) const { Tensor &Tensor::AssignValue(const Tensor &tensor) { if (this != &tensor) { MetaTensor::operator=(tensor); - dirty_ = tensor.is_dirty(); - device_address_ = tensor.device_address(); + dirty_ = tensor.dirty_; + device_sync_ = tensor.device_sync_; data_ = tensor.data_; - id_ = tensor.id(); + id_ = tensor.id_; } return *this; } @@ -297,7 +424,7 @@ std::string Tensor::ToString() const { buf << "Tensor shape:[" << shape() << "]" << this->Dtype()->ToString(); // only print small tensor if (DataSize() < small_tensor_size) { - 
buf << "val:" << data().ToString(); + buf << ", value:" << data().ToString(data_type_, shape()); } return buf.str(); } @@ -307,13 +434,13 @@ std::string Tensor::ToStringRepr() const { auto type_ptr = this->Dtype(); MS_EXCEPTION_IF_NULL(type_ptr); buf << "Tensor shape:[" << shape() << "]" << type_ptr->ToString(); - buf << "\nval:" << data().ToString(); + buf << "\nvalue:" << data().ToString(data_type_, shape()); return buf.str(); } void Tensor::data_sync() const { - if (device_address_ != nullptr) { - if (!device_address_->SyncDeviceToHost(shape(), static_cast(data().nbytes()), data_type(), data_c())) { + if (device_sync_ != nullptr) { + if (!device_sync_->SyncDeviceToHost(shape(), static_cast(data().nbytes()), data_type(), data_c())) { MS_LOG(EXCEPTION) << "SyncDeviceToHost when asnumpy."; } } diff --git a/mindspore/ccsrc/ir/tensor.h b/mindspore/core/ir/tensor.h similarity index 94% rename from mindspore/ccsrc/ir/tensor.h rename to mindspore/core/ir/tensor.h index 5be8a063c1..727fb0fdd8 100644 --- a/mindspore/ccsrc/ir/tensor.h +++ b/mindspore/core/ir/tensor.h @@ -23,15 +23,13 @@ #include #include "Eigen/Core" -#include "device/device_address.h" +#include "ir/device_sync.h" #include "ir/meta_tensor.h" #include "include/ms_tensor.h" #include "utils/log_adapter.h" using float16 = Eigen::half; -using mindspore::device::DeviceAddress; -using DeviceAddressPtr = std::shared_ptr; // brief mindspore namespace. // // mindspore namespace is the top level namespace of MindSpore project. @@ -57,7 +55,7 @@ class TensorData { /// Is data equals. virtual bool equals(const TensorData &other) const = 0; /// To string. 
- virtual std::string ToString() const = 0; + virtual std::string ToString(const TypeId type, const std::vector &shape) const = 0; }; using TensorDataPtr = std::shared_ptr; @@ -180,7 +178,6 @@ class Tensor : public MetaTensor { // brief Get Tensor data pointer for c++ type // - // param writable true if writable, false if read only // return The pointer to the object void *data_c() { return data().data(); } @@ -217,14 +214,14 @@ class Tensor : public MetaTensor { std::string ToStringRepr() const; - bool is_init() { return init_flag_; } + bool is_init() const { return init_flag_; } void set_init_flag(bool flag) { init_flag_ = flag; } bool is_dirty() const { return dirty_; } void set_dirty(const bool dirty) { dirty_ = dirty; } - DeviceAddressPtr device_address() const { return device_address_; } - void set_device_address(const DeviceAddressPtr &device_address) { device_address_ = device_address; } + DeviceSyncPtr device_address() const { return device_sync_; } + void set_device_address(const DeviceSyncPtr &device_sync) { device_sync_ = device_sync; } std::string id() const { return id_; } @@ -235,7 +232,7 @@ class Tensor : public MetaTensor { TensorDataPtr data_{nullptr}; bool dirty_{true}; std::string id_{""}; - DeviceAddressPtr device_address_{nullptr}; + DeviceSyncPtr device_sync_{nullptr}; }; using TensorPtr = std::shared_ptr; using TensorPtrList = std::vector>; diff --git a/mindspore/ccsrc/ir/tensor_py.cc b/mindspore/core/ir/tensor_py.cc similarity index 98% rename from mindspore/ccsrc/ir/tensor_py.cc rename to mindspore/core/ir/tensor_py.cc index 11a000cef7..ef78d2720e 100644 --- a/mindspore/ccsrc/ir/tensor_py.cc +++ b/mindspore/core/ir/tensor_py.cc @@ -22,10 +22,9 @@ #include #include -#include "device/device_address.h" #include "pybind_api/api_register.h" #include "pybind_api/export_flags.h" -#include "pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" namespace mindspore { namespace tensor { @@ -213,9 +212,28 @@ static 
std::vector GetShapeFromTuple(const py::tuple &tuple) { } REGISTER_PYBIND_DEFINE(Tensor, ([](const py::module *m) { + // Define python MetaTensor class. + (void)py::class_>(*m, "MetaTensor") + .def(py::init>(), py::arg("dtype"), py::arg("shape")) + .def_readonly(PYTHON_META_TENSOR_FLAG, &MetaTensor::parse_info_) + .def_property_readonly("dtype", &MetaTensor::Dtype, "Get the MetaTensor's dtype.") + .def_property_readonly("shape", &MetaTensor::shape, "Get the MetaTensor's shape.") + .def(py::pickle( + [](const MetaTensor &t) { // __getstate__ + /* Return a tuple that fully encodes the state of the object */ + return py::make_tuple(static_cast(t.data_type()), t.shape()); + }, + [](const py::tuple &t) { // __setstate__ + if (t.size() != 2) { + throw std::runtime_error("Invalid state!"); + } + /* Create a new C++ instance */ + MetaTensor tensor(TypeId(t[0].cast()), t[1].cast>()); + return tensor; + })); // Define python Tensor class. // dtype should define before Tensor, because Tensor init depend dtype - (void)py::class_>(*m, "Tensor") + (void)py::class_>(*m, "Tensor") .def(py::init([](const Tensor &tensor) { return std::make_shared(tensor); }), py::arg("input")) .def(py::init([](const Tensor &tensor, const TypePtr &type_ptr) { @@ -252,6 +270,7 @@ REGISTER_PYBIND_DEFINE(Tensor, ([](const py::module *m) { }), py::arg("input"), py::arg("dtype") = nullptr) .def_readonly(PYTHON_TENSOR_FLAG, &Tensor::parse_info_) + .def_property("init_flag", &Tensor::is_init, &Tensor::set_init_flag) .def_property_readonly("dtype", &Tensor::Dtype, R"mydelimiter( Get the tensor's data type. @@ -365,26 +384,6 @@ REGISTER_PYBIND_DEFINE(Tensor, ([](const py::module *m) { /* Create a new C++ instance */ return TensorPy::MakeTensor(t[0].cast()); })); - // Define python MetaTensor class. 
- (void)py::class_>(*m, "MetaTensor") - .def(py::init>(), py::arg("dtype"), py::arg("shape")) - .def_readonly(PYTHON_META_TENSOR_FLAG, &MetaTensor::parse_info_) - .def_property_readonly("dtype", &MetaTensor::Dtype, "Get the MetaTensor's dtype.") - .def_property_readonly("shape", &MetaTensor::shape, "Get the MetaTensor's shape.") - .def(py::pickle( - [](const MetaTensor &t) { // __getstate__ - /* Return a tuple that fully encodes the state of the object */ - return py::make_tuple(static_cast(t.data_type()), t.shape()); - }, - [](const py::tuple &t) { // __setstate__ - if (t.size() != 2) { - throw std::runtime_error("Invalid state!"); - } - /* Create a new C++ instance */ - MetaTensor tensor(TypeId(t[0].cast()), t[1].cast>()); - return tensor; - })); })); - } // namespace tensor } // namespace mindspore diff --git a/mindspore/ccsrc/ir/tensor_py.h b/mindspore/core/ir/tensor_py.h similarity index 96% rename from mindspore/ccsrc/ir/tensor_py.h rename to mindspore/core/ir/tensor_py.h index 18ee547071..f917584977 100644 --- a/mindspore/ccsrc/ir/tensor_py.h +++ b/mindspore/core/ir/tensor_py.h @@ -81,8 +81,6 @@ struct type_caster : public npy_scalar_caster { } // namespace detail } // namespace pybind11 -using mindspore::device::DeviceAddress; -using DeviceAddressPtr = std::shared_ptr; // brief mindspore namespace. // // mindspore namespace is the top level namespace of Mindsporeession project. 
diff --git a/mindspore/ccsrc/ir/value.cc b/mindspore/core/ir/value.cc similarity index 100% rename from mindspore/ccsrc/ir/value.cc rename to mindspore/core/ir/value.cc diff --git a/mindspore/ccsrc/ir/value.h b/mindspore/core/ir/value.h similarity index 99% rename from mindspore/ccsrc/ir/value.h rename to mindspore/core/ir/value.h index ea9bb47ffe..535de81adf 100644 --- a/mindspore/ccsrc/ir/value.h +++ b/mindspore/core/ir/value.h @@ -25,7 +25,7 @@ #include #include -#include "ir/base.h" +#include "base/base.h" #include "ir/anf.h" #include "ir/dtype.h" #include "ir/scalar.h" diff --git a/mindspore/ccsrc/ir/value_extends.cc b/mindspore/core/ir/value_extends.cc similarity index 91% rename from mindspore/ccsrc/ir/value_extends.cc rename to mindspore/core/ir/value_extends.cc index 8eb34d0eeb..c75da80665 100644 --- a/mindspore/ccsrc/ir/value_extends.cc +++ b/mindspore/core/ir/value_extends.cc @@ -20,8 +20,7 @@ #include #include -#include "pybind_api/api_register.h" -#include "pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" namespace mindspore { using ContextPtr = abstract::AnalysisContextPtr; @@ -83,9 +82,4 @@ abstract::AbstractBasePtr ValueDictionary::ToAbstract() { [](const std::pair &item) { return std::make_pair(item.first, item.second->ToAbstract()); }); return std::make_shared(kv); } - -REGISTER_PYBIND_DEFINE( - RefKey, ([](const py::module *m) { - (void)py::class_>(*m, "RefKey").def(py::init(), py::arg("tag")); - })); } // namespace mindspore diff --git a/mindspore/core/ir/value_py.cc b/mindspore/core/ir/value_py.cc new file mode 100644 index 0000000000..1d80c74c4d --- /dev/null +++ b/mindspore/core/ir/value_py.cc @@ -0,0 +1,29 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/value.h" +#include + +#include "pybind_api/api_register.h" +#include "abstract/abstract_value.h" + +namespace mindspore { +// Define python 'RefKey' class. +REGISTER_PYBIND_DEFINE( + RefKey, ([](const py::module *m) { + (void)py::class_>(*m, "RefKey").def(py::init(), py::arg("tag")); + })); +} // namespace mindspore diff --git a/mindspore/ccsrc/ir/visitor.cc b/mindspore/core/ir/visitor.cc similarity index 100% rename from mindspore/ccsrc/ir/visitor.cc rename to mindspore/core/ir/visitor.cc diff --git a/mindspore/ccsrc/ir/visitor.h b/mindspore/core/ir/visitor.h similarity index 100% rename from mindspore/ccsrc/ir/visitor.h rename to mindspore/core/ir/visitor.h diff --git a/mindspore/dataset/__init__.py b/mindspore/dataset/__init__.py index f0070b428d..b2d26b41ee 100644 --- a/mindspore/dataset/__init__.py +++ b/mindspore/dataset/__init__.py @@ -18,12 +18,13 @@ datasets in special format, including mindrecord, tfrecord, manifest. Users can also create samplers with this module to sample data. 
""" -from .core.configuration import config +from .core import config from .engine.datasets import TFRecordDataset, ImageFolderDatasetV2, MnistDataset, MindDataset, NumpySlicesDataset, \ GeneratorDataset, ManifestDataset, Cifar10Dataset, Cifar100Dataset, VOCDataset, CocoDataset, CelebADataset,\ TextFileDataset, CLUEDataset, Schema, Shuffle, zip, RandomDataset from .engine.samplers import DistributedSampler, PKSampler, RandomSampler, SequentialSampler, SubsetRandomSampler, \ WeightedRandomSampler, Sampler +from .engine.cache_client import DatasetCache from .engine.serializer_deserializer import serialize, deserialize, show from .engine.graphdata import GraphData diff --git a/mindspore/dataset/core/config.py b/mindspore/dataset/core/config.py new file mode 100644 index 0000000000..c863186d97 --- /dev/null +++ b/mindspore/dataset/core/config.py @@ -0,0 +1,195 @@ +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +""" +The configuration manager. 
+""" +import random +import numpy +import mindspore._c_dataengine as cde + +__all__ = ['set_seed', 'get_seed', 'set_prefetch_size', 'get_prefetch_size', 'set_num_parallel_workers', + 'get_num_parallel_workers', 'set_monitor_sampling_interval', 'get_monitor_sampling_interval', 'load'] + +INT32_MAX = 2147483647 +UINT32_MAX = 4294967295 + +_config = cde.GlobalContext.config_manager() + + +def set_seed(seed): + """ + Set the seed to be used in any random generator. This is used to produce deterministic results. + + Note: + This set_seed function sets the seed in the python random library and numpy.random library + for deterministic python augmentations using randomness. This set_seed function should + be called with every iterator created to reset the random seed. In our pipeline this + does not guarantee deterministic results with num_parallel_workers > 1. + + Args: + seed(int): seed to be set. + + Raises: + ValueError: If seed is invalid (< 0 or > MAX_UINT_32). + + Examples: + >>> import mindspore.dataset as ds + >>> # sets the new seed value, now operators with a random seed will use new seed value. + >>> ds.config.set_seed(1000) + """ + if seed < 0 or seed > UINT32_MAX: + raise ValueError("Seed given is not within the required range.") + _config.set_seed(seed) + random.seed(seed) + # numpy.random isn't thread safe + numpy.random.seed(seed) + + +def get_seed(): + """ + Get the seed. + + Returns: + Int, seed. + """ + return _config.get_seed() + + +def set_prefetch_size(size): + """ + Set the number of rows to be prefetched. + + Args: + size (int): total number of rows to be prefetched. + + Raises: + ValueError: If prefetch_size is invalid (<= 0 or > MAX_INT_32). + + Examples: + >>> import mindspore.dataset as ds + >>> # sets the new prefetch value. 
+ >>> ds.config.set_prefetch_size(1000) + """ + if size <= 0 or size > INT32_MAX: + raise ValueError("Prefetch size given is not within the required range.") + _config.set_op_connector_size(size) + + +def get_prefetch_size(): + """ + Get the prefetch size in number of rows. + + Returns: + Size, total number of rows to be prefetched. + """ + return _config.get_op_connector_size() + + +def set_num_parallel_workers(num): + """ + Set the default number of parallel workers. + + Args: + num (int): number of parallel workers to be used as a default for each operation. + + Raises: + ValueError: If num_parallel_workers is invalid (<= 0 or > MAX_INT_32). + + Examples: + >>> import mindspore.dataset as ds + >>> # sets the new parallel_workers value, now parallel dataset operators will run with 8 workers. + >>> ds.config.set_num_parallel_workers(8) + """ + if num <= 0 or num > INT32_MAX: + raise ValueError("Num workers given is not within the required range.") + _config.set_num_parallel_workers(num) + + +def get_num_parallel_workers(): + """ + Get the default number of parallel workers. + + Returns: + Int, number of parallel workers to be used as a default for each operation + """ + return _config.get_num_parallel_workers() + + +def set_monitor_sampling_interval(interval): + """ + Set the default interval(ms) of monitor sampling. + + Args: + interval (int): interval(ms) to be used to performance monitor sampling. + + Raises: + ValueError: If interval is invalid (<= 0 or > MAX_INT_32). + + Examples: + >>> import mindspore.dataset as ds + >>> # sets the new interval value. + >>> ds.config.set_monitor_sampling_interval(100) + """ + if interval <= 0 or interval > INT32_MAX: + raise ValueError("Interval given is not within the required range.") + _config.set_monitor_sampling_interval(interval) + + +def get_monitor_sampling_interval(): + """ + Get the default interval of performance monitor sampling. + + Returns: + Interval: interval(ms) of performance monitor sampling. 
+ """ + return _config.get_monitor_sampling_interval() + + +def __str__(): + """ + String representation of the configurations. + + Returns: + Str, configurations. + """ + return str(_config) + + +def load(file): + """ + Load configuration from a file. + + Args: + file (str): path the config file to be loaded. + + Raises: + RuntimeError: If file is invalid and parsing fails. + + Examples: + >>> import mindspore.dataset as ds + >>> # sets the default value according to values in configuration file. + >>> ds.config.load("path/to/config/file") + >>> # example config file: + >>> # { + >>> # "logFilePath": "/tmp", + >>> # "rowsPerBuffer": 32, + >>> # "numParallelWorkers": 4, + >>> # "workerConnectorSize": 16, + >>> # "opConnectorSize": 16, + >>> # "seed": 5489, + >>> # "monitorSamplingInterval": 30 + >>> # } + """ + _config.load(file) diff --git a/mindspore/dataset/core/configuration.py b/mindspore/dataset/core/configuration.py deleted file mode 100644 index 5376c668c4..0000000000 --- a/mindspore/dataset/core/configuration.py +++ /dev/null @@ -1,195 +0,0 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -""" -The configuration manager. 
-""" -import random -import numpy -import mindspore._c_dataengine as cde - -INT32_MAX = 2147483647 -UINT32_MAX = 4294967295 - - -class ConfigurationManager: - """The configuration manager""" - - def __init__(self): - self.config = cde.GlobalContext.config_manager() - - def set_seed(self, seed): - """ - Set the seed to be used in any random generator. This is used to produce deterministic results. - - Note: - This set_seed function sets the seed in the python random library and numpy.random library - for deterministic python augmentations using randomness. This set_seed function should - be called with every iterator created to reset the random seed. In our pipeline this - does not guarantee deterministic results with num_parallel_workers > 1. - - Args: - seed(int): seed to be set - - Raises: - ValueError: If seed is invalid (< 0 or > MAX_UINT_32). - - Examples: - >>> import mindspore.dataset as ds - >>> con = ds.engine.ConfigurationManager() - >>> # sets the new seed value, now operators with a random seed will use new seed value. - >>> con.set_seed(1000) - """ - if seed < 0 or seed > UINT32_MAX: - raise ValueError("Seed given is not within the required range") - self.config.set_seed(seed) - random.seed(seed) - # numpy.random isn't thread safe - numpy.random.seed(seed) - - def get_seed(self): - """ - Get the seed - - Returns: - Int, seed. - """ - return self.config.get_seed() - - def set_prefetch_size(self, size): - """ - Set the number of rows to be prefetched. - - Args: - size: total number of rows to be prefetched. - - Raises: - ValueError: If prefetch_size is invalid (<= 0 or > MAX_INT_32). - - Examples: - >>> import mindspore.dataset as ds - >>> con = ds.engine.ConfigurationManager() - >>> # sets the new prefetch value. 
- >>> con.set_prefetch_size(1000) - """ - if size <= 0 or size > INT32_MAX: - raise ValueError("Prefetch size given is not within the required range") - self.config.set_op_connector_size(size) - - def get_prefetch_size(self): - """ - Get the prefetch size in number of rows. - - Returns: - Size, total number of rows to be prefetched. - """ - return self.config.get_op_connector_size() - - def set_num_parallel_workers(self, num): - """ - Set the default number of parallel workers - - Args: - num: number of parallel workers to be used as a default for each operation - - Raises: - ValueError: If num_parallel_workers is invalid (<= 0 or > MAX_INT_32). - - Examples: - >>> import mindspore.dataset as ds - >>> con = ds.engine.ConfigurationManager() - >>> # sets the new parallel_workers value, now parallel dataset operators will run with 8 workers. - >>> con.set_num_parallel_workers(8) - """ - if num <= 0 or num > INT32_MAX: - raise ValueError("Num workers given is not within the required range") - self.config.set_num_parallel_workers(num) - - def get_num_parallel_workers(self): - """ - Get the default number of parallel workers. - - Returns: - Int, number of parallel workers to be used as a default for each operation - """ - return self.config.get_num_parallel_workers() - - def set_monitor_sampling_interval(self, interval): - """ - Set the default interval(ms) of monitor sampling. - - Args: - interval: interval(ms) to be used to performance monitor sampling. - - Raises: - ValueError: If interval is invalid (<= 0 or > MAX_INT_32). - - Examples: - >>> import mindspore.dataset as ds - >>> con = ds.engine.ConfigurationManager() - >>> # sets the new interval value. 
- >>> con.set_monitor_sampling_interval(100) - """ - if interval <= 0 or interval > INT32_MAX: - raise ValueError("Interval given is not within the required range") - self.config.set_monitor_sampling_interval(interval) - - def get_monitor_sampling_interval(self): - """ - Get the default interval of performance monitor sampling. - - Returns: - Interval: interval(ms) of performance monitor sampling. - """ - return self.config.get_monitor_sampling_interval() - - def __str__(self): - """ - String representation of the configurations. - - Returns: - Str, configurations. - """ - return str(self.config) - - def load(self, file): - """ - Load configuration from a file. - - Args: - file: path the config file to be loaded - - Raises: - RuntimeError: If file is invalid and parsing fails. - - Examples: - >>> import mindspore.dataset as ds - >>> con = ds.engine.ConfigurationManager() - >>> # sets the default value according to values in configuration file. - >>> con.load("path/to/config/file") - >>> # example config file: - >>> # { - >>> # "logFilePath": "/tmp", - >>> # "rowsPerBuffer": 32, - >>> # "numParallelWorkers": 4, - >>> # "workerConnectorSize": 16, - >>> # "opConnectorSize": 16, - >>> # "seed": 5489, - >>> # "monitorSamplingInterval": 30 - >>> # } - """ - self.config.load(file) - - -config = ConfigurationManager() diff --git a/mindspore/dataset/core/validator_helpers.py b/mindspore/dataset/core/validator_helpers.py new file mode 100644 index 0000000000..8806babd63 --- /dev/null +++ b/mindspore/dataset/core/validator_helpers.py @@ -0,0 +1,360 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +""" +General Validators. +""" +import inspect +from multiprocessing import cpu_count +import os +import numpy as np +from ..engine import samplers + +# POS_INT_MIN is used to limit values from starting from 0 +POS_INT_MIN = 1 +UINT8_MAX = 255 +UINT8_MIN = 0 +UINT32_MAX = 4294967295 +UINT32_MIN = 0 +UINT64_MAX = 18446744073709551615 +UINT64_MIN = 0 +INT32_MAX = 2147483647 +INT32_MIN = -2147483648 +INT64_MAX = 9223372036854775807 +INT64_MIN = -9223372036854775808 +FLOAT_MAX_INTEGER = 16777216 +FLOAT_MIN_INTEGER = -16777216 +DOUBLE_MAX_INTEGER = 9007199254740992 +DOUBLE_MIN_INTEGER = -9007199254740992 + +valid_detype = [ + "bool", "int8", "int16", "int32", "int64", "uint8", "uint16", + "uint32", "uint64", "float16", "float32", "float64", "string" +] + + +def pad_arg_name(arg_name): + if arg_name != "": + arg_name = arg_name + " " + return arg_name + + +def check_value(value, valid_range, arg_name=""): + arg_name = pad_arg_name(arg_name) + if value < valid_range[0] or value > valid_range[1]: + raise ValueError( + "Input {0}is not within the required interval of ({1} to {2}).".format(arg_name, valid_range[0], + valid_range[1])) + + +def check_range(values, valid_range, arg_name=""): + arg_name = pad_arg_name(arg_name) + if not valid_range[0] <= values[0] <= values[1] <= valid_range[1]: + raise ValueError( + "Input {0}is not within the required interval of ({1} to {2}).".format(arg_name, valid_range[0], + valid_range[1])) + + +def check_positive(value, arg_name=""): + arg_name = 
pad_arg_name(arg_name) + if value <= 0: + raise ValueError("Input {0}must be greater than 0.".format(arg_name)) + + +def check_positive_float(value, arg_name=""): + arg_name = pad_arg_name(arg_name) + type_check(value, (float,), arg_name) + check_positive(value, arg_name) + + +def check_2tuple(value, arg_name=""): + if not (isinstance(value, tuple) and len(value) == 2): + raise ValueError("Value {0}needs to be a 2-tuple.".format(arg_name)) + + +def check_uint8(value, arg_name=""): + type_check(value, (int,), arg_name) + check_value(value, [UINT8_MIN, UINT8_MAX]) + + +def check_uint32(value, arg_name=""): + type_check(value, (int,), arg_name) + check_value(value, [UINT32_MIN, UINT32_MAX]) + + +def check_pos_int32(value, arg_name=""): + type_check(value, (int,), arg_name) + check_value(value, [POS_INT_MIN, INT32_MAX], arg_name) + + +def check_uint64(value, arg_name=""): + type_check(value, (int,), arg_name) + check_value(value, [UINT64_MIN, UINT64_MAX]) + + +def check_pos_int64(value, arg_name=""): + type_check(value, (int,), arg_name) + check_value(value, [UINT64_MIN, INT64_MAX]) + + +def check_pos_float32(value, arg_name=""): + check_value(value, [UINT32_MIN, FLOAT_MAX_INTEGER], arg_name) + + +def check_pos_float64(value, arg_name=""): + check_value(value, [UINT64_MIN, DOUBLE_MAX_INTEGER], arg_name) + + +def check_valid_detype(type_): + if type_ not in valid_detype: + raise ValueError("Unknown column type") + return True + + +def check_columns(columns, name): + """ + Validate strings in column_names. + + Args: + columns (list): list of column_names. + name (str): name of columns. + + Returns: + Exception: when the value is not correct, otherwise nothing. 
+ """ + type_check(columns, (list, str), name) + if isinstance(columns, list): + if not columns: + raise ValueError("{0} should not be empty".format(name)) + for i, column_name in enumerate(columns): + if not column_name: + raise ValueError("{0}[{1}] should not be empty".format(name, i)) + + col_names = ["{0}[{1}]".format(name, i) for i in range(len(columns))] + type_check_list(columns, (str,), col_names) + if len(set(columns)) != len(columns): + raise ValueError("Every column name should not be same with others in column_names.") + + +def parse_user_args(method, *args, **kwargs): + """ + Parse user arguments in a function. + + Args: + method (method): a callable function. + *args: user passed args. + **kwargs: user passed kwargs. + + Returns: + user_filled_args (list): values of what the user passed in for the arguments. + ba.arguments (Ordered Dict): ordered dict of parameter and argument for what the user has passed. + """ + sig = inspect.signature(method) + if 'self' in sig.parameters or 'cls' in sig.parameters: + ba = sig.bind(method, *args, **kwargs) + ba.apply_defaults() + params = list(sig.parameters.keys())[1:] + else: + ba = sig.bind(*args, **kwargs) + ba.apply_defaults() + params = list(sig.parameters.keys()) + + user_filled_args = [ba.arguments.get(arg_value) for arg_value in params] + return user_filled_args, ba.arguments + + +def type_check_list(args, types, arg_names): + """ + Check the type of each parameter in the list. + + Args: + args (list, tuple): a list or tuple of any variable. + types (tuple): tuple of all valid types for arg. + arg_names (list, tuple of str): the names of args. + + Returns: + Exception: when the type is not correct, otherwise nothing. 
+ """ + type_check(args, (list, tuple,), arg_names) + if len(args) != len(arg_names): + raise ValueError("List of arguments is not the same length as argument_names.") + for arg, arg_name in zip(args, arg_names): + type_check(arg, types, arg_name) + + +def type_check(arg, types, arg_name): + """ + Check the type of the parameter. + + Args: + arg : any variable. + types (tuple): tuple of all valid types for arg. + arg_name (str): the name of arg. + + Returns: + Exception: when the type is not correct, otherwise nothing. + """ + # handle special case of booleans being a subclass of ints + print_value = '\"\"' if repr(arg) == repr('') else arg + + if int in types and bool not in types: + if isinstance(arg, bool): + raise TypeError("Argument {0} with value {1} is not of type {2}.".format(arg_name, print_value, types)) + if not isinstance(arg, types): + raise TypeError("Argument {0} with value {1} is not of type {2}.".format(arg_name, print_value, types)) + + +def check_filename(path): + """ + check the filename in the path. + + Args: + path (str): the path. + + Returns: + Exception: when error. 
+ """ + if not isinstance(path, str): + raise TypeError("path: {} is not string".format(path)) + filename = os.path.basename(path) + + # '#', ':', '|', ' ', '}', '"', '+', '!', ']', '[', '\\', '`', + # '&', '.', '/', '@', "'", '^', ',', '_', '<', ';', '~', '>', + # '*', '(', '%', ')', '-', '=', '{', '?', '$' + forbidden_symbols = set(r'\/:*?"<>|`&\';') + + if set(filename) & forbidden_symbols: + raise ValueError(r"filename should not contains \/:*?\"<>|`&;\'") + + if filename.startswith(' ') or filename.endswith(' '): + raise ValueError("filename should not start/end with space") + + return True + + +def check_dir(dataset_dir): + if not os.path.isdir(dataset_dir) or not os.access(dataset_dir, os.R_OK): + raise ValueError("The folder {} does not exist or permission denied!".format(dataset_dir)) + + +def check_file(dataset_file): + check_filename(dataset_file) + if not os.path.isfile(dataset_file) or not os.access(dataset_file, os.R_OK): + raise ValueError("The file {} does not exist or permission denied!".format(dataset_file)) + + +def check_sampler_shuffle_shard_options(param_dict): + """ + Check for valid shuffle, sampler, num_shards, and shard_id inputs. + Args: + param_dict (dict): param_dict. + + Returns: + Exception: ValueError or RuntimeError if error. 
+ """ + shuffle, sampler = param_dict.get('shuffle'), param_dict.get('sampler') + num_shards, shard_id = param_dict.get('num_shards'), param_dict.get('shard_id') + + type_check(sampler, (type(None), samplers.BuiltinSampler, samplers.Sampler), "sampler") + + if sampler is not None: + if shuffle is not None: + raise RuntimeError("sampler and shuffle cannot be specified at the same time.") + if num_shards is not None: + raise RuntimeError("sampler and sharding cannot be specified at the same time.") + + if num_shards is not None: + check_pos_int32(num_shards) + if shard_id is None: + raise RuntimeError("num_shards is specified and currently requires shard_id as well.") + check_value(shard_id, [0, num_shards - 1], "shard_id") + + if num_shards is None and shard_id is not None: + raise RuntimeError("shard_id is specified but num_shards is not.") + + +def check_padding_options(param_dict): + """ + Check for valid padded_sample and num_padded of padded samples. + + Args: + param_dict (dict): param_dict. + + Returns: + Exception: ValueError or RuntimeError if error. 
+ """ + + columns_list = param_dict.get('columns_list') + block_reader = param_dict.get('block_reader') + padded_sample, num_padded = param_dict.get('padded_sample'), param_dict.get('num_padded') + if padded_sample is not None: + if num_padded is None: + raise RuntimeError("padded_sample is specified and requires num_padded as well.") + if num_padded < 0: + raise ValueError("num_padded is invalid, num_padded={}.".format(num_padded)) + if columns_list is None: + raise RuntimeError("padded_sample is specified and requires columns_list as well.") + for column in columns_list: + if column not in padded_sample: + raise ValueError("padded_sample cannot match columns_list.") + if block_reader: + raise RuntimeError("block_reader and padded_sample cannot be specified at the same time.") + + if padded_sample is None and num_padded is not None: + raise RuntimeError("num_padded is specified but padded_sample is not.") + + +def check_num_parallel_workers(value): + type_check(value, (int,), "num_parallel_workers") + if value < 1 or value > cpu_count(): + raise ValueError("num_parallel_workers exceeds the boundary between 1 and {}!".format(cpu_count())) + + +def check_num_samples(value): + type_check(value, (int,), "num_samples") + check_value(value, [0, INT32_MAX], "num_samples") + + +def validate_dataset_param_value(param_list, param_dict, param_type): + for param_name in param_list: + if param_dict.get(param_name) is not None: + if param_name == 'num_parallel_workers': + check_num_parallel_workers(param_dict.get(param_name)) + if param_name == 'num_samples': + check_num_samples(param_dict.get(param_name)) + else: + type_check(param_dict.get(param_name), (param_type,), param_name) + + +def check_gnn_list_or_ndarray(param, param_name): + """ + Check if the input parameter is list or numpy.ndarray. + + Args: + param (list, nd.ndarray): param. + param_name (str): param_name. + + Returns: + Exception: TypeError if error. 
+ """ + + type_check(param, (list, np.ndarray), param_name) + if isinstance(param, list): + param_names = ["param_{0}".format(i) for i in range(len(param))] + type_check_list(param, (int,), param_names) + + elif isinstance(param, np.ndarray): + if not param.dtype == np.int32: + raise TypeError("Each member in {0} should be of type int32. Got {1}.".format( + param_name, param.dtype)) diff --git a/mindspore/dataset/engine/__init__.py b/mindspore/dataset/engine/__init__.py index 674848f156..b3624e1ca3 100644 --- a/mindspore/dataset/engine/__init__.py +++ b/mindspore/dataset/engine/__init__.py @@ -26,10 +26,9 @@ from .datasets import * from .iterators import * from .serializer_deserializer import serialize, deserialize, show, compare from .samplers import * -from ..core.configuration import config, ConfigurationManager +from ..core import config -__all__ = ["config", "ConfigurationManager", "zip", - "ImageFolderDatasetV2", "MnistDataset", +__all__ = ["config", "zip", "ImageFolderDatasetV2", "MnistDataset", "MindDataset", "GeneratorDataset", "TFRecordDataset", "CLUEDataset", "ManifestDataset", "Cifar10Dataset", "Cifar100Dataset", "CelebADataset", "VOCDataset", "CocoDataset", "TextFileDataset", "Schema", "DistributedSampler", diff --git a/mindspore/dataset/engine/cache_client.py b/mindspore/dataset/engine/cache_client.py new file mode 100644 index 0000000000..800c0dab1d --- /dev/null +++ b/mindspore/dataset/engine/cache_client.py @@ -0,0 +1,49 @@ +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Cache client +""" + +import copy +from mindspore._c_dataengine import CacheClient + +class DatasetCache: + """ + A client to interface with tensor caching service + """ + + def __init__(self, session_id=None, size=None, spilling=False): + if session_id is None: + raise RuntimeError("Session generation is not implemented yet. session id required") + self.size = size if size is not None else 0 + if size < 0: + raise ValueError("cache size should be 0 or positive integer value but got: size={}".format(size)) + if not isinstance(spilling, bool): + raise ValueError( + "spilling argument for cache should be a boolean value but got: spilling={}".format(spilling)) + self.session_id = session_id + self.spilling = spilling + self.cache_client = CacheClient(session_id, size, spilling) + + def __deepcopy__(self, memodict): + if id(self) in memodict: + return memodict[id(self)] + cls = self.__class__ + new_cache = cls.__new__(cls) + memodict[id(self)] = new_cache + new_cache.session_id = copy.deepcopy(self.session_id, memodict) + new_cache.spilling = copy.deepcopy(self.spilling, memodict) + new_cache.size = copy.deepcopy(self.size, memodict) + new_cache.cache_client = self.cache_client + return new_cache diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py index ae0dc6789e..846e7e0a56 100644 --- a/mindspore/dataset/engine/datasets.py +++ b/mindspore/dataset/engine/datasets.py @@ -38,13 +38,13 @@ from mindspore._c_expression import typing from mindspore import log as logger from . 
import samplers -from .iterators import DictIterator, TupleIterator +from .iterators import DictIterator, TupleIterator, DummyIterator from .validators import check_batch, check_shuffle, check_map, check_filter, check_repeat, check_skip, check_zip, \ check_rename, check_numpyslicesdataset, \ check_take, check_project, check_imagefolderdatasetv2, check_mnist_cifar_dataset, check_manifestdataset, \ check_tfrecorddataset, check_vocdataset, check_cocodataset, check_celebadataset, check_minddataset, \ check_generatordataset, check_sync_wait, check_zip_dataset, check_add_column, check_textfiledataset, check_concat, \ - check_split, check_bucket_batch_by_length, check_cluedataset + check_random_dataset, check_split, check_bucket_batch_by_length, check_cluedataset, check_positive_int32 from ..core.datatypes import mstype_to_detype, mstypelist_to_detypelist try: @@ -146,6 +146,12 @@ class Dataset: self._num_classes = None self._repeat_count = None self._sync = False + self.ms_role = os.getenv("MS_ROLE") + + def _noop_mode(self): + if self.ms_role in ("MS_PSERVER", "MS_SCHED"): + return True + return False def __add__(self, datasets): return self.concat(datasets) @@ -386,7 +392,7 @@ class Dataset: @check_map def map(self, input_columns=None, operations=None, output_columns=None, columns_order=None, - num_parallel_workers=None, python_multiprocessing=False): + num_parallel_workers=None, python_multiprocessing=False, cache=None): """ Apply each operation in operations to this dataset. @@ -427,6 +433,7 @@ class Dataset: parallel (default=None, the value from the config will be used). python_multiprocessing (bool, optional): Parallelize python operations with multiple worker process. This option could be beneficial if the python operation is computational heavy (default=False). + cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used) Returns: MapDataset, dataset after mapping operation. 
@@ -541,7 +548,7 @@ class Dataset: >>> ds_mapped = ds_pyfunc.map(input_columns, operations, output_columns, columns_order) """ return MapDataset(self, input_columns, operations, output_columns, columns_order, num_parallel_workers, - python_multiprocessing) + python_multiprocessing, cache) @check_filter def filter(self, predicate, input_columns=None, num_parallel_workers=1): @@ -939,6 +946,7 @@ class Dataset: raise TypeError("apply_func must return a dataset.") return dataset + @check_positive_int32 def device_que(self, prefetch_size=None): """ Return a transferredDataset that transfer data through device. @@ -956,6 +964,7 @@ class Dataset: """ return self.to_device() + @check_positive_int32 def to_device(self, num_batch=None): """ Transfer data through CPU, GPU or Ascend devices. @@ -973,10 +982,14 @@ class Dataset: Raises: TypeError: If device_type is empty. ValueError: If device_type is not 'Ascend', 'GPU' or 'CPU'. - ValueError: If num_batch is None or 0 or larger than int_max. + ValueError: If num_batch is not positive or larger than int_max. + ValueError: If dataset size is None or 0. RuntimeError: If dataset is unknown. RuntimeError: If distribution file path is given but failed to read. 
""" + if self.get_dataset_size() is None or 0: + raise ValueError("dataset size is None or 0.") + if num_batch is None: num_batch = self.get_dataset_size() repeat_count = self.get_repeat_count() @@ -995,8 +1008,8 @@ class Dataset: if device_type not in ('Ascend', 'GPU', 'CPU'): raise ValueError("Only support CPU, Ascend, GPU") - if num_batch is None or num_batch == 0: - raise ValueError("num_batch is None or 0.") + if num_batch == 0: + raise ValueError("num_batch is 0.") def get_distribution(output_dataset): dev_id = 0 @@ -1055,6 +1068,8 @@ class Dataset: >>> # convert the returned tuple to a list and print >>> print(list(item)) """ + if self._noop_mode(): + return DummyIterator(self, 'tuple') return TupleIterator(self, columns) def create_dict_iterator(self): @@ -1078,6 +1093,8 @@ class Dataset: >>> print(item["column1"]) """ + if self._noop_mode(): + return DummyIterator(self, 'dict') return DictIterator(self) def __iter__(self): @@ -1556,7 +1573,7 @@ class BatchDataset(DatasetOp): Number, number of batches. """ child_size = self.children[0].get_dataset_size() - if child_size is not None: + if child_size is not None and isinstance(self.batch_size, int): if self.drop_remainder: return math.floor(child_size / self.batch_size) return math.ceil(child_size / self.batch_size) @@ -1862,13 +1879,14 @@ class MapDataset(DatasetOp): in parallel (default=None). python_multiprocessing (bool, optional): Parallelize python operations with multiple worker process. This option could be beneficial if the python operation is computational heavy (default=False). + cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used) Raises: ValueError: If len(input_columns) != len(output_columns) and columns_order is not specified. 
""" def __init__(self, input_dataset, input_columns=None, operations=None, output_columns=None, columns_order=None, - num_parallel_workers=None, python_multiprocessing=False): + num_parallel_workers=None, python_multiprocessing=False, cache=None): super().__init__(num_parallel_workers) self.children.append(input_dataset) if input_columns is not None and not isinstance(input_columns, list): @@ -1880,6 +1898,7 @@ class MapDataset(DatasetOp): if output_columns is not None and not isinstance(output_columns, list): output_columns = [output_columns] self.output_columns = output_columns + self.cache = cache self.columns_order = columns_order if self.input_columns and self.output_columns \ @@ -1898,6 +1917,7 @@ class MapDataset(DatasetOp): args["operations"] = self.operations args["output_columns"] = self.output_columns args["columns_order"] = self.columns_order + args["cache"] = self.cache.cache_client if self.cache is not None else None return args def get_dataset_size(self): @@ -1923,6 +1943,7 @@ class MapDataset(DatasetOp): new_op.parent = copy.deepcopy(self.parent, memodict) new_op.input_indexs = copy.deepcopy(self._input_indexs, memodict) new_op.python_multiprocessing = copy.deepcopy(self.python_multiprocessing, memodict) + new_op.cache = copy.deepcopy(self.cache, memodict) new_op.operations = self.operations return new_op @@ -2307,6 +2328,8 @@ class TransferDataset(DatasetOp): def send(self): # need to keep iterator alive so the executionTree is not destroyed + if self._noop_mode(): + return self.iterator = TupleIterator(self) @@ -2340,7 +2363,7 @@ class RangeDataset(MappableDataset): return False -def _select_sampler(num_samples, input_sampler, shuffle, num_shards, shard_id): +def _select_sampler(num_samples, input_sampler, shuffle, num_shards, shard_id, non_mappable=False): """ Create sampler based on user input. @@ -2350,7 +2373,11 @@ def _select_sampler(num_samples, input_sampler, shuffle, num_shards, shard_id): shuffle (bool): Shuffle. 
num_shards (int): Number of shard for sharding. shard_id (int): Shard ID. + non_mappable (bool, optional): Indicate if caller is non-mappable dataset for special handling (default=False). """ + if non_mappable is True and all(arg is None for arg in [num_samples, shuffle, num_shards, shard_id, input_sampler]): + return None + if input_sampler is not None: # If the user provided a sampler, then it doesn't matter what the other args are because # we are being asked specifically to use the given sampler. @@ -2363,7 +2390,7 @@ def _select_sampler(num_samples, input_sampler, shuffle, num_shards, shard_id): if (isinstance(input_sampler, (samplers.SequentialSampler, samplers.DistributedSampler, samplers.RandomSampler, samplers.SubsetRandomSampler, samplers.WeightedRandomSampler, samplers.Sampler)) and - (num_shards is not None or shard_id is not None or shuffle is not None or num_samples is not None)): + (any(arg is not None for arg in [num_shards, shard_id, shuffle, num_samples]))): raise ValueError( 'Conflicting arguments during sampler assignments. num_samples: {}, num_shards: {},' ' shard_id: {}, shuffle: {})'.format(num_samples, num_shards, shard_id, shuffle)) @@ -2452,6 +2479,7 @@ class ImageFolderDatasetV2(MappableDataset): into (default=None). shard_id (int, optional): The shard ID within num_shards (default=None). This argument should be specified only when num_shards is also specified. + cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used) Raises: RuntimeError: If sampler and shuffle are specified at the same time. 
@@ -2476,7 +2504,7 @@ class ImageFolderDatasetV2(MappableDataset): @check_imagefolderdatasetv2 def __init__(self, dataset_dir, num_samples=None, num_parallel_workers=None, shuffle=None, sampler=None, extensions=None, class_indexing=None, - decode=False, num_shards=None, shard_id=None): + decode=False, num_shards=None, shard_id=None, cache=None): super().__init__(num_parallel_workers) self.dataset_dir = dataset_dir @@ -2488,6 +2516,7 @@ class ImageFolderDatasetV2(MappableDataset): self.decode = decode self.num_shards = num_shards self.shard_id = shard_id + self.cache = cache def get_args(self): args = super().get_args() @@ -2500,6 +2529,7 @@ class ImageFolderDatasetV2(MappableDataset): args["decode"] = self.decode args["num_shards"] = self.num_shards args["shard_id"] = self.shard_id + args["cache"] = self.cache.cache_client if self.cache is not None else None return args def get_dataset_size(self): @@ -3245,6 +3275,7 @@ class TFRecordDataset(SourceDataset): argument should be specified only when num_shards is also specified. shard_equal_rows (bool): Get equal rows for all shards(default=False). If shard_equal_rows is false, number of rows of each shard may be not equal. + cache (DatasetCache, optional): Tensor cache to use. 
(default=None which means no cache is used) Examples: >>> import mindspore.dataset as ds >>> import mindspore.common.dtype as mstype @@ -3262,7 +3293,7 @@ class TFRecordDataset(SourceDataset): @check_tfrecorddataset def __init__(self, dataset_files, schema=None, columns_list=None, num_samples=None, num_parallel_workers=None, - shuffle=Shuffle.GLOBAL, num_shards=None, shard_id=None, shard_equal_rows=False): + shuffle=Shuffle.GLOBAL, num_shards=None, shard_id=None, shard_equal_rows=False, cache=None): super().__init__(num_parallel_workers) self.dataset_files = self._find_files(dataset_files) self.dataset_files.sort() @@ -3274,6 +3305,7 @@ class TFRecordDataset(SourceDataset): self.schema = schema self.columns_list = columns_list self.num_samples = num_samples + self.cache = cache if schema_obj is not None and num_samples is None: self.num_samples = schema_obj.num_rows @@ -3289,6 +3321,14 @@ class TFRecordDataset(SourceDataset): else: self.shuffle_level = shuffle self.shuffle_files = True + + # The TF record dataset does not directly support a sampler. It has provided sampling arguments + # (shuffle, num_samples, num_shards, shard_id) and it DOES support sampling if somewhere above it in + # the pipeline contains a cache. If there is no cache above it, then this sampler is not used. + sampler_shuffle = self.shuffle_files + sampler = None + self.sampler = _select_sampler(self.num_samples, sampler, sampler_shuffle, num_shards, shard_id, + non_mappable=True) self.shard_equal_rows = shard_equal_rows def get_args(self): @@ -3312,6 +3352,8 @@ class TFRecordDataset(SourceDataset): args["num_shards"] = self.num_shards args["shard_id"] = self.shard_id args["shard_equal_rows"] = self.shard_equal_rows + args["cache"] = self.cache.cache_client if self.cache is not None else None + args["sampler"] = self.sampler return args def get_dataset_size(self, estimate=False): @@ -3797,43 +3839,61 @@ class RandomDataset(SourceDataset): A source dataset that generates random data. 
Args: - num_samples (int): number of samples to generate. + total_rows (int): number of rows for the dataset to generate (default=None, number of rows is random) schema (str or Schema, optional): Path to the json schema file or schema object (default=None). If the schema is not provided, the random dataset generates a random schema. columns_list (list[str], optional): List of columns to be read (default=None, read all columns) + num_samples (int): number of samples to draw from the total. (default=None, which means all rows) num_parallel_workers (int, optional): number of workers to read the data (default=None, number set in the config). + cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used) + shuffle (bool, optional): Whether or not to perform shuffle on the dataset + (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset should be divided + into (default=None). + shard_id (int, optional): The shard ID within num_shards (default=None). This + argument should be specified only when num_shards is also specified. 
""" - def __init__(self, schema=None, columns_list=None, num_samples=None, num_parallel_workers=None): + @check_random_dataset + def __init__(self, total_rows=None, schema=None, columns_list=None, num_samples=None, num_parallel_workers=None, + cache=None, shuffle=None, num_shards=None, shard_id=None): super().__init__(num_parallel_workers) schema_obj = None if (schema is not None) and (not isinstance(schema, Schema)): schema_obj = Schema(schema) # read the schema file and convert to schema object to validate it self.schema = schema self.columns_list = columns_list - if schema_obj is not None and num_samples is None: - self.num_samples = schema_obj.num_rows - elif num_samples is None: - self.num_samples = 0 + sampler = None + self.sampler = _select_sampler(num_samples, sampler, shuffle, num_shards, shard_id, non_mappable=True) + self.num_samples = num_samples + self.cache = cache + if schema_obj is not None and total_rows is None: + self.total_rows = schema_obj.num_rows + elif total_rows is None: + self.total_rows = 0 else: - self.num_samples = num_samples + self.total_rows = total_rows + self.num_shards = num_shards + self.shard_id = shard_id + self.shuffle_level = shuffle def get_args(self): args = super().get_args() if self.schema is not None: if isinstance(self.schema, Schema): self.schema.datasetType = 'Random' - if self.num_samples is not None: - self.schema.num_rows = self.num_samples + if self.total_rows is not None: + self.schema.num_rows = self.total_rows args["schema_json_string"] = self.schema.to_json() else: args["schema_file_path"] = self.schema args["schema"] = self.schema - if self.columns_list is not None: - args["columns_list"] = self.columns_list - if self.num_samples is not None: - args["num_samples"] = self.num_samples + args["columns_list"] = self.columns_list + args["num_samples"] = self.num_samples + args["total_rows"] = self.total_rows + args["cache"] = self.cache.cache_client if self.cache is not None else None + args["sampler"] = 
self.sampler return args def get_dataset_size(self): @@ -3843,18 +3903,28 @@ class RandomDataset(SourceDataset): Return: Number, number of batches. """ + + num_rows = CifarOp.get_num_rows(self.dataset_dir, True) + + rows_per_shard = get_num_rows(num_rows, self.num_shards) rows_from_sampler = self._get_sampler_dataset_size() if rows_from_sampler is None: - return self.num_samples + return rows_per_shard - return min(rows_from_sampler, self.num_samples) + return min(rows_from_sampler, rows_per_shard) def is_shuffled(self): - return True + if self.shuffle_level is None: + return True + + return self.shuffle_level or self.sampler.is_shuffled() def is_sharded(self): - return False + if self.num_shards is not None: + return self.num_shards > 1 + + return self.sampler.is_sharded() class Schema: diff --git a/mindspore/dataset/engine/graphdata.py b/mindspore/dataset/engine/graphdata.py index 472819784e..81314b4373 100644 --- a/mindspore/dataset/engine/graphdata.py +++ b/mindspore/dataset/engine/graphdata.py @@ -22,7 +22,8 @@ from mindspore._c_dataengine import Tensor from .validators import check_gnn_graphdata, check_gnn_get_all_nodes, check_gnn_get_all_edges, \ check_gnn_get_nodes_from_edges, check_gnn_get_all_neighbors, check_gnn_get_sampled_neighbors, \ - check_gnn_get_neg_sampled_neighbors, check_gnn_get_node_feature, check_gnn_random_walk + check_gnn_get_neg_sampled_neighbors, check_gnn_get_node_feature, check_gnn_get_edge_feature, \ + check_gnn_random_walk class GraphData: @@ -127,7 +128,13 @@ class GraphData: @check_gnn_get_sampled_neighbors def get_sampled_neighbors(self, node_list, neighbor_nums, neighbor_types): """ - Get sampled neighbor information, maximum support 6-hop sampling. + Get sampled neighbor information. + + The api supports multi-hop neighbor sampling. That is, the previous sampling result is used as the input of + next-hop sampling. A maximum of 6-hop are allowed. 
+ + The sampling result is tiled into a list in the format of [input node, 1-hop sampling result, + 2-hop sampling result ...] Args: node_list (list or numpy.ndarray): The given list of nodes. @@ -207,6 +214,35 @@ class GraphData: Tensor(node_list), feature_types)] + + @check_gnn_get_edge_feature + def get_edge_feature(self, edge_list, feature_types): + """ + Get `feature_types` feature of the edges in `edge_list`. + + Args: + edge_list (list or numpy.ndarray): The given list of edges. + feature_types (list or ndarray): The given list of feature types. + + Returns: + numpy.ndarray: array of features. + + Examples: + >>> import mindspore.dataset as ds + >>> data_graph = ds.GraphData('dataset_file', 2) + >>> edges = data_graph.get_all_edges(0) + >>> features = data_graph.get_edge_feature(edges, [1]) + + Raises: + TypeError: If `edge_list` is not list or ndarray. + TypeError: If `feature_types` is not list or ndarray. + """ + if isinstance(edge_list, list): + edge_list = np.array(edge_list, dtype=np.int32) + return [ + t.as_array() for t in self._graph.get_edge_feature( + Tensor(edge_list), + feature_types)] + def graph_info(self): """ Get the meta information of the graph, including the number of nodes, the type of nodes, @@ -232,9 +268,10 @@ class GraphData: Args: target_nodes (list[int]): Start node list in random walk meta_path (list[int]): node type for each walk step - step_home_param (float): return hyper parameter in node2vec algorithm - step_away_param (float): inout hyper parameter in node2vec algorithm - default_node (int): default node if no more neighbors found + step_home_param (float, optional): return hyper parameter in node2vec algorithm (Default = 1.0). + step_away_param (float, optional): inout hyper parameter in node2vec algorithm (Default = 1.0). + default_node (int, optional): default node if no more neighbors found (Default = -1). + A default value of -1 indicates that no node is given. Returns: numpy.ndarray: array of nodes. 
diff --git a/mindspore/dataset/engine/iterators.py b/mindspore/dataset/engine/iterators.py index 1d2d28c1c0..a2a23cbb44 100644 --- a/mindspore/dataset/engine/iterators.py +++ b/mindspore/dataset/engine/iterators.py @@ -17,7 +17,9 @@ from abc import abstractmethod import copy import weakref +import numpy as np +from mindspore.common.tensor import Tensor from mindspore._c_dataengine import DEPipeline from mindspore._c_dataengine import OpName @@ -287,3 +289,32 @@ class TupleIterator(Iterator): """ return [t.as_array() for t in self.depipeline.GetNextAsList()] + + +class DummyIterator(): + """ + A DummyIterator only works when env MS_ROLE="MS_PSERVER" or MS_ROLE="MS_SCHED" + """ + def __init__(self, dataset, mode): + self.mode = mode + self.shapes = dataset.output_shapes() + self.types = dataset.output_types() + self.fetched_first = False + + def __get_tensor(self): + tensor_row = [] + for np_shape, np_type in zip(self.shapes, self.types): + input_np = np.zeros(np_shape, np_type) + tensor = Tensor(input_np) + tensor_row.append(tensor) + return tensor_row + + def __iter__(self): + return self + + def __next__(self): + if self.mode == "tuple": + if not self.fetched_first: + self.fetched_first = True + return self.__get_tensor() + raise StopIteration() diff --git a/mindspore/dataset/engine/serializer_deserializer.py b/mindspore/dataset/engine/serializer_deserializer.py index 9d3339e26d..8fd3a2bb9b 100644 --- a/mindspore/dataset/engine/serializer_deserializer.py +++ b/mindspore/dataset/engine/serializer_deserializer.py @@ -22,7 +22,7 @@ import sys from mindspore import log as logger from . import datasets as de from ..transforms.vision.utils import Inter, Border -from ..core.configuration import config +from ..core import config def serialize(dataset, json_filepath=None): """ @@ -173,7 +173,9 @@ def traverse(node): # num_samples, shard_id, num_shards, shuffle # These arguments get moved into the sampler itself, so they are no longer needed to # be set at the dataset level. 
- if 'sampler' in node_args.keys(): + # TF Record is a special case because it uses both the dataset and sampler arguments + # which is not decided until later during tree preparation phase. + if node_repr['op_type'] != 'TFRecordDataset' and 'sampler' in node_args.keys(): if 'num_samples' in node_repr.keys(): node_repr['num_samples'] = None if 'shuffle' in node_repr.keys(): diff --git a/mindspore/dataset/engine/validators.py b/mindspore/dataset/engine/validators.py index 744a9b94be..29904f1a9e 100644 --- a/mindspore/dataset/engine/validators.py +++ b/mindspore/dataset/engine/validators.py @@ -9,335 +9,151 @@ # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and +# See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Built-in validators. +""" +Built-in validators. """ import inspect as ins import os from functools import wraps -from multiprocessing import cpu_count import numpy as np from mindspore._c_expression import typing +from ..core.validator_helpers import parse_user_args, type_check, type_check_list, check_value, \ + INT32_MAX, check_valid_detype, check_dir, check_file, check_sampler_shuffle_shard_options, \ + validate_dataset_param_value, check_padding_options, check_gnn_list_or_ndarray, check_num_parallel_workers, \ + check_columns, check_pos_int32 from . import datasets from . import samplers +from . 
import cache_client -INT32_MAX = 2147483647 -valid_detype = [ - "bool", "int8", "int16", "int32", "int64", "uint8", "uint16", - "uint32", "uint64", "float16", "float32", "float64", "string" -] - - -def check_valid_detype(type_): - if type_ not in valid_detype: - raise ValueError("Unknown column type") - return True - - -def check_filename(path): - """ - check the filename in the path - - Args: - path (str): the path - - Returns: - Exception: when error - """ - if not isinstance(path, str): - raise TypeError("path: {} is not string".format(path)) - filename = os.path.basename(path) - - # '#', ':', '|', ' ', '}', '"', '+', '!', ']', '[', '\\', '`', - # '&', '.', '/', '@', "'", '^', ',', '_', '<', ';', '~', '>', - # '*', '(', '%', ')', '-', '=', '{', '?', '$' - forbidden_symbols = set(r'\/:*?"<>|`&\';') - - if set(filename) & forbidden_symbols: - raise ValueError(r"filename should not contains \/:*?\"<>|`&;\'") - - if filename.startswith(' ') or filename.endswith(' '): - raise ValueError("filename should not start/end with space") - - return True - - -def make_param_dict(method, args, kwargs): - """Return a dictionary of the method's args and kwargs.""" - sig = ins.signature(method) - params = sig.parameters - keys = list(params.keys()) - param_dict = dict() - try: - for name, value in enumerate(args): - param_dict[keys[name]] = value - except IndexError: - raise TypeError("{0}() expected {1} arguments, but {2} were given".format( - method.__name__, len(keys) - 1, len(args) - 1)) - - param_dict.update(zip(params.keys(), args)) - param_dict.update(kwargs) - - for name, value in params.items(): - if name not in param_dict: - param_dict[name] = value.default - return param_dict - - -def check_type(param, param_name, valid_type): - if (not isinstance(param, valid_type)) or (valid_type == int and isinstance(param, bool)): - raise TypeError("Wrong input type for {0}, should be {1}, got {2}".format(param_name, valid_type, type(param))) - - -def check_param_type(param_list, 
param_dict, param_type): - for param_name in param_list: - if param_dict.get(param_name) is not None: - if param_name == 'num_parallel_workers': - check_num_parallel_workers(param_dict.get(param_name)) - if param_name == 'num_samples': - check_num_samples(param_dict.get(param_name)) - else: - check_type(param_dict.get(param_name), param_name, param_type) - - -def check_positive_int32(param, param_name): - check_interval_closed(param, param_name, [1, INT32_MAX]) - - -def check_interval_closed(param, param_name, valid_range): - if param < valid_range[0] or param > valid_range[1]: - raise ValueError("The value of {0} exceeds the closed interval range {1}.".format(param_name, valid_range)) - - -def check_num_parallel_workers(value): - check_type(value, 'num_parallel_workers', int) - if value < 1 or value > cpu_count(): - raise ValueError("num_parallel_workers exceeds the boundary between 1 and {}!".format(cpu_count())) - - -def check_num_samples(value): - check_type(value, 'num_samples', int) - if value < 0: - raise ValueError("num_samples cannot be less than 0!") - - -def check_dataset_dir(dataset_dir): - if not os.path.isdir(dataset_dir) or not os.access(dataset_dir, os.R_OK): - raise ValueError("The folder {} does not exist or permission denied!".format(dataset_dir)) - - -def check_dataset_file(dataset_file): - check_filename(dataset_file) - if not os.path.isfile(dataset_file) or not os.access(dataset_file, os.R_OK): - raise ValueError("The file {} does not exist or permission denied!".format(dataset_file)) - - -def check_sampler_shuffle_shard_options(param_dict): - """check for valid shuffle, sampler, num_shards, and shard_id inputs.""" - shuffle, sampler = param_dict.get('shuffle'), param_dict.get('sampler') - num_shards, shard_id = param_dict.get('num_shards'), param_dict.get('shard_id') - - if sampler is not None and not isinstance(sampler, (samplers.BuiltinSampler, samplers.Sampler)): - raise TypeError("sampler is not a valid Sampler type.") - - if sampler is 
not None: - if shuffle is not None: - raise RuntimeError("sampler and shuffle cannot be specified at the same time.") - - if num_shards is not None: - raise RuntimeError("sampler and sharding cannot be specified at the same time.") - - if num_shards is not None: - check_positive_int32(num_shards, "num_shards") - if shard_id is None: - raise RuntimeError("num_shards is specified and currently requires shard_id as well.") - if shard_id < 0 or shard_id >= num_shards: - raise ValueError("shard_id is invalid, shard_id={}".format(shard_id)) - - if num_shards is None and shard_id is not None: - raise RuntimeError("shard_id is specified but num_shards is not.") - - -def check_padding_options(param_dict): - """ check for valid padded_sample and num_padded of padded samples""" - columns_list = param_dict.get('columns_list') - block_reader = param_dict.get('block_reader') - padded_sample, num_padded = param_dict.get('padded_sample'), param_dict.get('num_padded') - if padded_sample is not None: - if num_padded is None: - raise RuntimeError("padded_sample is specified and requires num_padded as well.") - if num_padded < 0: - raise ValueError("num_padded is invalid, num_padded={}.".format(num_padded)) - if columns_list is None: - raise RuntimeError("padded_sample is specified and requires columns_list as well.") - for column in columns_list: - if column not in padded_sample: - raise ValueError("padded_sample cannot match columns_list.") - if block_reader: - raise RuntimeError("block_reader and padded_sample cannot be specified at the same time.") - - if padded_sample is None and num_padded is not None: - raise RuntimeError("num_padded is specified but padded_sample is not.") def check_imagefolderdatasetv2(method): - """A wrapper that wrap a parameter checker to the original Dataset(ImageFolderDatasetV2).""" + """A wrapper that wraps a parameter checker to the original Dataset(ImageFolderDatasetV2).""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = 
make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + _, param_dict = parse_user_args(method, *args, **kwargs) nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id'] nreq_param_bool = ['shuffle', 'decode'] nreq_param_list = ['extensions'] nreq_param_dict = ['class_indexing'] - # check dataset_dir; required argument dataset_dir = param_dict.get('dataset_dir') - if dataset_dir is None: - raise ValueError("dataset_dir is not provided.") - check_dataset_dir(dataset_dir) - - check_param_type(nreq_param_int, param_dict, int) - - check_param_type(nreq_param_bool, param_dict, bool) - - check_param_type(nreq_param_list, param_dict, list) - - check_param_type(nreq_param_dict, param_dict, dict) + check_dir(dataset_dir) + validate_dataset_param_value(nreq_param_int, param_dict, int) + validate_dataset_param_value(nreq_param_bool, param_dict, bool) + validate_dataset_param_value(nreq_param_list, param_dict, list) + validate_dataset_param_value(nreq_param_dict, param_dict, dict) check_sampler_shuffle_shard_options(param_dict) - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_mnist_cifar_dataset(method): - """A wrapper that wrap a parameter checker to the original Dataset(ManifestDataset, Cifar10/100Dataset).""" + """A wrapper that wraps a parameter checker to the original Dataset(ManifestDataset, Cifar10/100Dataset).""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + _, param_dict = parse_user_args(method, *args, **kwargs) nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id'] nreq_param_bool = ['shuffle'] - # check dataset_dir; required argument dataset_dir = param_dict.get('dataset_dir') - if dataset_dir is None: - raise ValueError("dataset_dir is not provided.") - check_dataset_dir(dataset_dir) - - check_param_type(nreq_param_int, param_dict, int) + 
check_dir(dataset_dir) - check_param_type(nreq_param_bool, param_dict, bool) + validate_dataset_param_value(nreq_param_int, param_dict, int) + validate_dataset_param_value(nreq_param_bool, param_dict, bool) check_sampler_shuffle_shard_options(param_dict) - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_manifestdataset(method): - """A wrapper that wrap a parameter checker to the original Dataset(ManifestDataset).""" + """A wrapper that wraps a parameter checker to the original Dataset(ManifestDataset).""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + _, param_dict = parse_user_args(method, *args, **kwargs) nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id'] nreq_param_bool = ['shuffle', 'decode'] nreq_param_str = ['usage'] nreq_param_dict = ['class_indexing'] - # check dataset_file; required argument dataset_file = param_dict.get('dataset_file') - if dataset_file is None: - raise ValueError("dataset_file is not provided.") - check_dataset_file(dataset_file) - - check_param_type(nreq_param_int, param_dict, int) + check_file(dataset_file) - check_param_type(nreq_param_bool, param_dict, bool) - - check_param_type(nreq_param_str, param_dict, str) - - check_param_type(nreq_param_dict, param_dict, dict) + validate_dataset_param_value(nreq_param_int, param_dict, int) + validate_dataset_param_value(nreq_param_bool, param_dict, bool) + validate_dataset_param_value(nreq_param_str, param_dict, str) + validate_dataset_param_value(nreq_param_dict, param_dict, dict) check_sampler_shuffle_shard_options(param_dict) - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_tfrecorddataset(method): - """A wrapper that wrap a parameter checker to the original Dataset(TFRecordDataset).""" + """A wrapper that wraps a parameter checker to the original 
Dataset(TFRecordDataset).""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + _, param_dict = parse_user_args(method, *args, **kwargs) nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id'] nreq_param_list = ['columns_list'] nreq_param_bool = ['shard_equal_rows'] - # check dataset_files; required argument dataset_files = param_dict.get('dataset_files') - if dataset_files is None: - raise ValueError("dataset_files is not provided.") if not isinstance(dataset_files, (str, list)): raise TypeError("dataset_files should be of type str or a list of strings.") - check_param_type(nreq_param_int, param_dict, int) - - check_param_type(nreq_param_list, param_dict, list) - - check_param_type(nreq_param_bool, param_dict, bool) + validate_dataset_param_value(nreq_param_int, param_dict, int) + validate_dataset_param_value(nreq_param_list, param_dict, list) + validate_dataset_param_value(nreq_param_bool, param_dict, bool) check_sampler_shuffle_shard_options(param_dict) - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_vocdataset(method): - """A wrapper that wrap a parameter checker to the original Dataset(VOCDataset).""" + """A wrapper that wraps a parameter checker to the original Dataset(VOCDataset).""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + _, param_dict = parse_user_args(method, *args, **kwargs) nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id'] nreq_param_bool = ['shuffle', 'decode'] nreq_param_dict = ['class_indexing'] - # check dataset_dir; required argument dataset_dir = param_dict.get('dataset_dir') - if dataset_dir is None: - raise ValueError("dataset_dir is not provided.") - check_dataset_dir(dataset_dir) - # check task; required argument + 
check_dir(dataset_dir) + task = param_dict.get('task') - if task is None: - raise ValueError("task is not provided.") - if not isinstance(task, str): - raise TypeError("task is not str type.") - # check mode; required argument + type_check(task, (str,), "task") + mode = param_dict.get('mode') - if mode is None: - raise ValueError("mode is not provided.") - if not isinstance(mode, str): - raise TypeError("mode is not str type.") + type_check(mode, (str,), "mode") - imagesets_file = "" if task == "Segmentation": imagesets_file = os.path.join(dataset_dir, "ImageSets", "Segmentation", mode + ".txt") if param_dict.get('class_indexing') is not None: @@ -347,92 +163,74 @@ def check_vocdataset(method): else: raise ValueError("Invalid task : " + task) - check_dataset_file(imagesets_file) - - check_param_type(nreq_param_int, param_dict, int) - - check_param_type(nreq_param_bool, param_dict, bool) - - check_param_type(nreq_param_dict, param_dict, dict) + check_file(imagesets_file) + validate_dataset_param_value(nreq_param_int, param_dict, int) + validate_dataset_param_value(nreq_param_bool, param_dict, bool) + validate_dataset_param_value(nreq_param_dict, param_dict, dict) check_sampler_shuffle_shard_options(param_dict) - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_cocodataset(method): - """A wrapper that wrap a parameter checker to the original Dataset(CocoDataset).""" + """A wrapper that wraps a parameter checker to the original Dataset(CocoDataset).""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + _, param_dict = parse_user_args(method, *args, **kwargs) nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id'] nreq_param_bool = ['shuffle', 'decode'] - # check dataset_dir; required argument dataset_dir = param_dict.get('dataset_dir') - if dataset_dir is None: - raise ValueError("dataset_dir is not 
provided.") - check_dataset_dir(dataset_dir) + check_dir(dataset_dir) - # check annotation_file; required argument annotation_file = param_dict.get('annotation_file') - if annotation_file is None: - raise ValueError("annotation_file is not provided.") - check_dataset_file(annotation_file) + check_file(annotation_file) - # check task; required argument task = param_dict.get('task') - if task is None: - raise ValueError("task is not provided.") - if not isinstance(task, str): - raise TypeError("task is not str type.") + type_check(task, (str,), "task") if task not in {'Detection', 'Stuff', 'Panoptic', 'Keypoint'}: raise ValueError("Invalid task type") - check_param_type(nreq_param_int, param_dict, int) + validate_dataset_param_value(nreq_param_int, param_dict, int) - check_param_type(nreq_param_bool, param_dict, bool) + validate_dataset_param_value(nreq_param_bool, param_dict, bool) sampler = param_dict.get('sampler') if sampler is not None and isinstance(sampler, samplers.PKSampler): raise ValueError("CocoDataset doesn't support PKSampler") check_sampler_shuffle_shard_options(param_dict) - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_celebadataset(method): - """A wrapper that wrap a parameter checker to the original Dataset(CelebADataset).""" + """A wrapper that wraps a parameter checker to the original Dataset(CelebADataset).""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + _, param_dict = parse_user_args(method, *args, **kwargs) nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id'] nreq_param_bool = ['shuffle', 'decode'] nreq_param_list = ['extensions'] nreq_param_str = ['dataset_type'] - # check dataset_dir; required argument dataset_dir = param_dict.get('dataset_dir') - if dataset_dir is None: - raise ValueError("dataset_dir is not provided.") - check_dataset_dir(dataset_dir) - 
check_param_type(nreq_param_int, param_dict, int) + check_dir(dataset_dir) - check_param_type(nreq_param_bool, param_dict, bool) - - check_param_type(nreq_param_list, param_dict, list) - - check_param_type(nreq_param_str, param_dict, str) + validate_dataset_param_value(nreq_param_int, param_dict, int) + validate_dataset_param_value(nreq_param_bool, param_dict, bool) + validate_dataset_param_value(nreq_param_list, param_dict, list) + validate_dataset_param_value(nreq_param_str, param_dict, str) dataset_type = param_dict.get('dataset_type') if dataset_type is not None and dataset_type not in ('all', 'train', 'valid', 'test'): @@ -444,67 +242,58 @@ def check_celebadataset(method): if sampler is not None and isinstance(sampler, samplers.PKSampler): raise ValueError("CelebADataset does not support PKSampler.") - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_minddataset(method): - """A wrapper that wrap a parameter checker to the original Dataset(MindDataset).""" + """A wrapper that wraps a parameter checker to the original Dataset(MindDataset).""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + _, param_dict = parse_user_args(method, *args, **kwargs) nreq_param_int = ['num_samples', 'num_parallel_workers', 'seed', 'num_shards', 'shard_id', 'num_padded'] nreq_param_list = ['columns_list'] nreq_param_bool = ['block_reader'] nreq_param_dict = ['padded_sample'] - # check dataset_file; required argument dataset_file = param_dict.get('dataset_file') - if dataset_file is None: - raise ValueError("dataset_file is not provided.") if isinstance(dataset_file, list): for f in dataset_file: - check_dataset_file(f) + check_file(f) else: - check_dataset_file(dataset_file) - - check_param_type(nreq_param_int, param_dict, int) - - check_param_type(nreq_param_list, param_dict, list) - - check_param_type(nreq_param_bool, param_dict, bool) + 
check_file(dataset_file) - check_param_type(nreq_param_dict, param_dict, dict) + validate_dataset_param_value(nreq_param_int, param_dict, int) + validate_dataset_param_value(nreq_param_list, param_dict, list) + validate_dataset_param_value(nreq_param_bool, param_dict, bool) + validate_dataset_param_value(nreq_param_dict, param_dict, dict) check_sampler_shuffle_shard_options(param_dict) check_padding_options(param_dict) - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_generatordataset(method): - """A wrapper that wrap a parameter checker to the original Dataset(GeneratorDataset).""" + """A wrapper that wraps a parameter checker to the original Dataset(GeneratorDataset).""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + _, param_dict = parse_user_args(method, *args, **kwargs) - # check generator_function; required argument source = param_dict.get('source') - if source is None: - raise ValueError("source is not provided.") + if not callable(source): try: iter(source) except TypeError: raise TypeError("source should be callable, iterable or random accessible") - # check column_names or schema; required argument column_names = param_dict.get('column_names') if column_names is not None: check_columns(column_names, "column_names") @@ -518,11 +307,11 @@ def check_generatordataset(method): # check optional argument nreq_param_int = ["num_samples", "num_parallel_workers", "num_shards", "shard_id"] - check_param_type(nreq_param_int, param_dict, int) + validate_dataset_param_value(nreq_param_int, param_dict, int) nreq_param_list = ["column_types"] - check_param_type(nreq_param_list, param_dict, list) + validate_dataset_param_value(nreq_param_list, param_dict, list) nreq_param_bool = ["shuffle"] - check_param_type(nreq_param_bool, param_dict, bool) + validate_dataset_param_value(nreq_param_bool, param_dict, bool) num_shards = 
param_dict.get("num_shards") shard_id = param_dict.get("shard_id") @@ -530,9 +319,9 @@ def check_generatordataset(method): # These two parameters appear together. raise ValueError("num_shards and shard_id need to be passed in together") if num_shards is not None: - check_positive_int32(num_shards, "num_shards") + check_pos_int32(num_shards, "num_shards") if shard_id >= num_shards: - raise ValueError("shard_id should be less than num_shards") + raise ValueError("shard_id should be less than num_shards.") sampler = param_dict.get("sampler") if sampler is not None: @@ -551,81 +340,73 @@ def check_generatordataset(method): if num_shards is not None and not hasattr(source, "__getitem__"): raise ValueError("num_shards is not supported if source does not have attribute '__getitem__'") - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method +def check_random_dataset(method): + """A wrapper that wraps a parameter checker to the original Dataset(RandomDataset).""" -def check_batch_size(batch_size): - if not (isinstance(batch_size, int) or (callable(batch_size))): - raise TypeError("batch_size should either be an int or a callable.") - if callable(batch_size): - sig = ins.signature(batch_size) - if len(sig.parameters) != 1: - raise ValueError("batch_size callable should take one parameter (BatchInfo).") + @wraps(method) + def new_method(self, *args, **kwargs): + _, param_dict = parse_user_args(method, *args, **kwargs) + nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id', 'total_rows'] + nreq_param_bool = ['shuffle'] + nreq_param_list = ['columns_list'] -def check_count(count): - check_type(count, 'count', int) - if (count <= 0 and count != -1) or count > INT32_MAX: - raise ValueError("count should be either -1 or positive integer.") + validate_dataset_param_value(nreq_param_int, param_dict, int) + validate_dataset_param_value(nreq_param_bool, param_dict, bool) + validate_dataset_param_value(nreq_param_list, 
param_dict, list) + check_sampler_shuffle_shard_options(param_dict) + + return method(self, *args, **kwargs) -def check_columns(columns, name): - if isinstance(columns, list): - for column in columns: - if not isinstance(column, str): - raise TypeError("Each column in {0} should be of type str. Got {1}.".format(name, type(column))) - elif not isinstance(columns, str): - raise TypeError("{} should be either a list of strings or a single string.".format(name)) + return new_method def check_pad_info(key, val): """check the key and value pair of pad_info in batch""" - check_type(key, "key in pad_info", str) + type_check(key, (str,), "key in pad_info") + if val is not None: assert len(val) == 2, "value of pad_info should be a tuple of size 2" - check_type(val, "value in pad_info", tuple) + type_check(val, (tuple,), "value in pad_info") + if val[0] is not None: - check_type(val[0], "pad_shape", list) + type_check(val[0], (list,), "pad_shape") + for dim in val[0]: if dim is not None: - check_type(dim, "dim in pad_shape", int) + type_check(dim, (int,), "dim in pad_shape") assert dim > 0, "pad shape should be positive integers" if val[1] is not None: - check_type(val[1], "pad_value", (int, float, str, bytes)) + type_check(val[1], (int, float, str, bytes), "pad_value") def check_bucket_batch_by_length(method): """check the input arguments of bucket_batch_by_length.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + [column_names, bucket_boundaries, bucket_batch_sizes, element_length_function, pad_info, + pad_to_bucket_boundary, drop_remainder], _ = parse_user_args(method, *args, **kwargs) nreq_param_list = ['column_names', 'bucket_boundaries', 'bucket_batch_sizes'] - check_param_type(nreq_param_list, param_dict, list) + + type_check_list([column_names, bucket_boundaries, bucket_batch_sizes], (list,), nreq_param_list) nbool_param_list = ['pad_to_bucket_boundary', 
'drop_remainder'] - check_param_type(nbool_param_list, param_dict, bool) + type_check_list([pad_to_bucket_boundary, drop_remainder], (bool,), nbool_param_list) # check column_names: must be list of string. - column_names = param_dict.get("column_names") - - if not column_names: - raise ValueError("column_names cannot be empty") + check_columns(column_names, "column_names") - all_string = all(isinstance(item, str) for item in column_names) - if not all_string: - raise TypeError("column_names should be a list of str.") - - element_length_function = param_dict.get("element_length_function") if element_length_function is None and len(column_names) != 1: raise ValueError("If element_length_function is not specified, exactly one column name should be passed.") # check bucket_boundaries: must be list of int, positive and strictly increasing - bucket_boundaries = param_dict.get('bucket_boundaries') - if not bucket_boundaries: raise ValueError("bucket_boundaries cannot be empty.") @@ -633,16 +414,15 @@ def check_bucket_batch_by_length(method): if not all_int: raise TypeError("bucket_boundaries should be a list of int.") - all_non_negative = all(item >= 0 for item in bucket_boundaries) + all_non_negative = all(item > 0 for item in bucket_boundaries) if not all_non_negative: - raise ValueError("bucket_boundaries cannot contain any negative numbers.") + raise ValueError("bucket_boundaries must only contain positive numbers.") for i in range(len(bucket_boundaries) - 1): if not bucket_boundaries[i + 1] > bucket_boundaries[i]: raise ValueError("bucket_boundaries should be strictly increasing.") # check bucket_batch_sizes: must be list of int and positive - bucket_batch_sizes = param_dict.get('bucket_batch_sizes') if len(bucket_batch_sizes) != len(bucket_boundaries) + 1: raise ValueError("bucket_batch_sizes must contain one element more than bucket_boundaries.") @@ -654,12 +434,13 @@ def check_bucket_batch_by_length(method): if not all_non_negative: raise 
ValueError("bucket_batch_sizes should be a list of positive numbers.") - if param_dict.get('pad_info') is not None: - check_type(param_dict["pad_info"], "pad_info", dict) - for k, v in param_dict.get('pad_info').items(): + if pad_info is not None: + type_check(pad_info, (dict,), "pad_info") + + for k, v in pad_info.items(): check_pad_info(k, v) - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -668,37 +449,33 @@ def check_batch(method): """check the input arguments of batch.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) - - nreq_param_int = ['num_parallel_workers'] - nreq_param_bool = ['drop_remainder'] - nreq_param_columns = ['input_columns'] + def new_method(self, *args, **kwargs): + [batch_size, drop_remainder, num_parallel_workers, per_batch_map, + input_columns, pad_info], param_dict = parse_user_args(method, *args, **kwargs) - # check batch_size; required argument - batch_size = param_dict.get("batch_size") - if batch_size is None: - raise ValueError("batch_size is not provided.") - check_batch_size(batch_size) + if not (isinstance(batch_size, int) or (callable(batch_size))): + raise TypeError("batch_size should either be an int or a callable.") - check_param_type(nreq_param_int, param_dict, int) + if callable(batch_size): + sig = ins.signature(batch_size) + if len(sig.parameters) != 1: + raise ValueError("batch_size callable should take one parameter (BatchInfo).") - check_param_type(nreq_param_bool, param_dict, bool) + if num_parallel_workers is not None: + check_num_parallel_workers(num_parallel_workers) + type_check(drop_remainder, (bool,), "drop_remainder") - if (param_dict.get('pad_info') is not None) and (param_dict.get('per_batch_map') is not None): + if (pad_info is not None) and (per_batch_map is not None): raise ValueError("pad_info and per_batch_map can't both be set") - if param_dict.get('pad_info') is not None: - 
check_type(param_dict["pad_info"], "pad_info", dict) + if pad_info is not None: + type_check(param_dict["pad_info"], (dict,), "pad_info") for k, v in param_dict.get('pad_info').items(): check_pad_info(k, v) - for param_name in nreq_param_columns: - param = param_dict.get(param_name) - if param is not None: - check_columns(param, param_name) + if input_columns is not None: + check_columns(input_columns, "input_columns") - per_batch_map, input_columns = param_dict.get('per_batch_map'), param_dict.get('input_columns') if (per_batch_map is None) != (input_columns is None): # These two parameters appear together. raise ValueError("per_batch_map and input_columns need to be passed in together.") @@ -709,43 +486,38 @@ def check_batch(method): if len(input_columns) != (len(ins.signature(per_batch_map).parameters) - 1): raise ValueError("the signature of per_batch_map should match with input columns") - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method + def check_sync_wait(method): """check the input arguments of sync_wait.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) - - nreq_param_str = ['condition_name'] - nreq_param_int = ['step_size'] + def new_method(self, *args, **kwargs): + [condition_name, num_batch, _], _ = parse_user_args(method, *args, **kwargs) - check_param_type(nreq_param_int, param_dict, int) + type_check(condition_name, (str,), "condition_name") + type_check(num_batch, (int,), "num_batch") - check_param_type(nreq_param_str, param_dict, str) - - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method + def check_shuffle(method): """check the input arguments of shuffle.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + [buffer_size], _ = parse_user_args(method, *args, **kwargs) - # check buffer_size; required argument - buffer_size 
= param_dict.get("buffer_size") - if buffer_size is None: - raise ValueError("buffer_size is not provided.") - check_type(buffer_size, 'buffer_size', int) - check_interval_closed(buffer_size, 'buffer_size', [2, INT32_MAX]) + type_check(buffer_size, (int,), "buffer_size") - return method(*args, **kwargs) + check_value(buffer_size, [2, INT32_MAX], "buffer_size") + + return method(self, *args, **kwargs) return new_method @@ -754,23 +526,25 @@ def check_map(method): """check the input arguments of map.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + [input_columns, _, output_columns, columns_order, num_parallel_workers, python_multiprocessing, cache], _ = \ + parse_user_args(method, *args, **kwargs) - nreq_param_list = ['columns_order'] - nreq_param_int = ['num_parallel_workers'] nreq_param_columns = ['input_columns', 'output_columns'] - nreq_param_bool = ['python_multiprocessing'] - check_param_type(nreq_param_list, param_dict, list) - check_param_type(nreq_param_int, param_dict, int) - check_param_type(nreq_param_bool, param_dict, bool) - for param_name in nreq_param_columns: - param = param_dict.get(param_name) + if columns_order is not None: + type_check(columns_order, (list,), "columns_order") + if num_parallel_workers is not None: + check_num_parallel_workers(num_parallel_workers) + type_check(python_multiprocessing, (bool,), "python_multiprocessing") + if cache is not None: + type_check(cache, (cache_client.DatasetCache,), "cache") + + for param_name, param in zip(nreq_param_columns, [input_columns, output_columns]): if param is not None: check_columns(param, param_name) - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -779,19 +553,20 @@ def check_filter(method): """"check the input arguments of filter.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) - predicate 
= param_dict.get("predicate") + def new_method(self, *args, **kwargs): + [predicate, input_columns, num_parallel_workers], _ = parse_user_args(method, *args, **kwargs) if not callable(predicate): raise TypeError("Predicate should be a python function or a callable python object.") - nreq_param_int = ['num_parallel_workers'] - check_param_type(nreq_param_int, param_dict, int) - param_name = "input_columns" - param = param_dict.get(param_name) - if param is not None: - check_columns(param, param_name) - return method(*args, **kwargs) + check_num_parallel_workers(num_parallel_workers) + + if num_parallel_workers is not None: + check_num_parallel_workers(num_parallel_workers) + + if input_columns is not None: + check_columns(input_columns, "input_columns") + + return method(self, *args, **kwargs) return new_method @@ -800,14 +575,13 @@ def check_repeat(method): """check the input arguments of repeat.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + [count], _ = parse_user_args(method, *args, **kwargs) - count = param_dict.get('count') - if count is not None: - check_count(count) - - return method(*args, **kwargs) + type_check(count, (int, type(None)), "repeat") + if isinstance(count, int): + check_value(count, (-1, INT32_MAX), "count") + return method(self, *args, **kwargs) return new_method @@ -816,15 +590,13 @@ def check_skip(method): """check the input arguments of skip.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + [count], _ = parse_user_args(method, *args, **kwargs) - count = param_dict.get('count') - check_type(count, 'count', int) - if count < 0: - raise ValueError("Skip count must be positive integer or 0.") + type_check(count, (int,), "count") + check_value(count, (-1, INT32_MAX), "count") - return method(*args, **kwargs) + return method(self, *args, **kwargs) 
return new_method @@ -833,13 +605,32 @@ def check_take(method): """check the input arguments of take.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + [count], _ = parse_user_args(method, *args, **kwargs) + type_check(count, (int,), "count") + if (count <= 0 and count != -1) or count > INT32_MAX: + raise ValueError("count should be either -1 or positive integer.") - count = param_dict.get('count') - check_count(count) + return method(self, *args, **kwargs) - return method(*args, **kwargs) + return new_method + + +def check_positive_int32(method): + """check whether the input argument is positive and int, only works for functions with one input.""" + + @wraps(method) + def new_method(self, *args, **kwargs): + [count], param_dict = parse_user_args(method, *args, **kwargs) + para_name = None + for key in list(param_dict.keys()): + if key not in ['self', 'cls']: + para_name = key + # Need to get default value of param + if count is not None: + check_pos_int32(count, para_name) + + return method(self, *args, **kwargs) return new_method @@ -849,13 +640,8 @@ def check_zip(method): @wraps(method) def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) - - # check datasets; required argument - ds = param_dict.get("datasets") - if ds is None: - raise ValueError("datasets is not provided.") - check_type(ds, 'datasets', tuple) + [ds], _ = parse_user_args(method, *args, **kwargs) + type_check(ds, (tuple,), "datasets") return method(*args, **kwargs) @@ -866,18 +652,11 @@ def check_zip_dataset(method): """check the input arguments of zip method in `Dataset`.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) - - # check datasets; required argument - ds = param_dict.get("datasets") - if ds is None: - raise ValueError("datasets is not provided.") + def new_method(self, *args, **kwargs): + [ds], _ = 
parse_user_args(method, *args, **kwargs) + type_check(ds, (tuple, datasets.Dataset), "datasets") - if not isinstance(ds, (tuple, datasets.Dataset)): - raise TypeError("datasets is not tuple or of type Dataset.") - - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -886,18 +665,13 @@ def check_concat(method): """check the input arguments of concat method in `Dataset`.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) - - # check datasets; required argument - ds = param_dict.get("datasets") - if ds is None: - raise ValueError("datasets is not provided.") - - if not isinstance(ds, (list, datasets.Dataset)): - raise TypeError("datasets is not list or of type Dataset.") - - return method(*args, **kwargs) + def new_method(self, *args, **kwargs): + [ds], _ = parse_user_args(method, *args, **kwargs) + type_check(ds, (list, datasets.Dataset), "datasets") + if isinstance(ds, list): + dataset_names = ["dataset[{0}]".format(i) for i in range(len(ds)) if isinstance(ds, list)] + type_check_list(ds, (datasets.Dataset,), dataset_names) + return method(self, *args, **kwargs) return new_method @@ -906,26 +680,23 @@ def check_rename(method): """check the input arguments of rename.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + values, _ = parse_user_args(method, *args, **kwargs) req_param_columns = ['input_columns', 'output_columns'] - # check req_param_list; required arguments - for param_name in req_param_columns: - param = param_dict.get(param_name) - if param is None: - raise ValueError("{} is not provided.".format(param_name)) + for param_name, param in zip(req_param_columns, values): check_columns(param, param_name) input_size, output_size = 1, 1 - if isinstance(param_dict.get(req_param_columns[0]), list): - input_size = len(param_dict.get(req_param_columns[0])) - if 
isinstance(param_dict.get(req_param_columns[1]), list): - output_size = len(param_dict.get(req_param_columns[1])) + input_columns, output_columns = values + if isinstance(input_columns, list): + input_size = len(input_columns) + if isinstance(output_columns, list): + output_size = len(output_columns) if input_size != output_size: raise ValueError("Number of column in input_columns and output_columns is not equal.") - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -934,75 +705,54 @@ def check_project(method): """check the input arguments of project.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) - - # check columns; required argument - columns = param_dict.get("columns") - if columns is None: - raise ValueError("columns is not provided.") + def new_method(self, *args, **kwargs): + [columns], _ = parse_user_args(method, *args, **kwargs) check_columns(columns, 'columns') - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method -def check_shape(shape, name): - if isinstance(shape, list): - for element in shape: - if not isinstance(element, int): - raise TypeError( - "Each element in {0} should be of type int. 
Got {1}.".format(name, type(element))) - else: - raise TypeError("Expected int list.") - - def check_add_column(method): """check the input arguments of add_column.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + [name, de_type, shape], _ = parse_user_args(method, *args, **kwargs) + + type_check(name, (str,), "name") - # check name; required argument - name = param_dict.get("name") - if not isinstance(name, str) or not name: + if not name: raise TypeError("Expected non-empty string.") - # check type; required argument - de_type = param_dict.get("de_type") if de_type is not None: if not isinstance(de_type, typing.Type) and not check_valid_detype(de_type): raise TypeError("Unknown column type.") else: raise TypeError("Expected non-empty string.") - # check shape - shape = param_dict.get("shape") if shape is not None: - check_shape(shape, "shape") + type_check(shape, (list,), "shape") + shape_names = ["shape[{0}]".format(i) for i in range(len(shape))] + type_check_list(shape, (int,), shape_names) - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_cluedataset(method): - """A wrapper that wrap a parameter checker to the original Dataset(CLUEDataset).""" + """A wrapper that wraps a parameter checker to the original Dataset(CLUEDataset).""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + _, param_dict = parse_user_args(method, *args, **kwargs) nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id'] - # check dataset_files; required argument dataset_files = param_dict.get('dataset_files') - if dataset_files is None: - raise ValueError("dataset_files is not provided.") - if not isinstance(dataset_files, (str, list)): - raise TypeError("dataset_files should be of type str or a list of strings.") + 
type_check(dataset_files, (str, list), "dataset files") # check task task_param = param_dict.get('task') @@ -1014,36 +764,29 @@ def check_cluedataset(method): if usage_param not in ['train', 'test', 'eval']: raise ValueError("usage should be train, test or eval") - check_param_type(nreq_param_int, param_dict, int) - + validate_dataset_param_value(nreq_param_int, param_dict, int) check_sampler_shuffle_shard_options(param_dict) - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_textfiledataset(method): - """A wrapper that wrap a parameter checker to the original Dataset(TextFileDataset).""" + """A wrapper that wraps a parameter checker to the original Dataset(TextFileDataset).""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + _, param_dict = parse_user_args(method, *args, **kwargs) nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id'] - # check dataset_files; required argument dataset_files = param_dict.get('dataset_files') - if dataset_files is None: - raise ValueError("dataset_files is not provided.") - if not isinstance(dataset_files, (str, list)): - raise TypeError("dataset_files should be of type str or a list of strings.") - - check_param_type(nreq_param_int, param_dict, int) - + type_check(dataset_files, (str, list), "dataset files") + validate_dataset_param_value(nreq_param_int, param_dict, int) check_sampler_shuffle_shard_options(param_dict) - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -1052,19 +795,16 @@ def check_split(method): """check the input arguments of split.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + [sizes, randomize], _ = parse_user_args(method, *args, **kwargs) - nreq_param_list = ['sizes'] - nreq_param_bool = 
['randomize'] - check_param_type(nreq_param_list, param_dict, list) - check_param_type(nreq_param_bool, param_dict, bool) + type_check(sizes, (list,), "sizes") + type_check(randomize, (bool,), "randomize") # check sizes: must be list of float or list of int - sizes = param_dict.get('sizes') - if not sizes: raise ValueError("sizes cannot be empty.") + all_int = all(isinstance(item, int) for item in sizes) all_float = all(isinstance(item, float) for item in sizes) @@ -1085,7 +825,7 @@ def check_split(method): if not abs(sum(sizes) - 1) < epsilon: raise ValueError("sizes is a list of float, but the percentages do not sum up to 1.") - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -1094,123 +834,85 @@ def check_gnn_graphdata(method): """check the input arguments of graphdata.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) - - # check dataset_file; required argument - dataset_file = param_dict.get('dataset_file') - if dataset_file is None: - raise ValueError("dataset_file is not provided.") - check_dataset_file(dataset_file) - - nreq_param_int = ['num_parallel_workers'] + def new_method(self, *args, **kwargs): + [dataset_file, num_parallel_workers], _ = parse_user_args(method, *args, **kwargs) + check_file(dataset_file) - check_param_type(nreq_param_int, param_dict, int) - - return method(*args, **kwargs) + if num_parallel_workers is not None: + check_num_parallel_workers(num_parallel_workers) + return method(self, *args, **kwargs) return new_method -def check_gnn_list_or_ndarray(param, param_name): - """Check if the input parameter is list or numpy.ndarray.""" - - if isinstance(param, list): - for m in param: - if not isinstance(m, int): - raise TypeError( - "Each member in {0} should be of type int. 
Got {1}.".format(param_name, type(m))) - elif isinstance(param, np.ndarray): - if not param.dtype == np.int32: - raise TypeError("Each member in {0} should be of type int32. Got {1}.".format( - param_name, param.dtype)) - else: - raise TypeError("Wrong input type for {0}, should be list or numpy.ndarray, got {1}".format( - param_name, type(param))) - - def check_gnn_get_all_nodes(method): - """A wrapper that wrap a parameter checker to the GNN `get_all_nodes` function.""" + """A wrapper that wraps a parameter checker to the GNN `get_all_nodes` function.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) - - # check node_type; required argument - check_type(param_dict.get("node_type"), 'node_type', int) + def new_method(self, *args, **kwargs): + [node_type], _ = parse_user_args(method, *args, **kwargs) + type_check(node_type, (int,), "node_type") - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_gnn_get_all_edges(method): - """A wrapper that wrap a parameter checker to the GNN `get_all_edges` function.""" + """A wrapper that wraps a parameter checker to the GNN `get_all_edges` function.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + [edge_type], _ = parse_user_args(method, *args, **kwargs) + type_check(edge_type, (int,), "edge_type") - # check node_type; required argument - check_type(param_dict.get("edge_type"), 'edge_type', int) - - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_gnn_get_nodes_from_edges(method): - """A wrapper that wrap a parameter checker to the GNN `get_nodes_from_edges` function.""" + """A wrapper that wraps a parameter checker to the GNN `get_nodes_from_edges` function.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def 
new_method(self, *args, **kwargs): + [edge_list], _ = parse_user_args(method, *args, **kwargs) + check_gnn_list_or_ndarray(edge_list, "edge_list") - # check edge_list; required argument - check_gnn_list_or_ndarray(param_dict.get("edge_list"), 'edge_list') - - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_gnn_get_all_neighbors(method): - """A wrapper that wrap a parameter checker to the GNN `get_all_neighbors` function.""" + """A wrapper that wraps a parameter checker to the GNN `get_all_neighbors` function.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + [node_list, neighbour_type], _ = parse_user_args(method, *args, **kwargs) - # check node_list; required argument - check_gnn_list_or_ndarray(param_dict.get("node_list"), 'node_list') + check_gnn_list_or_ndarray(node_list, 'node_list') + type_check(neighbour_type, (int,), "neighbour_type") - # check neighbor_type; required argument - check_type(param_dict.get("neighbor_type"), 'neighbor_type', int) - - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_gnn_get_sampled_neighbors(method): - """A wrapper that wrap a parameter checker to the GNN `get_sampled_neighbors` function.""" + """A wrapper that wraps a parameter checker to the GNN `get_sampled_neighbors` function.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + [node_list, neighbor_nums, neighbor_types], _ = parse_user_args(method, *args, **kwargs) - # check node_list; required argument - check_gnn_list_or_ndarray(param_dict.get("node_list"), 'node_list') + check_gnn_list_or_ndarray(node_list, 'node_list') - # check neighbor_nums; required argument - neighbor_nums = param_dict.get("neighbor_nums") check_gnn_list_or_ndarray(neighbor_nums, 'neighbor_nums') if not 
neighbor_nums or len(neighbor_nums) > 6: raise ValueError("Wrong number of input members for {0}, should be between 1 and 6, got {1}".format( 'neighbor_nums', len(neighbor_nums))) - # check neighbor_types; required argument - neighbor_types = param_dict.get("neighbor_types") check_gnn_list_or_ndarray(neighbor_types, 'neighbor_types') if not neighbor_types or len(neighbor_types) > 6: raise ValueError("Wrong number of input members for {0}, should be between 1 and 6, got {1}".format( @@ -1220,47 +922,41 @@ def check_gnn_get_sampled_neighbors(method): raise ValueError( "The number of members of neighbor_nums and neighbor_types is inconsistent") - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_gnn_get_neg_sampled_neighbors(method): - """A wrapper that wrap a parameter checker to the GNN `get_neg_sampled_neighbors` function.""" + """A wrapper that wraps a parameter checker to the GNN `get_neg_sampled_neighbors` function.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + [node_list, neg_neighbor_num, neg_neighbor_type], _ = parse_user_args(method, *args, **kwargs) - # check node_list; required argument - check_gnn_list_or_ndarray(param_dict.get("node_list"), 'node_list') + check_gnn_list_or_ndarray(node_list, 'node_list') + type_check(neg_neighbor_num, (int,), "neg_neighbor_num") + type_check(neg_neighbor_type, (int,), "neg_neighbor_type") - # check neg_neighbor_num; required argument - check_type(param_dict.get("neg_neighbor_num"), 'neg_neighbor_num', int) - - # check neg_neighbor_type; required argument - check_type(param_dict.get("neg_neighbor_type"), - 'neg_neighbor_type', int) - - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_gnn_random_walk(method): - """A wrapper that wrap a parameter checker to the GNN `random_walk` function.""" + """A wrapper that wraps a 
parameter checker to the GNN `random_walk` function.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) - - # check node_list; required argument - check_gnn_list_or_ndarray(param_dict.get("target_nodes"), 'target_nodes') + def new_method(self, *args, **kwargs): + [target_nodes, meta_path, step_home_param, step_away_param, default_node], _ = parse_user_args(method, *args, + **kwargs) + check_gnn_list_or_ndarray(target_nodes, 'target_nodes') + check_gnn_list_or_ndarray(meta_path, 'meta_path') + type_check(step_home_param, (float,), "step_home_param") + type_check(step_away_param, (float,), "step_away_param") + type_check(default_node, (int,), "default_node") - # check meta_path; required argument - check_gnn_list_or_ndarray(param_dict.get("meta_path"), 'meta_path') - - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -1268,8 +964,7 @@ def check_gnn_random_walk(method): def check_aligned_list(param, param_name, member_type): """Check whether the structure of each member of the list is the same.""" - if not isinstance(param, list): - raise TypeError("Parameter {0} is not a list".format(param_name)) + type_check(param, (list,), "param") if not param: raise TypeError( "Parameter {0} or its members are empty".format(param_name)) @@ -1278,6 +973,7 @@ def check_aligned_list(param, param_name, member_type): for member in param: if isinstance(member, list): check_aligned_list(member, param_name, member_type) + if member_have_list not in (None, True): raise TypeError("The type of each member of the parameter {0} is inconsistent".format( param_name)) @@ -1287,9 +983,7 @@ def check_aligned_list(param, param_name, member_type): member_have_list = True list_len = len(member) else: - if not isinstance(member, member_type): - raise TypeError("Each member in {0} should be of type int. 
Got {1}.".format( - param_name, type(member))) + type_check(member, (member_type,), param_name) if member_have_list not in (None, False): raise TypeError("The type of each member of the parameter {0} is inconsistent".format( param_name)) @@ -1297,53 +991,65 @@ def check_aligned_list(param, param_name, member_type): def check_gnn_get_node_feature(method): - """A wrapper that wrap a parameter checker to the GNN `get_node_feature` function.""" + """A wrapper that wraps a parameter checker to the GNN `get_node_feature` function.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + [node_list, feature_types], _ = parse_user_args(method, *args, **kwargs) - # check node_list; required argument - node_list = param_dict.get("node_list") + type_check(node_list, (list, np.ndarray), "node_list") if isinstance(node_list, list): check_aligned_list(node_list, 'node_list', int) elif isinstance(node_list, np.ndarray): if not node_list.dtype == np.int32: raise TypeError("Each member in {0} should be of type int32. 
Got {1}.".format( node_list, node_list.dtype)) - else: - raise TypeError("Wrong input type for {0}, should be list or numpy.ndarray, got {1}".format( - 'node_list', type(node_list))) - # check feature_types; required argument - check_gnn_list_or_ndarray(param_dict.get( - "feature_types"), 'feature_types') + check_gnn_list_or_ndarray(feature_types, 'feature_types') - return method(*args, **kwargs) + return method(self, *args, **kwargs) + + return new_method + + +def check_gnn_get_edge_feature(method): + """A wrapper that wrap a parameter checker to the GNN `get_edge_feature` function.""" + + @wraps(method) + def new_method(self, *args, **kwargs): + [edge_list, feature_types], _ = parse_user_args(method, *args, **kwargs) + + type_check(edge_list, (list, np.ndarray), "edge_list") + if isinstance(edge_list, list): + check_aligned_list(edge_list, 'edge_list', int) + elif isinstance(edge_list, np.ndarray): + if not edge_list.dtype == np.int32: + raise TypeError("Each member in {0} should be of type int32. 
Got {1}.".format( + edge_list, edge_list.dtype)) + + check_gnn_list_or_ndarray(feature_types, 'feature_types') + + return method(self, *args, **kwargs) return new_method def check_numpyslicesdataset(method): - """A wrapper that wrap a parameter checker to the original Dataset(NumpySlicesDataset).""" + """A wrapper that wraps a parameter checker to the original Dataset(NumpySlicesDataset).""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) - - # check data; required argument - data = param_dict.get('data') - if not isinstance(data, (list, tuple, dict, np.ndarray)): - raise TypeError("Unsupported data type: {}, only support some common python data type, " - "like list, tuple, dict, and numpy array.".format(type(data))) - if isinstance(data, tuple) and not isinstance(data[0], (list, np.ndarray)): - raise TypeError("Unsupported data type: when input is tuple, only support some common python " - "data type, like tuple of lists and tuple of numpy arrays.") + def new_method(self, *args, **kwargs): + _, param_dict = parse_user_args(method, *args, **kwargs) + + data = param_dict.get("data") + column_names = param_dict.get("column_names") if not data: - raise ValueError("Input data is empty.") + raise ValueError("Argument data cannot be empty") + type_check(data, (list, tuple, dict, np.ndarray), "data") + if isinstance(data, tuple): + type_check(data[0], (list, np.ndarray), "data[0]") # check column_names - column_names = param_dict.get('column_names') if column_names is not None: check_columns(column_names, "column_names") @@ -1364,6 +1070,6 @@ def check_numpyslicesdataset(method): raise ValueError("Num of input column names is {0}, but required is {1} as data is list." 
.format(column_num, 1)) - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method diff --git a/mindspore/dataset/text/transforms.py b/mindspore/dataset/text/transforms.py index 8b0d47df25..30fa2b8f42 100644 --- a/mindspore/dataset/text/transforms.py +++ b/mindspore/dataset/text/transforms.py @@ -52,8 +52,9 @@ import mindspore._c_dataengine as cde from .utils import JiebaMode, NormalizeForm, to_str from .validators import check_lookup, check_jieba_add_dict, \ - check_jieba_add_word, check_jieba_init, check_ngram, check_pair_truncate, \ - check_to_number, check_python_tokenizer + check_jieba_add_word, check_jieba_init, check_with_offsets, check_unicode_script_tokenizer,\ + check_wordpiece_tokenizer, check_regex_tokenizer, check_basic_tokenizer, check_ngram, check_pair_truncate,\ + check_to_number, check_bert_tokenizer, check_python_tokenizer from ..core.datatypes import mstype_to_detype @@ -63,17 +64,13 @@ class Lookup(cde.LookupOp): Args: vocab(Vocab): a Vocab object. - unknown(int, optional): default id to lookup a word that is out of vocab. If no argument is passed, 1 will be - used to be the default id which is the convention for unknown_token . Otherwise, user is strongly - encouraged to pass in the id for (default=None). + unknown_token(str, optional): word to use for lookup if the word being looked up is out of Vocabulary (oov). + If unknown_token is oov, runtime error will be thrown (default=None). 
""" @check_lookup - def __init__(self, vocab, unknown=None): - if unknown is None: - super().__init__(vocab) - else: - super().__init__(vocab, unknown) + def __init__(self, vocab, unknown_token=None): + super().__init__(vocab, unknown_token) class Ngram(cde.NgramOp): @@ -98,7 +95,7 @@ class Ngram(cde.NgramOp): """ @check_ngram - def __init__(self, n, left_pad=None, right_pad=None, separator=None): + def __init__(self, n, left_pad=("", 0), right_pad=("", 0), separator=" "): super().__init__(ngrams=n, l_pad_len=left_pad[1], r_pad_len=right_pad[1], l_pad_token=left_pad[0], r_pad_token=right_pad[0], separator=separator) @@ -125,15 +122,31 @@ class JiebaTokenizer(cde.JiebaTokenizerOp): - JiebaMode.MP, tokenize with MPSegment algorithm. - JiebaMode.HMM, tokenize with Hiddel Markov Model Segment algorithm. - JiebaMode.MIX, tokenize with a mix of MPSegment and HMMSegment algorithm. + with_offsets (bool, optional): If or not output offsets of tokens (default=False). + + Examples: + >>> # If with_offsets=False, default output one column {["text", dtype=str]} + >>> tokenizer_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=False) + >>> data = data.map(operations=tokenizer_op) + >>> # If with_offsets=False, then output three columns {["token", dtype=str], ["offsets_start", dtype=uint32], + >>> # ["offsets_limit", dtype=uint32]} + >>> tokenizer_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=True) + >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + >>> columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op) """ @check_jieba_init - def __init__(self, hmm_path, mp_path, mode=JiebaMode.MIX): + def __init__(self, hmm_path, mp_path, mode=JiebaMode.MIX, with_offsets=False): + if not isinstance(mode, JiebaMode): + raise TypeError("Wrong input type for mode, should be JiebaMode.") + self.mode = mode self.__check_path__(hmm_path) self.__check_path__(mp_path) + 
self.with_offsets = with_offsets super().__init__(hmm_path, mp_path, - DE_C_INTER_JIEBA_MODE[mode]) + DE_C_INTER_JIEBA_MODE[mode], + self.with_offsets) @check_jieba_add_word def add_word(self, word, freq=None): @@ -226,8 +239,26 @@ class JiebaTokenizer(cde.JiebaTokenizerOp): class UnicodeCharTokenizer(cde.UnicodeCharTokenizerOp): """ Tokenize a scalar tensor of UTF-8 string to Unicode characters. + + Args: + with_offsets (bool, optional): If or not output offsets of tokens (default=False). + + Examples: + >>> # If with_offsets=False, default output one column {["text", dtype=str]} + >>> tokenizer_op = text.UnicodeCharTokenizer() + >>> dataset = dataset.map(operations=tokenizer_op) + >>> # If with_offsets=False, then output three columns {["token", dtype=str], ["offsets_start", dtype=uint32], + >>> # ["offsets_limit", dtype=uint32]} + >>> tokenizer_op = text.UnicodeCharTokenizer(True) + >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + >>> columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op) """ + @check_with_offsets + def __init__(self, with_offsets=False): + self.with_offsets = with_offsets + super().__init__(self.with_offsets) + class WordpieceTokenizer(cde.WordpieceTokenizerOp): """ @@ -239,22 +270,58 @@ class WordpieceTokenizer(cde.WordpieceTokenizerOp): max_bytes_per_token (int, optional): Tokens exceeding this length will not be further split(default=100). unknown_token (str, optional): When we can not found the token: if 'unknown_token' is empty string, return the token directly, else return 'unknown_token'(default='[UNK]'). + with_offsets (bool, optional): If or not output offsets of tokens (default=False). 
+ + Examples: + >>> # If with_offsets=False, default output one column {["text", dtype=str]} + >>> tokenizer_op = text.WordpieceTokenizer(vocab=vocab, unknown_token=['UNK'], + >>> max_bytes_per_token=100, with_offsets=False) + >>> dataset = dataset.map(operations=tokenizer_op) + >>> # If with_offsets=False, then output three columns {["token", dtype=str], ["offsets_start", dtype=uint32], + >>> # ["offsets_limit", dtype=uint32]} + >>> tokenizer_op = text.WordpieceTokenizer(vocab=vocab, unknown_token=['UNK'], + >>> max_bytes_per_token=100, with_offsets=True) + >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + >>> columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op) """ - def __init__(self, vocab, suffix_indicator='##', max_bytes_per_token=100, unknown_token='[UNK]'): + @check_wordpiece_tokenizer + def __init__(self, vocab, suffix_indicator='##', max_bytes_per_token=100, + unknown_token='[UNK]', with_offsets=False): self.vocab = vocab self.suffix_indicator = suffix_indicator self.max_bytes_per_token = max_bytes_per_token self.unknown_token = unknown_token - super().__init__(self.vocab, self.suffix_indicator, self.max_bytes_per_token, self.unknown_token) + self.with_offsets = with_offsets + super().__init__(self.vocab, self.suffix_indicator, self.max_bytes_per_token, + self.unknown_token, self.with_offsets) if platform.system().lower() != 'windows': class WhitespaceTokenizer(cde.WhitespaceTokenizerOp): """ Tokenize a scalar tensor of UTF-8 string on ICU defined whitespaces(such as: ' ', '\\\\t', '\\\\r', '\\\\n'). + + Args: + with_offsets (bool, optional): If or not output offsets of tokens (default=False). 
+ + Examples: + >>> # If with_offsets=False, default output one column {["text", dtype=str]} + >>> tokenizer_op = text.WhitespaceTokenizer() + >>> dataset = dataset.map(operations=tokenizer_op) + >>> # If with_offsets=False, then output three columns {["token", dtype=str], + >>> # ["offsets_start", dtype=uint32], + >>> # ["offsets_limit", dtype=uint32]} + >>> tokenizer_op = text.WhitespaceTokenizer(True) + >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + >>> columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op) """ + @check_with_offsets + def __init__(self, with_offsets=False): + self.with_offsets = with_offsets + super().__init__(self.with_offsets) + class UnicodeScriptTokenizer(cde.UnicodeScriptTokenizerOp): """ @@ -262,11 +329,25 @@ if platform.system().lower() != 'windows': Args: keep_whitespace (bool, optional): If or not emit whitespace tokens (default=False). + with_offsets (bool, optional): If or not output offsets of tokens (default=False). 
+ + Examples: + >>> # If with_offsets=False, default output one column {["text", dtype=str]} + >>> tokenizer_op = text.UnicodeScriptTokenizerOp(keep_whitespace=True, with_offsets=False) + >>> dataset = dataset.map(operations=tokenizer_op) + >>> # If with_offsets=False, then output three columns {["token", dtype=str], + >>> # ["offsets_start", dtype=uint32], + >>> # ["offsets_limit", dtype=uint32]} + >>> tokenizer_op = text.UnicodeScriptTokenizerOp(keep_whitespace=True, with_offsets=True) + >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + >>> columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op) """ - def __init__(self, keep_whitespace=False): + @check_unicode_script_tokenizer + def __init__(self, keep_whitespace=False, with_offsets=False): self.keep_whitespace = keep_whitespace - super().__init__(self.keep_whitespace) + self.with_offsets = with_offsets + super().__init__(self.keep_whitespace, self.with_offsets) class CaseFold(cde.CaseFoldOp): @@ -302,6 +383,9 @@ if platform.system().lower() != 'windows': """ def __init__(self, normalize_form=NormalizeForm.NFKC): + if not isinstance(normalize_form, NormalizeForm): + raise TypeError("Wrong input type for normalization_form, should be NormalizeForm.") + self.normalize_form = DE_C_INTER_NORMALIZE_FORM[normalize_form] super().__init__(self.normalize_form) @@ -338,12 +422,26 @@ if platform.system().lower() != 'windows': keep_delim_pattern(str, optional): The string matched by 'delim_pattern' can be kept as a token if it can be matched by 'keep_delim_pattern'. And the default value is empty str(''), in this situation, delimiters will not kept as a output token(default=''). + with_offsets (bool, optional): If or not output offsets of tokens (default=False). 
+ + Examples: + >>> # If with_offsets=False, default output one column {["text", dtype=str]} + >>> tokenizer_op = text.RegexTokenizer(delim_pattern, keep_delim_pattern, with_offsets=False) + >>> dataset = dataset.map(operations=tokenizer_op) + >>> # If with_offsets=False, then output three columns {["token", dtype=str], + >>> # ["offsets_start", dtype=uint32], + >>> # ["offsets_limit", dtype=uint32]} + >>> tokenizer_op = text.RegexTokenizer(delim_pattern, keep_delim_pattern, with_offsets=True) + >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + >>> columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op) """ - def __init__(self, delim_pattern, keep_delim_pattern=''): + @check_regex_tokenizer + def __init__(self, delim_pattern, keep_delim_pattern='', with_offsets=False): self.delim_pattern = delim_pattern self.keep_delim_pattern = keep_delim_pattern - super().__init__(self.delim_pattern, self.keep_delim_pattern) + self.with_offsets = with_offsets + super().__init__(self.delim_pattern, self.keep_delim_pattern, self.with_offsets) class BasicTokenizer(cde.BasicTokenizerOp): @@ -359,16 +457,41 @@ if platform.system().lower() != 'windows': only effective when 'lower_case' is False. See NormalizeUTF8 for details(default='NONE'). preserve_unused_token(bool, optional): If True, do not split special tokens like '[CLS]', '[SEP]', '[UNK]', '[PAD]', '[MASK]'(default=True). + with_offsets (bool, optional): If or not output offsets of tokens (default=False). 
+ + Examples: + >>> # If with_offsets=False, default output one column {["text", dtype=str]} + >>> tokenizer_op = text.BasicTokenizer(lower_case=False, + >>> keep_whitespace=False, + >>> normalization_form=NormalizeForm.NONE, + >>> preserve_unused_token=True, + >>> with_offsets=False) + >>> dataset = dataset.map(operations=tokenizer_op) + >>> # If with_offsets=False, then output three columns {["token", dtype=str], + >>> # ["offsets_start", dtype=uint32], + >>> # ["offsets_limit", dtype=uint32]} + >>> tokenizer_op = text.BasicTokenizer(lower_case=False, + >>> keep_whitespace=False, + >>> normalization_form=NormalizeForm.NONE, + >>> preserve_unused_token=True, + >>> with_offsets=True) + >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + >>> columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op) """ - def __init__(self, lower_case=False, keep_whitespace=False, - normalization_form=NormalizeForm.NONE, preserve_unused_token=True): + @check_basic_tokenizer + def __init__(self, lower_case=False, keep_whitespace=False, normalization_form=NormalizeForm.NONE, + preserve_unused_token=True, with_offsets=False): + if not isinstance(normalization_form, NormalizeForm): + raise TypeError("Wrong input type for normalization_form, should be NormalizeForm.") + self.lower_case = lower_case self.keep_whitespace = keep_whitespace self.normalization_form = DE_C_INTER_NORMALIZE_FORM[normalization_form] self.preserve_unused_token = preserve_unused_token - super().__init__(self.lower_case, self.keep_whitespace, - self.normalization_form, self.preserve_unused_token) + self.with_offsets = with_offsets + super().__init__(self.lower_case, self.keep_whitespace, self.normalization_form, + self.preserve_unused_token, self.with_offsets) class BertTokenizer(cde.BertTokenizerOp): @@ -389,11 +512,33 @@ if platform.system().lower() != 'windows': only effective when 'lower_case' is False. 
See NormalizeUTF8 for details(default='NONE'). preserve_unused_token(bool, optional): If True, do not split special tokens like '[CLS]', '[SEP]', '[UNK]', '[PAD]', '[MASK]'(default=True). + with_offsets (bool, optional): If or not output offsets of tokens (default=False). + + Examples: + >>> # If with_offsets=False, default output one column {["text", dtype=str]} + >>> tokenizer_op = text.BertTokenizer(vocab=vocab, suffix_indicator='##', max_bytes_per_token=100, + >>> unknown_token=100, lower_case=False, keep_whitespace=False, + >>> normalization_form=NormalizeForm.NONE, preserve_unused_token=True, + >>> with_offsets=False) + >>> dataset = dataset.map(operations=tokenizer_op) + >>> # If with_offsets=False, then output three columns {["token", dtype=str], + >>> # ["offsets_start", dtype=uint32], + >>> # ["offsets_limit", dtype=uint32]} + >>> tokenizer_op = text.BertTokenizer(vocab=vocab, suffix_indicator='##', max_bytes_per_token=100, + >>> unknown_token=100, lower_case=False, keep_whitespace=False, + >>> normalization_form=NormalizeForm.NONE, preserve_unused_token=True, + >>> with_offsets=True) + >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + >>> columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op) """ - def __init__(self, vocab, suffix_indicator='##', max_bytes_per_token=100, - unknown_token='[UNK]', lower_case=False, keep_whitespace=False, - normalization_form=NormalizeForm.NONE, preserve_unused_token=True): + @check_bert_tokenizer + def __init__(self, vocab, suffix_indicator='##', max_bytes_per_token=100, unknown_token='[UNK]', + lower_case=False, keep_whitespace=False, normalization_form=NormalizeForm.NONE, + preserve_unused_token=True, with_offsets=False): + if not isinstance(normalization_form, NormalizeForm): + raise TypeError("Wrong input type for normalization_form, should be NormalizeForm.") + self.vocab = vocab self.suffix_indicator = suffix_indicator 
self.max_bytes_per_token = max_bytes_per_token @@ -402,8 +547,10 @@ if platform.system().lower() != 'windows': self.keep_whitespace = keep_whitespace self.normalization_form = DE_C_INTER_NORMALIZE_FORM[normalization_form] self.preserve_unused_token = preserve_unused_token + self.with_offsets = with_offsets super().__init__(self.vocab, self.suffix_indicator, self.max_bytes_per_token, self.unknown_token, - self.lower_case, self.keep_whitespace, self.normalization_form, self.preserve_unused_token) + self.lower_case, self.keep_whitespace, self.normalization_form, + self.preserve_unused_token, self.with_offsets) class TruncateSequencePair(cde.TruncateSequencePairOp): diff --git a/mindspore/dataset/text/utils.py b/mindspore/dataset/text/utils.py index 7347a4b854..ef1d0e6fc5 100644 --- a/mindspore/dataset/text/utils.py +++ b/mindspore/dataset/text/utils.py @@ -28,6 +28,7 @@ __all__ = [ "Vocab", "to_str", "to_bytes" ] + class Vocab(cde.Vocab): """ Vocab object that is used to lookup a word. @@ -38,7 +39,7 @@ class Vocab(cde.Vocab): @classmethod @check_from_dataset def from_dataset(cls, dataset, columns=None, freq_range=None, top_k=None, special_tokens=None, - special_first=None): + special_first=True): """ Build a vocab from a dataset. @@ -62,13 +63,21 @@ class Vocab(cde.Vocab): special_tokens(list, optional): a list of strings, each one is a special token. for example special_tokens=["",""] (default=None, no special tokens will be added). special_first(bool, optional): whether special_tokens will be prepended/appended to vocab. If special_tokens - is specified and special_first is set to None, special_tokens will be prepended (default=None). + is specified and special_first is set to True, special_tokens will be prepended (default=True). Returns: Vocab, Vocab object built from dataset. 
""" vocab = Vocab() + if columns is None: + columns = [] + if not isinstance(columns, list): + columns = [columns] + if freq_range is None: + freq_range = (None, None) + if special_tokens is None: + special_tokens = [] root = copy.deepcopy(dataset).build_vocab(vocab, columns, freq_range, top_k, special_tokens, special_first) for d in root.create_dict_iterator(): if d is not None: @@ -77,7 +86,7 @@ class Vocab(cde.Vocab): @classmethod @check_from_list - def from_list(cls, word_list, special_tokens=None, special_first=None): + def from_list(cls, word_list, special_tokens=None, special_first=True): """ Build a vocab object from a list of word. @@ -86,29 +95,33 @@ class Vocab(cde.Vocab): special_tokens(list, optional): a list of strings, each one is a special token. for example special_tokens=["",""] (default=None, no special tokens will be added). special_first(bool, optional): whether special_tokens will be prepended/appended to vocab, If special_tokens - is specified and special_first is set to None, special_tokens will be prepended (default=None). + is specified and special_first is set to True, special_tokens will be prepended (default=True). """ - + if special_tokens is None: + special_tokens = [] return super().from_list(word_list, special_tokens, special_first) @classmethod @check_from_file - def from_file(cls, file_path, delimiter=None, vocab_size=None, special_tokens=None, special_first=None): + def from_file(cls, file_path, delimiter="", vocab_size=None, special_tokens=None, special_first=True): """ Build a vocab object from a list of word. Args: file_path (str): path to the file which contains the vocab list. delimiter (str, optional): a delimiter to break up each line in file, the first element is taken to be - the word (default=None). + the word (default=""). vocab_size (int, optional): number of words to read from file_path (default=None, all words are taken). special_tokens (list, optional): a list of strings, each one is a special token. 
for example special_tokens=["",""] (default=None, no special tokens will be added). special_first (bool, optional): whether special_tokens will be prepended/appended to vocab, - If special_tokens is specified and special_first is set to None, - special_tokens will be prepended (default=None). + If special_tokens is specified and special_first is set to True, + special_tokens will be prepended (default=True). """ - + if vocab_size is None: + vocab_size = -1 + if special_tokens is None: + special_tokens = [] return super().from_file(file_path, delimiter, vocab_size, special_tokens, special_first) @classmethod diff --git a/mindspore/dataset/text/validators.py b/mindspore/dataset/text/validators.py index afab8665cd..b0327f5609 100644 --- a/mindspore/dataset/text/validators.py +++ b/mindspore/dataset/text/validators.py @@ -17,23 +17,22 @@ validators for text ops """ from functools import wraps - -import mindspore._c_dataengine as cde import mindspore.common.dtype as mstype +import mindspore._c_dataengine as cde from mindspore._c_expression import typing -from ..transforms.validators import check_uint32, check_pos_int64 + +from ..core.validator_helpers import parse_user_args, type_check, type_check_list, check_uint32, \ + INT32_MAX, check_value, check_positive def check_unique_list_of_words(words, arg_name): """Check that words is a list and each element is a str without any duplication""" - if not isinstance(words, list): - raise ValueError(arg_name + " needs to be a list of words of type string.") + type_check(words, (list,), arg_name) words_set = set() for word in words: - if not isinstance(word, str): - raise ValueError("each word in " + arg_name + " needs to be type str.") + type_check(word, (str,), arg_name) if word in words_set: raise ValueError(arg_name + " contains duplicate word: " + word + ".") words_set.add(word) @@ -41,161 +40,100 @@ def check_unique_list_of_words(words, arg_name): def check_lookup(method): - """A wrapper that wrap a parameter checker to the 
original function.""" + """A wrapper that wraps a parameter checker to the original function.""" @wraps(method) def new_method(self, *args, **kwargs): - vocab, unknown = (list(args) + 2 * [None])[:2] - if "vocab" in kwargs: - vocab = kwargs.get("vocab") - if "unknown" in kwargs: - unknown = kwargs.get("unknown") - if unknown is not None: - if not (isinstance(unknown, int) and unknown >= 0): - raise ValueError("unknown needs to be a non-negative integer.") + [vocab, unknown_token], _ = parse_user_args(method, *args, **kwargs) - if not isinstance(vocab, cde.Vocab): - raise ValueError("vocab is not an instance of cde.Vocab.") + if unknown_token is not None: + type_check(unknown_token, (str,), "unknown_token") - kwargs["vocab"] = vocab - kwargs["unknown"] = unknown - return method(self, **kwargs) + type_check(vocab, (cde.Vocab,), "vocab is not an instance of cde.Vocab.") + + return method(self, *args, **kwargs) return new_method def check_from_file(method): - """A wrapper that wrap a parameter checker to the original function.""" + """A wrapper that wraps a parameter checker to the original function.""" @wraps(method) def new_method(self, *args, **kwargs): - file_path, delimiter, vocab_size, special_tokens, special_first = (list(args) + 5 * [None])[:5] - if "file_path" in kwargs: - file_path = kwargs.get("file_path") - if "delimiter" in kwargs: - delimiter = kwargs.get("delimiter") - if "vocab_size" in kwargs: - vocab_size = kwargs.get("vocab_size") - if "special_tokens" in kwargs: - special_tokens = kwargs.get("special_tokens") - if "special_first" in kwargs: - special_first = kwargs.get("special_first") - - if not isinstance(file_path, str): - raise ValueError("file_path needs to be str.") - - if delimiter is not None: - if not isinstance(delimiter, str): - raise ValueError("delimiter needs to be str.") - else: - delimiter = "" + [file_path, delimiter, vocab_size, special_tokens, special_first], _ = parse_user_args(method, *args, + **kwargs) + if special_tokens is 
not None: + check_unique_list_of_words(special_tokens, "special_tokens") + type_check_list([file_path, delimiter], (str,), ["file_path", "delimiter"]) if vocab_size is not None: - if not (isinstance(vocab_size, int) and vocab_size > 0): - raise ValueError("vocab size needs to be a positive integer.") - else: - vocab_size = -1 - - if special_first is None: - special_first = True - - if not isinstance(special_first, bool): - raise ValueError("special_first needs to be a boolean value") - - if special_tokens is None: - special_tokens = [] + check_value(vocab_size, (-1, INT32_MAX), "vocab_size") + type_check(special_first, (bool,), "special_first") - check_unique_list_of_words(special_tokens, "special_tokens") - - kwargs["file_path"] = file_path - kwargs["delimiter"] = delimiter - kwargs["vocab_size"] = vocab_size - kwargs["special_tokens"] = special_tokens - kwargs["special_first"] = special_first - - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method def check_from_list(method): - """A wrapper that wrap a parameter checker to the original function.""" + """A wrapper that wraps a parameter checker to the original function.""" @wraps(method) def new_method(self, *args, **kwargs): - word_list, special_tokens, special_first = (list(args) + 3 * [None])[:3] - if "word_list" in kwargs: - word_list = kwargs.get("word_list") - if "special_tokens" in kwargs: - special_tokens = kwargs.get("special_tokens") - if "special_first" in kwargs: - special_first = kwargs.get("special_first") - if special_tokens is None: - special_tokens = [] - word_set = check_unique_list_of_words(word_list, "word_list") - token_set = check_unique_list_of_words(special_tokens, "special_tokens") + [word_list, special_tokens, special_first], _ = parse_user_args(method, *args, **kwargs) - intersect = word_set.intersection(token_set) + word_set = check_unique_list_of_words(word_list, "word_list") + if special_tokens is not None: + token_set = 
check_unique_list_of_words(special_tokens, "special_tokens") - if intersect != set(): - raise ValueError("special_tokens and word_list contain duplicate word :" + str(intersect) + ".") + intersect = word_set.intersection(token_set) - if special_first is None: - special_first = True + if intersect != set(): + raise ValueError("special_tokens and word_list contain duplicate word :" + str(intersect) + ".") - if not isinstance(special_first, bool): - raise ValueError("special_first needs to be a boolean value.") + type_check(special_first, (bool,), "special_first") - kwargs["word_list"] = word_list - kwargs["special_tokens"] = special_tokens - kwargs["special_first"] = special_first - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method def check_from_dict(method): - """A wrapper that wrap a parameter checker to the original function.""" + """A wrapper that wraps a parameter checker to the original function.""" @wraps(method) def new_method(self, *args, **kwargs): - word_dict, = (list(args) + [None])[:1] - if "word_dict" in kwargs: - word_dict = kwargs.get("word_dict") - if not isinstance(word_dict, dict): - raise ValueError("word_dict needs to be a list of word,id pairs.") + [word_dict], _ = parse_user_args(method, *args, **kwargs) + + type_check(word_dict, (dict,), "word_dict") + for word, word_id in word_dict.items(): - if not isinstance(word, str): - raise ValueError("Each word in word_dict needs to be type string.") - if not (isinstance(word_id, int) and word_id >= 0): - raise ValueError("Each word id needs to be positive integer.") - kwargs["word_dict"] = word_dict - return method(self, **kwargs) + type_check(word, (str,), "word") + type_check(word_id, (int,), "word_id") + check_value(word_id, (0, INT32_MAX), "word_id") + return method(self, *args, **kwargs) return new_method def check_jieba_init(method): - """Wrapper method to check the parameters of jieba add word.""" + """Wrapper method to check the parameters of jieba 
init.""" @wraps(method) def new_method(self, *args, **kwargs): - hmm_path, mp_path, model = (list(args) + 3 * [None])[:3] + [hmm_path, mp_path, _, with_offsets], _ = parse_user_args(method, *args, **kwargs) - if "hmm_path" in kwargs: - hmm_path = kwargs.get("hmm_path") - if "mp_path" in kwargs: - mp_path = kwargs.get("mp_path") if hmm_path is None: - raise ValueError( - "The dict of HMMSegment in cppjieba is not provided.") - kwargs["hmm_path"] = hmm_path + raise ValueError("The dict of HMMSegment in cppjieba is not provided.") + if not isinstance(hmm_path, str): + raise TypeError("Wrong input type for hmm_path, should be string.") if mp_path is None: - raise ValueError( - "The dict of MPSegment in cppjieba is not provided.") - kwargs["mp_path"] = mp_path - if model is not None: - kwargs["model"] = model - return method(self, **kwargs) + raise ValueError("The dict of MPSegment in cppjieba is not provided.") + if not isinstance(mp_path, str): + raise TypeError("Wrong input type for mp_path, should be string.") + if not isinstance(with_offsets, bool): + raise TypeError("Wrong input type for with_offsets, should be boolean.") + return method(self, *args, **kwargs) return new_method @@ -205,19 +143,12 @@ def check_jieba_add_word(method): @wraps(method) def new_method(self, *args, **kwargs): - word, freq = (list(args) + 2 * [None])[:2] - - if "word" in kwargs: - word = kwargs.get("word") - if "freq" in kwargs: - freq = kwargs.get("freq") + [word, freq], _ = parse_user_args(method, *args, **kwargs) if word is None: raise ValueError("word is not provided.") - kwargs["word"] = word if freq is not None: check_uint32(freq) - kwargs["freq"] = freq - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -227,104 +158,183 @@ def check_jieba_add_dict(method): @wraps(method) def new_method(self, *args, **kwargs): - user_dict = (list(args) + [None])[0] - if "user_dict" in kwargs: - user_dict = kwargs.get("user_dict") - if user_dict is None: - 
raise ValueError("user_dict is not provided.") - kwargs["user_dict"] = user_dict - return method(self, **kwargs) + parse_user_args(method, *args, **kwargs) + return method(self, *args, **kwargs) return new_method -def check_from_dataset(method): - """A wrapper that wrap a parameter checker to the original function.""" +def check_with_offsets(method): + """Wrapper method to check if with_offsets is the only one parameter.""" @wraps(method) def new_method(self, *args, **kwargs): + [with_offsets], _ = parse_user_args(method, *args, **kwargs) + if not isinstance(with_offsets, bool): + raise TypeError("Wrong input type for with_offsets, should be boolean.") + return method(self, *args, **kwargs) - dataset, columns, freq_range, top_k, special_tokens, special_first = (list(args) + 6 * [None])[:6] - if "dataset" in kwargs: - dataset = kwargs.get("dataset") - if "columns" in kwargs: - columns = kwargs.get("columns") - if "freq_range" in kwargs: - freq_range = kwargs.get("freq_range") - if "top_k" in kwargs: - top_k = kwargs.get("top_k") - if "special_tokens" in kwargs: - special_tokens = kwargs.get("special_tokens") - if "special_first" in kwargs: - special_first = kwargs.get("special_first") + return new_method - if columns is None: - columns = [] - if not isinstance(columns, list): - columns = [columns] +def check_unicode_script_tokenizer(method): + """Wrapper method to check the parameter of UnicodeScriptTokenizer.""" - for column in columns: - if not isinstance(column, str): - raise ValueError("columns need to be a list of strings.") + @wraps(method) + def new_method(self, *args, **kwargs): + [keep_whitespace, with_offsets], _ = parse_user_args(method, *args, **kwargs) + if not isinstance(keep_whitespace, bool): + raise TypeError("Wrong input type for keep_whitespace, should be boolean.") + if not isinstance(with_offsets, bool): + raise TypeError("Wrong input type for with_offsets, should be boolean.") + return method(self, *args, **kwargs) - if freq_range is None: - 
freq_range = (None, None) + return new_method - if not isinstance(freq_range, tuple) or len(freq_range) != 2: - raise ValueError("freq_range needs to be either None or a tuple of 2 integers or an int and a None.") - for num in freq_range: - if num is not None and (not isinstance(num, int)): - raise ValueError("freq_range needs to be either None or a tuple of 2 integers or an int and a None.") +def check_wordpiece_tokenizer(method): + """Wrapper method to check the parameter of WordpieceTokenizer.""" - if isinstance(freq_range[0], int) and isinstance(freq_range[1], int): - if freq_range[0] > freq_range[1] or freq_range[0] < 0: - raise ValueError("frequency range [a,b] should be 0 <= a <= b (a,b are inclusive).") + @wraps(method) + def new_method(self, *args, **kwargs): + [vocab, suffix_indicator, max_bytes_per_token, unknown_token, with_offsets], _ = \ + parse_user_args(method, *args, **kwargs) + if vocab is None: + raise ValueError("vocab is not provided.") + if not isinstance(vocab, cde.Vocab): + raise TypeError("Wrong input type for vocab, should be Vocab object.") + if not isinstance(suffix_indicator, str): + raise TypeError("Wrong input type for suffix_indicator, should be string.") + if not isinstance(unknown_token, str): + raise TypeError("Wrong input type for unknown_token, should be string.") + if not isinstance(with_offsets, bool): + raise TypeError("Wrong input type for with_offsets, should be boolean.") + check_uint32(max_bytes_per_token) + return method(self, *args, **kwargs) - if top_k is not None and (not isinstance(top_k, int)): - raise ValueError("top_k needs to be a positive integer.") + return new_method - if isinstance(top_k, int) and top_k <= 0: - raise ValueError("top_k needs to be a positive integer.") - if special_first is None: - special_first = True +def check_regex_tokenizer(method): + """Wrapper method to check the parameter of RegexTokenizer.""" - if special_tokens is None: - special_tokens = [] + @wraps(method) + def new_method(self, 
*args, **kwargs): + [delim_pattern, keep_delim_pattern, with_offsets], _ = parse_user_args(method, *args, **kwargs) + if delim_pattern is None: + raise ValueError("delim_pattern is not provided.") + if not isinstance(delim_pattern, str): + raise TypeError("Wrong input type for delim_pattern, should be string.") + if not isinstance(keep_delim_pattern, str): + raise TypeError("Wrong input type for keep_delim_pattern, should be string.") + if not isinstance(with_offsets, bool): + raise TypeError("Wrong input type for with_offsets, should be boolean.") + return method(self, *args, **kwargs) - if not isinstance(special_first, bool): - raise ValueError("special_first needs to be a boolean value.") + return new_method - check_unique_list_of_words(special_tokens, "special_tokens") - kwargs["dataset"] = dataset - kwargs["columns"] = columns - kwargs["freq_range"] = freq_range - kwargs["top_k"] = top_k - kwargs["special_tokens"] = special_tokens - kwargs["special_first"] = special_first +def check_basic_tokenizer(method): + """Wrapper method to check the parameter of BasicTokenizer.""" - return method(self, **kwargs) + @wraps(method) + def new_method(self, *args, **kwargs): + [lower_case, keep_whitespace, _, preserve_unused, with_offsets], _ = \ + parse_user_args(method, *args, **kwargs) + if not isinstance(lower_case, bool): + raise TypeError("Wrong input type for lower_case, should be boolean.") + if not isinstance(keep_whitespace, bool): + raise TypeError("Wrong input type for keep_whitespace, should be boolean.") + if not isinstance(preserve_unused, bool): + raise TypeError("Wrong input type for preserve_unused_token, should be boolean.") + if not isinstance(with_offsets, bool): + raise TypeError("Wrong input type for with_offsets, should be boolean.") + return method(self, *args, **kwargs) + + return new_method + + +def check_bert_tokenizer(method): + """Wrapper method to check the parameter of BertTokenizer.""" + + @wraps(method) + def new_method(self, *args, 
**kwargs): + [vocab, suffix_indicator, max_bytes_per_token, unknown_token, lower_case, keep_whitespace, _, + preserve_unused_token, with_offsets], _ = parse_user_args(method, *args, **kwargs) + if vocab is None: + raise ValueError("vocab is not provided.") + if not isinstance(vocab, cde.Vocab): + raise TypeError("Wrong input type for vocab, should be Vocab object.") + if not isinstance(suffix_indicator, str): + raise TypeError("Wrong input type for suffix_indicator, should be string.") + if not isinstance(max_bytes_per_token, int): + raise TypeError("Wrong input type for max_bytes_per_token, should be int.") + check_uint32(max_bytes_per_token) + + if not isinstance(unknown_token, str): + raise TypeError("Wrong input type for unknown_token, should be string.") + if not isinstance(lower_case, bool): + raise TypeError("Wrong input type for lower_case, should be boolean.") + if not isinstance(keep_whitespace, bool): + raise TypeError("Wrong input type for keep_whitespace, should be boolean.") + if not isinstance(preserve_unused_token, bool): + raise TypeError("Wrong input type for preserve_unused_token, should be boolean.") + if not isinstance(with_offsets, bool): + raise TypeError("Wrong input type for with_offsets, should be boolean.") + return method(self, *args, **kwargs) + + return new_method + + +def check_from_dataset(method): + """A wrapper that wraps a parameter checker to the original function.""" + + @wraps(method) + def new_method(self, *args, **kwargs): + + [_, columns, freq_range, top_k, special_tokens, special_first], _ = parse_user_args(method, *args, + **kwargs) + if columns is not None: + if not isinstance(columns, list): + columns = [columns] + col_names = ["col_{0}".format(i) for i in range(len(columns))] + type_check_list(columns, (str,), col_names) + + if freq_range is not None: + type_check(freq_range, (tuple,), "freq_range") + + if len(freq_range) != 2: + raise ValueError("freq_range needs to be a tuple of 2 integers or an int and a None.") + 
for num in freq_range: + if num is not None and (not isinstance(num, int)): + raise ValueError( + "freq_range needs to be either None or a tuple of 2 integers or an int and a None.") + + if isinstance(freq_range[0], int) and isinstance(freq_range[1], int): + if freq_range[0] > freq_range[1] or freq_range[0] < 0: + raise ValueError("frequency range [a,b] should be 0 <= a <= b (a,b are inclusive).") + + type_check(top_k, (int, type(None)), "top_k") + + if isinstance(top_k, int): + check_positive(top_k, "top_k") + type_check(special_first, (bool,), "special_first") + + if special_tokens is not None: + check_unique_list_of_words(special_tokens, "special_tokens") + + return method(self, *args, **kwargs) return new_method def check_ngram(method): - """A wrapper that wrap a parameter checker to the original function.""" + """A wrapper that wraps a parameter checker to the original function.""" @wraps(method) def new_method(self, *args, **kwargs): - n, left_pad, right_pad, separator = (list(args) + 4 * [None])[:4] - if "n" in kwargs: - n = kwargs.get("n") - if "left_pad" in kwargs: - left_pad = kwargs.get("left_pad") - if "right_pad" in kwargs: - right_pad = kwargs.get("right_pad") - if "separator" in kwargs: - separator = kwargs.get("separator") + [n, left_pad, right_pad, separator], _ = parse_user_args(method, *args, **kwargs) if isinstance(n, int): n = [n] @@ -332,15 +342,9 @@ def check_ngram(method): if not (isinstance(n, list) and n != []): raise ValueError("n needs to be a non-empty list of positive integers.") - for gram in n: - if not (isinstance(gram, int) and gram > 0): - raise ValueError("n in ngram needs to be a positive number.") - - if left_pad is None: - left_pad = ("", 0) - - if right_pad is None: - right_pad = ("", 0) + for i, gram in enumerate(n): + type_check(gram, (int,), "gram[{0}]".format(i)) + check_positive(gram, "gram_{}".format(i)) if not (isinstance(left_pad, tuple) and len(left_pad) == 2 and isinstance(left_pad[0], str) and isinstance( 
left_pad[1], int)): @@ -353,11 +357,7 @@ def check_ngram(method): if not (left_pad[1] >= 0 and right_pad[1] >= 0): raise ValueError("padding width need to be positive numbers.") - if separator is None: - separator = " " - - if not isinstance(separator, str): - raise ValueError("separator needs to be a string.") + type_check(separator, (str,), "separator") kwargs["n"] = n kwargs["left_pad"] = left_pad @@ -374,16 +374,8 @@ def check_pair_truncate(method): @wraps(method) def new_method(self, *args, **kwargs): - max_length = (list(args) + [None])[0] - if "max_length" in kwargs: - max_length = kwargs.get("max_length") - if max_length is None: - raise ValueError("max_length is not provided.") - - check_pos_int64(max_length) - kwargs["max_length"] = max_length - - return method(self, **kwargs) + parse_user_args(method, *args, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -393,22 +385,13 @@ def check_to_number(method): @wraps(method) def new_method(self, *args, **kwargs): - data_type = (list(args) + [None])[0] - if "data_type" in kwargs: - data_type = kwargs.get("data_type") - - if data_type is None: - raise ValueError("data_type is a mandatory parameter but was not provided.") - - if not isinstance(data_type, typing.Type): - raise TypeError("data_type is not a MindSpore data type.") + [data_type], _ = parse_user_args(method, *args, **kwargs) + type_check(data_type, (typing.Type,), "data_type") if data_type not in mstype.number_type: raise TypeError("data_type is not numeric data type.") - kwargs["data_type"] = data_type - - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -418,18 +401,11 @@ def check_python_tokenizer(method): @wraps(method) def new_method(self, *args, **kwargs): - tokenizer = (list(args) + [None])[0] - if "tokenizer" in kwargs: - tokenizer = kwargs.get("tokenizer") - - if tokenizer is None: - raise ValueError("tokenizer is a mandatory parameter.") + [tokenizer], _ = parse_user_args(method, 
*args, **kwargs) if not callable(tokenizer): raise TypeError("tokenizer is not a callable python function") - kwargs["tokenizer"] = tokenizer - - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method diff --git a/mindspore/dataset/transforms/c_transforms.py b/mindspore/dataset/transforms/c_transforms.py index 48e986202c..62496822e5 100644 --- a/mindspore/dataset/transforms/c_transforms.py +++ b/mindspore/dataset/transforms/c_transforms.py @@ -197,7 +197,7 @@ class PadEnd(cde.PadEndOp): class Concatenate(cde.ConcatenateOp): """ - Tensor operation to prepend and append to a tensor. + Tensor operation that concatenates all columns into a single tensor. Args: axis (int, optional): axis to concatenate the tensors along (Default=0). diff --git a/mindspore/dataset/transforms/validators.py b/mindspore/dataset/transforms/validators.py index 6b5760e0c5..9fe0fa5f10 100644 --- a/mindspore/dataset/transforms/validators.py +++ b/mindspore/dataset/transforms/validators.py @@ -18,6 +18,7 @@ from functools import wraps import numpy as np from mindspore._c_expression import typing +from ..core.validator_helpers import parse_user_args, type_check, check_pos_int64, check_value, check_positive # POS_INT_MIN is used to limit values from starting from 0 POS_INT_MIN = 1 @@ -37,106 +38,33 @@ DOUBLE_MAX_INTEGER = 9007199254740992 DOUBLE_MIN_INTEGER = -9007199254740992 -def check_type(value, valid_type): - if not isinstance(value, valid_type): - raise ValueError("Wrong input type") - - -def check_value(value, valid_range): - if value < valid_range[0] or value > valid_range[1]: - raise ValueError("Input is not within the required range") - - -def check_range(values, valid_range): - if not valid_range[0] <= values[0] <= values[1] <= valid_range[1]: - raise ValueError("Input range is not valid") - - -def check_positive(value): - if value <= 0: - raise ValueError("Input must greater than 0") - - -def check_positive_float(value, valid_max=None): - if value <= 0 or 
not isinstance(value, float) or (valid_max is not None and value > valid_max): - raise ValueError("Input need to be a valid positive float.") - - -def check_bool(value): - if not isinstance(value, bool): - raise ValueError("Value needs to be a boolean.") - - -def check_2tuple(value): - if not (isinstance(value, tuple) and len(value) == 2): - raise ValueError("Value needs to be a 2-tuple.") - - -def check_list(value): - if not isinstance(value, list): - raise ValueError("The input needs to be a list.") - - -def check_uint8(value): - if not isinstance(value, int): - raise ValueError("The input needs to be a integer") - check_value(value, [UINT8_MIN, UINT8_MAX]) - - -def check_uint32(value): - if not isinstance(value, int): - raise ValueError("The input needs to be a integer") - check_value(value, [UINT32_MIN, UINT32_MAX]) - - -def check_pos_int32(value): - """Checks for int values starting from 1""" - if not isinstance(value, int): - raise ValueError("The input needs to be a integer") - check_value(value, [POS_INT_MIN, INT32_MAX]) - - -def check_uint64(value): - if not isinstance(value, int): - raise ValueError("The input needs to be a integer") - check_value(value, [UINT64_MIN, UINT64_MAX]) - - -def check_pos_int64(value): - if not isinstance(value, int): - raise ValueError("The input needs to be a integer") - check_value(value, [UINT64_MIN, INT64_MAX]) - +def check_fill_value(method): + """Wrapper method to check the parameters of fill_value.""" -def check_pos_float32(value): - check_value(value, [UINT32_MIN, FLOAT_MAX_INTEGER]) + @wraps(method) + def new_method(self, *args, **kwargs): + [fill_value], _ = parse_user_args(method, *args, **kwargs) + type_check(fill_value, (str, float, bool, int, bytes), "fill_value") + return method(self, *args, **kwargs) -def check_pos_float64(value): - check_value(value, [UINT64_MIN, DOUBLE_MAX_INTEGER]) + return new_method def check_one_hot_op(method): - """Wrapper method to check the parameters of one hot op.""" + """Wrapper 
method to check the parameters of one_hot_op.""" @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 2 * [None])[:2] - num_classes, smoothing_rate = args - if "num_classes" in kwargs: - num_classes = kwargs.get("num_classes") - if "smoothing_rate" in kwargs: - smoothing_rate = kwargs.get("smoothing_rate") - - if num_classes is None: - raise ValueError("num_classes") - check_pos_int32(num_classes) - kwargs["num_classes"] = num_classes + [num_classes, smoothing_rate], _ = parse_user_args(method, *args, **kwargs) + + type_check(num_classes, (int,), "num_classes") + check_positive(num_classes) + if smoothing_rate is not None: - check_value(smoothing_rate, [0., 1.]) - kwargs["smoothing_rate"] = smoothing_rate + check_value(smoothing_rate, [0., 1.], "smoothing_rate") - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -146,35 +74,12 @@ def check_num_classes(method): @wraps(method) def new_method(self, *args, **kwargs): - num_classes = (list(args) + [None])[0] - if "num_classes" in kwargs: - num_classes = kwargs.get("num_classes") - if num_classes is None: - raise ValueError("num_classes is not provided.") - - check_pos_int32(num_classes) - kwargs["num_classes"] = num_classes - - return method(self, **kwargs) - - return new_method - + [num_classes], _ = parse_user_args(method, *args, **kwargs) -def check_fill_value(method): - """Wrapper method to check the parameters of fill value.""" - - @wraps(method) - def new_method(self, *args, **kwargs): - fill_value = (list(args) + [None])[0] - if "fill_value" in kwargs: - fill_value = kwargs.get("fill_value") - if fill_value is None: - raise ValueError("fill_value is not provided.") - if not isinstance(fill_value, (str, float, bool, int, bytes)): - raise TypeError("fill_value must be either a primitive python str, float, bool, bytes or int") - kwargs["fill_value"] = fill_value + type_check(num_classes, (int,), "num_classes") + check_positive(num_classes) - return 
method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -184,17 +89,11 @@ def check_de_type(method): @wraps(method) def new_method(self, *args, **kwargs): - data_type = (list(args) + [None])[0] - if "data_type" in kwargs: - data_type = kwargs.get("data_type") + [data_type], _ = parse_user_args(method, *args, **kwargs) - if data_type is None: - raise ValueError("data_type is not provided.") - if not isinstance(data_type, typing.Type): - raise TypeError("data_type is not a MindSpore data type.") - kwargs["data_type"] = data_type + type_check(data_type, (typing.Type,), "data_type") - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -204,13 +103,11 @@ def check_slice_op(method): @wraps(method) def new_method(self, *args): - for i, arg in enumerate(args): - if arg is not None and arg is not Ellipsis and not isinstance(arg, (int, slice, list)): - raise TypeError("Indexing of dim " + str(i) + "is not of valid type") + for _, arg in enumerate(args): + type_check(arg, (int, slice, list, type(None), type(Ellipsis)), "arg") if isinstance(arg, list): for a in arg: - if not isinstance(a, int): - raise TypeError("Index " + a + " is not an int") + type_check(a, (int,), "a") return method(self, *args) return new_method @@ -221,36 +118,14 @@ def check_mask_op(method): @wraps(method) def new_method(self, *args, **kwargs): - operator, constant, dtype = (list(args) + 3 * [None])[:3] - if "operator" in kwargs: - operator = kwargs.get("operator") - if "constant" in kwargs: - constant = kwargs.get("constant") - if "dtype" in kwargs: - dtype = kwargs.get("dtype") - - if operator is None: - raise ValueError("operator is not provided.") - - if constant is None: - raise ValueError("constant is not provided.") + [operator, constant, dtype], _ = parse_user_args(method, *args, **kwargs) from .c_transforms import Relational - if not isinstance(operator, Relational): - raise TypeError("operator is not a Relational operator 
enum.") + type_check(operator, (Relational,), "operator") + type_check(constant, (str, float, bool, int, bytes), "constant") + type_check(dtype, (typing.Type,), "dtype") - if not isinstance(constant, (str, float, bool, int, bytes)): - raise TypeError("constant must be either a primitive python str, float, bool, bytes or int") - - if dtype is not None: - if not isinstance(dtype, typing.Type): - raise TypeError("dtype is not a MindSpore data type.") - kwargs["dtype"] = dtype - - kwargs["operator"] = operator - kwargs["constant"] = constant - - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -260,22 +135,12 @@ def check_pad_end(method): @wraps(method) def new_method(self, *args, **kwargs): - pad_shape, pad_value = (list(args) + 2 * [None])[:2] - if "pad_shape" in kwargs: - pad_shape = kwargs.get("pad_shape") - if "pad_value" in kwargs: - pad_value = kwargs.get("pad_value") - if pad_shape is None: - raise ValueError("pad_shape is not provided.") + [pad_shape, pad_value], _ = parse_user_args(method, *args, **kwargs) if pad_value is not None: - if not isinstance(pad_value, (str, float, bool, int, bytes)): - raise TypeError("pad_value must be either a primitive python str, float, bool, int or bytes") - kwargs["pad_value"] = pad_value - - if not isinstance(pad_shape, list): - raise TypeError("pad_shape must be a list") + type_check(pad_value, (str, float, bool, int, bytes), "pad_value") + type_check(pad_shape, (list,), "pad_end") for dim in pad_shape: if dim is not None: @@ -284,9 +149,7 @@ def check_pad_end(method): else: raise TypeError("a value in the list is not an integer.") - kwargs["pad_shape"] = pad_shape - - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -296,31 +159,24 @@ def check_concat_type(method): @wraps(method) def new_method(self, *args, **kwargs): - axis, prepend, append = (list(args) + 3 * [None])[:3] - if "prepend" in kwargs: - prepend = kwargs.get("prepend") - if 
"append" in kwargs: - append = kwargs.get("append") - if "axis" in kwargs: - axis = kwargs.get("axis") + + [axis, prepend, append], _ = parse_user_args(method, *args, **kwargs) if axis is not None: - if not isinstance(axis, int): - raise TypeError("axis type is not valid, must be an integer.") + type_check(axis, (int,), "axis") if axis not in (0, -1): raise ValueError("only 1D concatenation supported.") - kwargs["axis"] = axis if prepend is not None: - if not isinstance(prepend, (type(None), np.ndarray)): - raise ValueError("prepend type is not valid, must be None for no prepend tensor or a numpy array.") - kwargs["prepend"] = prepend + type_check(prepend, (np.ndarray,), "prepend") + if len(prepend.shape) != 1: + raise ValueError("can only prepend 1D arrays.") if append is not None: - if not isinstance(append, (type(None), np.ndarray)): - raise ValueError("append type is not valid, must be None for no append tensor or a numpy array.") - kwargs["append"] = append + type_check(append, (np.ndarray,), "append") + if len(append.shape) != 1: + raise ValueError("can only append 1D arrays.") - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method diff --git a/mindspore/dataset/transforms/vision/c_transforms.py b/mindspore/dataset/transforms/vision/c_transforms.py index 43ac037541..8e3b7c7214 100644 --- a/mindspore/dataset/transforms/vision/c_transforms.py +++ b/mindspore/dataset/transforms/vision/c_transforms.py @@ -40,12 +40,14 @@ Examples: >>> dataset = dataset.map(input_columns="image", operations=transforms_list) >>> dataset = dataset.map(input_columns="label", operations=onehot_op) """ +import numbers import mindspore._c_dataengine as cde from .utils import Inter, Border from .validators import check_prob, check_crop, check_resize_interpolation, check_random_resize_crop, \ - check_normalize_c, check_random_crop, check_random_color_adjust, check_random_rotation, \ - check_resize, check_rescale, check_pad, check_cutout, 
check_uniform_augment_cpp, check_bounding_box_augment_cpp + check_normalize_c, check_random_crop, check_random_color_adjust, check_random_rotation, check_range, \ + check_resize, check_rescale, check_pad, check_cutout, check_uniform_augment_cpp, check_bounding_box_augment_cpp, \ + FLOAT_MAX_INTEGER DE_C_INTER_MODE = {Inter.NEAREST: cde.InterpolationMode.DE_INTER_NEAREST_NEIGHBOUR, Inter.LINEAR: cde.InterpolationMode.DE_INTER_LINEAR, @@ -57,6 +59,18 @@ DE_C_BORDER_TYPE = {Border.CONSTANT: cde.BorderType.DE_BORDER_CONSTANT, Border.SYMMETRIC: cde.BorderType.DE_BORDER_SYMMETRIC} +def parse_padding(padding): + if isinstance(padding, numbers.Number): + padding = [padding] * 4 + if len(padding) == 2: + left = right = padding[0] + top = bottom = padding[1] + padding = (left, top, right, bottom,) + if isinstance(padding, list): + padding = tuple(padding) + return padding + + class Decode(cde.DecodeOp): """ Decode the input image in RGB mode. @@ -136,16 +150,22 @@ class RandomCrop(cde.RandomCropOp): @check_random_crop def __init__(self, size, padding=None, pad_if_needed=False, fill_value=0, padding_mode=Border.CONSTANT): - self.size = size - self.padding = padding - self.pad_if_needed = pad_if_needed - self.fill_value = fill_value - self.padding_mode = padding_mode.value + if isinstance(size, int): + size = (size, size) if padding is None: padding = (0, 0, 0, 0) + else: + padding = parse_padding(padding) if isinstance(fill_value, int): # temporary fix fill_value = tuple([fill_value] * 3) border_type = DE_C_BORDER_TYPE[padding_mode] + + self.size = size + self.padding = padding + self.pad_if_needed = pad_if_needed + self.fill_value = fill_value + self.padding_mode = padding_mode.value + super().__init__(*size, *padding, border_type, pad_if_needed, *fill_value) @@ -184,16 +204,23 @@ class RandomCropWithBBox(cde.RandomCropWithBBoxOp): @check_random_crop def __init__(self, size, padding=None, pad_if_needed=False, fill_value=0, padding_mode=Border.CONSTANT): - self.size = size - 
self.padding = padding - self.pad_if_needed = pad_if_needed - self.fill_value = fill_value - self.padding_mode = padding_mode.value + if isinstance(size, int): + size = (size, size) if padding is None: padding = (0, 0, 0, 0) + else: + padding = parse_padding(padding) + if isinstance(fill_value, int): # temporary fix fill_value = tuple([fill_value] * 3) border_type = DE_C_BORDER_TYPE[padding_mode] + + self.size = size + self.padding = padding + self.pad_if_needed = pad_if_needed + self.fill_value = fill_value + self.padding_mode = padding_mode.value + super().__init__(*size, *padding, border_type, pad_if_needed, *fill_value) @@ -292,6 +319,8 @@ class Resize(cde.ResizeOp): @check_resize_interpolation def __init__(self, size, interpolation=Inter.LINEAR): + if isinstance(size, int): + size = (size, size) self.size = size self.interpolation = interpolation interpoltn = DE_C_INTER_MODE[interpolation] @@ -359,6 +388,8 @@ class RandomResizedCropWithBBox(cde.RandomCropAndResizeWithBBoxOp): @check_random_resize_crop def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), interpolation=Inter.BILINEAR, max_attempts=10): + if isinstance(size, int): + size = (size, size) self.size = size self.scale = scale self.ratio = ratio @@ -396,6 +427,8 @@ class RandomResizedCrop(cde.RandomCropAndResizeOp): @check_random_resize_crop def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. 
/ 3.), interpolation=Inter.BILINEAR, max_attempts=10): + if isinstance(size, int): + size = (size, size) self.size = size self.scale = scale self.ratio = ratio @@ -417,6 +450,8 @@ class CenterCrop(cde.CenterCropOp): @check_crop def __init__(self, size): + if isinstance(size, int): + size = (size, size) self.size = size super().__init__(*size) @@ -442,12 +477,26 @@ class RandomColorAdjust(cde.RandomColorAdjustOp): @check_random_color_adjust def __init__(self, brightness=(1, 1), contrast=(1, 1), saturation=(1, 1), hue=(0, 0)): + brightness = self.expand_values(brightness) + contrast = self.expand_values(contrast) + saturation = self.expand_values(saturation) + hue = self.expand_values(hue, center=0, bound=(-0.5, 0.5), non_negative=False) + self.brightness = brightness self.contrast = contrast self.saturation = saturation self.hue = hue + super().__init__(*brightness, *contrast, *saturation, *hue) + def expand_values(self, value, center=1, bound=(0, FLOAT_MAX_INTEGER), non_negative=True): + if isinstance(value, numbers.Number): + value = [center - value, center + value] + if non_negative: + value[0] = max(0, value[0]) + check_range(value, bound) + return (value[0], value[1]) + class RandomRotation(cde.RandomRotationOp): """ @@ -485,6 +534,8 @@ class RandomRotation(cde.RandomRotationOp): self.expand = expand self.center = center self.fill_value = fill_value + if isinstance(degrees, numbers.Number): + degrees = (-degrees, degrees) if center is None: center = (-1, -1) if isinstance(fill_value, int): # temporary fix @@ -584,6 +635,8 @@ class RandomCropDecodeResize(cde.RandomCropDecodeResizeOp): @check_random_resize_crop def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. 
/ 3.), interpolation=Inter.BILINEAR, max_attempts=10): + if isinstance(size, int): + size = (size, size) self.size = size self.scale = scale self.ratio = ratio @@ -623,12 +676,14 @@ class Pad(cde.PadOp): @check_pad def __init__(self, padding, fill_value=0, padding_mode=Border.CONSTANT): - self.padding = padding - self.fill_value = fill_value - self.padding_mode = padding_mode + padding = parse_padding(padding) if isinstance(fill_value, int): # temporary fix fill_value = tuple([fill_value] * 3) padding_mode = DE_C_BORDER_TYPE[padding_mode] + + self.padding = padding + self.fill_value = fill_value + self.padding_mode = padding_mode super().__init__(*padding, padding_mode, *fill_value) diff --git a/mindspore/dataset/transforms/vision/py_transforms.py b/mindspore/dataset/transforms/vision/py_transforms.py index b252c3434b..3bfd6b0644 100644 --- a/mindspore/dataset/transforms/vision/py_transforms.py +++ b/mindspore/dataset/transforms/vision/py_transforms.py @@ -28,6 +28,7 @@ import numpy as np from PIL import Image from . 
import py_transforms_util as util +from .c_transforms import parse_padding from .validators import check_prob, check_crop, check_resize_interpolation, check_random_resize_crop, \ check_normalize_py, check_random_crop, check_random_color_adjust, check_random_rotation, \ check_transforms_list, check_random_apply, check_ten_crop, check_num_channels, check_pad, \ @@ -295,6 +296,10 @@ class RandomCrop: @check_random_crop def __init__(self, size, padding=None, pad_if_needed=False, fill_value=0, padding_mode=Border.CONSTANT): + if padding is None: + padding = (0, 0, 0, 0) + else: + padding = parse_padding(padding) self.size = size self.padding = padding self.pad_if_needed = pad_if_needed @@ -753,6 +758,8 @@ class TenCrop: @check_ten_crop def __init__(self, size, use_vertical_flip=False): + if isinstance(size, int): + size = (size, size) self.size = size self.use_vertical_flip = use_vertical_flip @@ -877,6 +884,8 @@ class Pad: @check_pad def __init__(self, padding, fill_value=0, padding_mode=Border.CONSTANT): + parse_padding(padding) + self.padding = padding self.fill_value = fill_value self.padding_mode = DE_PY_BORDER_TYPE[padding_mode] @@ -1129,56 +1138,23 @@ class RandomAffine: def __init__(self, degrees, translate=None, scale=None, shear=None, resample=Inter.NEAREST, fill_value=0): # Parameter checking # rotation - if isinstance(degrees, numbers.Number): - if degrees < 0: - raise ValueError("If degrees is a single number, it must be positive.") - self.degrees = (-degrees, degrees) - elif isinstance(degrees, (tuple, list)) and len(degrees) == 2: - self.degrees = degrees - else: - raise TypeError("If degrees is a list or tuple, it must be of length 2.") - - # translation - if translate is not None: - if isinstance(translate, (tuple, list)) and len(translate) == 2: - for t in translate: - if t < 0.0 or t > 1.0: - raise ValueError("translation values should be between 0 and 1") - else: - raise TypeError("translate should be a list or tuple of length 2.") - self.translate = 
translate - - # scale - if scale is not None: - if isinstance(scale, (tuple, list)) and len(scale) == 2: - for s in scale: - if s <= 0: - raise ValueError("scale values should be positive") - else: - raise TypeError("scale should be a list or tuple of length 2.") - self.scale_ranges = scale - - # shear if shear is not None: if isinstance(shear, numbers.Number): - if shear < 0: - raise ValueError("If shear is a single number, it must be positive.") - self.shear = (-1 * shear, shear) - elif isinstance(shear, (tuple, list)) and (len(shear) == 2 or len(shear) == 4): - # X-Axis shear with [min, max] + shear = (-1 * shear, shear) + else: if len(shear) == 2: - self.shear = [shear[0], shear[1], 0., 0.] + shear = [shear[0], shear[1], 0., 0.] elif len(shear) == 4: - self.shear = [s for s in shear] - else: - raise TypeError("shear should be a list or tuple and it must be of length 2 or 4.") - else: - self.shear = shear + shear = [s for s in shear] - # resample - self.resample = DE_PY_INTER_MODE[resample] + if isinstance(degrees, numbers.Number): + degrees = (-degrees, degrees) - # fill_value + self.degrees = degrees + self.translate = translate + self.scale_ranges = scale + self.shear = shear + self.resample = DE_PY_INTER_MODE[resample] self.fill_value = fill_value def __call__(self, img): diff --git a/mindspore/dataset/transforms/vision/validators.py b/mindspore/dataset/transforms/vision/validators.py index b49116349b..4cb6613359 100644 --- a/mindspore/dataset/transforms/vision/validators.py +++ b/mindspore/dataset/transforms/vision/validators.py @@ -16,47 +16,35 @@ """ import numbers from functools import wraps - +import numpy as np from mindspore._c_dataengine import TensorOp from .utils import Inter, Border -from ...transforms.validators import check_pos_int32, check_pos_float32, check_value, check_uint8, FLOAT_MAX_INTEGER, \ - check_bool, check_2tuple, check_range, check_list, check_type, check_positive, INT32_MAX - - -def check_inter_mode(mode): - if not 
isinstance(mode, Inter): - raise ValueError("Invalid interpolation mode.") - - -def check_border_type(mode): - if not isinstance(mode, Border): - raise ValueError("Invalid padding mode.") +from ...core.validator_helpers import check_value, check_uint8, FLOAT_MAX_INTEGER, check_pos_float32, \ + check_2tuple, check_range, check_positive, INT32_MAX, parse_user_args, type_check, type_check_list def check_crop_size(size): """Wrapper method to check the parameters of crop size.""" + type_check(size, (int, list, tuple), "size") if isinstance(size, int): - size = (size, size) + check_value(size, (1, FLOAT_MAX_INTEGER)) elif isinstance(size, (tuple, list)) and len(size) == 2: - size = size + for value in size: + check_value(value, (1, FLOAT_MAX_INTEGER)) else: raise TypeError("Size should be a single integer or a list/tuple (h, w) of length 2.") - for value in size: - check_pos_int32(value) - return size def check_resize_size(size): """Wrapper method to check the parameters of resize.""" if isinstance(size, int): - check_pos_int32(size) + check_value(size, (1, FLOAT_MAX_INTEGER)) elif isinstance(size, (tuple, list)) and len(size) == 2: - for value in size: - check_value(value, (1, INT32_MAX)) + for i, value in enumerate(size): + check_value(value, (1, INT32_MAX), "size at dim {0}".format(i)) else: raise TypeError("Size should be a single integer or a list/tuple (h, w) of length 2.") - return size def check_normalize_c_param(mean, std): @@ -72,9 +60,9 @@ def check_normalize_py_param(mean, std): if len(mean) != len(std): raise ValueError("Length of mean and std must be equal") for mean_value in mean: - check_value(mean_value, [0., 1.]) + check_value(mean_value, [0., 1.], "mean_value") for std_value in std: - check_value(std_value, [0., 1.]) + check_value(std_value, [0., 1.], "std_value") def check_fill_value(fill_value): @@ -85,66 +73,37 @@ def check_fill_value(fill_value): check_uint8(value) else: raise TypeError("fill_value should be a single integer or a 3-tuple.") - 
return fill_value def check_padding(padding): """Parsing the padding arguments and check if it is legal.""" - if isinstance(padding, numbers.Number): - top = bottom = left = right = padding - - elif isinstance(padding, (tuple, list)): - if len(padding) == 2: - left = right = padding[0] - top = bottom = padding[1] - elif len(padding) == 4: - left = padding[0] - top = padding[1] - right = padding[2] - bottom = padding[3] - else: + type_check(padding, (tuple, list, numbers.Number), "padding") + if isinstance(padding, (tuple, list)): + if len(padding) not in (2, 4): raise ValueError("The size of the padding list or tuple should be 2 or 4.") - else: - raise TypeError("Padding can be any of: a number, a tuple or list of size 2 or 4.") - if not (isinstance(left, int) and isinstance(top, int) and isinstance(right, int) and isinstance(bottom, int)): - raise TypeError("Padding value should be integer.") - if left < 0 or top < 0 or right < 0 or bottom < 0: - raise ValueError("Padding value could not be negative.") - return left, top, right, bottom + for i, pad_value in enumerate(padding): + type_check(pad_value, (int,), "padding[{}]".format(i)) + check_value(pad_value, (0, INT32_MAX), "pad_value") def check_degrees(degrees): """Check if the degrees is legal.""" + type_check(degrees, (numbers.Number, list, tuple), "degrees") if isinstance(degrees, numbers.Number): - if degrees < 0: - raise ValueError("If degrees is a single number, it cannot be negative.") - degrees = (-degrees, degrees) + check_value(degrees, (0, float("inf")), "degrees") elif isinstance(degrees, (list, tuple)): if len(degrees) != 2: raise TypeError("If degrees is a sequence, the length must be 2.") - else: - raise TypeError("Degrees must be a single non-negative number or a sequence") - return degrees def check_random_color_adjust_param(value, input_name, center=1, bound=(0, FLOAT_MAX_INTEGER), non_negative=True): """Check the parameters in random color adjust operation.""" + type_check(value, 
(numbers.Number, list, tuple), input_name) if isinstance(value, numbers.Number): if value < 0: raise ValueError("The input value of {} cannot be negative.".format(input_name)) - # convert value into a range - value = [center - value, center + value] - if non_negative: - value[0] = max(0, value[0]) elif isinstance(value, (list, tuple)) and len(value) == 2: - if not bound[0] <= value[0] <= value[1] <= bound[1]: - raise ValueError("Please check your value range of {} is valid and " - "within the bound {}".format(input_name, bound)) - else: - raise TypeError("Input of {} should be either a single value, or a list/tuple of " - "length 2.".format(input_name)) - factor = (value[0], value[1]) - return factor + check_range(value, bound) def check_erasing_value(value): @@ -155,173 +114,105 @@ def check_erasing_value(value): def check_crop(method): - """A wrapper that wrap a parameter checker to the original function(crop operation).""" + """A wrapper that wraps a parameter checker to the original function(crop operation).""" @wraps(method) def new_method(self, *args, **kwargs): - size = (list(args) + [None])[0] - if "size" in kwargs: - size = kwargs.get("size") - if size is None: - raise ValueError("size is not provided.") - size = check_crop_size(size) - kwargs["size"] = size + [size], _ = parse_user_args(method, *args, **kwargs) + check_crop_size(size) - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method def check_resize_interpolation(method): - """A wrapper that wrap a parameter checker to the original function(resize interpolation operation).""" + """A wrapper that wraps a parameter checker to the original function(resize interpolation operation).""" @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 2 * [None])[:2] - size, interpolation = args - if "size" in kwargs: - size = kwargs.get("size") - if "interpolation" in kwargs: - interpolation = kwargs.get("interpolation") - - if size is None: - raise 
ValueError("size is not provided.") - size = check_resize_size(size) - kwargs["size"] = size - + [size, interpolation], _ = parse_user_args(method, *args, **kwargs) + check_resize_size(size) if interpolation is not None: - check_inter_mode(interpolation) - kwargs["interpolation"] = interpolation + type_check(interpolation, (Inter,), "interpolation") - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method def check_resize(method): - """A wrapper that wrap a parameter checker to the original function(resize operation).""" + """A wrapper that wraps a parameter checker to the original function(resize operation).""" @wraps(method) def new_method(self, *args, **kwargs): - size = (list(args) + [None])[0] - if "size" in kwargs: - size = kwargs.get("size") - - if size is None: - raise ValueError("size is not provided.") - size = check_resize_size(size) - kwargs["size"] = size + [size], _ = parse_user_args(method, *args, **kwargs) + check_resize_size(size) - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method def check_random_resize_crop(method): - """A wrapper that wrap a parameter checker to the original function(random resize crop operation).""" + """A wrapper that wraps a parameter checker to the original function(random resize crop operation).""" @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 5 * [None])[:5] - size, scale, ratio, interpolation, max_attempts = args - if "size" in kwargs: - size = kwargs.get("size") - if "scale" in kwargs: - scale = kwargs.get("scale") - if "ratio" in kwargs: - ratio = kwargs.get("ratio") - if "interpolation" in kwargs: - interpolation = kwargs.get("interpolation") - if "max_attempts" in kwargs: - max_attempts = kwargs.get("max_attempts") - - if size is None: - raise ValueError("size is not provided.") - size = check_crop_size(size) - kwargs["size"] = size + [size, scale, ratio, interpolation, max_attempts], _ = parse_user_args(method, *args, 
**kwargs) + check_crop_size(size) if scale is not None: check_range(scale, [0, FLOAT_MAX_INTEGER]) - kwargs["scale"] = scale if ratio is not None: check_range(ratio, [0, FLOAT_MAX_INTEGER]) - check_positive(ratio[0]) - kwargs["ratio"] = ratio + check_positive(ratio[0], "ratio[0]") if interpolation is not None: - check_inter_mode(interpolation) - kwargs["interpolation"] = interpolation + type_check(interpolation, (Inter,), "interpolation") if max_attempts is not None: - check_pos_int32(max_attempts) - kwargs["max_attempts"] = max_attempts + check_value(max_attempts, (1, FLOAT_MAX_INTEGER)) - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method def check_prob(method): - """A wrapper that wrap a parameter checker(check the probability) to the original function.""" + """A wrapper that wraps a parameter checker(check the probability) to the original function.""" @wraps(method) def new_method(self, *args, **kwargs): - prob = (list(args) + [None])[0] - if "prob" in kwargs: - prob = kwargs.get("prob") - if prob is not None: - check_value(prob, [0., 1.]) - kwargs["prob"] = prob + [prob], _ = parse_user_args(method, *args, **kwargs) + type_check(prob, (float, int,), "prob") + check_value(prob, [0., 1.], "prob") - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method def check_normalize_c(method): - """A wrapper that wrap a parameter checker to the original function(normalize operation written in C++).""" + """A wrapper that wraps a parameter checker to the original function(normalize operation written in C++).""" @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 2 * [None])[:2] - mean, std = args - if "mean" in kwargs: - mean = kwargs.get("mean") - if "std" in kwargs: - std = kwargs.get("std") - - if mean is None: - raise ValueError("mean is not provided.") - if std is None: - raise ValueError("std is not provided.") + [mean, std], _ = parse_user_args(method, *args, **kwargs) 
check_normalize_c_param(mean, std) - kwargs["mean"] = mean - kwargs["std"] = std - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method def check_normalize_py(method): - """A wrapper that wrap a parameter checker to the original function(normalize operation written in Python).""" + """A wrapper that wraps a parameter checker to the original function(normalize operation written in Python).""" @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 2 * [None])[:2] - mean, std = args - if "mean" in kwargs: - mean = kwargs.get("mean") - if "std" in kwargs: - std = kwargs.get("std") - - if mean is None: - raise ValueError("mean is not provided.") - if std is None: - raise ValueError("std is not provided.") + [mean, std], _ = parse_user_args(method, *args, **kwargs) check_normalize_py_param(mean, std) - kwargs["mean"] = mean - kwargs["std"] = std - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -331,38 +222,17 @@ def check_random_crop(method): @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 5 * [None])[:5] - size, padding, pad_if_needed, fill_value, padding_mode = args - - if "size" in kwargs: - size = kwargs.get("size") - if "padding" in kwargs: - padding = kwargs.get("padding") - if "fill_value" in kwargs: - fill_value = kwargs.get("fill_value") - if "padding_mode" in kwargs: - padding_mode = kwargs.get("padding_mode") - if "pad_if_needed" in kwargs: - pad_if_needed = kwargs.get("pad_if_needed") - - if size is None: - raise ValueError("size is not provided.") - size = check_crop_size(size) - kwargs["size"] = size - + [size, padding, pad_if_needed, fill_value, padding_mode], _ = parse_user_args(method, *args, **kwargs) + check_crop_size(size) + type_check(pad_if_needed, (bool,), "pad_if_needed") if padding is not None: - padding = check_padding(padding) - kwargs["padding"] = padding + check_padding(padding) if fill_value is not None: - 
fill_value = check_fill_value(fill_value) - kwargs["fill_value"] = fill_value + check_fill_value(fill_value) if padding_mode is not None: - check_border_type(padding_mode) - kwargs["padding_mode"] = padding_mode - if pad_if_needed is not None: - kwargs["pad_if_needed"] = pad_if_needed + type_check(padding_mode, (Border,), "padding_mode") - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -372,27 +242,13 @@ def check_random_color_adjust(method): @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 4 * [None])[:4] - brightness, contrast, saturation, hue = args - if "brightness" in kwargs: - brightness = kwargs.get("brightness") - if "contrast" in kwargs: - contrast = kwargs.get("contrast") - if "saturation" in kwargs: - saturation = kwargs.get("saturation") - if "hue" in kwargs: - hue = kwargs.get("hue") - - if brightness is not None: - kwargs["brightness"] = check_random_color_adjust_param(brightness, "brightness") - if contrast is not None: - kwargs["contrast"] = check_random_color_adjust_param(contrast, "contrast") - if saturation is not None: - kwargs["saturation"] = check_random_color_adjust_param(saturation, "saturation") - if hue is not None: - kwargs["hue"] = check_random_color_adjust_param(hue, 'hue', center=0, bound=(-0.5, 0.5), non_negative=False) - - return method(self, **kwargs) + [brightness, contrast, saturation, hue], _ = parse_user_args(method, *args, **kwargs) + check_random_color_adjust_param(brightness, "brightness") + check_random_color_adjust_param(contrast, "contrast") + check_random_color_adjust_param(saturation, "saturation") + check_random_color_adjust_param(hue, 'hue', center=0, bound=(-0.5, 0.5), non_negative=False) + + return method(self, *args, **kwargs) return new_method @@ -402,38 +258,19 @@ def check_random_rotation(method): @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 5 * [None])[:5] - degrees, resample, expand, center, fill_value = args 
- if "degrees" in kwargs: - degrees = kwargs.get("degrees") - if "resample" in kwargs: - resample = kwargs.get("resample") - if "expand" in kwargs: - expand = kwargs.get("expand") - if "center" in kwargs: - center = kwargs.get("center") - if "fill_value" in kwargs: - fill_value = kwargs.get("fill_value") - - if degrees is None: - raise ValueError("degrees is not provided.") - degrees = check_degrees(degrees) - kwargs["degrees"] = degrees + [degrees, resample, expand, center, fill_value], _ = parse_user_args(method, *args, **kwargs) + check_degrees(degrees) if resample is not None: - check_inter_mode(resample) - kwargs["resample"] = resample + type_check(resample, (Inter,), "resample") if expand is not None: - check_bool(expand) - kwargs["expand"] = expand + type_check(expand, (bool,), "expand") if center is not None: - check_2tuple(center) - kwargs["center"] = center + check_2tuple(center, "center") if fill_value is not None: - fill_value = check_fill_value(fill_value) - kwargs["fill_value"] = fill_value + check_fill_value(fill_value) - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -443,16 +280,11 @@ def check_transforms_list(method): @wraps(method) def new_method(self, *args, **kwargs): - transforms = (list(args) + [None])[0] - if "transforms" in kwargs: - transforms = kwargs.get("transforms") - if transforms is None: - raise ValueError("transforms is not provided.") + [transforms], _ = parse_user_args(method, *args, **kwargs) - check_list(transforms) - kwargs["transforms"] = transforms + type_check(transforms, (list,), "transforms") - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -462,21 +294,14 @@ def check_random_apply(method): @wraps(method) def new_method(self, *args, **kwargs): - transforms, prob = (list(args) + 2 * [None])[:2] - if "transforms" in kwargs: - transforms = kwargs.get("transforms") - if transforms is None: - raise ValueError("transforms is not provided.") - 
check_list(transforms) - kwargs["transforms"] = transforms - - if "prob" in kwargs: - prob = kwargs.get("prob") + [transforms, prob], _ = parse_user_args(method, *args, **kwargs) + type_check(transforms, (list,), "transforms") + if prob is not None: - check_value(prob, [0., 1.]) - kwargs["prob"] = prob + type_check(prob, (float, int,), "prob") + check_value(prob, [0., 1.], "prob") - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -486,23 +311,13 @@ def check_ten_crop(method): @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 2 * [None])[:2] - size, use_vertical_flip = args - if "size" in kwargs: - size = kwargs.get("size") - if "use_vertical_flip" in kwargs: - use_vertical_flip = kwargs.get("use_vertical_flip") - - if size is None: - raise ValueError("size is not provided.") - size = check_crop_size(size) - kwargs["size"] = size + [size, use_vertical_flip], _ = parse_user_args(method, *args, **kwargs) + check_crop_size(size) if use_vertical_flip is not None: - check_bool(use_vertical_flip) - kwargs["use_vertical_flip"] = use_vertical_flip + type_check(use_vertical_flip, (bool,), "use_vertical_flip") - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -512,16 +327,13 @@ def check_num_channels(method): @wraps(method) def new_method(self, *args, **kwargs): - num_output_channels = (list(args) + [None])[0] - if "num_output_channels" in kwargs: - num_output_channels = kwargs.get("num_output_channels") + [num_output_channels], _ = parse_user_args(method, *args, **kwargs) if num_output_channels is not None: if num_output_channels not in (1, 3): raise ValueError("Number of channels of the output grayscale image" "should be either 1 or 3. 
Got {0}".format(num_output_channels)) - kwargs["num_output_channels"] = num_output_channels - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -531,28 +343,12 @@ def check_pad(method): @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 3 * [None])[:3] - padding, fill_value, padding_mode = args - if "padding" in kwargs: - padding = kwargs.get("padding") - if "fill_value" in kwargs: - fill_value = kwargs.get("fill_value") - if "padding_mode" in kwargs: - padding_mode = kwargs.get("padding_mode") - - if padding is None: - raise ValueError("padding is not provided.") - padding = check_padding(padding) - kwargs["padding"] = padding + [padding, fill_value, padding_mode], _ = parse_user_args(method, *args, **kwargs) + check_padding(padding) + check_fill_value(fill_value) + type_check(padding_mode, (Border,), "padding_mode") - if fill_value is not None: - fill_value = check_fill_value(fill_value) - kwargs["fill_value"] = fill_value - if padding_mode is not None: - check_border_type(padding_mode) - kwargs["padding_mode"] = padding_mode - - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -562,26 +358,13 @@ def check_random_perspective(method): @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 3 * [None])[:3] - distortion_scale, prob, interpolation = args - if "distortion_scale" in kwargs: - distortion_scale = kwargs.get("distortion_scale") - if "prob" in kwargs: - prob = kwargs.get("prob") - if "interpolation" in kwargs: - interpolation = kwargs.get("interpolation") - - if distortion_scale is not None: - check_value(distortion_scale, [0., 1.]) - kwargs["distortion_scale"] = distortion_scale - if prob is not None: - check_value(prob, [0., 1.]) - kwargs["prob"] = prob - if interpolation is not None: - check_inter_mode(interpolation) - kwargs["interpolation"] = interpolation + [distortion_scale, prob, interpolation], _ = parse_user_args(method, 
*args, **kwargs) - return method(self, **kwargs) + check_value(distortion_scale, [0., 1.], "distortion_scale") + check_value(prob, [0., 1.], "prob") + type_check(interpolation, (Inter,), "interpolation") + + return method(self, *args, **kwargs) return new_method @@ -591,28 +374,13 @@ def check_mix_up(method): @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 3 * [None])[:3] - batch_size, alpha, is_single = args - if "batch_size" in kwargs: - batch_size = kwargs.get("batch_size") - if "alpha" in kwargs: - alpha = kwargs.get("alpha") - if "is_single" in kwargs: - is_single = kwargs.get("is_single") - - if batch_size is None: - raise ValueError("batch_size") - check_pos_int32(batch_size) - kwargs["batch_size"] = batch_size - if alpha is None: - raise ValueError("alpha") - check_positive(alpha) - kwargs["alpha"] = alpha - if is_single is not None: - check_type(is_single, bool) - kwargs["is_single"] = is_single - - return method(self, **kwargs) + [batch_size, alpha, is_single], _ = parse_user_args(method, *args, **kwargs) + + check_value(batch_size, (1, FLOAT_MAX_INTEGER)) + check_positive(alpha, "alpha") + type_check(is_single, (bool,), "is_single") + + return method(self, *args, **kwargs) return new_method @@ -622,41 +390,16 @@ def check_random_erasing(method): @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 6 * [None])[:6] - prob, scale, ratio, value, inplace, max_attempts = args - if "prob" in kwargs: - prob = kwargs.get("prob") - if "scale" in kwargs: - scale = kwargs.get("scale") - if "ratio" in kwargs: - ratio = kwargs.get("ratio") - if "value" in kwargs: - value = kwargs.get("value") - if "inplace" in kwargs: - inplace = kwargs.get("inplace") - if "max_attempts" in kwargs: - max_attempts = kwargs.get("max_attempts") + [prob, scale, ratio, value, inplace, max_attempts], _ = parse_user_args(method, *args, **kwargs) - if prob is not None: - check_value(prob, [0., 1.]) - kwargs["prob"] = prob - if scale is not 
None: - check_range(scale, [0, FLOAT_MAX_INTEGER]) - kwargs["scale"] = scale - if ratio is not None: - check_range(ratio, [0, FLOAT_MAX_INTEGER]) - kwargs["ratio"] = ratio - if value is not None: - check_erasing_value(value) - kwargs["value"] = value - if inplace is not None: - check_bool(inplace) - kwargs["inplace"] = inplace - if max_attempts is not None: - check_pos_int32(max_attempts) - kwargs["max_attempts"] = max_attempts + check_value(prob, [0., 1.], "prob") + check_range(scale, [0, FLOAT_MAX_INTEGER]) + check_range(ratio, [0, FLOAT_MAX_INTEGER]) + check_erasing_value(value) + type_check(inplace, (bool,), "inplace") + check_value(max_attempts, (1, FLOAT_MAX_INTEGER)) - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -666,23 +409,12 @@ def check_cutout(method): @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 2 * [None])[:2] - length, num_patches = args - if "length" in kwargs: - length = kwargs.get("length") - if "num_patches" in kwargs: - num_patches = kwargs.get("num_patches") - - if length is None: - raise ValueError("length") - check_pos_int32(length) - kwargs["length"] = length + [length, num_patches], _ = parse_user_args(method, *args, **kwargs) - if num_patches is not None: - check_pos_int32(num_patches) - kwargs["num_patches"] = num_patches + check_value(length, (1, FLOAT_MAX_INTEGER)) + check_value(num_patches, (1, FLOAT_MAX_INTEGER)) - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -692,17 +424,9 @@ def check_linear_transform(method): @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 2 * [None])[:2] - transformation_matrix, mean_vector = args - if "transformation_matrix" in kwargs: - transformation_matrix = kwargs.get("transformation_matrix") - if "mean_vector" in kwargs: - mean_vector = kwargs.get("mean_vector") - - if transformation_matrix is None: - raise ValueError("transformation_matrix is not provided.") 
- if mean_vector is None: - raise ValueError("mean_vector is not provided.") + [transformation_matrix, mean_vector], _ = parse_user_args(method, *args, **kwargs) + type_check(transformation_matrix, (np.ndarray,), "transformation_matrix") + type_check(mean_vector, (np.ndarray,), "mean_vector") if transformation_matrix.shape[0] != transformation_matrix.shape[1]: raise ValueError("transformation_matrix should be a square matrix. " @@ -711,10 +435,7 @@ def check_linear_transform(method): raise ValueError("mean_vector length {0} should match either one dimension of the square" "transformation_matrix {1}.".format(mean_vector.shape[0], transformation_matrix.shape)) - kwargs["transformation_matrix"] = transformation_matrix - kwargs["mean_vector"] = mean_vector - - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -724,67 +445,40 @@ def check_random_affine(method): @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 6 * [None])[:6] - degrees, translate, scale, shear, resample, fill_value = args - if "degrees" in kwargs: - degrees = kwargs.get("degrees") - if "translate" in kwargs: - translate = kwargs.get("translate") - if "scale" in kwargs: - scale = kwargs.get("scale") - if "shear" in kwargs: - shear = kwargs.get("shear") - if "resample" in kwargs: - resample = kwargs.get("resample") - if "fill_value" in kwargs: - fill_value = kwargs.get("fill_value") - - if degrees is None: - raise ValueError("degrees is not provided.") - degrees = check_degrees(degrees) - kwargs["degrees"] = degrees + [degrees, translate, scale, shear, resample, fill_value], _ = parse_user_args(method, *args, **kwargs) + check_degrees(degrees) if translate is not None: - if isinstance(translate, (tuple, list)) and len(translate) == 2: - for t in translate: - if t < 0.0 or t > 1.0: - raise ValueError("translation values should be between 0 and 1") - else: + if type_check(translate, (list, tuple), "translate"): + translate_names = 
["translate_{0}".format(i) for i in range(len(translate))] + type_check_list(translate, (int, float), translate_names) + if len(translate) != 2: raise TypeError("translate should be a list or tuple of length 2.") - kwargs["translate"] = translate + for i, t in enumerate(translate): + check_value(t, [0.0, 1.0], "translate at {0}".format(i)) if scale is not None: - if isinstance(scale, (tuple, list)) and len(scale) == 2: - for s in scale: - if s <= 0: - raise ValueError("scale values should be positive") + type_check(scale, (tuple, list), "scale") + if len(scale) == 2: + for i, s in enumerate(scale): + check_positive(s, "scale[{}]".format(i)) else: raise TypeError("scale should be a list or tuple of length 2.") - kwargs["scale"] = scale if shear is not None: + type_check(shear, (numbers.Number, tuple, list), "shear") if isinstance(shear, numbers.Number): - if shear < 0: - raise ValueError("If shear is a single number, it must be positive.") - shear = (-1 * shear, shear) - elif isinstance(shear, (tuple, list)) and (len(shear) == 2 or len(shear) == 4): - # X-Axis shear with [min, max] - if len(shear) == 2: - shear = [shear[0], shear[1], 0., 0.] 
- elif len(shear) == 4: - shear = [s for s in shear] + check_positive(shear, "shear") else: - raise TypeError("shear should be a list or tuple and it must be of length 2 or 4.") - kwargs["shear"] = shear + if len(shear) not in (2, 4): + raise TypeError("shear must be of length 2 or 4.") + + type_check(resample, (Inter,), "resample") - if resample is not None: - check_inter_mode(resample) - kwargs["resample"] = resample if fill_value is not None: - fill_value = check_fill_value(fill_value) - kwargs["fill_value"] = fill_value + check_fill_value(fill_value) - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -794,24 +488,11 @@ def check_rescale(method): @wraps(method) def new_method(self, *args, **kwargs): - rescale, shift = (list(args) + 2 * [None])[:2] - if "rescale" in kwargs: - rescale = kwargs.get("rescale") - if "shift" in kwargs: - shift = kwargs.get("shift") - - if rescale is None: - raise ValueError("rescale is not provided.") + [rescale, shift], _ = parse_user_args(method, *args, **kwargs) check_pos_float32(rescale) - kwargs["rescale"] = rescale - - if shift is None: - raise ValueError("shift is not provided.") - if not isinstance(shift, numbers.Number): - raise TypeError("shift is not a number.") - kwargs["shift"] = shift + type_check(shift, (numbers.Number,), "shift") - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -821,33 +502,16 @@ def check_uniform_augment_cpp(method): @wraps(method) def new_method(self, *args, **kwargs): - operations, num_ops = (list(args) + 2 * [None])[:2] - if "operations" in kwargs: - operations = kwargs.get("operations") - else: - raise ValueError("operations list required") - if "num_ops" in kwargs: - num_ops = kwargs.get("num_ops") - else: - num_ops = 2 - - if not isinstance(num_ops, int): - raise ValueError("Number of operations should be an integer.") - - if num_ops <= 0: - raise ValueError("num_ops should be greater than zero") + [operations, 
num_ops], _ = parse_user_args(method, *args, **kwargs) + type_check(num_ops, (int,), "num_ops") + check_positive(num_ops, "num_ops") + if num_ops > len(operations): raise ValueError("num_ops is greater than operations list size") - if not isinstance(operations, list): - raise TypeError("operations is not a python list") - for op in operations: - if not isinstance(op, TensorOp): - raise ValueError("operations list only accepts C++ operations.") + tensor_ops = ["tensor_op_{0}".format(i) for i in range(len(operations))] + type_check_list(operations, (TensorOp,), tensor_ops) - kwargs["num_ops"] = num_ops - kwargs["operations"] = operations - - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -857,23 +521,11 @@ def check_bounding_box_augment_cpp(method): @wraps(method) def new_method(self, *args, **kwargs): - transform, ratio = (list(args) + 2 * [None])[:2] - if "transform" in kwargs: - transform = kwargs.get("transform") - if "ratio" in kwargs: - ratio = kwargs.get("ratio") - if not isinstance(ratio, float) and not isinstance(ratio, int): - raise ValueError("Ratio should be an int or float.") - if ratio is not None: - check_value(ratio, [0., 1.]) - kwargs["ratio"] = ratio - else: - ratio = 0.3 - if not isinstance(transform, TensorOp): - raise ValueError("Transform can only be a C++ operation.") - kwargs["transform"] = transform - kwargs["ratio"] = ratio - return method(self, **kwargs) + [transform, ratio], _ = parse_user_args(method, *args, **kwargs) + type_check(ratio, (float, int), "ratio") + check_value(ratio, [0., 1.], "ratio") + type_check(transform, (TensorOp,), "transform") + return method(self, *args, **kwargs) return new_method @@ -883,29 +535,22 @@ def check_uniform_augment_py(method): @wraps(method) def new_method(self, *args, **kwargs): - transforms, num_ops = (list(args) + 2 * [None])[:2] - if "transforms" in kwargs: - transforms = kwargs.get("transforms") - if transforms is None: - raise ValueError("transforms is 
not provided.") + [transforms, num_ops], _ = parse_user_args(method, *args, **kwargs) + type_check(transforms, (list,), "transforms") + if not transforms: raise ValueError("transforms list is empty.") - check_list(transforms) + for transform in transforms: if isinstance(transform, TensorOp): raise ValueError("transform list only accepts Python operations.") - kwargs["transforms"] = transforms - if "num_ops" in kwargs: - num_ops = kwargs.get("num_ops") - if num_ops is not None: - check_type(num_ops, int) - check_positive(num_ops) - if num_ops > len(transforms): - raise ValueError("num_ops cannot be greater than the length of transforms list.") - kwargs["num_ops"] = num_ops + type_check(num_ops, (int,), "num_ops") + check_positive(num_ops, "num_ops") + if num_ops > len(transforms): + raise ValueError("num_ops cannot be greater than the length of transforms list.") - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -915,22 +560,16 @@ def check_positive_degrees(method): @wraps(method) def new_method(self, *args, **kwargs): - degrees = (list(args) + [None])[0] - if "degrees" in kwargs: - degrees = kwargs.get("degrees") - - if degrees is not None: - if isinstance(degrees, (list, tuple)): - if len(degrees) != 2: - raise ValueError("Degrees must be a sequence with length 2.") - if degrees[0] < 0: - raise ValueError("Degrees range must be non-negative.") - if degrees[0] > degrees[1]: - raise ValueError("Degrees should be in (min,max) format. Got (max,min).") - else: - raise TypeError("Degrees must be a sequence in (min,max) format.") + [degrees], _ = parse_user_args(method, *args, **kwargs) + + if isinstance(degrees, (list, tuple)): + if len(degrees) != 2: + raise ValueError("Degrees must be a sequence with length 2.") + check_positive(degrees[0], "degrees[0]") + if degrees[0] > degrees[1]: + raise ValueError("Degrees should be in (min,max) format. 
Got (max,min).") - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -940,18 +579,12 @@ def check_compose_list(method): @wraps(method) def new_method(self, *args, **kwargs): - transforms = (list(args) + [None])[0] - if "transforms" in kwargs: - transforms = kwargs.get("transforms") - if transforms is None: - raise ValueError("transforms is not provided.") + [transforms], _ = parse_user_args(method, *args, **kwargs) + + type_check(transforms, (list,), transforms) if not transforms: raise ValueError("transforms list is empty.") - if not isinstance(transforms, list): - raise TypeError("transforms is not a python list") - - kwargs["transforms"] = transforms - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method diff --git a/mindspore/model_zoo/__init__.py b/mindspore/model_zoo/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/mindspore/nn/__init__.py b/mindspore/nn/__init__.py index 8d5e7d3b0a..e5c133a9a6 100644 --- a/mindspore/nn/__init__.py +++ b/mindspore/nn/__init__.py @@ -17,13 +17,15 @@ Neural Networks Cells. Pre-defined building blocks or computing units to construct Neural Networks. """ -from . import layer, loss, optim, metrics, wrap +from . 
import layer, loss, optim, metrics, wrap, distribution from .cell import Cell, GraphKernel from .layer import * from .loss import * from .optim import * from .metrics import * from .wrap import * +from .distribution import * + __all__ = ["Cell", "GraphKernel"] __all__.extend(layer.__all__) @@ -31,5 +33,7 @@ __all__.extend(loss.__all__) __all__.extend(optim.__all__) __all__.extend(metrics.__all__) __all__.extend(wrap.__all__) +__all__.extend(distribution.__all__) + __all__.sort() diff --git a/mindspore/nn/cell.py b/mindspore/nn/cell.py index cffe00a920..3eec96f0b5 100755 --- a/mindspore/nn/cell.py +++ b/mindspore/nn/cell.py @@ -16,6 +16,7 @@ import time import gc from collections import OrderedDict +import numpy from mindspore import log as logger from .. import context from ..common import dtype as mstype @@ -211,6 +212,9 @@ class Cell: if context.get_context("mode") == context.GRAPH_MODE: out = self.compile_and_run(*inputs) return out + for item in inputs: + if isinstance(item, numpy.ndarray): + raise TypeError("cell inputs should not be numpy array.") self.init_parameters_data() orign_grad = [] if self.requires_grad is True: @@ -827,6 +831,20 @@ class Cell: self._backward_hook = HookBackward(fn, self.cls_name + "(" + str(id(self)) + ")") self.enable_hook = True + def set_param_ps(self, recurse=True): + """ + Set whether the trainable parameter is updated by parameter server. + + Note: + This only works when running task in parameter server mode. + + Args: + recurse (bool): Whether sets the trainable parameters of subcells. Default: True. + """ + params = self.trainable_params(recurse) + for param in params: + param.set_param_ps() + class GraphKernel(Cell): """ Base class for GraphKernel. 
diff --git a/mindspore/nn/distribution/__init__.py b/mindspore/nn/distribution/__init__.py new file mode 100644 index 0000000000..55b4b03ef7 --- /dev/null +++ b/mindspore/nn/distribution/__init__.py @@ -0,0 +1,27 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +Distribution. + +The high-level components(Distributions) used to construct the probabilistic network. +""" + +from .distribution import Distribution +from .normal import Normal +from .bernoulli import Bernoulli + +__all__ = ['Distribution', + 'Normal', + 'Bernoulli',] diff --git a/mindspore/nn/distribution/_utils/__init__.py b/mindspore/nn/distribution/_utils/__init__.py new file mode 100644 index 0000000000..816485643a --- /dev/null +++ b/mindspore/nn/distribution/_utils/__init__.py @@ -0,0 +1,24 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +Distribution operation utility functions. +""" +from .utils import * + +__all__ = ['check_scalar', 'convert_to_batch', 'cast_to_tensor', + 'calc_batch_size', 'check_greater', + 'check_greater_equal_zero', + 'calc_broadcast_shape_from_param', + 'check_scalar_from_param', 'check_prob'] diff --git a/mindspore/nn/distribution/_utils/utils.py b/mindspore/nn/distribution/_utils/utils.py new file mode 100644 index 0000000000..c790a66f25 --- /dev/null +++ b/mindspore/nn/distribution/_utils/utils.py @@ -0,0 +1,199 @@ + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Utitly functions to help distribution class.""" +import numpy as np +from mindspore.ops import _utils as utils +from ....common.tensor import Tensor +from ....common.parameter import Parameter +from ....common import dtype as mstype + + +def check_scalar(value): + """ + Check if input value is a scalar. + """ + return np.isscalar(value) + + +def cast_to_tensor(t, dtype=mstype.float32): + """ + Cast an user input value into a Tensor of dtype. + + Args: + t (int, float, list, numpy.ndarray, Tensor, Parameter): object to be cast to Tensor. + dtype (mindspore.dtype): dtype of the Tensor. 
Default: mstype.float32. + + Raises: + RuntimeError: if t cannot be cast to Tensor. + + Returns: + Tensor. + """ + if isinstance(t, Parameter): + return t + if isinstance(t, Tensor): + #check if the Tensor in shape of Tensor(4) + if t.dim() == 0: + value = t.asnumpy() + return Tensor([t], dtype=dtype) + #convert the type of tensor to dtype + t.set_dtype(dtype) + return t + if isinstance(t, (list, np.ndarray)): + return Tensor(t, dtype=dtype) + if check_scalar(t): + return Tensor([t], dtype=dtype) + raise RuntimeError("Input type is not supported.") + +def calc_batch_size(batch_shape): + """ + Calculate the size of a given batch_shape. + + Args: + batch_shape (tuple): batch shape to be calculated. + + Returns: + int. + """ + return int(np.prod(batch_shape)) + +def convert_to_batch(t, batch_shape, dtype): + """ + Convert a Tensor to a given batch shape. + + Args: + t (Tensor, Parameter): Tensor to be converted. + batch_shape (tuple): desired batch shape. + dtype (mindspore.dtype): desired dtype. + + Raises: + RuntimeError: if the converison cannot be done. + + Returns: + Tensor, with shape of batch_shape. + """ + if isinstance(t, Parameter): + return t + t = cast_to_tensor(t, dtype) + if t.shape != batch_shape: + mul = calc_batch_size(batch_shape) // t.size() + if (calc_batch_size(batch_shape) % t.size()) != 0: + raise RuntimeError("Cannot cast the tensor to the given batch shape.") + temp = list(t.asnumpy()) * mul + temp = np.reshape(temp, batch_shape) + return Tensor(temp, dtype) + return t + +def check_scalar_from_param(params): + """ + Check if params are all scalars. + + Args: + params (dict): parameters used to initialize distribution. + + Notes: String parameters are excluded. + """ + for value in params.values(): + if isinstance(value, (str, type(params['dtype']))): + continue + elif check_scalar(value): + continue + else: + return False + return True + + +def calc_broadcast_shape_from_param(params): + """ + Calculate the broadcast shape from params. 
+ + Args: + params (dict): parameters used to initialize distribution. + + Returns: + tuple. + """ + broadcast_shape = [] + for value in params.values(): + if isinstance(value, (str, type(params['dtype']))): + continue + if value is None: + return None + if isinstance(value, Parameter): + value_t = value.default_input + else: + value_t = cast_to_tensor(value, params['dtype']) + broadcast_shape = utils.get_broadcast_shape(broadcast_shape, list(value_t.shape), params['name']) + return tuple(broadcast_shape) + +def check_greater_equal_zero(value, name): + """ + Check if the given Tensor is greater zero. + + Args: + value (Tensor, Parameter): value to be checked. + name (str) : name of the value. + + Raises: + ValueError: if the input value is less than zero. + + """ + if isinstance(value, Parameter): + if not isinstance(value.default_input, Tensor): + return + value = value.default_input + comp = np.less(value.asnumpy(), np.zeros(value.shape)) + if comp.any(): + raise ValueError(f'{name} should be greater than zero.') + +def check_greater(a, b, name_a, name_b): + """ + Check if Tensor b is strictly greater than Tensor a. + + Args: + a (Tensor): input tensor a. + b (Tensor): input tensor b. + name_a (str): name of Tensor_a. + name_b (str): name of Tensor_b. + + Raises: + ValueError: if b is less than or equal to a + """ + comp = np.less(a.asnumpy(), b.asnumpy()) + if not comp.all(): + raise ValueError(f'{name_a} should be less than {name_b}') + + +def check_prob(p): + """ + Check if p is a proper probability, i.e. 0 <= p <=1. + + Args: + p (Tensor, Parameter): value to be checked. + + Raises: + ValueError: if p is not a proper probability. 
+ """ + if isinstance(p, Parameter): + if not isinstance(p.default_input, Tensor): + return + p = p.default_input + comp = np.less(p.asnumpy(), np.zeros(p.shape)) + if comp.any(): + raise ValueError('Probabilities should be greater than or equal to zero') + comp = np.greater(p.asnumpy(), np.ones(p.shape)) + if comp.any(): + raise ValueError('Probabilities should be less than or equal to one') diff --git a/mindspore/nn/distribution/bernoulli.py b/mindspore/nn/distribution/bernoulli.py new file mode 100644 index 0000000000..9aa20d668f --- /dev/null +++ b/mindspore/nn/distribution/bernoulli.py @@ -0,0 +1,168 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Bernoulli Distribution""" +from mindspore.ops import operations as P +from mindspore.ops import composite as C +from .distribution import Distribution +from ._utils.utils import cast_to_tensor, check_prob +from ...common import dtype as mstype + +class Bernoulli(Distribution): + """ + Example class: Bernoulli Distribution. + + Args: + probs (int, float, list, numpy.ndarray, Tensor, Parameter): probability of 1 as outcome. + seed (int): seed to use in sampling. Default: 0. + dtype (mindspore.dtype): type of the distribution. Default: mstype.int32. + name (str): name of the distribution. Default: Bernoulli. + + Note: + probs should be proper probabilities (0 <= p <= 1). 
+ + Examples: + >>> # To initialize a Bernoulli distribution which has equal probability of getting 1 and 0 + >>> b = nn.Bernoulli(0.5, dtype = mstype.int32) + >>> # The following create two independent Bernoulli distributions + >>> b = nn.Bernoulli([0.7, 0.2], dtype = mstype.int32) + """ + + def __init__(self, + probs=None, + seed=0, + dtype=mstype.int32, + name="Bernoulli"): + """ + Constructor of Bernoulli distribution. + """ + param = dict(locals()) + super(Bernoulli, self).__init__(dtype, name, param) + if probs is not None: + self._probs = cast_to_tensor(probs) + check_prob(self._probs) + else: + self._probs = probs + self.seed = seed + + # ops needed for the class + self.log = P.Log() + self.add = P.TensorAdd() + self.mul = P.Mul() + self.sqrt = P.Sqrt() + self.realdiv = P.RealDiv() + self.shape = P.Shape() + self.const = P.ScalarToArray() + self.less = P.Less() + self.cast = P.Cast() + self.erf = P.Erf() + self.sqrt = P.Sqrt() + + def extend_repr(self): + str_info = f'probs = {self._probs}' + return str_info + + def probs(self): + """ + Returns the probability for the outcome is 1. + """ + return self._probs + + def _mean(self, name='mean', probs1=None): + r""" + .. math:: + MEAN(B) = probs1 + """ + if name == 'mean': + return self._probs if probs1 is None else probs1 + return None + + def _var(self, name='var', probs1=None): + r""" + .. math:: + VAR(B) = probs1 * probs0 + """ + if name in ('sd', 'var'): + probs1 = self._probs if probs1 is None else probs1 + probs0 = self.add(1, -1 * probs1) + return self.mul(probs0, probs1) + return None + + def _prob(self, name, value, probs=None): + r""" + pmf of Bernoulli distribution. + + Args: + name (str): name of the function. Should be "prob" when passed in from construct. + value (Tensor): a Tensor composed of only zeros and ones. + probs (Tensor): probability of outcome is 1. Default: self._probs. + + .. 
math:: + pmf(k) = probs1 if k = 1; + pmf(k) = probs0 if k = 0; + """ + if name in ('prob', 'log_prob'): + probs1 = self._probs if probs is None else probs + probs0 = self.add(1, -1 * probs1) + return self.add(self.mul(probs1, value), + self.mul(probs0, self.add(1, -1 * value))) + return None + + def _kl_loss(self, name, dist, probs1_b, probs1_a=None): + r""" + Evaluate bernoulli-bernoulli kl divergence, i.e. KL(a||b). + + Args: + name (str): name of the funtion. Should always be "kl_loss" when passed in from construct. + dist (str): type of the distributions. Should be "Bernoulli" in this case. + probs1_b (Tensor): probs1 of distribution b. + probs1_a (Tensor): probs1 of distribution a. Default: self._probs. + + .. math:: + KL(a||b) = probs1_a * \log(\fract{probs1_a}{probs1_b}) + + probs0_a * \log(\fract{probs0_a}{probs0_b}) + """ + if name == 'kl_loss' and dist == 'Bernoulli': + probs1_a = self._probs if probs1_a is None else probs1_a + probs0_a = self.add(1, -1 * probs1_a) + probs0_b = self.add(1, -1 * probs1_b) + return self.add(probs1_a * self.log(self.realdiv(probs1_a, probs1_b)), + probs0_a * self.log(self.realdiv(probs0_a, probs0_b))) + return None + + def _sample(self, name, shape=(), probs=None): + """ + Sampling. + + Args: + name (str): name of the function. Should always be 'sample' when passed in from construct. + shape (tuple): shape of the sample. Default: (). + probs (Tensor): probs1 of the samples. Default: self._probs. + + Returns: + Tensor, shape is shape + batch_shape. 
+ """ + if name == 'sample': + probs1 = self._probs if probs is None else probs + batch_shape = self.shape(probs1) + sample_shape = shape + batch_shape + mean_zero = self.const(0.0) + sd_one = self.const(1.0) + sqrt_two = self.sqrt(self.const(2.0)) + sample_norm = C.normal(sample_shape, mean_zero, sd_one, self.seed) + sample_uniform = 0.5 * (1 + self.erf(self.realdiv(sample_norm, sqrt_two))) + sample = self.less(sample_uniform, probs1) + sample = self.cast(sample, self._dtype) + return sample + return None diff --git a/mindspore/nn/distribution/distribution.py b/mindspore/nn/distribution/distribution.py new file mode 100644 index 0000000000..1ed7906a9e --- /dev/null +++ b/mindspore/nn/distribution/distribution.py @@ -0,0 +1,200 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""basic""" +from ..cell import Cell +from ._utils.utils import calc_broadcast_shape_from_param + + +class Distribution(Cell): + """ + Base class for all mathematical distributions. + + Args: + dtype (mindspore.dtype): type of the distribution. + name (str): name of the distribution. + param (dict): parameters used to initialize the distribution. + + Note: + Derived class should override operations such as ,_mean, _prob, + and _log_prob. Functions should be called through construct when + used inside a network in the form of function name followed by + arguments. 
+ + Examples: + >>> class MyNormalDistribution(Distribution): + >>> def __init__(self): + >>> super(MyDistribution, self).__init__() + >>> self._mean_value = Tensor([2.0,3.0]) + >>> self._sd_value = Tensor([2.0,3.0]) + >>> + >>> def _mean(self): + >>> return self._mean_value + + """ + def __init__(self, + dtype, + name, + param): + + """ + Constructor of distribution class. + """ + super(Distribution, self).__init__() + self._name = name + self._dtype = dtype + self._parameters = {} + # parsing parameters + for k in param.keys(): + if not(k == 'self' or k.startswith('_')): + self._parameters[k] = param[k] + # some attributes + self._broadcast_shape = calc_broadcast_shape_from_param( + self._parameters) + + # set the function to call according to the derived class's attributes + self._set_prob() + self._set_log_prob() + self._set_sd() + + def _set_prob(self): + """ + Set probability funtion based on the availability of _prob and _log_likehood. + """ + if hasattr(self, '_prob'): + self._call_prob = self._prob + elif hasattr(self, '_log_likelihood'): + self._call_prob = self._calc_prob_from_log_likelihood + + def _set_sd(self): + """ + Set standard deviation based on the availability of _sd and _var. + """ + if hasattr(self, '_sd'): + self._call_sd = self._sd + elif hasattr(self, '_var'): + self._call_sd = self._calc_sd_from_var + + def _set_log_prob(self): + """ + Set log probability based on the availability of _prob and _log_likelihood. + """ + if hasattr(self, '_log_likelihood'): + self._call_log_prob = self._log_likelihood + if hasattr(self, '_prob'): + self._call_log_prob = self._calc_log_prob_from_prob + + def log_likelihood(self, *args): + """ + Evaluate the log probability at the given value. + + Note: + value is casted to Tensor for further calculation. + + Returns: + Tensor, shape is the broadcast_shape of the distribution. 
+ """ + return self._call_log_prob(*args) + + def _calc_prob_from_log_likelihood(self, *args): + r""" + Evaluate prob from log probability. + + .. math:: + probability(x) = \exp(log_likehood(x)) + """ + return self.exp(self._log_likelihood(*args)) + + def prob(self, *args): + """ + Evaluate the prob (pdf or pmf) at given value. + + Note: + value is casted to Tensor for further calculation. + + Returns: + Tensor, shape is the broadcast_shape of the distribution. + """ + return self._call_prob(*args) + + def _calc_log_prob_from_prob(self, *args): + r""" + Evaluate log probability from probability. + + .. math:: + log_prob(x) = \log(prob(x)) + """ + return self.log(self._prob(*args)) + + def kl_loss(self, **kwargs): + """ + Evaluate the KL divergence. Parameters of the second distribution should be + passed in through **kwargs. + + Returns: + Tensor, shape is the broadcast_shape of the distribution and input distribution. + """ + return self._kl_loss(**kwargs) + + def mean(self, **kwargs): + """ + Evaluate the mean. + + Returns: + Tensor, shape is the broadcast_shape of the distribution. + """ + return self._mean(**kwargs) + + def sd(self, **kwargs): + """ + Evaluate the standard deviation. + + Returns: + Tensor, shape is the broadcast_shape of the distribution. + """ + return self._call_sd(**kwargs) + + def _calc_sd_from_var(self, *args): + r""" + Evaluate log probability from probability. + + .. math:: + STD(x) = \sqrt(VAR(x)) + """ + return self.sqrt(self._var(*args)) + + def construct(self, *inputs): + """ + Override construct in Cell. + + Args: + *inputs: inputs[0] is always the name of the function. + + Notes: + Always raise RuntimeError as Distribution should not be called directly. 
+ """ + + if inputs[0] == 'log_prob': + return self._call_log_prob(*inputs) + if inputs[0] == 'prob': + return self._call_prob(*inputs) + if inputs[0] == 'kl_loss': + return self._kl_loss(*inputs) + if inputs[0] == 'mean': + return self._mean(*inputs) + if inputs[0] == 'sd': + return self._call_sd(*inputs) + if inputs[0] == 'sample': + return self._sample(*inputs) + return None diff --git a/mindspore/nn/distribution/normal.py b/mindspore/nn/distribution/normal.py new file mode 100644 index 0000000000..61cec6d810 --- /dev/null +++ b/mindspore/nn/distribution/normal.py @@ -0,0 +1,170 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Normal Distribution""" +import numpy as np +from mindspore.ops import operations as P +from mindspore.ops import composite as C +from .distribution import Distribution +from ._utils.utils import convert_to_batch, check_greater_equal_zero +from ...common import dtype as mstype +from ...context import get_context + +class Normal(Distribution): + """ + Example class: Normal distribution. + + Args: + mean (int, float, list, numpy.ndarray, Tensor, Parameter): mean of the Gaussian distribution. + sd (int, float, list, numpy.ndarray, Tensor, Parameter): stddev of the Gaussian distribution. + seed (int): seed to use in sampling. Default: 0. + dtype (mindspore.dtype): type of the distribution. Default: mstype.float32. 
+ name (str): name of the distribution. Default: Normal. + + + Note: + Standard deviation should be greater than zero. + + Examples: + >>> # To initialize a normal distribution of mean 3.0 and standard deviation 4.0 + >>> n = nn.Normal(3.0, 4.0, dtype=mstype.float32) + >>> # The following create two independent normal distributions + >>> n = nn.Normal([3.0, 3.0], [4.0, 4.0], dtype=mstype.float32) + """ + + def __init__(self, + mean=None, + sd=None, + seed=0, + dtype=mstype.float32, + name="Normal"): + """ + Constructor of normal distribution. + """ + param = dict(locals()) + super(Normal, self).__init__(dtype, name, param) + if mean is not None and sd is not None: + self._mean_value = convert_to_batch(mean, self._broadcast_shape, dtype) + self._sd_value = convert_to_batch(sd, self._broadcast_shape, dtype) + check_greater_equal_zero(self._sd_value, "Standard deviation") + else: + self._mean_value = mean + self._sd_value = sd + self.seed = seed + + #ops needed for the class + self.exp = P.Exp() + self.add = P.TensorAdd() + self.mul = P.Mul() + self.sq = P.Square() + self.log = P.Log() + self.sqrt = P.Sqrt() + self.realdiv = P.RealDiv() + self.expm1 = P.Expm1() if get_context('device_target') == 'Ascend' else self._expm1_by_step + self.shape = P.Shape() + self.zeroslike = P.ZerosLike() + self.const = P.ScalarToArray() + + def extend_repr(self): + str_info = f'mean = {self._mean_value}, standard deviation = {self._sd_value}' + return str_info + + def _expm1_by_step(self, x): + """ + Expm1 ops under GPU context. + """ + return self.add(self.exp(x), -1) + + def _mean(self, name='mean', mean=None, sd=None): + """ + Mean of the distribution. + """ + if name == 'mean': + mean = self._mean_value if mean is None or sd is None else mean + return mean + return None + + def _sd(self, name='sd', mean=None, sd=None): + """ + Standard deviation of the distribution. 
+ """ + if name in ('sd', 'var'): + sd = self._sd_value if mean is None or sd is None else sd + return sd + return None + + def _log_likelihood(self, name, value, mean=None, sd=None): + r""" + Evaluate log probability. + + .. math:: + L(x) = -1* \fract{(x - \mu)^2}{2. * \sigma^2} - \log(\sqrt(2* \pi * \sigma^2)) + """ + if name in ('prob', 'log_prob'): + mean = self._mean_value if mean is None else mean + sd = self._sd_value if sd is None else sd + unnormalized_log_prob = -1. * self.realdiv(self.sq(self.add(value, -1. * mean)), + 2. * self.sq(sd)) + neg_normalization = -1. * self.log(self.sqrt(2. * np.pi * self.sq(sd))) + return self.add(unnormalized_log_prob, neg_normalization) + return None + + def _kl_loss(self, name, dist, mean_b, sd_b, mean_a=None, sd_a=None): + r""" + Evaluate Normal-Normal kl divergence, i.e. KL(a||b). + + Args: + name (str): name of the funtion passed in from construct. Should always be "kl_loss". + dist (str): type of the distributions. Should be "Normal" in this case. + mean_b (Tensor): mean of distribution b. + sd_b (Tensor): standard deviation distribution b. + mean_a (Tensor): mean of distribution a. Default: self._mean_value. + sd_a (Tensor): standard deviation distribution a. Default: self._sd_value. + + .. math:: + KL(a||b) = 0.5 * (\fract{MEAN(a)}{STD(b)} - \fract{MEAN(b)}{STD(b)}) ^ 2 + + 0.5 * EXPM1(2 * (\log(STD(a)) - \log(STD(b))) - (\log(STD(a)) - \log(STD(b))) + """ + if name == 'kl_loss' and dist == 'Normal': + mean_a = self._mean_value if mean_a is None else mean_a + sd_a = self._sd_value if sd_a is None else sd_a + diff_log_scale = self.add(self.log(sd_a), - self.log(sd_b)) + squared_diff = self.sq(self.add(self.realdiv(mean_a, sd_b), - self.realdiv(mean_b, sd_b))) + return self.add(self.add(0.5 * squared_diff, 0.5 * self.expm1(2 * diff_log_scale)), - diff_log_scale) + return None + + def _sample(self, name, shape=(), mean=None, sd=None): + """ + Sampling. + + Args: + name (str): name of the function. 
Should always be 'sample' when passed in from construct. + shape (tuple): shape of the sample. Default: (). + mean (Tensor): mean of the samples. Default: self._mean_value. + sd (Tensor): standard deviation of the samples. Default: self._sd_value. + + Returns: + Tensor, shape is shape + batch_shape. + """ + if name == 'sample': + mean = self._mean_value if mean is None else mean + sd = self._sd_value if sd is None else sd + batch_shape = self.shape(self.add(self.zeroslike(mean), self.zeroslike(sd))) + sample_shape = shape + batch_shape + mean_zero = self.const(0.0) + sd_one = self.const(1.0) + sample_norm = C.normal(sample_shape, mean_zero, sd_one, self.seed) + sample = self.add(mean, self.mul(sample_norm, sd)) + return sample + return None diff --git a/mindspore/nn/layer/activation.py b/mindspore/nn/layer/activation.py index 14a1aa8554..384f625133 100644 --- a/mindspore/nn/layer/activation.py +++ b/mindspore/nn/layer/activation.py @@ -530,6 +530,7 @@ _activation = { 'relu6': ReLU6, 'tanh': Tanh, 'gelu': GELU, + 'elu': ELU, 'sigmoid': Sigmoid, 'prelu': PReLU, 'leakyrelu': LeakyReLU, diff --git a/mindspore/nn/layer/container.py b/mindspore/nn/layer/container.py index 48871401bf..ed36a1dd5f 100644 --- a/mindspore/nn/layer/container.py +++ b/mindspore/nn/layer/container.py @@ -69,7 +69,7 @@ class SequentialCell(Cell): Alternatively, an ordered dict of cells can also be passed in. Args: - args (list, optional): List of subclass of Cell. + args (list, OrderedDict): List of subclass of Cell. Raises: TypeError: If arg is not of type list or OrderedDict. 
diff --git a/mindspore/nn/layer/embedding.py b/mindspore/nn/layer/embedding.py index c8873039ab..3c4245d702 100755 --- a/mindspore/nn/layer/embedding.py +++ b/mindspore/nn/layer/embedding.py @@ -21,7 +21,7 @@ from mindspore.common.initializer import initializer from ..cell import Cell from ..._checkparam import Validator as validator -__all__ = ['Embedding'] +__all__ = ['Embedding', 'EmbeddingLookup'] class Embedding(Cell): r""" @@ -105,3 +105,49 @@ class Embedding(Cell): self.embedding_table, self.dtype) return s + +class EmbeddingLookup(Cell): + r""" + Returns a slice of input tensor based on the specified indices. + + Note: + When 'target' is set to 'CPU', this module will use + P.EmbeddingLookup().add_prim_attr('primitive_target', 'CPU') which + specified 'offset = 0' to lookup table. + when 'target' is set to 'DEVICE', this module will use P.GatherV2() which + specified 'axis = 0' to lookup table. + + Args: + target (str): Specify the target where the op is executed. Default: 'CPU'. + + Inputs: + - **input_params** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`. + The Tensor slice, instead of the entire Tensor. + - **input_indices** (Tensor) - The shape of tensor is :math:`(y_1, y_2, ..., y_S)`. + Specifies the indices of elements of the original Tensor. Values can be out of range of `input_params`, + and the exceeding part will be filled with 0 in the output. + + Outputs: + Tensor, the shape of tensor is :math:`(z_1, z_2, ..., z_N)`. 
+ + Examples: + >>> input_params = Tensor(np.array([[8, 9], [10, 11], [12, 13], [14, 15]]), mindspore.float32) + >>> input_indices = Tensor(np.array([[1, 0], [3, 2]]), mindspore.int32) + >>> out = nn.EmbeddingLookup()(input_params, input_indices) + [[[10, 11], [8 ,9]], [[14, 15], [12, 13]]] + """ + def __init__(self, target='CPU'): + super(EmbeddingLookup, self).__init__() + self.target = target + if target not in ('CPU', 'DEVICE'): + raise ValueError('Attr \'target\' of \'EmbeddingLookup\' Op passed ' + + str(target) + ', should be one of values in \'CPU\', \'DEVICE\'.') + self.gatherv2 = P.GatherV2() + self.embeddinglookup = P.EmbeddingLookup().add_prim_attr('primitive_target', 'CPU') + + def construct(self, params, indices): + if self.target == "CPU": + out = self.embeddinglookup(params, indices, 0) + else: + out = self.gatherv2(params, indices, 0) + return out diff --git a/mindspore/nn/layer/image.py b/mindspore/nn/layer/image.py index 3721bc3c44..63ae7a94ac 100644 --- a/mindspore/nn/layer/image.py +++ b/mindspore/nn/layer/image.py @@ -21,9 +21,13 @@ from mindspore.ops import functional as F from mindspore.ops.primitive import constexpr from mindspore._checkparam import Validator as validator from mindspore._checkparam import Rel +from .conv import Conv2d +from .container import CellList +from .pooling import AvgPool2d +from .activation import ReLU from ..cell import Cell -__all__ = ['ImageGradients', 'SSIM', 'PSNR', 'CentralCrop'] +__all__ = ['ImageGradients', 'SSIM', 'MSSSIM', 'PSNR', 'CentralCrop'] class ImageGradients(Cell): r""" @@ -83,21 +87,6 @@ def _convert_img_dtype_to_float32(img, max_val): ret = ret * scale return ret - -@constexpr -def _gauss_kernel_helper(filter_size): - """gauss kernel helper""" - filter_size = F.scalar_cast(filter_size, mstype.int32) - coords = () - for i in range(filter_size): - i_cast = F.scalar_cast(i, mstype.float32) - offset = F.scalar_cast(filter_size-1, mstype.float32)/2.0 - element = i_cast-offset - coords = 
coords+(element,) - g = np.square(coords).astype(np.float32) - g = Tensor(g) - return filter_size, g - @constexpr def _check_input_4d(input_shape, param_name, func_name): if len(input_shape) != 4: @@ -110,9 +99,65 @@ def _check_input_filter_size(input_shape, param_name, filter_size, func_name): validator.check(param_name + " shape[2]", input_shape[2], "filter_size", filter_size, Rel.GE, func_name) validator.check(param_name + " shape[3]", input_shape[3], "filter_size", filter_size, Rel.GE, func_name) -@constexpr -def _check_input_dtype(input_dtype, param_name, allow_dtypes, cls_name): - validator.check_type_name(param_name, input_dtype, allow_dtypes, cls_name) +def _conv2d(in_channels, out_channels, kernel_size, weight, stride=1, padding=0): + return Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, + weight_init=weight, padding=padding, pad_mode="valid") + +def _create_window(size, sigma): + x_data, y_data = np.mgrid[-size // 2 + 1:size // 2 + 1, -size // 2 + 1:size // 2 + 1] + x_data = np.expand_dims(x_data, axis=-1).astype(np.float32) + x_data = np.expand_dims(x_data, axis=-1) ** 2 + y_data = np.expand_dims(y_data, axis=-1).astype(np.float32) + y_data = np.expand_dims(y_data, axis=-1) ** 2 + sigma = 2 * sigma ** 2 + g = np.exp(-(x_data + y_data) / sigma) + return np.transpose(g / np.sum(g), (2, 3, 0, 1)) + +def _split_img(x): + _, c, _, _ = F.shape(x) + img_split = P.Split(1, c) + output = img_split(x) + return output, c + +def _compute_per_channel_loss(c1, c2, img1, img2, conv): + """computes ssim index between img1 and img2 per single channel""" + dot_img = img1 * img2 + mu1 = conv(img1) + mu2 = conv(img2) + mu1_sq = mu1 * mu1 + mu2_sq = mu2 * mu2 + mu1_mu2 = mu1 * mu2 + sigma1_tmp = conv(img1 * img1) + sigma1_sq = sigma1_tmp - mu1_sq + sigma2_tmp = conv(img2 * img2) + sigma2_sq = sigma2_tmp - mu2_sq + sigma12_tmp = conv(dot_img) + sigma12 = sigma12_tmp - mu1_mu2 + a = (2 * mu1_mu2 + c1) + b = (mu1_sq + mu2_sq + c1) + v1 = 2 * sigma12 + 
c2 + v2 = sigma1_sq + sigma2_sq + c2 + ssim = (a * v1) / (b * v2) + cs = v1 / v2 + return ssim, cs + +def _compute_multi_channel_loss(c1, c2, img1, img2, conv, concat, mean): + """computes ssim index between img1 and img2 per color channel""" + split_img1, c = _split_img(img1) + split_img2, _ = _split_img(img2) + multi_ssim = () + multi_cs = () + for i in range(c): + ssim_per_channel, cs_per_channel = _compute_per_channel_loss(c1, c2, split_img1[i], split_img2[i], conv) + multi_ssim += (ssim_per_channel,) + multi_cs += (cs_per_channel,) + + multi_ssim = concat(multi_ssim) + multi_cs = concat(multi_cs) + + ssim = mean(multi_ssim, (2, 3)) + cs = mean(multi_cs, (2, 3)) + return ssim, cs class SSIM(Cell): r""" @@ -157,67 +202,126 @@ class SSIM(Cell): self.max_val = max_val self.filter_size = validator.check_integer('filter_size', filter_size, 1, Rel.GE, self.cls_name) self.filter_sigma = validator.check_float_positive('filter_sigma', filter_sigma, self.cls_name) - validator.check_value_type('k1', k1, [float], self.cls_name) - self.k1 = validator.check_number_range('k1', k1, 0.0, 1.0, Rel.INC_NEITHER, self.cls_name) - validator.check_value_type('k2', k2, [float], self.cls_name) - self.k2 = validator.check_number_range('k2', k2, 0.0, 1.0, Rel.INC_NEITHER, self.cls_name) - self.mean = P.DepthwiseConv2dNative(channel_multiplier=1, kernel_size=filter_size) + self.k1 = validator.check_value_type('k1', k1, [float], self.cls_name) + self.k2 = validator.check_value_type('k2', k2, [float], self.cls_name) + window = _create_window(filter_size, filter_sigma) + self.conv = _conv2d(1, 1, filter_size, Tensor(window)) + self.conv.weight.requires_grad = False + self.reduce_mean = P.ReduceMean() + self.concat = P.Concat(axis=1) def construct(self, img1, img2): - _check_input_dtype(F.dtype(img1), "img1", [mstype.float32, mstype.float16], self.cls_name) _check_input_filter_size(F.shape(img1), "img1", self.filter_size, self.cls_name) P.SameTypeShape()(img1, img2) max_val = 
_convert_img_dtype_to_float32(self.max_val, self.max_val) img1 = _convert_img_dtype_to_float32(img1, self.max_val) img2 = _convert_img_dtype_to_float32(img2, self.max_val) - kernel = self._fspecial_gauss(self.filter_size, self.filter_sigma) - kernel = P.Tile()(kernel, (1, P.Shape()(img1)[1], 1, 1)) + c1 = (self.k1 * max_val) ** 2 + c2 = (self.k2 * max_val) ** 2 + + ssim_ave_channel, _ = _compute_multi_channel_loss(c1, c2, img1, img2, self.conv, self.concat, self.reduce_mean) + loss = self.reduce_mean(ssim_ave_channel, -1) + + return loss + +def _downsample(img1, img2, op): + a = op(img1) + b = op(img2) + return a, b + +class MSSSIM(Cell): + r""" + Returns MS-SSIM index between img1 and img2. + + Its implementation is based on Wang, Zhou, Eero P. Simoncelli, and Alan C. Bovik. `Multiscale structural similarity + for image quality assessment `_. + Signals, Systems and Computers, 2004. - mean_ssim = self._calculate_mean_ssim(img1, img2, kernel, max_val, self.k1, self.k2) + .. math:: - return mean_ssim + l(x,y)&=\frac{2\mu_x\mu_y+C_1}{\mu_x^2+\mu_y^2+C_1}, C_1=(K_1L)^2.\\ + c(x,y)&=\frac{2\sigma_x\sigma_y+C_2}{\sigma_x^2+\sigma_y^2+C_2}, C_2=(K_2L)^2.\\ + s(x,y)&=\frac{\sigma_{xy}+C_3}{\sigma_x\sigma_y+C_3}, C_3=C_2/2.\\ + MSSSIM(x,y)&=l^alpha_M*{\prod_{1\leq j\leq M} (c^beta_j*s^gamma_j)}. - def _calculate_mean_ssim(self, x, y, kernel, max_val, k1, k2): - """calculate mean ssim""" - c1 = (k1 * max_val) * (k1 * max_val) - c2 = (k2 * max_val) * (k2 * max_val) + Args: + max_val (Union[int, float]): The dynamic range of the pixel values (255 for 8-bit grayscale images). + Default: 1.0. + power_factors (Union[tuple, list]): Iterable of weights for each of the scales. + Default: (0.0448, 0.2856, 0.3001, 0.2363, 0.1333). Default values obtained by Wang et al. + filter_size (int): The size of the Gaussian filter. Default: 11. + filter_sigma (float): The standard deviation of Gaussian kernel. Default: 1.5. 
+ k1 (float): The constant used to generate c1 in the luminance comparison function. Default: 0.01. + k2 (float): The constant used to generate c2 in the contrast comparison function. Default: 0.03. - # SSIM luminance formula - # (2 * mean_{x} * mean_{y} + c1) / (mean_{x}**2 + mean_{y}**2 + c1) - mean_x = self.mean(x, kernel) - mean_y = self.mean(y, kernel) - square_sum = F.square(mean_x)+F.square(mean_y) - luminance = (2*mean_x*mean_y+c1)/(square_sum+c1) + Inputs: + - **img1** (Tensor) - The first image batch with format 'NCHW'. It should be the same shape and dtype as img2. + - **img2** (Tensor) - The second image batch with format 'NCHW'. It should be the same shape and dtype as img1. - # SSIM contrast*structure formula (when c3 = c2/2) - # (2 * conv_{xy} + c2) / (conv_{xx} + conv_{yy} + c2), equals to - # (2 * (mean_{xy} - mean_{x}*mean_{y}) + c2) / (mean_{xx}-mean_{x}**2 + mean_{yy}-mean_{y}**2 + c2) - mean_xy = self.mean(x*y, kernel) - mean_square_add = self.mean(F.square(x)+F.square(y), kernel) + Outputs: + Tensor, has the same dtype as img1. It is a 1-D tensor with shape N, where N is the batch num of img1. 
- cs = (2*(mean_xy-mean_x*mean_y)+c2)/(mean_square_add-square_sum+c2) + Examples: + >>> net = nn.MSSSIM(power_factors=(0.033, 0.033, 0.033)) + >>> img1 = Tensor(np.random.random((1,3,128,128))) + >>> img2 = Tensor(np.random.random((1,3,128,128))) + >>> msssim = net(img1, img2) + """ + def __init__(self, max_val=1.0, power_factors=(0.0448, 0.2856, 0.3001, 0.2363, 0.1333), filter_size=11, + filter_sigma=1.5, k1=0.01, k2=0.03): + super(MSSSIM, self).__init__() + validator.check_value_type('max_val', max_val, [int, float], self.cls_name) + validator.check_number('max_val', max_val, 0.0, Rel.GT, self.cls_name) + self.max_val = max_val + validator.check_value_type('power_factors', power_factors, [tuple, list], self.cls_name) + self.filter_size = validator.check_integer('filter_size', filter_size, 1, Rel.GE, self.cls_name) + self.filter_sigma = validator.check_float_positive('filter_sigma', filter_sigma, self.cls_name) + self.k1 = validator.check_value_type('k1', k1, [float], self.cls_name) + self.k2 = validator.check_value_type('k2', k2, [float], self.cls_name) + window = _create_window(filter_size, filter_sigma) + self.level = len(power_factors) + self.conv = [] + for i in range(self.level): + self.conv.append(_conv2d(1, 1, filter_size, Tensor(window))) + self.conv[i].weight.requires_grad = False + self.multi_convs_list = CellList(self.conv) + self.weight_tensor = Tensor(power_factors, mstype.float32) + self.avg_pool = AvgPool2d(kernel_size=2, stride=2, pad_mode='valid') + self.relu = ReLU() + self.reduce_mean = P.ReduceMean() + self.prod = P.ReduceProd() + self.pow = P.Pow() + self.pack = P.Pack(axis=-1) + self.concat = P.Concat(axis=1) - # SSIM formula - # luminance * cs - ssim = luminance*cs + def construct(self, img1, img2): + _check_input_4d(F.shape(img1), "img1", self.cls_name) + _check_input_4d(F.shape(img2), "img2", self.cls_name) + P.SameTypeShape()(img1, img2) + max_val = _convert_img_dtype_to_float32(self.max_val, self.max_val) + img1 = 
_convert_img_dtype_to_float32(img1, self.max_val) + img2 = _convert_img_dtype_to_float32(img2, self.max_val) - mean_ssim = P.ReduceMean()(ssim, (-3, -2, -1)) + c1 = (self.k1 * max_val) ** 2 + c2 = (self.k2 * max_val) ** 2 - return mean_ssim + sim = () + mcs = () - def _fspecial_gauss(self, filter_size, filter_sigma): - """get gauss kernel""" - filter_size, g = _gauss_kernel_helper(filter_size) + for i in range(self.level): + sim, cs = _compute_multi_channel_loss(c1, c2, img1, img2, + self.multi_convs_list[i], self.concat, self.reduce_mean) + mcs += (self.relu(cs),) + img1, img2 = _downsample(img1, img2, self.avg_pool) - square_sigma_scale = -0.5/(filter_sigma * filter_sigma) - g = g*square_sigma_scale - g = F.reshape(g, (1, -1))+F.reshape(g, (-1, 1)) - g = F.reshape(g, (1, -1)) - g = P.Softmax()(g) - ret = F.reshape(g, (1, 1, filter_size, filter_size)) - return ret + mcs = mcs[0:-1:1] + mcs_and_ssim = self.pack(mcs + (self.relu(sim),)) + mcs_and_ssim = self.pow(mcs_and_ssim, self.weight_tensor) + ms_ssim = self.prod(mcs_and_ssim, -1) + loss = self.reduce_mean(ms_ssim, -1) + return loss class PSNR(Cell): r""" diff --git a/mindspore/nn/layer/math.py b/mindspore/nn/layer/math.py index 1ecb20056e..ddcaf2da6b 100644 --- a/mindspore/nn/layer/math.py +++ b/mindspore/nn/layer/math.py @@ -55,7 +55,7 @@ class ReduceLogSumExp(Cell): Examples: >>> input_x = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) - >>> op = P.ReduceLogSumExp(keep_dims=True) + >>> op = nn.ReduceLogSumExp(keep_dims=True) >>> output = op(input_x, 1) """ @@ -132,23 +132,19 @@ class Range(Cell): class LinSpace(Cell): r""" - Generates values in an interval. And return the corresponding interpolation accroding to assist. + Generates values in an interval. Args: - - **start** (Union[int, float]) - The start of interval, With shape of 0-D. - - **stop** (Union[int, float]) - The end of interval, With shape of 0-D. - - **num** (int) - ticks number in the interval, the ticks include start and stop value. 
- With shape of 0-D. + start (Union[int, float]): The start of interval. With shape of 0-D. + stop (Union[int, float]): The end of interval. With shape of 0-D. + num (int): ticks number in the interval, the ticks include start and stop value. With shape of 0-D. Outputs: Tensor, With type same as `start`. The shape is 1-D with length of `num`. Examples: - >>> linspace = nn.LinSpace() - >>> start = Tensor(1, mindspore.float32) - >>> stop = Tensor(10, mindspore.float32) - >>> num = Tensor(5, mindspore.int32) - >>> output = linspace(start, stop, num) + >>> linspace = nn.LinSpace(1, 10, 5) + >>> output = linspace() [1, 3.25, 5.5, 7.75, 10] """ diff --git a/mindspore/nn/layer/normalization.py b/mindspore/nn/layer/normalization.py index d6c920b620..05e5e54b96 100644 --- a/mindspore/nn/layer/normalization.py +++ b/mindspore/nn/layer/normalization.py @@ -84,13 +84,14 @@ class _BatchNorm(Cell): self.dtype = P.DType() self.reshape = P.Reshape() self.is_ascend = context.get_context("device_target") == "Ascend" + self.is_graph_mode = context.get_context("mode") == context.GRAPH_MODE self.momentum = 1.0 - momentum if context.get_context("enable_ge"): self.is_ge_backend = True else: self.is_ge_backend = False - if self.is_ge_backend or self.is_ascend: + if self.is_graph_mode and (self.is_ge_backend or self.is_ascend): self.bn_train = P.BatchNorm(is_training=True, epsilon=self.eps) else: @@ -152,7 +153,7 @@ class _BatchNorm(Cell): if self.is_ge_backend and self.is_global: axes, re_shape = _shape_infer(F.shape(x), self.num_features) y = self._global_sync(x, axes, re_shape) - elif self.is_ge_backend or self.is_ascend: + elif self.is_graph_mode and (self.is_ge_backend or self.is_ascend): if self.is_global: axes, re_shape = _shape_infer(F.shape(x), self.num_features) y = self._global_sync(x, axes, re_shape) @@ -587,7 +588,7 @@ class GroupNorm(Cell): """calculate groupnorm output""" batch, channel, height, width = self.shape(x) _channel_check(channel, self.num_channels) - x = 
self.reshape(x, (batch, self.num_groups, channel*height*width/self.num_groups)) + x = self.reshape(x, (batch, self.num_groups, -1)) mean = self.reduce_mean(x, 2) var = self.reduce_sum(self.square(x - mean), 2) / (channel * height * width / self.num_groups - 1) std = self.sqrt(var + self.eps) diff --git a/mindspore/nn/layer/quant.py b/mindspore/nn/layer/quant.py index f0c82937c5..63cdedbfe9 100644 --- a/mindspore/nn/layer/quant.py +++ b/mindspore/nn/layer/quant.py @@ -17,6 +17,7 @@ from functools import partial import numpy as np +from mindspore import nn import mindspore.common.dtype as mstype from mindspore.ops import operations as P from mindspore.ops import functional as F @@ -41,8 +42,7 @@ __all__ = [ 'Conv2dBatchNormQuant', 'Conv2dQuant', 'DenseQuant', - 'ReLUQuant', - 'ReLU6Quant', + 'ActQuant', 'HSwishQuant', 'HSigmoidQuant', 'TensorAddQuant', @@ -375,9 +375,10 @@ class FakeQuantWithMinMax(Cell): def extend_repr(self): s = 'num_bits={}, symmetric={}, narrow_range={}, ema={}({}), per_channel={}({}, {}), ' \ - 'quant_delay={}, min_init={}, max_init={}'.format( - self.num_bits, self.symmetric, self.narrow_range, self.ema, self.ema_decay, self.per_channel, - self.channel_axis, self.num_channels, self.quant_delay, self.min_init, self.max_init) + 'quant_delay={}, min_init={}, max_init={}'.format(self.num_bits, self.symmetric, self.narrow_range, + self.ema, self.ema_decay, self.per_channel, + self.channel_axis, self.num_channels, self.quant_delay, + self.min_init, self.max_init) return s def construct(self, x): @@ -540,10 +541,12 @@ class Conv2dBatchNormQuant(Cell): def extend_repr(self): s = 'in_channels={}, out_channels={}, kernel_size={}, stride={}, ' \ 'pad_mode={}, padding={}, dilation={}, group={}, ' \ - 'fake={}, freeze_bn={}, momentum={}, quant_delay={}'.format( - self.in_channels, self.out_channels, self.kernel_size, self.stride, - self.pad_mode, self.padding, self.dilation, self.group, - self.fake, self.freeze_bn, self.momentum, self.quant_delay) + 
'fake={}, freeze_bn={}, momentum={}, quant_delay={}'.format(self.in_channels, self.out_channels, + self.kernel_size, self.stride, + self.pad_mode, self.padding, self.dilation, + self.group, + self.fake, self.freeze_bn, self.momentum, + self.quant_delay) return s def construct(self, x): @@ -685,10 +688,9 @@ class Conv2dQuant(Cell): def extend_repr(self): s = 'in_channels={}, out_channels={}, kernel_size={}, stride={}, ' \ 'pad_mode={}, padding={}, dilation={}, group={}, ' \ - 'has_bias={}, quant_delay={}'.format( - self.in_channels, self.out_channels, self.kernel_size, self.stride, - self.pad_mode, self.padding, self.dilation, self.group, - self.has_bias, self.quant_delay) + 'has_bias={}, quant_delay={}'.format(self.in_channels, self.out_channels, self.kernel_size, self.stride, + self.pad_mode, self.padding, self.dilation, self.group, + self.has_bias, self.quant_delay) return s @@ -799,76 +801,23 @@ class DenseQuant(Cell): class _QuantActivation(Cell): r""" - Base class for Quant activation function. Add Fake Quant OP after activation OP. + Base class for quantization aware training activation function. Add Fake Quant OP after activation OP. """ def get_origin(self): raise NotImplementedError -class ReLUQuant(_QuantActivation): +class ActQuant(_QuantActivation): r""" - ReLUQuant activation function. Add Fake Quant OP after Relu OP. + Quantization aware training activation function. - For a more Detailed overview of ReLU op. - - Args: - ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. - per_channel (bool): Quantization granularity based on layer or on channel. Default: False. - num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8. - symmetric (bool): Quantization algorithm use symmetric or not. Default: False. - narrow_range (bool): Quantization algorithm use narrow range or not. Default: False. - quant_delay (int): Quantization delay parameters according by global step. Default: 0. 
- - Inputs: - - **x** (Tensor) - The input of ReLUQuant. - - Outputs: - Tensor, with the same type and shape as the `x`. - - Examples: - >>> relu_quant = nn.ReLUQuant() - >>> input_x = Tensor(np.array([[1, 2, 0], [-1, -2, 1]]), mindspore.float32) - >>> result = relu_quant(input_x) - """ - - def __init__(self, - ema_decay=0.999, - per_channel=False, - num_bits=8, - symmetric=False, - narrow_range=False, - quant_delay=0): - super(ReLUQuant, self).__init__() - self.fake_quant_act = FakeQuantWithMinMax(min_init=0, - max_init=6, - ema=True, - ema_decay=ema_decay, - per_channel=per_channel, - num_bits=num_bits, - symmetric=symmetric, - narrow_range=narrow_range, - quant_delay=quant_delay) - self.relu = P.ReLU() - - def construct(self, x): - x = self.relu(x) - x = self.fake_quant_act(x) - return x - - def get_origin(self): - return self.relu - - -class ReLU6Quant(_QuantActivation): - r""" - ReLU6Quant activation function. - - Add Fake Quant OP after Relu6. Not Recommand to used these cell for Fake Quant Op + Add Fake Quant OP after activation. Not Recommand to used these cell for Fake Quant Op Will climp the max range of the activation and the relu6 do the same operation. For a more Detailed overview of ReLU6 op. Args: + activation (Cell): Activation cell class. ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. per_channel (bool): Quantization granularity based on layer or on channel. Default: False. num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8. @@ -883,19 +832,20 @@ class ReLU6Quant(_QuantActivation): Tensor, with the same type and shape as the `x`. 
Examples: - >>> relu6_quant = nn.ReLU6Quant(4, 1) + >>> act_quant = nn.ActQuant(4, 1) >>> input_x = Tensor(np.array([[1, 2, -1], [-2, 0, -1]]), mindspore.float32) - >>> result = relu6_quant(input_x) + >>> result = act_quant(input_x) """ def __init__(self, + activation, ema_decay=0.999, per_channel=False, num_bits=8, symmetric=False, narrow_range=False, quant_delay=0): - super(ReLU6Quant, self).__init__() + super(ActQuant, self).__init__() self.fake_quant_act = FakeQuantWithMinMax(min_init=0, max_init=6, ema=True, @@ -905,15 +855,15 @@ class ReLU6Quant(_QuantActivation): symmetric=symmetric, narrow_range=narrow_range, quant_delay=quant_delay) - self.relu6 = P.ReLU6() + self.act = activation() def construct(self, x): - x = self.relu6(x) + x = self.act(x) x = self.fake_quant_act(x) return x def get_origin(self): - return self.relu6 + return self.act class HSwishQuant(_QuantActivation): @@ -923,6 +873,7 @@ class HSwishQuant(_QuantActivation): For a more Detailed overview of HSwish op. Args: + activation (Cell): Activation cell class. ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. per_channel (bool): Quantization granularity based on layer or on channel. Default: False. num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8. @@ -943,6 +894,7 @@ class HSwishQuant(_QuantActivation): """ def __init__(self, + activation, ema_decay=0.999, per_channel=False, num_bits=8, @@ -968,7 +920,10 @@ class HSwishQuant(_QuantActivation): symmetric=symmetric, narrow_range=narrow_range, quant_delay=quant_delay) - self.act = P.HSwish() + if issubclass(activation, nn.HSwish): + self.act = activation() + else: + raise ValueError("Activation should be `nn.HSwish`") def construct(self, x): x = self.fake_quant_act_before(x) @@ -987,6 +942,7 @@ class HSigmoidQuant(_QuantActivation): For a more Detailed overview of HSigmoid op. Args: + activation (Cell): Activation cell class. ema_decay (float): Exponential Moving Average algorithm parameter. 
Default: 0.999. per_channel (bool): Quantization granularity based on layer or on channel. Default: False. num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8. @@ -1007,6 +963,7 @@ class HSigmoidQuant(_QuantActivation): """ def __init__(self, + activation, ema_decay=0.999, per_channel=False, num_bits=8, @@ -1032,7 +989,10 @@ class HSigmoidQuant(_QuantActivation): symmetric=symmetric, narrow_range=narrow_range, quant_delay=quant_delay) - self.act = P.HSigmoid() + if issubclass(activation, nn.HSwish): + self.act = activation() + else: + raise ValueError("Activation should be `nn.HSigmoid`") def construct(self, x): x = self.fake_quant_act_before(x) @@ -1209,9 +1169,9 @@ class QuantBlock(Cell): return x def extend_repr(self): - str_info = f'quant={self.quant}, core_op={type(self.core_op)}' + str_info = f'quant={self.quant}, core_op={type(self.core_op)}, weight=shape[{self.weight.shape}]' if self.has_bias: - str_info = str_info + f', bias={self.bias}' + str_info = str_info + f', bias=shape[{self.bias.shape}]' if self.has_act: str_info = str_info + f', activation={self.activation}' str_info = str_info + f', dequant={self.dequant}' diff --git a/mindspore/nn/optim/__init__.py b/mindspore/nn/optim/__init__.py index f1dac586bc..538c400067 100644 --- a/mindspore/nn/optim/__init__.py +++ b/mindspore/nn/optim/__init__.py @@ -20,14 +20,14 @@ The optimizer is used to calculate and update the gradients. 
""" from .optimizer import Optimizer from .momentum import Momentum -from .adam import Adam, AdamWeightDecay, AdamWeightDecayDynamicLR +from .adam import Adam, PSAdam, AdamWeightDecay, AdamWeightDecayDynamicLR from .lamb import Lamb from .sgd import SGD from .lars import LARS -from .ftrl import FTRL +from .ftrl import FTRL, PSFTRL from .rmsprop import RMSProp from .proximal_ada_grad import ProximalAdagrad from .lazyadam import LazyAdam -__all__ = ['Optimizer', 'Momentum', 'LARS', 'Adam', 'AdamWeightDecay', 'LazyAdam', - 'AdamWeightDecayDynamicLR', 'Lamb', 'SGD', 'FTRL', 'RMSProp', 'ProximalAdagrad'] +__all__ = ['Optimizer', 'Momentum', 'LARS', 'Adam', 'PSAdam', 'AdamWeightDecay', 'LazyAdam', + 'AdamWeightDecayDynamicLR', 'Lamb', 'SGD', 'FTRL', 'PSFTRL', 'RMSProp', 'ProximalAdagrad'] diff --git a/mindspore/nn/optim/adam.py b/mindspore/nn/optim/adam.py index b73c284aab..eb6e64074f 100755 --- a/mindspore/nn/optim/adam.py +++ b/mindspore/nn/optim/adam.py @@ -27,6 +27,7 @@ from mindspore._checkparam import Rel from .optimizer import Optimizer _adam_opt = C.MultitypeFuncGraph("adam_opt") +_adam_push_pull_opt = C.MultitypeFuncGraph("_adam_push_pull_opt") @_adam_opt.register("Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", @@ -129,6 +130,31 @@ def _run_opt_with_one_number(opt, sparse_opt, beta1_power, beta2_power, beta1, b eps, gradient)) return success +@_adam_push_pull_opt.register("Function", "Function", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", + "Tensor", "Tuple", "Tensor", "Tensor", "Tensor") +def _run_push_pull_opt_with_sparse(push, pull, beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, params, + moment1, moment2): + """Apply sparse adam optimizer by push and pull to the weight parameter when the gradient is sparse.""" + success = True + op_shape = P.Shape() + shapes = (op_shape(params), op_shape(moment1), op_shape(moment2), + op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1), + 
op_shape(beta2), op_shape(eps), op_shape(gradient[1]), op_shape(gradient[0])) + success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, + eps, gradient[1], gradient[0]), shapes), params)) + return success + + +@_adam_push_pull_opt.register("Function", "Function", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", + "Tensor", "Tensor", "Tensor", "Tensor", "Tensor") +def _run_push_pull_opt_with_one_number(push, pull, beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, params, + moment1, moment2): + """Apply adam optimizer by push and pull to the weight parameter using Tensor.""" + success = True + op_shape = P.Shape() + success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, eps, gradient), + (op_shape(params), op_shape(moment1), op_shape(moment2))), params)) + return success class Adam(Optimizer): r""" @@ -162,8 +188,8 @@ class Adam(Optimizer): To improve parameter groups performance, the customized order of parameters can be supported. - The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the - `sparse_grad` of `Parameter` being set. The sparse feature is under continuous development. The sparse + The sparse strategy is applied while the SparseGatherV2 operator being used for forward network. + The sparse feature is under continuous development. The sparse behavior is currently performed on the CPU. 
Args: @@ -274,6 +300,51 @@ class Adam(Optimizer): gradients, params, moment1, moment2) return success +class PSAdam(Optimizer): + '''The same usage as Adam optimizer except the parameters are set PS mode.''' + def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-8, use_locking=False, + use_nesterov=False, weight_decay=0.0, loss_scale=1.0): + super(PSAdam, self).__init__(learning_rate, params, weight_decay, loss_scale) + _check_param_value(beta1, beta2, eps, weight_decay, self.cls_name) + validator.check_value_type("use_locking", use_locking, [bool], self.cls_name) + validator.check_value_type("use_nesterov", use_nesterov, [bool], self.cls_name) + + self.beta1 = Tensor(beta1, mstype.float32) + self.beta2 = Tensor(beta2, mstype.float32) + self.beta1_power = Parameter(initializer(1, [1], mstype.float32), name="beta1_power") + self.beta2_power = Parameter(initializer(1, [1], mstype.float32), name="beta2_power") + self.eps = Tensor(eps, mstype.float32) + + self.moment1 = self.parameters.clone(prefix="moment1", init='zeros') + self.moment2 = self.parameters.clone(prefix="moment2", init='zeros') + + self.hyper_map = C.HyperMap() + self.push = P.Push("Adam", [0, 1, 2]) + self.push.add_prim_attr("primitive_target", "CPU") + self.pull = P.Pull() + self.pull.add_prim_attr("primitive_target", "CPU") + + def construct(self, gradients): + params = self.parameters + moment1 = self.moment1 + moment2 = self.moment2 + gradients = self.decay_weight(gradients) + gradients = self.scale_grad(gradients) + lr = self.get_lr() + + beta1_power = self.beta1_power * self.beta1 + self.beta1_power = beta1_power + beta2_power = self.beta2_power * self.beta2 + self.beta2_power = beta2_power + if self.is_group_lr: + success = self.map_(F.partial(_adam_push_pull_opt, self.push, self.pull, beta1_power, beta2_power, + self.beta1, self.beta2, self.eps), + lr, gradients, params, moment1, moment2) + else: + success = self.map_(F.partial(_adam_push_pull_opt, self.push, self.pull, 
beta1_power, beta2_power, + self.beta1, self.beta2, self.eps, lr), + gradients, params, moment1, moment2) + return success class AdamWeightDecay(Optimizer): """ @@ -388,7 +459,7 @@ class AdamWeightDecayDynamicLR(Optimizer): beta2=0.999, eps=1e-6, weight_decay=0.0, - decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name): + decay_filter=lambda x: 'layernorm' not in x.name.lower() and 'bias' not in x.name.lower()): super(AdamWeightDecayDynamicLR, self).__init__(0.0, params) if self.is_group: raise RuntimeError(f"The {self.cls_name} optimizer cannot support group setting.") diff --git a/mindspore/nn/optim/ftrl.py b/mindspore/nn/optim/ftrl.py index b2954430b4..dd2ebddfa7 100644 --- a/mindspore/nn/optim/ftrl.py +++ b/mindspore/nn/optim/ftrl.py @@ -22,6 +22,7 @@ from mindspore._checkparam import Rel from .optimizer import Optimizer, _apply_decay, _grad_scale _ftrl_opt = C.MultitypeFuncGraph("ftrl_opt") +_ftrl_push_pull_opt = C.MultitypeFuncGraph("ftrl_opt") @_ftrl_opt.register("Function", "Function", "Tensor", "Number", "Number", "Number", "Tensor", "Tuple", "Tensor", @@ -41,6 +42,26 @@ def _tensor_run_opt(opt, spars_opt, learning_rate, l1, l2, lr_power, linear, gra success = F.depend(success, opt(weight, moment, linear, gradient, learning_rate, l1, l2, lr_power)) return success +@_ftrl_push_pull_opt.register("Function", "Function", "Tensor", "Number", "Number", "Number", "Tensor", "Tuple", + "Tensor", "Tensor") +def _tensor_run_push_pull_opt_with_sparse(push, pull, learning_rate, l1, l2, lr_power, linear, gradient, + weight, moment): + success = True + op_shape = P.Shape() + shapes = (op_shape(weight), op_shape(moment), op_shape(linear), op_shape(gradient[1]), op_shape(gradient[0])) + success = F.depend(success, pull(push((gradient[1], gradient[0]), shapes), weight)) + return success + + +@_ftrl_push_pull_opt.register("Function", "Function", "Tensor", "Number", "Number", "Number", "Tensor", "Tensor", + "Tensor", "Tensor") +def 
_tensor_run_push_pull_opt_with_one_number(push, pull, learning_rate, l1, l2, lr_power, linear, gradient, + weight, moment): + success = True + op_shape = P.Shape() + success = F.depend(success, pull(push((gradient, learning_rate, l1, l2, lr_power), + (op_shape(weight), op_shape(moment), op_shape(linear))), weight)) + return success def _check_param(initial_accum, lr_power, l1, l2, use_locking, weight_decay=0.0, prim_name=None): """Check param.""" @@ -72,8 +93,8 @@ class FTRL(Optimizer): `_ for engineering document. Note: - The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the - `sparse_grad` of `Parameter` being set. The sparse feature is under continuous development. The sparse + The sparse strategy is applied while the SparseGatherV2 operator being used for forward network. + The sparse feature is under continuous development. The sparse behavior is currently performed on the CPU. Args: @@ -131,3 +152,37 @@ class FTRL(Optimizer): success = self.map_(F.partial(_ftrl_opt, self.opt, self.sparse_opt, lr, self.l1, self.l2, self.lr_power), linear, grads, params, moments) return success + +class PSFTRL(Optimizer): + def __init__(self, params, initial_accum=0.1, learning_rate=0.001, lr_power=-0.5, l1=0.0, l2=0.0, + use_locking=False, loss_scale=1.0, weight_decay=0.0): + super(PSFTRL, self).__init__(learning_rate, params, loss_scale=loss_scale) + if self.is_group: + raise RuntimeError(f"The {self.cls_name} optimizer cannot support group setting.") + _check_param(initial_accum, lr_power, l1, l2, use_locking, weight_decay, self.cls_name) + self.moments = self.parameters.clone(prefix="moments", init=initial_accum) + self.linear = self.parameters.clone(prefix="linear", init='zeros') + self.l1 = l1 + self.l2 = l2 + self.lr_power = lr_power + self.weight_decay = weight_decay + self.decay_tf = tuple((lambda: True)() for x in self.parameters) + + self.hyper_map = C.HyperMap() + self.push = P.Push("Ftrl", [0, 1, 2]) + 
self.push.add_prim_attr("primitive_target", "CPU") + self.pull = P.Pull() + self.pull.add_prim_attr("primitive_target", "CPU") + + def construct(self, grads): + params = self.parameters + moments = self.moments + linear = self.linear + lr = self.learning_rate + if self.weight_decay > 0.0: + grads = self.hyper_map(F.partial(_apply_decay, self.weight_decay), self.decay_tf, params, grads) + + grads = self.scale_grad(grads) + success = self.map_(F.partial(_ftrl_push_pull_opt, self.push, self.pull, lr, self.l1, self.l2, self.lr_power), + linear, grads, params, moments) + return success diff --git a/mindspore/nn/optim/lazyadam.py b/mindspore/nn/optim/lazyadam.py index 4b97d2eb20..7905398437 100644 --- a/mindspore/nn/optim/lazyadam.py +++ b/mindspore/nn/optim/lazyadam.py @@ -91,8 +91,8 @@ class LazyAdam(Optimizer): value of weight_decay > 0. When not separating parameter groups, the `weight_decay` in the API will be applied on the parameters if `weight_decay` > 0 and the 'beta' and 'gamma' are not in the name of parameters. - The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the - `sparse_grad` of `Parameter` being set. The sparse behavior, to be notice, is not equivalent to the + The sparse strategy is applied while the SparseGatherV2 operator being used for forward network. + The sparse behavior, to be notice, is not equivalent to the original Adam algorithm, as only the current indices parames will be updated. The sparse feature is under continuous development. The sparse behavior is currently performed on the CPU. diff --git a/mindspore/nn/optim/proximal_ada_grad.py b/mindspore/nn/optim/proximal_ada_grad.py index 75f3994e2a..25cf438034 100644 --- a/mindspore/nn/optim/proximal_ada_grad.py +++ b/mindspore/nn/optim/proximal_ada_grad.py @@ -59,8 +59,8 @@ class ProximalAdagrad(Optimizer): `_. 
Note: - The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the - `sparse_grad` of `Parameter` being set as True. The sparse feature is under continuous development. The sparse + The sparse strategy is applied while the SparseGatherV2 operator being used for forward network. + The sparse feature is under continuous development. The sparse behavior is currently performed on the CPU. Args: @@ -71,7 +71,7 @@ class ProximalAdagrad(Optimizer): l1 (float): l1 regularization strength, must be greater than or equal to zero. Default: 0.0. l2 (float): l2 regularization strength, must be greater than or equal to zero. Default: 0.0. use_locking (bool): If True use locks for update operation. Default: False. - loss_scale (float): Value for the loss scale. It should be equal to or greater than 1.0. Default: 1.0. + loss_scale (float): Value for the loss scale. It should be greater than 0.0. Default: 1.0. wegith_decay (float): Weight decay value to multiply weight, must be zero or positive value. Default: 0.0. 
Inputs: diff --git a/mindspore/nn/optim/rmsprop.py b/mindspore/nn/optim/rmsprop.py index 8e8885aff7..c4d3347038 100644 --- a/mindspore/nn/optim/rmsprop.py +++ b/mindspore/nn/optim/rmsprop.py @@ -171,7 +171,7 @@ class RMSProp(Optimizer): self.opt = P.ApplyRMSProp(use_locking) self.momentum = momentum - self.ms = self.parameters.clone(prefix="mean_square", init='zeros') + self.ms = self.parameters.clone(prefix="mean_square", init='ones') self.moment = self.parameters.clone(prefix="moment", init='zeros') self.hyper_map = C.HyperMap() self.epsilon = epsilon diff --git a/mindspore/ops/__init__.py b/mindspore/ops/__init__.py index b73d683284..7265b3c98b 100644 --- a/mindspore/ops/__init__.py +++ b/mindspore/ops/__init__.py @@ -32,7 +32,7 @@ Note: from .primitive import Primitive, PrimitiveWithInfer, prim_attr_register from .vm_impl_registry import get_vm_impl_fn, vm_impl_registry -from .op_info_register import op_info_register, AkgRegOp, AiCPURegOp, TBERegOp, DataType +from .op_info_register import op_info_register, AkgGpuRegOp, AkgAscendRegOp, AiCPURegOp, TBERegOp, DataType from .primitive import constexpr from .._c_expression import signature_rw, signature_kind @@ -42,6 +42,6 @@ __primitive__ = [ ] __all__ = ["get_vm_impl_fn", "vm_impl_registry", - "op_info_register", "AkgRegOp", "AiCPURegOp", "TBERegOp", "DataType", + "op_info_register", "AkgGpuRegOp", "AkgAscendRegOp", "AiCPURegOp", "TBERegOp", "DataType", "constexpr"] __all__.extend(__primitive__) diff --git a/mindspore/ops/_grad/grad_array_ops.py b/mindspore/ops/_grad/grad_array_ops.py index d1494bc051..b1a3e1d98b 100644 --- a/mindspore/ops/_grad/grad_array_ops.py +++ b/mindspore/ops/_grad/grad_array_ops.py @@ -191,13 +191,12 @@ def get_bprop_tile(self): return bprop -@bprop_getters.register(inner.EmbeddingLookup) +@bprop_getters.register(P.EmbeddingLookup) def get_bprop_embedding_lookup(self): """Generate bprop for EmbeddingLookup""" sub_op = P.Sub() reshape_op = P.Reshape() - host_reshape = 
P.Reshape().add_prim_attr('primitive_target', 'CPU') - def bprop_sparse(x, indices, offset, reduce_scatter_flag, split_num, out, dout): + def bprop_sparse(x, indices, offset, out, dout): x_shp = shape_op(x) new_indices = sub_op(indices, offset) # Reshape the 'new_indices' @@ -205,17 +204,9 @@ def get_bprop_embedding_lookup(self): new_indices = reshape_op(new_indices, new_indices_shape_changed) x_shp_tail = x_shp[1:] actual_dout_shape_changed = new_indices_shape_changed + x_shp_tail - if reduce_scatter_flag is True: - # On host - elu_grad = G.EmbeddingLookupCommGrad() - actual_dout = elu_grad(dout, split_num) - # Reshape the 'actual_dout' on host - actual_dout = host_reshape(actual_dout, actual_dout_shape_changed) - else: - # Reshape the 'actual_dout' on device - actual_dout = reshape_op(dout, actual_dout_shape_changed) - return (new_indices, actual_dout, x_shp), zeros_like(indices), zeros_like(offset), \ - zeros_like(reduce_scatter_flag), zeros_like(split_num) + # Reshape the 'actual_dout' on device + actual_dout = reshape_op(dout, actual_dout_shape_changed) + return (new_indices, actual_dout, x_shp), zeros_like(indices), zeros_like(offset) return bprop_sparse @@ -248,19 +239,37 @@ def get_bprop_transpose(self): return bprop +@constexpr +def _concat_grad_uniform(input_shapes, input_nums): + """Helper function for bprop of Concat""" + is_uniform = True + for i in range(1, input_nums): + if input_shapes[i-1] != input_shapes[i]: + is_uniform = False + break + return is_uniform + @bprop_getters.register(P.Concat) def get_bprop_concat(self): """Generate bprop for Concat""" axis = self.axis + is_ascend = context.get_context('device_target') == "Ascend" def bprop(x, out, dout): dx = () out_offset = G.ConcatOffset(F.tuple_len(x), axis)(x) - for i in range(F.tuple_len(x)): - slice_out = P.Slice()(dout, out_offset[i], shape_op(x[i])) - dx = dx + (slice_out,) + input_nums = F.tuple_len(x) + input_shapes = () + for i in range(input_nums): + input_shapes = input_shapes + 
(shape_op(x[i]),) + is_uniform = _concat_grad_uniform(input_shapes, input_nums) + if is_uniform and is_ascend: + dx = P.Split(axis, input_nums)(dout) + else: + for i in range(input_nums): + slice_out = P.Slice()(dout, out_offset[i], input_shapes[i]) + dx = dx + (slice_out,) return (dx,) - return bprop @@ -644,6 +653,36 @@ def get_bprop_unsorted_segment_min(self): return bprop +@bprop_getters.register(P.UnsortedSegmentProd) +def get_bprop_unsorted_segment_prod(self): + """Generate bprop for UnsortedSegmentProd""" + equal = P.Equal() + cast = P.Cast() + select = P.Select() + gather = P.GatherV2() + greater = P.Greater() + ones_like = P.OnesLike() + maximum = P.Maximum() + unsorted_segment_prod = P.UnsortedSegmentProd() + + def bprop(x, segment_ids, num_segments, out, dout): + is_zero = equal(x, 0) + num_zero = unsorted_segment_sum(cast(is_zero, mstype.int32), segment_ids, num_segments) + grad = select(greater(num_zero, 1), zeros_like(dout), dout) + non_zero_data = select(is_zero, ones_like(x), x) + non_zero_prod = unsorted_segment_prod(non_zero_data, segment_ids, num_segments) + zero_clipped_indices = maximum(segment_ids, zeros_like(segment_ids)) + gathered_prod = gather(out, zero_clipped_indices, 0) + gathered_non_zero_prod = gather(non_zero_prod, zero_clipped_indices, 0) + prod_divided_by_x = gathered_prod / x + partial_derivative = select(is_zero, gathered_non_zero_prod, prod_divided_by_x) + gathered_grad, _, _ = _GatherDropNegatives(grad, segment_ids, zero_clipped_indices) + dx = gathered_grad * partial_derivative + return dx, zeros_like(segment_ids), zeros_like(num_segments) + + return bprop + + @bprop_getters.register(P.SpaceToBatch) def get_bprop_space_to_batch(self): """Generate bprop for SpaceToBatch""" diff --git a/mindspore/ops/_grad/grad_nn_ops.py b/mindspore/ops/_grad/grad_nn_ops.py index 3a86a05943..61c7e40960 100755 --- a/mindspore/ops/_grad/grad_nn_ops.py +++ b/mindspore/ops/_grad/grad_nn_ops.py @@ -760,6 +760,19 @@ def get_bprop_ctc_loss(self): 
return bprop +@bprop_getters.register(P.CTCLossV2) +def get_bprop_ctc_loss_v2(self): + """Grad definition for `CTCLossV2` operation""" + expand = P.ExpandDims() + + def bprop(inputs, labels, input_lengths, labels_lengths, out, dout): + grad_loss = out[1] + grad = grad_loss * expand(dout[0], -1) + return grad, zeros_like(labels), zeros_like(input_lengths), zeros_like(labels_lengths) + + return bprop + + @bprop_getters.register(P.BasicLSTMCell) def get_bprop_basic_lstm_cell(self): """Grad definition for `BasicLSTMCell` operation.""" diff --git a/mindspore/ops/_op_impl/__init__.py b/mindspore/ops/_op_impl/__init__.py index 65a12cd73c..59729f833f 100644 --- a/mindspore/ops/_op_impl/__init__.py +++ b/mindspore/ops/_op_impl/__init__.py @@ -17,7 +17,7 @@ import platform from .aicpu import * if "Windows" not in platform.system(): - from .akg.gpu import * + from .akg import * from .tbe import * __all__ = [] diff --git a/mindspore/ops/_op_impl/akg/__init__.py b/mindspore/ops/_op_impl/akg/__init__.py index fd86dbf999..c4c70b7aa1 100644 --- a/mindspore/ops/_op_impl/akg/__init__.py +++ b/mindspore/ops/_op_impl/akg/__init__.py @@ -13,77 +13,6 @@ # limitations under the License. 
# ============================================================================ -"""autodiff ops""" -from .abs import _abs_akg -from .add_n import _add_n_akg -from .add import _add_akg -from .apply_momentum import _apply_momentum_akg -from .assign import _assign_akg -from .inplace_assign import _inplace_assign_akg -from .assign_add import _assign_add_akg -from .bias_add_grad import _bias_add_grad_akg -from .bias_add import _bias_add_akg -from .cast import _cast_akg -from .clear_zero import _clear_zero_akg -from .conv_bn1 import _conv_bn1_akg -from .conv2d_backprop_filter import _conv2d_backprop_filter_akg -from .conv2d_backprop_input import _conv2d_backprop_input_akg -from .conv2d import _conv2d_akg -from .div import _div_akg -from .equal_count import _equal_count_akg -from .exp import _exp_akg -from .five2four import _five2four_akg -from .four2five import _four2five_akg -from .fused_batch_norm_grad import _fused_batch_norm_grad_akg -from .fused_batch_norm_infer import _fused_batch_norm_infer_akg -from .fused_batch_norm import _fused_batch_norm_akg -from .fused_bn1_grad import _bn1_grad_akg -from .fused_bn1 import _fused_bn1_akg -from .fused_bn2_grad import _bn2_grad_akg -from .fused_bn2 import _fused_bn2_akg -from .fused_bn3_grad import _bn3_grad_akg -from .fused_bn3 import _fused_bn3_akg -from .gather_v2 import _gather_v2_akg -from .less import _less_akg -from .log import _log_akg -from .matmul import _matmul_akg -from .batchmatmul import _batchmatmul_akg -from .max_pool_grad_with_argmax import _max_pool_grad_with_argmax_akg -from .max_pool_with_argmax import _max_pool_with_argmax_akg -from .max import _max_akg -from .maximum import _maximum_akg -from .mean_grad import _mean_grad_akg -from .mean import _mean_akg -from .minimum import _minimum_akg -from .mul import _mul_akg -from .neg import _neg_akg -from .one_hot import _one_hot_akg -from .pow import _power_akg -from .real_div import _real_div_akg -from .reciprocal import _reciprocal_akg -from .reduce_max import 
_reduce_max_akg -from .reduce_mean import _reduce_mean_akg -from .reduce_sum import _reduce_sum_akg -from .relu_grad import _relu_grad_akg -from .relu import _relu_akg -from .reshape import _reshape_akg -from .round import _round_akg -from .rsqrt import _rsqrt_akg -from .select import _select_akg -from .softmax import _softmax_akg -from .sparse_softmax_cross_entropy_with_logits import _sparse_softmax_cross_entropy_with_logits_akg -from .sqrt import _sqrt_akg -from .strided_slice import _strided_slice_akg -from .sub import _sub_akg -from .sum import _sum_akg -from .tile import _tile_akg -from .zeros_like import _zeros_like_akg -from .argmax import _argmax_akg -from .floordiv import _floor_div_akg -from .equal import _equal_akg -from .greater_equal import _greater_equal_akg -from .less_equal import _less_equal_akg -from .expand_dims import _expand_dims_akg -from .greater import _greater_akg -from .equiv_format import _equiv_format_akg +"""akg ops""" +from . import ascend from . import gpu diff --git a/mindspore/ops/_op_impl/akg/abs.py b/mindspore/ops/_op_impl/akg/abs.py deleted file mode 100644 index 8c08f405da..0000000000 --- a/mindspore/ops/_op_impl/akg/abs.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Abs op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Abs", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32", - "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32", - "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "output" - } - ] -}""") -def _abs_akg(): - """Abs AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/add.py b/mindspore/ops/_op_impl/akg/add.py deleted file mode 100644 index 60544ea1c7..0000000000 --- a/mindspore/ops/_op_impl/akg/add.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""TensorAdd op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "TensorAdd", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "int32", "float16", "int32", "float32", "float32", - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0", - "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "required", - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float16", "int32", "float16", "int32", "float32", "float32", - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0", - "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "required", - "name": "y" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "int32", "float16", "int32", "float32", "float32", - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0", - "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "output" - } - ] -}""") -def _add_akg(): - """TensorAdd AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/add_n.py b/mindspore/ops/_op_impl/akg/add_n.py deleted file mode 100644 index 53320f752e..0000000000 --- a/mindspore/ops/_op_impl/akg/add_n.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""AddN op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "AddN", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16","float32","float16","float32", "float16", "float32", - "float16","float32" - ], - "format": [ - "DefaultFormat","DefaultFormat","NC1HWC0","NC1HWC0", "FracZ", "FracZ", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "dynamic", - "name": "inputs" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16","float32","float16","float32", "float16", "float32", - "float16","float32" - ], - "format": [ - "DefaultFormat","DefaultFormat","NC1HWC0","NC1HWC0", "FracZ", "FracZ", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "output" - } - ] -}""") -def _add_n_akg(): - """AddN AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/apply_momentum.py b/mindspore/ops/_op_impl/akg/apply_momentum.py deleted file mode 100644 index 7160571882..0000000000 --- a/mindspore/ops/_op_impl/akg/apply_momentum.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""ApplyMomentum op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "ApplyMomentum", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - { - "name": "use_nesterov", - "param_type": "optional", - "type": "bool" - }, - { - "name": "gradient_scale", - "param_type": "optional", - "type": "float" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float32","float32","float32" - ], - "format": [ - "DefaultFormat","NC1HWC0","FracZ" - ], - "name": "variable" - }, - { - "index": 1, - "dtype": [ - "float32","float32","float32" - ], - "format": [ - "DefaultFormat","NC1HWC0","FracZ" - ], - "name": "accumulation" - }, - { - "index": 2, - "dtype": [ - "float32","float32","float32" - ], - "format": [ - "DefaultFormat","DefaultFormat","DefaultFormat" - ], - "name": "learning_rate" - }, - { - "index": 3, - "dtype": [ - "float32","float32","float32" - ], - "format": [ - "DefaultFormat","NC1HWC0","FracZ" - ], - "name": "gradient" - }, - { - "index": 4, - "dtype": [ - "float32","float32","float32" - ], - "format": [ - "DefaultFormat","DefaultFormat","DefaultFormat" - ], - "name": "momentum" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float32","float32","float32" - ], - "format": [ - "DefaultFormat","NC1HWC0","FracZ" - ], - "name": "output" - } - ] -}""") -def _apply_momentum_akg(): - """ApplyMomentum AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/argmax.py 
b/mindspore/ops/_op_impl/akg/argmax.py deleted file mode 100644 index b04862cbeb..0000000000 --- a/mindspore/ops/_op_impl/akg/argmax.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Argmax op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Argmax", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - { - "name": "axis", - "param_type": "optional", - "type": "int" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "int32", "int32", "int32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _argmax_akg(): - """Argmax AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/ascend/__init__.py b/mindspore/ops/_op_impl/akg/ascend/__init__.py new file mode 100644 index 0000000000..a4d7aec7d0 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/ascend/__init__.py @@ -0,0 +1,30 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""__init__""" + +from .add import _add_akg +from .batchmatmul import _batchmatmul_akg +from .cast import _cast_akg +from .expand_dims import _expand_dims_akg +from .greater import _greater_akg +from .inplace_assign import _inplace_assign_akg +from .maximum import _maximum_akg +from .minimum import _minimum_akg +from .mul import _mul_akg +from .real_div import _real_div_akg +from .rsqrt import _rsqrt_akg +from .select import _select_akg +from .sqrt import _sqrt_akg +from .sub import _sub_akg diff --git a/mindspore/ops/_op_impl/akg/ascend/add.py b/mindspore/ops/_op_impl/akg/ascend/add.py new file mode 100644 index 0000000000..d8689eed6d --- /dev/null +++ b/mindspore/ops/_op_impl/akg/ascend/add.py @@ -0,0 +1,42 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""TensorAdd op""" +from mindspore.ops.op_info_register import op_info_register, AkgAscendRegOp, DataType as DT + +op_info = AkgAscendRegOp("TensorAdd") \ + .fusion_type("ELEMWISE") \ + .input(0, "x") \ + .input(1, "y") \ + .output(0, "output") \ + .dtype_format(DT.F16_Default, DT.F16_Default, DT.F16_Default) \ + .dtype_format(DT.F32_Default, DT.F32_Default, DT.F32_Default) \ + .dtype_format(DT.I32_Default, DT.I32_Default, DT.I32_Default) \ + .dtype_format(DT.F16_5HD, DT.F16_5HD, DT.F16_5HD) \ + .dtype_format(DT.F32_5HD, DT.F32_5HD, DT.F32_5HD) \ + .dtype_format(DT.I32_5HD, DT.I32_5HD, DT.I32_5HD) \ + .dtype_format(DT.F16_FracZ, DT.F16_FracZ, DT.F16_FracZ) \ + .dtype_format(DT.F32_FracZ, DT.F32_FracZ, DT.F32_FracZ) \ + .dtype_format(DT.I32_FracZ, DT.I32_FracZ, DT.I32_FracZ) \ + .dtype_format(DT.F16_FracNZ, DT.F16_FracNZ, DT.F16_FracNZ) \ + .dtype_format(DT.F32_FracNZ, DT.F32_FracNZ, DT.F32_FracNZ) \ + .dtype_format(DT.I32_FracNZ, DT.I32_FracNZ, DT.I32_FracNZ) \ + .get_op_info() + + +@op_info_register(op_info) +def _add_akg(): + """TensorAdd Akg register""" + return diff --git a/mindspore/ops/_op_impl/akg/ascend/batchmatmul.py b/mindspore/ops/_op_impl/akg/ascend/batchmatmul.py new file mode 100644 index 0000000000..d7815c15e6 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/ascend/batchmatmul.py @@ -0,0 +1,33 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""BatchMatMul op""" +from mindspore.ops.op_info_register import op_info_register, AkgAscendRegOp, DataType as DT + +op_info = AkgAscendRegOp("BatchMatMul") \ + .fusion_type("OPAQUE") \ + .input(0, "x1") \ + .input(1, "x2") \ + .output(0, "output") \ + .attr("transpose_a", "optional", "bool") \ + .attr("transpose_b", "optional", "bool") \ + .dtype_format(DT.F16_FracNZ, DT.F16_FracNZ, DT.F16_FracNZ) \ + .get_op_info() + + +@op_info_register(op_info) +def _batchmatmul_akg(): + """BatchMatMul AKG register""" + return diff --git a/mindspore/ops/_op_impl/akg/ascend/cast.py b/mindspore/ops/_op_impl/akg/ascend/cast.py new file mode 100644 index 0000000000..1b874352f8 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/ascend/cast.py @@ -0,0 +1,46 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Cast op""" +from mindspore.ops.op_info_register import op_info_register, AkgAscendRegOp, DataType as DT + +op_info = AkgAscendRegOp("Cast") \ + .fusion_type("OPAQUE") \ + .input(0, "x") \ + .output(0, "output") \ + .attr("dst_type", "required", "str") \ + .dtype_format(DT.F16_Default, DT.F32_Default) \ + .dtype_format(DT.F16_Default, DT.I32_Default) \ + .dtype_format(DT.F32_Default, DT.F16_Default) \ + .dtype_format(DT.F32_Default, DT.I32_Default) \ + .dtype_format(DT.I32_Default, DT.F16_Default) \ + .dtype_format(DT.I32_Default, DT.F32_Default) \ + .dtype_format(DT.BOOL_Default, DT.F16_Default) \ + .dtype_format(DT.BOOL_Default, DT.F32_Default) \ + .dtype_format(DT.BOOL_Default, DT.I32_Default) \ + .dtype_format(DT.F16_5HD, DT.F32_5HD) \ + .dtype_format(DT.F32_5HD, DT.F16_5HD) \ + .dtype_format(DT.BOOL_5HD, DT.I32_5HD) \ + .dtype_format(DT.BOOL_5HD, DT.F32_5HD) \ + .dtype_format(DT.F16_FracNZ, DT.F32_FracNZ) \ + .dtype_format(DT.F32_FracNZ, DT.F16_FracNZ) \ + .dtype_format(DT.BOOL_FracNZ, DT.I32_FracNZ) \ + .dtype_format(DT.BOOL_FracNZ, DT.F32_FracNZ) \ + .get_op_info() + + +@op_info_register(op_info) +def _cast_akg(): + """Cast Akg register""" + return diff --git a/mindspore/ops/_op_impl/akg/ascend/expand_dims.py b/mindspore/ops/_op_impl/akg/ascend/expand_dims.py new file mode 100644 index 0000000000..24faf241aa --- /dev/null +++ b/mindspore/ops/_op_impl/akg/ascend/expand_dims.py @@ -0,0 +1,33 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""ExpandDims op""" +from mindspore.ops.op_info_register import op_info_register, AkgAscendRegOp, DataType as DT + +op_info = AkgAscendRegOp("ExpandDims") \ + .fusion_type("OPAQUE") \ + .input(0, "x") \ + .output(0, "y") \ + .attr("axis", "required", "int") \ + .dtype_format(DT.F16_Default, DT.F16_Default) \ + .dtype_format(DT.F32_Default, DT.F32_Default) \ + .dtype_format(DT.I32_Default, DT.I32_Default) \ + .get_op_info() + + +@op_info_register(op_info) +def _expand_dims_akg(): + """ExpandDims Akg register""" + return diff --git a/mindspore/ops/_op_impl/akg/ascend/greater.py b/mindspore/ops/_op_impl/akg/ascend/greater.py new file mode 100644 index 0000000000..14164c895b --- /dev/null +++ b/mindspore/ops/_op_impl/akg/ascend/greater.py @@ -0,0 +1,34 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Greater op""" +from mindspore.ops.op_info_register import op_info_register, AkgAscendRegOp, DataType as DT + +op_info = AkgAscendRegOp("Greater") \ + .fusion_type("ELEMWISE") \ + .input(0, "x") \ + .input(1, "y") \ + .output(0, "output") \ + .dtype_format(DT.F16_Default, DT.F16_Default, DT.BOOL_Default) \ + .dtype_format(DT.F32_Default, DT.F32_Default, DT.BOOL_Default) \ + .dtype_format(DT.F16_5HD, DT.F16_5HD, DT.BOOL_5HD) \ + .dtype_format(DT.F32_5HD, DT.F32_5HD, DT.BOOL_5HD) \ + .get_op_info() + + +@op_info_register(op_info) +def _greater_akg(): + """Greater Akg register""" + return diff --git a/mindspore/ops/_op_impl/akg/ascend/inplace_assign.py b/mindspore/ops/_op_impl/akg/ascend/inplace_assign.py new file mode 100644 index 0000000000..9f76706440 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/ascend/inplace_assign.py @@ -0,0 +1,41 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""InplaceAssign op""" +from mindspore.ops.op_info_register import op_info_register, AkgAscendRegOp, DataType as DT + +op_info = AkgAscendRegOp("InplaceAssign") \ + .fusion_type("ELEMWISE") \ + .input(0, "x") \ + .input(1, "y") \ + .input(2, "z") \ + .output(0, "output") \ + .attr("fake_output", "optional", "bool") \ + .dtype_format(DT.F16_Default, DT.F16_Default, DT.F16_Default, DT.F16_Default) \ + .dtype_format(DT.F32_Default, DT.F32_Default, DT.F32_Default, DT.F32_Default) \ + .dtype_format(DT.I32_Default, DT.I32_Default, DT.I32_Default, DT.I32_Default) \ + .dtype_format(DT.F16_5HD, DT.F16_5HD, DT.F16_5HD, DT.F16_5HD) \ + .dtype_format(DT.F32_5HD, DT.F32_5HD, DT.F32_5HD, DT.F32_5HD) \ + .dtype_format(DT.I32_5HD, DT.I32_5HD, DT.I32_5HD, DT.I32_5HD) \ + .dtype_format(DT.F16_FracZ, DT.F16_FracZ, DT.F16_FracZ, DT.F16_FracZ) \ + .dtype_format(DT.F32_FracZ, DT.F32_FracZ, DT.F32_FracZ, DT.F32_FracZ) \ + .dtype_format(DT.I32_FracZ, DT.I32_FracZ, DT.I32_FracZ, DT.I32_FracZ) \ + .get_op_info() + + +@op_info_register(op_info) +def _inplace_assign_akg(): + """InplaceAssign Akg register""" + return diff --git a/mindspore/ops/_op_impl/akg/ascend/maximum.py b/mindspore/ops/_op_impl/akg/ascend/maximum.py new file mode 100644 index 0000000000..b57de7d15a --- /dev/null +++ b/mindspore/ops/_op_impl/akg/ascend/maximum.py @@ -0,0 +1,36 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Maximum op""" +from mindspore.ops.op_info_register import op_info_register, AkgAscendRegOp, DataType as DT + +op_info = AkgAscendRegOp("Maximum") \ + .fusion_type("COMMREDUCE") \ + .input(0, "x") \ + .input(1, "y") \ + .output(0, "output") \ + .dtype_format(DT.F16_Default, DT.F16_Default, DT.F16_Default) \ + .dtype_format(DT.F32_Default, DT.F32_Default, DT.F32_Default) \ + .dtype_format(DT.I32_Default, DT.I32_Default, DT.I32_Default) \ + .dtype_format(DT.F16_5HD, DT.F16_5HD, DT.F16_5HD) \ + .dtype_format(DT.F32_5HD, DT.F32_5HD, DT.F32_5HD) \ + .dtype_format(DT.I32_5HD, DT.I32_5HD, DT.I32_5HD) \ + .get_op_info() + + +@op_info_register(op_info) +def _maximum_akg(): + """Maximum Akg register""" + return diff --git a/mindspore/ops/_op_impl/akg/ascend/minimum.py b/mindspore/ops/_op_impl/akg/ascend/minimum.py new file mode 100644 index 0000000000..cdc0abfc6d --- /dev/null +++ b/mindspore/ops/_op_impl/akg/ascend/minimum.py @@ -0,0 +1,39 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Minimum op""" +from mindspore.ops.op_info_register import op_info_register, AkgAscendRegOp, DataType as DT + +op_info = AkgAscendRegOp("Minimum") \ + .fusion_type("COMMREDUCE") \ + .input(0, "x") \ + .input(1, "y") \ + .output(0, "output") \ + .dtype_format(DT.F16_Default, DT.F16_Default, DT.F16_Default) \ + .dtype_format(DT.F32_Default, DT.F32_Default, DT.F32_Default) \ + .dtype_format(DT.I32_Default, DT.I32_Default, DT.I32_Default) \ + .dtype_format(DT.F16_5HD, DT.F16_5HD, DT.F16_5HD) \ + .dtype_format(DT.F32_5HD, DT.F32_5HD, DT.F32_5HD) \ + .dtype_format(DT.I32_5HD, DT.I32_5HD, DT.I32_5HD) \ + .dtype_format(DT.F16_FracNZ, DT.F16_FracNZ, DT.F16_FracNZ) \ + .dtype_format(DT.F32_FracNZ, DT.F32_FracNZ, DT.F32_FracNZ) \ + .dtype_format(DT.I32_FracNZ, DT.I32_FracNZ, DT.I32_FracNZ) \ + .get_op_info() + + +@op_info_register(op_info) +def _minimum_akg(): + """Minimum Akg register""" + return diff --git a/mindspore/ops/_op_impl/akg/ascend/mul.py b/mindspore/ops/_op_impl/akg/ascend/mul.py new file mode 100644 index 0000000000..ea21888b84 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/ascend/mul.py @@ -0,0 +1,41 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Mul op""" +from mindspore.ops.op_info_register import op_info_register, AkgAscendRegOp, DataType as DT + +op_info = AkgAscendRegOp("Mul") \ + .fusion_type("ELEMWISE") \ + .input(0, "x") \ + .input(1, "y") \ + .output(0, "output") \ + .attr("x_shape", "required", "listInt") \ + .attr("y_shape", "required", "listInt") \ + .attr("data_format", "required", "listStr") \ + .dtype_format(DT.F16_Default, DT.F16_Default, DT.F16_Default) \ + .dtype_format(DT.F32_Default, DT.F32_Default, DT.F32_Default) \ + .dtype_format(DT.F16_5HD, DT.F16_5HD, DT.F16_5HD) \ + .dtype_format(DT.F32_5HD, DT.F32_5HD, DT.F32_5HD) \ + .dtype_format(DT.F16_FracZ, DT.F16_FracZ, DT.F16_FracZ) \ + .dtype_format(DT.F32_FracZ, DT.F32_FracZ, DT.F32_FracZ) \ + .dtype_format(DT.F16_FracNZ, DT.F16_FracNZ, DT.F16_FracNZ) \ + .dtype_format(DT.F32_FracNZ, DT.F32_FracNZ, DT.F32_FracNZ) \ + .get_op_info() + + +@op_info_register(op_info) +def _mul_akg(): + """Mul Akg register""" + return diff --git a/mindspore/ops/_op_impl/akg/ascend/real_div.py b/mindspore/ops/_op_impl/akg/ascend/real_div.py new file mode 100644 index 0000000000..c7c3ad9eb6 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/ascend/real_div.py @@ -0,0 +1,36 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""RealDiv op""" +from mindspore.ops.op_info_register import op_info_register, AkgAscendRegOp, DataType as DT + +op_info = AkgAscendRegOp("RealDiv") \ + .fusion_type("ELEMWISE") \ + .input(0, "x") \ + .input(1, "y") \ + .output(0, "output") \ + .dtype_format(DT.F16_Default, DT.F16_Default, DT.F16_Default) \ + .dtype_format(DT.F32_Default, DT.F32_Default, DT.F32_Default) \ + .dtype_format(DT.F16_5HD, DT.F16_5HD, DT.F16_5HD) \ + .dtype_format(DT.F32_5HD, DT.F32_5HD, DT.F32_5HD) \ + .dtype_format(DT.F16_FracNZ, DT.F16_FracNZ, DT.F16_FracNZ) \ + .dtype_format(DT.F32_FracNZ, DT.F32_FracNZ, DT.F32_FracNZ) \ + .get_op_info() + + +@op_info_register(op_info) +def _real_div_akg(): + """RealDiv Akg register""" + return diff --git a/mindspore/ops/_op_impl/akg/ascend/rsqrt.py b/mindspore/ops/_op_impl/akg/ascend/rsqrt.py new file mode 100644 index 0000000000..55cf876951 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/ascend/rsqrt.py @@ -0,0 +1,35 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Rsqrt op""" +from mindspore.ops.op_info_register import op_info_register, AkgAscendRegOp, DataType as DT + +op_info = AkgAscendRegOp("Rsqrt") \ + .fusion_type("ELEMWISE") \ + .input(0, "x") \ + .output(0, "output") \ + .dtype_format(DT.F16_Default, DT.F16_Default) \ + .dtype_format(DT.F32_Default, DT.F32_Default) \ + .dtype_format(DT.I32_Default, DT.I32_Default) \ + .dtype_format(DT.F16_5HD, DT.F16_5HD) \ + .dtype_format(DT.F32_5HD, DT.F32_5HD) \ + .dtype_format(DT.I32_5HD, DT.I32_5HD) \ + .get_op_info() + + +@op_info_register(op_info) +def _rsqrt_akg(): + """Rsqrt Akg register""" + return diff --git a/mindspore/ops/_op_impl/akg/ascend/select.py b/mindspore/ops/_op_impl/akg/ascend/select.py new file mode 100644 index 0000000000..67fee114ca --- /dev/null +++ b/mindspore/ops/_op_impl/akg/ascend/select.py @@ -0,0 +1,37 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Select op""" +from mindspore.ops.op_info_register import op_info_register, AkgAscendRegOp, DataType as DT + +op_info = AkgAscendRegOp("Select") \ + .fusion_type("ELEMWISE") \ + .input(0, "condition") \ + .input(1, "x") \ + .input(2, "y") \ + .output(0, "output") \ + .dtype_format(DT.BOOL_Default, DT.F16_Default, DT.F16_Default, DT.F16_Default) \ + .dtype_format(DT.BOOL_Default, DT.F32_Default, DT.F32_Default, DT.F32_Default) \ + .dtype_format(DT.BOOL_Default, DT.I32_Default, DT.I32_Default, DT.I32_Default) \ + .dtype_format(DT.BOOL_5HD, DT.F16_5HD, DT.F16_5HD, DT.F16_5HD) \ + .dtype_format(DT.BOOL_5HD, DT.F32_5HD, DT.F32_5HD, DT.F32_5HD) \ + .dtype_format(DT.BOOL_5HD, DT.I32_5HD, DT.I32_5HD, DT.I32_5HD) \ + .get_op_info() + + +@op_info_register(op_info) +def _select_akg(): + """Select Akg register""" + return diff --git a/mindspore/ops/_op_impl/akg/ascend/sqrt.py b/mindspore/ops/_op_impl/akg/ascend/sqrt.py new file mode 100644 index 0000000000..43f64b8973 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/ascend/sqrt.py @@ -0,0 +1,35 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Sqrt op""" +from mindspore.ops.op_info_register import op_info_register, AkgAscendRegOp, DataType as DT + +op_info = AkgAscendRegOp("Sqrt") \ + .fusion_type("ELEMWISE") \ + .input(0, "x") \ + .output(0, "output") \ + .dtype_format(DT.F16_Default, DT.F16_Default) \ + .dtype_format(DT.F32_Default, DT.F32_Default) \ + .dtype_format(DT.I32_Default, DT.I32_Default) \ + .dtype_format(DT.F16_5HD, DT.F16_5HD) \ + .dtype_format(DT.F32_5HD, DT.F32_5HD) \ + .dtype_format(DT.I32_5HD, DT.I32_5HD) \ + .get_op_info() + + +@op_info_register(op_info) +def _sqrt_akg(): + """Sqrt Akg register""" + return diff --git a/mindspore/ops/_op_impl/akg/ascend/sub.py b/mindspore/ops/_op_impl/akg/ascend/sub.py new file mode 100644 index 0000000000..62001b3f44 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/ascend/sub.py @@ -0,0 +1,42 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Sub op""" +from mindspore.ops.op_info_register import op_info_register, AkgAscendRegOp, DataType as DT + +op_info = AkgAscendRegOp("Sub") \ + .fusion_type("ELEMWISE") \ + .input(0, "x") \ + .input(1, "y") \ + .output(0, "output") \ + .dtype_format(DT.F16_Default, DT.F16_Default, DT.F16_Default) \ + .dtype_format(DT.F32_Default, DT.F32_Default, DT.F32_Default) \ + .dtype_format(DT.I32_Default, DT.I32_Default, DT.I32_Default) \ + .dtype_format(DT.F16_5HD, DT.F16_5HD, DT.F16_5HD) \ + .dtype_format(DT.F32_5HD, DT.F32_5HD, DT.F32_5HD) \ + .dtype_format(DT.I32_5HD, DT.I32_5HD, DT.I32_5HD) \ + .dtype_format(DT.F16_FracZ, DT.F16_FracZ, DT.F16_FracZ) \ + .dtype_format(DT.F32_FracZ, DT.F32_FracZ, DT.F32_FracZ) \ + .dtype_format(DT.I32_FracZ, DT.I32_FracZ, DT.I32_FracZ) \ + .dtype_format(DT.F16_FracNZ, DT.F16_FracNZ, DT.F16_FracNZ) \ + .dtype_format(DT.F32_FracNZ, DT.F32_FracNZ, DT.F32_FracNZ) \ + .dtype_format(DT.I32_FracNZ, DT.I32_FracNZ, DT.I32_FracNZ) \ + .get_op_info() + + +@op_info_register(op_info) +def _sub_akg(): + """Sub Akg register""" + return diff --git a/mindspore/ops/_op_impl/akg/assign.py b/mindspore/ops/_op_impl/akg/assign.py deleted file mode 100644 index e7c5a082bd..0000000000 --- a/mindspore/ops/_op_impl/akg/assign.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Assign op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Assign", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ" - ], - "name": "ref" - }, - { - "index": 1, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ" - ], - "name": "value" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ" - ], - "name": "output" - } - ] -}""") -def _assign_akg(): - """Assign AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/assign_add.py b/mindspore/ops/_op_impl/akg/assign_add.py deleted file mode 100644 index 7d0d345764..0000000000 --- a/mindspore/ops/_op_impl/akg/assign_add.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""AssignAdd op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "AssignAdd", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "ref" - }, - { - "index": 1, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "value" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _assign_add_akg(): - """AssignAdd AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/batchmatmul.py b/mindspore/ops/_op_impl/akg/batchmatmul.py deleted file mode 100644 index f5da71aa25..0000000000 --- a/mindspore/ops/_op_impl/akg/batchmatmul.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""BatchMatMul op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "BatchMatMul", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - { - "name": "transpose_a", - "param_type": "optional", - "type": "bool" - }, - { - "name": "transpose_b", - "param_type": "optional", - "type": "bool" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16" - ], - "format": [ - "FRACTAL_NZ" - ], - "name": "x1" - }, - { - "index": 1, - "dtype": [ - "float16" - ], - "format": [ - "FRACTAL_NZ" - ], - "name": "x2" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16" - ], - "format": [ - "FRACTAL_NZ" - ], - "name": "output" - } - ] -}""") -def _batchmatmul_akg(): - """BatchMatMul AKG register""" - return diff --git a/mindspore/ops/_op_impl/akg/bias_add.py b/mindspore/ops/_op_impl/akg/bias_add.py deleted file mode 100644 index 74f2bf7bcf..0000000000 --- a/mindspore/ops/_op_impl/akg/bias_add.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""BiasAdd op""" - -from mindspore.ops.op_info_register import op_info_register - -@op_info_register("""{ - "op_name": "BiasAdd", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - { - "name": "data_format", - "param_type": "optional", - "type": "listStr" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16","float32","float16","float32","float16","float32" - ], - "format": [ - "NHWC","NHWC","NC1HWC0","NC1HWC0","DefaultFormat","DefaultFormat" - ], - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float16","float32","float16","float32","float16","float32" - ], - "format": [ - "NHWC","NHWC","NC1HWC0","NC1HWC0","DefaultFormat","DefaultFormat" - ], - "name": "b" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16","float32","float16","float32","float16","float32" - ], - "format": [ - "DefaultFormat","DefaultFormat","NC1HWC0","NC1HWC0","DefaultFormat","DefaultFormat" - ], - "name": "output" - } - ] -}""") -def _bias_add_akg(): - """BiasAddGrad AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/bias_add_grad.py b/mindspore/ops/_op_impl/akg/bias_add_grad.py deleted file mode 100644 index 7726af6692..0000000000 --- a/mindspore/ops/_op_impl/akg/bias_add_grad.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""BiasAddGrad op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "BiasAddGrad", - "imply_type": "AutoDiff", - "fusion_type": "COMMREDUCE", - "attr": [ - { - "name": "data_format", - "param_type": "optional", - "type": "listStr" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16","float32","float16","float32","float16","float32" - ], - "format": [ - "NHWC","NHWC","NC1HWC0","NC1HWC0","DefaultFormat","DefaultFormat" - ], - "name": "dout" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16","float32","float16","float32","float16","float32" - ], - "format": [ - "DefaultFormat","DefaultFormat","NC1HWC0","NC1HWC0","DefaultFormat","DefaultFormat" - ], - "name": "output" - } - ] -}""") -def _bias_add_grad_akg(): - """BiasAddGrad AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/cast.py b/mindspore/ops/_op_impl/akg/cast.py deleted file mode 100644 index a78d4d87e4..0000000000 --- a/mindspore/ops/_op_impl/akg/cast.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Cast op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Cast", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - { - "name": "dst_type", - "param_type": "required", - "type": "str" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "bool", "bool", - "float16", "float32", "int32", "int32", - "bool", - "float16", "float32", "bool", "bool", - "float16", "float32", "bool", "bool" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", - "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", - "DefaultFormat", - "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float32", "float16", "int32", "float16", - "int32", "int32", "float16", "float32", - "float32", - "float32", "float16", "int32", "float32", - "float32", "float16", "int32", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", - "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", - "DefaultFormat", - "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "output" - } - ] -}""") -def _cast_akg(): - """Cast AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/clear_zero.py b/mindspore/ops/_op_impl/akg/clear_zero.py deleted file mode 100644 index 38bf35044f..0000000000 --- a/mindspore/ops/_op_impl/akg/clear_zero.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""ClearZero op""" - -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "ClearZero", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - { - "name": "pad_mod", - "param_type": "optional", - "type": "string" - }, - { - "name": "window", - "param_type": "optional", - "type": "int" - }, - { - "name": "pad", - "param_type": "optional", - "type": "int" - }, - { - "name": "stride", - "param_type": "optional", - "type": "int" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - ] -}""") -def _clear_zero_akg(): - """MaxPoolGradWithArgmax AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/conv2d.py b/mindspore/ops/_op_impl/akg/conv2d.py deleted file mode 100644 index 709aca7001..0000000000 --- a/mindspore/ops/_op_impl/akg/conv2d.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Conv2D op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Conv2D", - "imply_type": "AutoDiff", - "fusion_type": "CONVLUTION", - "attr": [ - { - "name": "x_shape", - "param_type": "required", - "type": "listInt" - }, - { - "name": "w_shape", - "param_type": "required", - "type": "listInt" - }, - { - "name": "pad_list", - "param_type": "required", - "type": "listInt" - }, - { - "name": "stride", - "param_type": "optional", - "type": "int" - }, - { - "name": "dilation", - "param_type": "optional", - "type": "int" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16" - ], - "format": [ - "NC1HWC0" - ], - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float16" - ], - "format": [ - "FracZ" - ], - "name": "w" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16" - ], - "format": [ - "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _conv2d_akg(): - """Conv2D AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/conv2d_backprop_filter.py b/mindspore/ops/_op_impl/akg/conv2d_backprop_filter.py deleted file mode 100644 index 1e4e4f1a1e..0000000000 --- a/mindspore/ops/_op_impl/akg/conv2d_backprop_filter.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Conv2DBackpropFilter op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Conv2DBackpropFilter", - "imply_type": "AutoDiff", - "fusion_type": "CONVLUTION", - "attr": [ - { - "name": "input_shape", - "param_type": "required", - "type": "listInt" - }, - { - "name": "filter_sizes", - "param_type": "required", - "type": "listInt" - }, - { - "name": "stride", - "param_type": "optional", - "type": "int" - }, - { - "name": "pad_list", - "param_type": "required", - "type": "listInt" - }, - { - "name": "dilation", - "param_type": "optional", - "type": "int" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16" - ], - "format": [ - "NC1HWC0" - ], - "name": "out_backprop" - }, - { - "index": 1, - "dtype": [ - "float16" - ], - "format": [ - "NC1HWC0" - ], - "name": "input" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float32" - ], - "format": [ - "FracZ" - ], - "name": "output" - } - ] -}""") -def _conv2d_backprop_filter_akg(): - """Conv2DBackpropFilter AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/conv2d_backprop_input.py b/mindspore/ops/_op_impl/akg/conv2d_backprop_input.py deleted file mode 100644 index 52c7f2e7b3..0000000000 --- a/mindspore/ops/_op_impl/akg/conv2d_backprop_input.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance 
with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Conv2DBackpropInput op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Conv2DBackpropInput", - "imply_type": "AutoDiff", - "fusion_type": "CONVLUTION", - "attr": [ - { - "name": "input_sizes", - "param_type": "required", - "type": "listInt" - }, - { - "name": "filter_shape", - "param_type": "required", - "type": "listInt" - }, - { - "name": "stride", - "param_type": "optional", - "type": "int" - }, - { - "name": "pad_list", - "param_type": "required", - "type": "listInt" - }, - { - "name": "dilation", - "param_type": "optional", - "type": "int" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16" - ], - "format": [ - "NC1HWC0" - ], - "name": "out_backprop" - }, - { - "index": 1, - "dtype": [ - "float16" - ], - "format": [ - "FracZ" - ], - "name": "filter" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16" - ], - "format": [ - "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _conv2d_backprop_input_akg(): - """Conv2DBackpropInput AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/conv_bn1.py b/mindspore/ops/_op_impl/akg/conv_bn1.py deleted file mode 100644 index 118c94e6fc..0000000000 --- a/mindspore/ops/_op_impl/akg/conv_bn1.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""ConvBN1 op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "ConvBN1", - "imply_type": "AutoDiff", - "fusion_type": "CONVLUTION", - "attr": [ - { - "name": "x_shape", - "param_type": "required", - "type": "listInt" - }, - { - "name": "w_shape", - "param_type": "required", - "type": "listInt" - }, - { - "name": "pad_list", - "param_type": "required", - "type": "listInt" - }, - { - "name": "stride", - "param_type": "optional", - "type": "int" - }, - { - "name": "dilation", - "param_type": "optional", - "type": "int" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16" - ], - "format": [ - "NC1HWC0" - ], - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float16" - ], - "format": [ - "FracZ" - ], - "name": "w" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16" - ], - "format": [ - "NC1HWC0" - ], - "name": "conv_res_16" - }, - { - "index": 1, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "var_part" - }, - { - "index": 2, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "mean" - } - ] -}""") -def _conv_bn1_akg(): - """ConvBN1 AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/div.py b/mindspore/ops/_op_impl/akg/div.py deleted file mode 100644 index 56cdcca868..0000000000 --- a/mindspore/ops/_op_impl/akg/div.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, 
Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Div op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Div", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "y" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _div_akg(): - """Div AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/equal.py b/mindspore/ops/_op_impl/akg/equal.py deleted file mode 100644 index 35874c62bb..0000000000 --- a/mindspore/ops/_op_impl/akg/equal.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Equal op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Equal", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - }, - { - "index": 1, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "y" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "bool", "bool", "bool", "bool", "bool", "bool" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _equal_akg(): - """Equal AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/equal_count.py b/mindspore/ops/_op_impl/akg/equal_count.py deleted file mode 100644 index 9c575db7b3..0000000000 --- a/mindspore/ops/_op_impl/akg/equal_count.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""EqualCount op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "EqualCount", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "int32" - ], - "format": [ - "DefaultFormat" - ], - "name": "x" - }, - { - "index": 1, - "dtype": [ - "int32" - ], - "format": [ - "DefaultFormat" - ], - "name": "y" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "int32" - ], - "format": [ - "DefaultFormat" - ], - "name": "output" - } - ] -}""") -def _equal_count_akg(): - """EqualCount AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/equiv_format.py b/mindspore/ops/_op_impl/akg/equiv_format.py deleted file mode 100644 index 111451b15c..0000000000 --- a/mindspore/ops/_op_impl/akg/equiv_format.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""EquivFormat op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "EquivFormat", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "FRACTAL_NZ", "FRACTAL_NZ", "DefaultFormat", "DefaultFormat" - ], - "name": "output" - } - ] -}""") -def _equiv_format_akg(): - """EquivFormat AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/exp.py b/mindspore/ops/_op_impl/akg/exp.py deleted file mode 100644 index 273b3348a4..0000000000 --- a/mindspore/ops/_op_impl/akg/exp.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Exp op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Exp", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32", - "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "required", - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32", - "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "output" - } - ] -}""") -def _exp_akg(): - """Exp AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/expand_dims.py b/mindspore/ops/_op_impl/akg/expand_dims.py deleted file mode 100644 index 9e1b18153a..0000000000 --- a/mindspore/ops/_op_impl/akg/expand_dims.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""ExpandDims op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "ExpandDims", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - { - "name": "axis", - "param_type": "required", - "type": "int" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat" - ], - "name": "y" - } - ] -}""") -def _expand_dims_akg(): - """ExpandDims AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/five2four.py b/mindspore/ops/_op_impl/akg/five2four.py deleted file mode 100644 index 1dac2c3628..0000000000 --- a/mindspore/ops/_op_impl/akg/five2four.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Five2Four op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Five2Four", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - { - "name": "shape4d", - "param_type": "required", - "type": "listInt" - }, - { - "name": "dstType", - "param_type": "required", - "type": "str" - }, - { - "name": "output_format", - "param_type": "required", - "type": "str" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16","float16","float16","float32","float16","float32" - ], - "format": [ - "NC1HWC0","NC1HWC0","NC1HWC0","NC1HWC0","NC1HWC0","NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16","float16","float32","float32","float32","float32" - ], - "format": [ - "DefaultFormat","NHWC","DefaultFormat","DefaultFormat","NHWC","NHWC" - ], - "name": "output" - } - ] -}""") -def _five2four_akg(): - """Five2Four AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/floordiv.py b/mindspore/ops/_op_impl/akg/floordiv.py deleted file mode 100644 index 99e577b4be..0000000000 --- a/mindspore/ops/_op_impl/akg/floordiv.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""FloorDiv op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "FloorDiv", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "y" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "int32", "int32", "int32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _floor_div_akg(): - """FloorDiv AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/four2five.py b/mindspore/ops/_op_impl/akg/four2five.py deleted file mode 100644 index 01b6f85715..0000000000 --- a/mindspore/ops/_op_impl/akg/four2five.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Four2Five op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Four2Five", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - { - "name": "data_format", - "param_type": "optional", - "type": "listStr" - }, - { - "name": "dst_type", - "param_type": "required", - "type": "str" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float32", "float16","float32", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NHWC", "NHWC", "NHWC" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float16", "float32", "float16", "float16", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _four2five_akg(): - """Four2Five AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/fused_batch_norm.py b/mindspore/ops/_op_impl/akg/fused_batch_norm.py deleted file mode 100644 index 5ce9839328..0000000000 --- a/mindspore/ops/_op_impl/akg/fused_batch_norm.py +++ /dev/null @@ -1,149 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""FusedBatchNorm op""" - -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "FusedBatchNorm", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - { - "name": "momentum", - "param_type": "optional", - "type": "float" - }, - { - "name": "epsilon", - "param_type": "optional", - "type": "float" - }, - { - "name": "data_format", - "param_type": "optional", - "type": "listStr" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "scale" - }, - { - "index": 2, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "b" - }, - { - "index": 3, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "mean" - }, - { - "index": 4, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "variance" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "y" - }, - { - "index": 1, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "running_mean" - }, - { - "index": 2, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "running_variance" - }, - { - "index": 3, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "save_mean" - }, - { - "index": 4, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "save_inv_variance" - } - ] -}""") -def _fused_batch_norm_akg(): - """FusedBatchNorm AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/fused_batch_norm_grad.py b/mindspore/ops/_op_impl/akg/fused_batch_norm_grad.py deleted file mode 100644 index 9191548f73..0000000000 --- a/mindspore/ops/_op_impl/akg/fused_batch_norm_grad.py +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright 2020 Huawei 
Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""FusedBatchNormGrad op""" - -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "FusedBatchNormGrad", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - { - "name": "data_format", - "param_type": "optional", - "type": "listStr" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "dy" - }, - { - "index": 1, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "x" - }, - { - "index": 2, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "scale" - }, - { - "index": 3, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "save_mean" - }, - { - "index": 4, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "save_inv_variance" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "dx" - }, - { - "index": 1, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "bn_scale" - }, - { - "index": 2, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "bn_bias" - } - ] -}""") -def _fused_batch_norm_grad_akg(): - """BiasAddGrad AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/fused_batch_norm_infer.py 
b/mindspore/ops/_op_impl/akg/fused_batch_norm_infer.py deleted file mode 100644 index 1e7743fa8f..0000000000 --- a/mindspore/ops/_op_impl/akg/fused_batch_norm_infer.py +++ /dev/null @@ -1,109 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""FusedBatchNormInfer op""" - -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "FusedBatchNormInfer", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - { - "name": "momentum", - "param_type": "optional", - "type": "float" - }, - { - "name": "epsilon", - "param_type": "optional", - "type": "float" - }, - { - "name": "data_format", - "param_type": "optional", - "type": "listStr" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "scale" - }, - { - "index": 2, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "b" - }, - { - "index": 3, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "mean" - }, - { - "index": 4, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "variance" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "y" - } - ] -}""") -def 
_fused_batch_norm_infer_akg(): - """FusedBatchNormInfer AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/fused_bn1.py b/mindspore/ops/_op_impl/akg/fused_bn1.py deleted file mode 100644 index fdaa673f25..0000000000 --- a/mindspore/ops/_op_impl/akg/fused_bn1.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""FusedBN1 op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "FusedBN1", - "imply_type": "AutoDiff", - "fusion_type": "COMMREDUCE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "data" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float32", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "output" - }, - { - "index": 1, - "dtype": [ - "float32", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _fused_bn1_akg(): - """FusedBN1 AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/fused_bn1_grad.py b/mindspore/ops/_op_impl/akg/fused_bn1_grad.py deleted file mode 100644 index 8de6796d6f..0000000000 --- a/mindspore/ops/_op_impl/akg/fused_bn1_grad.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under 
the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""BNGrad1 op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "BNGrad1", - "imply_type": "AutoDiff", - "fusion_type": "COMMREDUCE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "dy" - }, - { - "index": 1, - "dtype": [ - "float16", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "data" - },{ - "index": 2, - "dtype": [ - "float32", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "mean" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float32", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "output" - }, - { - "index": 1, - "dtype": [ - "float32", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "output" - }, - { - "index": 2, - "dtype": [ - "float32", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _bn1_grad_akg(): - """BNGrad1 AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/fused_bn2.py b/mindspore/ops/_op_impl/akg/fused_bn2.py deleted file mode 100644 index e26a5ad8a0..0000000000 --- a/mindspore/ops/_op_impl/akg/fused_bn2.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 
(the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""FusedBN2 op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "FusedBN2", - "imply_type": "AutoDiff", - "fusion_type": "COMMREDUCE", - "attr": [ - { - "name": "momentum", - "param_type": "optional", - "type": "float" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "mean" - }, - { - "index": 1, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "var_part" - }, - { - "index": 2, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "running_mean" - }, - { - "index": 3, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "running_var" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "output" - }, - { - "index": 1, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "output" - }, - { - "index": 2, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _fused_bn2_akg(): - """FusedBN2 AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/fused_bn2_grad.py b/mindspore/ops/_op_impl/akg/fused_bn2_grad.py deleted file mode 100644 index e29a9177b6..0000000000 --- a/mindspore/ops/_op_impl/akg/fused_bn2_grad.py +++ /dev/null @@ -1,132 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""BNGrad1 op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "BNGrad2", - "imply_type": "AutoDiff", - "fusion_type": "COMMREDUCE", - "attr": [ - { - "name": "eps", - "param_type": "optional", - "type": "float" - }, - { - "name": "data_shape", - "param_type": "optional", - "type": "listInt" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "dgamma_red_hw" - }, - { - "index": 1, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "dbeta_red_hw" - },{ - "index": 2, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "variance" - }, - { - "index": 3, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "gamma" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "output" - }, - { - "index": 1, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "output" - }, - { - "index": 2, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "output" - }, - { - "index": 3, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "output" - }, - { - "index": 4, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _bn2_grad_akg(): - 
"""BNGrad2 AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/fused_bn3.py b/mindspore/ops/_op_impl/akg/fused_bn3.py deleted file mode 100644 index 74f3f652f3..0000000000 --- a/mindspore/ops/_op_impl/akg/fused_bn3.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""FusedBN3 op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "FusedBN3", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - { - "name": "eps", - "param_type": "optional", - "type": "float" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16" - ], - "format": [ - "NC1HWC0" - ], - "name": "data" - }, - { - "index": 1, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "mean" - },{ - "index": 2, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "variance" - },{ - "index": 3, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "gamma" - },{ - "index": 4, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "beta" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16" - ], - "format": [ - "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _fused_bn3_akg(): - """FusedBN3 AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/fused_bn3_grad.py 
b/mindspore/ops/_op_impl/akg/fused_bn3_grad.py deleted file mode 100644 index 5ffc57a68e..0000000000 --- a/mindspore/ops/_op_impl/akg/fused_bn3_grad.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""BNGrad3 op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "BNGrad3", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "dy" - }, - { - "index": 1, - "dtype": [ - "float32", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "rs" - },{ - "index": 2, - "dtype": [ - "float32", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "dgamma_dx" - }, - { - "index": 3, - "dtype": [ - "float32", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "dbeta_dx" - }, - { - "index": 4, - "dtype": [ - "float32", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "data_minus_mean" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _bn3_grad_akg(): - """BNGrad3 AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/gather_v2.py 
b/mindspore/ops/_op_impl/akg/gather_v2.py deleted file mode 100644 index 84ab7eb669..0000000000 --- a/mindspore/ops/_op_impl/akg/gather_v2.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""GatherV2 op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "GatherV2", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - { - "name": "axis", - "param_type": "optional", - "type": "int" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat" - ], - "name": "params" - }, - { - "index": 1, - "dtype": [ - "int32", "int32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat" - ], - "name": "indices" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat" - ], - "name": "output" - } - ] -}""") -def _gather_v2_akg(): - """GatherV2 AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/gpu/cast.py b/mindspore/ops/_op_impl/akg/gpu/cast.py index 2f31dab1ba..c8aef249cd 100644 --- a/mindspore/ops/_op_impl/akg/gpu/cast.py +++ b/mindspore/ops/_op_impl/akg/gpu/cast.py @@ -13,15 +13,16 @@ # limitations under the 
License. """Cast op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -cast_op_info = AkgRegOp("Cast") \ +cast_op_info = AkgGpuRegOp("Cast") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .output(0, "output") \ .attr("dst_type", "required", "str") \ .dtype_format(DataType.F16_Default, DataType.F32_Default) \ .dtype_format(DataType.F32_Default, DataType.F16_Default) \ + .dtype_format(DataType.F32_Default, DataType.I32_Default) \ .dtype_format(DataType.I32_Default, DataType.F32_Default) \ .dtype_format(DataType.BOOL_Default, DataType.F32_Default) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/akg/gpu/equal.py b/mindspore/ops/_op_impl/akg/gpu/equal.py index fa20392411..40a3590f61 100644 --- a/mindspore/ops/_op_impl/akg/gpu/equal.py +++ b/mindspore/ops/_op_impl/akg/gpu/equal.py @@ -13,9 +13,9 @@ # limitations under the License. """Equal op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -equal_op_info = AkgRegOp("Equal") \ +equal_op_info = AkgGpuRegOp("Equal") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .input(1, "y") \ diff --git a/mindspore/ops/_op_impl/akg/gpu/greater_equal.py b/mindspore/ops/_op_impl/akg/gpu/greater_equal.py index b000cbd0e3..666c939b4b 100644 --- a/mindspore/ops/_op_impl/akg/gpu/greater_equal.py +++ b/mindspore/ops/_op_impl/akg/gpu/greater_equal.py @@ -13,9 +13,9 @@ # limitations under the License. 
"""GreaterEqual op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -greater_equal_op_info = AkgRegOp("GreaterEqual") \ +greater_equal_op_info = AkgGpuRegOp("GreaterEqual") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .input(1, "y") \ diff --git a/mindspore/ops/_op_impl/akg/gpu/hsigmoid.py b/mindspore/ops/_op_impl/akg/gpu/hsigmoid.py index 4e802c1cad..34e1e7f14a 100644 --- a/mindspore/ops/_op_impl/akg/gpu/hsigmoid.py +++ b/mindspore/ops/_op_impl/akg/gpu/hsigmoid.py @@ -13,9 +13,9 @@ # limitations under the License. """HSigmoid op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -hsigmoid_op_info = AkgRegOp("HSigmoid") \ +hsigmoid_op_info = AkgGpuRegOp("HSigmoid") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .output(0, "output") \ diff --git a/mindspore/ops/_op_impl/akg/gpu/hsigmoid_grad.py b/mindspore/ops/_op_impl/akg/gpu/hsigmoid_grad.py index 39b819138e..5e08ffb41c 100644 --- a/mindspore/ops/_op_impl/akg/gpu/hsigmoid_grad.py +++ b/mindspore/ops/_op_impl/akg/gpu/hsigmoid_grad.py @@ -13,9 +13,9 @@ # limitations under the License. """HSigmoidGrad op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -hsigmoidgrad_op_info = AkgRegOp("HSigmoidGrad") \ +hsigmoidgrad_op_info = AkgGpuRegOp("HSigmoidGrad") \ .fusion_type("OPAQUE") \ .input(0, "y_grad") \ .input(1, "x") \ diff --git a/mindspore/ops/_op_impl/akg/gpu/hswish.py b/mindspore/ops/_op_impl/akg/gpu/hswish.py index 29f20bafae..77d2c3b50c 100644 --- a/mindspore/ops/_op_impl/akg/gpu/hswish.py +++ b/mindspore/ops/_op_impl/akg/gpu/hswish.py @@ -13,9 +13,9 @@ # limitations under the License. 
"""HSwish op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -hswish_op_info = AkgRegOp("HSwish") \ +hswish_op_info = AkgGpuRegOp("HSwish") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .output(0, "output") \ diff --git a/mindspore/ops/_op_impl/akg/gpu/hswish_grad.py b/mindspore/ops/_op_impl/akg/gpu/hswish_grad.py index 38e8c78e28..3857486f0c 100644 --- a/mindspore/ops/_op_impl/akg/gpu/hswish_grad.py +++ b/mindspore/ops/_op_impl/akg/gpu/hswish_grad.py @@ -13,9 +13,9 @@ # limitations under the License. """HSwishGrad op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -hswish_grad_op_info = AkgRegOp("HSwishGrad") \ +hswish_grad_op_info = AkgGpuRegOp("HSwishGrad") \ .fusion_type("OPAQUE") \ .input(0, "y_grad") \ .input(1, "x") \ diff --git a/mindspore/ops/_op_impl/akg/gpu/lessequal.py b/mindspore/ops/_op_impl/akg/gpu/lessequal.py index a8babf7ae4..58c9c7f90a 100644 --- a/mindspore/ops/_op_impl/akg/gpu/lessequal.py +++ b/mindspore/ops/_op_impl/akg/gpu/lessequal.py @@ -13,9 +13,9 @@ # limitations under the License. """LessEqual op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -lessequal_op_info = AkgRegOp("LessEqual") \ +lessequal_op_info = AkgGpuRegOp("LessEqual") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .input(1, "y") \ diff --git a/mindspore/ops/_op_impl/akg/gpu/logical_and.py b/mindspore/ops/_op_impl/akg/gpu/logical_and.py index da5b696512..58abcd8064 100644 --- a/mindspore/ops/_op_impl/akg/gpu/logical_and.py +++ b/mindspore/ops/_op_impl/akg/gpu/logical_and.py @@ -13,9 +13,9 @@ # limitations under the License. 
"""LogicalAnd op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -logicaland_op_info = AkgRegOp("LogicalAnd") \ +logicaland_op_info = AkgGpuRegOp("LogicalAnd") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .input(1, "y") \ @@ -23,6 +23,7 @@ logicaland_op_info = AkgRegOp("LogicalAnd") \ .dtype_format(DataType.BOOL_Default, DataType.BOOL_Default, DataType.BOOL_Default) \ .get_op_info() + @op_info_register(logicaland_op_info) def _logical_and_akg(): """LogicalAnd register""" diff --git a/mindspore/ops/_op_impl/akg/gpu/logical_not.py b/mindspore/ops/_op_impl/akg/gpu/logical_not.py index 4b3c7bf647..33815f489a 100644 --- a/mindspore/ops/_op_impl/akg/gpu/logical_not.py +++ b/mindspore/ops/_op_impl/akg/gpu/logical_not.py @@ -13,15 +13,16 @@ # limitations under the License. """LogicalNot op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -logical_not_op_info = AkgRegOp("LogicalNot") \ +logical_not_op_info = AkgGpuRegOp("LogicalNot") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .output(0, "output") \ .dtype_format(DataType.BOOL_Default, DataType.BOOL_Default) \ .get_op_info() + @op_info_register(logical_not_op_info) def _logical_not_akg(): """LogicalNot AutoDiff register""" diff --git a/mindspore/ops/_op_impl/akg/gpu/logical_or.py b/mindspore/ops/_op_impl/akg/gpu/logical_or.py index 3a642511c6..163674ac2a 100644 --- a/mindspore/ops/_op_impl/akg/gpu/logical_or.py +++ b/mindspore/ops/_op_impl/akg/gpu/logical_or.py @@ -13,9 +13,9 @@ # limitations under the License. 
"""LogicalOr op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -logicalor_op_info = AkgRegOp("LogicalOr") \ +logicalor_op_info = AkgGpuRegOp("LogicalOr") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .input(1, "y") \ @@ -23,6 +23,7 @@ logicalor_op_info = AkgRegOp("LogicalOr") \ .dtype_format(DataType.BOOL_Default, DataType.BOOL_Default, DataType.BOOL_Default) \ .get_op_info() + @op_info_register(logicalor_op_info) def _logical_or_akg(): """LogicalOr register""" diff --git a/mindspore/ops/_op_impl/akg/gpu/mean.py b/mindspore/ops/_op_impl/akg/gpu/mean.py index b46b701b91..dd997ec0f1 100644 --- a/mindspore/ops/_op_impl/akg/gpu/mean.py +++ b/mindspore/ops/_op_impl/akg/gpu/mean.py @@ -13,9 +13,9 @@ # limitations under the License. """SimpleMean op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -mean_op_info = AkgRegOp("SimpleMean") \ +mean_op_info = AkgGpuRegOp("SimpleMean") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .output(0, "output") \ diff --git a/mindspore/ops/_op_impl/akg/gpu/mean_grad.py b/mindspore/ops/_op_impl/akg/gpu/mean_grad.py index e3e0121c20..ae4620305a 100644 --- a/mindspore/ops/_op_impl/akg/gpu/mean_grad.py +++ b/mindspore/ops/_op_impl/akg/gpu/mean_grad.py @@ -13,9 +13,9 @@ # limitations under the License. 
"""SimpleMeanGrad op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -mean_grad_op_info = AkgRegOp("SimpleMeanGrad") \ +mean_grad_op_info = AkgGpuRegOp("SimpleMeanGrad") \ .fusion_type("OPAQUE") \ .input(0, "HEAD") \ .output(0, "output") \ diff --git a/mindspore/ops/_op_impl/akg/gpu/mul.py b/mindspore/ops/_op_impl/akg/gpu/mul.py index db5b1460ed..0da7b3fb6c 100644 --- a/mindspore/ops/_op_impl/akg/gpu/mul.py +++ b/mindspore/ops/_op_impl/akg/gpu/mul.py @@ -13,9 +13,9 @@ # limitations under the License. """Mul op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -mul_op_info = AkgRegOp("Mul") \ +mul_op_info = AkgGpuRegOp("Mul") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .input(1, "y") \ diff --git a/mindspore/ops/_op_impl/akg/gpu/notequal.py b/mindspore/ops/_op_impl/akg/gpu/notequal.py index dc13449fc1..b9c9c55faf 100644 --- a/mindspore/ops/_op_impl/akg/gpu/notequal.py +++ b/mindspore/ops/_op_impl/akg/gpu/notequal.py @@ -13,9 +13,9 @@ # limitations under the License. """NotEqual op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -notequal_op_info = AkgRegOp("NotEqual") \ +notequal_op_info = AkgGpuRegOp("NotEqual") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .input(1, "y") \ diff --git a/mindspore/ops/_op_impl/akg/gpu/relu6.py b/mindspore/ops/_op_impl/akg/gpu/relu6.py index 31bfebcd8d..33ae7f4dad 100644 --- a/mindspore/ops/_op_impl/akg/gpu/relu6.py +++ b/mindspore/ops/_op_impl/akg/gpu/relu6.py @@ -13,9 +13,9 @@ # limitations under the License. 
"""ReLU6 op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -relu_op_info = AkgRegOp("ReLU6") \ +relu_op_info = AkgGpuRegOp("ReLU6") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .output(0, "output") \ diff --git a/mindspore/ops/_op_impl/akg/gpu/relu6_grad.py b/mindspore/ops/_op_impl/akg/gpu/relu6_grad.py index 83d93f3077..c6ed702247 100644 --- a/mindspore/ops/_op_impl/akg/gpu/relu6_grad.py +++ b/mindspore/ops/_op_impl/akg/gpu/relu6_grad.py @@ -13,9 +13,9 @@ # limitations under the License. """ReLU6Grad op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -relu_grad_op_info = AkgRegOp("ReLU6Grad") \ +relu_grad_op_info = AkgGpuRegOp("ReLU6Grad") \ .fusion_type("OPAQUE") \ .input(0, "y_grad") \ .input(1, "x") \ diff --git a/mindspore/ops/_op_impl/akg/gpu/squeeze.py b/mindspore/ops/_op_impl/akg/gpu/squeeze.py index cebf6ff1f3..8761b64890 100644 --- a/mindspore/ops/_op_impl/akg/gpu/squeeze.py +++ b/mindspore/ops/_op_impl/akg/gpu/squeeze.py @@ -13,9 +13,9 @@ # limitations under the License. """Squeeze op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -squeeze_op_info = AkgRegOp("Squeeze") \ +squeeze_op_info = AkgGpuRegOp("Squeeze") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .output(0, "output") \ diff --git a/mindspore/ops/_op_impl/akg/gpu/squeeze_grad.py b/mindspore/ops/_op_impl/akg/gpu/squeeze_grad.py index 17e45a327a..41eacbf18f 100644 --- a/mindspore/ops/_op_impl/akg/gpu/squeeze_grad.py +++ b/mindspore/ops/_op_impl/akg/gpu/squeeze_grad.py @@ -13,9 +13,9 @@ # limitations under the License. 
"""SqueezeGrad op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -squeeze_grad_op_info = AkgRegOp("SqueezeGrad") \ +squeeze_grad_op_info = AkgGpuRegOp("SqueezeGrad") \ .fusion_type("OPAQUE") \ .input(0, "y_grad") \ .output(0, "output") \ diff --git a/mindspore/ops/_op_impl/akg/gpu/sub.py b/mindspore/ops/_op_impl/akg/gpu/sub.py index 06b92fb49e..eaa8124067 100644 --- a/mindspore/ops/_op_impl/akg/gpu/sub.py +++ b/mindspore/ops/_op_impl/akg/gpu/sub.py @@ -13,9 +13,9 @@ # limitations under the License. """Sub op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -sub_op_info = AkgRegOp("Sub") \ +sub_op_info = AkgGpuRegOp("Sub") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .input(1, "y") \ @@ -25,6 +25,7 @@ sub_op_info = AkgRegOp("Sub") \ .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \ .get_op_info() + @op_info_register(sub_op_info) def _sub_akg(): """Sub AutoDiff register""" diff --git a/mindspore/ops/_op_impl/akg/gpu/tile.py b/mindspore/ops/_op_impl/akg/gpu/tile.py index 8c9de00979..e8e634d9a1 100644 --- a/mindspore/ops/_op_impl/akg/gpu/tile.py +++ b/mindspore/ops/_op_impl/akg/gpu/tile.py @@ -13,9 +13,9 @@ # limitations under the License. 
"""Tile op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -tile_op_info = AkgRegOp("Tile") \ +tile_op_info = AkgGpuRegOp("Tile") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .output(0, "output") \ diff --git a/mindspore/ops/_op_impl/akg/greater.py b/mindspore/ops/_op_impl/akg/greater.py deleted file mode 100644 index 941946163a..0000000000 --- a/mindspore/ops/_op_impl/akg/greater.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Greater op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Greater", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float16", "float32", "float32" - ], - "format": [ - "DefaultFormat", "NC1HWC0", "DefaultFormat", "NC1HWC0" - ], - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float16", "float16", "float32", "float32" - ], - "format": [ - "DefaultFormat", "NC1HWC0", "DefaultFormat", "NC1HWC0" - ], - "name": "y" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "bool", "bool", "bool", "bool" - ], - "format": [ - "DefaultFormat", "NC1HWC0", "DefaultFormat", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _greater_akg(): - """Greater AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/greater_equal.py b/mindspore/ops/_op_impl/akg/greater_equal.py deleted file mode 100644 index 11642baa86..0000000000 --- a/mindspore/ops/_op_impl/akg/greater_equal.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""GreaterEqual op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "GreaterEqual", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - }, - { - "index": 1, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "y" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "bool", "bool", "bool", "bool", "bool", "bool" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _greater_equal_akg(): - """Equal AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/inplace_assign.py b/mindspore/ops/_op_impl/akg/inplace_assign.py deleted file mode 100644 index 1cc40abe9b..0000000000 --- a/mindspore/ops/_op_impl/akg/inplace_assign.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""InplaceAssign op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "InplaceAssign", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - { - "name": "fake_output", - "param_type": "optional", - "type": "bool" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ" - ], - "name": "x" - }, - { - "index": 1, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ" - ], - "name": "y" - }, - { - "index": 2, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ" - ], - "name": "z" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ" - ], - "name": "output" - } - ] -}""") -def _inplace_assign_akg(): - """InplaceAssign AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/less.py b/mindspore/ops/_op_impl/akg/less.py deleted file mode 100644 index 499ed2e8fc..0000000000 --- a/mindspore/ops/_op_impl/akg/less.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance 
with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Less op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Less", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float16" - ], - "format": [ - "DefaultFormat", "NC1HWC0" - ], - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float16", "float16" - ], - "format": [ - "DefaultFormat", "NC1HWC0" - ], - "name": "y" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "bool", "bool" - ], - "format": [ - "DefaultFormat", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _less_akg(): - """Less AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/less_equal.py b/mindspore/ops/_op_impl/akg/less_equal.py deleted file mode 100644 index 97fbdec090..0000000000 --- a/mindspore/ops/_op_impl/akg/less_equal.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""LessEqual op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "LessEqual", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - }, - { - "index": 1, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "y" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "bool", "bool", "bool", "bool", "bool", "bool" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _less_equal_akg(): - """Equal AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/log.py b/mindspore/ops/_op_impl/akg/log.py deleted file mode 100644 index 526538d17d..0000000000 --- a/mindspore/ops/_op_impl/akg/log.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Log op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Log", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "required", - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "output" - } - ] -}""") -def _log_akg(): - """Log AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/matmul.py b/mindspore/ops/_op_impl/akg/matmul.py deleted file mode 100644 index 084ba754fa..0000000000 --- a/mindspore/ops/_op_impl/akg/matmul.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""MatMul op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "MatMul", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - { - "name": "transpose_a", - "param_type": "optional", - "type": "bool" - }, - { - "name": "transpose_b", - "param_type": "optional", - "type": "bool" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat" - ], - "name": "x1" - }, - { - "index": 1, - "dtype": [ - "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat" - ], - "name": "x2" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat" - ], - "name": "output" - } - ] -}""") -def _matmul_akg(): - """MatMul AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/max.py b/mindspore/ops/_op_impl/akg/max.py deleted file mode 100644 index 21fd4ef9c4..0000000000 --- a/mindspore/ops/_op_impl/akg/max.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Max op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Max", - "imply_type": "AutoDiff", - "fusion_type": "COMMREDUCE", - "attr": [ - { - "name": "axis", - "param_type": "required", - "type": "listInt" - }, - { - "name": "keep_dims", - "param_type": "required", - "type": "bool" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _max_akg(): - """Max AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/max_pool_grad_with_argmax.py b/mindspore/ops/_op_impl/akg/max_pool_grad_with_argmax.py deleted file mode 100644 index 4adad3eb88..0000000000 --- a/mindspore/ops/_op_impl/akg/max_pool_grad_with_argmax.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""MaxPoolGradWithArgmax op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "MaxPoolGradWithArgmax", - "imply_type": "AutoDiff", - "fusion_type": "CONVLUTION", - "attr": [ - { - "name": "pad_mode", - "param_type": "optional", - "type": "str" - }, - { - "name": "window", - "param_type": "optional", - "type": "int" - }, - { - "name": "pad", - "param_type": "optional", - "type": "int" - }, - { - "name": "stride", - "param_type": "optional", - "type": "int" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float16" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat" - ], - "name": "argmax" - }, - { - "index": 2, - "dtype": [ - "float16", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "grad" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _max_pool_grad_with_argmax_akg(): - """MaxPoolGradWithArgmax AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/max_pool_with_argmax.py b/mindspore/ops/_op_impl/akg/max_pool_with_argmax.py deleted file mode 100644 index 3ae36d4793..0000000000 --- a/mindspore/ops/_op_impl/akg/max_pool_with_argmax.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""MaxPoolWithArgmax op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "MaxPoolWithArgmax", - "imply_type": "AutoDiff", - "fusion_type": "CONVLUTION", - "attr": [ - { - "name": "pad_mode", - "param_type": "optional", - "type": "str" - }, - { - "name": "window", - "param_type": "optional", - "type": "int" - }, - { - "name": "pad", - "param_type": "optional", - "type": "int" - }, - { - "name": "stride", - "param_type": "optional", - "type": "int" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16" - ], - "format": [ - "NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16" - ], - "format": [ - "NC1HWC0" - ], - "name": "output" - }, - { - "index": 1, - "dtype": [ - "float16" - ], - "format": [ - "DefaultFormat" - ], - "name": "argmax" - } - ] -}""") -def _max_pool_with_argmax_akg(): - """MaxPoolWithArgmax AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/maximum.py b/mindspore/ops/_op_impl/akg/maximum.py deleted file mode 100644 index 8d8de5270a..0000000000 --- a/mindspore/ops/_op_impl/akg/maximum.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Maximum op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Maximum", - "imply_type": "AutoDiff", - "fusion_type": "COMMREDUCE", - "attr": [], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "param_type": "required", - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "param_type": "required", - "name": "y" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _maximum_akg(): - """Maximum AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/mean.py b/mindspore/ops/_op_impl/akg/mean.py deleted file mode 100644 index 0b49e76865..0000000000 --- a/mindspore/ops/_op_impl/akg/mean.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""SimpleMean op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "SimpleMean", - "imply_type": "AutoDiff", - "fusion_type": "COMMREDUCE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _mean_akg(): - """SimpleMean AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/mean_grad.py b/mindspore/ops/_op_impl/akg/mean_grad.py deleted file mode 100644 index 3b8379d1f0..0000000000 --- a/mindspore/ops/_op_impl/akg/mean_grad.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""SimpleMeanGrad op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "SimpleMeanGrad", - "imply_type": "AutoDiff", - "fusion_type": "COMMREDUCE", - "attr": [ - { - "name": "input_shape", - "param_type": "required", - "type": "listInt" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "HEAD" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _mean_grad_akg(): - """SimpleMeanGrad AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/minimum.py b/mindspore/ops/_op_impl/akg/minimum.py deleted file mode 100644 index 759df2085f..0000000000 --- a/mindspore/ops/_op_impl/akg/minimum.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Minimum op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Minimum", - "imply_type": "AutoDiff", - "fusion_type": "COMMREDUCE", - "attr": [], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32", - "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "required", - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32", - "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "required", - "name": "y" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32", - "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "output" - } - ] -}""") -def _minimum_akg(): - """Minimum AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/mul.py b/mindspore/ops/_op_impl/akg/mul.py deleted file mode 100644 index ab02c2d89e..0000000000 --- a/mindspore/ops/_op_impl/akg/mul.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Mul op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Mul", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - { - "name": "x_shape", - "param_type": "required", - "type": "listInt" - }, - { - "name": "y_shape", - "param_type": "required", - "type": "listInt" - }, - { - "name": "data_format", - "param_type": "required", - "type": "listStr" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32", "float16", "float32", - "float16", "float32" - ], - "format": [ - "FracZ", "FracZ", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "required", - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float16", "float32", "float16", "float32", "float16", "float32", - "float16", "float32" - ], - "format": [ - "FracZ", "FracZ", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "required", - "name": "y" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32", "float16", "float32", - "float16", "float32" - ], - "format": [ - "FracZ", "FracZ", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "output" - } - ] -}""") -def _mul_akg(): - """Mul AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/neg.py b/mindspore/ops/_op_impl/akg/neg.py deleted file 
mode 100644 index bc00d60271..0000000000 --- a/mindspore/ops/_op_impl/akg/neg.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Neg op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Neg", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32", - "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "required", - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32", - "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "output" - } - ] -}""") -def _neg_akg(): - """Neg AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/one_hot.py b/mindspore/ops/_op_impl/akg/one_hot.py deleted file mode 100644 index c5034dbbd4..0000000000 --- a/mindspore/ops/_op_impl/akg/one_hot.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# 
Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""OneHot op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "OneHot", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - { - "name": "depth", - "param_type": "required", - "type": "int" - }, - { - "name": "axis", - "param_type": "required", - "type": "int" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "int32", "int32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat" - ], - "name": "indices" - }, - { - "index": 1, - "dtype": [ - "int32", "float32", "float16" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat" - ], - "name": "on_value" - }, - { - "index": 2, - "dtype": [ - "int32", "float32", "float16" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat" - ], - "name": "off_value" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "int32", "float32", "float16" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat" - ], - "name": "output" - } - ] -}""") -def _one_hot_akg(): - """OneHot AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/pow.py b/mindspore/ops/_op_impl/akg/pow.py deleted file mode 100644 index d782968c05..0000000000 --- a/mindspore/ops/_op_impl/akg/pow.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# 
Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Pow op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Pow", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "int32", "float16", "int32", "float32", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0" - ], - "param_type": "required", - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float16", "int32", "float16", "int32", "float32", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0" - ], - "param_type": "required", - "name": "power" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "int32", "float16", "int32", "float32", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _power_akg(): - """Pow AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/real_div.py b/mindspore/ops/_op_impl/akg/real_div.py deleted file mode 100644 index 9fa37a24e3..0000000000 --- a/mindspore/ops/_op_impl/akg/real_div.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); 
-# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""RealDiv op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "RealDiv", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32", - "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "required", - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float16", "float32", "float16", "float32", - "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "required", - "name": "y" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32", - "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "output" - } - ] -}""") -def _real_div_akg(): - """RealDiv AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/reciprocal.py b/mindspore/ops/_op_impl/akg/reciprocal.py deleted file mode 100644 index 9fd7cc40b4..0000000000 --- a/mindspore/ops/_op_impl/akg/reciprocal.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use 
this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Reciprocal op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Reciprocal", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _reciprocal_akg(): - """Reciprocal AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/reduce_max.py b/mindspore/ops/_op_impl/akg/reduce_max.py deleted file mode 100644 index b9db8ea83a..0000000000 --- a/mindspore/ops/_op_impl/akg/reduce_max.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""ReduceMax op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "ReduceMax", - "imply_type": "AutoDiff", - "fusion_type": "COMMREDUCE", - "attr": [ - { - "name": "axis", - "param_type": "required", - "type": "listInt" - }, - { - "name": "keep_dims", - "param_type": "required", - "type": "bool" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float16" - ], - "format": [ - "DefaultFormat", "NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float16" - ], - "format": [ - "DefaultFormat", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _reduce_max_akg(): - """ReduceMax AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/reduce_mean.py b/mindspore/ops/_op_impl/akg/reduce_mean.py deleted file mode 100644 index 0a4ffdf221..0000000000 --- a/mindspore/ops/_op_impl/akg/reduce_mean.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""ReduceMean op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "ReduceMean", - "imply_type": "AutoDiff", - "fusion_type": "COMMREDUCE", - "attr": [ - { - "name": "axis", - "param_type": "required", - "type": "listInt" - }, - { - "name": "keep_dims", - "param_type": "required", - "type": "bool" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _reduce_mean_akg(): - """ReduceMean AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/reduce_sum.py b/mindspore/ops/_op_impl/akg/reduce_sum.py deleted file mode 100644 index 20d091ac76..0000000000 --- a/mindspore/ops/_op_impl/akg/reduce_sum.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""ReduceSum op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "ReduceSum", - "imply_type": "AutoDiff", - "fusion_type": "COMMREDUCE", - "attr": [ - { - "name": "axis", - "param_type": "required", - "type": "listInt" - }, - { - "name": "keep_dims", - "param_type": "required", - "type": "bool" - }, - { - "name": "atomic_add", - "param_type": "optional", - "type": "str" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32", - "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "required", - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32", - "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "output" - } - ] -}""") -def _reduce_sum_akg(): - """ReduceSum AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/relu.py b/mindspore/ops/_op_impl/akg/relu.py deleted file mode 100644 index b32725f885..0000000000 --- a/mindspore/ops/_op_impl/akg/relu.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""ReLU op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "ReLU", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _relu_akg(): - """ReLU AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/relu_grad.py b/mindspore/ops/_op_impl/akg/relu_grad.py deleted file mode 100644 index c785b750fe..0000000000 --- a/mindspore/ops/_op_impl/akg/relu_grad.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""ReluGrad op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "ReluGrad", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0" - ], - "name": "y_backprop" - }, - { - "index": 1, - "dtype": [ - "float16", "float32", "float16" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _relu_grad_akg(): - """ReluGrad AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/reshape.py b/mindspore/ops/_op_impl/akg/reshape.py deleted file mode 100644 index d200b66fa2..0000000000 --- a/mindspore/ops/_op_impl/akg/reshape.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Reshape op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Reshape", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - { - "name": "shape", - "param_type": "required", - "type": "listInt" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "tensor" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _reshape_akg(): - """Reshape AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/round.py b/mindspore/ops/_op_impl/akg/round.py deleted file mode 100644 index 0625c3ceda..0000000000 --- a/mindspore/ops/_op_impl/akg/round.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Round op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Round", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _round_akg(): - """Round AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/rsqrt.py b/mindspore/ops/_op_impl/akg/rsqrt.py deleted file mode 100644 index 9264864f91..0000000000 --- a/mindspore/ops/_op_impl/akg/rsqrt.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Rsqrt op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Rsqrt", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "param_type": "required", - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _rsqrt_akg(): - """Rsqrt AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/select.py b/mindspore/ops/_op_impl/akg/select.py deleted file mode 100644 index 006c6a5444..0000000000 --- a/mindspore/ops/_op_impl/akg/select.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Select op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Select", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "bool", "bool", "bool", "bool", "bool", "bool" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0" - ], - "param_type": "required", - "name": "condition" - }, - { - "index": 1, - "dtype": [ - "float16", "int32", "float16", "int32", "float32", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0" - ], - "param_type": "required", - "name": "x" - }, - { - "index": 2, - "dtype": [ - "float16", "int32", "float16", "int32", "float32", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0" - ], - "param_type": "required", - "name": "y" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "int32", "float16", "int32", "float32", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _select_akg(): - """Select AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/softmax.py b/mindspore/ops/_op_impl/akg/softmax.py deleted file mode 100644 index a41c2aef36..0000000000 --- a/mindspore/ops/_op_impl/akg/softmax.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Softmax op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Softmax", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - { - "name": "axis", - "param_type": "required", - "type": "listInt" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _softmax_akg(): - """Softmax AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/sparse_softmax_cross_entropy_with_logits.py b/mindspore/ops/_op_impl/akg/sparse_softmax_cross_entropy_with_logits.py deleted file mode 100644 index e9e828f312..0000000000 --- a/mindspore/ops/_op_impl/akg/sparse_softmax_cross_entropy_with_logits.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""SparseSoftmaxCrossEntropyWithLogits op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "SparseSoftmaxCrossEntropyWithLogits", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - { - "name": "is_grad", - "param_type": "optional", - "type": "bool" - }, - { - "name": "sens", - "param_type": "optional", - "type": "float" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float32" - ], - "format": [ - "DefaultFormat" - ], - "name": "features" - }, - { - "index": 1, - "dtype": [ - "int32" - ], - "format": [ - "DefaultFormat" - ], - "name": "labels" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float32" - ], - "format": [ - "DefaultFormat" - ], - "name": "output" - } - ] -}""") -def _sparse_softmax_cross_entropy_with_logits_akg(): - """SparseSoftmaxCrossEntropyWithLogits AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/sqrt.py b/mindspore/ops/_op_impl/akg/sqrt.py deleted file mode 100644 index fcaa84b3d4..0000000000 --- a/mindspore/ops/_op_impl/akg/sqrt.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Sqrt op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Sqrt", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "param_type": "required", - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _sqrt_akg(): - """Sqrt AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/strided_slice.py b/mindspore/ops/_op_impl/akg/strided_slice.py deleted file mode 100644 index bdbd8dfc2f..0000000000 --- a/mindspore/ops/_op_impl/akg/strided_slice.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""StridedSlice op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "StridedSlice", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - { - "name": "begin", - "param_type": "required", - "type": "listInt" - }, - { - "name": "end", - "param_type": "required", - "type": "listInt" - }, - { - "name": "strides", - "param_type": "required", - "type": "listInt" - }, - { - "name": "begin_mask", - "param_type": "required", - "type": "int" - }, - { - "name": "end_mask", - "param_type": "required", - "type": "int" - }, - { - "name": "ellipsis_mask", - "param_type": "required", - "type": "int" - }, - { - "name": "new_axis_mask", - "param_type": "required", - "type": "int" - }, - { - "name": "shrink_axis_mask", - "param_type": "required", - "type": "int" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _strided_slice_akg(): - """StridedSlice AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/sub.py b/mindspore/ops/_op_impl/akg/sub.py deleted file 
mode 100644 index 846aa280bb..0000000000 --- a/mindspore/ops/_op_impl/akg/sub.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Sub op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Sub", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32", - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", - "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "required", - "name": "x" - }, - { - "index": 1, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32", - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", - "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "required", - "name": "y" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32", - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", 
"DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", - "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "output" - } - ] -}""") -def _sub_akg(): - """Sub AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/sum.py b/mindspore/ops/_op_impl/akg/sum.py deleted file mode 100644 index 501b387b25..0000000000 --- a/mindspore/ops/_op_impl/akg/sum.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Sum op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Sum", - "imply_type": "AutoDiff", - "fusion_type": "COMMREDUCE", - "attr": [ - { - "name": "axis", - "param_type": "required", - "type": "listInt" - }, - { - "name": "keepdims", - "param_type": "required", - "type": "bool" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32", - "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "required", - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32", - "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "output" - } - ] -}""") -def _sum_akg(): - """Sum AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/tile.py b/mindspore/ops/_op_impl/akg/tile.py deleted file mode 100644 index bd13978fe7..0000000000 --- a/mindspore/ops/_op_impl/akg/tile.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Tile op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Tile", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - { - "name": "multiples", - "param_type": "required", - "type": "listInt" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _tile_akg(): - """Tile AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/zeros_like.py b/mindspore/ops/_op_impl/akg/zeros_like.py deleted file mode 100644 index a02ece22d7..0000000000 --- a/mindspore/ops/_op_impl/akg/zeros_like.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""ZerosLike op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "ZerosLike", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _zeros_like_akg(): - """ZerosLike AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/tbe/__init__.py b/mindspore/ops/_op_impl/tbe/__init__.py index 8009280ab8..317509b5a9 100644 --- a/mindspore/ops/_op_impl/tbe/__init__.py +++ b/mindspore/ops/_op_impl/tbe/__init__.py @@ -133,6 +133,7 @@ from .sparse_apply_proximal_adagrad import _sparse_apply_proximal_adagrad from .apply_proximal_adagrad import _apply_proximal_adagrad from .transpose_d import _transpose_d_tbe from .unsorted_segment_sum import _unsorted_segment_sum_tbe +from .unsorted_segment_prod import _unsorted_segment_prod_tbe from .logsoftmax_grad import _logsoftmax_grad_tbe from .logsoftmax import _logsoftmax_tbe from .select import _select_tbe @@ -285,3 +286,5 @@ from .mod import _mod_tbe from .max_pool_grad_grad import _max_pool_grad_grad_tbe from .max_pool_grad_grad_with_argmax import _max_pool_grad_grad_with_argmax_tbe from .tensor_move import _tensor_move_tbe +from .population_count import _population_count_tbe +from .parallel_concat import _parallel_concat_tbe diff --git a/mindspore/ops/_op_impl/tbe/parallel_concat.py b/mindspore/ops/_op_impl/tbe/parallel_concat.py new file mode 100644 index 0000000000..46d8736fab --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/parallel_concat.py @@ -0,0 +1,80 @@ +# 
Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""ParallelConcat op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +parallel_concat_op_info = TBERegOp("ParallelConcat") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("parallel_concat.so") \ + .compute_cost(10) \ + .kernel_name("parallel_concat") \ + .partial_flag(True) \ + .attr("shape", "required", "listInt", "all") \ + .attr("N", "required", "int", "all") \ + .input(0, "values", False, "dynamic", "all") \ + .output(0, "output_data", False, "required", "all") \ + .dtype_format(DataType.BOOL_Default, DataType.BOOL_Default) \ + .dtype_format(DataType.BOOL_5HD, DataType.BOOL_5HD) \ + .dtype_format(DataType.I8_Default, DataType.I8_Default) \ + .dtype_format(DataType.I8_5HD, DataType.I8_5HD) \ + .dtype_format(DataType.U8_Default, DataType.U8_Default) \ + .dtype_format(DataType.U8_5HD, DataType.U8_5HD) \ + .dtype_format(DataType.I16_Default, DataType.I16_Default) \ + .dtype_format(DataType.I16_5HD, DataType.I16_5HD) \ + .dtype_format(DataType.U16_Default, DataType.U16_Default) \ + .dtype_format(DataType.U16_5HD, DataType.U16_5HD) \ + .dtype_format(DataType.I32_Default, DataType.I32_Default) \ + .dtype_format(DataType.I32_5HD, DataType.I32_5HD) \ + .dtype_format(DataType.U32_Default, DataType.U32_Default) \ + .dtype_format(DataType.U32_5HD, 
DataType.U32_5HD) \ + .dtype_format(DataType.I64_Default, DataType.I64_Default) \ + .dtype_format(DataType.I64_5HD, DataType.I64_5HD) \ + .dtype_format(DataType.U64_Default, DataType.U64_Default) \ + .dtype_format(DataType.U64_5HD, DataType.U64_5HD) \ + .dtype_format(DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.F16_5HD, DataType.F16_5HD) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default) \ + .dtype_format(DataType.F32_5HD, DataType.F32_5HD) \ + .dtype_format(DataType.BOOL_NHWC, DataType.BOOL_NHWC) \ + .dtype_format(DataType.BOOL_NCHW, DataType.BOOL_NCHW) \ + .dtype_format(DataType.I8_NHWC, DataType.I8_NHWC) \ + .dtype_format(DataType.I8_NCHW, DataType.I8_NCHW) \ + .dtype_format(DataType.U8_NHWC, DataType.U8_NHWC) \ + .dtype_format(DataType.U8_NCHW, DataType.U8_NCHW) \ + .dtype_format(DataType.I16_NHWC, DataType.I16_NHWC) \ + .dtype_format(DataType.I16_NCHW, DataType.I16_NCHW) \ + .dtype_format(DataType.U16_NHWC, DataType.U16_NHWC) \ + .dtype_format(DataType.U16_NCHW, DataType.U16_NCHW) \ + .dtype_format(DataType.I32_NHWC, DataType.I32_NHWC) \ + .dtype_format(DataType.I32_NCHW, DataType.I32_NCHW) \ + .dtype_format(DataType.U32_NHWC, DataType.U32_NHWC) \ + .dtype_format(DataType.U32_NCHW, DataType.U32_NCHW) \ + .dtype_format(DataType.I64_NHWC, DataType.I64_NHWC) \ + .dtype_format(DataType.I64_NCHW, DataType.I64_NCHW) \ + .dtype_format(DataType.U64_NHWC, DataType.U64_NHWC) \ + .dtype_format(DataType.U64_NCHW, DataType.U64_NCHW) \ + .dtype_format(DataType.F16_NHWC, DataType.F16_NHWC) \ + .dtype_format(DataType.F16_NCHW, DataType.F16_NCHW) \ + .dtype_format(DataType.F32_NHWC, DataType.F32_NHWC) \ + .dtype_format(DataType.F32_NCHW, DataType.F32_NCHW) \ + .get_op_info() + + +@op_info_register(parallel_concat_op_info) +def _parallel_concat_tbe(): + """ParallelConcat TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/population_count.py b/mindspore/ops/_op_impl/tbe/population_count.py new file mode 100644 index 
0000000000..14feded367 --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/population_count.py @@ -0,0 +1,38 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""PopulationCount op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +population_count_op_info = TBERegOp("PopulationCount") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("population_count.so") \ + .compute_cost(10) \ + .kernel_name("population_count") \ + .partial_flag(True) \ + .input(0, "x", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.I16_5HD, DataType.U8_5HD) \ + .dtype_format(DataType.I16_Default, DataType.U8_Default) \ + .dtype_format(DataType.U16_5HD, DataType.U8_5HD) \ + .dtype_format(DataType.U16_Default, DataType.U8_Default) \ + .get_op_info() + + +@op_info_register(population_count_op_info) +def _population_count_tbe(): + """PopulationCount TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/roi_align.py b/mindspore/ops/_op_impl/tbe/roi_align.py index bc4eed80ce..d392651217 100644 --- a/mindspore/ops/_op_impl/tbe/roi_align.py +++ b/mindspore/ops/_op_impl/tbe/roi_align.py @@ -27,7 +27,7 @@ roi_align_op_info = TBERegOp("ROIAlign") \ .attr("pooled_height", "required", "int", "all") \ .attr("pooled_width", "required", "int", "all") \ .attr("sample_num", 
"optional", "int", "all", "2") \ - .attr("roi_end_mode", "optional", "0,1", "1") \ + .attr("roi_end_mode", "optional", "int", "0,1", "1") \ .input(0, "features", False, "required", "all") \ .input(1, "rois", False, "required", "all") \ .input(2, "rois_n", False, "optional", "all") \ diff --git a/mindspore/ops/_op_impl/tbe/unsorted_segment_prod.py b/mindspore/ops/_op_impl/tbe/unsorted_segment_prod.py new file mode 100644 index 0000000000..40b04d17c3 --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/unsorted_segment_prod.py @@ -0,0 +1,48 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""UnsortedSegmentProdD op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +unsorted_segment_prod_d_op_info = TBERegOp("UnsortedSegmentProd") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("unsorted_segment_prod_d.so") \ + .compute_cost(10) \ + .kernel_name("unsorted_segment_prod_d") \ + .partial_flag(True) \ + .attr("num_segments", "required", "int", "all") \ + .input(0, "data", False, "required", "all") \ + .input(1, "segment_ids", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.F16_5HD, DataType.I32_Default, DataType.F16_5HD) \ + .dtype_format(DataType.F16_FracZ, DataType.I32_Default, DataType.F16_FracZ) \ + .dtype_format(DataType.F16_C1HWNCoC0, DataType.I32_Default, DataType.F16_C1HWNCoC0) \ + .dtype_format(DataType.F16_Default, DataType.I32_Default, DataType.F16_Default) \ + .dtype_format(DataType.F32_5HD, DataType.I32_Default, DataType.F32_5HD) \ + .dtype_format(DataType.F32_FracZ, DataType.I32_Default, DataType.F32_FracZ) \ + .dtype_format(DataType.F32_C1HWNCoC0, DataType.I32_Default, DataType.F32_C1HWNCoC0) \ + .dtype_format(DataType.F32_Default, DataType.I32_Default, DataType.F32_Default) \ + .dtype_format(DataType.I32_5HD, DataType.I32_Default, DataType.I32_5HD) \ + .dtype_format(DataType.I32_FracZ, DataType.I32_Default, DataType.I32_FracZ) \ + .dtype_format(DataType.I32_C1HWNCoC0, DataType.I32_Default, DataType.I32_C1HWNCoC0) \ + .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \ + .get_op_info() + + +@op_info_register(unsorted_segment_prod_d_op_info) +def _unsorted_segment_prod_tbe(): + """UnsortedSegmentProdD TBE register""" + return diff --git a/mindspore/ops/composite/base.py b/mindspore/ops/composite/base.py index b0f16d82bf..0f28d9572f 100644 --- a/mindspore/ops/composite/base.py +++ b/mindspore/ops/composite/base.py @@ -17,6 
+17,7 @@ """Basic composite operations.""" from functools import partial +from types import FunctionType from mindspore import context from ..._c_expression import EnvInstance_, GradOperation_, HyperMap_, Map_, MultitypeFuncGraph_, Tail_, \ @@ -25,6 +26,7 @@ from ...common import dtype as mstype from ...common.api import ms_function, _pynative_exec, _wrap_func from .. import functional as F from ...common.parameter import Parameter +from ...common.tensor import Tensor __all__ = [EnvInstance_, TupleAdd_, TupleSlice_, UnpackCall_, TupleGetItemTensor_] @@ -114,37 +116,48 @@ class GradOperation(GradOperation_): self.fn = None self.need_forward = False + def _pynative_forward_run(self, args, fn): + """ Pynative forward run to build grad graph. """ + if self.sens_param: + args = args[:-1] + for arg in args: + if not isinstance(arg, Tensor): + raise TypeError("grad inputs should be tensor in pynative mode") + if isinstance(fn, FunctionType): + _pynative_exec.set_grad_flag(True) + _pynative_exec.new_graph(fn, *args) + output = fn(*args) + _pynative_exec.end_graph(fn, output, *args) + else: + if fn.is_run and not fn.requires_grad: + raise ValueError("obj must set_grad.") + if not fn.is_run: + self.need_forward = True + print("already has forward run before grad by user") + if self.need_forward: + fn.set_grad() + fn(*args) + def __call__(self, fn, weights=None): grad_ = GradOperation('grad', self.get_all, self.get_by_list, self.sens_param) if self.grad_fn is None or self.fn != fn: - if self.get_by_list: - if context.get_context("mode") == context.GRAPH_MODE: + if context.get_context("mode") == context.GRAPH_MODE: + if self.get_by_list: @ms_function(obj=fn) def after_grad(*args): return grad_(fn, weights)(*args) else: - @_wrap_func + @ms_function(obj=fn) def after_grad(*args): - if fn.is_run and not fn.requires_grad: - raise ValueError("obj must set_grad.") - if not fn.is_run: - self.need_forward = True - print("already has forward run before grad by user") - if 
self.need_forward: - fn.set_grad() - if self.sens_param: - f_args = args[:-1] - fn(*f_args) - else: - fn(*args) - _pynative_exec.grad(grad_, fn, weights, *args) - out = _pynative_exec(*args) - _pynative_exec.clear() - return out + return grad_(fn)(*args) else: - @ms_function(obj=fn) + @_wrap_func def after_grad(*args): - return grad_(fn)(*args) + self._pynative_forward_run(args, fn) + _pynative_exec.grad(grad_, fn, weights, *args) + out = _pynative_exec(*args) + _pynative_exec.clear() + return out self.grad_fn = after_grad self.fn = fn return self.grad_fn diff --git a/mindspore/ops/functional.py b/mindspore/ops/functional.py index a5c3165ab1..2be011cb77 100644 --- a/mindspore/ops/functional.py +++ b/mindspore/ops/functional.py @@ -158,7 +158,6 @@ make_indexed_slices = Primitive('MakeIndexedSlices') indexed_slices_get_values = Primitive('IndexedSlicesGetValues') indexed_slices_get_indices = Primitive('IndexedSlicesGetIndices') indexed_slices_get_dense_shape = Primitive('IndexedSlicesGetDenseShape') -is_indexed_slices = Primitive('IsIndexedSlices') tensor_operator_registry.register('__add__', tensor_add) @@ -166,6 +165,7 @@ tensor_operator_registry.register('__sub__', tensor_sub) tensor_operator_registry.register('__mul__', tensor_mul) tensor_operator_registry.register('__truediv__', tensor_div) tensor_operator_registry.register('__mod__', tensor_mod) +tensor_operator_registry.register('__pow__', tensor_pow) tensor_operator_registry.register('__floordiv__', tensor_floordiv) #ms cannot support Tensor(True) compare tensor_operator_registry.register('__eq__', equal) diff --git a/mindspore/ops/op_info_register.py b/mindspore/ops/op_info_register.py index a7a60b7181..6ab915e369 100644 --- a/mindspore/ops/op_info_register.py +++ b/mindspore/ops/op_info_register.py @@ -215,10 +215,10 @@ class RegOp: class AkgRegOp(RegOp): """Class for Akg op info register.""" - def __init__(self, op_name): + def __init__(self, op_name, processor): super(AkgRegOp, self).__init__(op_name) - 
self.imply_type = "AutoDiff" - self.processor = "cuda" + self.imply_type = "AKG" + self.processor = processor def input(self, index=None, name=None, **kwargs): """ @@ -270,6 +270,16 @@ class AkgRegOp(RegOp): return self +class AkgGpuRegOp(AkgRegOp): + def __init__(self, op_name): + super(AkgGpuRegOp, self).__init__(op_name, "CUDA") + + +class AkgAscendRegOp(AkgRegOp): + def __init__(self, op_name): + super(AkgAscendRegOp, self).__init__(op_name, "AiCore") + + class AiCPURegOp(RegOp): """Class for AiCPU op info register""" diff --git a/mindspore/ops/operations/__init__.py b/mindspore/ops/operations/__init__.py index 8806487579..1602f2594d 100644 --- a/mindspore/ops/operations/__init__.py +++ b/mindspore/ops/operations/__init__.py @@ -27,11 +27,11 @@ from .array_ops import (Argmax, Argmin, Cast, Concat, Pack, Unpack, Rank, Reshape, ResizeNearestNeighbor, ArgMinWithValue, SameTypeShape, ScatterAdd, ScatterSub, ScatterMul, ScatterDiv, ScatterMax, ScatterMin, ScatterUpdate, ScalarToArray, ScalarToTensor, ScatterNd, ScatterNdUpdate, Select, - Shape, Size, Slice, Split, TransShape, EmbeddingLookup, + Shape, Size, Slice, Split, TransShape, ParallelConcat, Squeeze, StridedSlice, Tile, TensorScatterUpdate, - Transpose, TruncatedNormal, TupleToArray, UnsortedSegmentMin, + Transpose, TruncatedNormal, TupleToArray, UnsortedSegmentMin, UnsortedSegmentProd, UnsortedSegmentSum, SpaceToDepth, DepthToSpace, SpaceToBatch, BatchToSpace, - SpaceToBatchND, BatchToSpaceND, BroadcastTo, InplaceUpdate, ReverseSequence) + SpaceToBatchND, BatchToSpaceND, BroadcastTo, InplaceUpdate, ReverseSequence, EmbeddingLookup) from .comm_ops import (AllGather, AllReduce, _AlltoAll, ReduceScatter, Broadcast, _MirrorOperator, ReduceOp, _VirtualDataset, _VirtualDiv, _GetTensorSlice, @@ -62,7 +62,7 @@ from .nn_ops import (LSTM, SGD, Adam, SparseApplyAdam, SparseApplyLazyAdam, Appl DropoutDoMask, DropoutGrad, Dropout, DropoutGenMask, Flatten, FusedBatchNorm, BNTrainingReduce, BNTrainingUpdate, Gelu, Elu, - 
GetNext, L2Normalize, LayerNorm, L2Loss, CTCLoss, + GetNext, L2Normalize, LayerNorm, L2Loss, CTCLoss, CTCLossV2, LogSoftmax, MaxPool, DataFormatDimMap, AvgPool, Conv2DBackpropInput, ConfusionMulGrad, @@ -77,10 +77,10 @@ from .nn_ops import (LSTM, SGD, Adam, SparseApplyAdam, SparseApplyLazyAdam, Appl ApplyAdaMax, ApplyAdadelta, ApplyAdagrad, ApplyAdagradV2, ApplyAddSign, ApplyPowerSign, ApplyGradientDescent, ApplyProximalGradientDescent, ApplyRMSProp, ApplyCenteredRMSProp, BasicLSTMCell, InTopK) -from .other_ops import (Assign, IOU, BoundingBoxDecode, BoundingBoxEncode, - CheckValid, MakeRefKey, Partial, Depend, CheckBprop) from . import _quant_ops from ._quant_ops import * +from .other_ops import (Assign, IOU, BoundingBoxDecode, BoundingBoxEncode, PopulationCount, + CheckValid, MakeRefKey, Partial, Depend, CheckBprop, Push, Pull) from .thor_ops import * __all__ = [ @@ -260,6 +260,7 @@ __all__ = [ 'DepthwiseConv2dNative', 'UnsortedSegmentSum', 'UnsortedSegmentMin', + 'UnsortedSegmentProd', "AllGather", "AllReduce", "ReduceScatter", @@ -341,7 +342,12 @@ __all__ = [ "InTopK", "CropAndResize", "LRN", - "Mod" + "Mod", + "PopulationCount", + "ParallelConcat", + "EmbeddingLookup", + "Push", + "Pull" ] __all__.sort() diff --git a/mindspore/ops/operations/_inner_ops.py b/mindspore/ops/operations/_inner_ops.py index be7e901757..2d17da0028 100644 --- a/mindspore/ops/operations/_inner_ops.py +++ b/mindspore/ops/operations/_inner_ops.py @@ -394,76 +394,6 @@ class AscendDequant(PrimitiveWithInfer): return mstype.float16 -class EmbeddingLookup(PrimitiveWithInfer): - """ - Returns a slice of input tensor based on the specified indices. - - This Primitive has the similar functionality as GatherV2 operating on `axis = 0`, but has three more inputs: - `offset`, `reduce_scatter_flag` and `split_num`. This primitive runs on the host instead of devices. - - Inputs: - - **input_params** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`. 
- The Tensor slice, instead of the entire Tensor. - - **input_indices** (Tensor) - The shape of tensor is :math:`(y_1, y_2, ..., y_S)`. - Specifies the indices of elements of the original Tensor. Values can be out of range of `input_params`, - and the exceeding part will be filled with 0 in the output. - - **offset** (int) - Specifies the offset value of this `input_params` slice. Thus the real indices - are equal to `input_indices` minus `offset`. - - **reduce_scatter_flag** (bool) - Specifies whether perform reduce_scatter on host or not. - Only constant value is allowed. - - **split_num** (int) - Specifies the number of partitions of the reduce_scatter produces. This variable - is used only if `reduce_scatter_flag` is True. Only constant value is allowed. - - - Outputs: - Tensor, the shape of tensor is :math:`(z_1, z_2, ..., z_N)`. - - Examples: - >>> input_params = Tensor(np.array([[8, 9], [10, 11], [12, 13], [14, 15]]), mindspore.float32) - >>> input_indices = Tensor(np.array([[5, 2], [8, 5]]), mindspore.int32) - >>> offset = 4 - >>> reduce_scatter_flag = False - >>> split_num = 1 - >>> out = P.EmbeddingLookup()(input_params, input_indices, offset, reduce_scatter_flag, split_num) - [[[10, 11], [0 ,0]], [[0, 0], [10, 11]]] - """ - @prim_attr_register - def __init__(self): - """init index_select""" - self.__setattr_flag__ = True - self.init_prim_io_names(inputs=['params', 'indices', 'offset', 'reduce_scatter_flag', 'split_num'], - outputs=['output']) - self.add_prim_attr('primitive_target', 'CPU') - - def __infer__(self, params, indices, offset, reduce_scatter_flag=False, split_num=2): - validator.check_subclass("params", params['dtype'], mstype.tensor, self.name) - validator.check_tensor_type_same({"indices": indices['dtype']}, mstype.int_type, self.name) - validator.check_subclass("offset", offset['dtype'], mstype.int_, self.name) - validator.check_subclass("split_num", split_num['dtype'], mstype.int_, self.name) - if split_num['value'] < 1: - raise 
ValueError("The parameter 'split_num' must be positive, but got %d." % split_num) - params_shp = params['shape'] - out_shape = indices['shape'] + params_shp[1:] - if reduce_scatter_flag is None: - raise ValueError("The value of 'reduce_scatter_flag' is None.") - reduce_scatter_flag_value = reduce_scatter_flag['value'] - if split_num is None: - raise ValueError("The value of 'split_num_value' is None.") - split_num_value = split_num['value'] - if reduce_scatter_flag_value is True: - # Partition the tensor along the dimension 0. The shape size of dimension 0 should be divisible by - # (split_num * 8) - if out_shape[0] % (split_num_value * 8) != 0: - raise ValueError("The dimension 0 of the shape: %d, is not divisible by: %d." % - (out_shape[0], (split_num_value * 8))) - # After 'Concat' on host, the shape size of dimension 0 is: out_shape[0] // 8 - out_shape[0] = out_shape[0] // 8 - out = {'shape': out_shape, - 'dtype': params['dtype'], - 'value': None} - return out - - class SparseApplyFtrlNoReturn(PrimitiveWithInfer): """ Update relevant entries according to the FTRL-proximal scheme. @@ -747,7 +677,7 @@ class MatrixDiagPart(PrimitiveWithInfer): Tensor, data type same as input `x`. The shape should be x.shape[:-2] + [min(x.shape[-2:])]. Examples: - >>> x = Tensor([[[-1, 0], [0, 1]], [-1, 0], [0, 1]], [[-1, 0], [0, 1]]], mindspore.float32) + >>> x = Tensor([[[-1, 0], [0, 1]], [[-1, 0], [0, 1]], [[-1, 0], [0, 1]]], mindspore.float32) >>> assist = Tensor(np.arange(-12, 0).reshape(3, 2, 2), mindspore.float32) >>> matrix_diag_part = P.MatrixDiagPart() >>> result = matrix_diag_part(x, assist) @@ -789,11 +719,11 @@ class MatrixSetDiag(PrimitiveWithInfer): Tensor, data type same as input `x`. The shape same as `x`. 
Examples: - >>> x = Tensor([[[-1, 0], [0, 1]], [-1, 0], [0, 1]], [[-1, 0], [0, 1]]], mindspore.float32) + >>> x = Tensor([[[-1, 0], [0, 1]], [[-1, 0], [0, 1]], [[-1, 0], [0, 1]]], mindspore.float32) >>> diagonal = Tensor([[-1., 2.], [-1., 1.], [-1., 1.]], mindspore.float32) >>> matrix_set_diag = P.MatrixSetDiag() >>> result = matrix_set_diag(x, diagonal) - [[[-1, 0], [0, 2]], [-1, 0], [0, 1]], [[-1, 0], [0, 1]]] + [[[-1, 0], [0, 2]], [[-1, 0], [0, 1]], [[-1, 0], [0, 1]]] """ @@ -812,10 +742,10 @@ class MatrixSetDiag(PrimitiveWithInfer): validator.check("x shape", x_shape, "assist shape", assist_shape, Rel.EQ, self.name) if x_shape[-2] < x_shape[-1]: - validator.check("x shape excluding the last dimension", x_shape[:-1], "diagnoal shape", - diagonal_shape, Rel.EQ, self.name) + validator.check("diagnoal shape", diagonal_shape, "x shape excluding the last dimension", + x_shape[:-1], Rel.EQ, self.name) else: - validator.check("x shape excluding the second to last dimension", x_shape[:-2]+x_shape[-1:], - "diagonal shape", diagonal_shape, Rel.EQ, self.name) + validator.check("diagonal shape", diagonal_shape, "x shape excluding the second last dimension", + x_shape[:-2] + x_shape[-1:], Rel.EQ, self.name) return assist_shape diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py index 7b7e8b2b64..1e28a56db1 100644 --- a/mindspore/ops/operations/array_ops.py +++ b/mindspore/ops/operations/array_ops.py @@ -601,51 +601,6 @@ class SparseGatherV2(GatherV2): >>> out = P.SparseGatherV2()(input_params, input_indices, axis) """ -class EmbeddingLookup(PrimitiveWithInfer): - """ - Returns a slice of input tensor based on the specified indices and axis. This Primitive has the similar - functionality as GatherV2, but has one more inputs: `offset`. - This primitive runs on the acipu devices. - - Inputs: - - **params** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`. - The Tensor slice, instead of the entire Tensor. 
- - **indices** (Tensor) - The shape of tensor is :math:`(y_1, y_2, ..., y_S)`. - Specifies the indices of elements of the original Tensor. Values can be out of range of `params`, - and the exceeding part will be filled with 0 in the output. - The indices to do lookup operation whose data type should be mindspore.int32 or mindspore.int64. - - **offset** (int) - Specifies the offset value of this `params` slice. Thus the real indices - are equal to `indices` minus `offset`. - - - Outputs: - Tensor, the shape of tensor is :math:`(z_1, z_2, ..., z_N)`. - - Examples: - >>> params = Tensor(np.array([[8, 9], [10, 11], [12, 13], [14, 15]]), mindspore.float32) - >>> indices = Tensor(np.array([[5, 2], [8, 5]]), mindspore.int32) - >>> offset = 4 - >>> out = P.EmbeddingLookup()(params, indices, offset) - [[[10, 11], [0 ,0]], [[0, 0], [10, 11]]] - """ - @prim_attr_register - def __init__(self): - """init index_select""" - self.init_prim_io_names(inputs=['params', 'indices', 'offset'], - outputs=['output']) - - def __infer__(self, params, indices, offset): - validator.check_subclass("params", params['dtype'], mstype.tensor, self.name) - valid_types = (mstype.int32, mstype.int64) - validator.check_tensor_type_same({"indices": indices['dtype']}, valid_types, self.name) - validator.check_subclass("offset", offset['dtype'], mstype.int_, self.name) - params_shp = params['shape'] - out_shape = indices['shape'] + params_shp[1:] - out = {'shape': out_shape, - 'dtype': params['dtype'], - 'value': None} - return out - class Split(PrimitiveWithInfer): """ @@ -688,8 +643,10 @@ class Split(PrimitiveWithInfer): validator.check_int_range('axis value', self.axis, -dim, dim, Rel.INC_LEFT, self.name) validator.check_integer("output_num", self.output_num, 0, Rel.GT, self.name) output_valid_check = x_shape[self.axis] % self.output_num - validator.check_integer("the dimension which to split divides output_num", output_valid_check, 0, Rel.EQ, - self.name) + if output_valid_check != 0: + raise 
ValueError(f"x_shape[{self.axis}] {x_shape[self.axis]} must be divide exactly by" + f" output_num {self.output_num}") + x_shape[self.axis] = int(x_shape[self.axis] / self.output_num) out_shapes = [] out_dtypes = [] @@ -1031,7 +988,7 @@ class InvertPermutation(PrimitiveWithInfer): values can not be negative. Inputs: - - **input_x** (Union(tuple[int]) - The input tuple is constructed by multiple + - **input_x** (Union(tuple[int], list[int])) - The input is constructed by multiple integers, i.e., :math:`(y_1, y_2, ..., y_S)` representing the indices. The values must include 0. There can be no duplicate values or negative values. Only constant value is allowed. @@ -1059,6 +1016,12 @@ class InvertPermutation(PrimitiveWithInfer): validator.check_value_type("shape", x_shp, [tuple, list], self.name) if mstype.issubclass_(x['dtype'], mstype.tensor): raise ValueError(f'For \'{self.name}\' the input value must be non-Tensor.') + for shp in x_shp: + if shp != []: + x_rank = len(np.array(x_value, np.int64).shape) + raise ValueError(f'For \'{self.name}\' the rank of input must be 1, but got {x_rank}.') + for i, value in enumerate(x_value): + validator.check_value_type("input[%d]" % i, value, [int], self.name) z = [x_value[i] for i in range(len(x_value))] z.sort() @@ -1457,6 +1420,58 @@ class UnsortedSegmentMin(PrimitiveWithInfer): return out + +class UnsortedSegmentProd(PrimitiveWithInfer): + """ + Computes the product along segments of a tensor. + + Inputs: + - **input_x** (Tensor) - The shape is :math:`(x_1, x_2, ..., x_R)`. + With float16, float32 or int32 data type. + - **segment_ids** (Tensor) - A `1-D` tensor whose shape is :math:`(x_1)`. Data type must be int32. + - **num_segments** (int) - The value specifies the number of distinct `segment_ids`, + should be greater than 0. + + Outputs: + Tensor, Set the number of `num_segments` as `N`, the shape is :math:`(N, x_2, ..., x_R)`. 
+ + Examples: + >>> input_x = Tensor(np.array([[1, 2, 3], [4, 5, 6], [4, 2, 1]]).astype(np.float32)) + >>> segment_ids = Tensor(np.array([0, 1, 0]).astype(np.int32)) + >>> num_segments = 2 + >>> unsorted_segment_prod = P.UnsortedSegmentProd() + >>> unsorted_segment_prod(input_x, segment_ids, num_segments) + [[4., 4., 3.], [4., 5., 6.]] + """ + + @prim_attr_register + def __init__(self): + """init UnsortedSegmentProd""" + self.init_prim_io_names(inputs=['x', 'segment_ids', 'num_segments'], outputs=['y']) + + def __infer__(self, x, segment_ids, num_segments): + x_type = x['dtype'] + x_shape = x['shape'] + segment_ids_shape = segment_ids['shape'] + validator.check_subclass("input_x", x_type, mstype.tensor, self.name) + validator.check_value_type("x_shape", x_shape, [list], self.name) + valid_type = [mstype.float16, mstype.float32, mstype.int32] + validator.check_tensor_type_same({"x": x['dtype']}, valid_type, self.name) + validator.check_tensor_type_same({"segment_ids": segment_ids['dtype']}, [mstype.int32], self.name) + validator.check_integer("rank of segment_ids_shape", len(segment_ids_shape), 1, Rel.EQ, self.name) + validator.check(f'first shape of input_x', x_shape[0], + 'length of segments_id', segment_ids_shape[0], Rel.EQ, self.name) + num_segments_v = num_segments['value'] + validator.check_value_type('num_segments', num_segments_v, [int], self.name) + validator.check_integer("num_segments", num_segments_v, 0, Rel.GT, self.name) + segment_ids_shape_len = len(segment_ids_shape) + out_shape = [num_segments_v] + out_shape += x_shape[segment_ids_shape_len:] + out = {'shape': out_shape, + 'dtype': mstype.tensor_type(x_type.element_type()), + 'value': None} + return out + + class Concat(PrimitiveWithInfer): r""" Concat tensor in specified axis. @@ -1508,6 +1523,60 @@ class Concat(PrimitiveWithInfer): return out +class ParallelConcat(PrimitiveWithInfer): + r""" + Concat tensor in the first dimension. + + Concat input tensors along with the first dimension. 
+ + Note: + The input tensors are all required to have size 1 in the first dimension. + + Inputs: + - **values** (tuple, list) - Tuple or list of input tensors. The data type and shape of these + tensors must be same. + + Outputs: + Tensor, data type same as `values`. + + Examples: + >>> data1 = Tensor(np.array([[0, 1]]).astype(np.int32)) + >>> data2 = Tensor(np.array([[2, 1]]).astype(np.int32)) + >>> op = P.ParallelConcat() + >>> output = op((data1, data2)) + [[0, 1], [2, 1]] + """ + + @prim_attr_register + def __init__(self): + """init ParallelConcat""" + + def __infer__(self, values): + x_shp = values['shape'] + x_type = values['dtype'] + + validator.check_integer(f'x_shp length', len(x_shp), 1, Rel.GE, self.name) + + args = {f"x_type[{i}]": elem for i, elem in enumerate(x_type)} + validator.check_tensor_type_same(args, mstype.number_type + (mstype.bool_,), self.name) + + first_elem = x_shp[0] + for i, elem in enumerate(x_shp[1:]): + j = i + 1 + validator.check_integer(f'x_shp[{j}][0]', elem[0], 1, Rel.EQ, self.name) + validator.check(f"x_shp[0] shape", first_elem, f"x_shp[{j}] shape", elem, Rel.EQ, self.name) + + ret_shp = x_shp[0].copy() + ret_shp[0] = len(x_shp) + self.add_prim_attr('shape', ret_shp) + self.add_prim_attr('N', len(x_shp)) + + out = {'shape': ret_shp, + 'dtype': x_type[0], + 'value': None} + return out + + def _get_pack_shape(x_shape, x_type, axis, prim_name): """for pack output shape""" validator.check_value_type("shape", x_shape, [tuple, list], prim_name) @@ -3176,3 +3245,50 @@ class TransShape(PrimitiveWithInfer): return {'shape': shp, 'dtype': dtype, 'value': None} + + +class EmbeddingLookup(PrimitiveWithInfer): + """ + Returns a slice of input tensor based on the specified indices. + + This Primitive has the similar functionality as GatherV2 operating on `axis = 0`, but has one more inputs: + `offset`. + + Inputs: + - **input_params** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`. 
+ The Tensor slice, instead of the entire Tensor. + - **input_indices** (Tensor) - The shape of tensor is :math:`(y_1, y_2, ..., y_S)`. + Specifies the indices of elements of the original Tensor. Values can be out of range of `input_params`, + and the exceeding part will be filled with 0 in the output. + - **offset** (int) - Specifies the offset value of this `input_params` slice. Thus the real indices + are equal to `input_indices` minus `offset`. + + Outputs: + Tensor, the shape of tensor is :math:`(z_1, z_2, ..., z_N)`. + + Examples: + >>> input_params = Tensor(np.array([[8, 9], [10, 11], [12, 13], [14, 15]]), mindspore.float32) + >>> input_indices = Tensor(np.array([[5, 2], [8, 5]]), mindspore.int32) + >>> offset = 4 + >>> out = P.EmbeddingLookup()(input_params, input_indices, offset) + [[[10, 11], [0 ,0]], [[0, 0], [10, 11]]] + """ + @prim_attr_register + def __init__(self): + """init index_select""" + self.__setattr_flag__ = True + self.init_prim_io_names(inputs=['params', 'indices', 'offset'], + outputs=['output']) + + def __infer__(self, params, indices, offset): + validator.check_subclass("params", params['dtype'], mstype.tensor, self.name) + validator.check_tensor_type_same({"indices": indices['dtype']}, mstype.int_type, self.name) + validator.check_subclass("offset", offset['dtype'], mstype.int_, self.name) + params_shp = params['shape'] + if len(params_shp) != 2: + raise ValueError("The dimension of 'params' in EmbeddingLookup must be 2, but got %d." 
% len(params_shp)) + out_shape = indices['shape'] + params_shp[1:] + out = {'shape': out_shape, + 'dtype': params['dtype'], + 'value': None} + return out diff --git a/mindspore/ops/operations/image_ops.py b/mindspore/ops/operations/image_ops.py index 1e366b5ea6..437cda3301 100644 --- a/mindspore/ops/operations/image_ops.py +++ b/mindspore/ops/operations/image_ops.py @@ -117,8 +117,8 @@ class CropAndResize(PrimitiveWithInfer): validator.check("crop_height", crop_size_value[0], "minimum", 0, Rel.GT, self.name) validator.check("crop_width", crop_size_value[1], "minimum", 0, Rel.GT, self.name) # check crop_size element type - validator.check("crop_height dtype", crop_size_dtype[0], mstype.int32, self.name) - validator.check("crop_width dtype", crop_size_dtype[1], mstype.int32, self.name) + validator.check("crop_height dtype", crop_size_dtype[0], "expected", mstype.int32, Rel.EQ, self.name) + validator.check("crop_width dtype", crop_size_dtype[1], "expected", mstype.int32, Rel.EQ, self.name) num_boxes = boxes_shape[0] crop_height = crop_size_value[0] diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py index 9acd75d8e4..a9bdf07d28 100644 --- a/mindspore/ops/operations/nn_ops.py +++ b/mindspore/ops/operations/nn_ops.py @@ -234,7 +234,7 @@ class Softsign(PrimitiveWithInfer): \text{output} = \frac{\text{input_x}}{1 + \abs{\text{input_x}}}, Inputs: - - **input_x** (Tensor) - The input tensor whose data type should be float. + - **input_x** (Tensor) - The input tensor whose data type should be float16 or float32. Outputs: Tensor, with the same type and shape as the `input_x`. 
@@ -255,7 +255,7 @@ class Softsign(PrimitiveWithInfer): return input_x def infer_dtype(self, input_x): - validator.check_tensor_type_same({'input_x': input_x}, mstype.float_type, self.name) + validator.check_tensor_type_same({'input_x': input_x}, [mstype.float16, mstype.float32], self.name) return input_x @@ -1014,6 +1014,8 @@ class DepthwiseConv2dNative(PrimitiveWithInfer): def infer_dtype(self, x_dtype, w_dtype): args = {'x': x_dtype, 'w': w_dtype} validator.check_tensor_type_same(args, mstype.number_type, self.name) + if x_dtype.element_type() == mstype.int8: + return mstype.tensor_type(mstype.int32) return x_dtype @@ -1930,7 +1932,7 @@ class ApplyRMSProp(PrimitiveWithInfer): >>> decay = 0.0 >>> momentum = 1e-10 >>> epsilon = 0.001 - >>> result = apply_rms(input_x, mean_square, moment, grad, learning_rate, decay, momentum, epsilon) + >>> result = apply_rms(input_x, mean_square, moment, learning_rate, grad, decay, momentum, epsilon) (-2.9977674, 0.80999994, 1.9987665) """ @@ -2772,6 +2774,7 @@ class ROIAlign(PrimitiveWithInfer): feature map coordinates. Suppose the height of a RoI is `ori_h` in the raw image and `fea_h` in the input feature map, the `spatial_scale` should be `fea_h / ori_h`. sample_num (int): Number of sampling points. Default: 2. + roi_end_mode (int): Number must be 0 or 1. Default: 1. Inputs: - **features** (Tensor) - The input features, whose shape should be `(N, C, H, W)`. 
@@ -2788,22 +2791,25 @@ class ROIAlign(PrimitiveWithInfer): Examples: >>> input_tensor = Tensor(np.array([[[[1., 2.], [3., 4.]]]]), mindspore.float32) >>> rois = Tensor(np.array([[0, 0.2, 0.3, 0.2, 0.3]]), mindspore.float32) - >>> roi_align = P.ROIAlign(1, 1, 0.5, 2) + >>> roi_align = P.ROIAlign(2, 2, 0.5, 2) >>> output_tensor = roi_align(input_tensor, rois) >>> assert output_tensor == Tensor(np.array([[[[2.15]]]]), mindspore.float32) """ @prim_attr_register - def __init__(self, pooled_height, pooled_width, spatial_scale, sample_num=2): + def __init__(self, pooled_height, pooled_width, spatial_scale, sample_num=2, roi_end_mode=1): """init ROIAlign""" validator.check_value_type("pooled_height", pooled_height, [int], self.name) validator.check_value_type("pooled_width", pooled_width, [int], self.name) validator.check_value_type("spatial_scale", spatial_scale, [float], self.name) validator.check_value_type("sample_num", sample_num, [int], self.name) + validator.check_value_type("roi_end_mode", roi_end_mode, [int], self.name) + validator.check_int_range("roi_end_mode", roi_end_mode, 0, 1, Rel.INC_BOTH, self.name) self.pooled_height = pooled_height self.pooled_width = pooled_width self.spatial_scale = spatial_scale self.sample_num = sample_num + self.roi_end_mode = roi_end_mode def infer_shape(self, inputs_shape, rois_shape): return [rois_shape[0], inputs_shape[1], self.pooled_height, self.pooled_width] @@ -4803,19 +4809,19 @@ class CTCLoss(PrimitiveWithInfer): preprocess_collapse_repeated (bool): If True, repeated labels are collapsed prior to the CTC calculation. Default: False. ctc_merge_repeated (bool): If False, during CTC calculation, repeated non-blank labels will not be merged - and are interpreted as individual labels. This is a simplfied version if CTC. + and are interpreted as individual labels. This is a simplified version of CTC. Default: True. ignore_longer_outputs_than_inputs (bool): If True, sequences with longer outputs than inputs will be ignored.
Default: False. Inputs: - **inputs** (Tensor) - The input Tensor should be a `3-D` tensor whose shape is - :math:`(max_time, batch_size, num_class)`. `num_class` should be `num_labels + 1` classes, `num_labels` - indicates the number of actual labels. Blank labels are reserved. + :math:`(max_time, batch_size, num_classes)`. `num_classes` should be `num_labels + 1` classes, `num_labels` + indicates the number of actual labels. Blank labels are reserved. Default blank label is `num_classes - 1`. - **labels_indices** (Tensor) - The indices of labels. `labels_indices[i, :] == [b, t]` means `labels_values[i]` stores the id for `(batch b, time t)`. The type must be int64 and rank must be 2. - **labels_values** (Tensor) - A `1-D` input tensor. The values associated with the given batch and time. The - type must be int32. `labels_values[i]` must in the range of `[0, num_class)`. + type must be int32. `labels_values[i]` must in the range of `[0, num_classes)`. - **sequence_length** (Tensor) - A tensor containing sequence lengths with the shape of :math:`(batch_size)`. The type must be int32. Each value in the tensor should not greater than `max_time`. @@ -4849,6 +4855,7 @@ class CTCLoss(PrimitiveWithInfer): def infer_shape(self, inputs, labels_indices, labels_values, sequence_length): validator.check_integer("inputs rank", len(inputs), 3, Rel.EQ, self.name) validator.check_integer("labels_indices rank", len(labels_indices), 2, Rel.EQ, self.name) + validator.check_integer("labels_indices dim one", labels_indices[1], 2, Rel.EQ, self.name) validator.check_integer("labels_values rank", len(labels_values), 1, Rel.EQ, self.name) validator.check_integer("sequence_length rank", len(sequence_length), 1, Rel.EQ, self.name) validator.check('labels_indices size', labels_indices[0], 'labels_values size', @@ -5027,8 +5034,7 @@ class LRN(PrimitiveWithInfer): bias (float): An offset (usually positive to avoid dividing by 0). alpha (float): A scale factor, usually positive. 
beta (float): An exponent. - norm_region (str): Specify normalization region. Options: "ACROSS_CHANNELS", "WITHIN_CHANNEL". - Default: "ACROSS_CHANNELS". + norm_region (str): Specify normalization region. Options: "ACROSS_CHANNELS". Default: "ACROSS_CHANNELS". Inputs: - **x** (Tensor) - A 4D Tensor with float16 or float32 data type. @@ -5050,10 +5056,66 @@ class LRN(PrimitiveWithInfer): validator.check_value_type("alpha", alpha, [float], self.name) validator.check_value_type("beta", beta, [float], self.name) validator.check_value_type("norm_region", norm_region, [str], self.name) + validator.check_string('norm_region', norm_region, ['ACROSS_CHANNELS'], self.name) + validator.check_integer("depth_radius", depth_radius, 0, Rel.GE, self.name) def infer_dtype(self, x_dtype): validator.check_tensor_type_same({"x": x_dtype}, (mstype.float16, mstype.float32,), self.name) return x_dtype def infer_shape(self, x_shape): + validator.check_integer("x_shape", len(x_shape), 4, Rel.EQ, self.name) return x_shape + +class CTCLossV2(PrimitiveWithInfer): + r""" + Calculates the CTC(Connectionist Temporal Classification) loss. Also calculates the gradient. + Note: + - Cudnn Uses label value of for the `blank` + + Inputs: + - **inputs** (Tensor) - The input Tensor should be a `3-D` tensor whose shape is + :math:`(max_time, batch_size, num_class)`. `num_class` should be `num_labels + 1` classes, `num_labels` + indicates the number of actual labels. Blank labels are reserved. + - **labels** (Tensor) - The labels Tensor should be a `1-D` tensor whose shape is + :math:`(\sigma{label_lengths})` + or `2-D` tensor whose shape is + :math:`(max_time, max{label_lengths})` + The type must be int32. + - **input_lengths** (Tensor) - A `1-D` input tensor whose shape is + :math:`(batch_size,)`. The values should be batch. The type must be int32. + - **label_lengths** (Tensor) - A tensor containing sequence lengths with the shape of :math:`(batch_size)`. + The type must be int32. 
Each value in the tensor should not greater than `max_time`. + + Outputs: + - **loss** (Tensor) - A tensor containing log-probabilities, the shape is :math:`(batch_size)`. Has the same + type with `inputs`. + - **gradient** (Tensor) - The gradient of `loss`. Has the same type and shape with `inputs`. + + Examples: + >>> inputs = Tensor(np.random.random((2, 2, 3)), mindspore.float32) + >>> labels = Tensor(np.array([[0, 0], [1, 0]]), mindspore.int32) + >>> input_lengths = Tensor(np.array([3, 3, 3]), mindspore.int32) + >>> label_lengths = Tensor(np.array([3, 3, 3]), mindspore.int32) + >>> ctc_loss = P.CTCLossV2() + >>> output = ctc_loss(inputs, labels, input_lengths, label_lengths) + """ + @prim_attr_register + def __init__(self): + pass + + def infer_dtype(self, input_dtype, labels_dtype, input_lengths_dtype, label_lengths_dtype): + validator.check_tensor_type_same({"input": input_dtype}, (mstype.float32,), self.name) + validator.check_tensor_type_same({"labels": labels_dtype}, (mstype.int32,), self.name) + validator.check_tensor_type_same({"input_lengths": input_lengths_dtype}, (mstype.int32,), self.name) + validator.check_tensor_type_same({"target_lengths": label_lengths_dtype}, (mstype.int32,), self.name) + return mstype.float32, mstype.float32 + + def infer_shape(self, input_shape, labels_shape, input_lengths_shape, label_lengths_shape): + validator.check_integer("input shape", len(input_shape), 3, Rel.EQ, self.name) + validator.check_number_range("labels shape", len(labels_shape), 1, 2, Rel.INC_BOTH, self.name) + validator.check_integer("input lengths shape", len(input_lengths_shape), 1, Rel.EQ, self.name) + validator.check_integer("label lengths shape", len(label_lengths_shape), 1, Rel.EQ, self.name) + validator.check_integer("input[1]", input_shape[1], input_lengths_shape[0], Rel.EQ, self.name) + validator.check_integer("input[1]", input_shape[1], label_lengths_shape[0], Rel.EQ, self.name) + return (input_shape[1],), input_shape diff --git 
a/mindspore/ops/operations/other_ops.py b/mindspore/ops/operations/other_ops.py index b6b938d800..a58403f883 100644 --- a/mindspore/ops/operations/other_ops.py +++ b/mindspore/ops/operations/other_ops.py @@ -51,6 +51,7 @@ class Assign(PrimitiveWithInfer): ('variable', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), ('value', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T) ) + @prim_attr_register def __init__(self): self.init_prim_io_names(inputs=['ref', 'value'], outputs=['output']) @@ -59,7 +60,9 @@ class Assign(PrimitiveWithInfer): return variable def infer_dtype(self, variable, value): - # Add a type validation later when we don't have to assign a value to RefKey. + if variable != mstype.type_refkey: + validator.check_tensor_type_same({"variable": variable}, mstype.number_type, self.name) + validator.check_scalar_or_tensor_type_same({"value": value}, mstype.number_type, self.name) return variable @@ -324,6 +327,7 @@ class Partial(Primitive): partial_func = functools.partial(func, *args[1:]) return partial_func + class Depend(Primitive): """ Depend is used for process side-effect operations. @@ -457,3 +461,83 @@ class ConfusionMatrix(PrimitiveWithInfer): args = {"labels": labels, "predictions": predictions} validator.check_tensor_type_same(args, (mstype.number_type), self.name) return labels + + +class PopulationCount(PrimitiveWithInfer): + r""" + Calculate population count. + + Inputs: + - **input** (Tensor) - The data type should be int16 or uint16. + + Outputs: + Tensor, with shape same as the input. 
+ + Examples: + >>> population_count = P.PopulationCount() + >>> x_input = Tensor([0, 1, 3], mindspore.int16) + >>> population_count(x_input) + """ + + @prim_attr_register + def __init__(self): + pass + + def infer_shape(self, x_shape): + return x_shape + + def infer_dtype(self, x_dtype): + args = {"x": x_dtype} + validator.check_tensor_type_same(args, (mstype.int16, mstype.uint16,), self.name) + return mstype.tensor_type(mstype.uint8) + +class Push(PrimitiveWithInfer): + """ + Pushing the inputs of the corresponding optimizer to parameter server. + + Args: + optim_type (string): The optimizer type. Default: 'ApplyMomentum'. + only_shape_indices (list): The indices of input of which only shape + will be pushed to parameter server. Default: None. + + Inputs: + - **optim_inputs** (tuple) - The inputs for this kind of optimizer. + - **optim_input_shapes** (tuple) - The shapes of the inputs. + + Outputs: + Tensor, the key of the weight which needs to be updated. + """ + + @prim_attr_register + def __init__(self, optim_type='ApplyMomentum', only_shape_indices=None): + """init Push""" + self.init_prim_io_names(inputs=['optim_inputs', 'optim_input_shapes'], outputs=['key']) + + def infer_shape(self, inputs, shapes): + return [1] + + def infer_dtype(self, inputs, shapes): + return mstype.uint64 + +class Pull(PrimitiveWithInfer): + """ + Pulling weight from parameter server. + + Inputs: + - **key** (Tensor) - The key of the weight. + - **weight** (Tensor) - The weight to be updated. + + Outputs: + None. 
+ """ + + @prim_attr_register + def __init__(self): + """init Pull""" + self.init_prim_io_names(inputs=['key', 'weight'], outputs=['output']) + + def infer_shape(self, key_shape, weight_shape): + return [1] + + def infer_dtype(self, key_dtype, weight_dtype): + return mstype.float32 diff --git a/mindspore/ops/primitive.py b/mindspore/ops/primitive.py index 7ceb687778..cb34e9ff24 100644 --- a/mindspore/ops/primitive.py +++ b/mindspore/ops/primitive.py @@ -146,7 +146,7 @@ class Primitive(Primitive_): Check whether or not certain inputs should go into backend. Subclass in need should override this method. Args: - Same as arguments of current Primitive + *args(Primitive args): Same as arguments of current Primitive. Returns: A tuple of two elements, first element indicates whether or not we should filter out current arguments; @@ -237,12 +237,14 @@ class PrimitiveWithInfer(Primitive): """ Infer output shape based on input shape. - Args: - inputs (tuple(int)): dimensions of input tensors. - outputs (tuple(int)): dimensions of output tensors. - Note: The shape of scalar is an empty tuple. + + Args: + args (tuple(int)): shapes of input tensors. + + Return: + `tuple(int)`, shapes of output tensors. """ return None @@ -251,8 +253,10 @@ class PrimitiveWithInfer(Primitive): Infer output dtype based on input dtype. Args: - inputs (mstype): data type of inputs. - outputs (mstype): data type of outputs. + args (:class:`mindspore.dtype`): data type of inputs. + + Return: + :class:`mindspore.dtype`, data type of outputs. """ return None @@ -261,8 +265,10 @@ class PrimitiveWithInfer(Primitive): Infer output value based on input value at compile time. Args: - inputs (any): value of inputs. - outputs (any): value of outputs. + args (Any): value of inputs. + + Return: + Value of outputs. Return `None` for, cat not infer the value at compile time. 
""" return None diff --git a/mindspore/parallel/_utils.py b/mindspore/parallel/_utils.py index c5b4d57702..68f070d4a5 100644 --- a/mindspore/parallel/_utils.py +++ b/mindspore/parallel/_utils.py @@ -122,47 +122,6 @@ def _parameter_broadcast_check(parallel_mode, parameter_broadcast): "do not support parameter broadcast, parallel_mode: {0}, parameter_broadcast:{1}" .format(parallel_mode, parameter_broadcast)) - -PARAMETER_CLONED_INDEX = 0 - - -class _CloneInfo(): - """ - The clone info of parameter. - - Attributes: - be_cloned (bool): Whether the parameter is cloned. - cloned (bool): Whether the parameter clone from other parameter. - be_cloned_index (tuple): If the parameter is cloned, generate one index per clone. - cloned_index (int): If the parameter clone from other parameter, it has a unique index. - """ - def __init__(self): - self.be_cloned = False - self.cloned = False - self.be_cloned_index = [] - self.cloned_index = None - - -def _set_clone_info(clone_from, clone_to): - """ - Set the clone info. - - Args: - clone_from (_CloneInfo): The clone info of be_cloned parameter. - clone_to (_CloneInfo): The clone info of cloned parameter. 
- """ - global PARAMETER_CLONED_INDEX - clone_to.be_cloned = False - clone_to.cloned = True - clone_to.be_cloned_index = [] - clone_to.cloned_index = PARAMETER_CLONED_INDEX - - clone_from.be_cloned = True - clone_from.be_cloned_index.append(PARAMETER_CLONED_INDEX) - - PARAMETER_CLONED_INDEX = PARAMETER_CLONED_INDEX + 1 - - def _get_python_op(op_name, op_path, instance_name, arglist): """Get python operator.""" module = __import__(op_path, fromlist=["None"]) diff --git a/mindspore/train/callback/_loss_monitor.py b/mindspore/train/callback/_loss_monitor.py index 766777e878..15a095c5cb 100644 --- a/mindspore/train/callback/_loss_monitor.py +++ b/mindspore/train/callback/_loss_monitor.py @@ -14,7 +14,6 @@ # ============================================================================ """LossMonitor Callback class.""" -import time import numpy as np from mindspore.common.tensor import Tensor @@ -32,62 +31,32 @@ class LossMonitor(Callback): Args: per_print_times (int): Print loss every times. Default: 1. - lr_init (numpy array): train learning rate. Default: None. Raises: ValueError: If print_step is not int or less than zero. 
- - Examples: - >>> LossMonitor(100, lr_init=Tensor([0.05]*100).asnumpy()) """ - def __init__(self, per_print_times=1, lr_init=None): + def __init__(self, per_print_times=1): super(LossMonitor, self).__init__() if not isinstance(per_print_times, int) or per_print_times < 0: raise ValueError("print_step must be int and >= 0.") self._per_print_times = per_print_times - self.lr_init = lr_init - - def epoch_begin(self, run_context): - self.losses = [] - self.epoch_time = time.time() - - def epoch_end(self, run_context): - cb_params = run_context.original_args() - epoch_mseconds = (time.time() - self.epoch_time) * 1000 - per_step_mseconds = epoch_mseconds / cb_params.batch_num - print("Epoch time: {:5.3f}, per step time: {:5.3f}, " - "avg loss: {:5.3f}".format(epoch_mseconds, - per_step_mseconds, - np.mean(self.losses))) - print("*" * 60) - - def step_begin(self, run_context): - self.step_time = time.time() def step_end(self, run_context): cb_params = run_context.original_args() - step_mseconds = (time.time() - self.step_time) * 1000 - step_loss = cb_params.net_outputs + loss = cb_params.net_outputs - if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor): - step_loss = step_loss[0] - if isinstance(step_loss, Tensor): - step_loss = np.mean(step_loss.asnumpy()) + if isinstance(loss, (tuple, list)): + if isinstance(loss[0], Tensor) and isinstance(loss[0].asnumpy(), np.ndarray): + loss = loss[0] - self.losses.append(step_loss) - cur_step_in_epoch = int((cb_params.cur_step_num - 1) % cb_params.batch_num) + 1 + if isinstance(loss, Tensor) and isinstance(loss.asnumpy(), np.ndarray): + loss = np.mean(loss.asnumpy()) - if isinstance(step_loss, float) and (np.isnan(step_loss) or np.isinf(step_loss)): - raise ValueError("Epoch: [{:3d}/{:3d}], step: [{:5d}/{:5d}]. 
" - "Invalid loss, terminating training.".format( - cb_params.cur_epoch_num - 1, cb_params.epoch_num, - cur_step_in_epoch, cb_params.batch_num)) + cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1 + if isinstance(loss, float) and (np.isnan(loss) or np.isinf(loss)): + raise ValueError("epoch: {} step: {}. Invalid loss, terminating training.".format( + cb_params.cur_epoch_num, cur_step_in_epoch)) if self._per_print_times != 0 and cb_params.cur_step_num % self._per_print_times == 0: - print("Epoch: [{:3d}/{:3d}], step: [{:5d}/{:5d}], " - "loss: [{:5.4f}], avg los: [{:5.4f}], time: [{:5.4f}ms]".format( - cb_params.cur_epoch_num, cb_params.epoch_num, - cur_step_in_epoch, int(cb_params.batch_num), - step_loss, np.mean(self.losses), - step_mseconds), flush=True) + print("epoch: %s step: %s, loss is %s" % (cb_params.cur_epoch_num, cur_step_in_epoch, loss), flush=True) diff --git a/mindspore/train/callback/_summary_collector.py b/mindspore/train/callback/_summary_collector.py index 1550c3c55c..ded0e9a650 100644 --- a/mindspore/train/callback/_summary_collector.py +++ b/mindspore/train/callback/_summary_collector.py @@ -126,10 +126,12 @@ class SummaryCollector(Callback): >>> >>> # Only collect metric, custom lineage data and record data that collected by the summary operator, >>> # others are not collected - >>> specified = {'collect_metric':True, 'custom_lineage_data': {'version': 'resnet50_v1'}} + >>> specified = {'collect_metric': True} >>> summary_collector = SummaryCollector('./summary_dir', >>> collect_specified_data=specified, - >>> keep_default_action=False) + >>> keep_default_action=False, + >>> custom_lineage_data={'version': 'resnet50_v1'} + >>> ) >>> model.train(epoch, dataset, callbacks=summary_collector) """ diff --git a/mindspore/train/dataset_helper.py b/mindspore/train/dataset_helper.py index 14797e568b..75e1deabc4 100644 --- a/mindspore/train/dataset_helper.py +++ b/mindspore/train/dataset_helper.py @@ -14,6 +14,7 @@ # 
============================================================================ """Dataset help for minddata dataset""" import math +import os from mindspore._checkparam import check_bool from .. import context @@ -60,7 +61,11 @@ class DatasetHelper: if context.get_context("device_target") == "Ascend": iterclass = _DatasetIterMSLoopSink elif context.get_context("device_target") == "GPU": - iterclass = _DatasetIterMS + ms_role = os.getenv("MS_ROLE") + if ms_role in ("MS_PSERVER", "MS_SCHED"): + iterclass = _DatasetIterPSLite + else: + iterclass = _DatasetIterMS elif context.get_context("device_target") == "CPU": raise RuntimeError("Currently dataset sink mode is not supported when the device target is CPU.") else: @@ -131,6 +136,9 @@ class _DatasetIterMSLoopSink(_DatasetIter): def __init__(self, dataset): super(_DatasetIterMSLoopSink, self).__init__(dataset) self.loop_count = self.get_loop_count(dataset) + ms_role = os.getenv("MS_ROLE") + if ms_role in ("MS_PSERVER", "MS_SCHED"): + self.loop_count = 1 # for self._parallel_mode equal to semi_auto_parallel or auto_parallel, and not using full_batch, # use a complete tensor to compile, and slice tensor to run. The batch dimension of tensors for # compile is device_number times the batch dimension of tensors for run. Now only support LoopSink. 
@@ -154,6 +162,18 @@ class _DatasetIterMS(_DatasetIter): self.op = GetNextSingleOp(self.dataset_types, self.dataset_shapes, queue_name) +class _DatasetIterPSLite(_DatasetIter): + """Iter for context (device_target=GPU) on MS_PSERVER or MS_SCHED""" + def __init__(self, dataset): + super(_DatasetIterPSLite, self).__init__(dataset) + self.loop_count = 1 + self.loop_size = 1 + self.op = None + def op(): + return _construct_tensor_list(self.dataset_types, self.dataset_shapes, batch_expand_num=1) + self.op = op + + class _DatasetIterGE(_DatasetIter): """Iter for ge""" def __init__(self, dataset): diff --git a/mindspore/train/model.py b/mindspore/train/model.py index 79bd6bc90b..74fd668e82 100755 --- a/mindspore/train/model.py +++ b/mindspore/train/model.py @@ -15,6 +15,7 @@ """Model.""" from collections.abc import Iterable +import os import numpy as np from mindspore import log as logger @@ -350,6 +351,9 @@ class Model: cb_params.train_dataset = train_dataset cb_params.list_callback = self._transform_callbacks(callbacks) cb_params.train_dataset_element = None + ms_role = os.getenv("MS_ROLE") + if ms_role in ("MS_PSERVER", "MS_SCHED"): + epoch = 1 # build callback list with _CallbackManager(callbacks) as list_callback: diff --git a/mindspore/train/quant/quant.py b/mindspore/train/quant/quant.py index bc44ba22c2..b553373f10 100644 --- a/mindspore/train/quant/quant.py +++ b/mindspore/train/quant/quant.py @@ -33,8 +33,10 @@ from ...ops.operations import _inner_ops as inner from ...train import serialization from . 
import quant_utils -_ACTIVATION_MAP = {nn.ReLU: quant.ReLUQuant, - nn.ReLU6: quant.ReLU6Quant, +_ACTIVATION_MAP = {nn.ReLU: quant.ActQuant, + nn.ReLU6: quant.ActQuant, + nn.LeakyReLU: quant.ActQuant, + nn.Sigmoid: quant.ActQuant, nn.HSigmoid: quant.HSigmoidQuant, nn.HSwish: quant.HSwishQuant} @@ -112,7 +114,6 @@ class ConvertToQuantNetwork: def run(self): self.network.update_cell_prefix() network = self._convert_subcells2quant(self.network) - network = _AddFakeQuantInput(network) self.network.update_cell_type("quant") return network @@ -257,9 +258,9 @@ class ConvertToQuantNetwork: def _convert_activation(self, activation): act_class = activation.__class__ if act_class not in _ACTIVATION_MAP: - raise ValueError( - "Unsupported activation in auto quant: ", act_class) - return _ACTIVATION_MAP[act_class](num_bits=self.act_bits, + raise ValueError("Unsupported activation in auto quant: ", act_class) + return _ACTIVATION_MAP[act_class](activation=act_class, + num_bits=self.act_bits, quant_delay=self.act_qdelay, per_channel=self.act_channel, symmetric=self.act_symmetric, @@ -273,16 +274,20 @@ class ExportToQuantInferNetwork: Args: network (Cell): MindSpore network API `convert_quant_network`. inputs (Tensor): Input tensors of the `quantization aware training network`. + mean (int): Input data mean. Default: 127.5. + std_dev (int, float): Input data variance. Default: 127.5. Returns: Cell, GEIR backend Infer network. 
""" __quant_op_name__ = ["TensorAdd", "Sub", "Mul", "RealDiv"] - def __init__(self, - network, - *inputs): + def __init__(self, network, mean, std_dev, *inputs): network = validator.check_isinstance('network', network, (nn.Cell,)) + # quantize for inputs: q = f / scale + zero_point + # dequantize for outputs: f = (q - zero_point) * scale + self.input_scale = round(mean) + self.input_zero_point = 1 / std_dev self.data_type = mstype.int8 self.network = copy.deepcopy(network) self.all_parameters = {p.name: p for p in self.network.get_parameters()} @@ -313,11 +318,14 @@ class ExportToQuantInferNetwork: info = self.quant_info_table.get(w_minq_name, None) if info: fack_quant_a_in_op, minq_name = info - maxq = self.all_parameters[minq_name[:-4] + "maxq"] - minq = self.all_parameters[minq_name] - scale_a_in, zp_a_in = quant_utils.scale_zp_from_data(fack_quant_a_in_op, maxq, minq, np_type) + if minq_name == 'input': + scale_a_in, zp_a_in = self.input_scale, self.input_zero_point + else: + maxq = self.all_parameters[minq_name[:-4] + "maxq"] + minq = self.all_parameters[minq_name] + scale_a_in, zp_a_in = quant_utils.scale_zp_from_data(fack_quant_a_in_op, maxq, minq, np_type) else: - logger.warning(f"Do not find `fake_quant` from input with `fack_quant.minq` {w_minq_name}") + logger.warning(f"Do not find `fake_quant` from input with `fake_quant.minq` {w_minq_name}") return None # Build the `Quant` `Dequant` op. 
@@ -325,7 +333,7 @@ class ExportToQuantInferNetwork: quant_op = inner.AscendQuant(float(scale_a_in), float(zp_a_in)) sqrt_mode = False scale_deq = scale_a_out * scale_w - if scale_deq < 2 ** -14: + if (scale_deq < 2 ** -14).all(): scale_deq = np.sqrt(scale_deq) sqrt_mode = True dequant_op = inner.AscendDequant(sqrt_mode) @@ -393,7 +401,7 @@ class ExportToQuantInferNetwork: return network -def export(network, *inputs, file_name, file_format='GEIR'): +def export(network, *inputs, file_name, mean=127.5, std_dev=127.5, file_format='GEIR'): """ Exports MindSpore quantization predict model to deploy with GEIR. @@ -401,16 +409,27 @@ def export(network, *inputs, file_name, file_format='GEIR'): network (Cell): MindSpore network produced by `convert_quant_network`. inputs (Tensor): Inputs of the `quantization aware training network`. file_name (str): File name of model to export. + mean (int): Input data mean. Default: 127.5. + std_dev (int, float): Input data variance. Default: 127.5. file_format (str): MindSpore currently supports 'GEIR' format for exported quantization aware model. - GEIR: Graph Engine Intermediate Representation. An Intermediate representation format of Ascend model. 
""" + supported_device = ["Ascend"] supported_formats = ['GEIR'] + mean = validator.check_type("mean", mean, (int, float)) + std_dev = validator.check_type("std_dev", std_dev, (int, float)) + + if context.get_context('device_target') not in supported_device: + raise KeyError("Unsupported {} device target.".format(context.get_context('device_target'))) + if file_format not in supported_formats: raise ValueError('Illegal file format {}.'.format(file_format)) + network.set_train(False) + if file_format == 'GEIR': - exporter = ExportToQuantInferNetwork(network, *inputs) + exporter = ExportToQuantInferNetwork(network, mean, std_dev, *inputs) deploy_net = exporter.run() serialization.export(deploy_net, *inputs, file_name=file_name, file_format=file_format) diff --git a/mindspore/train/quant/quant_utils.py b/mindspore/train/quant/quant_utils.py index c4a8004012..69505970fd 100644 --- a/mindspore/train/quant/quant_utils.py +++ b/mindspore/train/quant/quant_utils.py @@ -45,7 +45,7 @@ def cal_quantization_params(input_min, raise ValueError("input min shape should equal to input max.") if len(input_min.shape) > 1: raise ValueError("input min and max shape should be one dim.") - if input_min > input_max: + if (input_min > input_max).all(): raise ValueError("input_min min should less than input max.") if (input_max == input_min).all(): # scale = 1.0, zp = 0.0 @@ -85,9 +85,7 @@ def cal_quantization_params(input_min, return scale, zp -def weight2int(data, - scale, - zero_point): +def weight2int(data, scale, zero_point): r""" Calculate int8/uint8 weight from fp32. the formula is defined as: @@ -103,12 +101,25 @@ def weight2int(data, weight (numpy.ndarray): The dimension of channel or 1. 
""" if scale.shape != zero_point.shape: - raise ValueError("scale and zero_point should have the same shape.") - if scale.shape[0] > 0: - scale = scale.reshape(1, -1) - zero_point = zero_point.reshape(1, -1) + raise ValueError("`scale` and `zero_point` should have the same shape.") + if scale.shape[0] < 0: + raise ValueError("`scale` and `zero_point` shape should greater than zero.") + if len(scale.shape) > 1: + # for perchannel + if scale.shape[0] == data.shape[0]: + # `Conv2d` or `Dense` op weight + shape_list = [-1] + [1] * len(data.shape[1:]) + scale = scale.reshape(shape_list) + zero_point = zero_point.reshape(shape_list) + elif scale.shape[0] == data.shape[1]: + # `DepthwiseConv2d` op weight + shape_list = [1, -1] + [1] * len(data.shape[2:]) + scale = scale.reshape(shape_list) + zero_point = zero_point.reshape(shape_list) + else: + raise ValueError("Unsupported weight shape({})".format(data.shape)) - return np.round((data/scale) + zero_point) + return np.round((data / scale) + zero_point) def scale_zp_from_fack_quant_cell(cell, data_type): @@ -183,9 +194,20 @@ def fold_batchnorm(weight, cell_quant): beta = cell_quant.beta.data.asnumpy() epsilon = cell_quant.eps sigma = np.sqrt(variance + epsilon) - gamma = gamma.reshape(-1, 1, 1, 1) - sigma = sigma.reshape(-1, 1, 1, 1) - mean = mean.reshape(-1, 1, 1, 1) - weight = weight * gamma / sigma + + if gamma.shape[0] == weight.shape[0]: + # `Conv2d` or `Dense` op weight + shape_list = [-1] + [1] * len(weight.shape[1:]) + _gamma = gamma.reshape(shape_list) + _sigma = sigma.reshape(shape_list) + elif gamma.shape[0] == weight.shape[1]: + # `DepthwiseConv2d` op weight + shape_list = [1, -1] + [1] * len(weight.shape[2:]) + _gamma = gamma.reshape(shape_list) + _sigma = sigma.reshape(shape_list) + else: + raise ValueError("Unsupported weight shape({})".format(weight.shape)) + + weight = weight * _gamma / _sigma bias = beta - gamma * mean / sigma return weight, bias diff --git a/mindspore/train/serialization.py 
b/mindspore/train/serialization.py index d74bee2706..bc74986321 100644 --- a/mindspore/train/serialization.py +++ b/mindspore/train/serialization.py @@ -302,7 +302,7 @@ def _save_graph(network, file_name): if graph_proto: with open(file_name, "wb") as f: f.write(graph_proto) - os.chmod(file_name, stat.S_IWUSR | stat.S_IRUSR) + os.chmod(file_name, stat.S_IRUSR) def _exec_save_checkpoint(train_network, ckpt_file_name, integrated_save=True): @@ -424,6 +424,7 @@ def export(net, *inputs, file_name, file_format='GEIR'): if is_training: net.set_train(mode=False) # export model + net.init_parameters_data() if file_format == 'GEIR': _executor.compile(net, *inputs, phase='export') _executor.export(net, file_name, file_format) @@ -462,19 +463,18 @@ def parse_print(print_file_name): List, element of list is Tensor. Raises: - ValueError: Print file is incorrect. + ValueError: The print file may be empty, please make sure enter the correct file name. """ - if not os.path.realpath(print_file_name): - raise ValueError("Please input the correct print file name.") + print_file_path = os.path.realpath(print_file_name) - if os.path.getsize(print_file_name) == 0: + if os.path.getsize(print_file_path) == 0: raise ValueError("The print file may be empty, please make sure enter the correct file name.") logger.info("Execute load print process.") print_list = Print() try: - with open(print_file_name, "rb") as f: + with open(print_file_path, "rb") as f: pb_content = f.read() print_list.ParseFromString(pb_content) except BaseException as e: diff --git a/model_zoo/README.md b/model_zoo/README.md index 2dde985679..1e392445af 100644 --- a/model_zoo/README.md +++ b/model_zoo/README.md @@ -134,43 +134,41 @@ In order to facilitate developers to enjoy the benefits of MindSpore framework a | Parameters | AlexNet | | -------------------------- | ------- | -| Published Year | | -| Paper | | -| Resource | | -| Features | | -| MindSpore Version | | -| Dataset | | -| Training Parameters | | -| Optimizer | 
| -| Loss Function | | -| Accuracy | | -| Speed | | -| Loss | | -| Params (M) | | -| Checkpoint for Fine tuning | | -| Model for inference | | -| Scripts | | +| Published Year | 2012 | +| Paper | [ImageNet Classification with Deep Convolutional Neural Networks](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-) | +| Resource | Ascend 910 | +| Features | support with Ascend, GPU | +| MindSpore Version | 0.5.0-beta | +| Dataset | CIFAR10 | +| Training Parameters | epoch=30, batch_size=32 | +| Optimizer | Momentum | +| Loss Function | SoftmaxCrossEntropyWithLogits | +| Accuracy | 88.23% | +| Speed | 1481fps | +| Loss | 0.108 | +| Params (M) | 61.10 | +| Checkpoint for Fine tuning | 445MB(.ckpt file) | +| Scripts | https://gitee.com/mindspore/mindspore/tree/master/model_zoo/alexnet| #### [LeNet](#table-of-contents) | Parameters | LeNet | | -------------------------- | ----- | -| Published Year | | -| Paper | | -| Resource | | -| Features | | -| MindSpore Version | | -| Dataset | | -| Training Parameters | | -| Optimizer | | -| Loss Function | | -| Accuracy | | -| Speed | | -| Loss | | -| Params (M) | | -| Checkpoint for Fine tuning | | -| Model for inference | | -| Scripts | | +| Published Year | 1998 | +| Paper | [Gradient-Based Learning Applied to Document Recognition](https://ieeexplore.ieee.org/abstract/document/726791) | +| Resource | Ascend 910 | +| Features | support with Ascend, GPU, CPU | +| MindSpore Version | 0.5.0-beta | +| Dataset | MNIST | +| Training Parameters | epoch=10, batch_size=32 | +| Optimizer | Momentum | +| Loss Function | SoftmaxCrossEntropyWithLogits | +| Accuracy | 98.52% | +| Speed | 18680fps | +| Loss | 0.004 | +| Params (M) | 0.06 | +| Checkpoint for Fine tuning | 483KB(.ckpt file) | +| Scripts | https://gitee.com/mindspore/mindspore/tree/master/model_zoo/lenet| ### Object Detection and Segmentation diff --git a/model_zoo/Transformer/train.py b/model_zoo/Transformer/train.py index 23c0eb78fd..ffd6b8c714 100644 --- 
a/model_zoo/Transformer/train.py +++ b/model_zoo/Transformer/train.py @@ -147,10 +147,11 @@ def run_transformer_train(): callbacks = [TimeMonitor(dataset.get_dataset_size()), LossCallBack()] if args.enable_save_ckpt == "true": - ckpt_config = CheckpointConfig(save_checkpoint_steps=args.save_checkpoint_steps, - keep_checkpoint_max=args.save_checkpoint_num) - ckpoint_cb = ModelCheckpoint(prefix='transformer', directory=args.save_checkpoint_path, config=ckpt_config) - callbacks.append(ckpoint_cb) + if device_num == 1 or (device_num > 1 and rank_id == 0): + ckpt_config = CheckpointConfig(save_checkpoint_steps=args.save_checkpoint_steps, + keep_checkpoint_max=args.save_checkpoint_num) + ckpoint_cb = ModelCheckpoint(prefix='transformer', directory=args.save_checkpoint_path, config=ckpt_config) + callbacks.append(ckpoint_cb) if args.enable_lossscale == "true": scale_manager = DynamicLossScaleManager(init_loss_scale=cfg.init_loss_scale_value, diff --git a/model_zoo/alexnet/eval.py b/model_zoo/alexnet/eval.py index 4190451632..6a091aedd8 100644 --- a/model_zoo/alexnet/eval.py +++ b/model_zoo/alexnet/eval.py @@ -20,7 +20,7 @@ python eval.py --data_path /YourDataPath --ckpt_path Your.ckpt import argparse from src.config import alexnet_cfg as cfg -from src.dataset import create_dataset_mnist +from src.dataset import create_dataset_cifar10 from src.alexnet import AlexNet import mindspore.nn as nn from mindspore import context @@ -50,8 +50,8 @@ if __name__ == "__main__": print("============== Starting Testing ==============") param_dict = load_checkpoint(args.ckpt_path) load_param_into_net(network, param_dict) - ds_eval = create_dataset_mnist(args.data_path, - cfg.batch_size, - status="test") + ds_eval = create_dataset_cifar10(args.data_path, + cfg.batch_size, + status="test") acc = model.eval(ds_eval, dataset_sink_mode=args.dataset_sink_mode) print("============== {} ==============".format(acc)) diff --git a/model_zoo/alexnet/src/dataset.py b/model_zoo/alexnet/src/dataset.py 
index 6e9f310bed..651c76d6e3 100644 --- a/model_zoo/alexnet/src/dataset.py +++ b/model_zoo/alexnet/src/dataset.py @@ -23,7 +23,7 @@ from mindspore.common import dtype as mstype from .config import alexnet_cfg as cfg -def create_dataset_mnist(data_path, batch_size=32, repeat_size=1, status="train"): +def create_dataset_cifar10(data_path, batch_size=32, repeat_size=1, status="train"): """ create dataset for train or test """ diff --git a/model_zoo/alexnet/train.py b/model_zoo/alexnet/train.py index 184290c26c..df038d62a2 100644 --- a/model_zoo/alexnet/train.py +++ b/model_zoo/alexnet/train.py @@ -20,7 +20,7 @@ python train.py --data_path /YourDataPath import argparse from src.config import alexnet_cfg as cfg -from src.dataset import create_dataset_mnist +from src.dataset import create_dataset_cifar10 from src.generator_lr import get_lr from src.alexnet import AlexNet import mindspore.nn as nn @@ -43,7 +43,7 @@ if __name__ == "__main__": context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target) - ds_train = create_dataset_mnist(args.data_path, cfg.batch_size, cfg.epoch_size) + ds_train = create_dataset_cifar10(args.data_path, cfg.batch_size, cfg.epoch_size) network = AlexNet(cfg.num_classes) loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") lr = Tensor(get_lr(0, cfg.learning_rate, cfg.epoch_size, ds_train.get_dataset_size())) diff --git a/model_zoo/bert/README.md b/model_zoo/bert/README.md index 3ed2bf6783..45928da4e3 100644 --- a/model_zoo/bert/README.md +++ b/model_zoo/bert/README.md @@ -5,9 +5,9 @@ This example implements pre-training, fine-tuning and evaluation of [BERT-base]( ## Requirements - Install [MindSpore](https://www.mindspore.cn/install/en). - Download the zhwiki dataset for pre-training. Extract and clean text in the dataset with [WikiExtractor](https://github.com/attardi/wikiextractor). Convert the dataset to TFRecord format and move the files to a specified path. 
-- Download the CLUE/SQuAD v1.1 dataset for fine-tuning and evaluation. +- Download dataset for fine-tuning and evaluation such as CLUENER, TNEWS, SQuAD v1.1, etc. > Notes: - If you are running a fine-tuning or evaluation task, prepare the corresponding checkpoint file. + If you are running a fine-tuning or evaluation task, prepare a checkpoint from pre-train. ## Running the Example ### Pre-Training @@ -24,31 +24,15 @@ This example implements pre-training, fine-tuning and evaluation of [BERT-base]( sh scripts/run_distribute_pretrain.sh DEVICE_NUM EPOCH_SIZE DATA_DIR SCHEMA_DIR MINDSPORE_HCCL_CONFIG_PATH ``` -### Fine-Tuning -- Set options in `finetune_config.py`. Make sure the 'data_file', 'schema_file' and 'pre_training_file' are set to your own path. Set the 'pre_training_ckpt' to a saved checkpoint file generated after pre-training. +### Fine-Tuning and Evaluation +- Set bert network config and optimizer hyperparameters in `finetune_eval_config.py`. -- Run `finetune.py` for fine-tuning of BERT-base and BERT-NEZHA model. +- Set task related hyperparameters in scripts/run_XXX.sh. - ```bash - python finetune.py - ``` - -### Evaluation -- Set options in `evaluation_config.py`. Make sure the 'data_file', 'schema_file' and 'finetune_ckpt' are set to your own path. - -- NER: Run `evaluation.py` for evaluation of BERT-base and BERT-NEZHA model. - - ```bash - python evaluation.py - ``` -- SQuAD v1.1: Run `squadeval.py` and `SQuAD_postprocess.py` for evaluation of BERT-base and BERT-NEZHA model. - - ```bash - python squadeval.py - ``` +- Run `bash scripts/run_XXX.py` for fine-tuning of BERT-base and BERT-NEZHA model. 
```bash - python SQuAD_postprocess.py + bash scripts/run_XXX.sh ``` ## Usage @@ -88,26 +72,56 @@ config.py: scale_window steps for once updatation of loss scale: N, default is 1000 optimizer optimizer used in the network: AdamWerigtDecayDynamicLR | Lamb | Momentum, default is "Lamb" -finetune_config.py: - task task type: SeqLabeling | Regression | Classification | COLA | SQUAD - num_labels number of labels to do classification - data_file dataset file to load: PATH, default is "/your/path/train.tfrecord" - schema_file dataset schema file to load: PATH, default is "/your/path/schema.json" - epoch_num repeat counts of training: N, default is 5 - ckpt_prefix prefix used to save checkpoint files: PREFIX, default is "bert" - ckpt_dir path to save checkpoint files: PATH, default is None - pre_training_ckpt checkpoint file to load: PATH, default is "/your/path/pre_training.ckpt" - use_crf whether to use crf for evaluation. use_crf takes effect only when task type is NER, default is False - optimizer optimizer used in fine-tune network: AdamWeigtDecayDynamicLR | Lamb | Momentum, default is "Lamb" - -evaluation_config.py: - task task type: SeqLabeling | Regression | Classification | COLA - num_labels number of labels to do classsification - data_file dataset file to load: PATH, default is "/your/path/evaluation.tfrecord" - schema_file dataset schema file to load: PATH, default is "/your/path/schema.json" - finetune_ckpt checkpoint file to load: PATH, default is "/your/path/your.ckpt" - use_crf whether to use crf for evaluation. use_crf takes effect only when task type is NER, default is False - clue_benchmark whether to use clue benchmark. 
clue_benchmark takes effect only when task type is NER, default is False +scripts/run_ner.sh: + device_target targeted device to run task: Ascend | GPU + do_train whether to run training on training set: true | false + do_eval whether to run eval on dev set: true | false + assessment_method assessment method to do evaluation: f1 | clue_benchmark + use_crf whether to use crf to calculate loss: true | false + device_id device id to run task + epoch_num total number of training epochs to perform + num_class number of classes to do labeling + vocab_file_path the vocabulary file that the BERT model was trained on + label2id_file_path label to id json file + save_finetune_checkpoint_path path to save generated finetuning checkpoint + load_pretrain_checkpoint_path initial checkpoint (usually from a pre-trained BERT model) + load_finetune_checkpoint_path give a finetuning checkpoint path if only do eval + train_data_file_path ner tfrecord for training. E.g., train.tfrecord + eval_data_file_path ner tfrecord for predictions if f1 is used to evaluate result, ner json for predictions if clue_benchmark is used to evaluate result + schema_file_path path to datafile schema file + +scripts/run_squad.sh: + device_target targeted device to run task: Ascend | GPU + do_train whether to run training on training set: true | false + do_eval whether to run eval on dev set: true | false + device_id device id to run task + epoch_num total number of training epochs to perform + num_class number of classes to classify, usually 2 for squad task + vocab_file_path the vocabulary file that the BERT model was trained on + eval_json_path path to squad dev json file + save_finetune_checkpoint_path path to save generated finetuning checkpoint + load_pretrain_checkpoint_path initial checkpoint (usually from a pre-trained BERT model) + load_finetune_checkpoint_path give a finetuning checkpoint path if only do eval + train_data_file_path squad tfrecord for training. 
E.g., train1.1.tfrecord + eval_data_file_path squad tfrecord for predictions. E.g., dev1.1.tfrecord + schema_file_path path to datafile schema file + +scripts/run_classifier.sh + device_target targeted device to run task: Ascend | GPU + do_train whether to run training on training set: true | false + do_eval whether to run eval on dev set: true | false + assessment_method assessment method to do evaluation: accuracy | f1 | mcc | spearman_correlation + device_id device id to run task + epoch_num total number of training epochs to perform + num_class number of classes to do labeling + save_finetune_checkpoint_path path to save generated finetuning checkpoint + load_pretrain_checkpoint_path initial checkpoint (usually from a pre-trained BERT model) + load_finetune_checkpoint_path give a finetuning checkpoint path if only do eval + train_data_file_path tfrecord for training. E.g., train.tfrecord + eval_data_file_path tfrecord for predictions. E.g., dev.tfrecord + schema_file_path path to datafile schema file + + ``` ### Parameters: @@ -115,7 +129,7 @@ evaluation_config.py: Parameters for dataset and network (Pre-Training/Fine-Tuning/Evaluation): batch_size batch size of input dataset: N, default is 16 seq_length length of input sequence: N, default is 128 - vocab_size size of each embedding vector: N, default is 21136 + vocab_size size of each embedding vector: N, must be consistent with the dataset you use.
Default is 21136 hidden_size size of bert encoder layers: N, default is 768 num_hidden_layers number of hidden layers: N, default is 12 num_attention_heads number of attention heads: N, default is 12 diff --git a/model_zoo/bert/evaluation.py b/model_zoo/bert/evaluation.py deleted file mode 100644 index 4e8b2a3aea..0000000000 --- a/model_zoo/bert/evaluation.py +++ /dev/null @@ -1,272 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -""" -Bert evaluation script. 
-""" - -import os -import argparse -import math -import numpy as np -import mindspore.common.dtype as mstype -from mindspore import context -from mindspore import log as logger -from mindspore.common.tensor import Tensor -import mindspore.dataset as de -import mindspore.dataset.transforms.c_transforms as C -from mindspore.train.model import Model -from mindspore.train.serialization import load_checkpoint, load_param_into_net -from src.evaluation_config import cfg, bert_net_cfg -from src.utils import BertNER, BertCLS, BertReg -from src.CRF import postprocess -from src.cluener_evaluation import submit -from src.finetune_config import tag_to_index - - -class Accuracy(): - """ - calculate accuracy - """ - def __init__(self): - self.acc_num = 0 - self.total_num = 0 - - def update(self, logits, labels): - """ - Update accuracy - """ - labels = labels.asnumpy() - labels = np.reshape(labels, -1) - logits = logits.asnumpy() - logit_id = np.argmax(logits, axis=-1) - self.acc_num += np.sum(labels == logit_id) - self.total_num += len(labels) - print("=========================accuracy is ", self.acc_num / self.total_num) - - -class F1(): - """ - calculate F1 score - """ - def __init__(self): - self.TP = 0 - self.FP = 0 - self.FN = 0 - - def update(self, logits, labels): - """ - update F1 score - """ - labels = labels.asnumpy() - labels = np.reshape(labels, -1) - if cfg.use_crf: - backpointers, best_tag_id = logits - best_path = postprocess(backpointers, best_tag_id) - logit_id = [] - for ele in best_path: - logit_id.extend(ele) - else: - logits = logits.asnumpy() - logit_id = np.argmax(logits, axis=-1) - logit_id = np.reshape(logit_id, -1) - pos_eva = np.isin(logit_id, [i for i in range(1, cfg.num_labels)]) - pos_label = np.isin(labels, [i for i in range(1, cfg.num_labels)]) - self.TP += np.sum(pos_eva&pos_label) - self.FP += np.sum(pos_eva&(~pos_label)) - self.FN += np.sum((~pos_eva)&pos_label) - - -class MCC(): - """ - Calculate Matthews Correlation Coefficient. 
- """ - def __init__(self): - self.TP = 0 - self.FP = 0 - self.FN = 0 - self.TN = 0 - - def update(self, logits, labels): - """ - Update MCC score - """ - labels = labels.asnumpy() - labels = np.reshape(labels, -1) - labels = labels.astype(np.bool) - logits = logits.asnumpy() - logit_id = np.argmax(logits, axis=-1) - logit_id = np.reshape(logit_id, -1) - logit_id = logit_id.astype(np.bool) - ornot = logit_id ^ labels - - self.TP += (~ornot & labels).sum() - self.FP += (ornot & ~labels).sum() - self.FN += (ornot & labels).sum() - self.TN += (~ornot & ~labels).sum() - - -class Spearman_Correlation(): - """ - calculate Spearman Correlation coefficient - """ - def __init__(self): - self.label = [] - self.logit = [] - - def update(self, logits, labels): - """ - Update Spearman Correlation - """ - labels = labels.asnumpy() - labels = np.reshape(labels, -1) - logits = logits.asnumpy() - logits = np.reshape(logits, -1) - self.label.append(labels) - self.logit.append(logits) - - def cal(self): - """ - Calculate Spearman Correlation - """ - label = np.concatenate(self.label) - logit = np.concatenate(self.logit) - sort_label = label.argsort()[::-1] - sort_logit = logit.argsort()[::-1] - n = len(label) - d_acc = 0 - for i in range(n): - d = np.where(sort_label == i)[0] - np.where(sort_logit == i)[0] - d_acc += d**2 - ps = 1 - 6*d_acc/n/(n**2-1) - return ps - - -def get_dataset(batch_size=1, repeat_count=1, distribute_file=''): - """ - get dataset - """ - _ = distribute_file - - ds = de.TFRecordDataset([cfg.data_file], cfg.schema_file, columns_list=["input_ids", "input_mask", - "segment_ids", "label_ids"]) - type_cast_op = C.TypeCast(mstype.int32) - ds = ds.map(input_columns="segment_ids", operations=type_cast_op) - ds = ds.map(input_columns="input_mask", operations=type_cast_op) - ds = ds.map(input_columns="input_ids", operations=type_cast_op) - if cfg.task == "Regression": - type_cast_op_float = C.TypeCast(mstype.float32) - ds = ds.map(input_columns="label_ids", 
operations=type_cast_op_float) - else: - ds = ds.map(input_columns="label_ids", operations=type_cast_op) - ds = ds.repeat(repeat_count) - - # apply shuffle operation - buffer_size = 960 - ds = ds.shuffle(buffer_size=buffer_size) - - # apply batch operations - ds = ds.batch(batch_size, drop_remainder=True) - return ds - - -def bert_predict(Evaluation): - """ - prediction function - """ - target = args_opt.device_target - if target == "Ascend": - devid = int(os.getenv('DEVICE_ID')) - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=devid) - elif target == "GPU": - context.set_context(mode=context.GRAPH_MODE, device_target="GPU") - if bert_net_cfg.compute_type != mstype.float32: - logger.warning('GPU only support fp32 temporarily, run with fp32.') - bert_net_cfg.compute_type = mstype.float32 - else: - raise Exception("Target error, GPU or Ascend is supported.") - dataset = get_dataset(bert_net_cfg.batch_size, 1) - if cfg.use_crf: - net_for_pretraining = Evaluation(bert_net_cfg, False, num_labels=len(tag_to_index), use_crf=True, - tag_to_index=tag_to_index, dropout_prob=0.0) - else: - net_for_pretraining = Evaluation(bert_net_cfg, False, num_labels) - net_for_pretraining.set_train(False) - param_dict = load_checkpoint(cfg.finetune_ckpt) - load_param_into_net(net_for_pretraining, param_dict) - model = Model(net_for_pretraining) - return model, dataset - -def test_eval(): - """ - evaluation function - """ - if cfg.task == "SeqLabeling": - task_type = BertNER - elif cfg.task == "Regression": - task_type = BertReg - elif cfg.task == "Classification": - task_type = BertCLS - elif cfg.task == "COLA": - task_type = BertCLS - else: - raise ValueError("Task not supported.") - model, dataset = bert_predict(task_type) - - if cfg.clue_benchmark: - submit(model, cfg.data_file, bert_net_cfg.seq_length) - else: - if cfg.task == "SeqLabeling": - callback = F1() - elif cfg.task == "COLA": - callback = MCC() - elif cfg.task == "Regression": - callback = 
Spearman_Correlation() - else: - callback = Accuracy() - - columns_list = ["input_ids", "input_mask", "segment_ids", "label_ids"] - for data in dataset.create_dict_iterator(): - input_data = [] - for i in columns_list: - input_data.append(Tensor(data[i])) - input_ids, input_mask, token_type_id, label_ids = input_data - logits = model.predict(input_ids, input_mask, token_type_id, label_ids) - callback.update(logits, label_ids) - print("==============================================================") - if cfg.task == "SeqLabeling": - print("Precision {:.6f} ".format(callback.TP / (callback.TP + callback.FP))) - print("Recall {:.6f} ".format(callback.TP / (callback.TP + callback.FN))) - print("F1 {:.6f} ".format(2*callback.TP / (2*callback.TP + callback.FP + callback.FN))) - elif cfg.task == "COLA": - TP = callback.TP - TN = callback.TN - FP = callback.FP - FN = callback.FN - mcc = (TP*TN-FP*FN)/math.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN)) - print("MCC: {:.6f}".format(mcc)) - elif cfg.task == "Regression": - print("Spearman Correlation is {:.6f}".format(callback.cal()[0])) - else: - print("acc_num {} , total_num {}, accuracy {:.6f}".format(callback.acc_num, callback.total_num, - callback.acc_num / callback.total_num)) - print("==============================================================") - -parser = argparse.ArgumentParser(description='Bert eval') -parser.add_argument('--device_target', type=str, default='Ascend', help='Device target') -args_opt = parser.parse_args() -if __name__ == "__main__": - num_labels = cfg.num_labels - test_eval() diff --git a/model_zoo/bert/finetune.py b/model_zoo/bert/finetune.py deleted file mode 100644 index eb1880b9cc..0000000000 --- a/model_zoo/bert/finetune.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -""" -Bert finetune script. -""" - -import os -import argparse -from src.utils import BertFinetuneCell, BertCLS, BertNER, BertSquad, BertSquadCell, BertReg -from src.finetune_config import cfg, bert_net_cfg, tag_to_index -import mindspore.common.dtype as mstype -from mindspore import context -from mindspore import log as logger -import mindspore.dataset as de -import mindspore.dataset.transforms.c_transforms as C -from mindspore.nn.wrap.loss_scale import DynamicLossScaleUpdateCell -from mindspore.nn.optim import AdamWeightDecayDynamicLR, Lamb, Momentum -from mindspore.train.model import Model -from mindspore.train.callback import Callback -from mindspore.train.callback import CheckpointConfig, ModelCheckpoint -from mindspore.train.serialization import load_checkpoint, load_param_into_net - -class LossCallBack(Callback): - """ - Monitor the loss in training. - If the loss is NAN or INF, terminate training. - Note: - If per_print_times is 0, do not print loss. - Args: - per_print_times (int): Print loss every times. Default: 1. 
- """ - def __init__(self, per_print_times=1): - super(LossCallBack, self).__init__() - if not isinstance(per_print_times, int) or per_print_times < 0: - raise ValueError("print_step must be in and >= 0.") - self._per_print_times = per_print_times - - def step_end(self, run_context): - cb_params = run_context.original_args() - with open("./loss.log", "a+") as f: - f.write("epoch: {}, step: {}, outputs are {}".format(cb_params.cur_epoch_num, cb_params.cur_step_num, - str(cb_params.net_outputs))) - f.write("\n") - -def get_dataset(batch_size=1, repeat_count=1, distribute_file=''): - """ - get dataset - """ - ds = de.TFRecordDataset([cfg.data_file], cfg.schema_file, columns_list=["input_ids", "input_mask", - "segment_ids", "label_ids"]) - type_cast_op = C.TypeCast(mstype.int32) - ds = ds.map(input_columns="segment_ids", operations=type_cast_op) - ds = ds.map(input_columns="input_mask", operations=type_cast_op) - ds = ds.map(input_columns="input_ids", operations=type_cast_op) - if cfg.task == "Regression": - type_cast_op_float = C.TypeCast(mstype.float32) - ds = ds.map(input_columns="label_ids", operations=type_cast_op_float) - else: - ds = ds.map(input_columns="label_ids", operations=type_cast_op) - ds = ds.repeat(repeat_count) - - # apply shuffle operation - buffer_size = 960 - ds = ds.shuffle(buffer_size=buffer_size) - - # apply batch operations - ds = ds.batch(batch_size, drop_remainder=True) - return ds - -def get_squad_dataset(batch_size=1, repeat_count=1, distribute_file=''): - """ - get SQuAD dataset - """ - ds = de.TFRecordDataset([cfg.data_file], cfg.schema_file, columns_list=["input_ids", "input_mask", "segment_ids", - "start_positions", "end_positions", - "unique_ids", "is_impossible"]) - type_cast_op = C.TypeCast(mstype.int32) - ds = ds.map(input_columns="segment_ids", operations=type_cast_op) - ds = ds.map(input_columns="input_ids", operations=type_cast_op) - ds = ds.map(input_columns="input_mask", operations=type_cast_op) - ds = 
ds.map(input_columns="start_positions", operations=type_cast_op) - ds = ds.map(input_columns="end_positions", operations=type_cast_op) - ds = ds.repeat(repeat_count) - - buffer_size = 960 - ds = ds.shuffle(buffer_size=buffer_size) - ds = ds.batch(batch_size, drop_remainder=True) - return ds - -def test_train(): - """ - finetune function - """ - target = args_opt.device_target - if target == "Ascend": - devid = int(os.getenv('DEVICE_ID')) - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=devid) - elif target == "GPU": - context.set_context(mode=context.GRAPH_MODE, device_target="GPU") - if bert_net_cfg.compute_type != mstype.float32: - logger.warning('GPU only support fp32 temporarily, run with fp32.') - bert_net_cfg.compute_type = mstype.float32 - else: - raise Exception("Target error, GPU or Ascend is supported.") - #BertCLSTrain for classification - #BertNERTrain for sequence labeling - if cfg.task == 'SeqLabeling': - if cfg.use_crf: - netwithloss = BertNER(bert_net_cfg, True, num_labels=len(tag_to_index), use_crf=True, - tag_to_index=tag_to_index, dropout_prob=0.1) - else: - netwithloss = BertNER(bert_net_cfg, True, num_labels=cfg.num_labels, dropout_prob=0.1) - elif cfg.task == 'SQUAD': - netwithloss = BertSquad(bert_net_cfg, True, 2, dropout_prob=0.1) - elif cfg.task == 'Regression': - netwithloss = BertReg(bert_net_cfg, True, num_labels=cfg.num_labels, dropout_prob=0.1) - elif cfg.task == 'Classification': - netwithloss = BertCLS(bert_net_cfg, True, num_labels=cfg.num_labels, dropout_prob=0.1) - else: - raise Exception("Target error, GPU or Ascend is supported.") - if cfg.task == 'SQUAD': - dataset = get_squad_dataset(bert_net_cfg.batch_size, cfg.epoch_num) - else: - dataset = get_dataset(bert_net_cfg.batch_size, cfg.epoch_num) - # optimizer - steps_per_epoch = dataset.get_dataset_size() - if cfg.optimizer == 'AdamWeightDecayDynamicLR': - optimizer = AdamWeightDecayDynamicLR(netwithloss.trainable_params(), - 
decay_steps=steps_per_epoch * cfg.epoch_num, - learning_rate=cfg.AdamWeightDecayDynamicLR.learning_rate, - end_learning_rate=cfg.AdamWeightDecayDynamicLR.end_learning_rate, - power=cfg.AdamWeightDecayDynamicLR.power, - warmup_steps=int(steps_per_epoch * cfg.epoch_num * 0.1), - weight_decay=cfg.AdamWeightDecayDynamicLR.weight_decay, - eps=cfg.AdamWeightDecayDynamicLR.eps) - elif cfg.optimizer == 'Lamb': - optimizer = Lamb(netwithloss.trainable_params(), decay_steps=steps_per_epoch * cfg.epoch_num, - start_learning_rate=cfg.Lamb.start_learning_rate, end_learning_rate=cfg.Lamb.end_learning_rate, - power=cfg.Lamb.power, weight_decay=cfg.Lamb.weight_decay, - warmup_steps=int(steps_per_epoch * cfg.epoch_num * 0.1), decay_filter=cfg.Lamb.decay_filter) - elif cfg.optimizer == 'Momentum': - optimizer = Momentum(netwithloss.trainable_params(), learning_rate=cfg.Momentum.learning_rate, - momentum=cfg.Momentum.momentum) - else: - raise Exception("Optimizer not supported.") - # load checkpoint into network - ckpt_config = CheckpointConfig(save_checkpoint_steps=steps_per_epoch, keep_checkpoint_max=1) - ckpoint_cb = ModelCheckpoint(prefix=cfg.ckpt_prefix, directory=cfg.ckpt_dir, config=ckpt_config) - param_dict = load_checkpoint(cfg.pre_training_ckpt) - load_param_into_net(netwithloss, param_dict) - - update_cell = DynamicLossScaleUpdateCell(loss_scale_value=2**32, scale_factor=2, scale_window=1000) - if cfg.task == 'SQUAD': - netwithgrads = BertSquadCell(netwithloss, optimizer=optimizer, scale_update_cell=update_cell) - else: - netwithgrads = BertFinetuneCell(netwithloss, optimizer=optimizer, scale_update_cell=update_cell) - model = Model(netwithgrads) - model.train(cfg.epoch_num, dataset, callbacks=[LossCallBack(), ckpoint_cb]) - - -parser = argparse.ArgumentParser(description='Bert finetune') -parser.add_argument('--device_target', type=str, default='Ascend', help='Device target') -args_opt = parser.parse_args() -if __name__ == "__main__": - test_train() diff --git 
a/model_zoo/bert/run_classifier.py b/model_zoo/bert/run_classifier.py new file mode 100644 index 0000000000..4b2801f87c --- /dev/null +++ b/model_zoo/bert/run_classifier.py @@ -0,0 +1,201 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +''' +Bert finetune and evaluation script. +''' + +import os +import argparse +from src.bert_for_finetune import BertFinetuneCell, BertCLS +from src.finetune_eval_config import optimizer_cfg, bert_net_cfg +from src.dataset import create_classification_dataset +from src.assessment_method import Accuracy, F1, MCC, Spearman_Correlation +from src.utils import make_directory, LossCallBack, LoadNewestCkpt +import mindspore.common.dtype as mstype +from mindspore import context +from mindspore import log as logger +from mindspore.nn.wrap.loss_scale import DynamicLossScaleUpdateCell +from mindspore.nn.optim import AdamWeightDecayDynamicLR, Lamb, Momentum +from mindspore.common.tensor import Tensor +from mindspore.train.model import Model +from mindspore.train.callback import CheckpointConfig, ModelCheckpoint, TimeMonitor +from mindspore.train.serialization import load_checkpoint, load_param_into_net + +_cur_dir = os.getcwd() + +def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoint_path=""): + """ do train """ + if load_checkpoint_path == "": + raise ValueError("Pretrain model missed, 
finetune task must load pretrain model!") + steps_per_epoch = dataset.get_dataset_size() + epoch_num = dataset.get_repeat_count() + # optimizer + if optimizer_cfg.optimizer == 'AdamWeightDecayDynamicLR': + optimizer = AdamWeightDecayDynamicLR(network.trainable_params(), + decay_steps=steps_per_epoch * epoch_num, + learning_rate=optimizer_cfg.AdamWeightDecayDynamicLR.learning_rate, + end_learning_rate=optimizer_cfg.AdamWeightDecayDynamicLR.end_learning_rate, + power=optimizer_cfg.AdamWeightDecayDynamicLR.power, + warmup_steps=int(steps_per_epoch * epoch_num * 0.1), + weight_decay=optimizer_cfg.AdamWeightDecayDynamicLR.weight_decay, + eps=optimizer_cfg.AdamWeightDecayDynamicLR.eps) + elif optimizer_cfg.optimizer == 'Lamb': + optimizer = Lamb(network.trainable_params(), decay_steps=steps_per_epoch * epoch_num, + start_learning_rate=optimizer_cfg.Lamb.start_learning_rate, + end_learning_rate=optimizer_cfg.Lamb.end_learning_rate, + power=optimizer_cfg.Lamb.power, weight_decay=optimizer_cfg.Lamb.weight_decay, + warmup_steps=int(steps_per_epoch * epoch_num * 0.1), + decay_filter=optimizer_cfg.Lamb.decay_filter) + elif optimizer_cfg.optimizer == 'Momentum': + optimizer = Momentum(network.trainable_params(), learning_rate=optimizer_cfg.Momentum.learning_rate, + momentum=optimizer_cfg.Momentum.momentum) + else: + raise Exception("Optimizer not supported. 
support: [AdamWeightDecayDynamicLR, Lamb, Momentum]") + + # load checkpoint into network + ckpt_config = CheckpointConfig(save_checkpoint_steps=steps_per_epoch, keep_checkpoint_max=1) + ckpoint_cb = ModelCheckpoint(prefix="classifier", directory=save_checkpoint_path, config=ckpt_config) + param_dict = load_checkpoint(load_checkpoint_path) + load_param_into_net(network, param_dict) + + update_cell = DynamicLossScaleUpdateCell(loss_scale_value=2**32, scale_factor=2, scale_window=1000) + netwithgrads = BertFinetuneCell(network, optimizer=optimizer, scale_update_cell=update_cell) + model = Model(netwithgrads) + callbacks = [TimeMonitor(dataset.get_dataset_size()), LossCallBack(), ckpoint_cb] + model.train(epoch_num, dataset, callbacks=callbacks) + +def eval_result_print(assessment_method="accuracy", callback=None): + """ print eval result """ + if assessment_method == "accuracy": + print("acc_num {} , total_num {}, accuracy {:.6f}".format(callback.acc_num, callback.total_num, + callback.acc_num / callback.total_num)) + elif assessment_method == "f1": + print("Precision {:.6f} ".format(callback.TP / (callback.TP + callback.FP))) + print("Recall {:.6f} ".format(callback.TP / (callback.TP + callback.FN))) + print("F1 {:.6f} ".format(2 * callback.TP / (2 * callback.TP + callback.FP + callback.FN))) + elif assessment_method == "mcc": + print("MCC {:.6f} ".format(callback.cal())) + elif assessment_method == "spearman_correlation": + print("Spearman Correlation is {:.6f} ".format(callback.cal()[0])) + else: + raise ValueError("Assessment method not supported, support: [accuracy, f1, mcc, spearman_correlation]") + +def do_eval(dataset=None, network=None, num_class=2, assessment_method="accuracy", load_checkpoint_path=""): + """ do eval """ + if load_checkpoint_path == "": + raise ValueError("Finetune model missed, evaluation task must load finetune model!") + net_for_pretraining = network(bert_net_cfg, False, num_class) + net_for_pretraining.set_train(False) + param_dict = 
load_checkpoint(load_checkpoint_path) + load_param_into_net(net_for_pretraining, param_dict) + model = Model(net_for_pretraining) + + if assessment_method == "accuracy": + callback = Accuracy() + elif assessment_method == "f1": + callback = F1(False, num_class) + elif assessment_method == "mcc": + callback = MCC() + elif assessment_method == "spearman_correlation": + callback = Spearman_Correlation() + else: + raise ValueError("Assessment method not supported, support: [accuracy, f1, mcc, spearman_correlation]") + + columns_list = ["input_ids", "input_mask", "segment_ids", "label_ids"] + for data in dataset.create_dict_iterator(): + input_data = [] + for i in columns_list: + input_data.append(Tensor(data[i])) + input_ids, input_mask, token_type_id, label_ids = input_data + logits = model.predict(input_ids, input_mask, token_type_id, label_ids) + callback.update(logits, label_ids) + print("==============================================================") + eval_result_print(assessment_method, callback) + print("==============================================================") + +def run_classifier(): + """run classifier task""" + parser = argparse.ArgumentParser(description="run classifier") + parser.add_argument("--device_target", type=str, default="Ascend", help="Device type, default is Ascend") + parser.add_argument("--assessment_method", type=str, default="accuracy", help="assessment_method include: " + "[MCC, Spearman_correlation, " + "Accuracy], default is accuracy") + parser.add_argument("--do_train", type=str, default="false", help="Eable train, default is false") + parser.add_argument("--do_eval", type=str, default="false", help="Eable eval, default is false") + parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.") + parser.add_argument("--epoch_num", type=int, default="1", help="Epoch number, default is 1.") + parser.add_argument("--num_class", type=int, default="2", help="The number of class, default is 2.") + 
parser.add_argument("--save_finetune_checkpoint_path", type=str, default="", help="Save checkpoint path") + parser.add_argument("--load_pretrain_checkpoint_path", type=str, default="", help="Load checkpoint file path") + parser.add_argument("--load_finetune_checkpoint_path", type=str, default="", help="Load checkpoint file path") + parser.add_argument("--train_data_file_path", type=str, default="", + help="Data path, it is better to use absolute path") + parser.add_argument("--eval_data_file_path", type=str, default="", + help="Data path, it is better to use absolute path") + parser.add_argument("--schema_file_path", type=str, default="", + help="Schema path, it is better to use absolute path") + args_opt = parser.parse_args() + epoch_num = args_opt.epoch_num + assessment_method = args_opt.assessment_method.lower() + load_pretrain_checkpoint_path = args_opt.load_pretrain_checkpoint_path + save_finetune_checkpoint_path = args_opt.save_finetune_checkpoint_path + load_finetune_checkpoint_path = args_opt.load_finetune_checkpoint_path + + if args_opt.do_train.lower() == "false" and args_opt.do_eval.lower() == "false": + raise ValueError("At least one of 'do_train' or 'do_eval' must be true") + if args_opt.do_train.lower() == "true" and args_opt.train_data_file_path == "": + raise ValueError("'train_data_file_path' must be set when do finetune task") + if args_opt.do_eval.lower() == "true" and args_opt.eval_data_file_path == "": + raise ValueError("'eval_data_file_path' must be set when do evaluation task") + + target = args_opt.device_target + if target == "Ascend": + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id) + elif target == "GPU": + context.set_context(mode=context.GRAPH_MODE, device_target="GPU") + if bert_net_cfg.compute_type != mstype.float32: + logger.warning('GPU only support fp32 temporarily, run with fp32.') + bert_net_cfg.compute_type = mstype.float32 + else: + raise Exception("Target error, GPU or 
Ascend is supported.") + + netwithloss = BertCLS(bert_net_cfg, True, num_labels=args_opt.num_class, dropout_prob=0.1, + assessment_method=assessment_method) + + if args_opt.do_train.lower() == "true": + ds = create_classification_dataset(batch_size=bert_net_cfg.batch_size, repeat_count=epoch_num, + assessment_method=assessment_method, + data_file_path=args_opt.train_data_file_path, + schema_file_path=args_opt.schema_file_path) + do_train(ds, netwithloss, load_pretrain_checkpoint_path, save_finetune_checkpoint_path) + + if args_opt.do_eval.lower() == "true": + if save_finetune_checkpoint_path == "": + load_finetune_checkpoint_dir = _cur_dir + else: + load_finetune_checkpoint_dir = make_directory(save_finetune_checkpoint_path) + load_finetune_checkpoint_path = LoadNewestCkpt(load_finetune_checkpoint_dir, + ds.get_dataset_size(), epoch_num, "classifier") + + if args_opt.do_eval.lower() == "true": + ds = create_classification_dataset(batch_size=bert_net_cfg.batch_size, repeat_count=epoch_num, + assessment_method=assessment_method, + data_file_path=args_opt.eval_data_file_path, + schema_file_path=args_opt.schema_file_path) + do_eval(ds, BertCLS, args_opt.num_class, assessment_method, load_finetune_checkpoint_path) + +if __name__ == "__main__": + run_classifier() diff --git a/model_zoo/bert/run_ner.py b/model_zoo/bert/run_ner.py new file mode 100644 index 0000000000..a61c96066e --- /dev/null +++ b/model_zoo/bert/run_ner.py @@ -0,0 +1,228 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +''' +Bert finetune and evaluation script. +''' + +import os +import json +import argparse +from src.bert_for_finetune import BertFinetuneCell, BertNER +from src.finetune_eval_config import optimizer_cfg, bert_net_cfg +from src.dataset import create_ner_dataset +from src.utils import make_directory, LossCallBack, LoadNewestCkpt +from src.assessment_method import Accuracy, F1, MCC, Spearman_Correlation +import mindspore.common.dtype as mstype +from mindspore import context +from mindspore import log as logger +from mindspore.nn.wrap.loss_scale import DynamicLossScaleUpdateCell +from mindspore.nn.optim import AdamWeightDecayDynamicLR, Lamb, Momentum +from mindspore.common.tensor import Tensor +from mindspore.train.model import Model +from mindspore.train.callback import CheckpointConfig, ModelCheckpoint, TimeMonitor +from mindspore.train.serialization import load_checkpoint, load_param_into_net + +_cur_dir = os.getcwd() + + +def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoint_path=""): + """ do train """ + if load_checkpoint_path == "": + raise ValueError("Pretrain model missed, finetune task must load pretrain model!") + steps_per_epoch = dataset.get_dataset_size() + epoch_num = dataset.get_repeat_count() + # optimizer + if optimizer_cfg.optimizer == 'AdamWeightDecayDynamicLR': + optimizer = AdamWeightDecayDynamicLR(network.trainable_params(), + decay_steps=steps_per_epoch * epoch_num, + learning_rate=optimizer_cfg.AdamWeightDecayDynamicLR.learning_rate, + end_learning_rate=optimizer_cfg.AdamWeightDecayDynamicLR.end_learning_rate, + power=optimizer_cfg.AdamWeightDecayDynamicLR.power, + warmup_steps=int(steps_per_epoch * epoch_num * 0.1), + weight_decay=optimizer_cfg.AdamWeightDecayDynamicLR.weight_decay, + eps=optimizer_cfg.AdamWeightDecayDynamicLR.eps) 
+ elif optimizer_cfg.optimizer == 'Lamb': + optimizer = Lamb(network.trainable_params(), decay_steps=steps_per_epoch * epoch_num, + start_learning_rate=optimizer_cfg.Lamb.start_learning_rate, + end_learning_rate=optimizer_cfg.Lamb.end_learning_rate, + power=optimizer_cfg.Lamb.power, weight_decay=optimizer_cfg.Lamb.weight_decay, + warmup_steps=int(steps_per_epoch * epoch_num * 0.1), + decay_filter=optimizer_cfg.Lamb.decay_filter) + elif optimizer_cfg.optimizer == 'Momentum': + optimizer = Momentum(network.trainable_params(), learning_rate=optimizer_cfg.Momentum.learning_rate, + momentum=optimizer_cfg.Momentum.momentum) + else: + raise Exception("Optimizer not supported. support: [AdamWeightDecayDynamicLR, Lamb, Momentum]") + + # load checkpoint into network + ckpt_config = CheckpointConfig(save_checkpoint_steps=steps_per_epoch, keep_checkpoint_max=1) + ckpoint_cb = ModelCheckpoint(prefix="ner", directory=save_checkpoint_path, config=ckpt_config) + param_dict = load_checkpoint(load_checkpoint_path) + load_param_into_net(network, param_dict) + + update_cell = DynamicLossScaleUpdateCell(loss_scale_value=2**32, scale_factor=2, scale_window=1000) + netwithgrads = BertFinetuneCell(network, optimizer=optimizer, scale_update_cell=update_cell) + model = Model(netwithgrads) + callbacks = [TimeMonitor(dataset.get_dataset_size()), LossCallBack(), ckpoint_cb] + model.train(epoch_num, dataset, callbacks=callbacks) + +def eval_result_print(assessment_method="accuracy", callback=None): + """print eval result""" + if assessment_method == "accuracy": + print("acc_num {} , total_num {}, accuracy {:.6f}".format(callback.acc_num, callback.total_num, + callback.acc_num / callback.total_num)) + elif assessment_method == "f1": + print("Precision {:.6f} ".format(callback.TP / (callback.TP + callback.FP))) + print("Recall {:.6f} ".format(callback.TP / (callback.TP + callback.FN))) + print("F1 {:.6f} ".format(2 * callback.TP / (2 * callback.TP + callback.FP + callback.FN))) + elif 
assessment_method == "mcc": + print("MCC {:.6f} ".format(callback.cal())) + elif assessment_method == "spearman_correlation": + print("Spearman Correlation is {:.6f} ".format(callback.cal()[0])) + else: + raise ValueError("Assessment method not supported, support: [accuracy, f1, mcc, spearman_correlation]") + +def do_eval(dataset=None, network=None, use_crf="", num_class=2, assessment_method="accuracy", data_file="", + load_checkpoint_path="", vocab_file="", label2id_file="", tag_to_index=None): + """ do eval """ + if load_checkpoint_path == "": + raise ValueError("Finetune model missed, evaluation task must load finetune model!") + if assessment_method == "clue_benchmark": + bert_net_cfg.batch_size = 1 + net_for_pretraining = network(bert_net_cfg, False, num_class, use_crf=(use_crf.lower() == "true"), + tag_to_index=tag_to_index) + net_for_pretraining.set_train(False) + param_dict = load_checkpoint(load_checkpoint_path) + load_param_into_net(net_for_pretraining, param_dict) + model = Model(net_for_pretraining) + + if assessment_method == "clue_benchmark": + from src.cluener_evaluation import submit + submit(model=model, path=data_file, vocab_file=vocab_file, use_crf=use_crf, label2id_file=label2id_file) + else: + if assessment_method == "accuracy": + callback = Accuracy() + elif assessment_method == "f1": + callback = F1((use_crf.lower() == "true"), num_class) + elif assessment_method == "mcc": + callback = MCC() + elif assessment_method == "spearman_correlation": + callback = Spearman_Correlation() + else: + raise ValueError("Assessment method not supported, support: [accuracy, f1, mcc, spearman_correlation]") + + columns_list = ["input_ids", "input_mask", "segment_ids", "label_ids"] + for data in dataset.create_dict_iterator(): + input_data = [] + for i in columns_list: + input_data.append(Tensor(data[i])) + input_ids, input_mask, token_type_id, label_ids = input_data + logits = model.predict(input_ids, input_mask, token_type_id, label_ids) + 
callback.update(logits, label_ids) + print("==============================================================") + eval_result_print(assessment_method, callback) + print("==============================================================") + +def run_ner(): + """run ner task""" + parser = argparse.ArgumentParser(description="run classifier") + parser.add_argument("--device_target", type=str, default="Ascend", help="Device type, default is Ascend") + parser.add_argument("--assessment_method", type=str, default="accuracy", help="assessment_method include: " + "[F1, clue_benchmark], default is F1") + parser.add_argument("--do_train", type=str, default="false", help="Eable train, default is false") + parser.add_argument("--do_eval", type=str, default="false", help="Eable eval, default is false") + parser.add_argument("--use_crf", type=str, default="false", help="Use crf, default is false") + parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.") + parser.add_argument("--epoch_num", type=int, default="1", help="Epoch number, default is 1.") + parser.add_argument("--num_class", type=int, default="2", help="The number of class, default is 2.") + parser.add_argument("--vocab_file_path", type=str, default="", help="Vocab file path, used in clue benchmark") + parser.add_argument("--label2id_file_path", type=str, default="", help="label2id file path, used in clue benchmark") + parser.add_argument("--save_finetune_checkpoint_path", type=str, default="", help="Save checkpoint path") + parser.add_argument("--load_pretrain_checkpoint_path", type=str, default="", help="Load checkpoint file path") + parser.add_argument("--load_finetune_checkpoint_path", type=str, default="", help="Load checkpoint file path") + parser.add_argument("--train_data_file_path", type=str, default="", + help="Data path, it is better to use absolute path") + parser.add_argument("--eval_data_file_path", type=str, default="", + help="Data path, it is better to use absolute path") + 
parser.add_argument("--schema_file_path", type=str, default="", + help="Schema path, it is better to use absolute path") + args_opt = parser.parse_args() + epoch_num = args_opt.epoch_num + assessment_method = args_opt.assessment_method.lower() + load_pretrain_checkpoint_path = args_opt.load_pretrain_checkpoint_path + save_finetune_checkpoint_path = args_opt.save_finetune_checkpoint_path + load_finetune_checkpoint_path = args_opt.load_finetune_checkpoint_path + + if args_opt.do_train.lower() == "false" and args_opt.do_eval.lower() == "false": + raise ValueError("At least one of 'do_train' or 'do_eval' must be true") + if args_opt.do_train.lower() == "true" and args_opt.train_data_file_path == "": + raise ValueError("'train_data_file_path' must be set when do finetune task") + if args_opt.do_eval.lower() == "true" and args_opt.eval_data_file_path == "": + raise ValueError("'eval_data_file_path' must be set when do evaluation task") + if args_opt.assessment_method.lower() == "clue_benchmark" and args_opt.vocab_file_path == "": + raise ValueError("'vocab_file_path' must be set to do clue benchmark") + if args_opt.use_crf.lower() == "true" and args_opt.label2id_file_path == "": + raise ValueError("'label2id_file_path' must be set to use crf") + if args_opt.assessment_method.lower() == "clue_benchmark" and args_opt.label2id_file_path == "": + raise ValueError("'label2id_file_path' must be set to do clue benchmark") + + target = args_opt.device_target + if target == "Ascend": + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id) + elif target == "GPU": + context.set_context(mode=context.GRAPH_MODE, device_target="GPU") + if bert_net_cfg.compute_type != mstype.float32: + logger.warning('GPU only support fp32 temporarily, run with fp32.') + bert_net_cfg.compute_type = mstype.float32 + else: + raise Exception("Target error, GPU or Ascend is supported.") + + tag_to_index = None + if args_opt.use_crf.lower() == "true": + with 
open(args_opt.label2id_file_path) as json_file: + tag_to_index = json.load(json_file) + max_val = max(tag_to_index.values()) + tag_to_index[""] = max_val + 1 + tag_to_index[""] = max_val + 2 + number_labels = len(tag_to_index) + else: + number_labels = args_opt.num_class + netwithloss = BertNER(bert_net_cfg, True, num_labels=number_labels, + use_crf=(args_opt.use_crf.lower() == "true"), + tag_to_index=tag_to_index, dropout_prob=0.1) + if args_opt.do_train.lower() == "true": + ds = create_ner_dataset(batch_size=bert_net_cfg.batch_size, repeat_count=epoch_num, + assessment_method=assessment_method, data_file_path=args_opt.train_data_file_path, + schema_file_path=args_opt.schema_file_path) + do_train(ds, netwithloss, load_pretrain_checkpoint_path, save_finetune_checkpoint_path) + + if args_opt.do_eval.lower() == "true": + if save_finetune_checkpoint_path == "": + load_finetune_checkpoint_dir = _cur_dir + else: + load_finetune_checkpoint_dir = make_directory(save_finetune_checkpoint_path) + load_finetune_checkpoint_path = LoadNewestCkpt(load_finetune_checkpoint_dir, + ds.get_dataset_size(), epoch_num, "ner") + + if args_opt.do_eval.lower() == "true": + ds = create_ner_dataset(batch_size=bert_net_cfg.batch_size, repeat_count=epoch_num, + assessment_method=assessment_method, data_file_path=args_opt.eval_data_file_path, + schema_file_path=args_opt.schema_file_path) + do_eval(ds, BertNER, args_opt.use_crf, number_labels, assessment_method, args_opt.eval_data_file_path, + load_finetune_checkpoint_path, args_opt.vocab_file_path, args_opt.label2id_file_path, tag_to_index) + +if __name__ == "__main__": + run_ner() diff --git a/model_zoo/bert/run_pretrain.py b/model_zoo/bert/run_pretrain.py index 65768946c1..7123c942f3 100644 --- a/model_zoo/bert/run_pretrain.py +++ b/model_zoo/bert/run_pretrain.py @@ -26,33 +26,16 @@ from mindspore import context from mindspore.train.model import Model from mindspore.train.parallel_utils import ParallelMode from mindspore.nn.wrap.loss_scale 
import DynamicLossScaleUpdateCell -from mindspore.train.callback import Callback, ModelCheckpoint, CheckpointConfig, TimeMonitor +from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, TimeMonitor from mindspore.train.serialization import load_checkpoint, load_param_into_net from mindspore.nn.optim import Lamb, Momentum, AdamWeightDecayDynamicLR from mindspore import log as logger from src import BertNetworkWithLoss, BertTrainOneStepCell, BertTrainOneStepWithLossScaleCell from src.dataset import create_bert_dataset from src.config import cfg, bert_net_cfg +from src.utils import LossCallBack _current_dir = os.path.dirname(os.path.realpath(__file__)) -class LossCallBack(Callback): - """ - Monitor the loss in training. - If the loss in NAN or INF terminating training. - Note: - if per_print_times is 0 do not print loss. - Args: - per_print_times (int): Print loss every times. Default: 1. - """ - def __init__(self, per_print_times=1): - super(LossCallBack, self).__init__() - if not isinstance(per_print_times, int) or per_print_times < 0: - raise ValueError("print_step must be int and >= 0") - self._per_print_times = per_print_times - def step_end(self, run_context): - cb_params = run_context.original_args() - print("epoch: {}, step: {}, outputs are {}".format(cb_params.cur_epoch_num, cb_params.cur_step_num, - str(cb_params.net_outputs))) def run_pretrain(): """pre-train bert_clue""" diff --git a/model_zoo/bert/run_squad.py b/model_zoo/bert/run_squad.py new file mode 100644 index 0000000000..083cedac1d --- /dev/null +++ b/model_zoo/bert/run_squad.py @@ -0,0 +1,204 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +''' +Bert finetune and evaluation script. +''' +import os +import argparse +import collections +from src.bert_for_finetune import BertSquadCell, BertSquad +from src.finetune_eval_config import optimizer_cfg, bert_net_cfg +from src.dataset import create_squad_dataset +from src import tokenization +from src.create_squad_data import read_squad_examples, convert_examples_to_features +from src.run_squad import write_predictions +from src.utils import make_directory, LossCallBack, LoadNewestCkpt +import mindspore.common.dtype as mstype +from mindspore import context +from mindspore import log as logger +from mindspore.nn.wrap.loss_scale import DynamicLossScaleUpdateCell +from mindspore.nn.optim import AdamWeightDecayDynamicLR, Lamb, Momentum +from mindspore.common.tensor import Tensor +from mindspore.train.model import Model +from mindspore.train.callback import CheckpointConfig, ModelCheckpoint, TimeMonitor +from mindspore.train.serialization import load_checkpoint, load_param_into_net + +_cur_dir = os.getcwd() + +def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoint_path=""): + """ do train """ + if load_checkpoint_path == "": + raise ValueError("Pretrain model missed, finetune task must load pretrain model!") + steps_per_epoch = dataset.get_dataset_size() + epoch_num = dataset.get_repeat_count() + # optimizer + if optimizer_cfg.optimizer == 'AdamWeightDecayDynamicLR': + optimizer = AdamWeightDecayDynamicLR(network.trainable_params(), + 
decay_steps=steps_per_epoch * epoch_num, + learning_rate=optimizer_cfg.AdamWeightDecayDynamicLR.learning_rate, + end_learning_rate=optimizer_cfg.AdamWeightDecayDynamicLR.end_learning_rate, + power=optimizer_cfg.AdamWeightDecayDynamicLR.power, + warmup_steps=int(steps_per_epoch * epoch_num * 0.1), + weight_decay=optimizer_cfg.AdamWeightDecayDynamicLR.weight_decay, + eps=optimizer_cfg.AdamWeightDecayDynamicLR.eps) + elif optimizer_cfg.optimizer == 'Lamb': + optimizer = Lamb(network.trainable_params(), decay_steps=steps_per_epoch * epoch_num, + start_learning_rate=optimizer_cfg.Lamb.start_learning_rate, + end_learning_rate=optimizer_cfg.Lamb.end_learning_rate, + power=optimizer_cfg.Lamb.power, weight_decay=optimizer_cfg.Lamb.weight_decay, + warmup_steps=int(steps_per_epoch * epoch_num * 0.1), + decay_filter=optimizer_cfg.Lamb.decay_filter) + elif optimizer_cfg.optimizer == 'Momentum': + optimizer = Momentum(network.trainable_params(), learning_rate=optimizer_cfg.Momentum.learning_rate, + momentum=optimizer_cfg.Momentum.momentum) + else: + raise Exception("Optimizer not supported. 
support: [AdamWeightDecayDynamicLR, Lamb, Momentum]") + + # load checkpoint into network + ckpt_config = CheckpointConfig(save_checkpoint_steps=steps_per_epoch, keep_checkpoint_max=1) + ckpoint_cb = ModelCheckpoint(prefix="squad", directory=save_checkpoint_path, config=ckpt_config) + param_dict = load_checkpoint(load_checkpoint_path) + load_param_into_net(network, param_dict) + + update_cell = DynamicLossScaleUpdateCell(loss_scale_value=2**32, scale_factor=2, scale_window=1000) + netwithgrads = BertSquadCell(network, optimizer=optimizer, scale_update_cell=update_cell) + model = Model(netwithgrads) + callbacks = [TimeMonitor(dataset.get_dataset_size()), LossCallBack(), ckpoint_cb] + model.train(epoch_num, dataset, callbacks=callbacks) + + +def do_eval(dataset=None, vocab_file="", eval_json="", load_checkpoint_path="", seq_length=384): + """ do eval """ + if load_checkpoint_path == "": + raise ValueError("Finetune model missed, evaluation task must load finetune model!") + tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file, do_lower_case=True) + eval_examples = read_squad_examples(eval_json, False) + eval_features = convert_examples_to_features( + examples=eval_examples, + tokenizer=tokenizer, + max_seq_length=seq_length, + doc_stride=128, + max_query_length=64, + is_training=False, + output_fn=None, + verbose_logging=False) + + net = BertSquad(bert_net_cfg, False, 2) + net.set_train(False) + param_dict = load_checkpoint(load_checkpoint_path) + load_param_into_net(net, param_dict) + model = Model(net) + output = [] + RawResult = collections.namedtuple("RawResult", ["unique_id", "start_logits", "end_logits"]) + columns_list = ["input_ids", "input_mask", "segment_ids", "unique_ids"] + for data in dataset.create_dict_iterator(): + input_data = [] + for i in columns_list: + input_data.append(Tensor(data[i])) + input_ids, input_mask, segment_ids, unique_ids = input_data + start_positions = Tensor([1], mstype.float32) + end_positions = Tensor([1], mstype.float32) 
+ is_impossible = Tensor([1], mstype.float32) + logits = model.predict(input_ids, input_mask, segment_ids, start_positions, + end_positions, unique_ids, is_impossible) + ids = logits[0].asnumpy() + start = logits[1].asnumpy() + end = logits[2].asnumpy() + + for i in range(bert_net_cfg.batch_size): + unique_id = int(ids[i]) + start_logits = [float(x) for x in start[i].flat] + end_logits = [float(x) for x in end[i].flat] + output.append(RawResult( + unique_id=unique_id, + start_logits=start_logits, + end_logits=end_logits)) + write_predictions(eval_examples, eval_features, output, 20, 30, True, "./predictions.json", None, None) + +def run_squad(): + """run squad task""" + parser = argparse.ArgumentParser(description="run classifier") + parser.add_argument("--device_target", type=str, default="Ascend", help="Device type, default is Ascend") + parser.add_argument("--do_train", type=str, default="false", help="Eable train, default is false") + parser.add_argument("--do_eval", type=str, default="false", help="Eable eval, default is false") + parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.") + parser.add_argument("--epoch_num", type=int, default="1", help="Epoch number, default is 1.") + parser.add_argument("--num_class", type=int, default="2", help="The number of class, default is 2.") + parser.add_argument("--vocab_file_path", type=str, default="", help="Vocab file path") + parser.add_argument("--eval_json_path", type=str, default="", help="Evaluation json file path, can be eval.json") + parser.add_argument("--save_finetune_checkpoint_path", type=str, default="", help="Save checkpoint path") + parser.add_argument("--load_pretrain_checkpoint_path", type=str, default="", help="Load checkpoint file path") + parser.add_argument("--load_finetune_checkpoint_path", type=str, default="", help="Load checkpoint file path") + parser.add_argument("--train_data_file_path", type=str, default="", + help="Data path, it is better to use absolute 
path") + parser.add_argument("--eval_data_file_path", type=str, default="", + help="Data path, it is better to use absolute path") + parser.add_argument("--schema_file_path", type=str, default="", + help="Schema path, it is better to use absolute path") + args_opt = parser.parse_args() + epoch_num = args_opt.epoch_num + load_pretrain_checkpoint_path = args_opt.load_pretrain_checkpoint_path + save_finetune_checkpoint_path = args_opt.save_finetune_checkpoint_path + load_finetune_checkpoint_path = args_opt.load_finetune_checkpoint_path + + if args_opt.do_train.lower() == "false" and args_opt.do_eval.lower() == "false": + raise ValueError("At least one of 'do_train' or 'do_eval' must be true") + if args_opt.do_train.lower() == "true" and args_opt.train_data_file_path == "": + raise ValueError("'train_data_file_path' must be set when do finetune task") + if args_opt.do_eval.lower() == "true": + if args_opt.eval_data_file_path == "": + raise ValueError("'eval_data_file_path' must be set when do evaluation task") + if args_opt.vocab_file_path == "": + raise ValueError("'vocab_file_path' must be set when do evaluation task") + if args_opt.eval_json_path == "": + raise ValueError("'tokenization_file_path' must be set when do evaluation task") + + + target = args_opt.device_target + if target == "Ascend": + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id) + elif target == "GPU": + context.set_context(mode=context.GRAPH_MODE, device_target="GPU") + if bert_net_cfg.compute_type != mstype.float32: + logger.warning('GPU only support fp32 temporarily, run with fp32.') + bert_net_cfg.compute_type = mstype.float32 + else: + raise Exception("Target error, GPU or Ascend is supported.") + + netwithloss = BertSquad(bert_net_cfg, True, 2, dropout_prob=0.1) + + if args_opt.do_train.lower() == "true": + ds = create_squad_dataset(batch_size=bert_net_cfg.batch_size, repeat_count=epoch_num, + data_file_path=args_opt.train_data_file_path, + 
schema_file_path=args_opt.schema_file_path) + do_train(ds, netwithloss, load_pretrain_checkpoint_path, save_finetune_checkpoint_path) + if args_opt.do_eval.lower() == "true": + if save_finetune_checkpoint_path == "": + load_finetune_checkpoint_dir = _cur_dir + else: + load_finetune_checkpoint_dir = make_directory(save_finetune_checkpoint_path) + load_finetune_checkpoint_path = LoadNewestCkpt(load_finetune_checkpoint_dir, + ds.get_dataset_size(), epoch_num, "squad") + + if args_opt.do_eval.lower() == "true": + ds = create_squad_dataset(batch_size=bert_net_cfg.batch_size, repeat_count=epoch_num, + data_file_path=args_opt.eval_data_file_path, + schema_file_path=args_opt.schema_file_path, is_training=False) + do_eval(ds, args_opt.vocab_file_path, args_opt.eval_json_path, + load_finetune_checkpoint_path, bert_net_cfg.seq_length) + +if __name__ == "__main__": + run_squad() diff --git a/model_zoo/bert/scripts/run_classifier.sh b/model_zoo/bert/scripts/run_classifier.sh new file mode 100644 index 0000000000..275324b950 --- /dev/null +++ b/model_zoo/bert/scripts/run_classifier.sh @@ -0,0 +1,42 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +echo "==============================================================================================================" +echo "Please run the scipt as: " +echo "bash scripts/run_classifier.sh" +echo "for example: bash scripts/run_classifier.sh" +echo "assessment_method include: [MCC, Spearman_correlation ,Accuracy]" +echo "==============================================================================================================" + +mkdir -p ms_log +CUR_DIR=`pwd` +PROJECT_DIR=$(cd "$(dirname "$0")" || exit; pwd) +export GLOG_log_dir=${CUR_DIR}/ms_log +export GLOG_logtostderr=0 +python ${PROJECT_DIR}/../run_classifier.py \ + --device_target="Ascend" \ + --do_train="true" \ + --do_eval="false" \ + --assessment_method="Accuracy" \ + --device_id=0 \ + --epoch_num=1 \ + --num_class=2 \ + --save_finetune_checkpoint_path="" \ + --load_pretrain_checkpoint_path="" \ + --load_finetune_checkpoint_path="" \ + --train_data_file_path="" \ + --eval_data_file_path="" \ + --schema_file_path="" > log.txt 2>&1 & diff --git a/model_zoo/bert/scripts/run_distribute_pretrain.sh b/model_zoo/bert/scripts/run_distribute_pretrain.sh index 5a9f8735aa..eb3a0979d1 100644 --- a/model_zoo/bert/scripts/run_distribute_pretrain.sh +++ b/model_zoo/bert/scripts/run_distribute_pretrain.sh @@ -24,8 +24,7 @@ echo "========================================================================== EPOCH_SIZE=$2 DATA_DIR=$3 SCHEMA_DIR=$4 - -export MINDSPORE_HCCL_CONFIG_PATH=$5 +PROJECT_DIR=$(cd "$(dirname "$0")" || exit; pwd) export RANK_TABLE_FILE=$5 export RANK_SIZE=$1 cores=`cat /proc/cpuinfo|grep "processor" |wc -l` @@ -54,7 +53,7 @@ do export GLOG_log_dir=${CUR_DIR}/ms_log export GLOG_logtostderr=0 env > env.log - taskset -c $cmdopt python ../run_pretrain.py \ + taskset -c $cmdopt python ${PROJECT_DIR}/../run_pretrain.py \ --distribute="true" \ --epoch_size=$EPOCH_SIZE \ --device_id=$DEVICE_ID \ diff --git 
a/model_zoo/bert/scripts/run_ner.sh b/model_zoo/bert/scripts/run_ner.sh new file mode 100644 index 0000000000..ae401b2462 --- /dev/null +++ b/model_zoo/bert/scripts/run_ner.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +echo "==============================================================================================================" +echo "Please run the scipt as: " +echo "bash scripts/run_ner.sh" +echo "for example: bash scripts/run_ner.sh" +echo "assessment_method include: [F1, clue_benchmark]" +echo "==============================================================================================================" + +mkdir -p ms_log +CUR_DIR=`pwd` +PROJECT_DIR=$(cd "$(dirname "$0")" || exit; pwd) +export GLOG_log_dir=${CUR_DIR}/ms_log +export GLOG_logtostderr=0 +python ${PROJECT_DIR}/../run_ner.py \ + --device_target="Ascend" \ + --do_train="true" \ + --do_eval="false" \ + --assessment_method="F1" \ + --use_crf="false" \ + --device_id=0 \ + --epoch_num=1 \ + --num_class=2 \ + --vocab_file_path="" \ + --label2id_file_path="" \ + --save_finetune_checkpoint_path="" \ + --load_pretrain_checkpoint_path="" \ + --load_finetune_checkpoint_path="" \ + --train_data_file_path="" \ + --eval_data_file_path="" \ + --schema_file_path="" > log.txt 2>&1 & diff --git a/model_zoo/bert/scripts/run_squad.sh 
b/model_zoo/bert/scripts/run_squad.sh new file mode 100644 index 0000000000..a33950cadb --- /dev/null +++ b/model_zoo/bert/scripts/run_squad.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +echo "==============================================================================================================" +echo "Please run the scipt as: " +echo "bash scripts/run_squad.sh" +echo "for example: bash scripts/run_squad.sh" +echo "assessment_method include: [Accuracy]" +echo "==============================================================================================================" + +mkdir -p ms_log +CUR_DIR=`pwd` +PROJECT_DIR=$(cd "$(dirname "$0")" || exit; pwd) +export GLOG_log_dir=${CUR_DIR}/ms_log +export GLOG_logtostderr=0 +python ${PROJECT_DIR}/../run_squad.py \ + --device_target="Ascend" \ + --do_train="true" \ + --do_eval="false" \ + --device_id=0 \ + --epoch_num=1 \ + --num_class=2 \ + --vocab_file_path="" \ + --eval_json_path="" \ + --save_finetune_checkpoint_path="" \ + --load_pretrain_checkpoint_path="" \ + --load_finetune_checkpoint_path="" \ + --train_data_file_path="" \ + --eval_data_file_path="" \ + --schema_file_path="" > log.txt 2>&1 & diff --git a/model_zoo/bert/scripts/run_standalone_pretrain.sh b/model_zoo/bert/scripts/run_standalone_pretrain.sh index 3cd9545f7f..f59eb69601 100644 --- 
a/model_zoo/bert/scripts/run_standalone_pretrain.sh +++ b/model_zoo/bert/scripts/run_standalone_pretrain.sh @@ -26,10 +26,11 @@ DATA_DIR=$3 SCHEMA_DIR=$4 mkdir -p ms_log +PROJECT_DIR=$(cd "$(dirname "$0")" || exit; pwd) CUR_DIR=`pwd` export GLOG_log_dir=${CUR_DIR}/ms_log export GLOG_logtostderr=0 -python run_pretrain.py \ +python ${PROJECT_DIR}/../run_pretrain.py \ --distribute="false" \ --epoch_size=$EPOCH_SIZE \ --device_id=$DEVICE_ID \ diff --git a/model_zoo/bert/squadeval.py b/model_zoo/bert/squadeval.py deleted file mode 100644 index 49027acd6d..0000000000 --- a/model_zoo/bert/squadeval.py +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Evaluation script for SQuAD task""" - -import os -import collections -import mindspore.dataset as de -import mindspore.dataset.transforms.c_transforms as C -import mindspore.common.dtype as mstype -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.train.model import Model -from mindspore.train.serialization import load_checkpoint, load_param_into_net -from src import tokenization -from src.evaluation_config import cfg, bert_net_cfg -from src.utils import BertSquad -from src.create_squad_data import read_squad_examples, convert_examples_to_features -from src.run_squad import write_predictions - -def get_squad_dataset(batch_size=1, repeat_count=1, distribute_file=''): - """get SQuAD dataset from tfrecord""" - ds = de.TFRecordDataset([cfg.data_file], cfg.schema_file, columns_list=["input_ids", "input_mask", - "segment_ids", "unique_ids"], - shuffle=False) - type_cast_op = C.TypeCast(mstype.int32) - ds = ds.map(input_columns="segment_ids", operations=type_cast_op) - ds = ds.map(input_columns="input_ids", operations=type_cast_op) - ds = ds.map(input_columns="input_mask", operations=type_cast_op) - ds = ds.repeat(repeat_count) - ds = ds.batch(batch_size, drop_remainder=True) - return ds - -def test_eval(): - """Evaluation function for SQuAD task""" - tokenizer = tokenization.FullTokenizer(vocab_file="./vocab.txt", do_lower_case=True) - input_file = "dataset/v1.1/dev-v1.1.json" - eval_examples = read_squad_examples(input_file, False) - eval_features = convert_examples_to_features( - examples=eval_examples, - tokenizer=tokenizer, - max_seq_length=384, - doc_stride=128, - max_query_length=64, - is_training=False, - output_fn=None, - verbose_logging=False) - - device_id = int(os.getenv('DEVICE_ID')) - context.set_context(mode=context.GRAPH_MODE, device_target='Ascend', device_id=device_id) - dataset = get_squad_dataset(bert_net_cfg.batch_size, 1) - 
net = BertSquad(bert_net_cfg, False, 2) - net.set_train(False) - param_dict = load_checkpoint(cfg.finetune_ckpt) - load_param_into_net(net, param_dict) - model = Model(net) - output = [] - RawResult = collections.namedtuple("RawResult", ["unique_id", "start_logits", "end_logits"]) - columns_list = ["input_ids", "input_mask", "segment_ids", "unique_ids"] - for data in dataset.create_dict_iterator(): - input_data = [] - for i in columns_list: - input_data.append(Tensor(data[i])) - input_ids, input_mask, segment_ids, unique_ids = input_data - start_positions = Tensor([1], mstype.float32) - end_positions = Tensor([1], mstype.float32) - is_impossible = Tensor([1], mstype.float32) - logits = model.predict(input_ids, input_mask, segment_ids, start_positions, - end_positions, unique_ids, is_impossible) - ids = logits[0].asnumpy() - start = logits[1].asnumpy() - end = logits[2].asnumpy() - - for i in range(bert_net_cfg.batch_size): - unique_id = int(ids[i]) - start_logits = [float(x) for x in start[i].flat] - end_logits = [float(x) for x in end[i].flat] - output.append(RawResult( - unique_id=unique_id, - start_logits=start_logits, - end_logits=end_logits)) - write_predictions(eval_examples, eval_features, output, 20, 30, True, "./predictions.json", - None, None, False, False) - - -if __name__ == "__main__": - test_eval() diff --git a/model_zoo/bert/src/assessment_method.py b/model_zoo/bert/src/assessment_method.py new file mode 100644 index 0000000000..ca6579cabf --- /dev/null +++ b/model_zoo/bert/src/assessment_method.py @@ -0,0 +1,134 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +''' +Bert evaluation assessment method script. +''' +import math +import numpy as np +from .CRF import postprocess + +class Accuracy(): + ''' + calculate accuracy + ''' + def __init__(self): + self.acc_num = 0 + self.total_num = 0 + def update(self, logits, labels): + labels = labels.asnumpy() + labels = np.reshape(labels, -1) + logits = logits.asnumpy() + logit_id = np.argmax(logits, axis=-1) + self.acc_num += np.sum(labels == logit_id) + self.total_num += len(labels) + print("=========================accuracy is ", self.acc_num / self.total_num) + +class F1(): + ''' + calculate F1 score + ''' + def __init__(self, use_crf=False, num_labels=2): + self.TP = 0 + self.FP = 0 + self.FN = 0 + self.use_crf = use_crf + self.num_labels = num_labels + + def update(self, logits, labels): + ''' + update F1 score + ''' + labels = labels.asnumpy() + labels = np.reshape(labels, -1) + if self.use_crf: + backpointers, best_tag_id = logits + best_path = postprocess(backpointers, best_tag_id) + logit_id = [] + for ele in best_path: + logit_id.extend(ele) + else: + logits = logits.asnumpy() + logit_id = np.argmax(logits, axis=-1) + logit_id = np.reshape(logit_id, -1) + pos_eva = np.isin(logit_id, [i for i in range(1, self.num_labels)]) + pos_label = np.isin(labels, [i for i in range(1, self.num_labels)]) + self.TP += np.sum(pos_eva&pos_label) + self.FP += np.sum(pos_eva&(~pos_label)) + self.FN += np.sum((~pos_eva)&pos_label) + +class MCC(): + ''' + Calculate Matthews Correlation Coefficient + 
''' + def __init__(self): + self.TP = 0 + self.FP = 0 + self.FN = 0 + self.TN = 0 + def update(self, logits, labels): + ''' + MCC update + ''' + labels = labels.asnumpy() + labels = np.reshape(labels, -1) + labels = labels.astype(np.bool) + logits = logits.asnumpy() + logit_id = np.argmax(logits, axis=-1) + logit_id = np.reshape(logit_id, -1) + logit_id = logit_id.astype(np.bool) + ornot = logit_id ^ labels + + self.TP += (~ornot & labels).sum() + self.FP += (ornot & ~labels).sum() + self.FN += (ornot & labels).sum() + self.TN += (~ornot & ~labels).sum() + + def cal(self): + mcc = (self.TP*self.TN - self.FP*self.FN)/math.sqrt((self.TP+self.FP)*(self.TP+self.FN) * + (self.TN+self.FP)*(self.TN+self.FN)) + return mcc + +class Spearman_Correlation(): + ''' + Calculate Spearman Correlation Coefficient + ''' + def __init__(self): + self.label = [] + self.logit = [] + + def update(self, logits, labels): + labels = labels.asnumpy() + labels = np.reshape(labels, -1) + logits = logits.asnumpy() + logits = np.reshape(logits, -1) + self.label.append(labels) + self.logit.append(logits) + + def cal(self): + ''' + Calculate Spearman Correlation + ''' + label = np.concatenate(self.label) + logit = np.concatenate(self.logit) + sort_label = label.argsort()[::-1] + sort_logit = logit.argsort()[::-1] + n = len(label) + d_acc = 0 + for i in range(n): + d = np.where(sort_label == i)[0] - np.where(sort_logit == i)[0] + d_acc += d**2 + ps = 1 - 6*d_acc/n/(n**2-1) + return ps diff --git a/model_zoo/bert/src/bert_for_finetune.py b/model_zoo/bert/src/bert_for_finetune.py new file mode 100644 index 0000000000..32ac0823b9 --- /dev/null +++ b/model_zoo/bert/src/bert_for_finetune.py @@ -0,0 +1,327 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +''' +Bert for finetune script. +''' + +import mindspore.nn as nn +from mindspore.ops import operations as P +from mindspore.ops import functional as F +from mindspore.ops import composite as C +from mindspore.common.tensor import Tensor +from mindspore.common.parameter import Parameter, ParameterTuple +from mindspore.common import dtype as mstype +from mindspore.nn.wrap.grad_reducer import DistributedGradReducer +from mindspore.train.parallel_utils import ParallelMode +from mindspore.communication.management import get_group_size +from mindspore import context +from .bert_for_pre_training import clip_grad +from .finetune_eval_model import BertCLSModel, BertNERModel, BertSquadModel +from .utils import CrossEntropyCalculation + + +GRADIENT_CLIP_TYPE = 1 +GRADIENT_CLIP_VALUE = 1.0 +grad_scale = C.MultitypeFuncGraph("grad_scale") +reciprocal = P.Reciprocal() +@grad_scale.register("Tensor", "Tensor") +def tensor_grad_scale(scale, grad): + return grad * reciprocal(scale) + +_grad_overflow = C.MultitypeFuncGraph("_grad_overflow") +grad_overflow = P.FloatStatus() +@_grad_overflow.register("Tensor") +def _tensor_grad_overflow(grad): + return grad_overflow(grad) + +class BertFinetuneCell(nn.Cell): + """ + Especifically defined for finetuning where only four inputs tensor are needed. 
+ """ + def __init__(self, network, optimizer, scale_update_cell=None): + + super(BertFinetuneCell, self).__init__(auto_prefix=False) + self.network = network + self.weights = ParameterTuple(network.trainable_params()) + self.optimizer = optimizer + self.grad = C.GradOperation('grad', + get_by_list=True, + sens_param=True) + self.reducer_flag = False + self.allreduce = P.AllReduce() + self.parallel_mode = context.get_auto_parallel_context("parallel_mode") + if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]: + self.reducer_flag = True + self.grad_reducer = None + if self.reducer_flag: + mean = context.get_auto_parallel_context("mirror_mean") + degree = get_group_size() + self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree) + self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE) + self.cast = P.Cast() + self.gpu_target = False + if context.get_context("device_target") == "GPU": + self.gpu_target = True + self.float_status = P.FloatStatus() + self.addn = P.AddN() + self.reshape = P.Reshape() + else: + self.alloc_status = P.NPUAllocFloatStatus() + self.get_status = P.NPUGetFloatStatus() + self.clear_before_grad = P.NPUClearFloatStatus() + self.reduce_sum = P.ReduceSum(keep_dims=False) + self.depend_parameter_use = P.ControlDepend(depend_mode=1) + self.base = Tensor(1, mstype.float32) + self.less_equal = P.LessEqual() + self.hyper_map = C.HyperMap() + self.loss_scale = None + self.loss_scaling_manager = scale_update_cell + if scale_update_cell: + self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32), + name="loss_scale") + + def construct(self, + input_ids, + input_mask, + token_type_id, + label_ids, + sens=None): + + + weights = self.weights + init = False + loss = self.network(input_ids, + input_mask, + token_type_id, + label_ids) + if sens is None: + scaling_sens = self.loss_scale + else: + scaling_sens = sens + + if not self.gpu_target: + init = 
self.alloc_status() + clear_before_grad = self.clear_before_grad(init) + F.control_depend(loss, init) + self.depend_parameter_use(clear_before_grad, scaling_sens) + grads = self.grad(self.network, weights)(input_ids, + input_mask, + token_type_id, + label_ids, + self.cast(scaling_sens, + mstype.float32)) + grads = self.hyper_map(F.partial(grad_scale, scaling_sens), grads) + grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) + if self.reducer_flag: + grads = self.grad_reducer(grads) + if not self.gpu_target: + flag = self.get_status(init) + flag_sum = self.reduce_sum(init, (0,)) + F.control_depend(grads, flag) + F.control_depend(flag, flag_sum) + else: + flag_sum = self.hyper_map(F.partial(_grad_overflow), grads) + flag_sum = self.addn(flag_sum) + flag_sum = self.reshape(flag_sum, (())) + if self.is_distributed: + flag_reduce = self.allreduce(flag_sum) + cond = self.less_equal(self.base, flag_reduce) + else: + cond = self.less_equal(self.base, flag_sum) + overflow = cond + if sens is None: + overflow = self.loss_scaling_manager(self.loss_scale, cond) + if overflow: + succ = False + else: + succ = self.optimizer(grads) + ret = (loss, cond) + return F.depend(ret, succ) + +class BertSquadCell(nn.Cell): + """ + specifically defined for finetuning where only four inputs tensor are needed. 
+ """ + def __init__(self, network, optimizer, scale_update_cell=None): + super(BertSquadCell, self).__init__(auto_prefix=False) + self.network = network + self.weights = ParameterTuple(network.trainable_params()) + self.optimizer = optimizer + self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True) + self.reducer_flag = False + self.allreduce = P.AllReduce() + self.parallel_mode = context.get_auto_parallel_context("parallel_mode") + if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]: + self.reducer_flag = True + self.grad_reducer = None + if self.reducer_flag: + mean = context.get_auto_parallel_context("mirror_mean") + degree = get_group_size() + self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree) + self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE) + self.cast = P.Cast() + self.alloc_status = P.NPUAllocFloatStatus() + self.get_status = P.NPUGetFloatStatus() + self.clear_before_grad = P.NPUClearFloatStatus() + self.reduce_sum = P.ReduceSum(keep_dims=False) + self.depend_parameter_use = P.ControlDepend(depend_mode=1) + self.base = Tensor(1, mstype.float32) + self.less_equal = P.LessEqual() + self.hyper_map = C.HyperMap() + self.loss_scale = None + self.loss_scaling_manager = scale_update_cell + if scale_update_cell: + self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32), + name="loss_scale") + def construct(self, + input_ids, + input_mask, + token_type_id, + start_position, + end_position, + unique_id, + is_impossible, + sens=None): + weights = self.weights + init = self.alloc_status() + loss = self.network(input_ids, + input_mask, + token_type_id, + start_position, + end_position, + unique_id, + is_impossible) + if sens is None: + scaling_sens = self.loss_scale + else: + scaling_sens = sens + grads = self.grad(self.network, weights)(input_ids, + input_mask, + token_type_id, + start_position, + end_position, + unique_id, + 
is_impossible, + self.cast(scaling_sens, + mstype.float32)) + clear_before_grad = self.clear_before_grad(init) + F.control_depend(loss, init) + self.depend_parameter_use(clear_before_grad, scaling_sens) + grads = self.hyper_map(F.partial(grad_scale, scaling_sens), grads) + grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) + if self.reducer_flag: + grads = self.grad_reducer(grads) + flag = self.get_status(init) + flag_sum = self.reduce_sum(init, (0,)) + if self.is_distributed: + flag_reduce = self.allreduce(flag_sum) + cond = self.less_equal(self.base, flag_reduce) + else: + cond = self.less_equal(self.base, flag_sum) + F.control_depend(grads, flag) + F.control_depend(flag, flag_sum) + overflow = cond + if sens is None: + overflow = self.loss_scaling_manager(self.loss_scale, cond) + if overflow: + succ = False + else: + succ = self.optimizer(grads) + ret = (loss, cond) + return F.depend(ret, succ) + +class BertCLS(nn.Cell): + """ + Train interface for classification finetuning task. + """ + def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0, use_one_hot_embeddings=False, + assessment_method=""): + super(BertCLS, self).__init__() + self.bert = BertCLSModel(config, is_training, num_labels, dropout_prob, use_one_hot_embeddings, + assessment_method) + self.loss = CrossEntropyCalculation(is_training) + self.num_labels = num_labels + self.assessment_method = assessment_method + self.is_training = is_training + def construct(self, input_ids, input_mask, token_type_id, label_ids): + logits = self.bert(input_ids, input_mask, token_type_id) + if self.assessment_method == "spearman_correlation": + if self.is_training: + loss = self.loss(logits, label_ids) + else: + loss = logits + else: + loss = self.loss(logits, label_ids, self.num_labels) + return loss + + +class BertNER(nn.Cell): + """ + Train interface for sequence labeling finetuning task. 
+ """ + def __init__(self, config, is_training, num_labels=11, use_crf=False, tag_to_index=None, dropout_prob=0.0, + use_one_hot_embeddings=False): + super(BertNER, self).__init__() + self.bert = BertNERModel(config, is_training, num_labels, use_crf, dropout_prob, use_one_hot_embeddings) + if use_crf: + if not tag_to_index: + raise Exception("The dict for tag-index mapping should be provided for CRF.") + from src.CRF import CRF + self.loss = CRF(tag_to_index, config.batch_size, config.seq_length, is_training) + else: + self.loss = CrossEntropyCalculation(is_training) + self.num_labels = num_labels + self.use_crf = use_crf + def construct(self, input_ids, input_mask, token_type_id, label_ids): + logits = self.bert(input_ids, input_mask, token_type_id) + if self.use_crf: + loss = self.loss(logits, label_ids) + else: + loss = self.loss(logits, label_ids, self.num_labels) + return loss + +class BertSquad(nn.Cell): + ''' + Train interface for SQuAD finetuning task. + ''' + def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0, use_one_hot_embeddings=False): + super(BertSquad, self).__init__() + self.bert = BertSquadModel(config, is_training, num_labels, dropout_prob, use_one_hot_embeddings) + self.loss = CrossEntropyCalculation(is_training) + self.num_labels = num_labels + self.seq_length = config.seq_length + self.is_training = is_training + self.total_num = Parameter(Tensor([0], mstype.float32), name='total_num') + self.start_num = Parameter(Tensor([0], mstype.float32), name='start_num') + self.end_num = Parameter(Tensor([0], mstype.float32), name='end_num') + self.sum = P.ReduceSum() + self.equal = P.Equal() + self.argmax = P.ArgMaxWithValue(axis=1) + self.squeeze = P.Squeeze(axis=-1) + + def construct(self, input_ids, input_mask, token_type_id, start_position, end_position, unique_id, is_impossible): + logits = self.bert(input_ids, input_mask, token_type_id) + if self.is_training: + unstacked_logits_0 = self.squeeze(logits[:, :, 0:1]) + 
unstacked_logits_1 = self.squeeze(logits[:, :, 1:2]) + start_loss = self.loss(unstacked_logits_0, start_position, self.seq_length) + end_loss = self.loss(unstacked_logits_1, end_position, self.seq_length) + total_loss = (start_loss + end_loss) / 2.0 + else: + start_logits = self.squeeze(logits[:, :, 0:1]) + end_logits = self.squeeze(logits[:, :, 1:2]) + total_loss = (unique_id, start_logits, end_logits) + return total_loss diff --git a/model_zoo/bert/src/clue_classification_dataset_process.py b/model_zoo/bert/src/clue_classification_dataset_process.py new file mode 100755 index 0000000000..1e27fe0352 --- /dev/null +++ b/model_zoo/bert/src/clue_classification_dataset_process.py @@ -0,0 +1,153 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +""" +sample script of processing CLUE classification dataset using mindspore.dataset.text for fine-tuning bert +""" + +import os +import numpy as np + +import mindspore.common.dtype as mstype +import mindspore.dataset as ds +import mindspore.dataset.text as text +import mindspore.dataset.transforms.c_transforms as ops + + +def process_tnews_clue_dataset(data_dir, label_list, bert_vocab_path, + data_usage='train', shuffle_dataset=False, max_seq_len=128, batch_size=64): + """Process TNEWS dataset""" + ### Loading TNEWS from CLUEDataset + assert data_usage in ['train', 'eval', 'test'] + if data_usage == 'train': + dataset = ds.CLUEDataset(os.path.join(data_dir, "train.json"), task='TNEWS', + usage=data_usage, shuffle=shuffle_dataset) + elif data_usage == 'eval': + dataset = ds.CLUEDataset(os.path.join(data_dir, "dev.json"), task='TNEWS', + usage=data_usage, shuffle=shuffle_dataset) + else: + dataset = ds.CLUEDataset(os.path.join(data_dir, "test.json"), task='TNEWS', + usage=data_usage, shuffle=shuffle_dataset) + ### Processing label + if data_usage == 'test': + dataset = dataset.map(input_columns=["id"], output_columns=["id", "label_id"], + columns_order=["id", "label_id", "sentence"], operations=ops.Duplicate()) + dataset = dataset.map(input_columns=["label_id"], operations=ops.Fill(0)) + else: + label_vocab = text.Vocab.from_list(label_list) + label_lookup = text.Lookup(label_vocab) + dataset = dataset.map(input_columns="label_desc", output_columns="label_id", operations=label_lookup) + ### Processing sentence + vocab = text.Vocab.from_file(bert_vocab_path) + tokenizer = text.BertTokenizer(vocab, lower_case=True) + lookup = text.Lookup(vocab, unknown_token='[UNK]') + dataset = dataset.map(input_columns=["sentence"], operations=tokenizer) + dataset = dataset.map(input_columns=["sentence"], operations=ops.Slice(slice(0, max_seq_len))) + dataset = dataset.map(input_columns=["sentence"], 
+ operations=ops.Concatenate(prepend=np.array(["[CLS]"], dtype='S'), + append=np.array(["[SEP]"], dtype='S'))) + dataset = dataset.map(input_columns=["sentence"], output_columns=["text_ids"], operations=lookup) + dataset = dataset.map(input_columns=["text_ids"], operations=ops.PadEnd([max_seq_len], 0)) + dataset = dataset.map(input_columns=["text_ids"], output_columns=["text_ids", "mask_ids"], + columns_order=["label_id", "text_ids", "mask_ids"], operations=ops.Duplicate()) + dataset = dataset.map(input_columns=["mask_ids"], operations=ops.Mask(ops.Relational.NE, 0, mstype.int32)) + dataset = dataset.map(input_columns=["text_ids"], output_columns=["text_ids", "segment_ids"], + columns_order=["label_id", "text_ids", "mask_ids", "segment_ids"], operations=ops.Duplicate()) + dataset = dataset.map(input_columns=["segment_ids"], operations=ops.Fill(0)) + dataset = dataset.batch(batch_size) + label = [] + text_ids = [] + mask_ids = [] + segment_ids = [] + for data in dataset: + label.append(data[0]) + text_ids.append(data[1]) + mask_ids.append(data[2]) + segment_ids.append(data[3]) + return label, text_ids, mask_ids, segment_ids + + +def process_cmnli_clue_dataset(data_dir, label_list, bert_vocab_path, + data_usage='train', shuffle_dataset=False, max_seq_len=128, batch_size=64): + """Process CMNLI dataset""" + ### Loading CMNLI from CLUEDataset + assert data_usage in ['train', 'eval', 'test'] + if data_usage == 'train': + dataset = ds.CLUEDataset(os.path.join(data_dir, "train.json"), task='CMNLI', + usage=data_usage, shuffle=shuffle_dataset) + elif data_usage == 'eval': + dataset = ds.CLUEDataset(os.path.join(data_dir, "dev.json"), task='CMNLI', + usage=data_usage, shuffle=shuffle_dataset) + else: + dataset = ds.CLUEDataset(os.path.join(data_dir, "test.json"), task='CMNLI', + usage=data_usage, shuffle=shuffle_dataset) + ### Processing label + if data_usage == 'test': + dataset = dataset.map(input_columns=["id"], output_columns=["id", "label_id"], + columns_order=["id", 
"label_id", "sentence1", "sentence2"], operations=ops.Duplicate()) + dataset = dataset.map(input_columns=["label_id"], operations=ops.Fill(0)) + else: + label_vocab = text.Vocab.from_list(label_list) + label_lookup = text.Lookup(label_vocab) + dataset = dataset.map(input_columns="label", output_columns="label_id", operations=label_lookup) + ### Processing sentence pairs + vocab = text.Vocab.from_file(bert_vocab_path) + tokenizer = text.BertTokenizer(vocab, lower_case=True) + lookup = text.Lookup(vocab, unknown_token='[UNK]') + ### Tokenizing sentences and truncate sequence pair + dataset = dataset.map(input_columns=["sentence1"], operations=tokenizer) + dataset = dataset.map(input_columns=["sentence2"], operations=tokenizer) + dataset = dataset.map(input_columns=["sentence1", "sentence2"], + operations=text.TruncateSequencePair(max_seq_len-3)) + ### Adding special tokens + dataset = dataset.map(input_columns=["sentence1"], + operations=ops.Concatenate(prepend=np.array(["[CLS]"], dtype='S'), + append=np.array(["[SEP]"], dtype='S'))) + dataset = dataset.map(input_columns=["sentence2"], + operations=ops.Concatenate(append=np.array(["[SEP]"], dtype='S'))) + ### Generating segment_ids + dataset = dataset.map(input_columns=["sentence1"], output_columns=["sentence1", "type_sentence1"], + columns_order=["sentence1", "type_sentence1", "sentence2", "label_id"], + operations=ops.Duplicate()) + dataset = dataset.map(input_columns=["sentence2"], output_columns=["sentence2", "type_sentence2"], + columns_order=["sentence1", "type_sentence1", "sentence2", "type_sentence2", "label_id"], + operations=ops.Duplicate()) + dataset = dataset.map(input_columns=["type_sentence1"], operations=[lookup, ops.Fill(0)]) + dataset = dataset.map(input_columns=["type_sentence2"], operations=[lookup, ops.Fill(1)]) + dataset = dataset.map(input_columns=["type_sentence1", "type_sentence2"], output_columns=["segment_ids"], + columns_order=["sentence1", "sentence2", "segment_ids", "label_id"], + 
operations=ops.Concatenate()) + dataset = dataset.map(input_columns=["segment_ids"], operations=ops.PadEnd([max_seq_len], 0)) + ### Generating text_ids + dataset = dataset.map(input_columns=["sentence1", "sentence2"], output_columns=["text_ids"], + columns_order=["text_ids", "segment_ids", "label_id"], + operations=ops.Concatenate()) + dataset = dataset.map(input_columns=["text_ids"], operations=lookup) + dataset = dataset.map(input_columns=["text_ids"], operations=ops.PadEnd([max_seq_len], 0)) + ### Generating mask_ids + dataset = dataset.map(input_columns=["text_ids"], output_columns=["text_ids", "mask_ids"], + columns_order=["label_id", "text_ids", "mask_ids", "segment_ids"], operations=ops.Duplicate()) + dataset = dataset.map(input_columns=["mask_ids"], operations=ops.Mask(ops.Relational.NE, 0, mstype.int32)) + dataset = dataset.batch(batch_size) + label = [] + text_ids = [] + mask_ids = [] + segment_ids = [] + for data in dataset: + label.append(data[0]) + text_ids.append(data[1]) + mask_ids.append(data[2]) + segment_ids.append(data[3]) + return label, text_ids, mask_ids, segment_ids diff --git a/model_zoo/bert/src/cluener_evaluation.py b/model_zoo/bert/src/cluener_evaluation.py index 09de6bf0b3..f4c747ac38 100644 --- a/model_zoo/bert/src/cluener_evaluation.py +++ b/model_zoo/bert/src/cluener_evaluation.py @@ -19,15 +19,13 @@ import json import numpy as np import mindspore.common.dtype as mstype from mindspore.common.tensor import Tensor -from . 
import tokenization -from .sample_process import label_generation, process_one_example_p -from .evaluation_config import cfg -from .CRF import postprocess +from src import tokenization +from src.sample_process import label_generation, process_one_example_p +from src.CRF import postprocess +from src.finetune_eval_config import bert_net_cfg -vocab_file = "./vocab.txt" -tokenizer_ = tokenization.FullTokenizer(vocab_file=vocab_file) -def process(model, text, sequence_length): +def process(model=None, text="", tokenizer_=None, use_crf="", label2id_file=""): """ process text. """ @@ -36,13 +34,13 @@ def process(model, text, sequence_length): res = [] ids = [] for i in data: - feature = process_one_example_p(tokenizer_, i, max_seq_len=sequence_length) + feature = process_one_example_p(tokenizer_, i, max_seq_len=bert_net_cfg.seq_length) features.append(feature) input_ids, input_mask, token_type_id = feature input_ids = Tensor(np.array(input_ids), mstype.int32) input_mask = Tensor(np.array(input_mask), mstype.int32) token_type_id = Tensor(np.array(token_type_id), mstype.int32) - if cfg.use_crf: + if use_crf.lower() == "true": backpointers, best_tag_id = model.predict(input_ids, input_mask, token_type_id, Tensor(1)) best_path = postprocess(backpointers, best_tag_id) logits = [] @@ -54,19 +52,21 @@ def process(model, text, sequence_length): ids = logits.asnumpy() ids = np.argmax(ids, axis=-1) ids = list(ids) - res = label_generation(text, ids) + res = label_generation(text=text, probs=ids, label2id_file=label2id_file) return res -def submit(model, path, sequence_length): +def submit(model=None, path="", vocab_file="", use_crf="", label2id_file=""): """ submit task """ + tokenizer_ = tokenization.FullTokenizer(vocab_file=vocab_file) data = [] for line in open(path): if not line.strip(): continue oneline = json.loads(line.strip()) - res = process(model, oneline["text"], sequence_length) + res = process(model=model, text=oneline["text"], tokenizer_=tokenizer_, + use_crf=use_crf, 
label2id_file=label2id_file) print("text", oneline["text"]) print("res:", res) data.append(json.dumps({"label": res}, ensure_ascii=False)) diff --git a/model_zoo/bert/src/dataset.py b/model_zoo/bert/src/dataset.py index 7985ca8559..e530718d4f 100644 --- a/model_zoo/bert/src/dataset.py +++ b/model_zoo/bert/src/dataset.py @@ -36,8 +36,8 @@ def create_bert_dataset(epoch_size=1, device_num=1, rank=0, do_shuffle="true", e ds = de.TFRecordDataset(data_files, schema_dir if schema_dir != "" else None, columns_list=["input_ids", "input_mask", "segment_ids", "next_sentence_labels", "masked_lm_positions", "masked_lm_ids", "masked_lm_weights"], - shuffle=(do_shuffle == "true"), num_shards=device_num, shard_id=rank, - shard_equal_rows=True) + shuffle=de.Shuffle.FILES if do_shuffle == "true" else False, + num_shards=device_num, shard_id=rank, shard_equal_rows=True) ori_dataset_size = ds.get_dataset_size() print('origin dataset size: ', ori_dataset_size) new_size = ori_dataset_size @@ -58,3 +58,77 @@ def create_bert_dataset(epoch_size=1, device_num=1, rank=0, do_shuffle="true", e logger.info("data size: {}".format(ds.get_dataset_size())) logger.info("repeatcount: {}".format(ds.get_repeat_count())) return ds, new_repeat_count + + +def create_ner_dataset(batch_size=1, repeat_count=1, assessment_method="accuracy", + data_file_path=None, schema_file_path=None): + """create finetune or evaluation dataset""" + type_cast_op = C.TypeCast(mstype.int32) + ds = de.TFRecordDataset([data_file_path], schema_file_path if schema_file_path != "" else None, + columns_list=["input_ids", "input_mask", "segment_ids", "label_ids"]) + if assessment_method == "Spearman_correlation": + type_cast_op_float = C.TypeCast(mstype.float32) + ds = ds.map(input_columns="label_ids", operations=type_cast_op_float) + else: + ds = ds.map(input_columns="label_ids", operations=type_cast_op) + ds = ds.map(input_columns="segment_ids", operations=type_cast_op) + ds = ds.map(input_columns="input_mask", 
operations=type_cast_op) + ds = ds.map(input_columns="input_ids", operations=type_cast_op) + ds = ds.repeat(repeat_count) + # apply shuffle operation + buffer_size = 960 + ds = ds.shuffle(buffer_size=buffer_size) + # apply batch operations + ds = ds.batch(batch_size, drop_remainder=True) + return ds + + +def create_classification_dataset(batch_size=1, repeat_count=1, assessment_method="accuracy", + data_file_path=None, schema_file_path=None): + """create finetune or evaluation dataset""" + type_cast_op = C.TypeCast(mstype.int32) + ds = de.TFRecordDataset([data_file_path], schema_file_path if schema_file_path != "" else None, + columns_list=["input_ids", "input_mask", "segment_ids", "label_ids"]) + if assessment_method == "Spearman_correlation": + type_cast_op_float = C.TypeCast(mstype.float32) + ds = ds.map(input_columns="label_ids", operations=type_cast_op_float) + else: + ds = ds.map(input_columns="label_ids", operations=type_cast_op) + ds = ds.map(input_columns="segment_ids", operations=type_cast_op) + ds = ds.map(input_columns="input_mask", operations=type_cast_op) + ds = ds.map(input_columns="input_ids", operations=type_cast_op) + ds = ds.repeat(repeat_count) + # apply shuffle operation + buffer_size = 960 + ds = ds.shuffle(buffer_size=buffer_size) + # apply batch operations + ds = ds.batch(batch_size, drop_remainder=True) + return ds + + +def create_squad_dataset(batch_size=1, repeat_count=1, data_file_path=None, schema_file_path=None, is_training=True): + """create finetune or evaluation dataset""" + type_cast_op = C.TypeCast(mstype.int32) + if is_training: + ds = de.TFRecordDataset([data_file_path], schema_file_path if schema_file_path != "" else None, + columns_list=["input_ids", "input_mask", "segment_ids", + "start_positions", "end_positions", + "unique_ids", "is_impossible"]) + ds = ds.map(input_columns="start_positions", operations=type_cast_op) + ds = ds.map(input_columns="end_positions", operations=type_cast_op) + else: + ds = 
de.TFRecordDataset([data_file_path], schema_file_path if schema_file_path != "" else None, + columns_list=["input_ids", "input_mask", "segment_ids", "unique_ids"]) + ds = ds.map(input_columns="input_ids", operations=type_cast_op) + ds = ds.map(input_columns="input_mask", operations=type_cast_op) + ds = ds.map(input_columns="segment_ids", operations=type_cast_op) + ds = ds.map(input_columns="segment_ids", operations=type_cast_op) + ds = ds.map(input_columns="input_mask", operations=type_cast_op) + ds = ds.map(input_columns="input_ids", operations=type_cast_op) + ds = ds.repeat(repeat_count) + # apply shuffle operation + buffer_size = 960 + ds = ds.shuffle(buffer_size=buffer_size) + # apply batch operations + ds = ds.batch(batch_size, drop_remainder=True) + return ds diff --git a/model_zoo/bert/src/finetune_config.py b/model_zoo/bert/src/finetune_config.py deleted file mode 100644 index 6241d06994..0000000000 --- a/model_zoo/bert/src/finetune_config.py +++ /dev/null @@ -1,120 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -""" -config settings, will be used in finetune.py -""" - -from easydict import EasyDict as edict -import mindspore.common.dtype as mstype -from .bert_model import BertConfig - -cfg = edict({ - 'task': 'NER', - 'num_labels': 41, - 'data_file': '/your/path/train.tfrecord', - 'schema_file': '/your/path/schema.json', - 'epoch_num': 5, - 'ckpt_prefix': 'bert', - 'ckpt_dir': None, - 'pre_training_ckpt': '/your/path/pre_training.ckpt', - 'use_crf': False, - 'optimizer': 'Lamb', - 'AdamWeightDecayDynamicLR': edict({ - 'learning_rate': 2e-5, - 'end_learning_rate': 1e-7, - 'power': 1.0, - 'weight_decay': 1e-5, - 'eps': 1e-6, - }), - 'Lamb': edict({ - 'start_learning_rate': 2e-5, - 'end_learning_rate': 1e-7, - 'power': 1.0, - 'weight_decay': 0.01, - 'decay_filter': lambda x: False, - }), - 'Momentum': edict({ - 'learning_rate': 2e-5, - 'momentum': 0.9, - }), -}) - -bert_net_cfg = BertConfig( - batch_size=16, - seq_length=128, - vocab_size=21128, - hidden_size=768, - num_hidden_layers=12, - num_attention_heads=12, - intermediate_size=3072, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=2, - initializer_range=0.02, - use_relative_positions=False, - input_mask_from_dataset=True, - token_type_ids_from_dataset=True, - dtype=mstype.float32, - compute_type=mstype.float16, -) - -tag_to_index = { - "O": 0, - "S_address": 1, - "B_address": 2, - "M_address": 3, - "E_address": 4, - "S_book": 5, - "B_book": 6, - "M_book": 7, - "E_book": 8, - "S_company": 9, - "B_company": 10, - "M_company": 11, - "E_company": 12, - "S_game": 13, - "B_game": 14, - "M_game": 15, - "E_game": 16, - "S_government": 17, - "B_government": 18, - "M_government": 19, - "E_government": 20, - "S_movie": 21, - "B_movie": 22, - "M_movie": 23, - "E_movie": 24, - "S_name": 25, - "B_name": 26, - "M_name": 27, - "E_name": 28, - "S_organization": 29, - 
"B_organization": 30, - "M_organization": 31, - "E_organization": 32, - "S_position": 33, - "B_position": 34, - "M_position": 35, - "E_position": 36, - "S_scene": 37, - "B_scene": 38, - "M_scene": 39, - "E_scene": 40, - "": 41, - "": 42 -} diff --git a/model_zoo/bert/src/evaluation_config.py b/model_zoo/bert/src/finetune_eval_config.py similarity index 68% rename from model_zoo/bert/src/evaluation_config.py rename to model_zoo/bert/src/finetune_eval_config.py index b18c5643b0..4b8e121e09 100644 --- a/model_zoo/bert/src/evaluation_config.py +++ b/model_zoo/bert/src/finetune_eval_config.py @@ -21,18 +21,30 @@ from easydict import EasyDict as edict import mindspore.common.dtype as mstype from .bert_model import BertConfig -cfg = edict({ - 'task': 'NER', - 'num_labels': 41, - 'data_file': '/your/path/evaluation.tfrecord', - 'schema_file': '/your/path/schema.json', - 'finetune_ckpt': '/your/path/your.ckpt', - 'use_crf': False, - 'clue_benchmark': False, +optimizer_cfg = edict({ + 'optimizer': 'Lamb', + 'AdamWeightDecayDynamicLR': edict({ + 'learning_rate': 2e-5, + 'end_learning_rate': 1e-7, + 'power': 1.0, + 'weight_decay': 1e-5, + 'eps': 1e-6, + }), + 'Lamb': edict({ + 'start_learning_rate': 2e-5, + 'end_learning_rate': 1e-7, + 'power': 1.0, + 'weight_decay': 0.01, + 'decay_filter': lambda x: False, + }), + 'Momentum': edict({ + 'learning_rate': 2e-5, + 'momentum': 0.9, + }), }) bert_net_cfg = BertConfig( - batch_size=16 if not cfg.clue_benchmark else 1, + batch_size=16, seq_length=128, vocab_size=21128, hidden_size=768, @@ -40,8 +52,8 @@ bert_net_cfg = BertConfig( num_attention_heads=12, intermediate_size=3072, hidden_act="gelu", - hidden_dropout_prob=0.0, - attention_probs_dropout_prob=0.0, + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, max_position_embeddings=512, type_vocab_size=2, initializer_range=0.02, diff --git a/model_zoo/bert/src/finetune_eval_model.py b/model_zoo/bert/src/finetune_eval_model.py new file mode 100644 index 
0000000000..047decc377 --- /dev/null +++ b/model_zoo/bert/src/finetune_eval_model.py @@ -0,0 +1,123 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +''' +Bert finetune and evaluation model script. +''' + +import mindspore.nn as nn +from mindspore.common.initializer import TruncatedNormal +from mindspore.ops import operations as P +from .bert_model import BertModel + +class BertCLSModel(nn.Cell): + """ + This class is responsible for classification task evaluation, i.e. XNLI(num_labels=3), + LCQMC(num_labels=2), Chnsenti(num_labels=2). The returned output represents the final + logits as the results of log_softmax is propotional to that of softmax. 
+ """ + def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0, use_one_hot_embeddings=False, + assessment_method=""): + super(BertCLSModel, self).__init__() + if not is_training: + config.hidden_dropout_prob = 0.0 + config.hidden_probs_dropout_prob = 0.0 + self.bert = BertModel(config, is_training, use_one_hot_embeddings) + self.cast = P.Cast() + self.weight_init = TruncatedNormal(config.initializer_range) + self.log_softmax = P.LogSoftmax(axis=-1) + self.dtype = config.dtype + self.num_labels = num_labels + self.dense_1 = nn.Dense(config.hidden_size, self.num_labels, weight_init=self.weight_init, + has_bias=True).to_float(config.compute_type) + self.dropout = nn.Dropout(1 - dropout_prob) + self.assessment_method = assessment_method + + def construct(self, input_ids, input_mask, token_type_id): + _, pooled_output, _ = \ + self.bert(input_ids, token_type_id, input_mask) + cls = self.cast(pooled_output, self.dtype) + cls = self.dropout(cls) + logits = self.dense_1(cls) + logits = self.cast(logits, self.dtype) + if self.assessment_method != "spearman_correlation": + logits = self.log_softmax(logits) + return logits + +class BertSquadModel(nn.Cell): + ''' + This class is responsible for SQuAD + ''' + def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0, use_one_hot_embeddings=False): + super(BertSquadModel, self).__init__() + if not is_training: + config.hidden_dropout_prob = 0.0 + config.hidden_probs_dropout_prob = 0.0 + self.bert = BertModel(config, is_training, use_one_hot_embeddings) + self.weight_init = TruncatedNormal(config.initializer_range) + self.dense1 = nn.Dense(config.hidden_size, num_labels, weight_init=self.weight_init, + has_bias=True).to_float(config.compute_type) + self.num_labels = num_labels + self.dtype = config.dtype + self.log_softmax = P.LogSoftmax(axis=1) + self.is_training = is_training + + def construct(self, input_ids, input_mask, token_type_id): + sequence_output, _, _ = self.bert(input_ids, token_type_id, 
input_mask) + batch_size, seq_length, hidden_size = P.Shape()(sequence_output) + sequence = P.Reshape()(sequence_output, (-1, hidden_size)) + logits = self.dense1(sequence) + logits = P.Cast()(logits, self.dtype) + logits = P.Reshape()(logits, (batch_size, seq_length, self.num_labels)) + logits = self.log_softmax(logits) + return logits + +class BertNERModel(nn.Cell): + """ + This class is responsible for sequence labeling task evaluation, i.e. NER(num_labels=11). + The returned output represents the final logits as the results of log_softmax is propotional to that of softmax. + """ + def __init__(self, config, is_training, num_labels=11, use_crf=False, dropout_prob=0.0, + use_one_hot_embeddings=False): + super(BertNERModel, self).__init__() + if not is_training: + config.hidden_dropout_prob = 0.0 + config.hidden_probs_dropout_prob = 0.0 + self.bert = BertModel(config, is_training, use_one_hot_embeddings) + self.cast = P.Cast() + self.weight_init = TruncatedNormal(config.initializer_range) + self.log_softmax = P.LogSoftmax(axis=-1) + self.dtype = config.dtype + self.num_labels = num_labels + self.dense_1 = nn.Dense(config.hidden_size, self.num_labels, weight_init=self.weight_init, + has_bias=True).to_float(config.compute_type) + self.dropout = nn.Dropout(1 - dropout_prob) + self.reshape = P.Reshape() + self.shape = (-1, config.hidden_size) + self.use_crf = use_crf + self.origin_shape = (config.batch_size, config.seq_length, self.num_labels) + + def construct(self, input_ids, input_mask, token_type_id): + sequence_output, _, _ = \ + self.bert(input_ids, token_type_id, input_mask) + seq = self.dropout(sequence_output) + seq = self.reshape(seq, self.shape) + logits = self.dense_1(seq) + logits = self.cast(logits, self.dtype) + if self.use_crf: + return_value = self.reshape(logits, self.origin_shape) + else: + return_value = self.log_softmax(logits) + return return_value diff --git a/model_zoo/bert/src/sample_process.py b/model_zoo/bert/src/sample_process.py index 
59f3e76a31..c7cf29c510 100644 --- a/model_zoo/bert/src/sample_process.py +++ b/model_zoo/bert/src/sample_process.py @@ -52,12 +52,12 @@ def process_one_example_p(tokenizer, text, max_seq_len=128): feature = (input_ids, input_mask, segment_ids) return feature -def label_generation(text, probs): +def label_generation(text="", probs=None, label2id_file=""): """generate label""" data = [text] probs = [probs] result = [] - label2id = json.loads(open("./label2id.json").read()) + label2id = json.loads(open(label2id_file).read()) id2label = [k for k, v in label2id.items()] for index, prob in enumerate(probs): diff --git a/model_zoo/bert/src/utils.py b/model_zoo/bert/src/utils.py index ec5651b205..dfb6ffa5fe 100644 --- a/model_zoo/bert/src/utils.py +++ b/model_zoo/bert/src/utils.py @@ -17,347 +17,13 @@ Functional Cells used in Bert finetune and evaluation. """ +import os import mindspore.nn as nn -from mindspore.common.initializer import TruncatedNormal from mindspore.ops import operations as P -from mindspore.ops import functional as F -from mindspore.ops import composite as C from mindspore.common.tensor import Tensor -from mindspore.common.parameter import Parameter, ParameterTuple from mindspore.common import dtype as mstype -from mindspore.nn.wrap.grad_reducer import DistributedGradReducer -from mindspore.train.parallel_utils import ParallelMode -from mindspore.communication.management import get_group_size -from mindspore import context -from .bert_model import BertModel -from .bert_for_pre_training import clip_grad -from .CRF import CRF +from mindspore.train.callback import Callback -GRADIENT_CLIP_TYPE = 1 -GRADIENT_CLIP_VALUE = 1.0 -grad_scale = C.MultitypeFuncGraph("grad_scale") -reciprocal = P.Reciprocal() - -@grad_scale.register("Tensor", "Tensor") -def tensor_grad_scale(scale, grad): - return grad * reciprocal(scale) - -_grad_overflow = C.MultitypeFuncGraph("_grad_overflow") -grad_overflow = P.FloatStatus() - -@_grad_overflow.register("Tensor") -def 
_tensor_grad_overflow(grad): - return grad_overflow(grad) - -class BertFinetuneCell(nn.Cell): - """ - Especifically defined for finetuning where only four inputs tensor are needed. - """ - def __init__(self, network, optimizer, scale_update_cell=None): - - super(BertFinetuneCell, self).__init__(auto_prefix=False) - self.network = network - self.weights = ParameterTuple(network.trainable_params()) - self.optimizer = optimizer - self.grad = C.GradOperation('grad', - get_by_list=True, - sens_param=True) - self.reducer_flag = False - self.allreduce = P.AllReduce() - self.parallel_mode = context.get_auto_parallel_context("parallel_mode") - if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]: - self.reducer_flag = True - self.grad_reducer = None - if self.reducer_flag: - mean = context.get_auto_parallel_context("mirror_mean") - degree = get_group_size() - self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree) - self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE) - self.cast = P.Cast() - self.gpu_target = False - if context.get_context("device_target") == "GPU": - self.gpu_target = True - self.float_status = P.FloatStatus() - self.addn = P.AddN() - self.reshape = P.Reshape() - else: - self.alloc_status = P.NPUAllocFloatStatus() - self.get_status = P.NPUGetFloatStatus() - self.clear_before_grad = P.NPUClearFloatStatus() - self.reduce_sum = P.ReduceSum(keep_dims=False) - self.depend_parameter_use = P.ControlDepend(depend_mode=1) - self.base = Tensor(1, mstype.float32) - self.less_equal = P.LessEqual() - self.hyper_map = C.HyperMap() - self.loss_scale = None - self.loss_scaling_manager = scale_update_cell - if scale_update_cell: - self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32), - name="loss_scale") - - def construct(self, - input_ids, - input_mask, - token_type_id, - label_ids, - sens=None): - - - weights = self.weights - init = False - loss = 
self.network(input_ids, - input_mask, - token_type_id, - label_ids) - if sens is None: - scaling_sens = self.loss_scale - else: - scaling_sens = sens - - if not self.gpu_target: - init = self.alloc_status() - clear_before_grad = self.clear_before_grad(init) - F.control_depend(loss, init) - self.depend_parameter_use(clear_before_grad, scaling_sens) - grads = self.grad(self.network, weights)(input_ids, - input_mask, - token_type_id, - label_ids, - self.cast(scaling_sens, - mstype.float32)) - grads = self.hyper_map(F.partial(grad_scale, scaling_sens), grads) - grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) - if self.reducer_flag: - grads = self.grad_reducer(grads) - if not self.gpu_target: - flag = self.get_status(init) - flag_sum = self.reduce_sum(init, (0,)) - F.control_depend(grads, flag) - F.control_depend(flag, flag_sum) - else: - flag_sum = self.hyper_map(F.partial(_grad_overflow), grads) - flag_sum = self.addn(flag_sum) - flag_sum = self.reshape(flag_sum, (())) - if self.is_distributed: - flag_reduce = self.allreduce(flag_sum) - cond = self.less_equal(self.base, flag_reduce) - else: - cond = self.less_equal(self.base, flag_sum) - overflow = cond - if sens is None: - overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond) - return F.depend(ret, succ) - -class BertSquadCell(nn.Cell): - """ - specifically defined for finetuning where only four inputs tensor are needed. 
- """ - def __init__(self, network, optimizer, scale_update_cell=None): - super(BertSquadCell, self).__init__(auto_prefix=False) - self.network = network - self.weights = ParameterTuple(network.trainable_params()) - self.optimizer = optimizer - self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True) - self.reducer_flag = False - self.allreduce = P.AllReduce() - self.parallel_mode = context.get_auto_parallel_context("parallel_mode") - if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]: - self.reducer_flag = True - self.grad_reducer = None - if self.reducer_flag: - mean = context.get_auto_parallel_context("mirror_mean") - degree = get_group_size() - self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree) - self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE) - self.cast = P.Cast() - self.alloc_status = P.NPUAllocFloatStatus() - self.get_status = P.NPUGetFloatStatus() - self.clear_before_grad = P.NPUClearFloatStatus() - self.reduce_sum = P.ReduceSum(keep_dims=False) - self.depend_parameter_use = P.ControlDepend(depend_mode=1) - self.base = Tensor(1, mstype.float32) - self.less_equal = P.LessEqual() - self.hyper_map = C.HyperMap() - self.loss_scale = None - self.loss_scaling_manager = scale_update_cell - if scale_update_cell: - self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32), - name="loss_scale") - def construct(self, - input_ids, - input_mask, - token_type_id, - start_position, - end_position, - unique_id, - is_impossible, - sens=None): - weights = self.weights - init = self.alloc_status() - loss = self.network(input_ids, - input_mask, - token_type_id, - start_position, - end_position, - unique_id, - is_impossible) - if sens is None: - scaling_sens = self.loss_scale - else: - scaling_sens = sens - grads = self.grad(self.network, weights)(input_ids, - input_mask, - token_type_id, - start_position, - end_position, - unique_id, - 
is_impossible, - self.cast(scaling_sens, - mstype.float32)) - clear_before_grad = self.clear_before_grad(init) - F.control_depend(loss, init) - self.depend_parameter_use(clear_before_grad, scaling_sens) - grads = self.hyper_map(F.partial(grad_scale, scaling_sens), grads) - grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) - if self.reducer_flag: - grads = self.grad_reducer(grads) - flag = self.get_status(init) - flag_sum = self.reduce_sum(init, (0,)) - if self.is_distributed: - flag_reduce = self.allreduce(flag_sum) - cond = self.less_equal(self.base, flag_reduce) - else: - cond = self.less_equal(self.base, flag_sum) - F.control_depend(grads, flag) - F.control_depend(flag, flag_sum) - overflow = cond - if sens is None: - overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond) - return F.depend(ret, succ) - - -class BertRegressionModel(nn.Cell): - """ - Bert finetune model for regression task - """ - def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0, use_one_hot_embeddings=False): - super(BertRegressionModel, self).__init__() - self.bert = BertModel(config, is_training, use_one_hot_embeddings) - self.cast = P.Cast() - self.weight_init = TruncatedNormal(config.initializer_range) - self.log_softmax = P.LogSoftmax(axis=-1) - self.dtype = config.dtype - self.num_labels = num_labels - self.dropout = nn.Dropout(1 - dropout_prob) - self.dense_1 = nn.Dense(config.hidden_size, 1, weight_init=self.weight_init, - has_bias=True).to_float(mstype.float16) - - def construct(self, input_ids, input_mask, token_type_id): - _, pooled_output, _ = self.bert(input_ids, token_type_id, input_mask) - cls = self.cast(pooled_output, self.dtype) - cls = self.dropout(cls) - logits = self.dense_1(cls) - logits = self.cast(logits, self.dtype) - return logits - - -class BertCLSModel(nn.Cell): - """ - This class is responsible for 
classification task evaluation, i.e. XNLI(num_labels=3), - LCQMC(num_labels=2), Chnsenti(num_labels=2). The returned output represents the final - logits as the results of log_softmax is propotional to that of softmax. - """ - def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0, use_one_hot_embeddings=False): - super(BertCLSModel, self).__init__() - self.bert = BertModel(config, is_training, use_one_hot_embeddings) - self.cast = P.Cast() - self.weight_init = TruncatedNormal(config.initializer_range) - self.log_softmax = P.LogSoftmax(axis=-1) - self.dtype = config.dtype - self.num_labels = num_labels - self.dense_1 = nn.Dense(config.hidden_size, self.num_labels, weight_init=self.weight_init, - has_bias=True).to_float(config.compute_type) - self.dropout = nn.Dropout(1 - dropout_prob) - - def construct(self, input_ids, input_mask, token_type_id): - _, pooled_output, _ = \ - self.bert(input_ids, token_type_id, input_mask) - cls = self.cast(pooled_output, self.dtype) - cls = self.dropout(cls) - logits = self.dense_1(cls) - logits = self.cast(logits, self.dtype) - log_probs = self.log_softmax(logits) - return log_probs - -class BertSquadModel(nn.Cell): - """ - Bert finetune model for SQuAD v1.1 task - """ - def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0, use_one_hot_embeddings=False): - super(BertSquadModel, self).__init__() - self.bert = BertModel(config, is_training, use_one_hot_embeddings) - self.weight_init = TruncatedNormal(config.initializer_range) - self.dense1 = nn.Dense(config.hidden_size, num_labels, weight_init=self.weight_init, - has_bias=True).to_float(config.compute_type) - self.num_labels = num_labels - self.dtype = config.dtype - self.log_softmax = P.LogSoftmax(axis=1) - self.is_training = is_training - - def construct(self, input_ids, input_mask, token_type_id): - sequence_output, _, _ = self.bert(input_ids, token_type_id, input_mask) - batch_size, seq_length, hidden_size = P.Shape()(sequence_output) - sequence 
= P.Reshape()(sequence_output, (-1, hidden_size)) - logits = self.dense1(sequence) - logits = P.Cast()(logits, self.dtype) - logits = P.Reshape()(logits, (batch_size, seq_length, self.num_labels)) - logits = self.log_softmax(logits) - return logits - -class BertNERModel(nn.Cell): - """ - This class is responsible for sequence labeling task evaluation, i.e. NER(num_labels=11). - The returned output represents the final logits as the results of log_softmax is propotional to that of softmax. - """ - def __init__(self, config, is_training, num_labels=11, use_crf=False, dropout_prob=0.0, - use_one_hot_embeddings=False): - super(BertNERModel, self).__init__() - self.bert = BertModel(config, is_training, use_one_hot_embeddings) - self.cast = P.Cast() - self.weight_init = TruncatedNormal(config.initializer_range) - self.log_softmax = P.LogSoftmax(axis=-1) - self.dtype = config.dtype - self.num_labels = num_labels - self.dense_1 = nn.Dense(config.hidden_size, self.num_labels, weight_init=self.weight_init, - has_bias=True).to_float(config.compute_type) - self.dropout = nn.Dropout(1 - dropout_prob) - self.reshape = P.Reshape() - self.shape = (-1, config.hidden_size) - self.use_crf = use_crf - self.origin_shape = (config.batch_size, config.seq_length, self.num_labels) - - def construct(self, input_ids, input_mask, token_type_id): - sequence_output, _, _ = \ - self.bert(input_ids, token_type_id, input_mask) - seq = self.dropout(sequence_output) - seq = self.reshape(seq, self.shape) - logits = self.dense_1(seq) - logits = self.cast(logits, self.dtype) - if self.use_crf: - return_value = self.reshape(logits, self.origin_shape) - else: - return_value = self.log_softmax(logits) - return return_value class CrossEntropyCalculation(nn.Cell): """ @@ -387,95 +53,73 @@ class CrossEntropyCalculation(nn.Cell): return_value = logits * 1.0 return return_value -class BertCLS(nn.Cell): - """ - Train interface for classification finetuning task. 
- """ - def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0, use_one_hot_embeddings=False): - super(BertCLS, self).__init__() - self.bert = BertCLSModel(config, is_training, num_labels, dropout_prob, use_one_hot_embeddings) - self.loss = CrossEntropyCalculation(is_training) - self.num_labels = num_labels - def construct(self, input_ids, input_mask, token_type_id, label_ids): - log_probs = self.bert(input_ids, input_mask, token_type_id) - loss = self.loss(log_probs, label_ids, self.num_labels) - return loss - -class BertNER(nn.Cell): - """ - Train interface for sequence labeling finetuning task. - """ - def __init__(self, config, is_training, num_labels=11, use_crf=False, tag_to_index=None, dropout_prob=0.0, - use_one_hot_embeddings=False): - super(BertNER, self).__init__() - self.bert = BertNERModel(config, is_training, num_labels, use_crf, dropout_prob, use_one_hot_embeddings) - if use_crf: - if not tag_to_index: - raise Exception("The dict for tag-index mapping should be provided for CRF.") - self.loss = CRF(tag_to_index, config.batch_size, config.seq_length, is_training) - else: - self.loss = CrossEntropyCalculation(is_training) - self.num_labels = num_labels - self.use_crf = use_crf - def construct(self, input_ids, input_mask, token_type_id, label_ids): - logits = self.bert(input_ids, input_mask, token_type_id) - if self.use_crf: - loss = self.loss(logits, label_ids) - else: - loss = self.loss(logits, label_ids, self.num_labels) - return loss - -class BertSquad(nn.Cell): - """ - Train interface for SQuAD finetuning task. 
- """ - def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0, use_one_hot_embeddings=False): - super(BertSquad, self).__init__() - self.bert = BertSquadModel(config, is_training, num_labels, dropout_prob, use_one_hot_embeddings) - self.loss = CrossEntropyCalculation(is_training) - self.num_labels = num_labels - self.seq_length = config.seq_length - self.is_training = is_training - self.total_num = Parameter(Tensor([0], mstype.float32), name='total_num') - self.start_num = Parameter(Tensor([0], mstype.float32), name='start_num') - self.end_num = Parameter(Tensor([0], mstype.float32), name='end_num') - self.sum = P.ReduceSum() - self.equal = P.Equal() - self.argmax = P.ArgMaxWithValue(axis=1) - self.squeeze = P.Squeeze(axis=-1) - - def construct(self, input_ids, input_mask, token_type_id, start_position, end_position, unique_id, is_impossible): - logits = self.bert(input_ids, input_mask, token_type_id) - if self.is_training: - unstacked_logits_0 = self.squeeze(logits[:, :, 0:1]) - unstacked_logits_1 = self.squeeze(logits[:, :, 1:2]) - start_loss = self.loss(unstacked_logits_0, start_position, self.seq_length) - end_loss = self.loss(unstacked_logits_1, end_position, self.seq_length) - total_loss = (start_loss + end_loss) / 2.0 - else: - start_logits = self.squeeze(logits[:, :, 0:1]) - end_logits = self.squeeze(logits[:, :, 1:2]) - total_loss = (unique_id, start_logits, end_logits) - return total_loss - - -class BertReg(nn.Cell): - """ - Bert finetune model with loss for regression task - """ - def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0, use_one_hot_embeddings=False): - super(BertReg, self).__init__() - self.bert = BertRegressionModel(config, is_training, num_labels, dropout_prob, use_one_hot_embeddings) - self.loss = nn.MSELoss() - self.is_training = is_training - self.sigmoid = P.Sigmoid() - self.cast = P.Cast() - self.mul = P.Mul() - def construct(self, input_ids, input_mask, token_type_id, labels): - logits = 
self.bert(input_ids, input_mask, token_type_id) - if self.is_training: - loss = self.loss(logits, labels) - else: - loss = logits - return loss +def make_directory(path: str): + """Make directory.""" + if path is None or not isinstance(path, str) or path.strip() == "": + logger.error("The path(%r) is invalid type.", path) + raise TypeError("Input path is invaild type") + + # convert the relative paths + path = os.path.realpath(path) + logger.debug("The abs path is %r", path) + + # check the path is exist and write permissions? + if os.path.exists(path): + real_path = path + else: + # All exceptions need to be caught because create directory maybe have some limit(permissions) + logger.debug("The directory(%s) doesn't exist, will create it", path) + try: + os.makedirs(path, exist_ok=True) + real_path = path + except PermissionError as e: + logger.error("No write permission on the directory(%r), error = %r", path, e) + raise TypeError("No write permission on the directory.") + return real_path + +class LossCallBack(Callback): + """ + Monitor the loss in training. + If the loss in NAN or INF terminating training. + Note: + if per_print_times is 0 do not print loss. + Args: + per_print_times (int): Print loss every times. Default: 1. + """ + def __init__(self, per_print_times=1): + super(LossCallBack, self).__init__() + if not isinstance(per_print_times, int) or per_print_times < 0: + raise ValueError("print_step must be int and >= 0") + self._per_print_times = per_print_times + def step_end(self, run_context): + cb_params = run_context.original_args() + print("epoch: {}, step: {}, outputs are {}".format(cb_params.cur_epoch_num, cb_params.cur_step_num, + str(cb_params.net_outputs))) + +def LoadNewestCkpt(load_finetune_checkpoint_dir, steps_per_epoch, epoch_num, prefix): + """ + Find the ckpt finetune generated and load it into eval network. 
+ """ + files = os.listdir(load_finetune_checkpoint_dir) + pre_len = len(prefix) + max_num = 0 + for filename in files: + name_ext = os.path.splitext(filename) + if name_ext[-1] != ".ckpt": + continue + #steps_per_epoch = ds.get_dataset_size() + if filename.find(prefix) == 0 and not filename[pre_len].isalpha(): + index = filename[pre_len:].find("-") + if index == 0 and max_num == 0: + load_finetune_checkpoint_path = os.path.join(load_finetune_checkpoint_dir, filename) + elif index not in (0, -1): + name_split = name_ext[-2].split('_') + if (steps_per_epoch != int(name_split[len(name_split)-1])) \ + or (epoch_num != int(filename[pre_len + index + 1:pre_len + index + 2])): + continue + num = filename[pre_len + 1:pre_len + index] + if int(num) > max_num: + max_num = int(num) + load_finetune_checkpoint_path = os.path.join(load_finetune_checkpoint_dir, filename) + return load_finetune_checkpoint_path diff --git a/model_zoo/faster_rcnn/eval.py b/model_zoo/faster_rcnn/eval.py index e0b4e2d0ea..d8dd2ed79a 100644 --- a/model_zoo/faster_rcnn/eval.py +++ b/model_zoo/faster_rcnn/eval.py @@ -40,7 +40,7 @@ parser.add_argument("--checkpoint_path", type=str, required=True, help="Checkpoi parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.") args_opt = parser.parse_args() -context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True, device_id=args_opt.device_id) +context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id) def FasterRcnn_eval(dataset_path, ckpt_path, ann_file): """FasterRcnn evaluation.""" diff --git a/model_zoo/faster_rcnn/src/FasterRcnn/fpn_neck.py b/model_zoo/faster_rcnn/src/FasterRcnn/fpn_neck.py index 05d6d1c9d1..bcf0536f5b 100644 --- a/model_zoo/faster_rcnn/src/FasterRcnn/fpn_neck.py +++ b/model_zoo/faster_rcnn/src/FasterRcnn/fpn_neck.py @@ -22,7 +22,7 @@ from mindspore.common.tensor import Tensor from mindspore.common import dtype as mstype from 
mindspore.common.initializer import initializer -context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True) +context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") def bias_init_zeros(shape): """Bias init method.""" diff --git a/model_zoo/faster_rcnn/src/FasterRcnn/proposal_generator.py b/model_zoo/faster_rcnn/src/FasterRcnn/proposal_generator.py index 9428b20914..f9bcc47df4 100644 --- a/model_zoo/faster_rcnn/src/FasterRcnn/proposal_generator.py +++ b/model_zoo/faster_rcnn/src/FasterRcnn/proposal_generator.py @@ -22,7 +22,7 @@ from mindspore import Tensor from mindspore import context -context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True) +context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") class Proposal(nn.Cell): diff --git a/model_zoo/faster_rcnn/src/FasterRcnn/resnet50.py b/model_zoo/faster_rcnn/src/FasterRcnn/resnet50.py index 20d9ee1f34..002ea08d0c 100644 --- a/model_zoo/faster_rcnn/src/FasterRcnn/resnet50.py +++ b/model_zoo/faster_rcnn/src/FasterRcnn/resnet50.py @@ -22,7 +22,7 @@ from mindspore.ops import functional as F from mindspore import context -context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True) +context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") def weight_init_ones(shape): diff --git a/model_zoo/faster_rcnn/train.py b/model_zoo/faster_rcnn/train.py index 3cc86c7cc1..7d5f190bab 100644 --- a/model_zoo/faster_rcnn/train.py +++ b/model_zoo/faster_rcnn/train.py @@ -52,7 +52,7 @@ parser.add_argument("--device_num", type=int, default=1, help="Use device nums, parser.add_argument("--rank_id", type=int, default=0, help="Rank id, default is 0.") args_opt = parser.parse_args() -context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True, device_id=args_opt.device_id) +context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id) if __name__ == '__main__': 
if not args_opt.do_eval and args_opt.run_distribute: diff --git a/model_zoo/gat/README.md b/model_zoo/gat/README.md index 7c30e08851..0c46aebbaf 100644 --- a/model_zoo/gat/README.md +++ b/model_zoo/gat/README.md @@ -72,9 +72,9 @@ sh run_process_data.sh [SRC_PATH] [DATASET_NAME] >> Launch ``` #Generate dataset in mindrecord format for cora -sh run_process_data.sh cora +./run_process_data.sh ./data cora #Generate dataset in mindrecord format for citeseer -sh run_process_data.sh citeseer +./run_process_data.sh ./data citeseer ``` # Features diff --git a/model_zoo/gat/train.py b/model_zoo/gat/train.py index af1808b995..acfbb05b78 100644 --- a/model_zoo/gat/train.py +++ b/model_zoo/gat/train.py @@ -96,6 +96,8 @@ def train(): if eval_acc >= val_acc_max and eval_loss < val_loss_min: val_acc_model = eval_acc val_loss_model = eval_loss + if os.path.exists("ckpts/gat.ckpt"): + os.remove("ckpts/gat.ckpt") _exec_save_checkpoint(train_net.network, "ckpts/gat.ckpt") val_acc_max = np.max((val_acc_max, eval_acc)) val_loss_min = np.min((val_loss_min, eval_loss)) diff --git a/model_zoo/googlenet/scripts/run_train.sh b/model_zoo/googlenet/scripts/run_train.sh index c21c2f04b6..e8c045c8b1 100644 --- a/model_zoo/googlenet/scripts/run_train.sh +++ b/model_zoo/googlenet/scripts/run_train.sh @@ -33,10 +33,12 @@ MINDSPORE_HCCL_CONFIG_PATH=$(realpath $1) export MINDSPORE_HCCL_CONFIG_PATH echo "MINDSPORE_HCCL_CONFIG_PATH=${MINDSPORE_HCCL_CONFIG_PATH}" +export SERVER_ID=0 +rank_start=$((DEVICE_NUM * SERVER_ID)) for((i=0; i<${DEVICE_NUM}; i++)) do export DEVICE_ID=$i - export RANK_ID=$i + export RANK_ID=$((rank_start + i)) rm -rf ./train_parallel$i mkdir ./train_parallel$i cp -r ./src ./train_parallel$i diff --git a/model_zoo/googlenet/src/dataset.py b/model_zoo/googlenet/src/dataset.py index a1cbc2cdab..a3f74a0617 100644 --- a/model_zoo/googlenet/src/dataset.py +++ b/model_zoo/googlenet/src/dataset.py @@ -31,8 +31,7 @@ def create_dataset(data_home, repeat_num=1, training=True): if not 
training: data_dir = os.path.join(data_home, "cifar-10-verify-bin") - rank_size = int(os.environ.get("RANK_SIZE")) if os.environ.get("RANK_SIZE") else None - rank_id = int(os.environ.get("RANK_ID")) if os.environ.get("RANK_ID") else None + rank_size, rank_id = _get_rank_info() data_set = ds.Cifar10Dataset(data_dir, num_shards=rank_size, shard_id=rank_id) resize_height = cfg.image_height @@ -65,3 +64,19 @@ def create_dataset(data_home, repeat_num=1, training=True): data_set = data_set.batch(batch_size=cfg.batch_size, drop_remainder=True) return data_set + + +def _get_rank_info(): + """ + get rank size and rank id + """ + rank_size = int(os.environ.get("RANK_SIZE", 1)) + + if rank_size > 1: + from mindspore.communication.management import get_rank, get_group_size + rank_size = get_group_size() + rank_id = get_rank() + else: + rank_size = rank_id = None + + return rank_size, rank_id diff --git a/model_zoo/lenet_quant/src/loss_monitor.py b/model_zoo/lenet_quant/src/loss_monitor.py new file mode 100644 index 0000000000..59c222d23d --- /dev/null +++ b/model_zoo/lenet_quant/src/loss_monitor.py @@ -0,0 +1,92 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""LossMonitor Callback class.""" + +import time +import numpy as np +from mindspore.common.tensor import Tensor +from mindspore.train.callback import Callback + + +class LossMonitor(Callback): + """ + Monitor the loss in training. + + If the loss is NAN or INF, it will terminate training. + + Note: + If per_print_times is 0 do not print loss. + + Args: + per_print_times (int): Print loss every times. Default: 1. + lr_init (numpy array): train learning rate. Default: None. + + Raises: + ValueError: If print_step is not int or less than zero. + + Examples: + >>> LossMonitor(100, lr_init=Tensor([0.05]*100).asnumpy()) + """ + + def __init__(self, per_print_times=1, lr_init=None): + super(LossMonitor, self).__init__() + if not isinstance(per_print_times, int) or per_print_times < 0: + raise ValueError("print_step must be int and >= 0.") + self._per_print_times = per_print_times + self.lr_init = lr_init + + def epoch_begin(self, run_context): + self.losses = [] + self.epoch_time = time.time() + + def epoch_end(self, run_context): + cb_params = run_context.original_args() + epoch_mseconds = (time.time() - self.epoch_time) * 1000 + per_step_mseconds = epoch_mseconds / cb_params.batch_num + print("Epoch time: {:5.3f}, per step time: {:5.3f}, " + "avg loss: {:5.3f}".format(epoch_mseconds, + per_step_mseconds, + np.mean(self.losses))) + print("*" * 60) + + def step_begin(self, run_context): + self.step_time = time.time() + + def step_end(self, run_context): + cb_params = run_context.original_args() + step_mseconds = (time.time() - self.step_time) * 1000 + step_loss = cb_params.net_outputs + + if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor): + step_loss = step_loss[0] + if isinstance(step_loss, Tensor): + step_loss = np.mean(step_loss.asnumpy()) + + self.losses.append(step_loss) + cur_step_in_epoch = int((cb_params.cur_step_num - 1) % cb_params.batch_num) + 1 + + if 
isinstance(step_loss, float) and (np.isnan(step_loss) or np.isinf(step_loss)): + raise ValueError("Epoch: [{:3d}/{:3d}], step: [{:5d}/{:5d}]. " + "Invalid loss, terminating training.".format( + cb_params.cur_epoch_num - 1, cb_params.epoch_num, + cur_step_in_epoch, cb_params.batch_num)) + + if self._per_print_times != 0 and cb_params.cur_step_num % self._per_print_times == 0: + print("Epoch: [{:3d}/{:3d}], step: [{:5d}/{:5d}], " + "loss: [{:5.4f}], avg loss: [{:5.4f}], time: [{:5.4f}ms]".format( + cb_params.cur_epoch_num, cb_params.epoch_num, + cur_step_in_epoch, int(cb_params.batch_num), + step_loss, np.mean(self.losses), + step_mseconds), flush=True) diff --git a/model_zoo/lenet_quant/train.py b/model_zoo/lenet_quant/train.py index 2cff465832..03e9ff62bd 100644 --- a/model_zoo/lenet_quant/train.py +++ b/model_zoo/lenet_quant/train.py @@ -22,12 +22,13 @@ import os import argparse import mindspore.nn as nn from mindspore import context -from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor +from mindspore.train.callback import ModelCheckpoint, CheckpointConfig from mindspore.train import Model from mindspore.nn.metrics import Accuracy from src.dataset import create_dataset from src.config import mnist_cfg as cfg from src.lenet_fusion import LeNet5 as LeNet5Fusion +from src.loss_monitor import LossMonitor parser = argparse.ArgumentParser(description='MindSpore MNIST Example') parser.add_argument('--device_target', type=str, default="Ascend", diff --git a/model_zoo/lenet_quant/train_quant.py b/model_zoo/lenet_quant/train_quant.py index 6f27cec1e3..3a87ccc70d 100644 --- a/model_zoo/lenet_quant/train_quant.py +++ b/model_zoo/lenet_quant/train_quant.py @@ -23,13 +23,14 @@ import argparse import mindspore.nn as nn from mindspore import context from mindspore.train.serialization import load_checkpoint, load_param_into_net -from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor +from mindspore.train.callback import 
ModelCheckpoint, CheckpointConfig from mindspore.train import Model from mindspore.nn.metrics import Accuracy from mindspore.train.quant import quant from src.dataset import create_dataset from src.config import mnist_cfg as cfg from src.lenet_fusion import LeNet5 as LeNet5Fusion +from src.loss_monitor import LossMonitor parser = argparse.ArgumentParser(description='MindSpore MNIST Example') parser.add_argument('--device_target', type=str, default="Ascend", diff --git a/model_zoo/mass/eval.py b/model_zoo/mass/eval.py index 4da63a7333..bb844e9102 100644 --- a/model_zoo/mass/eval.py +++ b/model_zoo/mass/eval.py @@ -15,15 +15,13 @@ """Evaluation api.""" import argparse import pickle -import numpy as np from mindspore.common import dtype as mstype from config import TransformerConfig -from src.transformer import infer -from src.utils import ngram_ppl +from src.transformer import infer, infer_ppl from src.utils import Dictionary -from src.utils import rouge +from src.utils import get_score parser = argparse.ArgumentParser(description='Evaluation MASS.') parser.add_argument("--config", type=str, required=True, @@ -32,6 +30,8 @@ parser.add_argument("--vocab", type=str, required=True, help="Vocabulary to use.") parser.add_argument("--output", type=str, required=True, help="Result file path.") +parser.add_argument("--metric", type=str, default='rouge', + help='Set eval method.') def get_config(config): @@ -45,31 +45,15 @@ if __name__ == '__main__': args, _ = parser.parse_known_args() vocab = Dictionary.load_from_persisted_dict(args.vocab) _config = get_config(args.config) - result = infer(_config) + + if args.metric == 'rouge': + result = infer(_config) + else: + result = infer_ppl(_config) + with open(args.output, "wb") as f: pickle.dump(result, f, 1) - ppl_score = 0. 
- preds = [] - tgts = [] - _count = 0 - for sample in result: - sentence_prob = np.array(sample['prediction_prob'], dtype=np.float32) - sentence_prob = sentence_prob[:, 1:] - _ppl = [] - for path in sentence_prob: - _ppl.append(ngram_ppl(path, log_softmax=True)) - ppl = np.min(_ppl) - preds.append(' '.join([vocab[t] for t in sample['prediction']])) - tgts.append(' '.join([vocab[t] for t in sample['target']])) - print(f" | source: {' '.join([vocab[t] for t in sample['source']])}") - print(f" | target: {tgts[-1]}") - print(f" | prediction: {preds[-1]}") - print(f" | ppl: {ppl}.") - if np.isinf(ppl): - continue - ppl_score += ppl - _count += 1 - - print(f" | PPL={ppl_score / _count}.") - rouge(preds, tgts) + # get score by given metric + score = get_score(result, vocab, metric=args.metric) + print(score) diff --git a/model_zoo/mass/scripts/run.sh b/model_zoo/mass/scripts/run.sh index 91bed510ea..132e38dae2 100644 --- a/model_zoo/mass/scripts/run.sh +++ b/model_zoo/mass/scripts/run.sh @@ -18,7 +18,7 @@ export DEVICE_ID=0 export RANK_ID=0 export RANK_SIZE=1 -options=`getopt -u -o ht:n:i:j:c:o:v: -l help,task:,device_num:,device_id:,hccl_json:,config:,output:,vocab: -- "$@"` +options=`getopt -u -o ht:n:i:j:c:o:v:m: -l help,task:,device_num:,device_id:,hccl_json:,config:,output:,vocab:,metric: -- "$@"` eval set -- "$options" echo $options @@ -35,6 +35,7 @@ echo_help() echo " -c --config set the configuration file" echo " -o --output set the output file of inference" echo " -v --vocab set the vocabulary" + echo " -m --metric set the metric" } set_hccl_json() @@ -43,8 +44,8 @@ set_hccl_json() do if [[ "$1" == "-j" || "$1" == "--hccl_json" ]] then - export MINDSPORE_HCCL_CONFIG_PATH=$2 #/data/wsc/hccl_2p_01.json - export RANK_TABLE_FILE=$2 #/data/wsc/hccl_2p_01.json + export MINDSPORE_HCCL_CONFIG_PATH=$2 + export RANK_TABLE_FILE=$2 break fi shift @@ -119,6 +120,11 @@ do vocab=$2 shift 2 ;; + -m|--metric) + echo "metric"; + metric=$2 + shift 2 + ;; --) shift break @@ -163,7 
+169,7 @@ do python train.py --config ${configurations##*/} >>log.log 2>&1 & elif [ "$task" == "infer" ] then - python eval.py --config ${configurations##*/} --output ${output} --vocab ${vocab##*/} >>log_infer.log 2>&1 & + python eval.py --config ${configurations##*/} --output ${output} --vocab ${vocab##*/} --metric ${metric} >>log_infer.log 2>&1 & fi cd ../ done diff --git a/model_zoo/mass/src/transformer/__init__.py b/model_zoo/mass/src/transformer/__init__.py index 7912e7f0dd..36db26d360 100644 --- a/model_zoo/mass/src/transformer/__init__.py +++ b/model_zoo/mass/src/transformer/__init__.py @@ -19,10 +19,11 @@ from .decoder import TransformerDecoder from .beam_search import BeamSearchDecoder from .transformer_for_train import TransformerTraining, LabelSmoothedCrossEntropyCriterion, \ TransformerNetworkWithLoss, TransformerTrainOneStepWithLossScaleCell -from .infer_mass import infer +from .infer_mass import infer, infer_ppl __all__ = [ "infer", + "infer_ppl", "TransformerTraining", "LabelSmoothedCrossEntropyCriterion", "TransformerTrainOneStepWithLossScaleCell", diff --git a/model_zoo/mass/src/transformer/embedding.py b/model_zoo/mass/src/transformer/embedding.py index bdce540416..22887b0a3e 100644 --- a/model_zoo/mass/src/transformer/embedding.py +++ b/model_zoo/mass/src/transformer/embedding.py @@ -41,7 +41,7 @@ class EmbeddingLookup(nn.Cell): self.vocab_size = vocab_size self.use_one_hot_embeddings = use_one_hot_embeddings - init_weight = np.random.normal(0, embed_dim ** -0.5, size=[vocab_size, embed_dim]) + init_weight = np.random.normal(0, embed_dim ** -0.5, size=[vocab_size, embed_dim]).astype(np.float32) # 0 is Padding index, thus init it as 0. 
init_weight[0, :] = 0 self.embedding_table = Parameter(Tensor(init_weight), diff --git a/model_zoo/mass/src/transformer/infer_mass.py b/model_zoo/mass/src/transformer/infer_mass.py index 54a0b4e54f..b887e3a7b5 100644 --- a/model_zoo/mass/src/transformer/infer_mass.py +++ b/model_zoo/mass/src/transformer/infer_mass.py @@ -17,13 +17,16 @@ import time import mindspore.nn as nn import mindspore.common.dtype as mstype +from mindspore.ops import operations as P from mindspore.common.tensor import Tensor from mindspore.train.model import Model +from mindspore.train.serialization import load_checkpoint, load_param_into_net from mindspore import context from src.dataset import load_dataset from .transformer_for_infer import TransformerInferModel +from .transformer_for_train import TransformerTraining from ..utils.load_weights import load_infer_weights context.set_context( @@ -156,3 +159,129 @@ def infer(config): shuffle=False) if config.test_dataset else None prediction = transformer_infer(config, eval_dataset) return prediction + + +class TransformerInferPPLCell(nn.Cell): + """ + Encapsulation class of transformer network infer for PPL. + + Args: + config(TransformerConfig): Config. + + Returns: + Tuple[Tensor, Tensor], predicted log prob and label lengths. 
+ """ + def __init__(self, config): + super(TransformerInferPPLCell, self).__init__() + self.transformer = TransformerTraining(config, is_training=False, use_one_hot_embeddings=False) + self.batch_size = config.batch_size + self.vocab_size = config.vocab_size + self.one_hot = P.OneHot() + self.on_value = Tensor(float(1), mstype.float32) + self.off_value = Tensor(float(0), mstype.float32) + self.reduce_sum = P.ReduceSum() + self.reshape = P.Reshape() + self.cast = P.Cast() + self.flat_shape = (config.batch_size * config.seq_length,) + self.batch_shape = (config.batch_size, config.seq_length) + self.last_idx = (-1,) + + def construct(self, + source_ids, + source_mask, + target_ids, + target_mask, + label_ids, + label_mask): + """Defines the computation performed.""" + + predicted_log_probs = self.transformer(source_ids, source_mask, target_ids, target_mask) + label_ids = self.reshape(label_ids, self.flat_shape) + label_mask = self.cast(label_mask, mstype.float32) + one_hot_labels = self.one_hot(label_ids, self.vocab_size, self.on_value, self.off_value) + + label_log_probs = self.reduce_sum(predicted_log_probs * one_hot_labels, self.last_idx) + label_log_probs = self.reshape(label_log_probs, self.batch_shape) + log_probs = label_log_probs * label_mask + lengths = self.reduce_sum(label_mask, self.last_idx) + + return log_probs, lengths + + +def transformer_infer_ppl(config, dataset): + """ + Run infer with Transformer for PPL. + + Args: + config (TransformerConfig): Config. + dataset (Dataset): Dataset. + + Returns: + List[Dict], prediction, each example has 4 keys, "source", + "target", "log_prob" and "length". 
+ """ + tfm_infer = TransformerInferPPLCell(config=config) + tfm_infer.init_parameters_data() + + parameter_dict = load_checkpoint(config.existed_ckpt) + load_param_into_net(tfm_infer, parameter_dict) + + model = Model(tfm_infer) + + log_probs = [] + lengths = [] + source_sentences = [] + target_sentences = [] + for batch in dataset.create_dict_iterator(): + source_sentences.append(batch["source_eos_ids"]) + target_sentences.append(batch["target_eos_ids"]) + + source_ids = Tensor(batch["source_eos_ids"], mstype.int32) + source_mask = Tensor(batch["source_eos_mask"], mstype.int32) + target_ids = Tensor(batch["target_sos_ids"], mstype.int32) + target_mask = Tensor(batch["target_sos_mask"], mstype.int32) + label_ids = Tensor(batch["target_eos_ids"], mstype.int32) + label_mask = Tensor(batch["target_eos_mask"], mstype.int32) + + start_time = time.time() + log_prob, length = model.predict(source_ids, source_mask, target_ids, target_mask, label_ids, label_mask) + print(f" | Batch size: {config.batch_size}, " + f"Time cost: {time.time() - start_time}.") + + log_probs.append(log_prob.asnumpy()) + lengths.append(length.asnumpy()) + + output = [] + for inputs, ref, log_prob, length in zip(source_sentences, + target_sentences, + log_probs, + lengths): + for i in range(config.batch_size): + example = { + "source": inputs[i].tolist(), + "target": ref[i].tolist(), + "log_prob": log_prob[i].tolist(), + "length": length[i] + } + output.append(example) + + return output + + +def infer_ppl(config): + """ + Transformer infer PPL api. + + Args: + config (TransformerConfig): Config. 
+ + Returns: + list, result with + """ + eval_dataset = load_dataset(data_files=config.test_dataset, + batch_size=config.batch_size, + epoch_count=1, + sink_mode=config.dataset_sink_mode, + shuffle=False) if config.test_dataset else None + prediction = transformer_infer_ppl(config, eval_dataset) + return prediction diff --git a/model_zoo/mass/src/utils/__init__.py b/model_zoo/mass/src/utils/__init__.py index f78be57b22..efb9f6f4b6 100644 --- a/model_zoo/mass/src/utils/__init__.py +++ b/model_zoo/mass/src/utils/__init__.py @@ -20,6 +20,7 @@ from .loss_monitor import LossCallBack from .byte_pair_encoding import bpe_encode from .initializer import zero_weight, one_weight, normal_weight, weight_variable from .rouge_score import rouge +from .eval_score import get_score __all__ = [ "Dictionary", @@ -31,5 +32,6 @@ __all__ = [ "one_weight", "zero_weight", "normal_weight", - "weight_variable" + "weight_variable", + "get_score" ] diff --git a/model_zoo/mass/src/utils/eval_score.py b/model_zoo/mass/src/utils/eval_score.py new file mode 100644 index 0000000000..30ff0b2208 --- /dev/null +++ b/model_zoo/mass/src/utils/eval_score.py @@ -0,0 +1,92 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Get score by given metric.""" +from .ppl_score import ngram_ppl +from .rouge_score import rouge + + +def get_ppl_score(result): + """ + Calculate Perplexity(PPL) score. + + Args: + List[Dict], prediction, each example has 4 keys, "source", + "target", "log_prob" and "length". + + Returns: + Float, ppl score. + """ + log_probs = [] + total_length = 0 + + for sample in result: + log_prob = sample['log_prob'] + length = sample['length'] + log_probs.extend(log_prob) + total_length += length + + print(f" | log_prob:{log_prob}") + print(f" | length:{length}") + + ppl = ngram_ppl(log_probs, total_length, log_softmax=True) + print(f" | final PPL={ppl}.") + return ppl + + +def get_rouge_score(result, vocab): + """ + Calculate ROUGE score. + + Args: + List[Dict], prediction, each example has 4 keys, "source", + "target", "prediction" and "prediction_prob". + Dictionary, dict instance. + + retur: + Str, rouge score. + """ + + predictions = [] + targets = [] + for sample in result: + predictions.append(' '.join([vocab[t] for t in sample['prediction']])) + targets.append(' '.join([vocab[t] for t in sample['target']])) + print(f" | source: {' '.join([vocab[t] for t in sample['source']])}") + print(f" | target: {targets[-1]}") + + return rouge(predictions, targets) + + +def get_score(result, vocab=None, metric='rouge'): + """ + Get eval score. + + Args: + List[Dict], prediction. + Dictionary, dict instance. + Str, metric function, default is rouge. + + Return: + Str, Score. 
+ """ + score = None + if metric == 'rouge': + score = get_rouge_score(result, vocab) + elif metric == 'ppl': + score = get_ppl_score(result) + else: + print(f" |metric not in (rouge, ppl)") + + return score diff --git a/model_zoo/mass/src/utils/ppl_score.py b/model_zoo/mass/src/utils/ppl_score.py index 2e5d6e6642..4a9139ced0 100644 --- a/model_zoo/mass/src/utils/ppl_score.py +++ b/model_zoo/mass/src/utils/ppl_score.py @@ -17,10 +17,7 @@ from typing import Union import numpy as np -NINF = -1.0 * 1e9 - - -def ngram_ppl(prob: Union[np.ndarray, list], log_softmax=False, index: float = np.e): +def ngram_ppl(prob: Union[np.ndarray, list], length: int, log_softmax=False, index: float = np.e): """ Calculate Perplexity(PPL) score under N-gram language model. @@ -39,7 +36,8 @@ def ngram_ppl(prob: Union[np.ndarray, list], log_softmax=False, index: float = n Returns: float, ppl score. """ - eps = 1e-8 + if not length: + return np.inf if not isinstance(prob, (np.ndarray, list)): raise TypeError("`prob` must be type of list or np.ndarray.") if not isinstance(prob, np.ndarray): @@ -47,18 +45,17 @@ def ngram_ppl(prob: Union[np.ndarray, list], log_softmax=False, index: float = n if prob.shape[0] == 0: raise ValueError("`prob` length must greater than 0.") - p = 1.0 - sen_len = 0 - for t in range(prob.shape[0]): - s = prob[t] - if s <= NINF: - break - if log_softmax: - s = np.power(index, s) - p *= (1 / (s + eps)) - sen_len += 1 + print(f'length:{length}, log_prob:{prob}') - if sen_len == 0: - return np.inf + if log_softmax: + prob = np.sum(prob) / length + ppl = 1. / np.power(index, prob) + print(f'avg log prob:{prob}') + else: + p = 1. + for i in range(prob.shape[0]): + p *= (1. 
/ prob[i]) + ppl = pow(p, 1 / length) - return pow(p, 1 / sen_len) + print(f'ppl val:{ppl}') + return ppl diff --git a/model_zoo/mobilenetv2/Readme.md b/model_zoo/mobilenetv2/Readme.md index 5b36a63fe4..1687d2cbdc 100644 --- a/model_zoo/mobilenetv2/Readme.md +++ b/model_zoo/mobilenetv2/Readme.md @@ -60,14 +60,14 @@ Dataset used: [imagenet](http://www.image-net.org/) ### Usage -- Ascend: sh run_train.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH] +- Ascend: sh run_train.sh Ascend [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [MINDSPORE_HCCL_CONFIG_PATH] [DATASET_PATH] [CKPT_PATH] - GPU: sh run_trian.sh GPU [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] ### Launch ``` # training example - Ascend: sh run_train.sh Ascend 8 192.168.0.1 0,1,2,3,4,5,6,7 ~/imagenet/train/ mobilenet_199.ckpt + Ascend: sh run_train.sh Ascend 8 0,1,2,3,4,5,6,7 hccl_config.json ~/imagenet/train/ mobilenet_199.ckpt GPU: sh run_train.sh GPU 8 0,1,2,3,4,5,6,7 ~/imagenet/train/ ``` diff --git a/model_zoo/mobilenetv2/scripts/run_train.sh b/model_zoo/mobilenetv2/scripts/run_train.sh index f1d80aeac6..a6e2a79477 100644 --- a/model_zoo/mobilenetv2/scripts/run_train.sh +++ b/model_zoo/mobilenetv2/scripts/run_train.sh @@ -22,14 +22,16 @@ run_ascend() exit 1 fi - if [ ! -d $5 ] + if [ ! -d $5 ] && [ ! 
-f $5 ] then - echo "error: DATASET_PATH=$5 is not a directory" + echo "error: DATASET_PATH=$5 is not a directory or file" exit 1 fi BASEPATH=$(cd "`dirname $0`" || exit; pwd) export PYTHONPATH=${BASEPATH}:$PYTHONPATH + export MINDSPORE_HCCL_CONFIG_PATH=$4 + export RANK_TABLE_FILE=$4 if [ -d "../train" ]; then rm -rf ../train @@ -38,8 +40,7 @@ run_ascend() cd ../train || exit python ${BASEPATH}/../src/launch.py \ --nproc_per_node=$2 \ - --visible_devices=$4 \ - --server_id=$3 \ + --visible_devices=$3 \ --training_script=${BASEPATH}/../train.py \ --dataset_path=$5 \ --pre_trained=$6 \ @@ -80,7 +81,7 @@ run_gpu() if [ $# -gt 6 ] || [ $# -lt 4 ] then echo "Usage:\n \ - Ascend: sh run_train.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH]\n \ + Ascend: sh run_train.sh Ascend [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [MINDSPORE_HCCL_CONFIG_PATH] [DATASET_PATH] [CKPT_PATH]\n \ GPU: sh run_train.sh GPU [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]\n \ " exit 1 diff --git a/model_zoo/mobilenetv2/src/launch.py b/model_zoo/mobilenetv2/src/launch.py index 48c8159664..f5c97b0bd7 100644 --- a/model_zoo/mobilenetv2/src/launch.py +++ b/model_zoo/mobilenetv2/src/launch.py @@ -15,7 +15,6 @@ """launch train script""" import os import sys -import json import subprocess import shutil from argparse import ArgumentParser @@ -42,8 +41,6 @@ def parse_args(): "each process can be bound to a single D.") parser.add_argument("--visible_devices", type=str, default="0,1,2,3,4,5,6,7", help="will use the visible devices sequentially") - parser.add_argument("--server_id", type=str, default="", - help="server ip") parser.add_argument("--training_script", type=str, help="The full path to the single D training " "program/script to be launched in parallel, " @@ -63,66 +60,6 @@ def main(): assert os.path.isfile(args.training_script) assert len(visible_devices) >= args.nproc_per_node 
print('visible_devices:{}'.format(visible_devices)) - if not args.server_id: - print('pleaser input server ip!!!') - exit(0) - print('server_id:{}'.format(args.server_id)) - - # construct hccn_table - hccn_configs = open('/etc/hccn.conf', 'r').readlines() - device_ips = {} - for hccn_item in hccn_configs: - hccn_item = hccn_item.strip() - if hccn_item.startswith('address_'): - device_id, device_ip = hccn_item.split('=') - device_id = device_id.split('_')[1] - device_ips[device_id] = device_ip - print('device_id:{}, device_ip:{}'.format(device_id, device_ip)) - hccn_table = {} - hccn_table['board_id'] = '0x0000' - hccn_table['chip_info'] = '910' - hccn_table['deploy_mode'] = 'lab' - hccn_table['group_count'] = '1' - hccn_table['group_list'] = [] - instance_list = [] - usable_dev = '' - for instance_id in range(args.nproc_per_node): - instance = {} - instance['devices'] = [] - device_id = visible_devices[instance_id] - device_ip = device_ips[device_id] - usable_dev += str(device_id) - instance['devices'].append({ - 'device_id': device_id, - 'device_ip': device_ip, - }) - instance['rank_id'] = str(instance_id) - instance['server_id'] = args.server_id - instance_list.append(instance) - hccn_table['group_list'].append({ - 'device_num': str(args.nproc_per_node), - 'server_num': '1', - 'group_name': '', - 'instance_count': str(args.nproc_per_node), - 'instance_list': instance_list, - }) - hccn_table['para_plane_nic_location'] = 'device' - hccn_table['para_plane_nic_name'] = [] - for instance_id in range(args.nproc_per_node): - eth_id = visible_devices[instance_id] - hccn_table['para_plane_nic_name'].append('eth{}'.format(eth_id)) - hccn_table['para_plane_nic_num'] = str(args.nproc_per_node) - hccn_table['status'] = 'completed' - - # save hccn_table to file - table_path = os.getcwd() - if not os.path.exists(table_path): - os.mkdir(table_path) - table_fn = os.path.join(table_path, - 'rank_table_{}p_{}_{}.json'.format(args.nproc_per_node, usable_dev, args.server_id)) - with 
open(table_fn, 'w') as table_fp: - json.dump(hccn_table, table_fp, indent=4) - sys.stdout.flush() # spawn the processes processes = [] @@ -137,9 +74,6 @@ def main(): device_dir = os.path.join(cur_path, 'device{}'.format(rank_id)) env['RANK_ID'] = str(rank_id) env['DEVICE_ID'] = str(device_id) - if args.nproc_per_node > 1: - env['MINDSPORE_HCCL_CONFIG_PATH'] = table_fn - env['RANK_TABLE_FILE'] = table_fn if os.path.exists(device_dir): shutil.rmtree(device_dir) os.mkdir(device_dir) diff --git a/model_zoo/mobilenetv2/train.py b/model_zoo/mobilenetv2/train.py index 2c211b375a..4ae743f540 100644 --- a/model_zoo/mobilenetv2/train.py +++ b/model_zoo/mobilenetv2/train.py @@ -18,6 +18,7 @@ import time import argparse import random import numpy as np + from mindspore import context from mindspore import Tensor from mindspore import nn @@ -32,8 +33,9 @@ from mindspore.train.model import Model, ParallelMode from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, Callback from mindspore.train.loss_scale_manager import FixedLossScaleManager from mindspore.train.serialization import load_checkpoint, load_param_into_net -from mindspore.communication.management import init, get_group_size +from mindspore.communication.management import init, get_group_size, get_rank import mindspore.dataset.engine as de + from src.dataset import create_dataset from src.lr_generator import get_lr from src.config import config_gpu, config_ascend @@ -60,9 +62,14 @@ if args_opt.platform == "Ascend": device_id=device_id, save_graphs=False) elif args_opt.platform == "GPU": context.set_context(mode=context.GRAPH_MODE, - device_target="GPU", save_graphs=False) + device_target="GPU", + save_graphs=False) + init("nccl") + context.set_auto_parallel_context(device_num=get_group_size(), + parallel_mode=ParallelMode.DATA_PARALLEL, + mirror_mean=True) else: - raise ValueError("Unsupport platform.") + raise ValueError("Unsupported device target.") class CrossEntropyWithLabelSmooth(_Loss): @@ 
-155,12 +162,8 @@ class Monitor(Callback): if __name__ == '__main__': if args_opt.platform == "GPU": # train on gpu - print("train args: ", args_opt, "\ncfg: ", config_gpu) - - init('nccl') - context.set_auto_parallel_context(parallel_mode="data_parallel", - mirror_mean=True, - device_num=get_group_size()) + print("train args: ", args_opt) + print("cfg: ", config_gpu) # define net net = mobilenet_v2(num_classes=config_gpu.num_classes, platform="GPU") @@ -201,13 +204,13 @@ if __name__ == '__main__': loss_scale_manager=loss_scale) cb = [Monitor(lr_init=lr.asnumpy())] + ckpt_save_dir = config_gpu.save_checkpoint_path + "ckpt_" + str(get_rank()) + "/" if config_gpu.save_checkpoint: config_ck = CheckpointConfig(save_checkpoint_steps=config_gpu.save_checkpoint_epochs * step_size, keep_checkpoint_max=config_gpu.keep_checkpoint_max) - ckpt_cb = ModelCheckpoint( - prefix="mobilenetV2", directory=config_gpu.save_checkpoint_path, config=config_ck) + ckpt_cb = ModelCheckpoint(prefix="mobilenetV2", directory=ckpt_save_dir, config=config_ck) cb += [ckpt_cb] - # begine train + # begin train model.train(epoch_size, dataset, callbacks=cb) elif args_opt.platform == "Ascend": # train on ascend diff --git a/model_zoo/mobilenetv2_quant/export.py b/model_zoo/mobilenetv2_quant/export.py new file mode 100644 index 0000000000..00e377cece --- /dev/null +++ b/model_zoo/mobilenetv2_quant/export.py @@ -0,0 +1,54 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Export MobilenetV2 on ImageNet""" + +import argparse +import numpy as np + +import mindspore +from mindspore import Tensor +from mindspore import context +from mindspore.train.serialization import load_checkpoint, load_param_into_net +from mindspore.train.quant import quant + +from src.mobilenetV2 import mobilenetV2 +from src.config import config_ascend + +parser = argparse.ArgumentParser(description='Image classification') +parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path') +parser.add_argument('--device_target', type=str, default=None, help='Run device target') +args_opt = parser.parse_args() + +if __name__ == '__main__': + cfg = None + if args_opt.device_target == "Ascend": + cfg = config_ascend + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False) + else: + raise ValueError("Unsupported device target: {}.".format(args_opt.device_target)) + + # define fusion network + network = mobilenetV2(num_classes=cfg.num_classes) + # convert fusion network to quantization aware network + network = quant.convert_quant_network(network, bn_fold=True, per_channel=[True, False], symmetric=[True, False]) + # load checkpoint + param_dict = load_checkpoint(args_opt.checkpoint_path) + load_param_into_net(network, param_dict) + + # export network + print("============== Starting export ==============") + inputs = Tensor(np.ones([1, 3, cfg.image_height, cfg.image_width]), mindspore.float32) + quant.export(network, inputs, file_name="mobilenet_quant", file_format='GEIR') + print("============== End export ==============") diff --git a/model_zoo/mobilenetv3/train.py b/model_zoo/mobilenetv3/train.py index 578893ab75..57199ec1a7 100644 --- a/model_zoo/mobilenetv3/train.py +++ b/model_zoo/mobilenetv3/train.py @@ -18,6 +18,7 @@ 
import time import argparse import random import numpy as np + from mindspore import context from mindspore import Tensor from mindspore import nn @@ -33,7 +34,8 @@ from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, Callback from mindspore.train.loss_scale_manager import FixedLossScaleManager from mindspore.train.serialization import load_checkpoint, load_param_into_net import mindspore.dataset.engine as de -from mindspore.communication.management import init, get_group_size +from mindspore.communication.management import init, get_group_size, get_rank + from src.dataset import create_dataset from src.lr_generator import get_lr from src.config import config_gpu, config_ascend @@ -57,10 +59,16 @@ if args_opt.platform == "Ascend": device_id = int(os.getenv('DEVICE_ID')) context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", - device_id=device_id, save_graphs=False) + device_id=device_id, + save_graphs=False) elif args_opt.platform == "GPU": context.set_context(mode=context.GRAPH_MODE, - device_target="GPU", save_graphs=False) + device_target="GPU", + save_graphs=False) + init("nccl") + context.set_auto_parallel_context(device_num=get_group_size(), + parallel_mode=ParallelMode.DATA_PARALLEL, + mirror_mean=True) else: raise ValueError("Unsupport platform.") @@ -155,12 +163,8 @@ class Monitor(Callback): if __name__ == '__main__': if args_opt.platform == "GPU": # train on gpu - print("train args: ", args_opt, "\ncfg: ", config_gpu) - - init('nccl') - context.set_auto_parallel_context(parallel_mode="data_parallel", - mirror_mean=True, - device_num=get_group_size()) + print("train args: ", args_opt) + print("cfg: ", config_gpu) # define net net = mobilenet_v3_large(num_classes=config_gpu.num_classes) @@ -201,11 +205,11 @@ if __name__ == '__main__': loss_scale_manager=loss_scale) cb = [Monitor(lr_init=lr.asnumpy())] + ckpt_save_dir = config_gpu.save_checkpoint_path + "ckpt_" + str(get_rank()) + "/" if config_gpu.save_checkpoint: config_ck 
= CheckpointConfig(save_checkpoint_steps=config_gpu.save_checkpoint_epochs * step_size, keep_checkpoint_max=config_gpu.keep_checkpoint_max) - ckpt_cb = ModelCheckpoint( - prefix="mobilenetV3", directory=config_gpu.save_checkpoint_path, config=config_ck) + ckpt_cb = ModelCheckpoint(prefix="mobilenetV3", directory=ckpt_save_dir, config=config_ck) cb += [ckpt_cb] # begine train model.train(epoch_size, dataset, callbacks=cb) diff --git a/model_zoo/utils/hccl_tools/README.md b/model_zoo/utils/hccl_tools/README.md new file mode 100644 index 0000000000..b73a99e592 --- /dev/null +++ b/model_zoo/utils/hccl_tools/README.md @@ -0,0 +1,14 @@ +# description + +mindspore distributed training launch helper utilty that will generate hccl config file. + +# use + +``` +python hccl_tools.py --device_num [1,8] +``` + +output: +``` +hccl_[device_num]p_[which device]_[server_ip].json +``` \ No newline at end of file diff --git a/model_zoo/utils/hccl_tools/hccl_tools.py b/model_zoo/utils/hccl_tools/hccl_tools.py new file mode 100644 index 0000000000..ac4114c0a8 --- /dev/null +++ b/model_zoo/utils/hccl_tools/hccl_tools.py @@ -0,0 +1,165 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""generate hccl config file script""" +import os +import sys +import json +import socket +import platform +from argparse import ArgumentParser +from typing import Dict, Any + + +def parse_args(): + """ + parse args . + + Args: + + Returns: + args. + + Examples: + >>> parse_args() + """ + parser = ArgumentParser(description="mindspore distributed training launch " + "helper utilty that will generate hccl" + " config file") + parser.add_argument("--device_num", type=str, default="[0,8]", + help="The number of the D chip used. please note that the D chips" + "used must be continuous, such [0,4] means to use four chips " + "0,1,2,3; [0,1] means to use chip 0; The first four chips are" + "a group, and the last four chips are a group. In addition to" + "the [0,8] chips are allowed, other cross-group such as [3,6]" + "are prohibited.") + parser.add_argument("--visible_devices", type=str, default="0,1,2,3,4,5,6,7", + help="will use the visible devices sequentially") + parser.add_argument("--server_ip", type=str, default="", + help="server ip") + args = parser.parse_args() + return args + + +def get_host_ip(): + """ + get host ip + """ + ip = None + + try: + hostname = socket.gethostname() + ip = socket.gethostbyname(hostname) + except EOFError: + pass + + return ip + + +def main(): + print("start", __file__) + args = parse_args() + + # visible_devices + visible_devices = args.visible_devices.split(',') + print('visible_devices:{}'.format(visible_devices)) + + # server_id + ip = get_host_ip() + if args.server_ip: + server_id = args.server_ip + elif ip: + server_id = ip + else: + raise ValueError("please input server ip!") + print('server_id:{}'.format(server_id)) + + # device_num + first_num = int(args.device_num[1]) + last_num = int(args.device_num[3]) + if first_num < 0 or last_num > 8: + raise ValueError("device num {} must be in range [0,8] !".format(args.device_num)) + if first_num > 
last_num: + raise ValueError("First num {} of device num {} must less than last num {} !".format(first_num, args.device_num, + last_num)) + if first_num < 4: + if last_num > 4: + if first_num == 0 and last_num == 8: + pass + else: + raise ValueError("device num {} must be in the same group of [0,4] or [4,8] !".format(args.device_num)) + + device_num_list = list(range(first_num, last_num)) + print("device_num_list:", device_num_list) + + assert len(visible_devices) >= len(device_num_list) + + # construct hccn_table + device_ips: Dict[Any, Any] = {} + with open('/etc/hccn.conf', 'r') as fin: + for hccn_item in fin.readlines(): + if hccn_item.strip().startswith('address_'): + device_id, device_ip = hccn_item.split('=') + device_id = device_id.split('_')[1] + device_ips[device_id] = device_ip.strip() + + arch = platform.processor() + hccn_table = {'board_id': {'aarch64': '0x002f', 'x86_64': '0x0000'}[arch], + 'chip_info': '910', + 'deploy_mode': 'lab', + 'group_count': '1', + 'group_list': []} + instance_list = [] + rank_id = 0 + for instance_id in device_num_list: + instance = {'devices': []} + device_id = visible_devices[instance_id] + device_ip = device_ips[device_id] + instance['devices'].append({ + 'device_id': device_id, + 'device_ip': device_ip, + }) + print('rank_id:{}, device_id:{}, device_ip:{}'.format(rank_id, device_id, device_ip)) + instance['rank_id'] = str(rank_id) + rank_id += 1 + instance['server_id'] = server_id + instance_list.append(instance) + hccn_table['group_list'].append({ + 'device_num': str(len(device_num_list)), + 'server_num': '1', + 'group_name': '', + 'instance_count': str(len(device_num_list)), + 'instance_list': instance_list, + }) + hccn_table['para_plane_nic_location'] = 'device' + hccn_table['para_plane_nic_name'] = [] + for instance_id in device_num_list: + eth_id = visible_devices[instance_id] + hccn_table['para_plane_nic_name'].append('eth{}'.format(eth_id)) + hccn_table['para_plane_nic_num'] = str(len(device_num_list)) + 
hccn_table['status'] = 'completed' + + # save hccn_table to file + table_path = os.getcwd() + table_fn = os.path.join(table_path, + 'hccl_{}p_{}_{}.json'.format(len(device_num_list), "".join(map(str, device_num_list)), + server_id)) + with open(table_fn, 'w') as table_fp: + json.dump(hccn_table, table_fp, indent=4) + sys.stdout.flush() + print("Completed: hccl file was save in :", table_fn) + + +if __name__ == "__main__": + main() diff --git a/model_zoo/wide_and_deep/src/wide_and_deep.py b/model_zoo/wide_and_deep/src/wide_and_deep.py index 16102039a8..048bf3c66d 100644 --- a/model_zoo/wide_and_deep/src/wide_and_deep.py +++ b/model_zoo/wide_and_deep/src/wide_and_deep.py @@ -188,7 +188,7 @@ class WideDeepModel(nn.Cell): self.deep_layer_act, use_activation=False, convert_dtype=True, drop_out=config.dropout_flag) - self.gather_v2 = P.GatherV2() + self.embeddinglookup = nn.EmbeddingLookup(target='DEVICE') self.mul = P.Mul() self.reduce_sum = P.ReduceSum(keep_dims=False) self.reshape = P.Reshape() @@ -206,11 +206,11 @@ class WideDeepModel(nn.Cell): """ mask = self.reshape(wt_hldr, (self.batch_size, self.field_size, 1)) # Wide layer - wide_id_weight = self.gather_v2(self.wide_w, id_hldr, 0) + wide_id_weight = self.embeddinglookup(self.wide_w, id_hldr) wx = self.mul(wide_id_weight, mask) wide_out = self.reshape(self.reduce_sum(wx, 1) + self.wide_b, (-1, 1)) # Deep layer - deep_id_embs = self.gather_v2(self.embedding_table, id_hldr, 0) + deep_id_embs = self.embeddinglookup(self.embedding_table, id_hldr) vx = self.mul(deep_id_embs, mask) deep_in = self.reshape(vx, (-1, self.field_size * self.emb_dim)) deep_in = self.dense_layer_1(deep_in) diff --git a/scripts/build_icu4c.sh b/scripts/build_icu4c.sh new file mode 100755 index 0000000000..c7f21b756f --- /dev/null +++ b/scripts/build_icu4c.sh @@ -0,0 +1,8 @@ +#!/bin/bash +echo '{ + "strategy": "additive", + "featureFilters": { + "normalization": "include" + } +}' > filter.json +./icu4c/source/runConfigureICU Linux 
--enable-rpath --disable-tests --disable-samples --disable-icuio --disable-extras ICU_DATA_FILTER_FILE=filter.json "$@" diff --git a/serving/CMakeLists.txt b/serving/CMakeLists.txt index 3c1c08ece0..4529323fe1 100644 --- a/serving/CMakeLists.txt +++ b/serving/CMakeLists.txt @@ -13,7 +13,6 @@ add_library(protobuf::libprotobuf ALIAS protobuf::protobuf) add_executable(protobuf::libprotoc ALIAS protobuf::protoc) set(_PROTOBUF_LIBPROTOBUF protobuf::libprotobuf) -set(_REFLECTION gRPC::grpc++_reflection) if(CMAKE_CROSSCOMPILING) find_program(_PROTOBUF_PROTOC protoc) else() @@ -22,10 +21,19 @@ endif() # Find gRPC installation # Looks for gRPCConfig.cmake file installed by gRPC's cmake installation. +if (EXISTS ${grpc_ROOT}/lib64) + set(gRPC_DIR "${grpc_ROOT}/lib64/cmake/grpc") +else() + set(gRPC_DIR "${grpc_ROOT}/lib/cmake/grpc") +endif() +message("serving using grpc_DIR : " ${gPRC_DIR}) + find_package(gRPC CONFIG REQUIRED) message(STATUS "Using gRPC ${gRPC_VERSION}") set(_GRPC_GRPCPP gRPC::grpc++) +set(_REFLECTION gRPC::grpc++_reflection) + if(CMAKE_CROSSCOMPILING) find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin) else() diff --git a/setup.py b/setup.py index 2840eb3b14..bf16c9106b 100644 --- a/setup.py +++ b/setup.py @@ -103,6 +103,7 @@ package_data = { 'lib/*.so*', 'lib/*.a', '.commit_id', + 'ms_serving' ] } @@ -125,6 +126,8 @@ def update_permissions(path): for filename in filenames: file_fullpath = os.path.join(dirpath, filename) os.chmod(file_fullpath, stat.S_IREAD) + if filename == "ms_serving": + os.chmod(file_fullpath, stat.S_IREAD | stat.S_IEXEC) class EggInfo(egg_info): diff --git a/tests/st/control/test_switch_layer.py b/tests/st/control/test_switch_layer.py new file mode 100644 index 0000000000..4accb44f1a --- /dev/null +++ b/tests/st/control/test_switch_layer.py @@ -0,0 +1,56 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the 
License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import numpy as np +import pytest + +import mindspore.context as context +from mindspore import Tensor, nn +from mindspore.common import dtype as mstype + + +class CaseNet(nn.Cell): + def __init__(self): + super(CaseNet, self).__init__() + self.conv = nn.Conv2d(1, 3, 3) + self.relu = nn.ReLU() + self.softmax = nn.Softmax() + self.layers1 = (self.relu, self.softmax) + self.layers2 = (self.conv, self.relu) + + def construct(self, x, index1, index2): + x = self.layers1[index1](x) + x = self.layers2[index2](x) + return x + + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_switch_layer(): + context.set_context(mode=context.GRAPH_MODE) + net = CaseNet() + data = Tensor(np.ones((1, 1, 224, 224)), mstype.float32) + idx = Tensor(0, mstype.int32) + idx2 = Tensor(-1, mstype.int32) + value = net(data, idx, idx2) + relu = nn.ReLU() + true_value = relu(data) + ret = np.allclose(value.asnumpy(), true_value.asnumpy()) + assert ret + + idx3 = Tensor(3, mstype.int32) + with pytest.raises(RuntimeError): + value = net(data, idx3, idx2) diff --git a/mindspore/model_zoo/resnet.py b/tests/st/networks/models/resnet50/src/resnet.py similarity index 100% rename from mindspore/model_zoo/resnet.py rename to tests/st/networks/models/resnet50/src/resnet.py diff --git a/tests/st/networks/models/resnet50/test_resnet50_imagenet.py b/tests/st/networks/models/resnet50/test_resnet50_imagenet.py index c88af6bcf7..e721b62c58 100644 --- 
a/tests/st/networks/models/resnet50/test_resnet50_imagenet.py +++ b/tests/st/networks/models/resnet50/test_resnet50_imagenet.py @@ -27,10 +27,10 @@ from mindspore.parallel._auto_parallel_context import auto_parallel_context from mindspore.train.model import Model, ParallelMode from mindspore.train.callback import Callback from mindspore.train.loss_scale_manager import FixedLossScaleManager -from mindspore.model_zoo.resnet import resnet50 import mindspore.nn as nn import mindspore.dataset as ds +from tests.st.networks.models.resnet50.src.resnet import resnet50 from tests.st.networks.models.resnet50.src.dataset import create_dataset from tests.st.networks.models.resnet50.src.lr_generator import get_learning_rate from tests.st.networks.models.resnet50.src.config import config diff --git a/tests/st/ops/ascend/test_autocast.py b/tests/st/ops/ascend/test_autocast.py index 448dc9b4d6..35690ce2c4 100644 --- a/tests/st/ops/ascend/test_autocast.py +++ b/tests/st/ops/ascend/test_autocast.py @@ -246,3 +246,21 @@ def test_tensor_auto_cast(): bnet(t_fp32) with pytest.raises(TypeError): bnet(t_fp64) +def test_bool_tensor_and_float(): + context.set_context(mode=context.GRAPH_MODE) + t_bool = Tensor(np.ones([2, 1, 2, 2]).astype(np.bool), mstype.bool_) + t_int32 = Tensor(np.ones([2, 1, 2, 2]), mstype.int32) + t_fp16 = Tensor(np.ones([2, 1, 2, 2]), mstype.float16) + t_fp32 = Tensor(np.ones([2, 1, 2, 2]), mstype.float32) + net = TensorFPAutoCast() + out = net(t_bool) + assert out.dtype == mstype.float32 + net = TensorIntAutoCast() + out = net(t_bool) + assert out.dtype == mstype.int32 + out = net(t_fp16) + assert out.dtype == mstype.float16 + out = net(t_fp32) + assert out.dtype == mstype.float32 + out = net(t_int32) + assert out.dtype == mstype.int32 diff --git a/tests/st/ops/ascend/test_distribution/test_bernoulli.py b/tests/st/ops/ascend/test_distribution/test_bernoulli.py new file mode 100644 index 0000000000..5652d536c7 --- /dev/null +++ 
b/tests/st/ops/ascend/test_distribution/test_bernoulli.py @@ -0,0 +1,147 @@ +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""test cases for bernoulli distribution""" +import numpy as np +from scipy import stats +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.common.api import ms_function +from mindspore import dtype + +context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + +class Net(nn.Cell): + """ + Test class: probability of bernoulli distribution. + """ + def __init__(self): + super(Net, self).__init__() + self.b = nn.Bernoulli(0.7, dtype=dtype.int32) + + @ms_function + def construct(self, x_): + return self.b('prob', x_) + +class Net1(nn.Cell): + """ + Test class: log probability of bernoulli distribution. + """ + def __init__(self): + super(Net1, self).__init__() + self.b = nn.Bernoulli(0.7, dtype=dtype.int32) + + @ms_function + def construct(self, x_): + return self.b('log_prob', x_) + +class Net2(nn.Cell): + """ + Test class: kl_loss between bernoulli distributions. + """ + def __init__(self): + super(Net2, self).__init__() + self.b = nn.Bernoulli(0.7, dtype=dtype.int32) + + @ms_function + def construct(self, x_): + return self.b('kl_loss', 'Bernoulli', x_) + +class Net3(nn.Cell): + """ + Test class: mean/sd of bernoulli distribution. 
+ """ + def __init__(self): + super(Net3, self).__init__() + self.b = nn.Bernoulli([0.5, 0.5], dtype=dtype.int32) + + @ms_function + def construct(self): + return self.b('mean'), self.b('sd') + +class Net4(nn.Cell): + """ + Test class: log probability of bernoulli distribution. + """ + def __init__(self, shape, seed=0): + super(Net4, self).__init__() + self.b = nn.Bernoulli([0.7, 0.5], seed=seed, dtype=dtype.int32) + self.shape = shape + + @ms_function + def construct(self, probs=None): + return self.b('sample', self.shape, probs) + +def test_pmf(): + """ + Test pmf. + """ + bernoulli_benchmark = stats.bernoulli(0.7) + expect_pmf = bernoulli_benchmark.pmf([0, 1, 0, 1, 1]).astype(np.float32) + pdf = Net() + x_ = Tensor(np.array([0, 1, 0, 1, 1]).astype(np.int32), dtype=dtype.float32) + output = pdf(x_) + tol = 1e-6 + assert (np.abs(output.asnumpy() - expect_pmf) < tol).all() + +def test_log_likelihood(): + """ + Test log_pmf. + """ + bernoulli_benchmark = stats.bernoulli(0.7) + expect_logpmf = bernoulli_benchmark.logpmf([0, 1, 0, 1, 1]).astype(np.float32) + logprob = Net1() + x_ = Tensor(np.array([0, 1, 0, 1, 1]).astype(np.int32), dtype=dtype.float32) + output = logprob(x_) + tol = 1e-6 + assert (np.abs(output.asnumpy() - expect_logpmf) < tol).all() + +def test_kl_loss(): + """ + Test kl_loss. + """ + probs1_a = 0.7 + probs1_b = 0.5 + probs0_a = 1 - probs1_a + probs0_b = 1 - probs1_b + expect_kl_loss = probs1_a * np.log(probs1_a / probs1_b) + probs0_a * np.log(probs0_a / probs0_b) + kl_loss = Net2() + output = kl_loss(Tensor([probs1_b], dtype=dtype.float32)) + tol = 1e-6 + assert (np.abs(output.asnumpy() - expect_kl_loss) < tol).all() + +def test_basics(): + """ + Test mean/standard deviation and probs. 
+ """ + basics = Net3() + mean, sd = basics() + expect_mean = [0.5, 0.5] + assert (mean.asnumpy() == expect_mean).all() + assert (sd.asnumpy() == expect_mean).all() + b = nn.Bernoulli([0.7, 0.5], dtype=dtype.int32) + probs = b.probs() + expect_probs = [0.7, 0.5] + tol = 1e-6 + assert (np.abs(probs.asnumpy() - expect_probs) < tol).all() + +def test_sample(): + """ + Test sample. + """ + shape = (2, 3) + sample = Net4(shape) + output = sample() + assert output.shape == (2, 3, 2) diff --git a/tests/st/ops/ascend/test_distribution/test_normal.py b/tests/st/ops/ascend/test_distribution/test_normal.py new file mode 100644 index 0000000000..52bb1173ee --- /dev/null +++ b/tests/st/ops/ascend/test_distribution/test_normal.py @@ -0,0 +1,152 @@ +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""test cases for normal distribution""" +import numpy as np +from scipy import stats +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.common.api import ms_function +from mindspore import dtype + +context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + +class Net(nn.Cell): + """ + Test class: probability of normal distribution. 
+ """ + def __init__(self): + super(Net, self).__init__() + self.n = nn.Normal(np.array([3.0]), np.array([[2.0], [4.0]]), dtype=dtype.float32) + + @ms_function + def construct(self, x_): + return self.n('prob', x_) + +class Net1(nn.Cell): + """ + Test class: log probability of normal distribution. + """ + def __init__(self): + super(Net1, self).__init__() + self.n = nn.Normal(np.array([3.0]), np.array([[2.0], [4.0]]), dtype=dtype.float32) + + @ms_function + def construct(self, x_): + return self.n('log_prob', x_) + +class Net2(nn.Cell): + """ + Test class: kl_loss of normal distribution. + """ + def __init__(self): + super(Net2, self).__init__() + self.n = nn.Normal(np.array([3.0]), np.array([4.0]), dtype=dtype.float32) + + @ms_function + def construct(self, x_, y_): + return self.n('kl_loss', 'Normal', x_, y_) + +class Net3(nn.Cell): + """ + Test class: mean/sd of normal distribution. + """ + def __init__(self): + super(Net3, self).__init__() + self.n = nn.Normal(np.array([3.0]), np.array([2.0, 4.0]), dtype=dtype.float32) + + @ms_function + def construct(self): + return self.n('mean'), self.n('sd') + +class Net4(nn.Cell): + """ + Test class: mean/sd of normal distribution. + """ + def __init__(self, shape, seed=0): + super(Net4, self).__init__() + self.n = nn.Normal(np.array([3.0]), np.array([[2.0], [4.0]]), seed=seed, dtype=dtype.float32) + self.shape = shape + + @ms_function + def construct(self, mean=None, sd=None): + return self.n('sample', self.shape, mean, sd) + +def test_pdf(): + """ + Test pdf. + """ + norm_benchmark = stats.norm(np.array([3.0]), np.array([[2.0], [4.0]])) + expect_pdf = norm_benchmark.pdf([1.0, 2.0]).astype(np.float32) + pdf = Net() + output = pdf(Tensor([1.0, 2.0], dtype=dtype.float32)) + tol = 1e-6 + assert (np.abs(output.asnumpy() - expect_pdf) < tol).all() + +def test_log_likelihood(): + """ + Test log_pdf. 
+ """ + norm_benchmark = stats.norm(np.array([3.0]), np.array([[2.0], [4.0]])) + expect_logpdf = norm_benchmark.logpdf([1.0, 2.0]).astype(np.float32) + logprob = Net1() + output = logprob(Tensor([1.0, 2.0], dtype=dtype.float32)) + tol = 1e-6 + assert (np.abs(output.asnumpy() - expect_logpdf) < tol).all() + +def test_kl_loss(): + """ + Test kl_loss. + """ + mean_a = np.array([3.0]).astype(np.float32) + sd_a = np.array([4.0]).astype(np.float32) + + mean_b = np.array([1.0]).astype(np.float32) + sd_b = np.array([1.0]).astype(np.float32) + + diff_log_scale = np.log(sd_a) - np.log(sd_b) + squared_diff = np.square(mean_a / sd_b - mean_b / sd_b) + expect_kl_loss = 0.5 * squared_diff + 0.5 * np.expm1(2 * diff_log_scale) - diff_log_scale + + kl_loss = Net2() + mean = Tensor(mean_b, dtype=dtype.float32) + sd = Tensor(sd_b, dtype=dtype.float32) + output = kl_loss(mean, sd) + tol = 1e-6 + assert (np.abs(output.asnumpy() - expect_kl_loss) < tol).all() + +def test_basics(): + """ + Test mean/standard deviation. + """ + basics = Net3() + mean, sd = basics() + expect_mean = [3.0, 3.0] + expect_sd = [2.0, 4.0] + tol = 1e-6 + assert (np.abs(mean.asnumpy() - expect_mean) < tol).all() + assert (np.abs(sd.asnumpy() - expect_sd) < tol).all() + +def test_sample(): + """ + Test sample. + """ + shape = (2, 3) + seed = 10 + mean = Tensor([2.0], dtype=dtype.float32) + sd = Tensor([2.0, 2.0, 2.0], dtype=dtype.float32) + sample = Net4(shape, seed=seed) + output = sample(mean, sd) + assert output.shape == (2, 3, 3) diff --git a/tests/st/ops/gpu/test_ctcloss_op.py b/tests/st/ops/gpu/test_ctcloss_op.py new file mode 100644 index 0000000000..b9a88e7e70 --- /dev/null +++ b/tests/st/ops/gpu/test_ctcloss_op.py @@ -0,0 +1,119 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import numpy as np +import pytest + +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.ops import operations as P +from mindspore.common import dtype as mstype +from mindspore.ops.composite import GradOperation + +class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + self.loss = P.CTCLossV2() + self.div = P.RealDiv() + self.cast = P.Cast() + self.mean = P.ReduceMean() + + def construct(self, probs, label, input_length, label_length): + x, _ = self.loss(probs, label, input_length, label_length) + x = self.div(x, self.cast(label_length, mstype.float32)) + x = self.mean(x) + return x + +class GradData(nn.Cell): + def __init__(self, network): + super(GradData, self).__init__() + self.grad = GradOperation(name="get_all", get_all=True, sens_param=False) + self.network = network + + def construct(self, probs, labels, input_lengths, label_lengths): + return self.grad(self.network)(probs, labels, input_lengths, label_lengths) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_ctcloss(): + probs = Tensor([[[-4.4131, -4.6093, -3.4333, -3.9268, -2.8917, -3.4093, -4.2243, -1.1379, -7.1046, -0.6902], + [-2.5109, -3.3397, -4.9384, -1.2723, -1.1443, -2.4683, -2.6768, -4.1282, -2.7062, -3.1906], + [-2.5092, -1.6392, -2.0864, -4.0059, -1.5610, -2.3223, -2.4816, -2.9922, -3.1412, -2.3311]], + + [[-2.1243, -3.5773, -3.1108, -4.4253, -2.7080, -1.9653, -2.0499, -2.4418, 
-1.8620, -1.5229], + [-2.2479, -3.5128, -1.4189, -2.8701, -1.8562, -2.2752, -2.7019, -2.1865, -2.5634, -2.9869], + [-3.2144, -1.3986, -3.1083, -3.9634, -3.5131, -3.2317, -2.6200, -1.7938, -1.8159, -1.7255]], + + [[-3.1301, -2.1649, -0.9286, -2.9452, -2.5992, -2.0263, -2.9201, -3.2155, -2.8302, -3.3636], + [-1.4661, -3.6311, -2.4781, -4.6180, -2.7308, -1.7019, -1.5570, -2.6012, -4.0788, -2.3073], + [-2.6833, -1.5033, -3.6922, -2.6360, -2.6974, -2.6847, -2.7579, -2.1396, -1.4093, -2.9630]], + + [[-2.0094, -2.3024, -3.3673, -1.0220, -2.8326, -2.2613, -3.0535, -2.9879, -3.7015, -2.4510], + [-1.9071, -3.2603, -2.3229, -2.0572, -4.3450, -2.1284, -2.6306, -1.3824, -2.9815, -2.5061], + [-2.7931, -3.7631, -3.2440, -4.3887, -1.0271, -3.8851, -1.2418, -4.5123, -2.2993, -2.4607]], + + [[-1.5763, -2.7539, -3.6941, -3.8166, -1.2599, -2.6903, -2.5826, -4.8208, -2.9562, -1.6321], + [-3.3031, -3.0087, -1.9982, -1.9081, -3.8731, -2.8764, -2.2485, -2.3808, -1.4283, -2.1625], + [-2.4516, -3.2394, -4.2053, -4.3541, -2.5229, -4.0717, -1.4894, -2.3151, -1.1098, -2.3465]]], + dtype=mstype.float32) + labels = Tensor([9, 4, 6, 4, 7, 1, 4, 6, 6, 8], dtype=mstype.int32) + input_lengths = Tensor([5, 5, 5], dtype=mstype.int32) + label_lengths = Tensor([3, 3, 4], dtype=mstype.int32) + + context.set_context(mode=context.GRAPH_MODE, device_target="GPU") + net = Net() + ctc_loss = net(probs, labels, input_lengths, label_lengths) + expect_loss = [2.4099] + assert np.allclose(ctc_loss.asnumpy(), expect_loss) + + grad = GradData(net)(probs, labels, input_lengths, label_lengths) + expect_grad = [[[8.8442e-05, 1.1065e-03, 3.5867e-03, 2.1896e-03, 6.1646e-03, + 3.6738e-03, 1.6262e-03, 3.5610e-02, 9.1258e-05, -5.4134e-02], + [-3.7523e-03, 3.9386e-03, 7.9623e-04, 3.1132e-02, -6.2954e-02, + 9.4143e-03, 7.6425e-03, 1.7902e-03, 7.4211e-03, 4.5719e-03], + [6.7778e-03, 1.6178e-02, 1.0344e-02, 1.5173e-03, -6.5840e-02, + 8.1707e-03, 6.9674e-03, 4.1814e-03, 3.6026e-03, 8.0991e-03]], + + [[-1.2581e-02, 3.1057e-03, 
4.9517e-03, 1.3301e-03, -2.6320e-02, + 1.5568e-02, 1.4305e-02, 9.6671e-03, 1.7262e-02, -2.7292e-02], + [-1.5566e-02, 3.3126e-03, 2.6887e-02, 6.2993e-03, -3.9716e-02, + 1.1420e-02, 7.4531e-03, -1.4252e-02, 8.5603e-03, 5.6048e-03], + [3.3483e-03, 2.0579e-02, 3.7231e-03, 1.5832e-03, 2.4837e-03, + 3.2909e-03, -7.7267e-02, 1.3861e-02, 1.3558e-02, 1.4840e-02]], + + [[-8.0007e-03, 1.2751e-02, 4.3901e-02, 5.8435e-03, -7.2627e-02, + 1.4647e-02, -8.0584e-03, 4.4595e-03, 6.5557e-03, 5.2891e-04], + [-3.6006e-02, 1.5308e-03, 9.3225e-03, 1.0969e-03, -2.5098e-03, + 2.0260e-02, 2.3419e-02, -3.0053e-02, 1.8809e-03, 1.1059e-02], + [-7.7639e-02, 1.8533e-02, 2.0764e-03, 5.9706e-03, 5.6150e-03, + 5.6868e-03, 5.2854e-03, 9.8085e-03, 2.0360e-02, 4.3053e-03]], + + [[-2.6776e-02, 1.1113e-02, 3.8314e-03, 3.9986e-02, -1.6020e-02, + 1.1579e-02, -4.1635e-02, 5.5992e-03, 2.7429e-03, 9.5786e-03], + [-6.8619e-03, -6.4066e-03, 1.0888e-02, 1.4201e-02, 1.4413e-03, + 1.3225e-02, 8.0039e-03, -4.9191e-02, 5.6352e-03, 9.0651e-03], + [5.1026e-03, 1.9343e-03, 3.2506e-03, 1.0347e-03, 2.9837e-02, + 1.7121e-03, -5.9261e-02, 9.1443e-04, 8.3608e-03, 7.1146e-03]], + + [[-2.0848e-02, 7.0754e-03, 2.7633e-03, 2.4447e-03, 3.1520e-02, + 7.5401e-03, -5.8895e-02, 8.9559e-04, 5.7796e-03, 2.1724e-02], + [-1.3499e-03, -1.0019e-01, 1.5064e-02, 1.6485e-02, 2.3104e-03, + 6.2597e-03, 1.1729e-02, 1.0275e-02, 2.6635e-02, 1.2782e-02], + [7.1796e-03, 3.2656e-03, 1.2430e-03, 1.0712e-03, 6.6856e-03, + 1.4207e-03, 1.8792e-02, 8.2297e-03, -5.5865e-02, 7.9753e-03]]] + assert np.allclose(grad[0].asnumpy(), expect_grad, atol=1e-5) diff --git a/tests/st/ops/gpu/test_dense_op.py b/tests/st/ops/gpu/test_dense_op.py index 220f7ae051..e9c010ea77 100644 --- a/tests/st/ops/gpu/test_dense_op.py +++ b/tests/st/ops/gpu/test_dense_op.py @@ -228,6 +228,7 @@ def test_biasadd_3d(): error = np.ones(shape=[3, 4, 8]) * 1.0e-6 context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU") net = BiasAdd() + net.set_grad() result = net(x, b) diff = 
result.asnumpy() - expect assert np.all(diff < error) diff --git a/tests/st/ops/gpu/test_normal.py b/tests/st/ops/gpu/test_normal.py new file mode 100644 index 0000000000..0c4866f6f0 --- /dev/null +++ b/tests/st/ops/gpu/test_normal.py @@ -0,0 +1,56 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import numpy as np + +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.common import dtype as mstype +from mindspore.ops import composite as C + +context.set_context(mode=context.GRAPH_MODE, device_target="GPU") + + +class Net(nn.Cell): + def __init__(self, shape, seed=0): + super(Net, self).__init__() + self.shape = shape + self.seed = seed + + def construct(self, mean, stddev): + return C.normal(self.shape, mean, stddev, self.seed) + + +def test_net_1D(): + seed = 10 + shape = (3, 2, 4) + mean = 1.0 + stddev = 1.0 + net = Net(shape, seed) + tmean, tstddev = Tensor(mean, mstype.float32), Tensor(stddev, mstype.float32) + output = net(tmean, tstddev) + assert output.shape == (3, 2, 4) + + +def test_net_ND(): + seed = 10 + shape = (3, 1, 2) + mean = np.array([[[1], [2]], [[3], [4]], [[5], [6]]]).astype(np.float32) + stddev = np.array([1.0]).astype(np.float32) + net = Net(shape, seed) + tmean, tstddev = Tensor(mean, mstype.float32), Tensor(stddev, mstype.float32) + output = net(tmean, 
tstddev) + assert output.shape == (3, 2, 2) diff --git a/tests/st/ops/gpu/test_smoothl1loss_op.py b/tests/st/ops/gpu/test_smoothl1loss_op.py new file mode 100644 index 0000000000..040f404eb0 --- /dev/null +++ b/tests/st/ops/gpu/test_smoothl1loss_op.py @@ -0,0 +1,81 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import numpy as np +import pytest + +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.ops import composite as C + +context.set_context(mode=context.GRAPH_MODE, device_target="GPU", save_graphs=True) + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_smoothl1loss(): + np.random.seed(42) + prediction = np.random.randn(20).astype(np.float32) + target = np.random.randn(20).astype(np.float32) + sigma = 1.0 + + net = nn.SmoothL1Loss(sigma) + loss = net(Tensor(prediction), Tensor(target)) + expect = [0.46941718, 0.00382918, 0.16829303, 2.447778, 0.04812113, 0.05953304, + 2.2302065, 0.07672881, 0.00860204, 0.34798968, 0.00956192, 1.818008, + 0.03262977, 0.36599946, 2.047463, 0.2168481, 0.7216947, 1.7739174, + 0.08826803, 1.109165] + assert np.allclose(loss.asnumpy(), expect) + + + +class Grad(nn.Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.grad = C.GradOperation(name="get_all", get_all=True, 
sens_param=True) + self.network = network + + def construct(self, x1, x2, sens): + gout = self.grad(self.network)(x1, x2, sens) + return gout + + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_smoothl1loss_grad(): + np.random.seed(42) + prediction = np.random.randn(20).astype(np.float32) + target = np.random.randn(20).astype(np.float32) + sens = np.random.randn(20).astype(np.float32) + sigma = 1.0 + + net = nn.SmoothL1Loss(sigma) + grad = Grad(net) + dx = grad(Tensor(prediction), Tensor(target), Tensor(sens)) + + dx1_expect = [-0.71552587, 0.01499678, -0.06709455, -0.30110368, -0.45868093, + 0.24838912, -0.46063876, 0.41411355, 0.04507046, -1.4708229, + 0.04481723, 0.38508227, -0.17292616, -0.52333146, -1.0309995, + 0.61330026, 0.83921754, -0.3092124, 0.1391843, -0.9755451] + + dx2_expect = [0.71552587, -0.01499678, 0.06709455, 0.30110368, 0.45868093, + -0.24838912, 0.46063876, -0.41411355, -0.04507046, 1.4708229, + -0.04481723, -0.38508227, 0.17292616, 0.52333146, 1.0309995, + -0.61330026, -0.83921754, 0.3092124, -0.1391843, 0.9755451] + + assert np.allclose(dx[0].asnumpy(), dx1_expect) + assert np.allclose(dx[1].asnumpy(), dx2_expect) diff --git a/tests/st/pynative/test_implicit_conversion.py b/tests/st/pynative/test_implicit_conversion.py new file mode 100644 index 0000000000..fce6c24cbb --- /dev/null +++ b/tests/st/pynative/test_implicit_conversion.py @@ -0,0 +1,81 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" test implicit conversion """ +import numpy as np + +from mindspore import Tensor + + +def test_float_tensor_and_int_add(): + x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32)) + y = 2 + ret_actual = x + y + ret_expect = Tensor(np.array([[2.1, 2.2, 2.3], [2.4, 2.5, 2.6]], dtype=np.float32)) + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() + + +def test_bool_tensor_and_float_add(): + x = Tensor(np.array([[True, False], [False, True]], dtype=np.bool_)) + y = 3.3 + ret_actual = x + y + ret_expect = Tensor(np.array([[4.3, 3.3], [3.3, 4.3]], dtype=np.float32)) + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() + + +def test_bool_tensor_and_int_add(): + x = Tensor(np.array([[True, False], [False, True]], dtype=np.bool_)) + y = 3 + ret_actual = x + y + ret_expect = Tensor(np.array([[4, 3], [3, 4]], dtype=np.int32)) + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() + + +def test_bool_and_int_tensor_add(): + x = True + y = Tensor(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32)) + ret_actual = x + y + ret_expect = Tensor(np.array([[2, 3, 4], [5, 6, 7]], dtype=np.int32)) + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() + +def test_float_tensor_and_int_tensor_add(): + x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32)) + y = Tensor(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32)) + ret_actual = x + y + ret_expect = Tensor(np.array([[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]], dtype=np.float32)) + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() + + +def test_float_tensor_and_float_tensor_add(): + x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float64)) + y = Tensor(np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float32)) + ret_actual = x + y + ret_expect = 
Tensor(np.array([[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]], dtype=np.float64)) + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() + + +def test_int_tensor_and_int_tensor_add(): + x = Tensor(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int16)) + y = Tensor(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32)) + ret_actual = x + y + ret_expect = Tensor(np.array([[2, 4, 6], [8, 10, 12]], dtype=np.int32)) + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() + + +def test_float_tensor_and_bool_tensors_add(): + x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32)) + y = Tensor(np.array([[True, True, True], [False, False, False]], dtype=np.bool_)) + ret_actual = x + y + ret_expect = Tensor(np.array([[1.1, 1.2, 1.3], [0.4, 0.5, 0.6]], dtype=np.float32)) + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() diff --git a/tests/st/pynative/test_pynative_hook.py b/tests/st/pynative/test_pynative_hook.py new file mode 100644 index 0000000000..0ce4ba4f69 --- /dev/null +++ b/tests/st/pynative/test_pynative_hook.py @@ -0,0 +1,198 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import pytest +import numpy as np +import mindspore.nn as nn +import mindspore.common.dtype as mstype + +from mindspore import Tensor +from mindspore import context +from mindspore import ParameterTuple +from mindspore.nn import Momentum +from mindspore.nn import WithLossCell +from mindspore.ops import composite as C +from mindspore.ops import operations as P +from mindspore.common.initializer import TruncatedNormal + +context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + + +def weight_variable(): + """weight initial""" + return TruncatedNormal(0.02) + + +def conv(in_channels, out_channels, kernel_size, stride=1, padding=0): + """weight initial for conv layer""" + weight = weight_variable() + return nn.Conv2d(in_channels, out_channels, + kernel_size=kernel_size, stride=stride, padding=padding, + weight_init=weight, has_bias=False, pad_mode="valid") + + +def fc_with_initialize(input_channels, out_channels): + """weight initial for fc layer""" + weight = weight_variable() + bias = weight_variable() + return nn.Dense(input_channels, out_channels, weight, bias) + + +class test_custom_hook_function_base(): + def __init__(self): + pass + + def test_custom_hook_function(self, hook_function, cell_hook_function): + return hook_function, cell_hook_function + + +def cell_hook_function_print_grad(cell_id, grad_input, grad_output): + assert grad_output[0].asnumpy().shape == (32, 6, 14, 14) + assert grad_input[0].asnumpy().shape == (32, 16, 10, 10) + + +def custom_hook_function_print_and_save_grad(grad_out): + assert grad_out[0].asnumpy().shape == (32, 6, 28, 28) + + +class LeNet5(nn.Cell): + def __init__(self, hook_function, cell_hook_function, num_class=10): + super(LeNet5, self).__init__() + self.num_class = num_class + self.batch_size = 32 + self.conv1 = conv(1, 6, 5) + self.conv2 = conv(6, 16, 5) + self.conv1.register_backward_hook(cell_hook_function) + self.fc1 = 
fc_with_initialize(16 * 5 * 5, 120) + self.fc2 = fc_with_initialize(120, 84) + self.fc3 = fc_with_initialize(84, self.num_class) + self.relu = nn.ReLU() + self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2) + self.reshape = P.Reshape() + self.hook = P.HookBackward(hook_function) + + def construct(self, x): + x = self.conv1(x) + x = self.relu(x) + x = self.hook(x) + x = self.max_pool2d(x) + x = self.conv2(x) + x = self.relu(x) + x = self.max_pool2d(x) + x = self.reshape(x, (self.batch_size, -1)) + x = self.fc1(x) + x = self.relu(x) + x = self.fc2(x) + x = self.relu(x) + x = self.fc3(x) + return x + + +class GradWrap(nn.Cell): + """ GradWrap definition """ + def __init__(self, network): + super(GradWrap, self).__init__(auto_prefix=False) + self.network = network + self.weights = ParameterTuple(filter(lambda x: x.requires_grad, network.get_parameters())) + + def construct(self, x, label): + weights = self.weights + return C.GradOperation('get_by_list', get_by_list=True)(self.network, weights)(x, label) + + +class test_custom_cell_base(): + def __init__(self): + pass + + def test_custom_cell_function(self, cell): + return cell + + +class MulAdd(nn.Cell): + def __init__(self): + super(MulAdd, self).__init__() + + def construct(self, x, y): + return 2 * x + y + + def bprop(self, x, y, out, dout): + assert x.asnumpy() == 1.0 + assert y.asnumpy() == 2.0 + assert out.asnumpy() == 4.0 + assert dout.asnumpy() == 1.0 + return dout, y + + +class Ms_Cell(nn.Cell): + def __init__(self): + super(Ms_Cell, self).__init__() + self.relu = P.ReLU() + + def construct(self, x): + return self.relu(x) + + def bprop(self, x, out, dout): + dout = Tensor(np.ones([5, 5]).astype(np.float32)) + assert dout.shape == (5, 5) + return dout + + +@pytest.mark.level0 +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.env_onecard +def test_pynative_lenet_train_hook_function_print_and_save_grad(): + hook = test_custom_hook_function_base() + function = 
hook.test_custom_hook_function(custom_hook_function_print_and_save_grad, + cell_hook_function_print_grad) + net = LeNet5(hook_function=function[0], cell_hook_function=function[1]) + optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.1, 0.9) + criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=False) + net_with_criterion = WithLossCell(net, criterion) + train_network = GradWrap(net_with_criterion) + train_network.set_train() + + input_data = Tensor(np.ones([net.batch_size, 1, 32, 32]).astype(np.float32) * 0.01) + label = Tensor(np.ones([net.batch_size, net.num_class]).astype(np.float32)) + output = net(Tensor(input_data)) + criterion(output, label) + grads = train_network(input_data, label) + success = optimizer(grads) + assert success + + +@pytest.mark.level0 +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.env_onecard +def test_pynative_custom_bprop_and_Cell_MulAdd(): + custom_cell = test_custom_cell_base() + mul_add = custom_cell.test_custom_cell_function(MulAdd()) + mul_add.bprop_debug = True + C.grad_all(mul_add)(Tensor(1, mstype.float32), Tensor(2, mstype.float32)) + assert C.grad_all(mul_add)(Tensor(1, mstype.float32), Tensor(2, mstype.float32)) == \ + (Tensor(1.0, mstype.float32), Tensor(2.0, mstype.float32)) + + +@pytest.mark.level0 +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.env_onecard +def test_pynative_custom_bprop_and_Cell_Ms_Cell(): + custom_cell = test_custom_cell_base() + ms_Cell = custom_cell.test_custom_cell_function(Ms_Cell()) + ms_Cell.bprop_debug = True + assert C.grad_all(ms_Cell)(Tensor(1, mstype.float32)) == (Tensor(1.0, mstype.float32),) + \ No newline at end of file diff --git a/tests/st/pynative/test_ascend_lenet.py b/tests/st/pynative/test_pynative_lenet.py similarity index 98% rename from tests/st/pynative/test_ascend_lenet.py rename to tests/st/pynative/test_pynative_lenet.py index 
021c71d9cd..c6166d0517 100644 --- a/tests/st/pynative/test_ascend_lenet.py +++ b/tests/st/pynative/test_pynative_lenet.py @@ -157,4 +157,5 @@ def test_ascend_pynative_lenet(): total_time = total_time + cost_time print("======epoch: ", epoch, " loss: ", loss_output.asnumpy(), " cost time: ", cost_time) - assert loss_output.asnumpy() < 0.1 + assert loss_output.asnumpy() < 0.004 + assert loss_output.asnumpy() > 0.003 diff --git a/tests/st/pynative/test_pynative_resnet50.py b/tests/st/pynative/test_pynative_resnet50.py new file mode 100644 index 0000000000..de9ecebb9c --- /dev/null +++ b/tests/st/pynative/test_pynative_resnet50.py @@ -0,0 +1,432 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import time +import random +import numpy as np +import pytest + +import mindspore.common.dtype as mstype +import mindspore.dataset as ds +import mindspore.dataset.transforms.c_transforms as C +import mindspore.dataset.transforms.vision.c_transforms as vision +import mindspore.nn as nn +import mindspore.ops.functional as F + +from mindspore import Tensor +from mindspore import context +from mindspore import ParameterTuple +from mindspore.nn import Cell +from mindspore.ops import operations as P +from mindspore.ops import composite as CP +from mindspore.nn.optim.momentum import Momentum +from mindspore.common.initializer import initializer +from mindspore.nn.wrap.cell_wrapper import WithLossCell + +random.seed(1) +np.random.seed(1) +ds.config.set_seed(1) + + +def weight_variable(shape): + return initializer('XavierUniform', shape=shape, dtype=mstype.float32) + + +def weight_variable_uniform(shape): + return initializer('Uniform', shape=shape, dtype=mstype.float32) + + +def weight_variable_0(shape): + zeros = np.zeros(shape).astype(np.float32) + return Tensor(zeros) + + +def weight_variable_1(shape): + ones = np.ones(shape).astype(np.float32) + return Tensor(ones) + + +def conv3x3(in_channels, out_channels, stride=1, padding=0): + """3x3 convolution """ + weight_shape = (out_channels, in_channels, 3, 3) + weight = weight_variable(weight_shape) + return nn.Conv2d(in_channels, out_channels, + kernel_size=3, stride=stride, padding=padding, weight_init=weight, has_bias=False, pad_mode="same") + + +def conv1x1(in_channels, out_channels, stride=1, padding=0): + """1x1 convolution""" + weight_shape = (out_channels, in_channels, 1, 1) + weight = weight_variable(weight_shape) + return nn.Conv2d(in_channels, out_channels, + kernel_size=1, stride=stride, padding=padding, weight_init=weight, has_bias=False, pad_mode="same") + + +def conv7x7(in_channels, out_channels, stride=1, padding=0): + """1x1 
convolution""" + weight_shape = (out_channels, in_channels, 7, 7) + weight = weight_variable(weight_shape) + return nn.Conv2d(in_channels, out_channels, + kernel_size=7, stride=stride, padding=padding, weight_init=weight, has_bias=False, pad_mode="same") + + +def bn_with_initialize(out_channels): + shape = (out_channels) + mean = weight_variable_0(shape) + var = weight_variable_1(shape) + beta = weight_variable_0(shape) + gamma = weight_variable_uniform(shape) + bn = nn.BatchNorm2d(out_channels, momentum=0.99, eps=0.00001, gamma_init=gamma, + beta_init=beta, moving_mean_init=mean, moving_var_init=var) + return bn + + +def bn_with_initialize_last(out_channels): + shape = (out_channels) + mean = weight_variable_0(shape) + var = weight_variable_1(shape) + beta = weight_variable_0(shape) + gamma = weight_variable_uniform(shape) + bn = nn.BatchNorm2d(out_channels, momentum=0.99, eps=0.00001, gamma_init=gamma, + beta_init=beta, moving_mean_init=mean, moving_var_init=var) + return bn + + +def fc_with_initialize(input_channels, out_channels): + weight_shape = (out_channels, input_channels) + weight = weight_variable(weight_shape) + bias_shape = (out_channels) + bias = weight_variable_uniform(bias_shape) + return nn.Dense(input_channels, out_channels, weight, bias) + + +class ResidualBlock(nn.Cell): + expansion = 4 + + def __init__(self, + in_channels, + out_channels, + stride=1): + super(ResidualBlock, self).__init__() + + out_chls = out_channels // self.expansion + self.conv1 = conv1x1(in_channels, out_chls, stride=stride, padding=0) + self.bn1 = bn_with_initialize(out_chls) + + self.conv2 = conv3x3(out_chls, out_chls, stride=1, padding=0) + self.bn2 = bn_with_initialize(out_chls) + + self.conv3 = conv1x1(out_chls, out_channels, stride=1, padding=0) + self.bn3 = bn_with_initialize_last(out_channels) + + self.relu = P.ReLU() + self.add = P.TensorAdd() + + def construct(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = 
self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + out = self.add(out, identity) + out = self.relu(out) + + return out + + +class ResidualBlockWithDown(nn.Cell): + expansion = 4 + + def __init__(self, + in_channels, + out_channels, + stride=1, + down_sample=False): + super(ResidualBlockWithDown, self).__init__() + + out_chls = out_channels // self.expansion + self.conv1 = conv1x1(in_channels, out_chls, stride=stride, padding=0) + self.bn1 = bn_with_initialize(out_chls) + + self.conv2 = conv3x3(out_chls, out_chls, stride=1, padding=0) + self.bn2 = bn_with_initialize(out_chls) + + self.conv3 = conv1x1(out_chls, out_channels, stride=1, padding=0) + self.bn3 = bn_with_initialize_last(out_channels) + + self.relu = P.ReLU() + self.downSample = down_sample + + self.conv_down_sample = conv1x1(in_channels, out_channels, stride=stride, padding=0) + self.bn_down_sample = bn_with_initialize(out_channels) + self.add = P.TensorAdd() + + def construct(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + identity = self.conv_down_sample(identity) + identity = self.bn_down_sample(identity) + + out = self.add(out, identity) + out = self.relu(out) + + return out + + +class MakeLayer0(nn.Cell): + + def __init__(self, block, in_channels, out_channels, stride): + super(MakeLayer0, self).__init__() + self.a = ResidualBlockWithDown(in_channels, out_channels, stride=1, down_sample=True) + self.b = block(out_channels, out_channels, stride=stride) + self.c = block(out_channels, out_channels, stride=1) + + def construct(self, x): + x = self.a(x) + x = self.b(x) + x = self.c(x) + + return x + + +class MakeLayer1(nn.Cell): + + def __init__(self, block, in_channels, out_channels, stride): + super(MakeLayer1, self).__init__() + self.a = ResidualBlockWithDown(in_channels, 
out_channels, stride=stride, down_sample=True) + self.b = block(out_channels, out_channels, stride=1) + self.c = block(out_channels, out_channels, stride=1) + self.d = block(out_channels, out_channels, stride=1) + + def construct(self, x): + x = self.a(x) + x = self.b(x) + x = self.c(x) + x = self.d(x) + + return x + + +class MakeLayer2(nn.Cell): + + def __init__(self, block, in_channels, out_channels, stride): + super(MakeLayer2, self).__init__() + self.a = ResidualBlockWithDown(in_channels, out_channels, stride=stride, down_sample=True) + self.b = block(out_channels, out_channels, stride=1) + self.c = block(out_channels, out_channels, stride=1) + self.d = block(out_channels, out_channels, stride=1) + self.e = block(out_channels, out_channels, stride=1) + self.f = block(out_channels, out_channels, stride=1) + + def construct(self, x): + x = self.a(x) + x = self.b(x) + x = self.c(x) + x = self.d(x) + x = self.e(x) + x = self.f(x) + + return x + + +class MakeLayer3(nn.Cell): + + def __init__(self, block, in_channels, out_channels, stride): + super(MakeLayer3, self).__init__() + self.a = ResidualBlockWithDown(in_channels, out_channels, stride=stride, down_sample=True) + self.b = block(out_channels, out_channels, stride=1) + self.c = block(out_channels, out_channels, stride=1) + + def construct(self, x): + x = self.a(x) + x = self.b(x) + x = self.c(x) + + return x + + +class ResNet(nn.Cell): + + def __init__(self, block, num_classes=100, batch_size=32): + super(ResNet, self).__init__() + self.batch_size = batch_size + self.num_classes = num_classes + + self.conv1 = conv7x7(3, 64, stride=2, padding=0) + + self.bn1 = bn_with_initialize(64) + self.relu = P.ReLU() + self.maxpool = P.MaxPoolWithArgmax(ksize=3, strides=2, padding="SAME") + + self.layer1 = MakeLayer0(block, in_channels=64, out_channels=256, stride=1) + self.layer2 = MakeLayer1(block, in_channels=256, out_channels=512, stride=2) + self.layer3 = MakeLayer2(block, in_channels=512, out_channels=1024, stride=2) + 
self.layer4 = MakeLayer3(block, in_channels=1024, out_channels=2048, stride=2) + + self.pool = P.ReduceMean(keep_dims=True) + self.squeeze = P.Squeeze(axis=(2, 3)) + self.fc = fc_with_initialize(512 * block.expansion, num_classes) + + def construct(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x)[0] + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.pool(x, (2, 3)) + x = self.squeeze(x) + x = self.fc(x) + return x + + +def resnet50(batch_size, num_classes): + return ResNet(ResidualBlock, num_classes, batch_size) + + +def create_dataset(repeat_num=1, training=True, batch_size=32): + data_home = "/home/workspace/mindspore_dataset" + data_dir = data_home + "/cifar-10-batches-bin" + if not training: + data_dir = data_home + "/cifar-10-verify-bin" + data_set = ds.Cifar10Dataset(data_dir) + + resize_height = 224 + resize_width = 224 + rescale = 1.0 / 255.0 + shift = 0.0 + + # define map operations + random_crop_op = vision.RandomCrop((32, 32), (4, 4, 4, 4)) # padding_mode default CONSTANT + random_horizontal_op = vision.RandomHorizontalFlip() + # interpolation default BILINEAR + resize_op = vision.Resize((resize_height, resize_width)) + rescale_op = vision.Rescale(rescale, shift) + normalize_op = vision.Normalize((0.4465, 0.4822, 0.4914), (0.2010, 0.1994, 0.2023)) + changeswap_op = vision.HWC2CHW() + type_cast_op = C.TypeCast(mstype.int32) + + c_trans = [] + if training: + c_trans = [random_crop_op, random_horizontal_op] + c_trans += [resize_op, rescale_op, normalize_op, + changeswap_op] + + # apply map operations on images + data_set = data_set.map(input_columns="label", operations=type_cast_op) + data_set = data_set.map(input_columns="image", operations=c_trans) + + # apply shuffle operations + data_set = data_set.shuffle(buffer_size=1000) + + # apply batch operations + data_set = data_set.batch(batch_size=batch_size, drop_remainder=True) + + # apply repeat operations + data_set = 
data_set.repeat(repeat_num) + + return data_set + + +class CrossEntropyLoss(nn.Cell): + def __init__(self): + super(CrossEntropyLoss, self).__init__() + self.cross_entropy = P.SoftmaxCrossEntropyWithLogits() + self.mean = P.ReduceMean() + self.one_hot = P.OneHot() + self.one = Tensor(1.0, mstype.float32) + self.zero = Tensor(0.0, mstype.float32) + + def construct(self, logits, label): + label = self.one_hot(label, F.shape(logits)[1], self.one, self.zero) + loss = self.cross_entropy(logits, label)[0] + loss = self.mean(loss, (-1,)) + return loss + + +class GradWrap(Cell): + """ GradWrap definition """ + + def __init__(self, network): + super(GradWrap, self).__init__() + self.network = network + self.weights = ParameterTuple(network.trainable_params()) + + def construct(self, x, label): + weights = self.weights + return CP.grad_by_list(self.network, weights)(x, label) + + +@pytest.mark.level0 +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.env_onecard +def test_pynative_resnet50(): + context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + + batch_size = 32 + num_classes = 10 + net = resnet50(batch_size, num_classes) + criterion = CrossEntropyLoss() + optimizer = Momentum(learning_rate=0.01, momentum=0.9, + params=filter(lambda x: x.requires_grad, net.get_parameters())) + + net_with_criterion = WithLossCell(net, criterion) + net_with_criterion.set_grad() + train_network = GradWrap(net_with_criterion) + train_network.set_train() + + step = 0 + max_step = 20 + data_set = create_dataset(repeat_num=1, training=True, batch_size=batch_size) + for element in data_set.create_dict_iterator(): + step = step + 1 + if step > max_step: + break + start_time = time.time() + input_data = Tensor(element["image"]) + input_label = Tensor(element["label"]) + loss_output = net_with_criterion(input_data, input_label) + grads = train_network(input_data, input_label) + optimizer(grads) + end_time = time.time() + cost_time 
= end_time - start_time + print("======step: ", step, " loss: ", loss_output.asnumpy(), " cost time: ", cost_time) + if step > 1: + assert cost_time < 0.3 + \ No newline at end of file diff --git a/tests/ut/cpp/CMakeLists.txt b/tests/ut/cpp/CMakeLists.txt index dcc798165b..880a281037 100644 --- a/tests/ut/cpp/CMakeLists.txt +++ b/tests/ut/cpp/CMakeLists.txt @@ -17,6 +17,7 @@ message("PYTHON_INCLUDE_DIRS = ${PYTHON_INCLUDE_DIRS}") message("PYTHON_LIBRARIES = ${PYTHON_LIBRARIES}") include_directories(${PYTHON_INCLUDE_DIRS}) include_directories(${MS_CCSRC_PATH}) +include_directories(${CMAKE_SOURCE_DIR}/mindspore/core) include_directories(${CMAKE_CURRENT_SOURCE_DIR}) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/stub/runtime/) include_directories(${CMAKE_BINARY_DIR}) @@ -27,12 +28,20 @@ link_directories(${MS_CCSRC_BUILD_PATH}) if(ENABLE_MINDDATA) add_definitions(-D ENABLE_MINDDATA) - link_directories(${MS_CCSRC_BUILD_PATH}/dataset) - link_directories(${MS_CCSRC_BUILD_PATH}/mindrecord) + link_directories(${MS_CCSRC_BUILD_PATH}/minddata/dataset) + link_directories(${MS_CCSRC_BUILD_PATH}/minddata/mindrecord) endif() # fetch ut test files if(ENABLE_MINDDATA) - file(GLOB_RECURSE UT_SRCS ./*.cc) + file(GLOB_RECURSE UT_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ./*.cc) + if(NOT ENABLE_PYTHON) + set(PYTHON_RELATED_SRCS + dataset/filter_op_test.cc + dataset/voc_op_test.cc + dataset/manifest_op_test.cc + ) + list(REMOVE_ITEM UT_SRCS ${PYTHON_RELATED_SRCS}) + endif() else() file(GLOB_RECURSE TEMP_UT_SRCS ./*.cc) foreach(OBJ ${TEMP_UT_SRCS}) @@ -43,78 +52,83 @@ else() endif() file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "../../../mindspore/ccsrc/ir/*.cc" + "../../../mindspore/core/base/*.cc" + "../../../mindspore/core/abstract/*.cc" + "../../../mindspore/core/ir/*.cc" "../../../mindspore/ccsrc/common/*.cc" "../../../mindspore/ccsrc/utils/*.cc" - "../../../mindspore/ccsrc/parallel/*.cc" - "../../../mindspore/ccsrc/pipeline/parse/*.cc" - 
"../../../mindspore/ccsrc/pipeline/static_analysis/*.cc" - "../../../mindspore/ccsrc/pipeline/pipeline.cc" - "../../../mindspore/ccsrc/pipeline/resource.cc" - "../../../mindspore/ccsrc/pipeline/pass.cc" - "../../../mindspore/ccsrc/pipeline/action.cc" - "../../../mindspore/ccsrc/pipeline/validator.cc" - "../../../mindspore/ccsrc/pipeline/remove_value_node_dup.cc" - "../../../mindspore/ccsrc/optimizer/*.cc" + "../../../mindspore/ccsrc/pipeline/jit/parse/*.cc" + "../../../mindspore/ccsrc/pipeline/jit/static_analysis/*.cc" + "../../../mindspore/ccsrc/pipeline/jit/pipeline.cc" + "../../../mindspore/ccsrc/pipeline/jit/resource.cc" + "../../../mindspore/ccsrc/pipeline/jit/pass.cc" + "../../../mindspore/ccsrc/pipeline/jit/action.cc" + "../../../mindspore/ccsrc/pipeline/jit/validator.cc" + "../../../mindspore/ccsrc/pipeline/jit/remove_value_node_dup.cc" + "../../../mindspore/ccsrc/frontend/optimizer/*.cc" + "../../../mindspore/ccsrc/frontend/parallel/*.cc" "../../../mindspore/ccsrc/debug/*.cc" - "../../../mindspore/ccsrc/operator/*.cc" - "../../../mindspore/ccsrc/transform/*.cc" - "../../../mindspore/ccsrc/session/anf_runtime_algorithm.cc" - "../../../mindspore/ccsrc/session/ascend_session.cc" - "../../../mindspore/ccsrc/session/ascend_control_parser.cc" - "../../../mindspore/ccsrc/session/kernel_graph.cc" - "../../../mindspore/ccsrc/session/session_basic.cc" - "../../../mindspore/ccsrc/session/session_factory.cc" + "../../../mindspore/ccsrc/frontend/operator/*.cc" + "../../../mindspore/ccsrc/transform/graph_ir/*.cc" + "../../../mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc" + "../../../mindspore/ccsrc/backend/session/ascend_session.cc" + "../../../mindspore/ccsrc/backend/session/ascend_control_parser.cc" + "../../../mindspore/ccsrc/backend/session/kernel_graph.cc" + "../../../mindspore/ccsrc/backend/session/session_basic.cc" + "../../../mindspore/ccsrc/backend/session/session_factory.cc" "../../../mindspore/ccsrc/vm/*.cc" - 
"../../../mindspore/ccsrc/pynative/*.cc" + "../../../mindspore/ccsrc/pipeline/pynative/*.cc" "../../../mindspore/ccsrc/pybind_api/*.cc" - "../../../mindspore/ccsrc/kernel/akg/*.cc" - "../../../mindspore/ccsrc/kernel/kash/*.cc" - "../../../mindspore/ccsrc/kernel/cce/*.cc" - "../../../mindspore/ccsrc/kernel/rts/*.cc" - "../../../mindspore/ccsrc/kernel/hccl/*.cc" - "../../../mindspore/ccsrc/kernel/kernel_query.cc" - "../../../mindspore/ccsrc/kernel/kernel_build_info.cc" - "../../../mindspore/ccsrc/pre_activate/ascend/*.cc" - "../../../mindspore/ccsrc/pre_activate/common/*.cc" - "../../../mindspore/ccsrc/pre_activate/gpu/*.cc" - "../../../mindspore/ccsrc/pre_activate/mem_reuse/*.cc" - "../../../mindspore/ccsrc/pre_activate/pass/*.cc" - "../../../mindspore/ccsrc/kernel/aicpu/aicpu_kernel_metadata.cc" - "../../../mindspore/ccsrc/kernel/rts/rt_kernel_info.cc" - "../../../mindspore/ccsrc/kernel/common_utils.cc" - "../../../mindspore/ccsrc/kernel/oplib/*.cc" - "../../../mindspore/ccsrc/kernel/tbe/*.cc" - "../../../mindspore/ccsrc/device/kernel_runtime.cc" - "../../../mindspore/ccsrc/device/memory_manager.cc" - "../../../mindspore/ccsrc/device/kernel_runtime_manager.cc" - "../../../mindspore/ccsrc/device/kernel_info.cc" - "../../../mindspore/ccsrc/device/ascend/profiling/*.cc" - "../../../mindspore/ccsrc/device/ascend/kernel_select_ascend.cc" - "../../../mindspore/ccsrc/device/ascend/kernel_select_graph_kernel.cc" - "../../../mindspore/ccsrc/device/convert_tensor_utils.cc" - "../../../mindspore/ccsrc/device/ascend/kernel_build_ascend.cc" - "../../../mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc" - "../../../mindspore/ccsrc/device/ascend/ascend_memory_manager.cc" - "../../../mindspore/ccsrc/device/ascend/ascend_device_address.cc" - "../../../mindspore/ccsrc/device/ascend/ascend_memory_pool.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/akg/*.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/kash/*.cc" + 
"../../../mindspore/ccsrc/backend/kernel_compiler/rts/*.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/hccl/*.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/kernel_query.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/kernel_build_info.cc" + "../../../mindspore/ccsrc/backend/optimizer/ascend/*.cc" + "../../../mindspore/ccsrc/backend/optimizer/common/*.cc" + "../../../mindspore/ccsrc/backend/optimizer/gpu/*.cc" + "../../../mindspore/ccsrc/backend/optimizer/mem_reuse/*.cc" + "../../../mindspore/ccsrc/backend/optimizer/pass/*.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_metadata.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel_info.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/common_utils.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/oplib/*.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/tbe/*.cc" + "../../../mindspore/ccsrc/runtime/device/kernel_runtime.cc" + "../../../mindspore/ccsrc/runtime/device/memory_manager.cc" + "../../../mindspore/ccsrc/runtime/device/kernel_runtime_manager.cc" + "../../../mindspore/ccsrc/runtime/device/kernel_info.cc" + "../../../mindspore/ccsrc/runtime/device/ascend/profiling/*.cc" + "../../../mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc" + "../../../mindspore/ccsrc/runtime/device/ascend/kernel_select_graph_kernel.cc" + "../../../mindspore/ccsrc/runtime/device/convert_tensor_utils.cc" + "../../../mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.cc" + "../../../mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc" + "../../../mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc" + "../../../mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc" + "../../../mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.cc" "../../../mindspore/ccsrc/predict/generator/utils/ir_model_util.cc" "../../../mindspore/ccsrc/predict/predict.cc" "../../../mindspore/ccsrc/predict/converter/*.cc" 
"../../../mindspore/ccsrc/predict/converter/attr_utils/*.cc" "../../../mindspore/ccsrc/predict/converter/lite_model/*.cc" "../../../mindspore/ccsrc/predict/converter/lite_model/operations/*.cc" - "../../../mindspore/ccsrc/kernel/cpu/cpu_kernel.cc" - "../../../mindspore/ccsrc/kernel/cpu/cpu_kernel_factory.cc" - "../../../mindspore/ccsrc/kernel/cpu/sparse_apply_adam_cpu_kernel.cc" - "../../../mindspore/ccsrc/kernel/cpu/sparse_apply_ftrl_cpu_kernel.cc" - "../../../mindspore/ccsrc/kernel/cpu/sparse_apply_lazy_adam_cpu_kernel.cc" - "../../../mindspore/ccsrc/kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_adam_cpu_kernel.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_ftrl_cpu_kernel.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_lazy_adam_cpu_kernel.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_proximal_adagrad_cpu_kernel.cc" ) list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/debug/dump_proto.cc") -list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ir/lite/tensor.cc") -list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc") +list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/core/ir/lite/tensor.cc") +list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/frontend/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc") +list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/frontend/parallel/ps/util.cc") +list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/frontend/parallel/ps/scheduler.cc") +list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/frontend/parallel/ps/optimizer_info.cc") +list(REMOVE_ITEM MINDSPORE_SRC_LIST 
"../../../mindspore/ccsrc/frontend/parallel/ps/optimizer_info_builder.cc") list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/anf_ir.pb.cc") list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/node_strategy.pb.cc") list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/load_onnx/anf_model_parser.cc") diff --git a/tests/ut/cpp/pipeline/static_analysis/abstract_test.cc b/tests/ut/cpp/abstract/abstract_test.cc similarity index 90% rename from tests/ut/cpp/pipeline/static_analysis/abstract_test.cc rename to tests/ut/cpp/abstract/abstract_test.cc index 93baf86c3e..2e3a2a8d1a 100644 --- a/tests/ut/cpp/pipeline/static_analysis/abstract_test.cc +++ b/tests/ut/cpp/abstract/abstract_test.cc @@ -18,13 +18,13 @@ #include "common/common_test.h" -#include "pipeline/static_analysis/static_analysis.h" -#include "pipeline/static_analysis/utils.h" -#include "pipeline/static_analysis/prim.h" -#include "pipeline/parse/parse.h" -#include "pipeline/parse/resolve.h" -#include "pipeline/parse/data_converter.h" -#include "operator/ops.h" +#include "pipeline/jit/static_analysis/static_analysis.h" +#include "abstract/utils.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "pipeline/jit/parse/parse.h" +#include "pipeline/jit/parse/resolve.h" +#include "pipeline/jit/parse/data_converter.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace abstract { diff --git a/tests/ut/cpp/pipeline/static_analysis/dshape_test.cc b/tests/ut/cpp/abstract/dshape_test.cc similarity index 97% rename from tests/ut/cpp/pipeline/static_analysis/dshape_test.cc rename to tests/ut/cpp/abstract/dshape_test.cc index ae18f7730b..da0e9ed3ee 100644 --- a/tests/ut/cpp/pipeline/static_analysis/dshape_test.cc +++ b/tests/ut/cpp/abstract/dshape_test.cc @@ -18,7 +18,7 @@ #include "common/common_test.h" -#include "pipeline/static_analysis/dshape.h" +#include "abstract/dshape.h" #include "utils/log_adapter.h" namespace mindspore { diff --git 
a/tests/ut/cpp/pipeline/static_analysis/utils_test.cc b/tests/ut/cpp/abstract/utils_test.cc similarity index 95% rename from tests/ut/cpp/pipeline/static_analysis/utils_test.cc rename to tests/ut/cpp/abstract/utils_test.cc index dceef71b02..33cada28d7 100644 --- a/tests/ut/cpp/pipeline/static_analysis/utils_test.cc +++ b/tests/ut/cpp/abstract/utils_test.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pipeline/static_analysis/utils.h" +#include "abstract/utils.h" #include "common/common_test.h" -#include "pipeline/static_analysis/static_analysis.h" +#include "pipeline/jit/static_analysis/static_analysis.h" namespace mindspore { namespace abstract { diff --git a/tests/ut/cpp/ir/base_test.cc b/tests/ut/cpp/base/base_test.cc similarity index 99% rename from tests/ut/cpp/ir/base_test.cc rename to tests/ut/cpp/base/base_test.cc index 0b4e8a637b..71a7999e0f 100644 --- a/tests/ut/cpp/ir/base_test.cc +++ b/tests/ut/cpp/base/base_test.cc @@ -17,7 +17,7 @@ #include "common/common_test.h" #include "utils/any.h" -#include "ir/base.h" +#include "base/base.h" #include "ir/anf.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/common/backend_common_test.cc b/tests/ut/cpp/common/backend_common_test.cc index 060b170a8c..3710349298 100644 --- a/tests/ut/cpp/common/backend_common_test.cc +++ b/tests/ut/cpp/common/backend_common_test.cc @@ -20,11 +20,11 @@ #include #include "utils/log_adapter.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "debug/anf_ir_dump.h" -#include "session/ascend_session.h" -#include "pipeline/resource.h" -#include "pipeline/action.h" +#include "backend/session/ascend_session.h" +#include "pipeline/jit/resource.h" +#include "pipeline/jit/action.h" #include "ir/anf.h" #include "ir/manager.h" diff --git a/tests/ut/cpp/common/backend_common_test.h b/tests/ut/cpp/common/backend_common_test.h index fb3334182a..f5bfc9d6dd 100644 --- 
a/tests/ut/cpp/common/backend_common_test.h +++ b/tests/ut/cpp/common/backend_common_test.h @@ -17,7 +17,7 @@ #define TESTS_UT_CPP_COMMON_UT_BACKEND_COMMON_H_ #include "common/common_test.h" #include "utils/context/ms_context.h" -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" namespace mindspore { class BackendCommon : public UT::Common { diff --git a/tests/ut/cpp/common/py_func_graph_fetcher.h b/tests/ut/cpp/common/py_func_graph_fetcher.h index 98552a96b5..d864842760 100644 --- a/tests/ut/cpp/common/py_func_graph_fetcher.h +++ b/tests/ut/cpp/common/py_func_graph_fetcher.h @@ -22,8 +22,8 @@ #include "ir/primitive.h" #include "ir/manager.h" #include "ir/func_graph.h" -#include "pipeline/parse/parse_base.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse_base.h" +#include "pipeline/jit/parse/parse.h" #include "./common.h" namespace UT { diff --git a/tests/ut/cpp/common/test_main.cc b/tests/ut/cpp/common/test_main.cc index f0cfc1778c..fa456ed260 100644 --- a/tests/ut/cpp/common/test_main.cc +++ b/tests/ut/cpp/common/test_main.cc @@ -16,8 +16,8 @@ #include #include "gtest/gtest.h" #include "utils/log_adapter.h" -#include "pipeline/pipeline.h" -#include "pipeline/resource.h" +#include "pipeline/jit/pipeline.h" +#include "pipeline/jit/resource.h" namespace mindspore { extern void InitSubModulesLogLevel(); diff --git a/tests/ut/cpp/dataset/CMakeLists.txt b/tests/ut/cpp/dataset/CMakeLists.txt index 129864ca0f..8bbf42a640 100644 --- a/tests/ut/cpp/dataset/CMakeLists.txt +++ b/tests/ut/cpp/dataset/CMakeLists.txt @@ -11,6 +11,7 @@ SET(DE_UT_SRCS interrupt_test.cc image_folder_op_test.cc buddy_test.cc + bounding_box_augment_op_test.cc arena_test.cc btree_test.cc center_crop_op_test.cc @@ -35,20 +36,26 @@ SET(DE_UT_SRCS project_op_test.cc queue_test.cc random_crop_op_test.cc + random_crop_with_bbox_op_test.cc random_crop_decode_resize_op_test.cc random_crop_and_resize_op_test.cc + random_crop_and_resize_with_bbox_op_test.cc 
random_color_adjust_op_test.cc random_horizontal_flip_op_test.cc + random_horizontal_flip_with_bbox_test.cc random_resize_op_test.cc + random_resize_with_bbox_op_test.cc random_rotation_op_test.cc random_vertical_flip_op_test.cc + random_vertical_flip_with_bbox_op_test.cc rename_op_test.cc repeat_op_test.cc skip_op_test.cc rescale_op_test.cc resize_bilinear_op_test.cc resize_op_test.cc - schema_test.cc + resize_with_bbox_op_test.cc + schema_test.cc shuffle_op_test.cc stand_alone_samplers_test.cc status_test.cc @@ -83,6 +90,8 @@ SET(DE_UT_SRCS concatenate_op_test.cc cyclic_array_test.cc perf_data_test.cc + c_api_test.cc + tensor_op_fusion_pass_test.cc ) add_executable(de_ut_tests ${DE_UT_SRCS}) diff --git a/tests/ut/cpp/dataset/arena_test.cc b/tests/ut/cpp/dataset/arena_test.cc index e8698ad979..10d27b51c6 100644 --- a/tests/ut/cpp/dataset/arena_test.cc +++ b/tests/ut/cpp/dataset/arena_test.cc @@ -15,7 +15,7 @@ */ #include -#include "dataset/util/arena.h" +#include "minddata/dataset/util/arena.h" #include "common/common.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/batch_op_test.cc b/tests/ut/cpp/dataset/batch_op_test.cc index a04da06e4e..3e1f3c0b32 100644 --- a/tests/ut/cpp/dataset/batch_op_test.cc +++ b/tests/ut/cpp/dataset/batch_op_test.cc @@ -16,14 +16,14 @@ #include #include #include -#include "dataset/core/client.h" +#include "minddata/dataset/core/client.h" #include "common/common.h" #include "common/utils.h" #include "gtest/gtest.h" -#include "dataset/core/global_context.h" +#include "minddata/dataset/core/global_context.h" #include "utils/log_adapter.h" #include "securec.h" -#include "dataset/util/status.h" +#include "minddata/dataset/util/status.h" namespace common = mindspore::common; namespace de = mindspore::dataset; diff --git a/tests/ut/cpp/dataset/bit_functions_test.cc b/tests/ut/cpp/dataset/bit_functions_test.cc index 02b6a25f76..cf1c1562db 100644 --- a/tests/ut/cpp/dataset/bit_functions_test.cc +++ 
b/tests/ut/cpp/dataset/bit_functions_test.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/core/constants.h" +#include "minddata/dataset/core/constants.h" #include "common/common.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/bounding_box_augment_op_test.cc b/tests/ut/cpp/dataset/bounding_box_augment_op_test.cc new file mode 100644 index 0000000000..dc59d39fac --- /dev/null +++ b/tests/ut/cpp/dataset/bounding_box_augment_op_test.cc @@ -0,0 +1,52 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "common/bboxop_common.h" +#include "minddata/dataset/kernels/image/bounding_box_augment_op.h" +#include "minddata/dataset/kernels/image/random_rotation_op.h" +#include "utils/log_adapter.h" + +using namespace mindspore::dataset; +using mindspore::LogStream; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::MsLogLevel::INFO; + +const bool kSaveExpected = false; +const char kOpName[] = "BoundingBoxAugmentOp"; + +class MindDataTestBoundingBoxAugmentOp : public UT::CVOP::BBOXOP::BBoxOpCommon { + protected: + MindDataTestBoundingBoxAugmentOp() : UT::CVOP::BBOXOP::BBoxOpCommon() {} +}; + +TEST_F(MindDataTestBoundingBoxAugmentOp, TestOp) { + MS_LOG(INFO) << "Doing testBoundingBoxAugment."; + TensorTable results; + std::unique_ptr op = + std::make_unique(std::make_shared(90, 90), 1); + for (const auto &row : images_and_annotations_) { + TensorRow output_row; + Status s = op->Compute(row, &output_row); + EXPECT_TRUE(s.IsOk()); + results.push_back(output_row); + } + if (kSaveExpected) { + SaveImagesWithAnnotations(FileType::kExpected, std::string(kOpName), results); + } + SaveImagesWithAnnotations(FileType::kActual, std::string(kOpName), results); + if (!kSaveExpected) { + CompareActualAndExpected(std::string(kOpName)); + } +} diff --git a/tests/ut/cpp/dataset/btree_test.cc b/tests/ut/cpp/dataset/btree_test.cc index 67b6c4e6c7..9fa4fce812 100644 --- a/tests/ut/cpp/dataset/btree_test.cc +++ b/tests/ut/cpp/dataset/btree_test.cc @@ -15,10 +15,10 @@ */ #include -#include "dataset/util/btree.h" -#include "dataset/util/auto_index.h" -#include "dataset/util/system_pool.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/util/btree.h" +#include "minddata/dataset/util/auto_index.h" +#include "minddata/dataset/util/system_pool.h" +#include "minddata/dataset/util/task_manager.h" #include "common/common.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/c_api_test.cc 
b/tests/ut/cpp/dataset/c_api_test.cc new file mode 100644 index 0000000000..902bc9a43b --- /dev/null +++ b/tests/ut/cpp/dataset/c_api_test.cc @@ -0,0 +1,771 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include + +#include "utils/log_adapter.h" +#include "common/utils.h" +#include "common/common.h" +#include "gtest/gtest.h" +#include "securec.h" +#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/include/status.h" +#include "minddata/dataset/include/transforms.h" +#include "minddata/dataset/include/iterator.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/include/samplers.h" + +using namespace mindspore::dataset::api; +using mindspore::MsLogLevel::ERROR; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::LogStream; +using mindspore::dataset::Tensor; +using mindspore::dataset::Status; +using mindspore::dataset::BorderType; + + +class MindDataTestPipeline : public UT::DatasetOpTesting { + protected: +}; + + +TEST_F(MindDataTestPipeline, TestBatchAndRepeat) { + // Create a Mnist Dataset + std::string folder_path = datasets_root_path_ + "/testMnistData/"; + std::shared_ptr ds = Mnist(folder_path, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create 
a Batch operation on ds + int32_t batch_size = 2; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 10); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestTensorOpsAndMap) { + // Create a Mnist Dataset + std::string folder_path = datasets_root_path_ + "/testMnistData/"; + std::shared_ptr ds = Mnist(folder_path, RandomSampler(false, 20)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create objects for the tensor ops + std::shared_ptr resize_op = vision::Resize({30, 30}); + EXPECT_TRUE(resize_op != nullptr); + + std::shared_ptr center_crop_op = vision::CenterCrop({16, 16}); + EXPECT_TRUE(center_crop_op != nullptr); + + // Create a Map operation on ds + ds = ds->Map({resize_op, center_crop_op}); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 1; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 40); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestUniformAugWithOps) { + // Create a Mnist Dataset + std::string folder_path = datasets_root_path_ + "/testMnistData/"; + std::shared_ptr ds = Mnist(folder_path, RandomSampler(false, 20)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 1; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create objects for the tensor ops + std::shared_ptr resize_op = vision::Resize({30, 30}); + EXPECT_TRUE(resize_op != nullptr); + + std::shared_ptr random_crop_op = vision::RandomCrop({28, 28}); + EXPECT_TRUE(random_crop_op != nullptr); + + std::shared_ptr center_crop_op = vision::CenterCrop({16, 16}); + EXPECT_TRUE(center_crop_op != nullptr); + + std::shared_ptr uniform_aug_op = vision::UniformAugment({random_crop_op, center_crop_op}, 2); + EXPECT_TRUE(uniform_aug_op != nullptr); + + // Create a Map operation on ds + ds = ds->Map({resize_op, uniform_aug_op}); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 20); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestRandomFlip) { + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create objects for the tensor ops + std::shared_ptr random_vertical_flip_op = vision::RandomVerticalFlip(0.5); + EXPECT_TRUE(random_vertical_flip_op != nullptr); + + std::shared_ptr random_horizontal_flip_op = vision::RandomHorizontalFlip(0.5); + EXPECT_TRUE(random_horizontal_flip_op != nullptr); + + // Create a Map operation on ds + ds = ds->Map({random_vertical_flip_op, random_horizontal_flip_op}); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 1; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 20); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestImageFolderBatchAndRepeat) { + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 2; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 10); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestImageFolderWithSamplers) { + std::shared_ptr sampl = DistributedSampler(2, 1); + EXPECT_NE(sampl, nullptr); + + sampl = PKSampler(3); + EXPECT_NE(sampl, nullptr); + + sampl = RandomSampler(false, 12); + EXPECT_NE(sampl, nullptr); + + sampl = SequentialSampler(0, 12); + EXPECT_NE(sampl, nullptr); + + std::vector weights = {0.9, 0.8, 0.68, 0.7, 0.71, 0.6, 0.5, 0.4, 0.3, 0.5, 0.2, 0.1}; + sampl = WeightedRandomSampler(weights, 12); + EXPECT_NE(sampl, nullptr); + + std::vector indices = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23}; + sampl = SubsetRandomSampler(indices); + EXPECT_NE(sampl, nullptr); + + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, false, sampl); + EXPECT_NE(ds, nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_NE(ds, nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 2; + ds = ds->Batch(batch_size); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 12); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestPad) { + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create objects for the tensor ops + std::shared_ptr pad_op1 = vision::Pad({1, 2, 3, 4}, {0}, BorderType::kSymmetric); + EXPECT_TRUE(pad_op1 != nullptr); + + std::shared_ptr pad_op2 = vision::Pad({1}, {1, 1, 1}, BorderType::kEdge); + EXPECT_TRUE(pad_op2 != nullptr); + + std::shared_ptr pad_op3 = vision::Pad({1, 4}); + EXPECT_TRUE(pad_op3 != nullptr); + + // Create a Map operation on ds + ds = ds->Map({pad_op1, pad_op2, pad_op3}); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 1; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 20); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestCutOut) { + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create objects for the tensor ops + std::shared_ptr cut_out1 = vision::CutOut(30, 5); + EXPECT_TRUE(cut_out1!= nullptr); + + std::shared_ptr cut_out2 = vision::CutOut(30); + EXPECT_TRUE(cut_out2 != nullptr); + + // Create a Map operation on ds + ds = ds->Map({cut_out1, cut_out2}); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 1; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 20); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestNormalize) { + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create objects for the tensor ops + std::shared_ptr normalize = vision::Normalize({121.0, 115.0, 100.0}, {70.0, 68.0, 71.0}); + EXPECT_TRUE(normalize != nullptr); + + // Create a Map operation on ds + ds = ds->Map({normalize}); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 1; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 20); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestDecode) { + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, false, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create objects for the tensor ops + std::shared_ptr decode = vision::Decode(true); + EXPECT_TRUE(decode != nullptr); + + // Create a Map operation on ds + ds = ds->Map({decode}); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 1; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + EXPECT_EQ(i, 20); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestShuffleDataset) { + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Shuffle operation on ds + int32_t shuffle_size = 10; + ds = ds->Shuffle(shuffle_size); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 2; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 10); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestCifar10Dataset) { + + // Create a Cifar10 Dataset + std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; + std::shared_ptr ds = Cifar10(folder_path, 0, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 2; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 10); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestRandomColorAdjust) { + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create objects for the tensor ops + std::shared_ptr random_color_adjust1 = vision::RandomColorAdjust({1.0}, {0.0}, {0.5}, {0.5}); + EXPECT_TRUE(random_color_adjust1 != nullptr); + + std::shared_ptr random_color_adjust2 = vision::RandomColorAdjust({1.0, 1.0}, {0.0, 0.0}, {0.5, 0.5}, + {0.5, 0.5}); + EXPECT_TRUE(random_color_adjust2 != nullptr); + + std::shared_ptr random_color_adjust3 = vision::RandomColorAdjust({0.5, 1.0}, {0.0, 0.5}, {0.25, 0.5}, + {0.25, 0.5}); + EXPECT_TRUE(random_color_adjust3 != nullptr); + + std::shared_ptr random_color_adjust4 = vision::RandomColorAdjust(); + EXPECT_TRUE(random_color_adjust4 != nullptr); + + // Create a Map operation on ds + ds = ds->Map({random_color_adjust1, random_color_adjust2, random_color_adjust3, random_color_adjust4}); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 1; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 20); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestRandomRotation) { + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create objects for the tensor ops + std::shared_ptr random_rotation_op = vision::RandomRotation({-180, 180}); + EXPECT_TRUE(random_rotation_op != nullptr); + + // Create a Map operation on ds + ds = ds->Map({random_rotation_op}); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 1; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 20); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestProjectMap) { + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create objects for the tensor ops + std::shared_ptr random_vertical_flip_op = vision::RandomVerticalFlip(0.5); + EXPECT_TRUE(random_vertical_flip_op != nullptr); + + // Create a Map operation on ds + ds = ds->Map({random_vertical_flip_op}, {}, {}, {"image", "label"}); + EXPECT_TRUE(ds != nullptr); + + // Create a Project operation on ds + std::vector column_project = {"image"}; + ds = ds->Project(column_project); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 1; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 20); + + // Manually terminate the pipeline + iter->Stop(); +} \ No newline at end of file diff --git a/tests/ut/cpp/dataset/cache_op_test.cc b/tests/ut/cpp/dataset/cache_op_test.cc new file mode 100644 index 0000000000..bdb7c861b2 --- /dev/null +++ b/tests/ut/cpp/dataset/cache_op_test.cc @@ -0,0 +1,579 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/engine/cache/cache_client.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/datasetops/cache_op.h" +#include "minddata/dataset/engine/datasetops/cache_lookup_op.h" +#include "minddata/dataset/engine/datasetops/cache_merge_op.h" +#include "minddata/dataset/engine/datasetops/source/image_folder_op.h" +#include "common/common.h" +#include "gtest/gtest.h" +#include "utils/log_adapter.h" +#include "minddata/dataset/util/storage_container.h" // lint !e322 +#include "minddata/dataset/engine/datasetops/source/random_data_op.h" +#include "minddata/dataset/engine/data_schema.h" + +using namespace mindspore::dataset; +using mindspore::LogStream; +using mindspore::dataset::CacheClient; +using mindspore::dataset::TaskGroup; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::MsLogLevel::INFO; + +class MindDataTestCacheOp : public UT::DatasetOpTesting { + public: + void SetUp() override { + DatasetOpTesting::SetUp(); + GlobalInit(); + } +}; + +TEST_F(MindDataTestCacheOp, TestCacheServer) { + Status rc; + CacheClient myClient(1, 0, true); // use arbitrary session of 1, size of 0, spilling is true + // cksum value of 1 for CreateCache here...normally you do not directly create a cache and the cksum arg is generated. + rc = myClient.CreateCache(1, true); + EXPECT_TRUE(rc.IsOk()); + std::cout << myClient << std::endl; + + // Create a schema using the C api's + int32_t rank = 0; // not used + std::unique_ptr testSchema = std::make_unique(); + // 2 columns. 
First column is an "image" 640,480,3 + TensorShape c1Shape({640, 480, 3}); + ColDescriptor c1("image", DataType(DataType::DE_INT8), TensorImpl::kFlexible, + rank, // not used + &c1Shape); + // Column 2 will just be a scalar label number + TensorShape c2Shape({}); // empty shape is a 1-value scalar Tensor + ColDescriptor c2("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, rank, &c2Shape); + + testSchema->AddColumn(c1); + testSchema->AddColumn(c2); + + std::unordered_map map; + rc = testSchema->GetColumnNameMap(&map); + EXPECT_TRUE(rc.IsOk()); + + // Test the CacheSchema api + rc = myClient.CacheSchema(map); + EXPECT_TRUE(rc.IsOk()); + + // Create a tensor, take a snapshot and restore it back, and compare. + std::shared_ptr t = std::make_shared(TensorShape({2, 3}), DataType(DataType::DE_UINT64)); + t->SetItemAt({0, 0}, 1); + t->SetItemAt({0, 1}, 2); + t->SetItemAt({0, 2}, 3); + t->SetItemAt({1, 0}, 4); + t->SetItemAt({1, 1}, 5); + t->SetItemAt({1, 2}, 6); + std::cout << *t << std::endl; + TensorTable tbl; + TensorRow row; + row.push_back(t); + int64_t row_id; + rc = myClient.WriteRow(row, &row_id); + EXPECT_TRUE(rc.IsOk()); + + // Switch off build phase. + rc = myClient.BuildPhaseDone(); + EXPECT_TRUE(rc.IsOk()); + + // Now restore from cache. 
+ row.clear(); + rc = myClient.GetRows({row_id}, &tbl); + row = tbl.front(); + EXPECT_TRUE(rc.IsOk()); + auto r = row.front(); + std::cout << *r << std::endl; + // Compare + bool cmp = (*t == *r); + EXPECT_TRUE(cmp); + + // Get back the schema and verify + std::unordered_map map_out; + rc = myClient.FetchSchema(&map_out); + EXPECT_TRUE(rc.IsOk()); + cmp = (map_out == map); + EXPECT_TRUE(cmp); + + // Test Purge and Destroy + rc = myClient.PurgeCache(); + EXPECT_TRUE(rc.IsOk()); + rc = myClient.DestroyCache(); + EXPECT_TRUE(rc.IsOk()); +} + +TEST_F(MindDataTestCacheOp, TestConcurrencyRequest) { + // Clear the rc of the master thread if any + (void)TaskManager::GetMasterThreadRc(); + TaskGroup vg; + Status rc; + CacheClient myClient(1, 1, true); // use arbitrary session of 1, size 1, spilling is true + // cksum value of 1 for CreateCache here...normally you do not directly create a cache and the cksum arg is generated. + rc = myClient.CreateCache(1, true); + EXPECT_TRUE(rc.IsOk()); + std::cout << myClient << std::endl; + std::shared_ptr t = std::make_shared(TensorShape({2, 3}), DataType(DataType::DE_UINT64)); + t->SetItemAt({0, 0}, 1); + t->SetItemAt({0, 1}, 2); + t->SetItemAt({0, 2}, 3); + t->SetItemAt({1, 0}, 4); + t->SetItemAt({1, 1}, 5); + t->SetItemAt({1, 2}, 6); + TensorTable tbl; + TensorRow row; + row.push_back(t); + // Cache tensor row t 5000 times using 10 threads. + for (auto k = 0; k < 10; ++k) { + Status vg_rc = vg.CreateAsyncTask("Test agent", [&myClient, &row]() -> Status { + TaskManager::FindMe()->Post(); + for (auto i = 0; i < 500; i++) { + RETURN_IF_NOT_OK(myClient.WriteRow(row)); + } + return Status::OK(); + }); + EXPECT_TRUE(vg_rc.IsOk()); + } + ASSERT_TRUE(vg.join_all().IsOk()); + ASSERT_TRUE(vg.GetTaskErrorIfAny().IsOk()); + rc = myClient.BuildPhaseDone(); + ASSERT_TRUE(rc.IsOk()); + // Get statistics from the server. 
+ CacheClient::ServiceStat stat{}; + rc = myClient.GetStat(&stat); + ASSERT_TRUE(rc.IsOk()); + std::cout << stat.min_row_id << ":" << stat.max_row_id << ":" << stat.num_mem_cached << ":" << stat.num_disk_cached + << "\n"; + // Expect there are 5000 rows there. + EXPECT_EQ(5000, stat.max_row_id - stat.min_row_id + 1); + // Get them all back using row id and compare with tensor t. + for (auto i = stat.min_row_id; i <= stat.max_row_id; ++i) { + tbl.clear(); + row.clear(); + rc = myClient.GetRows({i}, &tbl); + EXPECT_TRUE(rc.IsOk()); + row = tbl.front(); + auto r = row.front(); + bool cmp = (*t == *r); + EXPECT_TRUE(cmp); + } + rc = myClient.DestroyCache(); + EXPECT_TRUE(rc.IsOk()); +} + +// Simple test with a repeated cache op over random data producer +// +// RepeatOp +// | +// CacheOp +// | +// RandomDataOp +// +TEST_F(MindDataTestCacheOp, TestRandomDataCache1) { + Status rc; + int32_t rank = 0; // not used + MS_LOG(INFO) << "UT test TestRandomDataCache1"; + // Start with an empty execution tree + auto myTree = std::make_shared(); + + // Create a schema using the C api's + std::unique_ptr testSchema = std::make_unique(); + + // 2 columns. 
First column is an "image" 640,480,3 + TensorShape c1Shape({640, 480, 3}); + ColDescriptor c1("image", DataType(DataType::DE_INT8), TensorImpl::kFlexible, + rank, // not used + &c1Shape); + + // Column 2 will just be a scalar label number + TensorShape c2Shape({}); // empty shape is a 1-value scalar Tensor + ColDescriptor c2("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, rank, &c2Shape); + + testSchema->AddColumn(c1); + testSchema->AddColumn(c2); + + // RandomDataOp + std::shared_ptr myRandomDataOp; + rc = RandomDataOp::Builder() + .SetRowsPerBuffer(4) + .SetNumWorkers(4) + .SetDataSchema(std::move(testSchema)) + .SetTotalRows(50) // 50 samples for now + .Build(&myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssociateNode(myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + + // CacheOp + // size of 0, spilling is true + std::shared_ptr myClient = std::make_shared(1, 0, true); + std::shared_ptr myCacheOp; + + int64_t num_samples = 0; + int64_t start_index = 0; + auto seq_sampler = std::make_shared(num_samples, start_index); + rc = CacheOp::Builder() + .SetNumWorkers(5) + .SetClient(myClient) + .SetRowsPerBuffer(4) + .SetSampler(std::move(seq_sampler)) + .Build(&myCacheOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssociateNode(myCacheOp); + EXPECT_TRUE(rc.IsOk()); + + // RepeatOp + uint32_t numRepeats = 4; + std::shared_ptr myRepeatOp; + rc = RepeatOp::Builder(numRepeats).Build(&myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssociateNode(myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + + // Assign tree relations and root + rc = myRepeatOp->AddChild(myCacheOp); + EXPECT_TRUE(rc.IsOk()); + rc = myCacheOp->AddChild(myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssignRoot(myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + + MS_LOG(INFO) << "Launching tree and begin iteration"; + rc = myTree->Prepare(); + EXPECT_TRUE(rc.IsOk()); + + // quick check to see what tree looks like + std::ostringstream ss; + ss << *myTree; // some funny const error if I 
try to write directly to ms log stream + MS_LOG(INFO) << "Here's the tree:\n" << ss.str(); + + std::cout << *myClient << std::endl; + + rc = myTree->Launch(); + EXPECT_TRUE(rc.IsOk()); + + // Start the loop of reading tensors from our pipeline + DatasetIterator dI(myTree); + TensorRow tensorList; + rc = dI.FetchNextTensorRow(&tensorList); + EXPECT_TRUE(rc.IsOk()); + int rowCount = 0; + while (!tensorList.empty()) { + // Don't display these rows, just count them + MS_LOG(INFO) << "Row fetched #: " << rowCount; + rc = dI.FetchNextTensorRow(&tensorList); + EXPECT_TRUE(rc.IsOk()); + rowCount++; + } + ASSERT_EQ(rowCount, 200); + rc = myClient->DestroyCache(); + EXPECT_TRUE(rc.IsOk()); +} + +//// Simple test with a repeated cache op over random data producer. +//// This one will exceed memory and require a spill. +//// +//// RepeatOp +//// | +//// CacheOp +//// | +//// RandomDataOp +//// +TEST_F(MindDataTestCacheOp, TestRandomDataCacheSpill) { + Status rc; + int32_t rank = 0; // not used + MS_LOG(INFO) << "UT test TestRandomDataCacheSpill"; + // Start with an empty execution tree + auto myTree = std::make_shared(); + + // Create a schema using the C api's + std::unique_ptr testSchema = std::make_unique(); + + // 2 columns. 
First column is an "image" 640,480,3 + TensorShape c1Shape({640, 480, 3}); + ColDescriptor c1("image", DataType(DataType::DE_INT8), TensorImpl::kFlexible, + rank, // not used + &c1Shape); + + // Column 2 will just be a scalar label number + TensorShape c2Shape({}); // empty shape is a 1-value scalar Tensor + ColDescriptor c2("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, rank, &c2Shape); + + testSchema->AddColumn(c1); + testSchema->AddColumn(c2); + + // RandomDataOp + std::shared_ptr myRandomDataOp; + rc = RandomDataOp::Builder() + .SetRowsPerBuffer(2) + .SetNumWorkers(4) + .SetDataSchema(std::move(testSchema)) + .SetTotalRows(10) + .Build(&myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssociateNode(myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + + // CacheOp + int64_t num_samples = 0; + int64_t start_index = 0; + auto seq_sampler = std::make_shared(num_samples, start_index); + std::shared_ptr myClient = std::make_shared(1, 4, true); + std::shared_ptr myCacheOp; + rc = CacheOp::Builder() + .SetNumWorkers(4) + .SetClient(myClient) + .SetRowsPerBuffer(3) + .SetSampler(std::move(seq_sampler)) + .Build(&myCacheOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssociateNode(myCacheOp); + EXPECT_TRUE(rc.IsOk()); + + // RepeatOp + uint32_t numRepeats = 4; + std::shared_ptr myRepeatOp; + rc = RepeatOp::Builder(numRepeats).Build(&myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssociateNode(myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + + // Assign tree relations and root + rc = myRepeatOp->AddChild(myCacheOp); + EXPECT_TRUE(rc.IsOk()); + rc = myCacheOp->AddChild(myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssignRoot(myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + + MS_LOG(INFO) << "Launching tree and begin iteration"; + rc = myTree->Prepare(); + EXPECT_TRUE(rc.IsOk()); + + std::cout << *myClient << std::endl; + + rc = myTree->Launch(); + EXPECT_TRUE(rc.IsOk()); + + // Start the loop of reading tensors from our pipeline + DatasetIterator 
dI(myTree); + TensorRow tensorList; + rc = dI.FetchNextTensorRow(&tensorList); + EXPECT_TRUE(rc.IsOk()); + int rowCount = 0; + while (!tensorList.empty()) { + // Don't display these rows, just count them + MS_LOG(INFO) << "Row fetched #: " << rowCount; + rc = dI.FetchNextTensorRow(&tensorList); + EXPECT_TRUE(rc.IsOk()); + rowCount++; + } + ASSERT_EQ(rowCount, 40); + rc = myClient->DestroyCache(); + EXPECT_TRUE(rc.IsOk()); +} + +TEST_F(MindDataTestCacheOp, TestImageFolderCacheMerge) { + Status rc; + int64_t num_samples = 0; + int64_t start_index = 0; + auto seq_sampler = std::make_shared(num_samples, start_index); + + std::shared_ptr myClient = std::make_shared(1, 0, true); + + std::shared_ptr myMergeOp; + rc = CacheMergeOp::Builder().SetNumWorkers(3).SetOpConnectorSize(3).SetNumCleaner(2).SetClient(myClient).Build( + &myMergeOp); + EXPECT_TRUE(rc.IsOk()); + + std::shared_ptr myLookupOp; + rc = CacheLookupOp::Builder() + .SetNumWorkers(3) + .SetOpConnectorSize(3) + .SetClient(myClient) + .SetSampler(seq_sampler) + .Build(&myLookupOp); + EXPECT_TRUE(rc.IsOk()); + + std::shared_ptr so; + ImageFolderOp::Builder builder; + builder.SetSampler(myLookupOp) + .SetOpConnectorSize(3) + .SetNumWorkers(3) + .SetRowsPerBuffer(2) + .SetExtensions({".jpg", ".JPEG"}) + .SetRecursive(true) + .SetImageFolderDir(datasets_root_path_ + "/testPK/data"); + rc = builder.Build(&so); + EXPECT_TRUE(rc.IsOk()); + + // RepeatOp + uint32_t numRepeats = 4; + std::shared_ptr myRepeatOp; + rc = RepeatOp::Builder(numRepeats).Build(&myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + + auto myTree = std::make_shared(); + rc = myTree->AssociateNode(so); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssociateNode(myLookupOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssociateNode(myMergeOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssociateNode(myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssignRoot(myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + + rc = myRepeatOp->AddChild(myMergeOp); + EXPECT_TRUE(rc.IsOk()); + 
rc = myMergeOp->AddChild(myLookupOp); + EXPECT_TRUE(rc.IsOk()); + rc = myMergeOp->AddChild(so); + EXPECT_TRUE(rc.IsOk()); + + rc = myTree->Prepare(); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->Launch(); + EXPECT_TRUE(rc.IsOk()); + // Start the loop of reading tensors from our pipeline + DatasetIterator dI(myTree); + TensorRow tensorList; + rc = dI.FetchNextTensorRow(&tensorList); + EXPECT_TRUE(rc.IsOk()); + int rowCount = 0; + while (!tensorList.empty()) { + rc = dI.FetchNextTensorRow(&tensorList); + EXPECT_TRUE(rc.IsOk()); + if (rc.IsError()) { + std::cout << rc << std::endl; + break; + } + rowCount++; + } + ASSERT_EQ(rowCount, 176); + std::cout << "Row count : " << rowCount << std::endl; + rc = myClient->DestroyCache(); + EXPECT_TRUE(rc.IsOk()); +} + +//// Simple test with a repeated cache op over random data producer. +//// The difference in this one is that you do not add the sampler to the cache op directly. +//// Instead, the sampler is added as part of the leaf op construction. Then, the prepare +//// phase will pull this up from the leaf and into the cache. +//// It removes the sampler from the leaf op, which doesn't make sense there anyway for +//// the RandomDataOp which doesn't support sampling without a cache. +//// +//// RepeatOp +//// | +//// CacheOp +//// | +//// RandomDataOp +//// +TEST_F(MindDataTestCacheOp, TestCacheInheritSampler) { + Status rc; + int32_t rank = 0; // not used + MS_LOG(INFO) << "UT test TestCacheInheritSampler"; + + int64_t num_samples = 0; + int64_t start_index = 0; + auto seq_sampler = std::make_shared(num_samples, start_index); + + // Start with an empty execution tree + auto myTree = std::make_shared(); + + // Create a schema using the C api's + std::unique_ptr testSchema = std::make_unique(); + + // 2 columns. 
First column is an "image" 640,480,3 + TensorShape c1Shape({640, 480, 3}); + ColDescriptor c1("image", DataType(DataType::DE_INT8), TensorImpl::kFlexible, + rank, // not used + &c1Shape); + + // Column 2 will just be a scalar label number + TensorShape c2Shape({}); // empty shape is a 1-value scalar Tensor + ColDescriptor c2("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, rank, &c2Shape); + + testSchema->AddColumn(c1); + testSchema->AddColumn(c2); + + // RandomDataOp + std::shared_ptr myRandomDataOp; + rc = RandomDataOp::Builder() + .SetRowsPerBuffer(2) + .SetNumWorkers(4) + .SetDataSchema(std::move(testSchema)) + .SetTotalRows(10) + .SetSampler(std::move(seq_sampler)) + .Build(&myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssociateNode(myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + + // CacheOp + std::shared_ptr myClient = std::make_shared(1, 4, true); + std::shared_ptr myCacheOp; + rc = CacheOp::Builder().SetNumWorkers(4).SetClient(myClient).SetRowsPerBuffer(3).Build(&myCacheOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssociateNode(myCacheOp); + EXPECT_TRUE(rc.IsOk()); + + // RepeatOp + uint32_t numRepeats = 4; + std::shared_ptr myRepeatOp; + rc = RepeatOp::Builder(numRepeats).Build(&myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssociateNode(myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + + // Assign tree relations and root + rc = myRepeatOp->AddChild(myCacheOp); + EXPECT_TRUE(rc.IsOk()); + rc = myCacheOp->AddChild(myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssignRoot(myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + + MS_LOG(INFO) << "Launching tree and begin iteration"; + rc = myTree->Prepare(); + EXPECT_TRUE(rc.IsOk()); + + std::cout << *myClient << std::endl; + + rc = myTree->Launch(); + EXPECT_TRUE(rc.IsOk()); + + // Start the loop of reading tensors from our pipeline + DatasetIterator dI(myTree); + TensorRow tensorList; + rc = dI.FetchNextTensorRow(&tensorList); + EXPECT_TRUE(rc.IsOk()); + int rowCount = 0; + while 
(!tensorList.empty()) { + // Don't display these rows, just count them + MS_LOG(INFO) << "Row fetched #: " << rowCount; + rc = dI.FetchNextTensorRow(&tensorList); + EXPECT_TRUE(rc.IsOk()); + rowCount++; + } + ASSERT_EQ(rowCount, 40); + rc = myClient->DestroyCache(); + EXPECT_TRUE(rc.IsOk()); +} diff --git a/tests/ut/cpp/dataset/celeba_op_test.cc b/tests/ut/cpp/dataset/celeba_op_test.cc index a109739fda..ccaed122f4 100644 --- a/tests/ut/cpp/dataset/celeba_op_test.cc +++ b/tests/ut/cpp/dataset/celeba_op_test.cc @@ -19,11 +19,11 @@ #include #include "common/common.h" -#include "dataset/core/client.h" -#include "dataset/core/global_context.h" -#include "dataset/engine/datasetops/source/celeba_op.h" -#include "dataset/engine/datasetops/source/sampler/subset_random_sampler.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/datasetops/source/celeba_op.h" +#include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h" +#include "minddata/dataset/util/status.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" #include "securec.h" diff --git a/tests/ut/cpp/dataset/center_crop_op_test.cc b/tests/ut/cpp/dataset/center_crop_op_test.cc index 54c45c957e..cd0f362f64 100644 --- a/tests/ut/cpp/dataset/center_crop_op_test.cc +++ b/tests/ut/cpp/dataset/center_crop_op_test.cc @@ -15,8 +15,8 @@ */ #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/center_crop_op.h" -#include "dataset/core/cv_tensor.h" +#include "minddata/dataset/kernels/image/center_crop_op.h" +#include "minddata/dataset/core/cv_tensor.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/channel_swap_test.cc b/tests/ut/cpp/dataset/channel_swap_test.cc index f1dc1396ca..2000de15b2 100644 --- a/tests/ut/cpp/dataset/channel_swap_test.cc +++ b/tests/ut/cpp/dataset/channel_swap_test.cc @@ 
-15,8 +15,8 @@ */ #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/hwc_to_chw_op.h" -#include "dataset/core/data_type.h" +#include "minddata/dataset/kernels/image/hwc_to_chw_op.h" +#include "minddata/dataset/core/data_type.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/cifar_op_test.cc b/tests/ut/cpp/dataset/cifar_op_test.cc index b37b9acaee..ed22f4f347 100644 --- a/tests/ut/cpp/dataset/cifar_op_test.cc +++ b/tests/ut/cpp/dataset/cifar_op_test.cc @@ -20,14 +20,14 @@ #include "common/common.h" #include "common/utils.h" -#include "dataset/core/client.h" -#include "dataset/core/global_context.h" -#include "dataset/engine/datasetops/source/cifar_op.h" -#include "dataset/engine/datasetops/source/sampler/sampler.h" -#include "dataset/engine/datasetops/source/sampler/random_sampler.h" -#include "dataset/engine/datasetops/source/sampler/subset_random_sampler.h" -#include "dataset/util/path.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/datasetops/source/cifar_op.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/status.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" #include "securec.h" diff --git a/tests/ut/cpp/dataset/circular_pool_test.cc b/tests/ut/cpp/dataset/circular_pool_test.cc index c42b08ddcd..d06f846684 100644 --- a/tests/ut/cpp/dataset/circular_pool_test.cc +++ b/tests/ut/cpp/dataset/circular_pool_test.cc @@ -15,9 +15,9 @@ */ #include #include -#include "dataset/util/task_manager.h" -#include "dataset/util/circular_pool.h" -#include "dataset/util/services.h" +#include 
"minddata/dataset/util/task_manager.h" +#include "minddata/dataset/util/circular_pool.h" +#include "minddata/dataset/util/services.h" #include "common/common.h" #include "common/utils.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/client_config_test.cc b/tests/ut/cpp/dataset/client_config_test.cc index a907d50134..5cc9600b4e 100644 --- a/tests/ut/cpp/dataset/client_config_test.cc +++ b/tests/ut/cpp/dataset/client_config_test.cc @@ -20,11 +20,11 @@ #include #include #include -#include "dataset/core/client.h" +#include "minddata/dataset/core/client.h" #include "gtest/gtest.h" -#include "dataset/core/global_context.h" -#include "dataset/util/status.h" -#include "dataset/core/client.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/core/client.h" #include "common/common.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/clue_op_test.cc b/tests/ut/cpp/dataset/clue_op_test.cc index ff2f01a9ff..0935434a06 100644 --- a/tests/ut/cpp/dataset/clue_op_test.cc +++ b/tests/ut/cpp/dataset/clue_op_test.cc @@ -17,13 +17,13 @@ #include #include -#include "dataset/core/client.h" +#include "minddata/dataset/core/client.h" #include "common/common.h" #include "common/utils.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" -#include "dataset/engine/datasetops/source/clue_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/engine/datasetops/source/clue_op.h" +#include "minddata/dataset/util/status.h" namespace common = mindspore::common; diff --git a/tests/ut/cpp/dataset/coco_op_test.cc b/tests/ut/cpp/dataset/coco_op_test.cc index bcb82f8ec1..6e6d3c26e5 100644 --- a/tests/ut/cpp/dataset/coco_op_test.cc +++ b/tests/ut/cpp/dataset/coco_op_test.cc @@ -20,18 +20,18 @@ #include "common/common.h" #include "common/utils.h" -#include "dataset/core/client.h" -#include "dataset/core/global_context.h" -#include 
"dataset/engine/datasetops/source/coco_op.h" -#include "dataset/engine/datasetops/source/sampler/distributed_sampler.h" -#include "dataset/engine/datasetops/source/sampler/pk_sampler.h" -#include "dataset/engine/datasetops/source/sampler/random_sampler.h" -#include "dataset/engine/datasetops/source/sampler/sampler.h" -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" -#include "dataset/engine/datasetops/source/sampler/subset_random_sampler.h" -#include "dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" -#include "dataset/util/path.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/datasetops/source/coco_op.h" +#include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/pk_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/status.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" #include "securec.h" diff --git a/tests/ut/cpp/dataset/common/bboxop_common.cc b/tests/ut/cpp/dataset/common/bboxop_common.cc index 70e6b5a339..62c9f85348 100644 --- a/tests/ut/cpp/dataset/common/bboxop_common.cc +++ b/tests/ut/cpp/dataset/common/bboxop_common.cc @@ -26,9 +26,9 @@ #include "./tinyxml2.h" #include "opencv2/opencv.hpp" #include "common/utils.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/util/path.h" -#include "dataset/core/constants.h" +#include "minddata/dataset/core/cv_tensor.h" 
+#include "minddata/dataset/util/path.h" +#include "minddata/dataset/core/constants.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; @@ -66,17 +66,16 @@ void BBoxOpCommon::GetInputImagesAndAnnotations(const std::string &dir, std::siz MS_LOG(ERROR) << "Images folder was not found : " + images_path; EXPECT_TRUE(dir_path.Exists()); } - std::size_t files_fetched = 0; // get image file paths - while (image_dir_itr->hasNext() && files_fetched < num_of_samples) { + while (image_dir_itr->hasNext()) { Path image_path = image_dir_itr->next(); if (image_path.Extension() == std::string(kImageExt)) { paths_to_fetch.push_back(image_path.toString()); - files_fetched++; } } // sort fetched files std::sort(paths_to_fetch.begin(), paths_to_fetch.end()); + std::size_t files_fetched = 0; for (const auto &image_file : paths_to_fetch) { std::string image_ext = std::string(kImageExt); std::string annot_file = image_file; @@ -100,6 +99,10 @@ void BBoxOpCommon::GetInputImagesAndAnnotations(const std::string &dir, std::siz // add image and annotation to the tensor table TensorRow row_data({std::move(input_tensor_), std::move(annotation_tensor)}); images_and_annotations_.push_back(row_data); + files_fetched++; + if (files_fetched == num_of_samples) { + break; + } } } @@ -118,14 +121,11 @@ void BBoxOpCommon::SaveImagesWithAnnotations(BBoxOpCommon::FileType type, const bool passing_data_fetch = true; // For each bounding box draw on the image. 
for (uint32_t i = 0; i < num_of_boxes; i++) { - uint32_t x = 0; - uint32_t y = 0; - uint32_t w = 0; - uint32_t h = 0; - passing_data_fetch &= row[1]->GetUnsignedIntAt(&x, {i, 0}).IsOk(); - passing_data_fetch &= row[1]->GetUnsignedIntAt(&y, {i, 1}).IsOk(); - passing_data_fetch &= row[1]->GetUnsignedIntAt(&w, {i, 2}).IsOk(); - passing_data_fetch &= row[1]->GetUnsignedIntAt(&h, {i, 3}).IsOk(); + float x = 0.0, y = 0.0, w = 0.0, h = 0.0; + passing_data_fetch &= row[1]->GetItemAt(&x, {i, 0}).IsOk(); + passing_data_fetch &= row[1]->GetItemAt(&y, {i, 1}).IsOk(); + passing_data_fetch &= row[1]->GetItemAt(&w, {i, 2}).IsOk(); + passing_data_fetch &= row[1]->GetItemAt(&h, {i, 3}).IsOk(); if (!passing_data_fetch) { MS_LOG(ERROR) << "Fetching bbox coordinates failed in SaveImagesWithAnnotations."; EXPECT_TRUE(passing_data_fetch); @@ -193,24 +193,24 @@ bool BBoxOpCommon::LoadAnnotationFile(const std::string &path, std::shared_ptr return_value_list; + std::vector return_value_list; dsize_t bbox_count = 0; // keep track of number of bboxes in file dsize_t bbox_val_count = 4; // creating bboxes of size 4 to test function // FILE OK TO READ while (object != nullptr) { bbox_count += 1; std::string label_name; - uint32_t xmin = 0, ymin = 0, xmax = 0, ymax = 0; + float xmin = 0.0, ymin = 0.0, xmax = 0.0, ymax = 0.0; XMLElement *bbox_node = object->FirstChildElement("bndbox"); if (bbox_node != nullptr) { XMLElement *xmin_node = bbox_node->FirstChildElement("xmin"); - if (xmin_node != nullptr) xmin = xmin_node->UnsignedText(); + if (xmin_node != nullptr) xmin = xmin_node->FloatText(); XMLElement *ymin_node = bbox_node->FirstChildElement("ymin"); - if (ymin_node != nullptr) ymin = ymin_node->UnsignedText(); + if (ymin_node != nullptr) ymin = ymin_node->FloatText(); XMLElement *xmax_node = bbox_node->FirstChildElement("xmax"); - if (xmax_node != nullptr) xmax = xmax_node->UnsignedText(); + if (xmax_node != nullptr) xmax = xmax_node->FloatText(); XMLElement *ymax_node = 
bbox_node->FirstChildElement("ymax"); - if (ymax_node != nullptr) ymax = ymax_node->UnsignedText(); + if (ymax_node != nullptr) ymax = ymax_node->FloatText(); } else { MS_LOG(ERROR) << "bndbox dismatch in " + path; return false; diff --git a/tests/ut/cpp/dataset/common/bboxop_common.h b/tests/ut/cpp/dataset/common/bboxop_common.h index ba3ceb62d9..243908e7a3 100644 --- a/tests/ut/cpp/dataset/common/bboxop_common.h +++ b/tests/ut/cpp/dataset/common/bboxop_common.h @@ -17,7 +17,7 @@ #define TESTS_DATASET_UT_CORE_COMMON_DE_UT_BBOXOP_COMMON_H_ #include "cvop_common.h" -#include "dataset/util/path.h" +#include "minddata/dataset/util/path.h" namespace UT { namespace CVOP { diff --git a/tests/ut/cpp/dataset/common/cvop_common.cc b/tests/ut/cpp/dataset/common/cvop_common.cc index 6f66229e80..48d69564fd 100644 --- a/tests/ut/cpp/dataset/common/cvop_common.cc +++ b/tests/ut/cpp/dataset/common/cvop_common.cc @@ -18,9 +18,9 @@ #include #include #include "cvop_common.h" -#include "dataset/core/constants.h" +#include "minddata/dataset/core/constants.h" #include "common/utils.h" -#include "dataset/core/cv_tensor.h" +#include "minddata/dataset/core/cv_tensor.h" #include "utils/log_adapter.h" #include #include diff --git a/tests/ut/cpp/dataset/common/cvop_common.h b/tests/ut/cpp/dataset/common/cvop_common.h index 02c079fd68..59134091fd 100644 --- a/tests/ut/cpp/dataset/common/cvop_common.h +++ b/tests/ut/cpp/dataset/common/cvop_common.h @@ -19,7 +19,7 @@ #include #include #include "common.h" -#include "dataset/kernels/image/image_utils.h" +#include "minddata/dataset/kernels/image/image_utils.h" namespace UT { namespace CVOP { diff --git a/tests/ut/cpp/dataset/concat_op_test.cc b/tests/ut/cpp/dataset/concat_op_test.cc index 70d0268ec7..9e991ce0d3 100644 --- a/tests/ut/cpp/dataset/concat_op_test.cc +++ b/tests/ut/cpp/dataset/concat_op_test.cc @@ -19,7 +19,7 @@ #include "common/common.h" #include "common/utils.h" -#include "dataset/core/client.h" +#include 
"minddata/dataset/core/client.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/concatenate_op_test.cc b/tests/ut/cpp/dataset/concatenate_op_test.cc index 1ceedbac38..dc2fc69266 100644 --- a/tests/ut/cpp/dataset/concatenate_op_test.cc +++ b/tests/ut/cpp/dataset/concatenate_op_test.cc @@ -14,7 +14,7 @@ * limitations under the License. */ #include "common/common.h" -#include "dataset/kernels/data/concatenate_op.h" +#include "minddata/dataset/kernels/data/concatenate_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/connector_test.cc b/tests/ut/cpp/dataset/connector_test.cc index 7ee36cc2c0..0fc5b100d7 100644 --- a/tests/ut/cpp/dataset/connector_test.cc +++ b/tests/ut/cpp/dataset/connector_test.cc @@ -23,8 +23,8 @@ #include "common/common.h" -#include "dataset/engine/connector.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/engine/connector.h" +#include "minddata/dataset/util/task_manager.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/cut_out_op_test.cc b/tests/ut/cpp/dataset/cut_out_op_test.cc index 462fb3a875..5d24d9c3f9 100644 --- a/tests/ut/cpp/dataset/cut_out_op_test.cc +++ b/tests/ut/cpp/dataset/cut_out_op_test.cc @@ -15,7 +15,7 @@ */ #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/cut_out_op.h" +#include "minddata/dataset/kernels/image/cut_out_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/cyclic_array_test.cc b/tests/ut/cpp/dataset/cyclic_array_test.cc index 55f75c403f..380436de1b 100644 --- a/tests/ut/cpp/dataset/cyclic_array_test.cc +++ b/tests/ut/cpp/dataset/cyclic_array_test.cc @@ -19,7 +19,7 @@ #include "common/cvop_common.h" #include "gtest/gtest.h" #include "securec.h" -#include "dataset/engine/perf/cyclic_array.h" +#include "minddata/dataset/engine/perf/cyclic_array.h" 
#include using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/datatype_test.cc b/tests/ut/cpp/dataset/datatype_test.cc index a55853c4c5..b81618dc24 100644 --- a/tests/ut/cpp/dataset/datatype_test.cc +++ b/tests/ut/cpp/dataset/datatype_test.cc @@ -15,16 +15,14 @@ */ #include #include "./securec.h" -#include "dataset/core/data_type.h" +#include "minddata/dataset/core/data_type.h" #include "common/common.h" #include "gtest/gtest.h" #include -#include "dataset/core/constants.h" +#include "minddata/dataset/core/constants.h" using namespace mindspore::dataset; -namespace py = pybind11; - class MindDataTestDatatype : public UT::Common { public: MindDataTestDatatype() = default; diff --git a/tests/ut/cpp/dataset/decode_op_test.cc b/tests/ut/cpp/dataset/decode_op_test.cc index 7f3e129ac0..1cd03099ce 100644 --- a/tests/ut/cpp/dataset/decode_op_test.cc +++ b/tests/ut/cpp/dataset/decode_op_test.cc @@ -16,7 +16,7 @@ #include #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/decode_op.h" +#include "minddata/dataset/kernels/image/decode_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/duplicate_op_test.cc b/tests/ut/cpp/dataset/duplicate_op_test.cc index b7ce32f655..93779b084d 100644 --- a/tests/ut/cpp/dataset/duplicate_op_test.cc +++ b/tests/ut/cpp/dataset/duplicate_op_test.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/core/client.h" +#include "minddata/dataset/core/client.h" #include "common/common.h" #include "gtest/gtest.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/data/duplicate_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/data/duplicate_op.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/execution_tree_test.cc b/tests/ut/cpp/dataset/execution_tree_test.cc index 529644331a..b871dd00d8 100644 --- a/tests/ut/cpp/dataset/execution_tree_test.cc +++ b/tests/ut/cpp/dataset/execution_tree_test.cc @@ -14,11 +14,11 @@ * limitations under the License. */ #include -#include "dataset/util/circular_pool.h" -#include "dataset/core/client.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/engine/datasetops/shuffle_op.h" -#include "dataset/engine/datasetops/source/tf_reader_op.h" +#include "minddata/dataset/util/circular_pool.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/datasetops/shuffle_op.h" +#include "minddata/dataset/engine/datasetops/source/tf_reader_op.h" #include "common/common.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/fill_op_test.cc b/tests/ut/cpp/dataset/fill_op_test.cc index d43b7d7548..20e323cc8d 100644 --- a/tests/ut/cpp/dataset/fill_op_test.cc +++ b/tests/ut/cpp/dataset/fill_op_test.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ #include "common/common.h" -#include "dataset/kernels/data/fill_op.h" +#include "minddata/dataset/kernels/data/fill_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/filter_op_test.cc b/tests/ut/cpp/dataset/filter_op_test.cc index 45ee714337..3e5be8dc04 100644 --- a/tests/ut/cpp/dataset/filter_op_test.cc +++ b/tests/ut/cpp/dataset/filter_op_test.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/util/circular_pool.h" -#include "dataset/core/client.h" +#include "minddata/dataset/util/circular_pool.h" +#include "minddata/dataset/core/client.h" #include "common/common.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/global_context_test.cc b/tests/ut/cpp/dataset/global_context_test.cc index bb75d941aa..cd4c970ae6 100644 --- a/tests/ut/cpp/dataset/global_context_test.cc +++ b/tests/ut/cpp/dataset/global_context_test.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/core/global_context.h" +#include "minddata/dataset/core/global_context.h" #include "common/common.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/gnn_graph_test.cc b/tests/ut/cpp/dataset/gnn_graph_test.cc index dc74e66b0c..c4dd7b055c 100644 --- a/tests/ut/cpp/dataset/gnn_graph_test.cc +++ b/tests/ut/cpp/dataset/gnn_graph_test.cc @@ -20,9 +20,9 @@ #include "common/common.h" #include "gtest/gtest.h" -#include "dataset/util/status.h" -#include "dataset/engine/gnn/node.h" -#include "dataset/engine/gnn/graph_loader.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/engine/gnn/node.h" +#include "minddata/dataset/engine/gnn/graph_loader.h" using namespace mindspore::dataset; using namespace mindspore::dataset::gnn; @@ -49,9 +49,10 @@ TEST_F(MindDataTestGNNGraph, TestGraphLoader) { EdgeTypeMap e_type_map; NodeFeatureMap n_feature_map; EdgeFeatureMap e_feature_map; - DefaultFeatureMap default_feature_map; + DefaultNodeFeatureMap default_node_feature_map; + DefaultEdgeFeatureMap default_edge_feature_map; EXPECT_TRUE(gl.GetNodesAndEdges(&n_id_map, &e_id_map, &n_type_map, &e_type_map, &n_feature_map, &e_feature_map, - &default_feature_map) + &default_node_feature_map, &default_edge_feature_map) .IsOk()); EXPECT_EQ(n_id_map.size(), 20); EXPECT_EQ(e_id_map.size(), 40); @@ -119,6 +120,17 @@ TEST_F(MindDataTestGNNGraph, TestGetSampledNeighbors) { std::transform(edges->begin(), edges->end(), edge_list.begin(), [](const EdgeIdType edge) { return edge; }); + TensorRow edge_features; + s = graph.GetEdgeFeature(edges, meta_info.edge_feature_type, &edge_features); + EXPECT_TRUE(s.IsOk()); + EXPECT_TRUE(edge_features[0]->ToString() == + "Tensor (shape: <40>, Type: int32)\n" + "[0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0]"); + EXPECT_TRUE(edge_features[1]->ToString() == + "Tensor (shape: <40>, Type: float32)\n" + 
"[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9,2,2.1,2.2,2.3,2.4,2.5,2.6,2." + "7,2.8,2.9,3,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8,3.9,4]"); + std::shared_ptr nodes; s = graph.GetNodesFromEdges(edge_list, &nodes); EXPECT_TRUE(s.IsOk()); @@ -247,4 +259,30 @@ TEST_F(MindDataTestGNNGraph, TestRandomWalk) { s = graph.RandomWalk(node_list, meta_path, 2.0, 0.5, -1, &walk_path); EXPECT_TRUE(s.IsOk()); EXPECT_TRUE(walk_path->shape().ToString() == "<33,60>"); -} \ No newline at end of file +} + +TEST_F(MindDataTestGNNGraph, TestRandomWalkDefaults) { + std::string path = "data/mindrecord/testGraphData/sns"; + Graph graph(path, 1); + Status s = graph.Init(); + EXPECT_TRUE(s.IsOk()); + + MetaInfo meta_info; + s = graph.GetMetaInfo(&meta_info); + EXPECT_TRUE(s.IsOk()); + + std::shared_ptr nodes; + s = graph.GetAllNodes(meta_info.node_type[0], &nodes); + EXPECT_TRUE(s.IsOk()); + std::vector node_list; + for (auto itr = nodes->begin(); itr != nodes->end(); ++itr) { + node_list.push_back(*itr); + } + + print_int_vec(node_list, "node list "); + std::vector meta_path(59, 1); + std::shared_ptr walk_path; + s = graph.RandomWalk(node_list, meta_path, 1.0, 1.0, -1, &walk_path); + EXPECT_TRUE(s.IsOk()); + EXPECT_TRUE(walk_path->shape().ToString() == "<33,60>"); +} diff --git a/tests/ut/cpp/dataset/image_folder_op_test.cc b/tests/ut/cpp/dataset/image_folder_op_test.cc index 576c5abbfc..3168efa196 100644 --- a/tests/ut/cpp/dataset/image_folder_op_test.cc +++ b/tests/ut/cpp/dataset/image_folder_op_test.cc @@ -19,18 +19,18 @@ #include #include "common/common.h" #include "common/utils.h" -#include "dataset/core/client.h" -#include "dataset/core/global_context.h" -#include "dataset/engine/datasetops/source/image_folder_op.h" -#include "dataset/engine/datasetops/source/sampler/distributed_sampler.h" -#include "dataset/engine/datasetops/source/sampler/pk_sampler.h" -#include "dataset/engine/datasetops/source/sampler/random_sampler.h" -#include 
"dataset/engine/datasetops/source/sampler/sampler.h" -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" -#include "dataset/engine/datasetops/source/sampler/subset_random_sampler.h" -#include "dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" -#include "dataset/util/path.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/datasetops/source/image_folder_op.h" +#include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/pk_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/status.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" #include "securec.h" diff --git a/tests/ut/cpp/dataset/interrupt_test.cc b/tests/ut/cpp/dataset/interrupt_test.cc index 7ab608b9ae..8a06413175 100644 --- a/tests/ut/cpp/dataset/interrupt_test.cc +++ b/tests/ut/cpp/dataset/interrupt_test.cc @@ -15,10 +15,10 @@ */ #include "common/common.h" #include "utils/log_adapter.h" -#include "dataset/util/services.h" -#include "dataset/util/intrp_service.h" -#include "dataset/util/task_manager.h" -#include "dataset/util/queue.h" +#include "minddata/dataset/util/services.h" +#include "minddata/dataset/util/intrp_service.h" +#include "minddata/dataset/util/task_manager.h" +#include "minddata/dataset/util/queue.h" using namespace mindspore::dataset; using mindspore::MsLogLevel::INFO; diff --git 
a/tests/ut/cpp/dataset/jieba_tokenizer_op_test.cc b/tests/ut/cpp/dataset/jieba_tokenizer_op_test.cc index c5a733f285..85b3384d36 100644 --- a/tests/ut/cpp/dataset/jieba_tokenizer_op_test.cc +++ b/tests/ut/cpp/dataset/jieba_tokenizer_op_test.cc @@ -18,7 +18,7 @@ #include #include "common/common.h" -#include "dataset/text/kernels/jieba_tokenizer_op.h" +#include "minddata/dataset/text/kernels/jieba_tokenizer_op.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" @@ -39,21 +39,22 @@ TEST_F(MindDataTestJiebaTokenizerOp, TestJieba_opFuntions) { std::string dataset_path = datasets_root_path_ + "/jiebadict"; std::string hmm_path = dataset_path + "/hmm_model.utf8"; std::string mp_path = dataset_path + "/jieba.dict.utf8"; - std::shared_ptr output_tensor; + TensorRow input, output; std::unique_ptr op(new JiebaTokenizerOp(hmm_path, mp_path)); std::shared_ptr input_tensor = std::make_shared("今天天气太好了我们一起去外面玩吧"); - Status s = op->Compute(input_tensor, &output_tensor); + input.push_back(input_tensor); + Status s = op->Compute(input, &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output_tensor->Rank(), 1); - EXPECT_EQ(output_tensor->Size(), 7); - CheckEqual(output_tensor, {0}, "今天天气"); - CheckEqual(output_tensor, {1}, "太好了"); - CheckEqual(output_tensor, {2}, "我们"); - CheckEqual(output_tensor, {3}, "一起"); - CheckEqual(output_tensor, {4}, "去"); - CheckEqual(output_tensor, {5}, "外面"); - CheckEqual(output_tensor, {6}, "玩吧"); + EXPECT_EQ(output[0]->Rank(), 1); + EXPECT_EQ(output[0]->Size(), 7); + CheckEqual(output[0], {0}, "今天天气"); + CheckEqual(output[0], {1}, "太好了"); + CheckEqual(output[0], {2}, "我们"); + CheckEqual(output[0], {3}, "一起"); + CheckEqual(output[0], {4}, "去"); + CheckEqual(output[0], {5}, "外面"); + CheckEqual(output[0], {6}, "玩吧"); } TEST_F(MindDataTestJiebaTokenizerOp, TestJieba_opAdd) { @@ -61,16 +62,17 @@ TEST_F(MindDataTestJiebaTokenizerOp, TestJieba_opAdd) { std::string dataset_path = datasets_root_path_ + "/jiebadict"; std::string hmm_path = dataset_path + 
"/hmm_model.utf8"; std::string mp_path = dataset_path + "/jieba.dict.utf8"; - std::shared_ptr output_tensor; + TensorRow input, output; std::unique_ptr op(new JiebaTokenizerOp(hmm_path, mp_path)); op->AddWord("男默女泪"); std::shared_ptr input_tensor = std::make_shared("男默女泪"); - Status s = op->Compute(input_tensor, &output_tensor); + input.push_back(input_tensor); + Status s = op->Compute(input, &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output_tensor->Rank(), 1); - EXPECT_EQ(output_tensor->Size(), 1); - CheckEqual(output_tensor, {0}, "男默女泪"); + EXPECT_EQ(output[0]->Rank(), 1); + EXPECT_EQ(output[0]->Size(), 1); + CheckEqual(output[0], {0}, "男默女泪"); } TEST_F(MindDataTestJiebaTokenizerOp, TestJieba_opEmpty) { @@ -78,14 +80,15 @@ TEST_F(MindDataTestJiebaTokenizerOp, TestJieba_opEmpty) { std::string dataset_path = datasets_root_path_ + "/jiebadict"; std::string hmm_path = dataset_path + "/hmm_model.utf8"; std::string mp_path = dataset_path + "/jieba.dict.utf8"; - std::shared_ptr output_tensor; + TensorRow input, output; std::unique_ptr op(new JiebaTokenizerOp(hmm_path, mp_path)); op->AddWord("男默女泪"); std::shared_ptr input_tensor = std::make_shared(""); - Status s = op->Compute(input_tensor, &output_tensor); + input.push_back(input_tensor); + Status s = op->Compute(input, &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output_tensor->Rank(), 1); - EXPECT_EQ(output_tensor->Size(), 1); - CheckEqual(output_tensor, {0}, ""); + EXPECT_EQ(output[0]->Rank(), 1); + EXPECT_EQ(output[0]->Size(), 1); + CheckEqual(output[0], {0}, ""); } \ No newline at end of file diff --git a/tests/ut/cpp/dataset/manifest_op_test.cc b/tests/ut/cpp/dataset/manifest_op_test.cc index 6317a6a345..a6eef4aaa2 100644 --- a/tests/ut/cpp/dataset/manifest_op_test.cc +++ b/tests/ut/cpp/dataset/manifest_op_test.cc @@ -20,12 +20,12 @@ #include "common/common.h" #include "common/utils.h" -#include "dataset/core/client.h" -#include "dataset/core/global_context.h" -#include 
"dataset/engine/datasetops/source/manifest_op.h" -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" -#include "dataset/engine/datasetops/source/sampler/subset_random_sampler.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/datasetops/source/manifest_op.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h" +#include "minddata/dataset/util/status.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" #include "securec.h" diff --git a/tests/ut/cpp/dataset/map_op_test.cc b/tests/ut/cpp/dataset/map_op_test.cc index 8b6a152488..4e9cfe9ec9 100644 --- a/tests/ut/cpp/dataset/map_op_test.cc +++ b/tests/ut/cpp/dataset/map_op_test.cc @@ -17,13 +17,14 @@ #include #include + #include "common/common.h" -#include "dataset/core/client.h" -#include "dataset/core/tensor.h" -#include "dataset/engine/datasetops/source/image_folder_op.h" -#include "dataset/kernels/image/decode_op.h" -#include "dataset/kernels/image/resize_op.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/datasetops/source/image_folder_op.h" +#include "minddata/dataset/kernels/image/decode_op.h" +#include "minddata/dataset/kernels/image/resize_op.h" +#include "minddata/dataset/kernels/tensor_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; @@ -35,93 +36,99 @@ namespace dataset { namespace test { class NoOp : public TensorOp { public: - NoOp() {}; + NoOp(){}; + + ~NoOp(){}; - ~NoOp() {}; + Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override { + *output = std::move(input); + return Status::OK(); + }; - Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override { - *output = 
std::move(input); - return Status::OK(); - }; + void Print(std::ostream &out) const override { out << "NoOp"; }; - void Print(std::ostream &out) const override { out << "NoOp"; }; + std::string Name() const override { return kNoOp; } }; class ThreeToOneOp : public TensorOp { public: - ThreeToOneOp() {}; + ThreeToOneOp(){}; + + ~ThreeToOneOp(){}; - ~ThreeToOneOp() {}; + uint32_t NumInput() override { return 3; } + // Compute function that holds the actual implementation of the operation. + Status Compute(const TensorRow &input, TensorRow *output) override { + output->push_back(input[0]); + return Status::OK(); + }; - uint32_t NumInput() override { return 3; } - // Compute function that holds the actual implementation of the operation. - Status Compute(const TensorRow &input, TensorRow *output) override { - output->push_back(input[0]); - return Status::OK(); - }; + void Print(std::ostream &out) const override { out << "ThreeToOneOp"; }; - void Print(std::ostream &out) const override { out << "ThreeToOneOp"; }; + std::string Name() const override { return "ThreeToOneOp"; } }; class OneToThreeOp : public TensorOp { public: - OneToThreeOp() {}; + OneToThreeOp(){}; - ~OneToThreeOp() {}; + ~OneToThreeOp(){}; uint32_t NumOutput() override { return 3; } - // Compute function that holds the actual implementation of the operation. - // Simply pushing the same shared pointer of the first element of input vector three times. - Status Compute(const TensorRow &input, TensorRow *output) override { - output->push_back(input[0]); - output->push_back(input[0]); - output->push_back(input[0]); - return Status::OK(); - }; + // Compute function that holds the actual implementation of the operation. + // Simply pushing the same shared pointer of the first element of input vector three times. 
+ Status Compute(const TensorRow &input, TensorRow *output) override { + output->push_back(input[0]); + output->push_back(input[0]); + output->push_back(input[0]); + return Status::OK(); + }; - void Print(std::ostream &out) const override { out << "OneToThreeOp"; }; + void Print(std::ostream &out) const override { out << "OneToThreeOp"; }; + + std::string Name() const override { return "OneToThreeOp"; }; }; } // namespace test } // namespace dataset } // namespace mindspore - class MindDataTestMapOp : public UT::DatasetOpTesting { public: - void SetUp() override { - DatasetOpTesting::SetUp(); - dataset_path_ = datasets_root_path_ + "" + "/testDataset2/testDataset2.data"; - schema_path_ = datasets_root_path_ + "" + "/testDataset2/datasetSchema.json"; + void SetUp() override { + DatasetOpTesting::SetUp(); + dataset_path_ = datasets_root_path_ + "" + "/testDataset2/testDataset2.data"; + schema_path_ = datasets_root_path_ + "" + "/testDataset2/datasetSchema.json"; - GlobalInit(); + GlobalInit(); - // Start with an empty execution tree - my_tree_ = std::make_shared(); - } + // Start with an empty execution tree + my_tree_ = std::make_shared(); + } - std::shared_ptr CreateTFReaderOp() { - std::shared_ptr my_tfreader_op; - TFReaderOp::Builder builder; - builder.SetDatasetFilesList({dataset_path_}) - .SetColumnsToLoad({"image", "label", "A", "B"}) - .SetRowsPerBuffer(2) - .SetWorkerConnectorSize(2) - .SetNumWorkers(2); - - std::unique_ptr schema = std::make_unique(); - schema->LoadSchemaFile(schema_path_, {}); - builder.SetDataSchema(std::move(schema)); - - Status rc = builder.Build(&my_tfreader_op); - EXPECT_TRUE(rc.IsOk()); - return my_tfreader_op; - } + std::shared_ptr CreateTFReaderOp() { + std::shared_ptr my_tfreader_op; + TFReaderOp::Builder builder; + builder.SetDatasetFilesList({dataset_path_}) + .SetColumnsToLoad({"image", "label", "A", "B"}) + .SetRowsPerBuffer(2) + .SetWorkerConnectorSize(2) + .SetNumWorkers(2); + + std::unique_ptr schema = std::make_unique(); + 
schema->LoadSchemaFile(schema_path_, {}); + builder.SetDataSchema(std::move(schema)); + + Status rc = builder.Build(&my_tfreader_op); + EXPECT_TRUE(rc.IsOk()); + return my_tfreader_op; + } + + std::shared_ptr my_tree_; - std::shared_ptr my_tree_; private: - std::string dataset_path_; - std::string schema_path_; + std::string dataset_path_; + std::string schema_path_; }; std::shared_ptr ImageFolder(int64_t num_works, int64_t rows, int64_t conns, std::string path, @@ -148,10 +155,7 @@ TEST_F(MindDataTestMapOp, TestAsMap) { my_func_list.push_back(my_no_op); std::shared_ptr my_map_op; MapOp::Builder builder; - builder.SetInColNames({"image"}) - .SetOutColNames({"X"}) - .SetTensorFuncs(std::move(my_func_list)) - .SetNumWorkers(1); + builder.SetInColNames({"image"}).SetOutColNames({"X"}).SetTensorFuncs(std::move(my_func_list)).SetNumWorkers(1); rc = builder.Build(&my_map_op); rc = my_tree_->AssociateNode(my_map_op); EXPECT_TRUE(rc.IsOk()); @@ -200,9 +204,9 @@ TEST_F(MindDataTestMapOp, Test3to1) { std::shared_ptr my_map_op; MapOp::Builder builder; builder.SetInColNames({"image", "A", "B"}) - .SetOutColNames({"X"}) - .SetTensorFuncs(std::move(my_func_list)) - .SetNumWorkers(1); + .SetOutColNames({"X"}) + .SetTensorFuncs(std::move(my_func_list)) + .SetNumWorkers(1); rc = builder.Build(&my_map_op); EXPECT_TRUE(rc.IsOk()); rc = my_tree_->AssociateNode(my_map_op); @@ -252,10 +256,9 @@ TEST_F(MindDataTestMapOp, Test1to3) { std::shared_ptr my_map_op; MapOp::Builder builder; builder.SetInColNames({"image"}) - .SetOutColNames({"X", "Y", "Z"}) - .SetTensorFuncs(std::move(my_func_list)) - .SetNumWorkers(1); - + .SetOutColNames({"X", "Y", "Z"}) + .SetTensorFuncs(std::move(my_func_list)) + .SetNumWorkers(1); // ProjectOp std::vector columns_to_project = {"X", "Y", "Z", "label", "A", "B"}; @@ -296,19 +299,18 @@ TEST_F(MindDataTestMapOp, Test1to3) { // Getting the next row as vector (by position). 
TensorRow tensor_list; - rc =di.FetchNextTensorRow(&tensor_list); + rc = di.FetchNextTensorRow(&tensor_list); EXPECT_TRUE(rc.IsOk()); // Based on the schema file, create the golden result to compare with. std::vector golden_types({DataType::Type::DE_UINT8, DataType::Type::DE_UINT8, DataType::Type::DE_UINT8, DataType::Type::DE_INT64, - DataType::Type::DE_FLOAT32, DataType::Type::DE_INT64} - ); + DataType::Type::DE_FLOAT32, DataType::Type::DE_INT64}); std::vector golden_ranks({3, 3, 3, 1, 4, 1}); std::vector golden_shapes({TensorShape({3, 4, 2}), TensorShape({3, 4, 2}), TensorShape({3, 4, 2}), - TensorShape({7}), TensorShape({1, 13, 14, 12}), TensorShape({9})} ); + TensorShape({7}), TensorShape({1, 13, 14, 12}), TensorShape({9})}); while (!tensor_list.empty()) { for (uint32_t i = 0; i < tensor_list.size(); i++) { @@ -343,9 +345,9 @@ TEST_F(MindDataTestMapOp, TestMultiTensorOp) { std::shared_ptr my_map_op; MapOp::Builder builder; builder.SetInColNames({"image", "A", "B"}) - .SetOutColNames({"X", "Y", "Z"}) - .SetTensorFuncs(std::move(my_func_list)) - .SetNumWorkers(1); + .SetOutColNames({"X", "Y", "Z"}) + .SetTensorFuncs(std::move(my_func_list)) + .SetNumWorkers(1); rc = builder.Build(&my_map_op); EXPECT_TRUE(rc.IsOk()); rc = my_tree_->AssociateNode(my_map_op); @@ -405,10 +407,7 @@ TEST_F(MindDataTestMapOp, TestTFReaderRepeatMap) { std::shared_ptr my_map_op; MapOp::Builder builder; - builder.SetInColNames({"label"}) - .SetOutColNames({}) - .SetTensorFuncs(std::move(my_func_list)) - .SetNumWorkers(5); + builder.SetInColNames({"label"}).SetOutColNames({}).SetTensorFuncs(std::move(my_func_list)).SetNumWorkers(5); rc = builder.Build(&my_map_op); EXPECT_TRUE(rc.IsOk()); rc = my_tree_->AssociateNode(my_map_op); @@ -440,7 +439,6 @@ TEST_F(MindDataTestMapOp, TestTFReaderRepeatMap) { MS_LOG(INFO) << "row_count: " << row_count << "."; rc = di.FetchNextTensorRow(&tensor_list); EXPECT_TRUE(rc.IsOk()); - } ASSERT_EQ(row_count, 10 * num_repeats); } @@ -467,10 +465,7 @@ 
TEST_F(MindDataTestMapOp, TestTFReaderMapRepeat) { std::shared_ptr my_map_op; MapOp::Builder builder; - builder.SetInColNames({"label"}) - .SetOutColNames({}) - .SetTensorFuncs(std::move(my_func_list)) - .SetNumWorkers(50); + builder.SetInColNames({"label"}).SetOutColNames({}).SetTensorFuncs(std::move(my_func_list)).SetNumWorkers(50); rc = builder.Build(&my_map_op); EXPECT_TRUE(rc.IsOk()); rc = my_tree_->AssociateNode(my_map_op); @@ -536,25 +531,18 @@ TEST_F(MindDataTestMapOp, TFReader_Decode_Repeat_Resize) { std::shared_ptr my_map_decode_op; MapOp::Builder builder; - builder.SetInColNames({"image"}) - .SetOutColNames({}) - .SetTensorFuncs(std::move(my_func_list)) - .SetNumWorkers(4); + builder.SetInColNames({"image"}).SetOutColNames({}).SetTensorFuncs(std::move(my_func_list)).SetNumWorkers(4); rc = builder.Build(&my_map_decode_op); EXPECT_TRUE(rc.IsOk()); rc = my_tree_->AssociateNode(my_map_decode_op); EXPECT_TRUE(rc.IsOk()); - auto resize_op = std::make_shared(300, 300); std::vector> my_func_list2; my_func_list2.push_back(resize_op); std::shared_ptr my_map_resize_op; MapOp::Builder builder2; - builder2.SetInColNames({"image"}) - .SetOutColNames({}) - .SetTensorFuncs(std::move(my_func_list2)) - .SetNumWorkers(5); + builder2.SetInColNames({"image"}).SetOutColNames({}).SetTensorFuncs(std::move(my_func_list2)).SetNumWorkers(5); rc = builder2.Build(&my_map_resize_op); EXPECT_TRUE(rc.IsOk()); rc = my_tree_->AssociateNode(my_map_resize_op); @@ -610,10 +598,7 @@ TEST_F(MindDataTestMapOp, ImageFolder_Decode_Repeat_Resize) { std::shared_ptr map_decode_map; MapOp::Builder map_decode_builder; - map_decode_builder.SetInColNames({"image"}) - .SetOutColNames({}) - .SetTensorFuncs(func_list) - .SetNumWorkers(4); + map_decode_builder.SetInColNames({"image"}).SetOutColNames({}).SetTensorFuncs(func_list).SetNumWorkers(4); rc = map_decode_builder.Build(&map_decode_map); EXPECT_TRUE(rc.IsOk()); @@ -622,10 +607,7 @@ TEST_F(MindDataTestMapOp, ImageFolder_Decode_Repeat_Resize) { 
func_list2.push_back(resize_op); std::shared_ptr map_resize_op; MapOp::Builder map_resize_builder; - map_resize_builder.SetInColNames({"image"}) - .SetOutColNames({}) - .SetTensorFuncs(func_list2) - .SetNumWorkers(5); + map_resize_builder.SetInColNames({"image"}).SetOutColNames({}).SetTensorFuncs(func_list2).SetNumWorkers(5); rc = map_resize_builder.Build(&map_resize_op); EXPECT_TRUE(rc.IsOk()); @@ -704,7 +686,6 @@ TEST_F(MindDataTestMapOp, ImageFolder_Decode_Repeat_Resize) { EXPECT_EQ(result, result2); } - TEST_F(MindDataTestMapOp, ImageFolder_Decode_Repeat_Resize_NoInputColumns) { Status rc; MS_LOG(INFO) << "Doing ImageFolder_Decode_Repeat_Resize_NoInputColumns."; @@ -722,10 +703,7 @@ TEST_F(MindDataTestMapOp, ImageFolder_Decode_Repeat_Resize_NoInputColumns) { std::shared_ptr map_decode_map; MapOp::Builder map_decode_builder; - map_decode_builder.SetInColNames({}) - .SetOutColNames({}) - .SetTensorFuncs(func_list) - .SetNumWorkers(4); + map_decode_builder.SetInColNames({}).SetOutColNames({}).SetTensorFuncs(func_list).SetNumWorkers(4); rc = map_decode_builder.Build(&map_decode_map); EXPECT_TRUE(rc.IsOk()); @@ -761,3 +739,5 @@ TEST_F(MindDataTestMapOp, ImageFolder_Decode_Repeat_Resize_NoInputColumns) { } EXPECT_TRUE(i == 88); } + + diff --git a/tests/ut/cpp/dataset/mask_test.cc b/tests/ut/cpp/dataset/mask_test.cc index 9ff5f51fce..609d5bf447 100644 --- a/tests/ut/cpp/dataset/mask_test.cc +++ b/tests/ut/cpp/dataset/mask_test.cc @@ -15,15 +15,15 @@ */ #include #include -#include "dataset/core/client.h" +#include "minddata/dataset/core/client.h" #include "common/common.h" #include "gtest/gtest.h" #include "securec.h" -#include "dataset/core/tensor.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/core/data_type.h" -#include "dataset/kernels/data/mask_op.h" -#include "dataset/kernels/data/data_utils.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/core/data_type.h" +#include 
"minddata/dataset/kernels/data/mask_op.h" +#include "minddata/dataset/kernels/data/data_utils.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/memory_pool_test.cc b/tests/ut/cpp/dataset/memory_pool_test.cc index 136f3fe1b8..b5907655dc 100644 --- a/tests/ut/cpp/dataset/memory_pool_test.cc +++ b/tests/ut/cpp/dataset/memory_pool_test.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "dataset/util/memory_pool.h" -#include "dataset/util/circular_pool.h" -#include "dataset/util/system_pool.h" -#include "dataset/util/allocator.h" +#include "minddata/dataset/util/memory_pool.h" +#include "minddata/dataset/util/circular_pool.h" +#include "minddata/dataset/util/system_pool.h" +#include "minddata/dataset/util/allocator.h" #include "common/common.h" #include "gtest/gtest.h" diff --git a/tests/ut/cpp/dataset/mind_record_op_test.cc b/tests/ut/cpp/dataset/mind_record_op_test.cc index b2cbdf027e..c9067535d6 100644 --- a/tests/ut/cpp/dataset/mind_record_op_test.cc +++ b/tests/ut/cpp/dataset/mind_record_op_test.cc @@ -16,14 +16,14 @@ #include #include #include -#include "dataset/core/client.h" +#include "minddata/dataset/core/client.h" #include "common/common.h" #include "common/utils.h" #include "gtest/gtest.h" -#include "mindrecord/include/shard_category.h" -#include "mindrecord/include/shard_error.h" -#include "mindrecord/include/shard_sample.h" -#include "mindrecord/include/shard_shuffle.h" +#include "minddata/mindrecord/include/shard_category.h" +#include "minddata/mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/shard_sample.h" +#include "minddata/mindrecord/include/shard_shuffle.h" #include "utils/log_adapter.h" namespace common = mindspore::common; diff --git a/tests/ut/cpp/dataset/mnist_op_test.cc b/tests/ut/cpp/dataset/mnist_op_test.cc index da78cb6f7f..dfceeaa06a 100644 --- a/tests/ut/cpp/dataset/mnist_op_test.cc +++ b/tests/ut/cpp/dataset/mnist_op_test.cc @@ -20,18 +20,18 @@ #include "common/utils.h" 
#include "common/common.h" -#include "dataset/core/client.h" -#include "dataset/core/global_context.h" -#include "dataset/engine/datasetops/source/mnist_op.h" -#include "dataset/engine/datasetops/source/sampler/distributed_sampler.h" -#include "dataset/engine/datasetops/source/sampler/pk_sampler.h" -#include "dataset/engine/datasetops/source/sampler/random_sampler.h" -#include "dataset/engine/datasetops/source/sampler/sampler.h" -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" -#include "dataset/engine/datasetops/source/sampler/subset_random_sampler.h" -#include "dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" -#include "dataset/util/path.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/datasetops/source/mnist_op.h" +#include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/pk_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/status.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" #include "securec.h" diff --git a/tests/ut/cpp/dataset/normalize_op_test.cc b/tests/ut/cpp/dataset/normalize_op_test.cc index 05ac3f6289..31791e0e66 100644 --- a/tests/ut/cpp/dataset/normalize_op_test.cc +++ b/tests/ut/cpp/dataset/normalize_op_test.cc @@ -15,8 +15,8 @@ */ #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/normalize_op.h" -#include 
"dataset/core/cv_tensor.h" +#include "minddata/dataset/kernels/image/normalize_op.h" +#include "minddata/dataset/core/cv_tensor.h" #include "utils/log_adapter.h" #include diff --git a/tests/ut/cpp/dataset/one_hot_op_test.cc b/tests/ut/cpp/dataset/one_hot_op_test.cc index c414e371e5..2617ae4536 100644 --- a/tests/ut/cpp/dataset/one_hot_op_test.cc +++ b/tests/ut/cpp/dataset/one_hot_op_test.cc @@ -14,7 +14,7 @@ * limitations under the License. */ #include "common/common.h" -#include "dataset/kernels/data/one_hot_op.h" +#include "minddata/dataset/kernels/data/one_hot_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/pad_end_op_test.cc b/tests/ut/cpp/dataset/pad_end_op_test.cc index 2787501aa9..1c838da8e8 100644 --- a/tests/ut/cpp/dataset/pad_end_op_test.cc +++ b/tests/ut/cpp/dataset/pad_end_op_test.cc @@ -14,7 +14,7 @@ * limitations under the License. */ #include "common/common.h" -#include "dataset/kernels/data/pad_end_op.h" +#include "minddata/dataset/kernels/data/pad_end_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/pad_op_test.cc b/tests/ut/cpp/dataset/pad_op_test.cc index b659d009f3..e2bd822d02 100644 --- a/tests/ut/cpp/dataset/pad_op_test.cc +++ b/tests/ut/cpp/dataset/pad_op_test.cc @@ -15,7 +15,7 @@ */ #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/pad_op.h" +#include "minddata/dataset/kernels/image/pad_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/path_test.cc b/tests/ut/cpp/dataset/path_test.cc index 4cf3b17968..b36b38bbc7 100644 --- a/tests/ut/cpp/dataset/path_test.cc +++ b/tests/ut/cpp/dataset/path_test.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/util/path.h" +#include "minddata/dataset/util/path.h" #include "common/common.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/perf_data_test.cc b/tests/ut/cpp/dataset/perf_data_test.cc index 048ee1f21a..486209be21 100644 --- a/tests/ut/cpp/dataset/perf_data_test.cc +++ b/tests/ut/cpp/dataset/perf_data_test.cc @@ -17,8 +17,8 @@ #include "common/cvop_common.h" #include "gtest/gtest.h" #include "securec.h" -#include "dataset/engine/perf/cyclic_array.h" -#include "dataset/engine/perf/perf_data.h" +#include "minddata/dataset/engine/perf/cyclic_array.h" +#include "minddata/dataset/engine/perf/perf_data.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/project_op_test.cc b/tests/ut/cpp/dataset/project_op_test.cc index 484396321c..45ef11b88f 100644 --- a/tests/ut/cpp/dataset/project_op_test.cc +++ b/tests/ut/cpp/dataset/project_op_test.cc @@ -19,7 +19,7 @@ #include "common/common.h" #include "common/utils.h" -#include "dataset/core/client.h" +#include "minddata/dataset/core/client.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/queue_test.cc b/tests/ut/cpp/dataset/queue_test.cc index 578405e537..ec40cc2ae4 100644 --- a/tests/ut/cpp/dataset/queue_test.cc +++ b/tests/ut/cpp/dataset/queue_test.cc @@ -16,9 +16,11 @@ #include "common/common.h" #include "gtest/gtest.h" -#include "dataset/util/task_manager.h" -#include "dataset/util/queue.h" +#include "minddata/dataset/util/task_manager.h" +#include "minddata/dataset/util/queue.h" #include +#include +#include #include "utils/log_adapter.h" using namespace mindspore::dataset; @@ -39,7 +41,7 @@ class RefCount { public: RefCount() : v_(nullptr) {} explicit RefCount(int x) : v_(std::make_shared(x)) {} - explicit RefCount(const RefCount &o) : v_(o.v_) {} + RefCount(const RefCount &o) : v_(o.v_) {} ~RefCount() { MS_LOG(DEBUG) << "Destructor of RefCount called" << std::endl; gRefCountDestructorCalled++; 
@@ -167,3 +169,70 @@ TEST_F(MindDataTestQueue, Test6) { MS_LOG(INFO) << "Popped value " << *pepped_value << " from queue index " << chosen_queue_index; ASSERT_EQ(*pepped_value, 99); } +using namespace std::chrono; +template +void Perf(int n, int p, std::string name) { + auto payload = std::vector(n, PayloadType(p)); + auto queue = QueueType(n); + auto t0 = high_resolution_clock::now(); + auto check = 0; + for (int i = 0; i < queue.capacity(); i++) { + queue.Add(PayloadType(p)); + } + check = queue.size(); + for (int i = 0; i < queue.capacity(); i++) { + queue.PopFront(&payload[i]); + } + auto t1 = high_resolution_clock::now(); + std::cout << name << " queue filled size: " << queue.size() << " " << check << std::endl; + auto t2 = high_resolution_clock::now(); + for (int i = 0; i < queue.capacity(); i++) { + queue.Add(PayloadType(p)); + } + check = queue.size(); + for (int i = 0; i < queue.capacity(); i++) { + queue.PopFront(&payload[i]); + } + auto t3 = high_resolution_clock::now(); + auto d = duration_cast(t3 - t2 + t1 - t0).count(); + std::cout << name << " queue emptied size: " << queue.size() << " " << check << std::endl; + std::cout << name << " " + << " ran in " << d << "ms" << std::endl; +} + +template +void Fuzz(int n, int p, std::string name) { + std::mt19937 gen(1); + auto payload = std::vector(n, PayloadType(p)); + auto queue = QueueType(n); + auto dist = std::uniform_int_distribution(0, 2); + std::cout << "###" << std::endl; + for (auto i = 0; i < n; i++) { + auto v = dist(gen); + if (v == 0 && queue.size() < n - 1) { + queue.Add(std::move(payload[i])); + } + if (v == 1 && queue.size() > 0) { + queue.PopFront(&payload[i]); + } else { + queue.Reset(); + } + } + std::cout << name << " fuzz ran " << queue.size() << std::endl; +} +TEST_F(MindDataTestQueue, TestPerf) { + try { + int kSz = 1000000; + // std::cout << "enter size" << std::endl; + // std::cin >> kSz; + Perf>, std::vector>(kSz, 1, "old queue, vector of size 1"); + } catch (const std::exception &e) 
{ + std::cout << e.what() << std::endl; + } + + std::cout << "Test Reset" << std::endl; + std::cout << "Enter fuzz size" << std::endl; + int fs = 1000; +// std::cin >> fs; + Fuzz>, std::vector>(fs, 1, "New queue"); +} diff --git a/tests/ut/cpp/dataset/random_color_adjust_op_test.cc b/tests/ut/cpp/dataset/random_color_adjust_op_test.cc index 82df108ad1..96f4dd8145 100644 --- a/tests/ut/cpp/dataset/random_color_adjust_op_test.cc +++ b/tests/ut/cpp/dataset/random_color_adjust_op_test.cc @@ -15,8 +15,8 @@ */ #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/random_color_adjust_op.h" -#include "dataset/core/cv_tensor.h" +#include "minddata/dataset/kernels/image/random_color_adjust_op.h" +#include "minddata/dataset/core/cv_tensor.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/random_crop_and_resize_op_test.cc b/tests/ut/cpp/dataset/random_crop_and_resize_op_test.cc index 3d5298b071..fd59a90117 100644 --- a/tests/ut/cpp/dataset/random_crop_and_resize_op_test.cc +++ b/tests/ut/cpp/dataset/random_crop_and_resize_op_test.cc @@ -16,7 +16,7 @@ #include "common/common.h" #include "common/cvop_common.h" #include -#include "dataset/kernels/image/random_crop_and_resize_op.h" +#include "minddata/dataset/kernels/image/random_crop_and_resize_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/random_crop_and_resize_with_bbox_op_test.cc b/tests/ut/cpp/dataset/random_crop_and_resize_with_bbox_op_test.cc new file mode 100644 index 0000000000..4efdcb8b78 --- /dev/null +++ b/tests/ut/cpp/dataset/random_crop_and_resize_with_bbox_op_test.cc @@ -0,0 +1,99 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "common/bboxop_common.h" +#include "minddata/dataset/kernels/image/random_crop_and_resize_with_bbox_op.h" +#include "utils/log_adapter.h" + +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/global_context.h" + +using namespace mindspore::dataset; +using mindspore::LogStream; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::MsLogLevel::INFO; + +const bool kSaveExpected = false; +const char kOpName[] = "RandomResizedCropWithBBox_C"; + +class MindDataTestRandomCropAndResizeWithBBoxOp : public UT::CVOP::BBOXOP::BBoxOpCommon { + protected: + MindDataTestRandomCropAndResizeWithBBoxOp() : BBoxOpCommon() {} +}; + +TEST_F(MindDataTestRandomCropAndResizeWithBBoxOp, TestOp1) { + MS_LOG(INFO) << "Doing testRandomCropAndResizeWithBBoxOp1."; + // setting seed here + uint32_t current_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(327362); + TensorRow output_tensor_row_; + TensorTable results; + int h_out = 1024; + int w_out = 2048; + float aspect_lb = 2; + float aspect_ub = 2.5; + float scale_lb = 0.2; + float scale_ub = 2.0; + auto op = std::make_unique(h_out, w_out, scale_lb, scale_ub, aspect_lb, aspect_ub); + Status s; + for (auto tensor_row_ : images_and_annotations_) { + s = op->Compute(tensor_row_, &output_tensor_row_); + EXPECT_TRUE(s.IsOk()); + results.push_back(output_tensor_row_); + } + if (kSaveExpected) { + SaveImagesWithAnnotations(FileType::kExpected, std::string(kOpName), results); + } + SaveImagesWithAnnotations(FileType::kActual, 
std::string(kOpName), results); + if (!kSaveExpected) { + CompareActualAndExpected(std::string(kOpName)); + } + GlobalContext::config_manager()->set_seed(current_seed); +} + +TEST_F(MindDataTestRandomCropAndResizeWithBBoxOp, TestOp2) { + MS_LOG(INFO) << "Doing testRandomCropAndResizeWithBBoxOp2."; + TensorRow output_tensor_row_; + int h_out = 1024; + int w_out = 2048; + float aspect_lb = 1; + float aspect_ub = 1.5; + float scale_lb = 0.2; + float scale_ub = 2.0; + auto op = std::make_unique(h_out, w_out, scale_lb, scale_ub, aspect_lb, aspect_ub); + Status s; + for (auto tensor_row_ : images_and_annotations_) { + s = op->Compute(tensor_row_, &output_tensor_row_); + EXPECT_TRUE(s.IsOk()); + } +} + +TEST_F(MindDataTestRandomCropAndResizeWithBBoxOp, TestOp3) { + MS_LOG(INFO) << "Doing testRandomCropAndResizeWithBBoxOp3."; + TensorRow output_tensor_row_; + int h_out = 1024; + int w_out = 2048; + float aspect_lb = 0.2; + float aspect_ub = 3; + float scale_lb = 0.2; + float scale_ub = 2.0; + auto op = std::make_unique(h_out, w_out, scale_lb, scale_ub, aspect_lb, aspect_ub); + Status s; + for (auto tensor_row_ : images_and_annotations_) { + s = op->Compute(tensor_row_, &output_tensor_row_); + EXPECT_TRUE(s.IsOk()); + } + MS_LOG(INFO) << "testRandomCropAndResizeWithBBoxOp end."; +} \ No newline at end of file diff --git a/tests/ut/cpp/dataset/random_crop_decode_resize_op_test.cc b/tests/ut/cpp/dataset/random_crop_decode_resize_op_test.cc index 1c9f3a98dc..170525b4e7 100644 --- a/tests/ut/cpp/dataset/random_crop_decode_resize_op_test.cc +++ b/tests/ut/cpp/dataset/random_crop_decode_resize_op_test.cc @@ -16,10 +16,10 @@ #include #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/decode_op.h" -#include "dataset/kernels/image/random_crop_and_resize_op.h" -#include "dataset/kernels/image/random_crop_decode_resize_op.h" -#include "dataset/core/config_manager.h" +#include "minddata/dataset/kernels/image/decode_op.h" +#include 
"minddata/dataset/kernels/image/random_crop_and_resize_op.h" +#include "minddata/dataset/kernels/image/random_crop_decode_resize_op.h" +#include "minddata/dataset/core/config_manager.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; @@ -54,7 +54,7 @@ TEST_F(MindDataTestRandomCropDecodeResizeOp, TestOp2) { auto decode_and_crop = static_cast(crop_and_decode_copy); EXPECT_TRUE(crop_and_decode.OneToOne()); GlobalContext::config_manager()->set_seed(42); - for (int k = 0; k < 100; k++) { + for (int k = 0; k < 10; k++) { (void)crop_and_decode.Compute(raw_input_tensor_, &crop_and_decode_output); (void)decode_and_crop.Compute(input_tensor_, &decode_and_crop_output); cv::Mat output1 = CVTensor::AsCVTensor(crop_and_decode_output)->mat().clone(); @@ -104,10 +104,10 @@ TEST_F(MindDataTestRandomCropDecodeResizeOp, TestOp1) { int mse_sum, m1, m2, count; double mse; - for (int k = 0; k < 100; ++k) { + for (int k = 0; k < 10; ++k) { mse_sum = 0; count = 0; - for (auto i = 0; i < 100; i++) { + for (auto i = 0; i < 10; i++) { scale = rd_scale(rd); aspect = rd_aspect(rd); crop_width = std::round(std::sqrt(h * w * scale / aspect)); diff --git a/tests/ut/cpp/dataset/random_crop_op_test.cc b/tests/ut/cpp/dataset/random_crop_op_test.cc index 2f3b19e2f4..9c8f1f31ed 100644 --- a/tests/ut/cpp/dataset/random_crop_op_test.cc +++ b/tests/ut/cpp/dataset/random_crop_op_test.cc @@ -15,7 +15,7 @@ */ #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/random_crop_op.h" +#include "minddata/dataset/kernels/image/random_crop_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/random_crop_with_bbox_op_test.cc b/tests/ut/cpp/dataset/random_crop_with_bbox_op_test.cc new file mode 100644 index 0000000000..fcf8ba2605 --- /dev/null +++ b/tests/ut/cpp/dataset/random_crop_with_bbox_op_test.cc @@ -0,0 +1,91 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache 
License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "common/bboxop_common.h" +#include "minddata/dataset/kernels/image/random_crop_with_bbox_op.h" +#include "utils/log_adapter.h" + +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/global_context.h" + +using namespace mindspore::dataset; +using mindspore::LogStream; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::MsLogLevel::INFO; + +const bool kSaveExpected = false; +const char kOpName[] = "RandomCropWithBBox_C"; + +class MindDataTestRandomCropWithBBoxOp : public UT::CVOP::BBOXOP::BBoxOpCommon { + protected: + MindDataTestRandomCropWithBBoxOp() : BBoxOpCommon() {} + TensorRow output_tensor_row_; +}; + +TEST_F(MindDataTestRandomCropWithBBoxOp, TestOp1) { + MS_LOG(INFO) << "Doing testRandomCropWithBBoxOp1."; + TensorTable results; + unsigned int crop_height = 128; + unsigned int crop_width = 128; + // setting seed here + uint32_t current_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(327362); + std::unique_ptr op( + new RandomCropWithBBoxOp(crop_height, crop_width, 0, 0, 0, 0, BorderType::kConstant, false)); + for (auto tensor_row_ : images_and_annotations_) { + Status s = op->Compute(tensor_row_, &output_tensor_row_); + size_t actual = 0; + if (s == Status::OK()) { + TensorShape get_shape = output_tensor_row_[0]->shape(); + actual = get_shape[0] * get_shape[1] * get_shape[2]; + results.push_back(output_tensor_row_); + } + 
EXPECT_EQ(actual, crop_height * crop_width * 3); + EXPECT_EQ(s, Status::OK()); + EXPECT_EQ(4, output_tensor_row_[1]->shape()[1]); // check for existence of 4 columns + // Compare Code + if (kSaveExpected) { + SaveImagesWithAnnotations(FileType::kExpected, std::string(kOpName), results); + } + SaveImagesWithAnnotations(FileType::kActual, std::string(kOpName), results); + if (!kSaveExpected) { + CompareActualAndExpected(std::string(kOpName)); + } + GlobalContext::config_manager()->set_seed(current_seed); + } +} + +TEST_F(MindDataTestRandomCropWithBBoxOp, TestOp2) { + MS_LOG(INFO) << "Doing testRandomCropWithBBoxOp2."; + // Crop params + unsigned int crop_height = 1280; + unsigned int crop_width = 1280; + std::unique_ptr op( + new RandomCropWithBBoxOp(crop_height, crop_width, 513, 513, 513, 513, BorderType::kConstant, false)); + + for (auto tensor_row_ : images_and_annotations_) { + Status s = op->Compute(tensor_row_, &output_tensor_row_); + size_t actual = 0; + if (s == Status::OK()) { + TensorShape get_shape = output_tensor_row_[0]->shape(); + actual = get_shape[0] * get_shape[1] * get_shape[2]; + } + EXPECT_EQ(actual, crop_height * crop_width * 3); + EXPECT_EQ(s, Status::OK()); + EXPECT_EQ(4, output_tensor_row_[1]->shape()[1]); // check for existence of 4 columns + } + MS_LOG(INFO) << "testRandomCropWithBBoxOp end."; +} diff --git a/tests/ut/cpp/dataset/random_data_op_test.cc b/tests/ut/cpp/dataset/random_data_op_test.cc index f8a7440c03..3cb7b57ad6 100644 --- a/tests/ut/cpp/dataset/random_data_op_test.cc +++ b/tests/ut/cpp/dataset/random_data_op_test.cc @@ -14,15 +14,15 @@ * limitations under the License. 
*/ -#include "dataset/core/client.h" +#include "minddata/dataset/core/client.h" #include "common/common.h" #include "gtest/gtest.h" #include #include #include -#include "dataset/core/tensor_shape.h" -#include "dataset/engine/datasetops/source/random_data_op.h" -#include "dataset/engine/data_schema.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/engine/datasetops/source/random_data_op.h" +#include "minddata/dataset/engine/data_schema.h" using namespace mindspore::dataset; using mindspore::MsLogLevel::INFO; diff --git a/tests/ut/cpp/dataset/random_horizontal_flip_op_test.cc b/tests/ut/cpp/dataset/random_horizontal_flip_op_test.cc index eb2f753554..bb4ba7498d 100644 --- a/tests/ut/cpp/dataset/random_horizontal_flip_op_test.cc +++ b/tests/ut/cpp/dataset/random_horizontal_flip_op_test.cc @@ -15,7 +15,7 @@ */ #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/random_horizontal_flip_op.h" +#include "minddata/dataset/kernels/image/random_horizontal_flip_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/random_horizontal_flip_with_bbox_test.cc b/tests/ut/cpp/dataset/random_horizontal_flip_with_bbox_test.cc new file mode 100644 index 0000000000..ed4e866478 --- /dev/null +++ b/tests/ut/cpp/dataset/random_horizontal_flip_with_bbox_test.cc @@ -0,0 +1,50 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "common/bboxop_common.h" +#include "minddata/dataset/kernels/image/random_horizontal_flip_with_bbox_op.h" +#include "utils/log_adapter.h" + +using namespace mindspore::dataset; +using mindspore::MsLogLevel::INFO; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::LogStream; + +const bool kSaveExpected = false; +const char kOpName[] = "RandomHorizontalFlipWithBBox"; + +class MindDataTestRandomHorizontalFlipWithBBoxOp : public UT::CVOP::BBOXOP::BBoxOpCommon { + protected: + MindDataTestRandomHorizontalFlipWithBBoxOp() : UT::CVOP::BBOXOP::BBoxOpCommon() {} +}; + +TEST_F(MindDataTestRandomHorizontalFlipWithBBoxOp, TestOp) { + MS_LOG(INFO) << "Doing testRandomHorizontalFlipWithBBox."; + TensorTable results; + std::unique_ptr op(new RandomHorizontalFlipWithBBoxOp(1)); + for (const auto &row: images_and_annotations_) { + TensorRow output_row; + Status s = op->Compute(row, &output_row); + EXPECT_TRUE(s.IsOk()); + results.push_back(output_row); + } + if (kSaveExpected) { + SaveImagesWithAnnotations(FileType::kExpected, std::string(kOpName), results); + } + SaveImagesWithAnnotations(FileType::kActual , std::string(kOpName), results); + if (!kSaveExpected) { + CompareActualAndExpected(std::string(kOpName)); + } +} diff --git a/tests/ut/cpp/dataset/random_resize_op_test.cc b/tests/ut/cpp/dataset/random_resize_op_test.cc index ee185f2fc6..d9e85de6e5 100644 --- a/tests/ut/cpp/dataset/random_resize_op_test.cc +++ b/tests/ut/cpp/dataset/random_resize_op_test.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/kernels/image/random_resize_op.h" +#include "minddata/dataset/kernels/image/random_resize_op.h" #include "common/common.h" #include "common/cvop_common.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/random_resize_with_bbox_op_test.cc b/tests/ut/cpp/dataset/random_resize_with_bbox_op_test.cc new file mode 100644 index 0000000000..e106f57375 --- /dev/null +++ b/tests/ut/cpp/dataset/random_resize_with_bbox_op_test.cc @@ -0,0 +1,59 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "common/bboxop_common.h" +#include "minddata/dataset/kernels/image/random_resize_with_bbox_op.h" +#include "utils/log_adapter.h" + +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/global_context.h" + +using namespace mindspore::dataset; +using mindspore::LogStream; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::MsLogLevel::INFO; + +const bool kSaveExpected = false; +const char kOpName[] = "RandomResizeWithBBox_C"; + +class MindDataTestRandomResizeWithBBoxOp : public UT::CVOP::BBOXOP::BBoxOpCommon { + protected: + MindDataTestRandomResizeWithBBoxOp() : BBoxOpCommon() {} +}; +TEST_F(MindDataTestRandomResizeWithBBoxOp, TestOp) { + MS_LOG(INFO) << "Doing testRandomResizeWithBBox."; + //setting seed here + u_int32_t curr_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(120); + TensorTable results; + std::unique_ptr<RandomResizeWithBBoxOp> op(new RandomResizeWithBBoxOp(500)); + for (const auto &tensor_row_ : images_and_annotations_) { + // selected a tensorRow + TensorRow output_row; + Status s = op->Compute(tensor_row_, &output_row); + EXPECT_TRUE(s.IsOk()); + results.push_back(output_row); + } + if (kSaveExpected) { + SaveImagesWithAnnotations(FileType::kExpected, std::string(kOpName), results); + } + SaveImagesWithAnnotations(FileType::kActual, std::string(kOpName), results); + if (!kSaveExpected) { + CompareActualAndExpected(std::string(kOpName)); + } + GlobalContext::config_manager()->set_seed(curr_seed); + MS_LOG(INFO) << "testRandomResizeWithBBox end."; +} diff --git a/tests/ut/cpp/dataset/random_rotation_op_test.cc b/tests/ut/cpp/dataset/random_rotation_op_test.cc index 8b82ef1dcd..a6eb5a1ff3 100644 --- a/tests/ut/cpp/dataset/random_rotation_op_test.cc +++ b/tests/ut/cpp/dataset/random_rotation_op_test.cc @@ -16,8 +16,8 @@ #include #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/random_rotation_op.h" -#include 
"dataset/core/cv_tensor.h" +#include "minddata/dataset/kernels/image/random_rotation_op.h" +#include "minddata/dataset/core/cv_tensor.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/random_vertical_flip_op_test.cc b/tests/ut/cpp/dataset/random_vertical_flip_op_test.cc index a2583cab96..db8cc89893 100644 --- a/tests/ut/cpp/dataset/random_vertical_flip_op_test.cc +++ b/tests/ut/cpp/dataset/random_vertical_flip_op_test.cc @@ -15,7 +15,7 @@ */ #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/random_vertical_flip_op.h" +#include "minddata/dataset/kernels/image/random_vertical_flip_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/random_vertical_flip_with_bbox_op_test.cc b/tests/ut/cpp/dataset/random_vertical_flip_with_bbox_op_test.cc new file mode 100644 index 0000000000..d1946ef700 --- /dev/null +++ b/tests/ut/cpp/dataset/random_vertical_flip_with_bbox_op_test.cc @@ -0,0 +1,51 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "common/bboxop_common.h" +#include "minddata/dataset/kernels/image/random_vertical_flip_with_bbox_op.h" +#include "utils/log_adapter.h" + +using namespace mindspore::dataset; +using mindspore::LogStream; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::MsLogLevel::INFO; + +const bool kSaveExpected = false; +const char kOpName[] = "RandomVerticalFlipWithBBox_C"; + +class MindDataTestRandomVerticalFlipWithBBoxOp : public UT::CVOP::BBOXOP::BBoxOpCommon { + protected: + MindDataTestRandomVerticalFlipWithBBoxOp() : BBoxOpCommon() {} +}; +TEST_F(MindDataTestRandomVerticalFlipWithBBoxOp, TestOp) { + MS_LOG(INFO) << "Doing testRandomVerticalFlipWithBBoxOp."; + TensorTable results; + std::unique_ptr<RandomVerticalFlipWithBBoxOp> op(new RandomVerticalFlipWithBBoxOp(1)); + for (const auto &tensor_row_ : images_and_annotations_) { + TensorRow output_row; + Status s = op->Compute(tensor_row_, &output_row); + EXPECT_TRUE(s.IsOk()); + results.push_back(output_row); + } + if (kSaveExpected) { + SaveImagesWithAnnotations(FileType::kExpected, std::string(kOpName), results); + } + SaveImagesWithAnnotations(FileType::kActual, std::string(kOpName), results); + if (!kSaveExpected) { + CompareActualAndExpected(std::string(kOpName)); + } + MS_LOG(INFO) << "testRandomVerticalFlipWithBBoxOp end."; +} diff --git a/tests/ut/cpp/dataset/rename_op_test.cc b/tests/ut/cpp/dataset/rename_op_test.cc index b6849ec53e..ac64346c26 100644 --- a/tests/ut/cpp/dataset/rename_op_test.cc +++ b/tests/ut/cpp/dataset/rename_op_test.cc @@ -17,15 +17,15 @@ #include #include #include -#include "dataset/core/client.h" -#include "dataset/core/constants.h" -#include "dataset/engine/datasetops/map_op.h" -#include "dataset/engine/datasetops/rename_op.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/engine/datasetops/map_op.h" +#include "minddata/dataset/engine/datasetops/rename_op.h" #include "common/common.h" #include "common/utils.h" 
-#include "dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/data_buffer.h" #include "gtest/gtest.h" -#include "dataset/core/global_context.h" +#include "minddata/dataset/core/global_context.h" #include "utils/log_adapter.h" namespace common = mindspore::common; @@ -51,7 +51,7 @@ TEST_F(MindDataTestRenameOp, TestRenameOpDefault) { auto my_tree = std::make_shared(); // Creating TFReaderOp - std::string dataset_path = datasets_root_path_ + "/test_tf_file_3_images_1/train-0000-of-0001.data"; + std::string dataset_path = datasets_root_path_ + "/test_tf_file_3_images/train-0000-of-0001.data"; std::shared_ptr my_tfreader_op; rc = TFReaderOp::Builder() .SetDatasetFilesList({dataset_path}) diff --git a/tests/ut/cpp/dataset/repeat_op_test.cc b/tests/ut/cpp/dataset/repeat_op_test.cc index 42549546ba..74d494c0dc 100644 --- a/tests/ut/cpp/dataset/repeat_op_test.cc +++ b/tests/ut/cpp/dataset/repeat_op_test.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/util/circular_pool.h" -#include "dataset/core/client.h" +#include "minddata/dataset/util/circular_pool.h" +#include "minddata/dataset/core/client.h" #include "common/common.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/rescale_op_test.cc b/tests/ut/cpp/dataset/rescale_op_test.cc index 86abbe972e..5d9bf32a9f 100644 --- a/tests/ut/cpp/dataset/rescale_op_test.cc +++ b/tests/ut/cpp/dataset/rescale_op_test.cc @@ -15,7 +15,7 @@ */ #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/rescale_op.h" +#include "minddata/dataset/kernels/image/rescale_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/resize_bilinear_op_test.cc b/tests/ut/cpp/dataset/resize_bilinear_op_test.cc index 8642484149..910c8af2a2 100644 --- a/tests/ut/cpp/dataset/resize_bilinear_op_test.cc +++ b/tests/ut/cpp/dataset/resize_bilinear_op_test.cc @@ -15,7 +15,7 @@ */ #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/resize_bilinear_op.h" +#include "minddata/dataset/kernels/image/resize_bilinear_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/resize_op_test.cc b/tests/ut/cpp/dataset/resize_op_test.cc index e23320a65a..807668dde4 100644 --- a/tests/ut/cpp/dataset/resize_op_test.cc +++ b/tests/ut/cpp/dataset/resize_op_test.cc @@ -15,7 +15,7 @@ */ #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/resize_op.h" +#include "minddata/dataset/kernels/image/resize_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/resize_with_bbox_op_test.cc b/tests/ut/cpp/dataset/resize_with_bbox_op_test.cc new file mode 100644 index 0000000000..f9eaf85a55 --- /dev/null +++ b/tests/ut/cpp/dataset/resize_with_bbox_op_test.cc @@ -0,0 +1,54 @@ +/** + * Copyright 2020 Huawei 
Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "common/bboxop_common.h" +#include "minddata/dataset/kernels/image/resize_with_bbox_op.h" +#include "utils/log_adapter.h" + +using namespace mindspore::dataset; +using mindspore::LogStream; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::MsLogLevel::INFO; + +const bool kSaveExpected = false; +const char kOpName[] = "ResizeWithBBox_C"; + +class MindDataTestResizeWithBBoxOp : public UT::CVOP::BBOXOP::BBoxOpCommon { + protected: + MindDataTestResizeWithBBoxOp() : BBoxOpCommon() {} +}; +TEST_F(MindDataTestResizeWithBBoxOp, TestOp) { + MS_LOG(INFO) << "Doing testResizeWithBBox."; + // resize + TensorTable results; + std::unique_ptr<ResizeWithBBoxOp> op(new ResizeWithBBoxOp(500)); + for (const auto &tensor_row_ : images_and_annotations_) { + // selected a tensorRow + TensorRow output_row; + Status s = op->Compute(tensor_row_, &output_row); + EXPECT_TRUE(s.IsOk()); + results.push_back(output_row); + } + if (kSaveExpected) { + SaveImagesWithAnnotations(FileType::kExpected, std::string(kOpName), results); + } + SaveImagesWithAnnotations(FileType::kActual, std::string(kOpName), results); + if (!kSaveExpected) { + CompareActualAndExpected(std::string(kOpName)); + } + + MS_LOG(INFO) << "testResizeWithBBox end."; +} diff --git a/tests/ut/cpp/dataset/schema_test.cc b/tests/ut/cpp/dataset/schema_test.cc index 2da61bc047..95b9c75d9e 100644 --- a/tests/ut/cpp/dataset/schema_test.cc +++ 
b/tests/ut/cpp/dataset/schema_test.cc @@ -19,11 +19,11 @@ #include #include "common/common.h" #include "common/utils.h" -#include "dataset/core/client.h" -#include "dataset/core/global_context.h" -#include "dataset/engine/data_schema.h" -#include "dataset/util/path.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/status.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" #include "securec.h" diff --git a/tests/ut/cpp/dataset/shuffle_op_test.cc b/tests/ut/cpp/dataset/shuffle_op_test.cc index c9bcb24c4e..98b4878efb 100644 --- a/tests/ut/cpp/dataset/shuffle_op_test.cc +++ b/tests/ut/cpp/dataset/shuffle_op_test.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/core/client.h" +#include "minddata/dataset/core/client.h" #include "common/common.h" #include "common/utils.h" #include "gtest/gtest.h" diff --git a/tests/ut/cpp/dataset/skip_op_test.cc b/tests/ut/cpp/dataset/skip_op_test.cc index 697745512d..387d2f69ff 100644 --- a/tests/ut/cpp/dataset/skip_op_test.cc +++ b/tests/ut/cpp/dataset/skip_op_test.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/util/circular_pool.h" -#include "dataset/core/client.h" +#include "minddata/dataset/util/circular_pool.h" +#include "minddata/dataset/core/client.h" #include "common/common.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/stand_alone_samplers_test.cc b/tests/ut/cpp/dataset/stand_alone_samplers_test.cc index dfe15a8f15..96e9652bbc 100644 --- a/tests/ut/cpp/dataset/stand_alone_samplers_test.cc +++ b/tests/ut/cpp/dataset/stand_alone_samplers_test.cc @@ -15,13 +15,13 @@ */ #include "common/common.h" -#include "dataset/core/client.h" -#include "dataset/core/global_context.h" -#include "dataset/engine/datasetops/source/sampler/distributed_sampler.h" -#include "dataset/engine/datasetops/source/sampler/random_sampler.h" -#include "dataset/engine/datasetops/source/sampler/sampler.h" -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/util/status.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" #include "securec.h" diff --git a/tests/ut/cpp/dataset/status_test.cc b/tests/ut/cpp/dataset/status_test.cc index c64a86b8ba..195da1c119 100644 --- a/tests/ut/cpp/dataset/status_test.cc +++ b/tests/ut/cpp/dataset/status_test.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/util/status.h" +#include "minddata/dataset/util/status.h" #include "common/common.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/subset_random_sampler_test.cc b/tests/ut/cpp/dataset/subset_random_sampler_test.cc index 22200ccbac..c389686014 100644 --- a/tests/ut/cpp/dataset/subset_random_sampler_test.cc +++ b/tests/ut/cpp/dataset/subset_random_sampler_test.cc @@ -16,11 +16,11 @@ #include "common/common.h" #include "gtest/gtest.h" -#include "dataset/core/constants.h" -#include "dataset/core/tensor.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/datasetops/source/sampler/sampler.h" -#include "dataset/engine/datasetops/source/sampler/subset_random_sampler.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h" #include #include diff --git a/tests/ut/cpp/dataset/take_op_test.cc b/tests/ut/cpp/dataset/take_op_test.cc index b7be066d6c..a8bfe40b10 100644 --- a/tests/ut/cpp/dataset/take_op_test.cc +++ b/tests/ut/cpp/dataset/take_op_test.cc @@ -19,7 +19,7 @@ #include "common/common.h" #include "common/utils.h" -#include "dataset/core/client.h" +#include "minddata/dataset/core/client.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/task_manager_test.cc b/tests/ut/cpp/dataset/task_manager_test.cc index 3d34ec9ec5..7b8101fa56 100644 --- a/tests/ut/cpp/dataset/task_manager_test.cc +++ b/tests/ut/cpp/dataset/task_manager_test.cc @@ -16,7 +16,7 @@ #include "common/common.h" #include "gtest/gtest.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/util/task_manager.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/tensor_op_fusion_pass_test.cc 
b/tests/ut/cpp/dataset/tensor_op_fusion_pass_test.cc new file mode 100644 index 0000000000..70832c04b5 --- /dev/null +++ b/tests/ut/cpp/dataset/tensor_op_fusion_pass_test.cc @@ -0,0 +1,105 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include "minddata/dataset/core/client.h" +#include "common/common.h" +#include "gtest/gtest.h" +#include "minddata/dataset/kernels/image/random_crop_and_resize_op.h" +#include "minddata/dataset/kernels/image/decode_op.h" +#include "minddata/dataset/engine/datasetops/source/image_folder_op.h" +#include "minddata/dataset/engine/execution_tree.h" + + +using namespace mindspore::dataset; +using mindspore::LogStream; +using mindspore::MsLogLevel::INFO; + +class MindDataTestTensorOpFusionPass : public UT::DatasetOpTesting { + public: + MindDataTestTensorOpFusionPass() = default; + void SetUp() override { GlobalInit(); } +}; + +TEST_F(MindDataTestTensorOpFusionPass, RandomCropDecodeResize_fusion_disabled) { + MS_LOG(INFO) << "Doing RandomCropDecodeResize_fusion"; + std::shared_ptr ImageFolder(int64_t num_works, int64_t rows, int64_t conns, std::string path, + bool shuf = false, std::shared_ptr sampler = nullptr, + std::map map = {}, bool decode = false); + std::shared_ptr Build(std::vector> ops); + auto rcar_op = std::make_shared(); + auto decode_op = std::make_shared(); + Status rc; + std::vector> func_list; + func_list.push_back(decode_op); + 
func_list.push_back(rcar_op); + std::shared_ptr map_op; + MapOp::Builder map_decode_builder; + map_decode_builder.SetInColNames({}).SetOutColNames({}).SetTensorFuncs(func_list).SetNumWorkers(4); + rc = map_decode_builder.Build(&map_op); + EXPECT_TRUE(rc.IsOk()); + auto tree = std::make_shared(); + tree = Build({ImageFolder(16, 2, 32, "./", false), map_op}); + rc = tree->SetOptimize(false); + EXPECT_TRUE(rc); + rc = tree->Prepare(); + EXPECT_TRUE(rc.IsOk()); + rc = tree->SetOptimize(false); + EXPECT_TRUE(rc.IsError()); + auto it = tree->begin(); + ++it; + auto *m_op = &(*it); + auto tfuncs = static_cast(m_op)->TFuncs(); + auto func_it = tfuncs.begin(); + EXPECT_EQ((*func_it)->Name(), kDecodeOp); + ++func_it; + EXPECT_EQ((*func_it)->Name(), kRandomCropAndResizeOp); +} + +TEST_F(MindDataTestTensorOpFusionPass, RandomCropDecodeResize_fusion_enabled) { + MS_LOG(INFO) << "Doing RandomCropDecodeResize_fusion"; + std::shared_ptr ImageFolder(int64_t num_works, int64_t rows, int64_t conns, std::string path, + bool shuf = false, std::shared_ptr sampler = nullptr, + std::map map = {}, bool decode = false); + std::shared_ptr Build(std::vector> ops); + auto rcar_op = std::make_shared(); + auto decode_op = std::make_shared(); + Status rc; + std::vector> func_list; + func_list.push_back(decode_op); + func_list.push_back(rcar_op); + std::shared_ptr map_op; + MapOp::Builder map_decode_builder; + map_decode_builder.SetInColNames({}).SetOutColNames({}).SetTensorFuncs(func_list).SetNumWorkers(4); + rc = map_decode_builder.Build(&map_op); + EXPECT_TRUE(rc.IsOk()); + auto tree = std::make_shared(); + tree = Build({ImageFolder(16, 2, 32, "./", false), map_op}); + rc = tree->SetOptimize(true); + EXPECT_TRUE(rc); + rc = tree->Prepare(); + EXPECT_TRUE(rc.IsOk()); + rc = tree->SetOptimize(false); + EXPECT_TRUE(rc.IsError()); + auto it = tree->begin(); + ++it; + auto *m_op = &(*it); + auto tfuncs = static_cast(m_op)->TFuncs(); + auto func_it = tfuncs.begin(); + EXPECT_EQ((*func_it)->Name(), 
kRandomCropDecodeResizeOp); + EXPECT_EQ(++func_it, tfuncs.end()); +} \ No newline at end of file diff --git a/tests/ut/cpp/dataset/tensor_string_test.cc b/tests/ut/cpp/dataset/tensor_string_test.cc index 43b235304d..fe336a34c5 100644 --- a/tests/ut/cpp/dataset/tensor_string_test.cc +++ b/tests/ut/cpp/dataset/tensor_string_test.cc @@ -15,13 +15,13 @@ */ #include #include -#include "dataset/core/client.h" +#include "minddata/dataset/core/client.h" #include "common/common.h" #include "gtest/gtest.h" #include "securec.h" -#include "dataset/core/tensor.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/core/data_type.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/core/data_type.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/tensor_test.cc b/tests/ut/cpp/dataset/tensor_test.cc index 1aa3cad2fa..fce4652b47 100644 --- a/tests/ut/cpp/dataset/tensor_test.cc +++ b/tests/ut/cpp/dataset/tensor_test.cc @@ -15,13 +15,13 @@ */ #include #include -#include "dataset/core/client.h" +#include "minddata/dataset/core/client.h" #include "common/common.h" #include "gtest/gtest.h" #include "securec.h" -#include "dataset/core/tensor.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/core/data_type.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/core/data_type.h" using namespace mindspore::dataset; @@ -432,3 +432,17 @@ TEST_F(MindDataTestTensorDE, TensorConcatenate) { s = t1->Concatenate({5}, t2); EXPECT_FALSE(s.IsOk()); } + +TEST_F(MindDataTestTensorDE, TensorEmpty) { + std::shared_ptr t = std::make_shared(TensorShape({2, 3}), DataType(DataType::DE_UINT64)); + ASSERT_TRUE(t->HasData()); +} + +TEST_F(MindDataTestTensorDE, TensorEmptyInvalidate) { + std::vector values1 = {1, 2, 3, 0, 0, 0}; + std::shared_ptr t; + Tensor::CreateTensor(&t, values1); + t->Invalidate(); + ASSERT_TRUE(t->HasData()); +} + diff --git 
a/tests/ut/cpp/dataset/tensorshape_test.cc b/tests/ut/cpp/dataset/tensorshape_test.cc index 1af0bf9c82..65ab386db0 100644 --- a/tests/ut/cpp/dataset/tensorshape_test.cc +++ b/tests/ut/cpp/dataset/tensorshape_test.cc @@ -15,10 +15,10 @@ */ #include #include "./securec.h" -#include "dataset/core/client.h" -#include "dataset/core/data_type.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/engine/data_schema.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/data_type.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/engine/data_schema.h" #include "common/common.h" #include "common/utils.h" #include "gtest/gtest.h" diff --git a/tests/ut/cpp/dataset/text_file_op_test.cc b/tests/ut/cpp/dataset/text_file_op_test.cc index 7887eda955..bc2674a6a3 100644 --- a/tests/ut/cpp/dataset/text_file_op_test.cc +++ b/tests/ut/cpp/dataset/text_file_op_test.cc @@ -17,13 +17,13 @@ #include #include -#include "dataset/core/client.h" +#include "minddata/dataset/core/client.h" #include "common/common.h" #include "common/utils.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" -#include "dataset/engine/datasetops/source/text_file_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/engine/datasetops/source/text_file_op.h" +#include "minddata/dataset/util/status.h" namespace common = mindspore::common; diff --git a/tests/ut/cpp/dataset/tfReader_op_test.cc b/tests/ut/cpp/dataset/tfReader_op_test.cc index 9b312296d8..30fde33ff9 100644 --- a/tests/ut/cpp/dataset/tfReader_op_test.cc +++ b/tests/ut/cpp/dataset/tfReader_op_test.cc @@ -17,8 +17,8 @@ #include #include -#include "dataset/core/client.h" -#include "dataset/engine/data_schema.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/engine/data_schema.h" #include "common/common.h" #include "common/utils.h" #include "gtest/gtest.h" diff --git a/tests/ut/cpp/dataset/to_float16_op_test.cc b/tests/ut/cpp/dataset/to_float16_op_test.cc 
index 9c49c67b2c..5c886690c9 100644 --- a/tests/ut/cpp/dataset/to_float16_op_test.cc +++ b/tests/ut/cpp/dataset/to_float16_op_test.cc @@ -15,9 +15,9 @@ */ #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/random_rotation_op.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/kernels/data/to_float16_op.h" +#include "minddata/dataset/kernels/image/random_rotation_op.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/kernels/data/to_float16_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/tokenizer_op_test.cc b/tests/ut/cpp/dataset/tokenizer_op_test.cc index 8a18f0da0c..cc2d7473ff 100644 --- a/tests/ut/cpp/dataset/tokenizer_op_test.cc +++ b/tests/ut/cpp/dataset/tokenizer_op_test.cc @@ -18,14 +18,14 @@ #include #include "common/common.h" -#include "dataset/text/kernels/basic_tokenizer_op.h" -#include "dataset/text/kernels/case_fold_op.h" -#include "dataset/text/kernels/normalize_utf8_op.h" -#include "dataset/text/kernels/regex_replace_op.h" -#include "dataset/text/kernels/regex_tokenizer_op.h" -#include "dataset/text/kernels/unicode_char_tokenizer_op.h" -#include "dataset/text/kernels/unicode_script_tokenizer_op.h" -#include "dataset/text/kernels/whitespace_tokenizer_op.h" +#include "minddata/dataset/text/kernels/basic_tokenizer_op.h" +#include "minddata/dataset/text/kernels/case_fold_op.h" +#include "minddata/dataset/text/kernels/normalize_utf8_op.h" +#include "minddata/dataset/text/kernels/regex_replace_op.h" +#include "minddata/dataset/text/kernels/regex_tokenizer_op.h" +#include "minddata/dataset/text/kernels/unicode_char_tokenizer_op.h" +#include "minddata/dataset/text/kernels/unicode_script_tokenizer_op.h" +#include "minddata/dataset/text/kernels/whitespace_tokenizer_op.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" @@ -45,227 +45,245 @@ class MindDataTestTokenizerOp : public UT::Common { TEST_F(MindDataTestTokenizerOp, 
TestUnicodeCharTokenizerOp) { MS_LOG(INFO) << "Doing TestUnicodeCharTokenizerOp."; - std::unique_ptr op(new UnicodeCharTokenizerOp()); + std::unique_ptr op(new UnicodeCharTokenizerOp(true)); std::shared_ptr input = std::make_shared("Hello World!"); - std::shared_ptr output; - Status s = op->Compute(input, &output); + TensorRow output; + Status s = op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 12); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor1: " << output->ToString(); - CheckEqual(output, {0}, "H"); - CheckEqual(output, {1}, "e"); - CheckEqual(output, {2}, "l"); - CheckEqual(output, {3}, "l"); - CheckEqual(output, {4}, "o"); - CheckEqual(output, {5}, " "); - CheckEqual(output, {6}, "W"); - CheckEqual(output, {7}, "o"); - CheckEqual(output, {8}, "r"); - CheckEqual(output, {9}, "l"); - CheckEqual(output, {10}, "d"); - CheckEqual(output, {11}, "!"); + EXPECT_EQ(output[0]->Size(), 12); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor1: " << output[0]->ToString(); + CheckEqual(output[0], {0}, "H"); + CheckEqual(output[0], {1}, "e"); + CheckEqual(output[0], {2}, "l"); + CheckEqual(output[0], {3}, "l"); + CheckEqual(output[0], {4}, "o"); + CheckEqual(output[0], {5}, " "); + CheckEqual(output[0], {6}, "W"); + CheckEqual(output[0], {7}, "o"); + CheckEqual(output[0], {8}, "r"); + CheckEqual(output[0], {9}, "l"); + CheckEqual(output[0], {10}, "d"); + CheckEqual(output[0], {11}, "!"); input = std::make_shared("中国 你好!"); - s = op->Compute(input, &output); + output.clear(); + s = op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 6); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor2: " << output->ToString(); - CheckEqual(output, {0}, "中"); - CheckEqual(output, {1}, "国"); - CheckEqual(output, {2}, " "); - CheckEqual(output, {3}, "你"); - CheckEqual(output, {4}, "好"); - CheckEqual(output, {5}, "!"); + EXPECT_EQ(output[0]->Size(), 6); + 
EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor2: " << output[0]->ToString(); + CheckEqual(output[0], {0}, "中"); + CheckEqual(output[0], {1}, "国"); + CheckEqual(output[0], {2}, " "); + CheckEqual(output[0], {3}, "你"); + CheckEqual(output[0], {4}, "好"); + CheckEqual(output[0], {5}, "!"); input = std::make_shared("中"); - s = op->Compute(input, &output); + output.clear(); + s = op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 1); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor3: " << output->ToString(); - CheckEqual(output, {0}, "中"); + EXPECT_EQ(output[0]->Size(), 1); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor3: " << output[0]->ToString(); + CheckEqual(output[0], {0}, "中"); input = std::make_shared("H"); - s = op->Compute(input, &output); + output.clear(); + s = op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 1); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor4: " << output->ToString(); - CheckEqual(output, {0}, "H"); + EXPECT_EQ(output[0]->Size(), 1); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor4: " << output[0]->ToString(); + CheckEqual(output[0], {0}, "H"); input = std::make_shared(" "); - s = op->Compute(input, &output); + output.clear(); + s = op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 2); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor5: " << output->ToString(); - CheckEqual(output, {0}, " "); - CheckEqual(output, {1}, " "); + EXPECT_EQ(output[0]->Size(), 2); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor5: " << output[0]->ToString(); + CheckEqual(output[0], {0}, " "); + CheckEqual(output[0], {1}, " "); input = std::make_shared(""); - s = op->Compute(input, &output); + output.clear(); + s = op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 1); - 
EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor6: " << output->ToString(); - CheckEqual(output, {0}, ""); + EXPECT_EQ(output[0]->Size(), 1); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor6: " << output[0]->ToString(); + CheckEqual(output[0], {0}, ""); } TEST_F(MindDataTestTokenizerOp, TestWhitespaceTokenizerOp) { MS_LOG(INFO) << "Doing TestWhitespaceTokenizerOp."; - std::unique_ptr op(new WhitespaceTokenizerOp()); + std::unique_ptr op(new WhitespaceTokenizerOp(true)); std::shared_ptr input = std::make_shared("Welcome to China."); - std::shared_ptr output; - Status s = op->Compute(input, &output); + TensorRow output; + Status s = op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 3); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor1: " << output->ToString(); - CheckEqual(output, {0}, "Welcome"); - CheckEqual(output, {1}, "to"); - CheckEqual(output, {2}, "China."); + EXPECT_EQ(output[0]->Size(), 3); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor1: " << output[0]->ToString(); + CheckEqual(output[0], {0}, "Welcome"); + CheckEqual(output[0], {1}, "to"); + CheckEqual(output[0], {2}, "China."); input = std::make_shared(" hello"); - s = op->Compute(input, &output); + output.clear(); + s = op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 1); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor2: " << output->ToString(); - CheckEqual(output, {0}, "hello"); + EXPECT_EQ(output[0]->Size(), 1); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor2: " << output[0]->ToString(); + CheckEqual(output[0], {0}, "hello"); input = std::make_shared("hello"); - s = op->Compute(input, &output); + output.clear(); + s = op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 1); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor3: " << output->ToString(); - 
CheckEqual(output, {0}, "hello"); + EXPECT_EQ(output[0]->Size(), 1); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor3: " << output[0]->ToString(); + CheckEqual(output[0], {0}, "hello"); input = std::make_shared("hello "); - s = op->Compute(input, &output); + output.clear(); + s = op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 1); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor4: " << output->ToString(); - CheckEqual(output, {0}, "hello"); + EXPECT_EQ(output[0]->Size(), 1); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor4: " << output[0]->ToString(); + CheckEqual(output[0], {0}, "hello"); input = std::make_shared(" "); - s = op->Compute(input, &output); + output.clear(); + s = op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 1); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor5: " << output->ToString(); - CheckEqual(output, {0}, ""); + EXPECT_EQ(output[0]->Size(), 1); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor5: " << output[0]->ToString(); + CheckEqual(output[0], {0}, ""); } TEST_F(MindDataTestTokenizerOp, TestUnicodeScriptTokenizer) { MS_LOG(INFO) << "Doing TestUnicodeScriptTokenizer."; - std::unique_ptr keep_whitespace_op(new UnicodeScriptTokenizerOp(true)); - std::unique_ptr skip_whitespace_op(new UnicodeScriptTokenizerOp(false)); + std::unique_ptr keep_whitespace_op(new UnicodeScriptTokenizerOp(true, true)); + std::unique_ptr skip_whitespace_op(new UnicodeScriptTokenizerOp(false, true)); std::shared_ptr input = std::make_shared("Welcome to China. 
\n 中国\t北京"); - std::shared_ptr output; - Status s = keep_whitespace_op->Compute(input, &output); + TensorRow output; + Status s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 10); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor1: " << output->ToString(); - CheckEqual(output, {0}, "Welcome"); - CheckEqual(output, {1}, " "); - CheckEqual(output, {2}, "to"); - CheckEqual(output, {3}, " "); - CheckEqual(output, {4}, "China"); - CheckEqual(output, {5}, "."); - CheckEqual(output, {6}, " \n "); - CheckEqual(output, {7}, "中国"); - CheckEqual(output, {8}, "\t"); - CheckEqual(output, {9}, "北京"); - s = skip_whitespace_op->Compute(input, &output); + EXPECT_EQ(output[0]->Size(), 10); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor1: " << output[0]->ToString(); + CheckEqual(output[0], {0}, "Welcome"); + CheckEqual(output[0], {1}, " "); + CheckEqual(output[0], {2}, "to"); + CheckEqual(output[0], {3}, " "); + CheckEqual(output[0], {4}, "China"); + CheckEqual(output[0], {5}, "."); + CheckEqual(output[0], {6}, " \n "); + CheckEqual(output[0], {7}, "中国"); + CheckEqual(output[0], {8}, "\t"); + CheckEqual(output[0], {9}, "北京"); + output.clear(); + s = skip_whitespace_op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 6); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor2: " << output->ToString(); - CheckEqual(output, {0}, "Welcome"); - CheckEqual(output, {1}, "to"); - CheckEqual(output, {2}, "China"); - CheckEqual(output, {3}, "."); - CheckEqual(output, {4}, "中国"); - CheckEqual(output, {5}, "北京"); + EXPECT_EQ(output[0]->Size(), 6); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor2: " << output[0]->ToString(); + CheckEqual(output[0], {0}, "Welcome"); + CheckEqual(output[0], {1}, "to"); + CheckEqual(output[0], {2}, "China"); + CheckEqual(output[0], {3}, "."); + CheckEqual(output[0], {4}, "中国"); + CheckEqual(output[0], 
{5}, "北京"); input = std::make_shared(" Welcome to 中国. "); - s = skip_whitespace_op->Compute(input, &output); + output.clear(); + s = skip_whitespace_op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 4); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor3: " << output->ToString(); - CheckEqual(output, {0}, "Welcome"); - CheckEqual(output, {1}, "to"); - CheckEqual(output, {2}, "中国"); - CheckEqual(output, {3}, "."); - s = keep_whitespace_op->Compute(input, &output); + EXPECT_EQ(output[0]->Size(), 4); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor3: " << output[0]->ToString(); + CheckEqual(output[0], {0}, "Welcome"); + CheckEqual(output[0], {1}, "to"); + CheckEqual(output[0], {2}, "中国"); + CheckEqual(output[0], {3}, "."); + output.clear(); + s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 8); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor4: " << output->ToString(); - CheckEqual(output, {0}, " "); - CheckEqual(output, {1}, "Welcome"); - CheckEqual(output, {2}, " "); - CheckEqual(output, {3}, "to"); - CheckEqual(output, {4}, " "); - CheckEqual(output, {5}, "中国"); - CheckEqual(output, {6}, "."); - CheckEqual(output, {7}, " "); + EXPECT_EQ(output[0]->Size(), 8); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor4: " << output[0]->ToString(); + CheckEqual(output[0], {0}, " "); + CheckEqual(output[0], {1}, "Welcome"); + CheckEqual(output[0], {2}, " "); + CheckEqual(output[0], {3}, "to"); + CheckEqual(output[0], {4}, " "); + CheckEqual(output[0], {5}, "中国"); + CheckEqual(output[0], {6}, "."); + CheckEqual(output[0], {7}, " "); input = std::make_shared("Hello"); - s = keep_whitespace_op->Compute(input, &output); + output.clear(); + s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 1); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out 
tensor5: " << output->ToString(); - CheckEqual(output, {0}, "Hello"); + EXPECT_EQ(output[0]->Size(), 1); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor5: " << output[0]->ToString(); + CheckEqual(output[0], {0}, "Hello"); input = std::make_shared("H"); - s = keep_whitespace_op->Compute(input, &output); + output.clear(); + s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 1); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor6: " << output->ToString(); - CheckEqual(output, {0}, "H"); + EXPECT_EQ(output[0]->Size(), 1); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor6: " << output[0]->ToString(); + CheckEqual(output[0], {0}, "H"); input = std::make_shared(""); - s = keep_whitespace_op->Compute(input, &output); + output.clear(); + s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 1); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor7: " << output->ToString(); - CheckEqual(output, {0}, ""); + EXPECT_EQ(output[0]->Size(), 1); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor7: " << output[0]->ToString(); + CheckEqual(output[0], {0}, ""); input = std::make_shared("Hello中国Hello世界"); - s = keep_whitespace_op->Compute(input, &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 4); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor8: " << output->ToString(); - CheckEqual(output, {0}, "Hello"); - CheckEqual(output, {1}, "中国"); - CheckEqual(output, {2}, "Hello"); - CheckEqual(output, {3}, "世界"); + output.clear(); + s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); + EXPECT_EQ(output[0]->Size(), 4); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor8: " << output[0]->ToString(); + CheckEqual(output[0], {0}, "Hello"); + CheckEqual(output[0], {1}, "中国"); + CheckEqual(output[0], {2}, "Hello"); + 
CheckEqual(output[0], {3}, "世界"); input = std::make_shared(" "); - s = keep_whitespace_op->Compute(input, &output); + output.clear(); + s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 1); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor10: " << output->ToString(); - CheckEqual(output, {0}, " "); + EXPECT_EQ(output[0]->Size(), 1); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor10: " << output[0]->ToString(); + CheckEqual(output[0], {0}, " "); input = std::make_shared(" "); - s = skip_whitespace_op->Compute(input, &output); + output.clear(); + s = skip_whitespace_op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 1); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor11: " << output->ToString(); - CheckEqual(output, {0}, ""); + EXPECT_EQ(output[0]->Size(), 1); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor11: " << output[0]->ToString(); + CheckEqual(output[0], {0}, ""); } TEST_F(MindDataTestTokenizerOp, TestCaseFold) { @@ -321,10 +339,10 @@ TEST_F(MindDataTestTokenizerOp, TestRegexReplace) { TEST_F(MindDataTestTokenizerOp, TestRegexTokenizer) { MS_LOG(INFO) << "Doing TestRegexTokenizerOp."; - std::unique_ptr regex_tokenizer_op(new RegexTokenizerOp("\\p{Cc}|\\p{Cf}|\\s+", "")); + std::unique_ptr regex_tokenizer_op(new RegexTokenizerOp("\\p{Cc}|\\p{Cf}|\\s+", "", true)); std::shared_ptr input = std::make_shared("Welcome to China. 
\n 中国\t北京"); - std::shared_ptr output; - Status s = regex_tokenizer_op->Compute(input, &output); + TensorRow output; + Status s = regex_tokenizer_op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); } @@ -332,9 +350,10 @@ TEST_F(MindDataTestTokenizerOp, TestBasicTokenizer) { MS_LOG(INFO) << "Doing TestBasicTokenizer."; //bool lower_case, bool keep_whitespace, // NormalizeForm normalization_form, bool preserve_unused_token - std::unique_ptr basic_tokenizer(new BasicTokenizerOp(true, true, NormalizeForm::kNone, false)); + std::unique_ptr basic_tokenizer(new BasicTokenizerOp(true, true, NormalizeForm::kNone, false, + true)); std::shared_ptr input = std::make_shared("Welcome to China. 中国\t北京"); - std::shared_ptr output; - Status s = basic_tokenizer->Compute(input, &output); + TensorRow output; + Status s = basic_tokenizer->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); } \ No newline at end of file diff --git a/tests/ut/cpp/dataset/treap_test.cc b/tests/ut/cpp/dataset/treap_test.cc index b454ab108e..b9c534719c 100644 --- a/tests/ut/cpp/dataset/treap_test.cc +++ b/tests/ut/cpp/dataset/treap_test.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/util/treap.h" +#include "minddata/dataset/util/treap.h" #include "common/common.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/trucate_pair_test.cc b/tests/ut/cpp/dataset/trucate_pair_test.cc index 95e2aaa11b..af7e61c16a 100644 --- a/tests/ut/cpp/dataset/trucate_pair_test.cc +++ b/tests/ut/cpp/dataset/trucate_pair_test.cc @@ -15,12 +15,12 @@ */ #include #include -#include "dataset/core/client.h" +#include "minddata/dataset/core/client.h" #include "common/common.h" #include "gtest/gtest.h" #include "securec.h" -#include "dataset/core/tensor.h" -#include "mindspore/ccsrc/dataset/text/kernels/truncate_sequence_pair_op.h" +#include "minddata/dataset/core/tensor.h" +#include "mindspore/ccsrc/minddata/dataset/text/kernels/truncate_sequence_pair_op.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/type_cast_op_test.cc b/tests/ut/cpp/dataset/type_cast_op_test.cc index 543eb71637..a94a7fedba 100644 --- a/tests/ut/cpp/dataset/type_cast_op_test.cc +++ b/tests/ut/cpp/dataset/type_cast_op_test.cc @@ -17,12 +17,12 @@ #include #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/data/type_cast_op.h" -#include "dataset/core/client.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/core/data_type.h" -#include "dataset/core/tensor.h" -#include "dataset/core/pybind_support.h" +#include "minddata/dataset/kernels/data/type_cast_op.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/core/data_type.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/pybind_support.h" #include "gtest/gtest.h" #include "securec.h" diff --git a/tests/ut/cpp/dataset/voc_op_test.cc b/tests/ut/cpp/dataset/voc_op_test.cc index 05dc28b487..4bb212ffc7 100644 --- a/tests/ut/cpp/dataset/voc_op_test.cc +++ b/tests/ut/cpp/dataset/voc_op_test.cc @@ -20,18 +20,18 @@ #include "common/common.h" 
#include "common/utils.h" -#include "dataset/core/client.h" -#include "dataset/core/global_context.h" -#include "dataset/engine/datasetops/source/voc_op.h" -#include "dataset/engine/datasetops/source/sampler/distributed_sampler.h" -#include "dataset/engine/datasetops/source/sampler/pk_sampler.h" -#include "dataset/engine/datasetops/source/sampler/random_sampler.h" -#include "dataset/engine/datasetops/source/sampler/sampler.h" -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" -#include "dataset/engine/datasetops/source/sampler/subset_random_sampler.h" -#include "dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" -#include "dataset/util/path.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/datasetops/source/voc_op.h" +#include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/pk_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/status.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" #include "securec.h" diff --git a/tests/ut/cpp/dataset/weighted_random_sampler_test.cc b/tests/ut/cpp/dataset/weighted_random_sampler_test.cc index d146ed10ac..bb3079aec8 100644 --- a/tests/ut/cpp/dataset/weighted_random_sampler_test.cc +++ b/tests/ut/cpp/dataset/weighted_random_sampler_test.cc @@ -16,11 +16,11 @@ #include "common/common.h" #include "gtest/gtest.h" -#include "dataset/core/constants.h" 
-#include "dataset/core/tensor.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/datasetops/source/sampler/sampler.h" -#include "dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" #include "utils/log_adapter.h" #include diff --git a/tests/ut/cpp/dataset/zip_op_test.cc b/tests/ut/cpp/dataset/zip_op_test.cc index b387341398..8d74cb0969 100644 --- a/tests/ut/cpp/dataset/zip_op_test.cc +++ b/tests/ut/cpp/dataset/zip_op_test.cc @@ -21,17 +21,17 @@ #include #include #include -#include "dataset/core/client.h" -#include "dataset/core/constants.h" -#include "dataset/engine/datasetops/map_op.h" -#include "dataset/engine/datasetops/zip_op.h" -#include "dataset/core/tensor.h" -#include "dataset/core/config_manager.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/engine/datasetops/map_op.h" +#include "minddata/dataset/engine/datasetops/zip_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/config_manager.h" #include "common/common.h" #include "common/utils.h" -#include "dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/data_buffer.h" #include "gtest/gtest.h" -#include "dataset/core/global_context.h" +#include "minddata/dataset/core/global_context.h" #include "utils/log_adapter.h" namespace common = mindspore::common; @@ -58,7 +58,7 @@ TEST_F(MindDataTestZipOp, MindDataTestZipOpDefault) { auto my_tree = std::make_shared(); // Creating TFReaderOp - std::string dataset_path = datasets_root_path_ + "/test_tf_file_3_images_1/train-0000-of-0001.data"; + std::string dataset_path = datasets_root_path_ + 
"/test_tf_file_3_images/train-0000-of-0001.data"; std::string dataset_path2 = datasets_root_path_ + "/testBatchDataset/test.data"; std::shared_ptr my_tfreader_op; rc = TFReaderOp::Builder() @@ -142,7 +142,7 @@ TEST_F(MindDataTestZipOp, MindDataTestZipOpRepeat) { MS_LOG(INFO) << "UT test TestZipRepeat."; auto my_tree = std::make_shared(); - std::string dataset_path = datasets_root_path_ + "/test_tf_file_3_images_1/train-0000-of-0001.data"; + std::string dataset_path = datasets_root_path_ + "/test_tf_file_3_images/train-0000-of-0001.data"; std::string dataset_path2 = datasets_root_path_ + "/testBatchDataset/test.data"; std::shared_ptr my_tfreader_op; rc = TFReaderOp::Builder() diff --git a/tests/ut/cpp/device/ascend_kernel_runtime_test.cc b/tests/ut/cpp/device/ascend_kernel_runtime_test.cc index effa0b212d..2aa9512808 100644 --- a/tests/ut/cpp/device/ascend_kernel_runtime_test.cc +++ b/tests/ut/cpp/device/ascend_kernel_runtime_test.cc @@ -18,7 +18,7 @@ #include "common/common_test.h" -#include "device/kernel_runtime.h" +#include "runtime/device/kernel_runtime.h" #include "./common.h" namespace mindspore { diff --git a/tests/ut/cpp/device/ascend_profiling_test.cc b/tests/ut/cpp/device/ascend_profiling_test.cc index 2829a5fd4a..f862d84c4a 100644 --- a/tests/ut/cpp/device/ascend_profiling_test.cc +++ b/tests/ut/cpp/device/ascend_profiling_test.cc @@ -18,12 +18,12 @@ #include "./prof_reporter.h" #include "common/common_test.h" -#include "device/ascend/profiling/profiling_manager.h" +#include "runtime/device/ascend/profiling/profiling_manager.h" #include "./common.h" #define private public -#include "device/ascend/profiling/plugin_impl.h" +#include "runtime/device/ascend/profiling/plugin_impl.h" #undef private -#include "device/ascend/profiling/profiling_engine_impl.h" +#include "runtime/device/ascend/profiling/profiling_engine_impl.h" namespace mindspore { namespace device { diff --git a/tests/ut/cpp/ir/anf_test.cc b/tests/ut/cpp/ir/anf_test.cc index 
c649518e21..9b217a2321 100644 --- a/tests/ut/cpp/ir/anf_test.cc +++ b/tests/ut/cpp/ir/anf_test.cc @@ -19,7 +19,7 @@ #include "common/common_test.h" #include "ir/anf.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "./common.h" namespace mindspore { diff --git a/tests/ut/cpp/ir/clone_test.cc b/tests/ut/cpp/ir/clone_test.cc index bb8cae7fbb..20da3fb8b5 100644 --- a/tests/ut/cpp/ir/clone_test.cc +++ b/tests/ut/cpp/ir/clone_test.cc @@ -21,7 +21,7 @@ #include "ir/manager.h" #include "utils/log_adapter.h" #include "ir/func_graph_cloner.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse.h" #include "utils/graph_utils.h" #include "debug/draw.h" #include "./common.h" diff --git a/tests/ut/cpp/ir/manager_test.cc b/tests/ut/cpp/ir/manager_test.cc index 04b584ec10..3e6d1a312c 100644 --- a/tests/ut/cpp/ir/manager_test.cc +++ b/tests/ut/cpp/ir/manager_test.cc @@ -18,8 +18,8 @@ #include "ir/dtype.h" #include "ir/manager.h" #include "ir/func_graph_cloner.h" -#include "pipeline/parse/parse.h" -#include "operator/ops.h" +#include "pipeline/jit/parse/parse.h" +#include "frontend/operator/ops.h" #include "utils/log_adapter.h" #include "debug/draw.h" #include "debug/label.h" diff --git a/tests/ut/cpp/ir/value_test.cc b/tests/ut/cpp/ir/value_test.cc index a71ef7a57f..b4ed5f438e 100644 --- a/tests/ut/cpp/ir/value_test.cc +++ b/tests/ut/cpp/ir/value_test.cc @@ -21,7 +21,7 @@ #include "common/common_test.h" #include "ir/value.h" -#include "pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/tests/ut/cpp/kernel/common_utils_test.cc b/tests/ut/cpp/kernel/common_utils_test.cc index 4bc05b5c05..83f7c59e52 100644 --- a/tests/ut/cpp/kernel/common_utils_test.cc +++ b/tests/ut/cpp/kernel/common_utils_test.cc @@ -16,7 +16,7 @@ #include #include "common/common_test.h" -#include "kernel/common_utils.h" +#include "backend/kernel_compiler/common_utils.h" 
namespace mindspore { namespace kernel { diff --git a/tests/ut/cpp/kernel/cpu/sparse_apply_adam_cpu_kernel_test.cc b/tests/ut/cpp/kernel/cpu/sparse_apply_adam_cpu_kernel_test.cc index 2a6b80f9e7..e5cba86230 100644 --- a/tests/ut/cpp/kernel/cpu/sparse_apply_adam_cpu_kernel_test.cc +++ b/tests/ut/cpp/kernel/cpu/sparse_apply_adam_cpu_kernel_test.cc @@ -18,7 +18,7 @@ #include "common/common_test.h" #define private public #define protected public -#include "kernel/cpu/sparse_apply_adam_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/sparse_apply_adam_cpu_kernel.h" #undef private #undef protected @@ -58,9 +58,12 @@ class SparseApplyAdamCpuKernelTest : public UT::Common { inputs_.push_back(CreateKernelAddress(indices.data())); } - void CreateWorkspaceAddress(std::vector &new_grad, std::vector &new_indices, std::vector &m_t) { + void CreateWorkspaceAddress(std::vector &new_grad, std::vector &new_indices, std::vector &tmp_grad, + std::vector &tmp_indices, std::vector &m_t) { workspace_.push_back(CreateKernelAddress(new_grad.data())); workspace_.push_back(CreateKernelAddress(new_indices.data())); + workspace_.push_back(CreateKernelAddress(tmp_grad.data())); + workspace_.push_back(CreateKernelAddress(tmp_indices.data())); workspace_.push_back(CreateKernelAddress(m_t.data())); } @@ -95,8 +98,10 @@ TEST_F(SparseApplyAdamCpuKernelTest, dense_test) { CreateInputAddress(indices); std::vector new_grad(3 * 3 * 3); std::vector new_indices(3); + std::vector tmp_grad(3 * 3 * 3); + std::vector tmp_indices(3); std::vector m_t(3 * 3 * 3); - CreateWorkspaceAddress(new_grad, new_indices, m_t); + CreateWorkspaceAddress(new_grad, new_indices, tmp_grad, tmp_indices, m_t); sparse_adam_->Launch(inputs_, workspace_, outputs_); for (size_t i = 0; i < 3 * 3 * 3; ++i) { EXPECT_TRUE(std::fabs(var_[i] - 0.999684) < 1e-6); @@ -120,8 +125,10 @@ TEST_F(SparseApplyAdamCpuKernelTest, sparse_test1) { CreateInputAddress(indices); std::vector new_grad(3 * 3 * 3); std::vector new_indices(3); + std::vector 
tmp_grad(3 * 3 * 3); + std::vector tmp_indices(3); std::vector m_t(3 * 3 * 3); - CreateWorkspaceAddress(new_grad, new_indices, m_t); + CreateWorkspaceAddress(new_grad, new_indices, tmp_grad, tmp_indices, m_t); sparse_adam_->Launch(inputs_, workspace_, outputs_); for (size_t i = 0; i < 3 * 3; ++i) { EXPECT_TRUE(std::fabs(var_[i] - 0.999684) < 1e-6); @@ -149,8 +156,10 @@ TEST_F(SparseApplyAdamCpuKernelTest, sparse_test2) { CreateInputAddress(indices); std::vector new_grad(3 * 3 * 3); std::vector new_indices(3); + std::vector tmp_grad(3 * 3 * 3); + std::vector tmp_indices(3); std::vector m_t(3 * 3 * 3); - CreateWorkspaceAddress(new_grad, new_indices, m_t); + CreateWorkspaceAddress(new_grad, new_indices, tmp_grad, tmp_indices, m_t); sparse_adam_->Launch(inputs_, workspace_, outputs_); for (size_t i = 0; i < 3 * 3; ++i) { EXPECT_TRUE(std::fabs(var_[i] - 0.999715) < 1e-6); diff --git a/tests/ut/cpp/kernel/cpu/sparse_apply_ftrl_cpu_kernel_test.cc b/tests/ut/cpp/kernel/cpu/sparse_apply_ftrl_cpu_kernel_test.cc index c5c2394538..230c8cbf9e 100644 --- a/tests/ut/cpp/kernel/cpu/sparse_apply_ftrl_cpu_kernel_test.cc +++ b/tests/ut/cpp/kernel/cpu/sparse_apply_ftrl_cpu_kernel_test.cc @@ -18,7 +18,7 @@ #include "common/common_test.h" #define private public #define protected public -#include "kernel/cpu/sparse_apply_ftrl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/sparse_apply_ftrl_cpu_kernel.h" #undef private #undef protected @@ -56,9 +56,12 @@ class SparseApplyFtrlCpuKernelTest : public UT::Common { inputs_.push_back(CreateKernelAddress(indices.data())); } - void CreateWorkspaceAddress(std::vector &new_grad, std::vector &new_indices) { + void CreateWorkspaceAddress(std::vector &new_grad, std::vector &new_indices, std::vector &tmp_grad, + std::vector &tmp_indices) { workspace_.push_back(CreateKernelAddress(new_grad.data())); workspace_.push_back(CreateKernelAddress(new_indices.data())); + workspace_.push_back(CreateKernelAddress(tmp_grad.data())); + 
workspace_.push_back(CreateKernelAddress(tmp_indices.data())); } std::vector var_; @@ -86,7 +89,9 @@ TEST_F(SparseApplyFtrlCpuKernelTest, dense_test) { CreateInputAddress(indices); std::vector new_grad(3 * 3 * 3); std::vector new_indices(3); - CreateWorkspaceAddress(new_grad, new_indices); + std::vector tmp_grad(3 * 3 * 3); + std::vector tmp_indices(3); + CreateWorkspaceAddress(new_grad, new_indices, tmp_grad, tmp_indices); sparse_ftrl_->Launch(inputs_, workspace_, outputs_); for (size_t i = 0; i < 3 * 3 * 3; ++i) { EXPECT_TRUE(std::fabs(var_[i] - 0.291479) < 1e-6); @@ -110,7 +115,9 @@ TEST_F(SparseApplyFtrlCpuKernelTest, sparse_test1) { CreateInputAddress(indices); std::vector new_grad(3 * 3 * 3); std::vector new_indices(3); - CreateWorkspaceAddress(new_grad, new_indices); + std::vector tmp_grad(3 * 3 * 3); + std::vector tmp_indices(3); + CreateWorkspaceAddress(new_grad, new_indices, tmp_grad, tmp_indices); sparse_ftrl_->Launch(inputs_, workspace_, outputs_); for (size_t i = 0; i < 3 * 3; ++i) { EXPECT_TRUE(std::fabs(var_[i] - 0.291479) < 1e-6); @@ -138,7 +145,9 @@ TEST_F(SparseApplyFtrlCpuKernelTest, sparse_test2) { CreateInputAddress(indices); std::vector new_grad(3 * 3 * 3); std::vector new_indices(3); - CreateWorkspaceAddress(new_grad, new_indices); + std::vector tmp_grad(3 * 3 * 3); + std::vector tmp_indices(3); + CreateWorkspaceAddress(new_grad, new_indices, tmp_grad, tmp_indices); sparse_ftrl_->Launch(inputs_, workspace_, outputs_); for (size_t i = 0; i < 3 * 3; ++i) { EXPECT_EQ(var_[i], 1.0); diff --git a/tests/ut/cpp/kernel/cpu/sparse_apply_lazy_adam_cpu_kernel_test.cc b/tests/ut/cpp/kernel/cpu/sparse_apply_lazy_adam_cpu_kernel_test.cc index 1765ed896f..a829ead90e 100644 --- a/tests/ut/cpp/kernel/cpu/sparse_apply_lazy_adam_cpu_kernel_test.cc +++ b/tests/ut/cpp/kernel/cpu/sparse_apply_lazy_adam_cpu_kernel_test.cc @@ -18,7 +18,7 @@ #include "common/common_test.h" #define private public #define protected public -#include 
"kernel/cpu/sparse_apply_lazy_adam_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/sparse_apply_lazy_adam_cpu_kernel.h" #undef private #undef protected @@ -58,9 +58,12 @@ class SparseApplyLazyAdamCpuKernelTest : public UT::Common { inputs_.push_back(CreateKernelAddress(indices.data())); } - void CreateWorkspaceAddress(std::vector &new_grad, std::vector &new_indices) { + void CreateWorkspaceAddress(std::vector &new_grad, std::vector &new_indices, std::vector &tmp_grad, + std::vector &tmp_indices) { workspace_.push_back(CreateKernelAddress(new_grad.data())); workspace_.push_back(CreateKernelAddress(new_indices.data())); + workspace_.push_back(CreateKernelAddress(tmp_grad.data())); + workspace_.push_back(CreateKernelAddress(tmp_indices.data())); } std::vector var_; @@ -94,7 +97,9 @@ TEST_F(SparseApplyLazyAdamCpuKernelTest, dense_test) { CreateInputAddress(indices); std::vector new_grad(3 * 3 * 3); std::vector new_indices(3); - CreateWorkspaceAddress(new_grad, new_indices); + std::vector tmp_grad(3 * 3 * 3); + std::vector tmp_indices(3); + CreateWorkspaceAddress(new_grad, new_indices, tmp_grad, tmp_indices); sparse_lazy_adam_->Launch(inputs_, workspace_, outputs_); for (size_t i = 0; i < 3 * 3 * 3; ++i) { EXPECT_TRUE(std::fabs(var_[i] - 0.999684) < 1e-6); @@ -118,7 +123,9 @@ TEST_F(SparseApplyLazyAdamCpuKernelTest, sparse_test1) { CreateInputAddress(indices); std::vector new_grad(3 * 3 * 3); std::vector new_indices(3); - CreateWorkspaceAddress(new_grad, new_indices); + std::vector tmp_grad(3 * 3 * 3); + std::vector tmp_indices(3); + CreateWorkspaceAddress(new_grad, new_indices, tmp_grad, tmp_indices); sparse_lazy_adam_->Launch(inputs_, workspace_, outputs_); for (size_t i = 0; i < 3 * 3; ++i) { EXPECT_TRUE(std::fabs(var_[i] - 0.999684) < 1e-6); @@ -146,7 +153,9 @@ TEST_F(SparseApplyLazyAdamCpuKernelTest, sparse_test2) { CreateInputAddress(indices); std::vector new_grad(3 * 3 * 3); std::vector new_indices(3); - CreateWorkspaceAddress(new_grad, new_indices); + 
std::vector tmp_grad(3 * 3 * 3); + std::vector tmp_indices(3); + CreateWorkspaceAddress(new_grad, new_indices, tmp_grad, tmp_indices); sparse_lazy_adam_->Launch(inputs_, workspace_, outputs_); for (size_t i = 0; i < 3 * 3; ++i) { EXPECT_EQ(var_[i], 1.0); diff --git a/tests/ut/cpp/kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel_test.cc b/tests/ut/cpp/kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel_test.cc index 23f66db58c..64bd5d3ef3 100644 --- a/tests/ut/cpp/kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel_test.cc +++ b/tests/ut/cpp/kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel_test.cc @@ -18,7 +18,7 @@ #include "common/common_test.h" #define private public #define protected public -#include "kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/sparse_apply_proximal_adagrad_cpu_kernel.h" #undef private #undef protected @@ -54,9 +54,12 @@ class SparseApplyProximalAdagradCpuKernelTest : public UT::Common { inputs_.push_back(CreateKernelAddress(indices.data())); } - void CreateWorkspaceAddress(std::vector &new_grad, std::vector &new_indices) { + void CreateWorkspaceAddress(std::vector &new_grad, std::vector &new_indices, std::vector &tmp_grad, + std::vector &tmp_indices) { workspace_.push_back(CreateKernelAddress(new_grad.data())); workspace_.push_back(CreateKernelAddress(new_indices.data())); + workspace_.push_back(CreateKernelAddress(tmp_grad.data())); + workspace_.push_back(CreateKernelAddress(tmp_indices.data())); } std::vector var_; @@ -85,7 +88,9 @@ TEST_F(SparseApplyProximalAdagradCpuKernelTest, dense_test) { CreateInputAddress(indices); std::vector new_grad(3 * 3 * 3); std::vector new_indices(3); - CreateWorkspaceAddress(new_grad, new_indices); + std::vector tmp_grad(3 * 3 * 3); + std::vector tmp_indices(3); + CreateWorkspaceAddress(new_grad, new_indices, tmp_grad, tmp_indices); sparse_proximal_adagrad_->Launch(inputs_, workspace_, outputs_); for (size_t i = 0; i < 3 * 3 * 3; ++i) { 
EXPECT_TRUE(std::fabs(var_[i] - 0.9929289) < 1e-6); @@ -108,7 +113,9 @@ TEST_F(SparseApplyProximalAdagradCpuKernelTest, sparse_test1) { CreateInputAddress(indices); std::vector new_grad(3 * 3 * 3); std::vector new_indices(3); - CreateWorkspaceAddress(new_grad, new_indices); + std::vector tmp_grad(3 * 3 * 3); + std::vector tmp_indices(3); + CreateWorkspaceAddress(new_grad, new_indices, tmp_grad, tmp_indices); sparse_proximal_adagrad_->Launch(inputs_, workspace_, outputs_); for (size_t i = 0; i < 3 * 3; ++i) { EXPECT_TRUE(std::fabs(var_[i] - 0.9929289) < 1e-6); @@ -135,7 +142,9 @@ TEST_F(SparseApplyProximalAdagradCpuKernelTest, sparse_test2) { CreateInputAddress(indices); std::vector new_grad(3 * 3 * 3); std::vector new_indices(3); - CreateWorkspaceAddress(new_grad, new_indices); + std::vector tmp_grad(3 * 3 * 3); + std::vector tmp_indices(3); + CreateWorkspaceAddress(new_grad, new_indices, tmp_grad, tmp_indices); sparse_proximal_adagrad_->Launch(inputs_, workspace_, outputs_); for (size_t i = 0; i < 3 * 3; ++i) { EXPECT_EQ(var_[i], 1.0); diff --git a/tests/ut/cpp/mindrecord/ut_common.h b/tests/ut/cpp/mindrecord/ut_common.h index 8b244bf87a..ee943ab88e 100644 --- a/tests/ut/cpp/mindrecord/ut_common.h +++ b/tests/ut/cpp/mindrecord/ut_common.h @@ -25,10 +25,10 @@ #include "common/utils.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" -#include "mindrecord/include/shard_index.h" -#include "mindrecord/include/shard_header.h" -#include "mindrecord/include/shard_index_generator.h" -#include "mindrecord/include/shard_writer.h" +#include "minddata/mindrecord/include/shard_index.h" +#include "minddata/mindrecord/include/shard_header.h" +#include "minddata/mindrecord/include/shard_index_generator.h" +#include "minddata/mindrecord/include/shard_writer.h" using json = nlohmann::json; using std::ifstream; using std::pair; diff --git a/tests/ut/cpp/mindrecord/ut_shard.cc b/tests/ut/cpp/mindrecord/ut_shard.cc index b8c229e82f..11492e9f28 100644 --- 
a/tests/ut/cpp/mindrecord/ut_shard.cc +++ b/tests/ut/cpp/mindrecord/ut_shard.cc @@ -23,10 +23,10 @@ #include "configuration.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" -#include "mindrecord/include/shard_error.h" -#include "mindrecord/include/shard_index.h" -#include "mindrecord/include/shard_header.h" -#include "mindrecord/include/shard_statistics.h" +#include "minddata/mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/shard_index.h" +#include "minddata/mindrecord/include/shard_header.h" +#include "minddata/mindrecord/include/shard_statistics.h" #include "securec.h" #include "ut_common.h" diff --git a/tests/ut/cpp/mindrecord/ut_shard_header_test.cc b/tests/ut/cpp/mindrecord/ut_shard_header_test.cc index cea71c34b7..2ff3d1655d 100644 --- a/tests/ut/cpp/mindrecord/ut_shard_header_test.cc +++ b/tests/ut/cpp/mindrecord/ut_shard_header_test.cc @@ -29,13 +29,13 @@ #include "gtest/gtest.h" #include "utils/log_adapter.h" -#include "mindrecord/include/shard_error.h" -#include "mindrecord/include/shard_reader.h" -#include "mindrecord/include/shard_writer.h" -#include "mindrecord/include/shard_index.h" -#include "mindrecord/include/shard_header.h" -#include "mindrecord/include/shard_schema.h" -#include "mindrecord/include/shard_statistics.h" +#include "minddata/mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/shard_reader.h" +#include "minddata/mindrecord/include/shard_writer.h" +#include "minddata/mindrecord/include/shard_index.h" +#include "minddata/mindrecord/include/shard_header.h" +#include "minddata/mindrecord/include/shard_schema.h" +#include "minddata/mindrecord/include/shard_statistics.h" #include "securec.h" #include "ut_common.h" diff --git a/tests/ut/cpp/mindrecord/ut_shard_index_generator_test.cc b/tests/ut/cpp/mindrecord/ut_shard_index_generator_test.cc index 140fff4166..8e264aafa0 100644 --- a/tests/ut/cpp/mindrecord/ut_shard_index_generator_test.cc +++ 
b/tests/ut/cpp/mindrecord/ut_shard_index_generator_test.cc @@ -29,10 +29,10 @@ #include "gtest/gtest.h" #include "utils/log_adapter.h" -#include "mindrecord/include/shard_error.h" -#include "mindrecord/include/shard_index_generator.h" -#include "mindrecord/include/shard_index.h" -#include "mindrecord/include/shard_statistics.h" +#include "minddata/mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/shard_index_generator.h" +#include "minddata/mindrecord/include/shard_index.h" +#include "minddata/mindrecord/include/shard_statistics.h" #include "securec.h" #include "ut_common.h" diff --git a/tests/ut/cpp/mindrecord/ut_shard_operator_test.cc b/tests/ut/cpp/mindrecord/ut_shard_operator_test.cc index 7fe60c3bfa..4501ea0800 100644 --- a/tests/ut/cpp/mindrecord/ut_shard_operator_test.cc +++ b/tests/ut/cpp/mindrecord/ut_shard_operator_test.cc @@ -24,11 +24,11 @@ #include "common/utils.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" -#include "mindrecord/include/shard_category.h" -#include "mindrecord/include/shard_pk_sample.h" -#include "mindrecord/include/shard_reader.h" -#include "mindrecord/include/shard_sample.h" -#include "mindrecord/include/shard_shuffle.h" +#include "minddata/mindrecord/include/shard_category.h" +#include "minddata/mindrecord/include/shard_pk_sample.h" +#include "minddata/mindrecord/include/shard_reader.h" +#include "minddata/mindrecord/include/shard_sample.h" +#include "minddata/mindrecord/include/shard_shuffle.h" #include "ut_common.h" using mindspore::LogStream; diff --git a/tests/ut/cpp/mindrecord/ut_shard_page_test.cc b/tests/ut/cpp/mindrecord/ut_shard_page_test.cc index dabd3d819f..a7e444c80f 100644 --- a/tests/ut/cpp/mindrecord/ut_shard_page_test.cc +++ b/tests/ut/cpp/mindrecord/ut_shard_page_test.cc @@ -21,7 +21,7 @@ #include "gtest/gtest.h" #include "utils/log_adapter.h" -#include "mindrecord/include/shard_page.h" +#include "minddata/mindrecord/include/shard_page.h" #include "ut_common.h" using json = 
nlohmann::json; diff --git a/tests/ut/cpp/mindrecord/ut_shard_reader_test.cc b/tests/ut/cpp/mindrecord/ut_shard_reader_test.cc index c532fe28b8..8b5eb2cf69 100644 --- a/tests/ut/cpp/mindrecord/ut_shard_reader_test.cc +++ b/tests/ut/cpp/mindrecord/ut_shard_reader_test.cc @@ -24,8 +24,8 @@ #include "common/utils.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" -#include "mindrecord/include/shard_reader.h" -#include "mindrecord/include/shard_sample.h" +#include "minddata/mindrecord/include/shard_reader.h" +#include "minddata/mindrecord/include/shard_sample.h" #include "ut_common.h" using mindspore::LogStream; diff --git a/tests/ut/cpp/mindrecord/ut_shard_schema_test.cc b/tests/ut/cpp/mindrecord/ut_shard_schema_test.cc index 8d9654a5ef..6863a25791 100644 --- a/tests/ut/cpp/mindrecord/ut_shard_schema_test.cc +++ b/tests/ut/cpp/mindrecord/ut_shard_schema_test.cc @@ -29,9 +29,9 @@ #include "gtest/gtest.h" #include "utils/log_adapter.h" -#include "mindrecord/include/shard_page.h" -#include "mindrecord/include/shard_schema.h" -#include "mindrecord/include/shard_statistics.h" +#include "minddata/mindrecord/include/shard_page.h" +#include "minddata/mindrecord/include/shard_schema.h" +#include "minddata/mindrecord/include/shard_statistics.h" #include "securec.h" #include "ut_common.h" diff --git a/tests/ut/cpp/mindrecord/ut_shard_segment_test.cc b/tests/ut/cpp/mindrecord/ut_shard_segment_test.cc index 3fa6812352..6b99e44d89 100644 --- a/tests/ut/cpp/mindrecord/ut_shard_segment_test.cc +++ b/tests/ut/cpp/mindrecord/ut_shard_segment_test.cc @@ -30,7 +30,7 @@ #include "common/utils.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" -#include "mindrecord/include/shard_segment.h" +#include "minddata/mindrecord/include/shard_segment.h" #include "ut_common.h" using mindspore::LogStream; diff --git a/tests/ut/cpp/mindrecord/ut_shard_writer_test.cc b/tests/ut/cpp/mindrecord/ut_shard_writer_test.cc index 159efbf2f8..046b4f93d5 100644 --- 
a/tests/ut/cpp/mindrecord/ut_shard_writer_test.cc +++ b/tests/ut/cpp/mindrecord/ut_shard_writer_test.cc @@ -24,9 +24,9 @@ #include "common/utils.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" -#include "mindrecord/include/shard_reader.h" -#include "mindrecord/include/shard_writer.h" -#include "mindrecord/include/shard_index_generator.h" +#include "minddata/mindrecord/include/shard_reader.h" +#include "minddata/mindrecord/include/shard_writer.h" +#include "minddata/mindrecord/include/shard_index_generator.h" #include "securec.h" #include "ut_common.h" diff --git a/tests/ut/cpp/operator/cc_implementations_test.cc b/tests/ut/cpp/operator/cc_implementations_test.cc index bac885db88..4bc5aea964 100644 --- a/tests/ut/cpp/operator/cc_implementations_test.cc +++ b/tests/ut/cpp/operator/cc_implementations_test.cc @@ -18,7 +18,7 @@ #include #include "common/common_test.h" -#include "operator/cc_implementations.h" +#include "frontend/operator/cc_implementations.h" namespace mindspore { namespace prim { diff --git a/tests/ut/cpp/operator/composite_test.cc b/tests/ut/cpp/operator/composite_test.cc index 8ca318300a..a2108998bc 100644 --- a/tests/ut/cpp/operator/composite_test.cc +++ b/tests/ut/cpp/operator/composite_test.cc @@ -18,10 +18,10 @@ #include "common/common_test.h" #include "ir/anf.h" #include "ir/value.h" -#include "operator/composite/composite.h" -#include "operator/ops.h" -#include "pipeline/static_analysis/prim.h" -#include "pipeline/static_analysis/abstract_function.h" +#include "frontend/operator/composite/composite.h" +#include "frontend/operator/ops.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "pipeline/jit/static_analysis/abstract_function.h" #include "debug/trace.h" namespace mindspore { @@ -127,11 +127,17 @@ TEST_F(TestComposite, test_TupleSlice_arg_one_number) { try { trace::ClearTraceStack(); engine_->Run(tupleSliceGraphPtr, args_spec_list); - FAIL() << "Excepted exception :Args type is wrong"; + FAIL() << "Excepted exception: 
Args type is wrong"; } catch (pybind11::type_error const &err) { ASSERT_TRUE(true); + } catch (std::runtime_error const &err) { + if (std::strstr(err.what(), "TypeError") != nullptr) { + ASSERT_TRUE(true); + } else { + FAIL() << "Excepted exception: Args type is wrong, message: " << err.what(); + } } catch (...) { - FAIL() << "Excepted exception :Args type is wrong"; + FAIL() << "Excepted exception: Args type is wrong"; } } diff --git a/tests/ut/cpp/operator/grad_implementations_test.cc b/tests/ut/cpp/operator/grad_implementations_test.cc index e9035e63b6..f55553ab72 100644 --- a/tests/ut/cpp/operator/grad_implementations_test.cc +++ b/tests/ut/cpp/operator/grad_implementations_test.cc @@ -20,7 +20,7 @@ #include "ir/value.h" #include "ir/manager.h" #include "common/common_test.h" -#include "optimizer/ad/dfunctor.h" +#include "frontend/optimizer/ad/dfunctor.h" #include "debug/draw.h" #include "common/py_func_graph_fetcher.h" diff --git a/tests/ut/cpp/operator/ops_test.cc b/tests/ut/cpp/operator/ops_test.cc index 1d1389b54a..789b1cab25 100644 --- a/tests/ut/cpp/operator/ops_test.cc +++ b/tests/ut/cpp/operator/ops_test.cc @@ -19,8 +19,8 @@ #include "common/common_test.h" #include "ir/value.h" -#include "ir/primitive.h" -#include "operator/ops.h" +#include "ir/primitive_py.h" +#include "frontend/operator/ops.h" #include "./common.h" namespace mindspore { diff --git a/tests/ut/cpp/operator/prim2func_test.cc b/tests/ut/cpp/operator/prim2func_test.cc index 8f7c73a064..3952128b52 100644 --- a/tests/ut/cpp/operator/prim2func_test.cc +++ b/tests/ut/cpp/operator/prim2func_test.cc @@ -21,7 +21,7 @@ #include "ir/anf.h" #include "ir/dtype.h" -#include "operator/prim_to_function.h" +#include "frontend/operator/prim_to_function.h" namespace mindspore { namespace prim { diff --git a/tests/ut/cpp/optimizer/ad/ad_test.cc b/tests/ut/cpp/optimizer/ad/ad_test.cc index 34612b5474..3f861d3604 100644 --- a/tests/ut/cpp/optimizer/ad/ad_test.cc +++ b/tests/ut/cpp/optimizer/ad/ad_test.cc @@ 
-16,7 +16,7 @@ #include #include -#include "optimizer/ad/grad.h" +#include "frontend/optimizer/ad/grad.h" #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" #include "ir/manager.h" @@ -24,10 +24,10 @@ #include "ir/func_graph_cloner.h" #include "utils/log_adapter.h" #include "utils/graph_utils.h" -#include "pipeline/resource.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/resource.h" +#include "pipeline/jit/parse/parse.h" #include "debug/draw.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace ad { diff --git a/tests/ut/cpp/optimizer/cconv_test.cc b/tests/ut/cpp/optimizer/cconv_test.cc index 8bd6957e85..c004409058 100644 --- a/tests/ut/cpp/optimizer/cconv_test.cc +++ b/tests/ut/cpp/optimizer/cconv_test.cc @@ -20,7 +20,7 @@ #include "ir/func_graph_cloner.h" #include "utils/log_adapter.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse.h" #include "debug/draw.h" namespace mindspore { diff --git a/tests/ut/cpp/optimizer/clean_test.cc b/tests/ut/cpp/optimizer/clean_test.cc index c4f393c233..82bec1b5a8 100644 --- a/tests/ut/cpp/optimizer/clean_test.cc +++ b/tests/ut/cpp/optimizer/clean_test.cc @@ -19,9 +19,9 @@ #include "common/py_func_graph_fetcher.h" #include "utils/log_adapter.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse.h" #include "debug/draw.h" -#include "optimizer/clean.h" +#include "frontend/optimizer/clean.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/optimizer/lib_test.cc b/tests/ut/cpp/optimizer/lib_test.cc index bc8561f171..751b301283 100644 --- a/tests/ut/cpp/optimizer/lib_test.cc +++ b/tests/ut/cpp/optimizer/lib_test.cc @@ -25,11 +25,11 @@ #include "ir/manager.h" #include "ir/value.h" #include "ir/visitor.h" -#include "operator/ops.h" -#include "optimizer/irpass.h" -#include "pipeline/resource.h" +#include "frontend/operator/ops.h" +#include "frontend/optimizer/irpass.h" +#include 
"pipeline/jit/resource.h" #include "debug/draw.h" -#include "pipeline/parse/data_converter.h" +#include "pipeline/jit/parse/data_converter.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/optimizer/opt_test.cc b/tests/ut/cpp/optimizer/opt_test.cc index 2428d0dddb..c329adc4a5 100644 --- a/tests/ut/cpp/optimizer/opt_test.cc +++ b/tests/ut/cpp/optimizer/opt_test.cc @@ -22,13 +22,13 @@ #include "ir/anf.h" #include "ir/visitor.h" #include "ir/func_graph_cloner.h" -#include "optimizer/opt.h" -#include "optimizer/irpass.h" -#include "optimizer/irpass/arithmetic_simplify.h" +#include "frontend/optimizer/opt.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/irpass/arithmetic_simplify.h" #include "debug/draw.h" -#include "operator/ops.h" -#include "optimizer/cse.h" +#include "frontend/operator/ops.h" +#include "frontend/optimizer/cse.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/optimizer/optimizer_test.cc b/tests/ut/cpp/optimizer/optimizer_test.cc index ca7c589d47..c5c99531e4 100644 --- a/tests/ut/cpp/optimizer/optimizer_test.cc +++ b/tests/ut/cpp/optimizer/optimizer_test.cc @@ -20,10 +20,10 @@ #include "common/py_func_graph_fetcher.h" #include "ir/anf.h" -#include "operator/ops.h" -#include "optimizer/cse.h" -#include "optimizer/optimizer.h" -#include "optimizer/irpass.h" +#include "frontend/operator/ops.h" +#include "frontend/optimizer/cse.h" +#include "frontend/optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" #include "debug/draw.h" namespace mindspore { diff --git a/tests/ut/cpp/parallel/auto_parallel/dp_algo_test.cc b/tests/ut/cpp/parallel/auto_parallel/dp_algo_test.cc index 0462993672..a500afc859 100644 --- a/tests/ut/cpp/parallel/auto_parallel/dp_algo_test.cc +++ b/tests/ut/cpp/parallel/auto_parallel/dp_algo_test.cc @@ -15,12 +15,12 @@ */ #include "common/common_test.h" -#include "parallel/device_manager.h" -#include "parallel/auto_parallel/graph_costmodel.h" -#include 
"parallel/ops_info/matmul_info.h" -#include "parallel/ops_info/activation_info.h" -#include "parallel/ops_info/tmp_identity_info.h" -#include "parallel/auto_parallel/dp_algo_costmodel.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/auto_parallel/graph_costmodel.h" +#include "frontend/parallel/ops_info/matmul_info.h" +#include "frontend/parallel/ops_info/activation_info.h" +#include "frontend/parallel/ops_info/tmp_identity_info.h" +#include "frontend/parallel/auto_parallel/dp_algo_costmodel.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/auto_parallel/edge_costmodel_test.cc b/tests/ut/cpp/parallel/auto_parallel/edge_costmodel_test.cc index 291539c27d..190a189a2d 100644 --- a/tests/ut/cpp/parallel/auto_parallel/edge_costmodel_test.cc +++ b/tests/ut/cpp/parallel/auto_parallel/edge_costmodel_test.cc @@ -16,9 +16,9 @@ #include "common/common_test.h" #include "ir/dtype/number.h" -#include "parallel/device_manager.h" -#include "parallel/auto_parallel/edge_costmodel.h" -#include "parallel/ops_info/matmul_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/auto_parallel/edge_costmodel.h" +#include "frontend/parallel/ops_info/matmul_info.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/auto_parallel/graph_costmodel_test.cc b/tests/ut/cpp/parallel/auto_parallel/graph_costmodel_test.cc index 78d05c7235..7d63f03179 100644 --- a/tests/ut/cpp/parallel/auto_parallel/graph_costmodel_test.cc +++ b/tests/ut/cpp/parallel/auto_parallel/graph_costmodel_test.cc @@ -15,9 +15,9 @@ */ #include "common/common_test.h" -#include "parallel/device_manager.h" -#include "parallel/auto_parallel/graph_costmodel.h" -#include "parallel/ops_info/matmul_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/auto_parallel/graph_costmodel.h" +#include "frontend/parallel/ops_info/matmul_info.h" namespace mindspore { namespace parallel { diff --git 
a/tests/ut/cpp/parallel/auto_parallel/operator_costmodel_test.cc b/tests/ut/cpp/parallel/auto_parallel/operator_costmodel_test.cc index 919c5b43ec..b9b6bb67d9 100644 --- a/tests/ut/cpp/parallel/auto_parallel/operator_costmodel_test.cc +++ b/tests/ut/cpp/parallel/auto_parallel/operator_costmodel_test.cc @@ -15,10 +15,10 @@ */ #include -#include "parallel/tensor_layout/tensor_layout.h" -#include "parallel/tensor_layout/tensor_info.h" -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/device_manager.h" +#include "frontend/parallel/tensor_layout/tensor_layout.h" +#include "frontend/parallel/tensor_layout/tensor_info.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/device_manager.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/auto_parallel/rec_partition_test.cc b/tests/ut/cpp/parallel/auto_parallel/rec_partition_test.cc index 1eb65b468f..7942fa2a10 100644 --- a/tests/ut/cpp/parallel/auto_parallel/rec_partition_test.cc +++ b/tests/ut/cpp/parallel/auto_parallel/rec_partition_test.cc @@ -15,9 +15,9 @@ */ #include "common/common_test.h" -#include "parallel/auto_parallel/rec_core/rec_tensor.h" -#include "parallel/auto_parallel/rec_core/rec_graph.h" -#include "parallel/auto_parallel/rec_core/rec_partition.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_tensor.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_graph.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_partition.h" #include #include "ir/value.h" diff --git a/tests/ut/cpp/parallel/device_manager_test.cc b/tests/ut/cpp/parallel/device_manager_test.cc index 056896f514..0c048d647b 100644 --- a/tests/ut/cpp/parallel/device_manager_test.cc +++ b/tests/ut/cpp/parallel/device_manager_test.cc @@ -15,9 +15,9 @@ */ #include #include "common/common_test.h" -#include "parallel/device.h" -#include "parallel/device_manager.h" -#include "parallel/group_manager.h" +#include 
"frontend/parallel/device.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/group_manager.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/device_matrix_test.cc b/tests/ut/cpp/parallel/device_matrix_test.cc index 877a211df8..57a438e76e 100644 --- a/tests/ut/cpp/parallel/device_matrix_test.cc +++ b/tests/ut/cpp/parallel/device_matrix_test.cc @@ -16,7 +16,7 @@ #include #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" -#include "parallel/device_matrix.h" +#include "frontend/parallel/device_matrix.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/group_manager_test.cc b/tests/ut/cpp/parallel/group_manager_test.cc index e3d2b3a364..fa4abfcb7e 100644 --- a/tests/ut/cpp/parallel/group_manager_test.cc +++ b/tests/ut/cpp/parallel/group_manager_test.cc @@ -14,10 +14,10 @@ * limitations under the License. */ #include -#include "parallel/device_manager.h" +#include "frontend/parallel/device_manager.h" #include "common/common_test.h" -#include "parallel/device.h" -#include "parallel/group_manager.h" +#include "frontend/parallel/device.h" +#include "frontend/parallel/group_manager.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/activation_info_test.cc b/tests/ut/cpp/parallel/ops_info/activation_info_test.cc index a9fe9b4c48..5f09de9e48 100644 --- a/tests/ut/cpp/parallel/ops_info/activation_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/activation_info_test.cc @@ -18,10 +18,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/activation_info.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/activation_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { diff --git 
a/tests/ut/cpp/parallel/ops_info/activation_test.cc b/tests/ut/cpp/parallel/ops_info/activation_test.cc index 9af7203799..9d129b7a18 100644 --- a/tests/ut/cpp/parallel/ops_info/activation_test.cc +++ b/tests/ut/cpp/parallel/ops_info/activation_test.cc @@ -18,9 +18,9 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/activation_info.h" -#include "parallel/device_manager.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/activation_info.h" +#include "frontend/parallel/device_manager.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/gelu_info_test.cc b/tests/ut/cpp/parallel/ops_info/gelu_info_test.cc index e54d1f2423..e49ed4e79d 100644 --- a/tests/ut/cpp/parallel/ops_info/gelu_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/gelu_info_test.cc @@ -18,10 +18,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/activation_info.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/activation_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/generate_strategy_test.cc b/tests/ut/cpp/parallel/ops_info/generate_strategy_test.cc index 947ad60cca..125723868a 100644 --- a/tests/ut/cpp/parallel/ops_info/generate_strategy_test.cc +++ b/tests/ut/cpp/parallel/ops_info/generate_strategy_test.cc @@ -18,10 +18,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/arithmetic_info.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/arithmetic_info.h" +#include 
"frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/get_next_info_test.cc b/tests/ut/cpp/parallel/ops_info/get_next_info_test.cc index 503edf2eda..029e0f2dc6 100644 --- a/tests/ut/cpp/parallel/ops_info/get_next_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/get_next_info_test.cc @@ -18,10 +18,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/get_next_info.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/get_next_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/l2_normalize_info_test.cc b/tests/ut/cpp/parallel/ops_info/l2_normalize_info_test.cc index b59481e1f6..7037a85699 100644 --- a/tests/ut/cpp/parallel/ops_info/l2_normalize_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/l2_normalize_info_test.cc @@ -18,10 +18,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/l2_normalize_info.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/l2_normalize_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/log_softmax_info_test.cc b/tests/ut/cpp/parallel/ops_info/log_softmax_info_test.cc index cf5a4239a2..8de5c07226 100644 --- a/tests/ut/cpp/parallel/ops_info/log_softmax_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/log_softmax_info_test.cc @@ -18,10 +18,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" 
-#include "parallel/ops_info/activation_info.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/activation_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/matmul_info_test.cc b/tests/ut/cpp/parallel/ops_info/matmul_info_test.cc index f710f51265..2d5676f211 100644 --- a/tests/ut/cpp/parallel/ops_info/matmul_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/matmul_info_test.cc @@ -18,11 +18,11 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/matmul_info.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" -#include "parallel/auto_parallel/graph_costmodel.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/matmul_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" +#include "frontend/parallel/auto_parallel/graph_costmodel.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/onehot_info_test.cc b/tests/ut/cpp/parallel/ops_info/onehot_info_test.cc index 07d150a294..074e4582f0 100644 --- a/tests/ut/cpp/parallel/ops_info/onehot_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/onehot_info_test.cc @@ -18,10 +18,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/onehot_info.h" -#include "parallel/device_manager.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/onehot_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" namespace mindspore { namespace parallel { diff --git 
a/tests/ut/cpp/parallel/ops_info/onehot_info_test_axis_0.cc b/tests/ut/cpp/parallel/ops_info/onehot_info_test_axis_0.cc index c89bf97fb3..769d5bec45 100644 --- a/tests/ut/cpp/parallel/ops_info/onehot_info_test_axis_0.cc +++ b/tests/ut/cpp/parallel/ops_info/onehot_info_test_axis_0.cc @@ -18,10 +18,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/onehot_info.h" -#include "parallel/device_manager.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/onehot_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/pow_info_test.cc b/tests/ut/cpp/parallel/ops_info/pow_info_test.cc index 7b37a90fd8..f582640db8 100644 --- a/tests/ut/cpp/parallel/ops_info/pow_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/pow_info_test.cc @@ -18,10 +18,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/arithmetic_info.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/arithmetic_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/prelu_test.cc b/tests/ut/cpp/parallel/ops_info/prelu_test.cc index d6db1b8460..1d4cf5eff0 100644 --- a/tests/ut/cpp/parallel/ops_info/prelu_test.cc +++ b/tests/ut/cpp/parallel/ops_info/prelu_test.cc @@ -18,10 +18,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/prelu_info.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/strategy.h" 
+#include "frontend/parallel/ops_info/prelu_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/reduce_method_test.cc b/tests/ut/cpp/parallel/ops_info/reduce_method_test.cc index a1fe46ca33..64ba6af70b 100644 --- a/tests/ut/cpp/parallel/ops_info/reduce_method_test.cc +++ b/tests/ut/cpp/parallel/ops_info/reduce_method_test.cc @@ -18,11 +18,11 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/reduce_method_info.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/reduce_method_info.h" #include "common/py_func_graph_fetcher.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/reshape_test.cc b/tests/ut/cpp/parallel/ops_info/reshape_test.cc index fb60c6d250..8cc8390e9a 100644 --- a/tests/ut/cpp/parallel/ops_info/reshape_test.cc +++ b/tests/ut/cpp/parallel/ops_info/reshape_test.cc @@ -18,10 +18,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/reshape_info.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/reshape_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/softmax_entropy_loss_info_test.cc b/tests/ut/cpp/parallel/ops_info/softmax_entropy_loss_info_test.cc index 03634b9a6f..d370c168c9 100644 --- a/tests/ut/cpp/parallel/ops_info/softmax_entropy_loss_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/softmax_entropy_loss_info_test.cc @@ 
-18,10 +18,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/loss_info.h" -#include "parallel/device_manager.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/loss_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/softmax_info_test.cc b/tests/ut/cpp/parallel/ops_info/softmax_info_test.cc index bba6e89626..9c4205672b 100644 --- a/tests/ut/cpp/parallel/ops_info/softmax_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/softmax_info_test.cc @@ -18,10 +18,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/activation_info.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/activation_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/tanh_info_test.cc b/tests/ut/cpp/parallel/ops_info/tanh_info_test.cc index a892c5c84a..2be6c5bf7f 100644 --- a/tests/ut/cpp/parallel/ops_info/tanh_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/tanh_info_test.cc @@ -18,10 +18,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/activation_info.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/activation_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { diff --git 
a/tests/ut/cpp/parallel/ops_info/tensor_add_info_test.cc b/tests/ut/cpp/parallel/ops_info/tensor_add_info_test.cc index 42d292c605..b523652fcb 100644 --- a/tests/ut/cpp/parallel/ops_info/tensor_add_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/tensor_add_info_test.cc @@ -18,10 +18,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/arithmetic_info.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/arithmetic_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/tmpidentity_test.cc b/tests/ut/cpp/parallel/ops_info/tmpidentity_test.cc index eabac51e17..461a27d4ed 100644 --- a/tests/ut/cpp/parallel/ops_info/tmpidentity_test.cc +++ b/tests/ut/cpp/parallel/ops_info/tmpidentity_test.cc @@ -15,10 +15,10 @@ */ #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/device_manager.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/ops_info/tmp_identity_info.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/ops_info/tmp_identity_info.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/transpose_test.cc b/tests/ut/cpp/parallel/ops_info/transpose_test.cc index 991ec47820..fe5cbb01b3 100644 --- a/tests/ut/cpp/parallel/ops_info/transpose_test.cc +++ b/tests/ut/cpp/parallel/ops_info/transpose_test.cc @@ -18,10 +18,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/transpose_info.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/strategy.h" +#include 
"frontend/parallel/ops_info/transpose_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/step_auto_parallel_test.cc b/tests/ut/cpp/parallel/step_auto_parallel_test.cc index a1474ca244..6cf7ec66c6 100644 --- a/tests/ut/cpp/parallel/step_auto_parallel_test.cc +++ b/tests/ut/cpp/parallel/step_auto_parallel_test.cc @@ -14,12 +14,12 @@ * limitations under the License. */ #include "common/common_test.h" -#include "parallel/step_parallel.h" -#include "parallel/step_auto_parallel.h" -#include "parallel/auto_parallel/edge_costmodel.h" -#include "parallel/ops_info/operator_info.h" -#include "operator/ops.h" -#include "pipeline/static_analysis/static_analysis.h" +#include "frontend/parallel/step_parallel.h" +#include "frontend/parallel/step_auto_parallel.h" +#include "frontend/parallel/auto_parallel/edge_costmodel.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/operator/ops.h" +#include "pipeline/jit/static_analysis/static_analysis.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/step_parallel_test.cc b/tests/ut/cpp/parallel/step_parallel_test.cc index d8f8681a34..5657db8790 100644 --- a/tests/ut/cpp/parallel/step_parallel_test.cc +++ b/tests/ut/cpp/parallel/step_parallel_test.cc @@ -14,12 +14,12 @@ * limitations under the License. 
*/ #include "common/common_test.h" -#include "parallel/step_parallel.h" -#include "parallel/graph_util/generate_graph.h" +#include "frontend/parallel/step_parallel.h" +#include "frontend/parallel/graph_util/generate_graph.h" #include "common/py_func_graph_fetcher.h" #include "debug/draw.h" -#include "operator/ops.h" -#include "pipeline/static_analysis/static_analysis.h" +#include "frontend/operator/ops.h" +#include "pipeline/jit/static_analysis/static_analysis.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/strategy_test.cc b/tests/ut/cpp/parallel/strategy_test.cc index 9a2f92f018..c13b71944e 100644 --- a/tests/ut/cpp/parallel/strategy_test.cc +++ b/tests/ut/cpp/parallel/strategy_test.cc @@ -17,7 +17,7 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/tensor_layout/construct_operator_test.cc b/tests/ut/cpp/parallel/tensor_layout/construct_operator_test.cc index 2ba8cc9dfc..b80f199035 100644 --- a/tests/ut/cpp/parallel/tensor_layout/construct_operator_test.cc +++ b/tests/ut/cpp/parallel/tensor_layout/construct_operator_test.cc @@ -17,10 +17,10 @@ #include #include "common/common_test.h" #include "ir/value.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/matmul_info.h" -#include "parallel/device_manager.h" -#include "parallel/tensor_layout/construct_operator.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/matmul_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/tensor_layout/construct_operator.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/tensor_layout/redistribution_layout_transfer_test.cc b/tests/ut/cpp/parallel/tensor_layout/redistribution_layout_transfer_test.cc index 5291e2f48d..4ddc130a45 100644 --- 
a/tests/ut/cpp/parallel/tensor_layout/redistribution_layout_transfer_test.cc +++ b/tests/ut/cpp/parallel/tensor_layout/redistribution_layout_transfer_test.cc @@ -17,8 +17,8 @@ #include #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" -#include "parallel/tensor_layout/tensor_layout.h" -#include "parallel/tensor_layout/redistribution_layout_transfer.h" +#include "frontend/parallel/tensor_layout/tensor_layout.h" +#include "frontend/parallel/tensor_layout/redistribution_layout_transfer.h" #include "util_layout_gen_test.h" namespace mindspore { diff --git a/tests/ut/cpp/parallel/tensor_layout/redistribution_operator_infer_test.cc b/tests/ut/cpp/parallel/tensor_layout/redistribution_operator_infer_test.cc index 1b1dd4af04..f6caad2f9d 100644 --- a/tests/ut/cpp/parallel/tensor_layout/redistribution_operator_infer_test.cc +++ b/tests/ut/cpp/parallel/tensor_layout/redistribution_operator_infer_test.cc @@ -16,8 +16,8 @@ #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" -#include "parallel/tensor_layout/redistribution_operator_infer.h" -#include "parallel/device_manager.h" +#include "frontend/parallel/tensor_layout/redistribution_operator_infer.h" +#include "frontend/parallel/device_manager.h" #include "util_layout_gen_test.h" namespace mindspore { diff --git a/tests/ut/cpp/parallel/tensor_layout/reshape_layout_transfer_test.cc b/tests/ut/cpp/parallel/tensor_layout/reshape_layout_transfer_test.cc index 9d6152721e..11f471ea33 100644 --- a/tests/ut/cpp/parallel/tensor_layout/reshape_layout_transfer_test.cc +++ b/tests/ut/cpp/parallel/tensor_layout/reshape_layout_transfer_test.cc @@ -17,8 +17,8 @@ #include #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" -#include "parallel/tensor_layout/tensor_layout.h" -#include "parallel/tensor_layout/reshape_layout_transfer.h" +#include "frontend/parallel/tensor_layout/tensor_layout.h" +#include "frontend/parallel/tensor_layout/reshape_layout_transfer.h" #include 
"util_layout_gen_test.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/parallel/tensor_layout/shape_util_test.cc b/tests/ut/cpp/parallel/tensor_layout/shape_util_test.cc index b5e2ea3e5b..824ab876cd 100644 --- a/tests/ut/cpp/parallel/tensor_layout/shape_util_test.cc +++ b/tests/ut/cpp/parallel/tensor_layout/shape_util_test.cc @@ -16,7 +16,7 @@ #include #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" -#include "parallel/tensor_layout/shape_util.h" +#include "frontend/parallel/tensor_layout/shape_util.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/tensor_layout/tensor_layout_test.cc b/tests/ut/cpp/parallel/tensor_layout/tensor_layout_test.cc index bae05d650a..15fb16f088 100644 --- a/tests/ut/cpp/parallel/tensor_layout/tensor_layout_test.cc +++ b/tests/ut/cpp/parallel/tensor_layout/tensor_layout_test.cc @@ -17,7 +17,7 @@ #include #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" -#include "parallel/tensor_layout/tensor_layout.h" +#include "frontend/parallel/tensor_layout/tensor_layout.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/tensor_layout/tensor_redistribution_test.cc b/tests/ut/cpp/parallel/tensor_layout/tensor_redistribution_test.cc index 572763faa3..40a4017c4b 100644 --- a/tests/ut/cpp/parallel/tensor_layout/tensor_redistribution_test.cc +++ b/tests/ut/cpp/parallel/tensor_layout/tensor_redistribution_test.cc @@ -17,7 +17,7 @@ #include #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/tensor_layout/util_layout_gen_test.cc b/tests/ut/cpp/parallel/tensor_layout/util_layout_gen_test.cc index 6f5c1e49ed..330b571ae7 100644 --- a/tests/ut/cpp/parallel/tensor_layout/util_layout_gen_test.cc +++ 
b/tests/ut/cpp/parallel/tensor_layout/util_layout_gen_test.cc @@ -21,7 +21,7 @@ #include #include #include -#include "parallel/tensor_layout/shape_util.h" +#include "frontend/parallel/tensor_layout/shape_util.h" #include "common/common_test.h" using std::pow; diff --git a/tests/ut/cpp/parallel/tensor_layout/util_layout_gen_test.h b/tests/ut/cpp/parallel/tensor_layout/util_layout_gen_test.h index a359cadbea..c16a1fc6d4 100644 --- a/tests/ut/cpp/parallel/tensor_layout/util_layout_gen_test.h +++ b/tests/ut/cpp/parallel/tensor_layout/util_layout_gen_test.h @@ -20,7 +20,7 @@ #include #include -#include "parallel/tensor_layout/tensor_layout.h" +#include "frontend/parallel/tensor_layout/tensor_layout.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/virtual_dataset_test.cc b/tests/ut/cpp/parallel/virtual_dataset_test.cc index 1d3ff081c7..4cafdebc17 100644 --- a/tests/ut/cpp/parallel/virtual_dataset_test.cc +++ b/tests/ut/cpp/parallel/virtual_dataset_test.cc @@ -17,10 +17,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/virtual_dataset_info.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/virtual_dataset_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/pipeline/parse/parser_abnormal_test.cc b/tests/ut/cpp/pipeline/parse/parser_abnormal_test.cc index 3c97cfb203..2d21b591ea 100644 --- a/tests/ut/cpp/pipeline/parse/parser_abnormal_test.cc +++ b/tests/ut/cpp/pipeline/parse/parser_abnormal_test.cc @@ -19,7 +19,7 @@ #include "common/py_func_graph_fetcher.h" #include "utils/log_adapter.h" #include "utils/profile.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse.h" #include "debug/draw.h" namespace mindspore { diff --git 
a/tests/ut/cpp/pipeline/parse/parser_class_test.cc b/tests/ut/cpp/pipeline/parse/parser_class_test.cc index dcedc32b1b..8d9cc8ebc8 100644 --- a/tests/ut/cpp/pipeline/parse/parser_class_test.cc +++ b/tests/ut/cpp/pipeline/parse/parser_class_test.cc @@ -19,7 +19,7 @@ #include "common/py_func_graph_fetcher.h" #include "utils/log_adapter.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse.h" #include "debug/draw.h" namespace mindspore { diff --git a/tests/ut/cpp/pipeline/parse/parser_integrate_test.cc b/tests/ut/cpp/pipeline/parse/parser_integrate_test.cc index fd8438503f..1f54298a81 100644 --- a/tests/ut/cpp/pipeline/parse/parser_integrate_test.cc +++ b/tests/ut/cpp/pipeline/parse/parser_integrate_test.cc @@ -18,7 +18,7 @@ #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" #include "utils/log_adapter.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse.h" #include "debug/draw.h" namespace mindspore { diff --git a/tests/ut/cpp/pipeline/parse/parser_primitive_test.cc b/tests/ut/cpp/pipeline/parse/parser_primitive_test.cc index adc09cca32..937ad1fe5e 100644 --- a/tests/ut/cpp/pipeline/parse/parser_primitive_test.cc +++ b/tests/ut/cpp/pipeline/parse/parser_primitive_test.cc @@ -19,7 +19,7 @@ #include "common/py_func_graph_fetcher.h" #include "utils/log_adapter.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse.h" #include "debug/draw.h" namespace mindspore { diff --git a/tests/ut/cpp/pipeline/parse/parser_test.cc b/tests/ut/cpp/pipeline/parse/parser_test.cc index 4d7731dfd1..f1d9087110 100644 --- a/tests/ut/cpp/pipeline/parse/parser_test.cc +++ b/tests/ut/cpp/pipeline/parse/parser_test.cc @@ -19,7 +19,7 @@ #include "common/py_func_graph_fetcher.h" #include "utils/log_adapter.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse.h" #include "debug/draw.h" namespace mindspore { diff --git a/tests/ut/cpp/pipeline/parse/resolve_test.cc 
b/tests/ut/cpp/pipeline/parse/resolve_test.cc index 8ade92bb34..5a2d0ebd7f 100644 --- a/tests/ut/cpp/pipeline/parse/resolve_test.cc +++ b/tests/ut/cpp/pipeline/parse/resolve_test.cc @@ -19,7 +19,7 @@ #include "common/py_func_graph_fetcher.h" #include "utils/log_adapter.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse.h" #include "debug/draw.h" namespace mindspore { diff --git a/tests/ut/cpp/pipeline/resource_test.cc b/tests/ut/cpp/pipeline/resource_test.cc index 09bd2060dc..b6be393652 100644 --- a/tests/ut/cpp/pipeline/resource_test.cc +++ b/tests/ut/cpp/pipeline/resource_test.cc @@ -18,9 +18,9 @@ #include "common/common_test.h" #include "utils/log_adapter.h" -#include "pipeline/resource.h" +#include "pipeline/jit/resource.h" #include "ir/primitive.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace pipeline { diff --git a/tests/ut/cpp/pipeline/static_analysis/data_test.cc b/tests/ut/cpp/pipeline/static_analysis/data_test.cc index 61a22bbe5f..fb9d8b1f7e 100644 --- a/tests/ut/cpp/pipeline/static_analysis/data_test.cc +++ b/tests/ut/cpp/pipeline/static_analysis/data_test.cc @@ -18,9 +18,9 @@ #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pipeline/static_analysis/prim.h" -#include "operator/ops.h" -#include "pipeline/static_analysis/utils.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "frontend/operator/ops.h" +#include "abstract/utils.h" namespace mindspore { namespace abstract { diff --git a/tests/ut/cpp/pipeline/static_analysis/evaluator_test.cc b/tests/ut/cpp/pipeline/static_analysis/evaluator_test.cc index eebe6c252b..664f353faa 100644 --- a/tests/ut/cpp/pipeline/static_analysis/evaluator_test.cc +++ b/tests/ut/cpp/pipeline/static_analysis/evaluator_test.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "pipeline/static_analysis/evaluator.h" -#include "pipeline/static_analysis/prim.h" +#include "pipeline/jit/static_analysis/evaluator.h" +#include "pipeline/jit/static_analysis/prim.h" #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" diff --git a/tests/ut/cpp/pipeline/static_analysis/helper.cc b/tests/ut/cpp/pipeline/static_analysis/helper.cc index db697e95e0..ebf8c233e2 100644 --- a/tests/ut/cpp/pipeline/static_analysis/helper.cc +++ b/tests/ut/cpp/pipeline/static_analysis/helper.cc @@ -16,7 +16,7 @@ #include "pipeline/static_analysis/helper.h" -#include "pipeline/static_analysis/prim.h" +#include "pipeline/jit/static_analysis/prim.h" namespace mindspore { namespace abstract { diff --git a/tests/ut/cpp/pipeline/static_analysis/helper.h b/tests/ut/cpp/pipeline/static_analysis/helper.h index 7ca902a1e9..44c647779e 100644 --- a/tests/ut/cpp/pipeline/static_analysis/helper.h +++ b/tests/ut/cpp/pipeline/static_analysis/helper.h @@ -17,7 +17,7 @@ #ifndef TESTS_UT_PIPELINE_STATIC_ANALYSIS_HELPER_H_ #define TESTS_UT_PIPELINE_STATIC_ANALYSIS_HELPER_H_ -#include "pipeline/static_analysis/evaluator.h" +#include "pipeline/jit/static_analysis/evaluator.h" namespace mindspore { namespace abstract { diff --git a/tests/ut/cpp/pipeline/static_analysis/prim_test.cc b/tests/ut/cpp/pipeline/static_analysis/prim_test.cc index 04a14a0f29..8ebea4d212 100644 --- a/tests/ut/cpp/pipeline/static_analysis/prim_test.cc +++ b/tests/ut/cpp/pipeline/static_analysis/prim_test.cc @@ -21,9 +21,9 @@ #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" #include "ir/manager.h" -#include "pipeline/static_analysis/prim.h" +#include "pipeline/jit/static_analysis/prim.h" #include "pipeline/static_analysis/helper.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "debug/draw.h" #include "ir/tensor.h" #include "utils/symbolic.h" diff --git a/tests/ut/cpp/pipeline/static_analysis/specialize_test.cc 
b/tests/ut/cpp/pipeline/static_analysis/specialize_test.cc index 23ea55f8f7..e32a86d9be 100644 --- a/tests/ut/cpp/pipeline/static_analysis/specialize_test.cc +++ b/tests/ut/cpp/pipeline/static_analysis/specialize_test.cc @@ -20,8 +20,8 @@ #include "common/py_func_graph_fetcher.h" #include "ir/manager.h" -#include "pipeline/static_analysis/prim.h" -#include "pipeline/static_analysis/program_specialize.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "pipeline/jit/static_analysis/program_specialize.h" #include "pipeline/static_analysis/helper.h" #include "utils/log_adapter.h" #include "utils/graph_utils.h" diff --git a/tests/ut/cpp/pipeline/static_analysis/static_analysis_test.cc b/tests/ut/cpp/pipeline/static_analysis/static_analysis_test.cc index 8a58969e12..78d3a7083a 100644 --- a/tests/ut/cpp/pipeline/static_analysis/static_analysis_test.cc +++ b/tests/ut/cpp/pipeline/static_analysis/static_analysis_test.cc @@ -16,16 +16,16 @@ #include #include -#include "pipeline/static_analysis/prim.h" +#include "pipeline/jit/static_analysis/prim.h" #include "pipeline/static_analysis/helper.h" #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" #include "ir/manager.h" #include "ir/tensor.h" -#include "operator/ops.h" -#include "pipeline/parse/parse.h" -#include "pipeline/parse/data_converter.h" -#include "pipeline/resource.h" +#include "frontend/operator/ops.h" +#include "pipeline/jit/parse/parse.h" +#include "pipeline/jit/parse/data_converter.h" +#include "pipeline/jit/resource.h" #include "debug/draw.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/pre_activate/ascend/buffer_fusion/buffer_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/buffer_fusion/buffer_fusion_test.cc index 483c144930..58b810a3e1 100644 --- a/tests/ut/cpp/pre_activate/ascend/buffer_fusion/buffer_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/buffer_fusion/buffer_fusion_test.cc @@ -17,23 +17,23 @@ #include "common/backend_common_test.h" #include 
"common/py_func_graph_fetcher.h" #include "debug/anf_ir_dump.h" -#include "kernel/kernel.h" -#include "device/kernel_info.h" -#include "pre_activate/common/optimizer.h" -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/ascend/buffer_fusion/ub_pattern_fusion.h" -#include "pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.h" +#include "backend/kernel_compiler/kernel.h" +#include "runtime/device/kernel_info.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.h" +#include "backend/optimizer/ascend/buffer_fusion/eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/conv_single_in_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/conv_double_in_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h" +#include 
"backend/optimizer/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/segment_eltwise_fusion_pass.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/pre_activate/ascend/enhancer/getnext_memcpy_elimination.cc b/tests/ut/cpp/pre_activate/ascend/enhancer/getnext_memcpy_elimination.cc index e4ab2431b7..ba64c206af 100644 --- a/tests/ut/cpp/pre_activate/ascend/enhancer/getnext_memcpy_elimination.cc +++ b/tests/ut/cpp/pre_activate/ascend/enhancer/getnext_memcpy_elimination.cc @@ -15,14 +15,14 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "ir/tensor.h" #include "debug/anf_ir_dump.h" #include "utils/utils.h" -#include "kernel/kernel_build_info.h" -#include "pre_activate/common/optimizer.h" -#include "mindspore/ccsrc/pre_activate/ascend/enhancer/getnext_memcpy_elimination.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "backend/optimizer/common/optimizer.h" +#include "mindspore/ccsrc/backend/optimizer/ascend/enhancer/getnext_memcpy_elimination.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.cc b/tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.cc index 56bf0ae4e0..2be25212e8 100644 --- a/tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.cc +++ b/tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.cc @@ 
-15,16 +15,16 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "session/ascend_session.h" -#include "session/anf_runtime_algorithm.h" -#include "pipeline/resource.h" -#include "operator/ops.h" +#include "backend/session/ascend_session.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "pipeline/jit/resource.h" +#include "frontend/operator/ops.h" #include "ir/manager.h" #include "debug/anf_ir_dump.h" #include "utils/utils.h" -#include "kernel/kernel_build_info.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/enhancer/insert_memcpy_async_for_getnext.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op_test.cc b/tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op_test.cc index 22cf70ded3..103d0f21a4 100644 --- a/tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op_test.cc @@ -15,16 +15,16 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "ir/tensor.h" #include "debug/anf_ir_dump.h" #include "utils/utils.h" -#include "kernel/kernel_build_info.h" -#include "pre_activate/common/optimizer.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "backend/optimizer/common/optimizer.h" #define private public #define protected public -#include "pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op.h" +#include "backend/optimizer/ascend/enhancer/insert_memcpy_async_for_hccl_op.h" 
#undef private #undef protected namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/ascend/format_type/check_consistency_test.cc b/tests/ut/cpp/pre_activate/ascend/format_type/check_consistency_test.cc index 72ce73e20f..89d680f442 100644 --- a/tests/ut/cpp/pre_activate/ascend/format_type/check_consistency_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/format_type/check_consistency_test.cc @@ -16,18 +16,18 @@ #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" #include "common/backend_common_test.h" -#include "session/ascend_session.h" -#include "session/anf_runtime_algorithm.h" -#include "pipeline/resource.h" -#include "pipeline/action.h" -#include "operator/ops.h" +#include "backend/session/ascend_session.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "pipeline/jit/resource.h" +#include "pipeline/jit/action.h" +#include "frontend/operator/ops.h" #include "ir/tensor.h" #include "ir/manager.h" #include "debug/anf_ir_dump.h" #include "utils/utils.h" -#include "kernel/kernel_build_info.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/format_type/check_consistency.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/format_type/check_consistency.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/pre_activate/ascend/format_type/insert_cast_test.cc b/tests/ut/cpp/pre_activate/ascend/format_type/insert_cast_test.cc index 317eace6c6..2b61a49048 100644 --- a/tests/ut/cpp/pre_activate/ascend/format_type/insert_cast_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/format_type/insert_cast_test.cc @@ -14,17 +14,17 @@ * limitations under the License. 
*/ #include "common/backend_common_test.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "ir/tensor.h" #include "ir/manager.h" #include "debug/anf_ir_dump.h" #include "common/py_func_graph_fetcher.h" -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/pass_manager.h" -#include "device/kernel_info.h" -#include "pre_activate/ascend/format_type/insert_cast.h" -#include "kernel/kernel_build_info.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/pass_manager.h" +#include "runtime/device/kernel_info.h" +#include "backend/optimizer/ascend/format_type/insert_cast.h" +#include "backend/kernel_compiler/kernel_build_info.h" #include "utils/utils.h" #include "utils/context/ms_context.h" diff --git a/tests/ut/cpp/pre_activate/ascend/format_type/insert_trans_op_test.cc b/tests/ut/cpp/pre_activate/ascend/format_type/insert_trans_op_test.cc index 8c57238e0a..0a5cf3dd9e 100644 --- a/tests/ut/cpp/pre_activate/ascend/format_type/insert_trans_op_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/format_type/insert_trans_op_test.cc @@ -14,18 +14,18 @@ * limitations under the License. 
*/ #include "common/backend_common_test.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "debug/anf_ir_dump.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/pass_manager.h" -#include "session/anf_runtime_algorithm.h" -#include "device/kernel_info.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/pass_manager.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "runtime/device/kernel_info.h" #include "utils/context/ms_context.h" #define private public #define protected public -#include "pre_activate/ascend/format_type/insert_trans_op.h" +#include "backend/optimizer/ascend/format_type/insert_trans_op.h" #undef private #undef protected diff --git a/tests/ut/cpp/pre_activate/ascend/format_type/merge_cast_to_op_test.cc b/tests/ut/cpp/pre_activate/ascend/format_type/merge_cast_to_op_test.cc index c0017c2deb..69e7fa8b27 100644 --- a/tests/ut/cpp/pre_activate/ascend/format_type/merge_cast_to_op_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/format_type/merge_cast_to_op_test.cc @@ -15,17 +15,17 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "ir/tensor.h" #include "debug/anf_ir_dump.h" #include "utils/utils.h" -#include "kernel/kernel_build_info.h" -#include "pre_activate/common/optimizer.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "backend/optimizer/common/optimizer.h" #define private public #define protected public -#include "pre_activate/ascend/format_type/merge_cast_to_op.h" +#include "backend/optimizer/ascend/format_type/merge_cast_to_op.h" #undef private #undef protected diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/addn_fission_test.cc 
b/tests/ut/cpp/pre_activate/ascend/ir_fission/addn_fission_test.cc index 90174636b1..8ec2b22a79 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fission/addn_fission_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/addn_fission_test.cc @@ -18,7 +18,7 @@ #include "common/py_func_graph_fetcher.h" #define private public #define protected public -#include "pre_activate/ascend/ir_fission/addn_fission.h" +#include "backend/optimizer/ascend/ir_fission/addn_fission.h" #undef private #undef protected diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/batch_norm_bert_fission_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fission/batch_norm_bert_fission_test.cc index 06895cb081..f793e0371b 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fission/batch_norm_bert_fission_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/batch_norm_bert_fission_test.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "pre_activate/ascend/ir_fission/batch_norm_bert_fission.h" +#include "backend/optimizer/ascend/ir_fission/batch_norm_bert_fission.h" #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/batch_norm_grad_infer_fission_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fission/batch_norm_grad_infer_fission_test.cc index ea4a5c0d5d..80f30c8938 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fission/batch_norm_grad_infer_fission_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/batch_norm_grad_infer_fission_test.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "pre_activate/ascend/ir_fission/batch_norm_grad_infer_fission.h" +#include "backend/optimizer/ascend/ir_fission/batch_norm_grad_infer_fission.h" #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/bn_grad_split_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fission/bn_grad_split_test.cc index dc437221f8..f0a5a857b9 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fission/bn_grad_split_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/bn_grad_split_test.cc @@ -15,17 +15,17 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "ir/tensor.h" #include "debug/anf_ir_dump.h" #include "utils/utils.h" -#include "kernel/kernel_build_info.h" -#include "pre_activate/common/optimizer.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "backend/optimizer/common/optimizer.h" #define private public #define protected public -#include "pre_activate/ascend/ir_fission/bn_grad_split.h" +#include "backend/optimizer/ascend/ir_fission/bn_grad_split.h" #undef private #undef protected diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/bn_split_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fission/bn_split_test.cc index c5ebc28b48..9f4f31bf82 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fission/bn_split_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/bn_split_test.cc @@ -15,20 +15,20 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "session/ascend_session.h" -#include "session/anf_runtime_algorithm.h" -#include "pipeline/resource.h" -#include "operator/ops.h" +#include "backend/session/ascend_session.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "pipeline/jit/resource.h" +#include 
"frontend/operator/ops.h" #include "ir/tensor.h" #include "ir/manager.h" #include "debug/anf_ir_dump.h" #include "utils/utils.h" -#include "kernel/kernel_build_info.h" -#include "pre_activate/common/optimizer.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "backend/optimizer/common/optimizer.h" #define private public #define protected public -#include "pre_activate/ascend/ir_fission/bn_split.h" +#include "backend/optimizer/ascend/ir_fission/bn_split.h" #undef private #undef protected diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/lars_v2_fission_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fission/lars_v2_fission_test.cc index c0a0cc455e..c726142e99 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fission/lars_v2_fission_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/lars_v2_fission_test.cc @@ -16,7 +16,7 @@ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/ascend/ir_fission/lars_v2_fission.h" +#include "backend/optimizer/ascend/ir_fission/lars_v2_fission.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/layer_norm_grad_split_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fission/layer_norm_grad_split_test.cc index 1df87960e3..4303485d85 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fission/layer_norm_grad_split_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/layer_norm_grad_split_test.cc @@ -15,17 +15,17 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "ir/tensor.h" #include "debug/anf_ir_dump.h" #include "utils/utils.h" -#include "kernel/kernel_build_info.h" -#include "pre_activate/common/optimizer.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "backend/optimizer/common/optimizer.h" 
#define private public #define protected public -#include "pre_activate/ascend/ir_fission/layer_norm_grad_split.h" +#include "backend/optimizer/ascend/ir_fission/layer_norm_grad_split.h" #undef private #undef protected diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/single_batch_norm_fission_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fission/single_batch_norm_fission_test.cc index b0aa455a0a..9f84f22678 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fission/single_batch_norm_fission_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/single_batch_norm_fission_test.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "pre_activate/ascend/ir_fission/single_batch_norm_fission.h" +#include "backend/optimizer/ascend/ir_fission/single_batch_norm_fission.h" #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" #include "debug/anf_ir_dump.h" diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/split_fission_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fission/split_fission_test.cc index ab70e83480..30de43be4e 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fission/split_fission_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/split_fission_test.cc @@ -18,7 +18,7 @@ #include "common/py_func_graph_fetcher.h" #define private public #define protected public -#include "pre_activate/ascend/ir_fission/split_fission.h" +#include "backend/optimizer/ascend/ir_fission/split_fission.h" #undef private #undef protected diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/tensor_scatter_update_fission_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fission/tensor_scatter_update_fission_test.cc index faebe0e4a0..1c928b581d 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fission/tensor_scatter_update_fission_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/tensor_scatter_update_fission_test.cc @@ -16,7 +16,7 @@ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include 
"pre_activate/ascend/ir_fission/tensor_scatter_update_fission.h" +#include "backend/optimizer/ascend/ir_fission/tensor_scatter_update_fission.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/topk_split_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fission/topk_split_test.cc index b09268aa66..2ab614d4c2 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fission/topk_split_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/topk_split_test.cc @@ -16,13 +16,13 @@ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "device/kernel_info.h" -#include "pre_activate/pass/convert_const_input_to_attr.h" +#include "runtime/device/kernel_info.h" +#include "backend/optimizer/pass/convert_const_input_to_attr.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #define private public #define protected public -#include "pre_activate/ascend/ir_fission/topk_split.h" +#include "backend/optimizer/ascend/ir_fission/topk_split.h" #undef private #undef protected diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/transdata_split_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fission/transdata_split_test.cc index f2b975a08e..220e45f10a 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fission/transdata_split_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/transdata_split_test.cc @@ -16,16 +16,16 @@ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "device/kernel_info.h" -#include "session/anf_runtime_algorithm.h" -#include "kernel/oplib/oplib.h" +#include "runtime/device/kernel_info.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/oplib/oplib.h" #include "debug/anf_ir_dump.h" #include "utils/context/ms_context.h" #define private public #define protected public -#include 
"pre_activate/ascend/format_type/insert_trans_op.h" -#include "pre_activate/ascend/ir_fission/transdata_split.h" +#include "backend/optimizer/ascend/format_type/insert_trans_op.h" +#include "backend/optimizer/ascend/ir_fission/transdata_split.h" #undef private #undef protected diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/adam_apply_one_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/adam_apply_one_fusion_test.cc index c2ee7b6519..2759864037 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/adam_apply_one_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/adam_apply_one_fusion_test.cc @@ -15,7 +15,7 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/ascend/ir_fusion/adam_apply_one_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/adam_apply_one_fusion.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule_test.cc index 014e60f579..78c815bf50 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule_test.cc @@ -15,8 +15,8 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ir_fusion/adam_apply_one_with_decay_rule.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/add_input_to_output_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/add_input_to_output_test.cc new file mode 100644 index 0000000000..5d42ff7069 --- /dev/null +++ 
b/tests/ut/cpp/pre_activate/ascend/ir_fusion/add_input_to_output_test.cc @@ -0,0 +1,74 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "common/backend_common_test.h" +#include "common/py_func_graph_fetcher.h" +#include "debug/anf_ir_dump.h" + +#define private public +#define protected public +#include "backend/optimizer/ascend/ir_fusion/add_input_to_output.h" +#undef private +#undef protected + +namespace mindspore { +namespace opt { +class TestHWAddInputToOutput : public BackendCommon { + public: + TestHWAddInputToOutput() : getPyFun_("gtest_input.pre_activate.add_input_to_output_test", true) {} + ~TestHWAddInputToOutput() override = default; + + public: + UT::PyFuncGraphFetcher getPyFun_; +}; + +class MockOpFinder : public OpFinder { + public: + MockOpFinder() = default; + ~MockOpFinder() override = default; + int GetOpRegisteredOutputNum(const std::string &op_name) override { return 2; } +}; + +TEST_F(TestHWAddInputToOutput, test_add_input_to_output) { + FuncGraphPtr g = getPyFun_.CallAndParseRet("test_add_input_to_output", "before"); + EXPECT_NE(g, nullptr); + std::vector shp{2, 32, 224, 224}; + auto x_abstract = std::make_shared(kFloat32, shp); + AbstractBasePtrList args_spec_list; + for (size_t i = 0; i < 5; ++i) { + args_spec_list.push_back(x_abstract); + } + auto kg = GetKernelGraph(g, args_spec_list); + EXPECT_NE(kg, nullptr); + auto ret = kg->get_return(); + EXPECT_NE(ret, 
nullptr); + auto make_tuple = ret->input(1); + EXPECT_NE(make_tuple, nullptr); + auto momentum = make_tuple->cast()->input(1); + EXPECT_NE(momentum, nullptr); + EXPECT_NE(momentum->abstract(), nullptr); + EXPECT_FALSE(momentum->abstract()->isa()); + + auto optimizer = std::make_shared(); + auto pm = std::make_shared(); + auto pass = std::make_shared(); + pass->op_finder_ = std::make_shared(); + pm->AddPass(pass); + optimizer->AddPassManager(pm); + (void)optimizer->Optimize(kg); + EXPECT_TRUE(momentum->abstract()->isa()); +} +} // namespace opt +} // namespace mindspore diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/batchnorm_to_bninfer_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/batchnorm_to_bninfer_test.cc index 466cba8e67..d9d0baf7be 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/batchnorm_to_bninfer_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/batchnorm_to_bninfer_test.cc @@ -15,8 +15,8 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ir_fusion/batchnorm_to_bninfer.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ir_fusion/batchnorm_to_bninfer.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/batchnormgrad_to_bninfergrad_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/batchnormgrad_to_bninfergrad_test.cc index d1fc2783ac..1b64e5fd00 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/batchnormgrad_to_bninfergrad_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/batchnormgrad_to_bninfergrad_test.cc @@ -15,8 +15,8 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ir_fusion/batchnormgrad_to_bninfergrad.h" +#include "backend/optimizer/common/optimizer.h" +#include 
"backend/optimizer/ascend/ir_fusion/batchnormgrad_to_bninfergrad.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion_test.cc index 0c8bf67391..aa56d79239 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion_test.cc @@ -15,8 +15,8 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/clip_by_value_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/clip_by_value_fusion_test.cc index 4160c3a8e4..ac01f9b1dd 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/clip_by_value_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/clip_by_value_fusion_test.cc @@ -15,8 +15,8 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ir_fusion/clip_by_value_fusion.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ir_fusion/clip_by_value_fusion.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion_test.cc index 2044857841..be6bd95b02 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion_test.cc +++ 
b/tests/ut/cpp/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion_test.cc @@ -15,8 +15,8 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ir_fusion/confusion_mul_grad_fusion.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/confusion_softmax_grad_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/confusion_softmax_grad_test.cc index 05fa2c65df..068cc0d12e 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/confusion_softmax_grad_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/confusion_softmax_grad_test.cc @@ -15,8 +15,8 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ir_fusion/confusion_softmax_grad_rule.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/derelu_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/derelu_fusion_test.cc index ffa5a42b4d..663ed309ee 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/derelu_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/derelu_fusion_test.cc @@ -15,8 +15,8 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ir_fusion/derelu_fusion.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ir_fusion/derelu_fusion.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git 
a/tests/ut/cpp/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion_test.cc index 597b7b18ff..f7cbfdc678 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion_test.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/fused_batch_norm_fusion.h" #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_mv_rule_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_mv_rule_test.cc index 6ea622d030..64c004ff27 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_mv_rule_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_mv_rule_test.cc @@ -17,7 +17,7 @@ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" #include "debug/anf_ir_dump.h" -#include "pre_activate/ascend/ir_fusion/lamb_next_mv_rule.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_next_mv_rule.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_rule_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_rule_test.cc index 36f0321511..776ce625b7 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_rule_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_rule_test.cc @@ -16,7 +16,7 @@ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_rule.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_rule.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git 
a/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule_test.cc index fbb1f5e913..bf21649672 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule_test.cc @@ -16,7 +16,7 @@ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_right_rule_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_right_rule_test.cc index f1ca92c811..6a7c866ab4 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_right_rule_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_right_rule_test.cc @@ -15,7 +15,7 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/ascend/ir_fusion/lamb_next_right_rule.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_next_right_rule.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion_test.cc index 7a2806162b..4de2de2700 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion_test.cc @@ -15,8 +15,8 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.h" +#include "backend/optimizer/common/optimizer.h" +#include 
"backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_update_with_lr_v2_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_update_with_lr_v2_test.cc index 05262e72ab..5be6195da2 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_update_with_lr_v2_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_update_with_lr_v2_test.cc @@ -17,7 +17,7 @@ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" #include "debug/anf_ir_dump.h" -#include "pre_activate/ascend/ir_fusion/lamb_update_with_lr_v2.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_v2.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion_test.cc index 44b9b3df69..7392d05b98 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion_test.cc @@ -15,13 +15,13 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "device/kernel_info.h" +#include "runtime/device/kernel_info.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #define private public #define protected public -#include "pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.h" #undef private #undef protected diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/matmul_biasadd_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/matmul_biasadd_fusion_test.cc index c8f97be290..f67eda9776 100644 --- 
a/tests/ut/cpp/pre_activate/ascend/ir_fusion/matmul_biasadd_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/matmul_biasadd_fusion_test.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/matmul_biasadd_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/matmul_biasadd_fusion.h" #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/momentum_lossscale_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/momentum_lossscale_fusion_test.cc index 114fcf4233..50dfd66f54 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/momentum_lossscale_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/momentum_lossscale_fusion_test.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/momentum_lossscale_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/momentum_lossscale_fusion.h" #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/mul_add_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/mul_add_fusion_test.cc index 87bb21f89a..b293cdeecb 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/mul_add_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/mul_add_fusion_test.cc @@ -15,7 +15,7 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/ascend/ir_fusion/mul_add_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/mul_add_fusion.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/mul_addn_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/mul_addn_fusion_test.cc index ab9718d80a..8ac106f81c 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/mul_addn_fusion_test.cc +++ 
b/tests/ut/cpp/pre_activate/ascend/ir_fusion/mul_addn_fusion_test.cc @@ -15,7 +15,7 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "mindspore/ccsrc/pre_activate/ascend/ir_fusion/mul_addn_fusion.h" +#include "mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/mul_addn_fusion.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/reshape_transpose_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/reshape_transpose_fusion_test.cc index 59140e91a1..6792f4720a 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/reshape_transpose_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/reshape_transpose_fusion_test.cc @@ -17,8 +17,8 @@ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/ascend/ir_fusion/reshape_transpose_fusion.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/ascend/ir_fusion/reshape_transpose_fusion.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion_test.cc index 5f02f0e9c1..f6e8a1194c 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion_test.cc @@ -15,8 +15,8 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ir_fusion/softmax_grad_ext_fusion.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/square_sum_fusion_test.cc 
b/tests/ut/cpp/pre_activate/ascend/ir_fusion/square_sum_fusion_test.cc index 2dd858a0fc..efe5433d75 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/square_sum_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/square_sum_fusion_test.cc @@ -15,8 +15,8 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ir_fusion/square_sum_fusion.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ir_fusion/square_sum_fusion.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/transpose_reshape_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/transpose_reshape_fusion_test.cc index 3290acd42f..6ec407d2ea 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/transpose_reshape_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/transpose_reshape_fusion_test.cc @@ -17,8 +17,8 @@ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/ascend/ir_fusion/transpose_reshape_fusion.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/ascend/ir_fusion/transpose_reshape_fusion.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/transpose_transdata_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/transpose_transdata_fusion_test.cc index 98dc9e9efc..d156959c4c 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/transpose_transdata_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/transpose_transdata_fusion_test.cc @@ -16,14 +16,14 @@ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "device/kernel_info.h" -#include "session/anf_runtime_algorithm.h" -#include "kernel/oplib/oplib.h" 
+#include "runtime/device/kernel_info.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/oplib/oplib.h" #include "utils/context/ms_context.h" #define private public #define protected public -#include "pre_activate/ascend/format_type/insert_trans_op.h" -#include "pre_activate/ascend/ir_fusion/transpose_transdata_fusion.h" +#include "backend/optimizer/ascend/format_type/insert_trans_op.h" +#include "backend/optimizer/ascend/ir_fusion/transpose_transdata_fusion.h" #undef private #undef protected diff --git a/tests/ut/cpp/pre_activate/common/pattern_engine_test.cc b/tests/ut/cpp/pre_activate/common/pattern_engine_test.cc index 7b0e2cc9db..12030433fc 100644 --- a/tests/ut/cpp/pre_activate/common/pattern_engine_test.cc +++ b/tests/ut/cpp/pre_activate/common/pattern_engine_test.cc @@ -20,8 +20,8 @@ #include #include "common/common_test.h" -#include "pre_activate/common/pattern_engine.h" -#include "pre_activate/common/visit.h" +#include "backend/optimizer/common/pattern_engine.h" +#include "backend/optimizer/common/visit.h" #include "utils/base_ref.h" #include "ir/anf.h" diff --git a/tests/ut/cpp/pre_activate/mem_reuse/kernel_ref_test.cc b/tests/ut/cpp/pre_activate/mem_reuse/kernel_ref_test.cc index 5b237fda58..8b6d3e061a 100644 --- a/tests/ut/cpp/pre_activate/mem_reuse/kernel_ref_test.cc +++ b/tests/ut/cpp/pre_activate/mem_reuse/kernel_ref_test.cc @@ -18,7 +18,7 @@ #include #include -#include "pre_activate/mem_reuse/kernel_refcount.h" +#include "backend/optimizer/mem_reuse/kernel_refcount.h" #include "utils/utils.h" #include "common/common_test.h" diff --git a/tests/ut/cpp/pre_activate/mem_reuse/mem_reuse_allocator_test.cc b/tests/ut/cpp/pre_activate/mem_reuse/mem_reuse_allocator_test.cc index e0966d2d12..2a6904658e 100644 --- a/tests/ut/cpp/pre_activate/mem_reuse/mem_reuse_allocator_test.cc +++ b/tests/ut/cpp/pre_activate/mem_reuse/mem_reuse_allocator_test.cc @@ -17,9 +17,9 @@ #include #include #include -#include "operator/ops.h" 
-#include "pre_activate/mem_reuse/mem_reuse.h" -#include "pre_activate/mem_reuse/mem_reuse_allocator.h" +#include "frontend/operator/ops.h" +#include "backend/optimizer/mem_reuse/mem_reuse.h" +#include "backend/optimizer/mem_reuse/mem_reuse_allocator.h" #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" diff --git a/tests/ut/cpp/pre_activate/mem_reuse/mem_reuse_test.cc b/tests/ut/cpp/pre_activate/mem_reuse/mem_reuse_test.cc index a36463d297..31ae923c0a 100644 --- a/tests/ut/cpp/pre_activate/mem_reuse/mem_reuse_test.cc +++ b/tests/ut/cpp/pre_activate/mem_reuse/mem_reuse_test.cc @@ -16,19 +16,19 @@ #include #include #include -#include "session/kernel_graph.h" -#include "session/session_basic.h" -#include "session/ascend_session.h" -#include "pre_activate/mem_reuse/kernel_refcount.h" -#include "pre_activate/mem_reuse/mem_reuse_allocator.h" -#include "device/kernel_info.h" -#include "kernel/tbe/tbe_kernel_mod.h" -#include "operator/ops.h" +#include "backend/session/kernel_graph.h" +#include "backend/session/session_basic.h" +#include "backend/session/ascend_session.h" +#include "backend/optimizer/mem_reuse/kernel_refcount.h" +#include "backend/optimizer/mem_reuse/mem_reuse_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_mod.h" +#include "frontend/operator/ops.h" #include "utils/log_adapter.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "common/utils.h" -#include "pipeline/resource.h" -#include "pre_activate/mem_reuse/mem_reuse.h" +#include "pipeline/jit/resource.h" +#include "backend/optimizer/mem_reuse/mem_reuse.h" #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" diff --git a/tests/ut/cpp/pre_activate/pass/allreduce_fusion_test.cc b/tests/ut/cpp/pre_activate/pass/allreduce_fusion_test.cc index 69a330614e..02e1865a82 100644 --- a/tests/ut/cpp/pre_activate/pass/allreduce_fusion_test.cc +++ 
b/tests/ut/cpp/pre_activate/pass/allreduce_fusion_test.cc @@ -15,16 +15,16 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "ir/tensor.h" #include "ir/manager.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/pass/communication_op_fusion.h" -#include "pre_activate/common/optimizer.h" -#include "device/kernel_info.h" -#include "pre_activate/common/pass_manager.h" -#include "kernel/kernel_build_info.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/pass/communication_op_fusion.h" +#include "backend/optimizer/common/optimizer.h" +#include "runtime/device/kernel_info.h" +#include "backend/optimizer/common/pass_manager.h" +#include "backend/kernel_compiler/kernel_build_info.h" #include "utils/utils.h" #include "utils/context/ms_context.h" diff --git a/tests/ut/cpp/pre_activate/pass/common_subexpression_elimination_test.cc b/tests/ut/cpp/pre_activate/pass/common_subexpression_elimination_test.cc index 12c4d35db5..cfcc34970b 100644 --- a/tests/ut/cpp/pre_activate/pass/common_subexpression_elimination_test.cc +++ b/tests/ut/cpp/pre_activate/pass/common_subexpression_elimination_test.cc @@ -14,17 +14,17 @@ * limitations under the License. 
*/ #include "common/backend_common_test.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "ir/tensor.h" #include "ir/manager.h" #include "debug/anf_ir_dump.h" #include "common/py_func_graph_fetcher.h" -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/pass_manager.h" -#include "device/kernel_info.h" -#include "pre_activate/pass/common_subexpression_elimination.h" -#include "kernel/kernel_build_info.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/pass_manager.h" +#include "runtime/device/kernel_info.h" +#include "backend/optimizer/pass/common_subexpression_elimination.h" +#include "backend/kernel_compiler/kernel_build_info.h" #include "utils/utils.h" #include "utils/context/ms_context.h" diff --git a/tests/ut/cpp/pre_activate/pass/const_to_attr_strided_slice_grad_test.cc b/tests/ut/cpp/pre_activate/pass/const_to_attr_strided_slice_grad_test.cc index 8fc709433e..25e4b3c111 100644 --- a/tests/ut/cpp/pre_activate/pass/const_to_attr_strided_slice_grad_test.cc +++ b/tests/ut/cpp/pre_activate/pass/const_to_attr_strided_slice_grad_test.cc @@ -14,13 +14,13 @@ * limitations under the License. 
*/ #include "common/backend_common_test.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "debug/anf_ir_dump.h" #include "common/py_func_graph_fetcher.h" -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/pass_manager.h" -#include "pre_activate/pass/const_to_attr_strided_slice_grad.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/pass_manager.h" +#include "backend/optimizer/pass/const_to_attr_strided_slice_grad.h" #include "utils/utils.h" #include "common/utils.h" diff --git a/tests/ut/cpp/pre_activate/pass/convert_const_input_to_attr_test.cc b/tests/ut/cpp/pre_activate/pass/convert_const_input_to_attr_test.cc index fcb3b19a24..ac3272317a 100644 --- a/tests/ut/cpp/pre_activate/pass/convert_const_input_to_attr_test.cc +++ b/tests/ut/cpp/pre_activate/pass/convert_const_input_to_attr_test.cc @@ -14,13 +14,13 @@ * limitations under the License. 
*/ #include "common/backend_common_test.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "debug/anf_ir_dump.h" #include "common/py_func_graph_fetcher.h" -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/pass_manager.h" -#include "pre_activate/pass/convert_const_input_to_attr.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/pass_manager.h" +#include "backend/optimizer/pass/convert_const_input_to_attr.h" #include "utils/utils.h" #include "common/utils.h" diff --git a/tests/ut/cpp/pre_activate/pass/convert_const_input_to_tensor_input_test.cc b/tests/ut/cpp/pre_activate/pass/convert_const_input_to_tensor_input_test.cc index 1749e54d94..5b303d15a5 100644 --- a/tests/ut/cpp/pre_activate/pass/convert_const_input_to_tensor_input_test.cc +++ b/tests/ut/cpp/pre_activate/pass/convert_const_input_to_tensor_input_test.cc @@ -18,10 +18,10 @@ #include "ir/tensor.h" #include "debug/anf_ir_dump.h" #include "common/py_func_graph_fetcher.h" -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/pass_manager.h" -#include "pre_activate/pass/convert_const_input_to_tensor_input.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/pass_manager.h" +#include "backend/optimizer/pass/convert_const_input_to_tensor_input.h" #include "utils/utils.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/pass/convert_tuple_input_to_dynamic_input_test.cc b/tests/ut/cpp/pre_activate/pass/convert_tuple_input_to_dynamic_input_test.cc index aded376536..2c1dfc1c6c 100644 --- a/tests/ut/cpp/pre_activate/pass/convert_tuple_input_to_dynamic_input_test.cc +++ b/tests/ut/cpp/pre_activate/pass/convert_tuple_input_to_dynamic_input_test.cc @@ -18,10 +18,10 @@ #include 
"ir/tensor.h" #include "debug/anf_ir_dump.h" #include "common/py_func_graph_fetcher.h" -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/pass_manager.h" -#include "pre_activate/pass/convert_tuple_input_to_dynamic_input.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/pass_manager.h" +#include "backend/optimizer/pass/convert_tuple_input_to_dynamic_input.h" #include "utils/utils.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/pass/convert_tuple_output_to_maketuple_test.cc b/tests/ut/cpp/pre_activate/pass/convert_tuple_output_to_maketuple_test.cc index eeb01270e2..458c854218 100644 --- a/tests/ut/cpp/pre_activate/pass/convert_tuple_output_to_maketuple_test.cc +++ b/tests/ut/cpp/pre_activate/pass/convert_tuple_output_to_maketuple_test.cc @@ -18,10 +18,10 @@ #include "ir/tensor.h" #include "debug/anf_ir_dump.h" #include "common/py_func_graph_fetcher.h" -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/pass_manager.h" -#include "pre_activate/pass/convert_tuple_output_to_maketuple.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/pass_manager.h" +#include "backend/optimizer/pass/convert_tuple_output_to_maketuple.h" #include "utils/utils.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/pass/eliminate_redundant_op_test.cc b/tests/ut/cpp/pre_activate/pass/eliminate_redundant_op_test.cc index 3e43155011..07bef7a042 100644 --- a/tests/ut/cpp/pre_activate/pass/eliminate_redundant_op_test.cc +++ b/tests/ut/cpp/pre_activate/pass/eliminate_redundant_op_test.cc @@ -15,26 +15,26 @@ */ #include "common/backend_common_test.h" -#include "kernel/kernel.h" -#include "operator/ops.h" +#include "backend/kernel_compiler/kernel.h" +#include 
"frontend/operator/ops.h" #include "ir/tensor.h" #include "ir/manager.h" #include "debug/anf_ir_dump.h" #include "common/py_func_graph_fetcher.h" -// #include "device/optimizer/pass/insert_trans_op.h" -#include "pre_activate/ascend/format_type/insert_cast.h" -#include "pre_activate/pass/eliminate_redundant_op.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/pass_manager.h" +// #include "runtime/device/optimizer/pass/insert_trans_op.h" +#include "backend/optimizer/ascend/format_type/insert_cast.h" +#include "backend/optimizer/pass/eliminate_redundant_op.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/pass_manager.h" #include "utils/utils.h" #include "utils/context/ms_context.h" -#include "session/anf_runtime_algorithm.h" -#include "device/kernel_info.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "runtime/device/kernel_info.h" #include "utils/context/ms_context.h" #define private public #define protected public -#include "pre_activate/ascend/format_type/insert_trans_op.h" +#include "backend/optimizer/ascend/format_type/insert_trans_op.h" #undef private #undef protected diff --git a/tests/ut/cpp/pre_activate/pass/getitem_tuple_test.cc b/tests/ut/cpp/pre_activate/pass/getitem_tuple_test.cc index b172e1b351..555dd95426 100644 --- a/tests/ut/cpp/pre_activate/pass/getitem_tuple_test.cc +++ b/tests/ut/cpp/pre_activate/pass/getitem_tuple_test.cc @@ -15,14 +15,14 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "session/ascend_session.h" -#include "pipeline/resource.h" -#include "operator/ops.h" +#include "backend/session/ascend_session.h" +#include "pipeline/jit/resource.h" +#include "frontend/operator/ops.h" #include "ir/manager.h" #include "debug/anf_ir_dump.h" #include "utils/utils.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/pass/getitem_tuple.h" +#include "backend/optimizer/common/optimizer.h" +#include 
"backend/optimizer/pass/getitem_tuple.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/pre_activate/pass/optimize_dependence_test.cc b/tests/ut/cpp/pre_activate/pass/optimize_dependence_test.cc index 04461e6602..f9cfe273bc 100644 --- a/tests/ut/cpp/pre_activate/pass/optimize_dependence_test.cc +++ b/tests/ut/cpp/pre_activate/pass/optimize_dependence_test.cc @@ -15,8 +15,8 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/pass/optimize_dependence.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/pass/optimize_dependence.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/pynative/pynative_execute_test.cc b/tests/ut/cpp/pynative/pynative_execute_test.cc index a0d1516b58..c5f25ca484 100644 --- a/tests/ut/cpp/pynative/pynative_execute_test.cc +++ b/tests/ut/cpp/pynative/pynative_execute_test.cc @@ -16,10 +16,10 @@ #include #include #include "common/common_test.h" -#include "pipeline/parse/python_adapter.h" -#include "pipeline/parse/data_converter.h" -#include "operator/ops.h" -#include "pynative/pynative_execute.h" +#include "pipeline/jit/parse/python_adapter.h" +#include "pipeline/jit/parse/data_converter.h" +#include "frontend/operator/ops.h" +#include "pipeline/pynative/pynative_execute.h" #include "utils/context/ms_context.h" #include "utils/utils.h" diff --git a/tests/ut/cpp/python_input/gtest_input/optimizer/ad/ad_test.py b/tests/ut/cpp/python_input/gtest_input/optimizer/ad/ad_test.py index e38c61f16e..bcfa077ea5 100644 --- a/tests/ut/cpp/python_input/gtest_input/optimizer/ad/ad_test.py +++ b/tests/ut/cpp/python_input/gtest_input/optimizer/ad/ad_test.py @@ -17,8 +17,8 @@ import numpy as np import mindspore as ms from mindspore.common.tensor import Tensor -from mindspore.model_zoo.resnet import resnet50 from mindspore.ops import Primitive +from tests.ut.python.model.resnet import resnet50 
scala_add = Primitive('scalar_add') diff --git a/tests/ut/cpp/python_input/gtest_input/pipeline/parse/parser_integrate.py b/tests/ut/cpp/python_input/gtest_input/pipeline/parse/parser_integrate.py index fa5b1b9055..28bded6401 100644 --- a/tests/ut/cpp/python_input/gtest_input/pipeline/parse/parser_integrate.py +++ b/tests/ut/cpp/python_input/gtest_input/pipeline/parse/parser_integrate.py @@ -22,9 +22,9 @@ from mindspore.common import dtype from mindspore.common.api import ms_function, _executor from mindspore.common.parameter import Parameter from mindspore.common.tensor import Tensor -from mindspore.model_zoo.resnet import resnet50 from mindspore.ops import functional as F from mindspore.train.model import Model +from tests.ut.python.model.resnet import resnet50 def test_high_order_function(a): diff --git a/tests/ut/cpp/python_input/gtest_input/pre_activate/add_input_to_output_test.py b/tests/ut/cpp/python_input/gtest_input/pre_activate/add_input_to_output_test.py new file mode 100644 index 0000000000..4d4fa1fe96 --- /dev/null +++ b/tests/ut/cpp/python_input/gtest_input/pre_activate/add_input_to_output_test.py @@ -0,0 +1,39 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +from mindspore.ops import operations as P + +ApplyMomentum = P.ApplyMomentum() + + +class FnDict: + def __init__(self): + self.fnDict = {} + + def __call__(self, fn): + self.fnDict[fn.__name__] = fn + + def __getitem__(self, name): + return self.fnDict[name] + + +def test_add_input_to_output(tag): + fns = FnDict() + + @fns + def before(input0, input1, input2, input3, input4): + return ApplyMomentum(input0, input1, input2, input3, input4) + + return fns[tag] diff --git a/tests/ut/cpp/session/anf_runtime_algorithm_test.cc b/tests/ut/cpp/session/anf_runtime_algorithm_test.cc index 4c94cdde57..ac38e5427e 100644 --- a/tests/ut/cpp/session/anf_runtime_algorithm_test.cc +++ b/tests/ut/cpp/session/anf_runtime_algorithm_test.cc @@ -15,12 +15,12 @@ */ #include "common/common_test.h" -#include "ir/param_value_py.h" -#include "operator/ops.h" -#include "session/kernel_graph.h" -#include "session/anf_runtime_algorithm.h" -#include "mindspore/ccsrc/device/kernel_info.h" -#include "mindspore/ccsrc/device/ascend/ascend_device_address.h" +#include "ir/param_value.h" +#include "frontend/operator/ops.h" +#include "backend/session/kernel_graph.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "mindspore/ccsrc/runtime/device/kernel_info.h" +#include "mindspore/ccsrc/runtime/device/ascend/ascend_device_address.h" #include "utils/utils.h" namespace mindspore { @@ -255,7 +255,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetOutputFormat) { AnfAlgo::SetOutputInferTypeAndShape({kNumberTypeFloat32, kNumberTypeFloat32}, {shape, shape}, add.get()); MS_EXCEPTION_IF_NULL(add); add->set_kernel_info(std::make_shared()); - auto d_kernel_info = add->kernel_info(); + auto d_kernel_info = dynamic_cast(add->kernel_info()); MS_EXCEPTION_IF_NULL(d_kernel_info); KernelBuildInfoBuilder builder; builder.SetOutputsDeviceType({kFloat32->type_id(), kFloat16->type_id()}); @@ -274,7 +274,7 @@ TEST_F(AnfRuntimeAlgorithmTest, 
GetInputFormat) { auto add = kernel_graph->NewCNode(inputs); MS_EXCEPTION_IF_NULL(add); add->set_kernel_info(std::make_shared()); - auto d_kernel_info = add->kernel_info(); + auto d_kernel_info = dynamic_cast(add->kernel_info()); MS_EXCEPTION_IF_NULL(d_kernel_info); KernelBuildInfoBuilder builder; builder.SetInputsDeviceType({kFloat32->type_id(), kFloat16->type_id()}); @@ -293,7 +293,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetPrevNodeOutputFormat) { auto pre_add = kernel_graph->NewCNode(pre_node_inputs); MS_EXCEPTION_IF_NULL(pre_add); pre_add->set_kernel_info(std::make_shared()); - auto d_kernel_info = pre_add->kernel_info(); + auto d_kernel_info = dynamic_cast(pre_add->kernel_info()); MS_EXCEPTION_IF_NULL(d_kernel_info); KernelBuildInfoBuilder builder; builder.SetOutputsDeviceType({kFloat32->type_id()}); @@ -373,7 +373,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetOutputDeviceShape) { MS_EXCEPTION_IF_NULL(add); add->set_abstract(tuple_abstract); add->set_kernel_info(std::make_shared()); - auto d_kernel_info = add->kernel_info(); + auto d_kernel_info = dynamic_cast(add->kernel_info()); MS_EXCEPTION_IF_NULL(d_kernel_info); KernelBuildInfoBuilder builder; builder.SetOutputsFormat({kOpFormat_NCHW, kOpFormat_NCHW, kOpFormat_NHWC, kOpFormat_FRAC_NZ}); @@ -404,7 +404,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetInputDeviceShape) { auto add = kernel_graph->NewCNode(inputs); MS_EXCEPTION_IF_NULL(add); add->set_kernel_info(std::make_shared()); - auto d_kernel_info = add->kernel_info(); + auto d_kernel_info = dynamic_cast(add->kernel_info()); MS_EXCEPTION_IF_NULL(d_kernel_info); KernelBuildInfoBuilder builder; builder.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW, kOpFormat_NHWC}); @@ -457,7 +457,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetOutputDeviceDataTypeTest) { auto add = kernel_graph->NewCNode(inputs); MS_EXCEPTION_IF_NULL(add); add->set_kernel_info(std::make_shared()); - auto d_kernel_info = add->kernel_info(); + auto d_kernel_info = dynamic_cast(add->kernel_info()); 
MS_EXCEPTION_IF_NULL(d_kernel_info); KernelBuildInfoBuilder builder; builder.SetOutputsDeviceType({kFloat32->type_id()}); @@ -474,7 +474,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetInputDeviceDataTypeTest) { auto add = kernel_graph->NewCNode(inputs); MS_EXCEPTION_IF_NULL(add); add->set_kernel_info(std::make_shared()); - auto d_kernel_info = add->kernel_info(); + auto d_kernel_info = dynamic_cast(add->kernel_info()); MS_EXCEPTION_IF_NULL(d_kernel_info); KernelBuildInfoBuilder builder; builder.SetInputsDeviceType({kFloat32->type_id(), kFloat16->type_id()}); @@ -492,7 +492,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetPrevNodeOutputDeviceDataType) { auto pre_add = kernel_graph->NewCNode(pre_add_inputs); MS_EXCEPTION_IF_NULL(pre_add); pre_add->set_kernel_info(std::make_shared()); - auto d_kernel_info = pre_add->kernel_info(); + auto d_kernel_info = dynamic_cast(pre_add->kernel_info()); MS_EXCEPTION_IF_NULL(d_kernel_info); KernelBuildInfoBuilder builder; builder.SetOutputsDeviceType({kFloat32->type_id()}); @@ -513,7 +513,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetOutputAddr) { auto add = kernel_graph->NewCNode(inputs); MS_EXCEPTION_IF_NULL(add); add->set_kernel_info(std::make_shared()); - auto d_kernel_info = add->kernel_info(); + auto d_kernel_info = dynamic_cast(add->kernel_info()); MS_EXCEPTION_IF_NULL(d_kernel_info); int *addr = nullptr; auto device_address = std::make_shared(addr, 1); @@ -528,7 +528,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetPrevNodeOutputAddr) { auto pre_add = kernel_graph->NewCNode(pre_add_inputs); MS_EXCEPTION_IF_NULL(pre_add); pre_add->set_kernel_info(std::make_shared()); - auto d_kernel_info = pre_add->kernel_info(); + auto d_kernel_info = dynamic_cast(pre_add->kernel_info()); MS_EXCEPTION_IF_NULL(d_kernel_info); int *addr = nullptr; auto device_address = std::make_shared(addr, 1); @@ -561,7 +561,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetWorkspaceAddr) { auto add = kernel_graph->NewCNode(inputs); MS_EXCEPTION_IF_NULL(add); 
add->set_kernel_info(std::make_shared()); - auto d_kernel_info = add->kernel_info(); + auto d_kernel_info = dynamic_cast(add->kernel_info()); MS_EXCEPTION_IF_NULL(d_kernel_info); int *addr = nullptr; auto device_address = std::make_shared(addr, 1); @@ -643,7 +643,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetKernelType) { auto add = kernel_graph->NewCNode(inputs); MS_EXCEPTION_IF_NULL(add); add->set_kernel_info(std::make_shared()); - auto d_kernel_info = add->kernel_info(); + auto d_kernel_info = dynamic_cast(add->kernel_info()); MS_EXCEPTION_IF_NULL(d_kernel_info); KernelBuildInfoBuilder builder; builder.SetKernelType(AKG_KERNEL); @@ -659,7 +659,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetProcessor) { auto add = kernel_graph->NewCNode(inputs); MS_EXCEPTION_IF_NULL(add); add->set_kernel_info(std::make_shared()); - auto d_kernel_info = add->kernel_info(); + auto d_kernel_info = dynamic_cast(add->kernel_info()); MS_EXCEPTION_IF_NULL(d_kernel_info); KernelBuildInfoBuilder builder; builder.SetProcessor(kernel::AICORE); @@ -675,7 +675,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetFusionType) { auto add = kernel_graph->NewCNode(inputs); MS_EXCEPTION_IF_NULL(add); add->set_kernel_info(std::make_shared()); - auto d_kernel_info = add->kernel_info(); + auto d_kernel_info = dynamic_cast(add->kernel_info()); MS_EXCEPTION_IF_NULL(d_kernel_info); KernelBuildInfoBuilder builder; builder.SetFusionType(kernel::CONVLUTION); @@ -703,7 +703,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetKernelMod) { auto add = kernel_graph->NewCNode(inputs); MS_EXCEPTION_IF_NULL(add); add->set_kernel_info(std::make_shared()); - auto d_kernel_info = add->kernel_info(); + auto d_kernel_info = dynamic_cast(add->kernel_info()); MS_EXCEPTION_IF_NULL(d_kernel_info); d_kernel_info->set_kernel_mod(nullptr); EXPECT_EQ(AnfAlgo::GetKernelMod(add), nullptr); @@ -764,10 +764,9 @@ TEST_F(AnfRuntimeAlgorithmTest, IsRealCNodeKernel) { TEST_F(AnfRuntimeAlgorithmTest, IsParameterWeight) { auto kernel_graph = std::make_shared(); - py::object 
obj; auto parameter_node = kernel_graph->add_parameter(); MS_EXCEPTION_IF_NULL(parameter_node); - auto param_value_new = std::make_shared(obj); + auto param_value_new = std::make_shared(); parameter_node->set_default_param(param_value_new); EXPECT_TRUE(AnfAlgo::IsParameterWeight(parameter_node)); EXPECT_THROW(AnfAlgo::IsParameterWeight(nullptr), std::runtime_error); @@ -780,7 +779,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetStreamId) { auto add = kernel_graph->NewCNode(inputs); MS_EXCEPTION_IF_NULL(add); add->set_kernel_info(std::make_shared()); - auto d_kernel_info = add->kernel_info(); + auto d_kernel_info = dynamic_cast(add->kernel_info()); MS_EXCEPTION_IF_NULL(d_kernel_info); d_kernel_info->set_stream_id(0); EXPECT_EQ(AnfAlgo::GetStreamId(add), 0); diff --git a/tests/ut/cpp/session/kernel_graph_test.cc b/tests/ut/cpp/session/kernel_graph_test.cc index 75e653c26c..f24036b4aa 100644 --- a/tests/ut/cpp/session/kernel_graph_test.cc +++ b/tests/ut/cpp/session/kernel_graph_test.cc @@ -15,11 +15,11 @@ */ #include "common/common_test.h" -#include "ir/param_value_py.h" -#include "operator/ops.h" -#include "session/kernel_graph.h" -#include "session/anf_runtime_algorithm.h" -#include "mindspore/ccsrc/device/kernel_info.h" +#include "ir/param_value.h" +#include "frontend/operator/ops.h" +#include "backend/session/kernel_graph.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "mindspore/ccsrc/runtime/device/kernel_info.h" #include "utils/utils.h" namespace mindspore { @@ -42,7 +42,7 @@ TEST_F(KernelGraphTest, NewValueNode) { auto x_abstract = std::make_shared(kFloat32, shape); add_value->set_abstract(x_abstract); add_value->set_kernel_info(std::make_shared()); - auto mutable_kernel_info = add_value->kernel_info(); + auto mutable_kernel_info = dynamic_cast(add_value->kernel_info()); MS_EXCEPTION_IF_NULL(mutable_kernel_info); std::shared_ptr builder = std::make_shared(); builder->SetOutputsFormat({kOpFormat_FRAC_Z}); @@ -82,8 +82,7 @@ TEST_F(KernelGraphTest, 
NewParameter) { // test weight parameter node as input auto weight_parameter_node = anf_graph->add_parameter(); MS_EXCEPTION_IF_NULL(weight_parameter_node); - py::object obj; - auto param_value_new = std::make_shared(obj); + auto param_value_new = std::make_shared(); weight_parameter_node->set_default_param(param_value_new); weight_parameter_node->set_abstract(x_abstract); auto new_weight_parameter_node = kernel_graph->NewParameter(weight_parameter_node); diff --git a/tests/ut/cpp/session/session_basic_test.cc b/tests/ut/cpp/session/session_basic_test.cc index 1a7ca68065..c438c92b52 100644 --- a/tests/ut/cpp/session/session_basic_test.cc +++ b/tests/ut/cpp/session/session_basic_test.cc @@ -15,10 +15,10 @@ */ #include "common/common_test.h" -#include "operator/ops.h" -#include "session/ascend_session.h" -#include "session/kernel_graph.h" -#include "session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" +#include "backend/session/ascend_session.h" +#include "backend/session/kernel_graph.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" namespace mindspore { diff --git a/tests/ut/cpp/stub/aicpu/aicpu_stub.cc b/tests/ut/cpp/stub/aicpu/aicpu_stub.cc index 78ada6de18..5516d1fdc8 100644 --- a/tests/ut/cpp/stub/aicpu/aicpu_stub.cc +++ b/tests/ut/cpp/stub/aicpu/aicpu_stub.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" namespace mindspore { namespace kernel { diff --git a/tests/ut/cpp/stub/ge/ge_task_launch_stub.cc b/tests/ut/cpp/stub/ge/ge_task_launch_stub.cc index a3a991247c..234ffdaf6b 100644 --- a/tests/ut/cpp/stub/ge/ge_task_launch_stub.cc +++ b/tests/ut/cpp/stub/ge/ge_task_launch_stub.cc @@ -15,7 +15,7 @@ */ #include #include "framework/ge_runtime/model_runner.h" -#include "device/ascend/tasksink/runtime_utils.h" +#include "runtime/device/ascend/tasksink/runtime_utils.h" namespace ge { namespace model_runner { @@ -32,6 +32,8 @@ bool ModelRunner::LoadDavinciModel(uint32_t device_id, uint64_t session_id, uint bool ModelRunner::UnloadModel(uint32_t model_id) { return true; } +bool ModelRunner::LoadModelComplete(uint32_t model_id) { return true; } + bool ModelRunner::RunModel(uint32_t model_id, const ge::InputData &input_data, ge::OutputData *output_data) { return true; } @@ -45,6 +47,11 @@ const std::vector &ModelRunner::GetStreamIdList(uint32_t model_id) con static std::vector stream_id_list; return stream_id_list; } + +const std::map> &ModelRunner::GetRuntimeInfoMap(uint32_t model_id) const { + static std::map> runtime_info_map; + return runtime_info_map; +} } // namespace model_runner } // namespace ge diff --git a/tests/ut/cpp/stub/kernel/kernel_fusion_stub.cc b/tests/ut/cpp/stub/kernel/kernel_fusion_stub.cc index ba642dfe18..87ab543c7c 100755 --- a/tests/ut/cpp/stub/kernel/kernel_fusion_stub.cc +++ b/tests/ut/cpp/stub/kernel/kernel_fusion_stub.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/kernel_fusion.h" -#include "kernel/tbe/tbe_kernel_mod.h" +#include "backend/kernel_compiler/kernel_fusion.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_mod.h" #include "common/utils.h" namespace mindspore { diff --git a/tests/ut/cpp/stub/parallel_strategy_checkpoint/parallel_strategy_checkpoint_stub.cc b/tests/ut/cpp/stub/parallel_strategy_checkpoint/parallel_strategy_checkpoint_stub.cc index 43d0dd4b3f..f6f2f45092 100644 --- a/tests/ut/cpp/stub/parallel_strategy_checkpoint/parallel_strategy_checkpoint_stub.cc +++ b/tests/ut/cpp/stub/parallel_strategy_checkpoint/parallel_strategy_checkpoint_stub.cc @@ -15,7 +15,7 @@ */ #include #include -#include "parallel/strategy_checkpoint/parallel_strategy_checkpoint.h" +#include "frontend/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/tests/ut/cpp/stub/tasksink/ascend_stream_assign_stub.cc b/tests/ut/cpp/stub/tasksink/ascend_stream_assign_stub.cc index a6ec3a50b5..85470e2315 100755 --- a/tests/ut/cpp/stub/tasksink/ascend_stream_assign_stub.cc +++ b/tests/ut/cpp/stub/tasksink/ascend_stream_assign_stub.cc @@ -13,10 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "device/ascend/ascend_stream_assign.h" -#include "device/ascend/ascend_label_assign.h" -#include "device/ascend/tasksink/task_generator.h" -#include "device/kernel_adjust.h" +#include "runtime/device/ascend/ascend_stream_assign.h" +#include "runtime/device/ascend/ascend_label_assign.h" +#include "runtime/device/kernel_adjust.h" namespace mindspore { namespace device { @@ -31,13 +30,6 @@ void AscendStreamAssign::AssignStream(const NotNull &graph_ptr) void AscendStreamAssign::GetWaitStreams(vector *wait_active_stream_list) { return; } void AscendStreamAssign::GetHcomStreams(std::vector *streams) { return; } - -namespace tasksink { -bool TaskGenerator::GenTasks(const std::vector &anf_node_list, std::vector *const task_info_list, - uint32_t graph_id) { - return true; -} -} // namespace tasksink } // namespace ascend void KernelAdjust::InsertSwitchLoop(const std::shared_ptr &kernel_graph_ptr) { return; } bool KernelAdjust::StepLoadCtrlInputs(const std::shared_ptr &kernel_graph_ptr) { return true; } diff --git a/tests/ut/cpp/stub/tasksink/task_sink_stub.cc b/tests/ut/cpp/stub/tasksink/task_sink_stub.cc new file mode 100644 index 0000000000..0b12a3862c --- /dev/null +++ b/tests/ut/cpp/stub/tasksink/task_sink_stub.cc @@ -0,0 +1,30 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "runtime/device/ascend/tasksink/task_generator.h" + +namespace mindspore { +namespace device { +namespace ascend { +namespace tasksink { +bool TaskGenerator::GenTasks(const std::vector &anf_node_list, std::vector *const task_info_list, + uint32_t graph_id) { + return true; +} +} // namespace tasksink +} // namespace ascend +} // namespace device +} // namespace mindspore \ No newline at end of file diff --git a/tests/ut/cpp/transform/convert_test.cc b/tests/ut/cpp/transform/convert_test.cc index f8f48920e0..6902f7d90d 100644 --- a/tests/ut/cpp/transform/convert_test.cc +++ b/tests/ut/cpp/transform/convert_test.cc @@ -20,16 +20,16 @@ #include "transform/transform_base_test.h" #include "common/py_func_graph_fetcher.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse.h" #include "debug/draw.h" #include "debug/anf_ir_dump.h" -#include "pipeline/static_analysis/prim.h" -#include "operator/ops.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "frontend/operator/ops.h" #include "common/common_test.h" #define private public -#include "transform/types.h" -#include "transform/convert.h" +#include "transform/graph_ir/types.h" +#include "transform/graph_ir/convert.h" #include "securec/include/securec.h" #include "utils/utils.h" using std::cout; diff --git a/tests/ut/cpp/transform/graph_builder_test.cc b/tests/ut/cpp/transform/graph_builder_test.cc index e92463e2dc..e4d72b33cb 100644 --- a/tests/ut/cpp/transform/graph_builder_test.cc +++ b/tests/ut/cpp/transform/graph_builder_test.cc @@ -25,8 +25,8 @@ #endif #define private public -#include "transform/graph_builder.h" -#include "transform/df_graph_manager.h" +#include "transform/graph_ir/graph_builder.h" +#include "transform/graph_ir/df_graph_manager.h" using UT::Common; diff --git a/tests/ut/cpp/transform/graph_manager_test.cc b/tests/ut/cpp/transform/graph_manager_test.cc index 699f81ca4c..9e55e1725b 100644 --- a/tests/ut/cpp/transform/graph_manager_test.cc +++ 
b/tests/ut/cpp/transform/graph_manager_test.cc @@ -25,7 +25,7 @@ #endif #define private public -#include "transform/df_graph_manager.h" +#include "transform/graph_ir/df_graph_manager.h" using UT::Common; diff --git a/tests/ut/cpp/transform/graph_runner_test.cc b/tests/ut/cpp/transform/graph_runner_test.cc index 1b87cea464..b91ec959d2 100644 --- a/tests/ut/cpp/transform/graph_runner_test.cc +++ b/tests/ut/cpp/transform/graph_runner_test.cc @@ -21,10 +21,10 @@ #include "ir/tensor_py.h" #include "transform/transform_base_test.h" #include "common/py_func_graph_fetcher.h" -#include "pipeline/static_analysis/static_analysis.h" -#include "operator/ops.h" -#include "transform/df_graph_manager.h" -#include "transform/convert.h" +#include "pipeline/jit/static_analysis/static_analysis.h" +#include "frontend/operator/ops.h" +#include "transform/graph_ir/df_graph_manager.h" +#include "transform/graph_ir/convert.h" #include "utils/utils.h" #ifdef OPEN_SOURCE @@ -34,7 +34,7 @@ #endif #define private public -#include "transform/graph_runner.h" +#include "transform/graph_ir/graph_runner.h" using mindspore::tensor::TensorPy; diff --git a/tests/ut/cpp/transform/op_adapter_test.cc b/tests/ut/cpp/transform/op_adapter_test.cc index 254452bb42..2aa6ba37e3 100644 --- a/tests/ut/cpp/transform/op_adapter_test.cc +++ b/tests/ut/cpp/transform/op_adapter_test.cc @@ -19,9 +19,9 @@ #include "common/common_test.h" -#include "transform/op_declare.h" +#include "transform/graph_ir/op_declare.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "./common.h" using std::cout; diff --git a/tests/ut/cpp/transform/transform_base_test.h b/tests/ut/cpp/transform/transform_base_test.h index 92147dfbbf..4886b25748 100644 --- a/tests/ut/cpp/transform/transform_base_test.h +++ b/tests/ut/cpp/transform/transform_base_test.h @@ -20,11 +20,11 @@ #include #include #include -#include "transform/util.h" +#include "transform/graph_ir/util.h" #include "ir/tensor.h" #include "common/common_test.h" 
-#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse.h" #include "./common.h" #include "graph/tensor.h" diff --git a/tests/ut/cpp/utils/any_test.cc b/tests/ut/cpp/utils/any_test.cc index d11831d602..8a49017d95 100644 --- a/tests/ut/cpp/utils/any_test.cc +++ b/tests/ut/cpp/utils/any_test.cc @@ -20,7 +20,7 @@ #include #include "common/common_test.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "utils/any.h" #include "utils/misc.h" diff --git a/tests/ut/cpp/utils/callback_test.cc b/tests/ut/cpp/utils/callback_test.cc index c63f68f000..0a4ffb8190 100644 --- a/tests/ut/cpp/utils/callback_test.cc +++ b/tests/ut/cpp/utils/callback_test.cc @@ -18,9 +18,9 @@ #include "pybind11/pybind11.h" #include "utils/callbacks.h" #include "common/common_test.h" -#include "pipeline/pipeline.h" -#include "pipeline/parse/python_adapter.h" -#include "transform/df_graph_manager.h" +#include "pipeline/jit/pipeline.h" +#include "pipeline/jit/parse/python_adapter.h" +#include "transform/graph_ir/df_graph_manager.h" #include "debug/draw.h" #ifdef ENABLE_GE #include "utils/callbacks_ge.h" diff --git a/tests/ut/cpp/utils/graph_utils_test.cc b/tests/ut/cpp/utils/graph_utils_test.cc index ce5a4318d3..35fa9cdc6a 100644 --- a/tests/ut/cpp/utils/graph_utils_test.cc +++ b/tests/ut/cpp/utils/graph_utils_test.cc @@ -24,8 +24,8 @@ #include "ir/anf.h" #include "utils/graph_utils.h" -#include "pipeline/parse/parse_base.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse_base.h" +#include "pipeline/jit/parse/parse.h" namespace mindspore { diff --git a/tests/ut/cpp/utils/ir_import_test.cc b/tests/ut/cpp/utils/ir_import_test.cc index 5e7db98a38..374c36b4e8 100644 --- a/tests/ut/cpp/utils/ir_import_test.cc +++ b/tests/ut/cpp/utils/ir_import_test.cc @@ -19,10 +19,10 @@ #include "utils/log_adapter.h" #include "debug/anf_ir_utils.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse.h" #include "ir/manager.h" -#include 
"pipeline/static_analysis/prim.h" -#include "operator/ops.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "frontend/operator/ops.h" namespace mindspore { class TestIrImporter : public UT::Common { diff --git a/tests/ut/cpp/utils/symbolic_test.cc b/tests/ut/cpp/utils/symbolic_test.cc index f259b62d6b..c0abd388d5 100644 --- a/tests/ut/cpp/utils/symbolic_test.cc +++ b/tests/ut/cpp/utils/symbolic_test.cc @@ -14,7 +14,7 @@ * limitations under the License. */ #include "common/common_test.h" -#include "pipeline/static_analysis/static_analysis.h" +#include "pipeline/jit/static_analysis/static_analysis.h" #include "utils/symbolic.h" using std::cout; diff --git a/tests/ut/cpp/utils/validator_test.cc b/tests/ut/cpp/utils/validator_test.cc index 8eef44bde5..93334d7664 100644 --- a/tests/ut/cpp/utils/validator_test.cc +++ b/tests/ut/cpp/utils/validator_test.cc @@ -18,11 +18,11 @@ #include "common/common_test.h" #include "utils/log_adapter.h" -#include "pipeline/validator.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/validator.h" +#include "pipeline/jit/parse/parse.h" #include "ir/manager.h" -#include "pipeline/static_analysis/prim.h" -#include "operator/ops.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace validator { diff --git a/tests/ut/cpp/vm/segment_runner_test.cc b/tests/ut/cpp/vm/segment_runner_test.cc index b9bc552d90..c83b1b3434 100644 --- a/tests/ut/cpp/vm/segment_runner_test.cc +++ b/tests/ut/cpp/vm/segment_runner_test.cc @@ -20,11 +20,11 @@ #include "ir/manager.h" #include "utils/log_adapter.h" #include "ir/func_graph_cloner.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse.h" #include "utils/graph_utils.h" -#include "pipeline/resource.h" +#include "pipeline/jit/resource.h" #include "debug/draw.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "vm/segment_runner.h" #include "vm/transform.h" #include "ir/tensor.h" diff 
--git a/tests/ut/cpp/vm/vm_test.cc b/tests/ut/cpp/vm/vm_test.cc index 04633043af..9168d408c3 100644 --- a/tests/ut/cpp/vm/vm_test.cc +++ b/tests/ut/cpp/vm/vm_test.cc @@ -15,7 +15,7 @@ */ #include "vm/vm.h" #include "common/common_test.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "vm/backend.h" namespace mindspore { diff --git a/tests/ut/data/dataset/golden/bounding_box_augment_crop_c_result.npz b/tests/ut/data/dataset/golden/bounding_box_augment_crop_c_result.npz index e4e92210d7..14ddc166e2 100644 Binary files a/tests/ut/data/dataset/golden/bounding_box_augment_crop_c_result.npz and b/tests/ut/data/dataset/golden/bounding_box_augment_crop_c_result.npz differ diff --git a/tests/ut/data/dataset/golden/bounding_box_augment_rotation_c_result.npz b/tests/ut/data/dataset/golden/bounding_box_augment_rotation_c_result.npz index 8cc7e15e31..07ae4e5892 100644 Binary files a/tests/ut/data/dataset/golden/bounding_box_augment_rotation_c_result.npz and b/tests/ut/data/dataset/golden/bounding_box_augment_rotation_c_result.npz differ diff --git a/tests/ut/data/dataset/golden/bounding_box_augment_valid_edge_c_result.npz b/tests/ut/data/dataset/golden/bounding_box_augment_valid_edge_c_result.npz index dafea520fe..a72643457b 100644 Binary files a/tests/ut/data/dataset/golden/bounding_box_augment_valid_edge_c_result.npz and b/tests/ut/data/dataset/golden/bounding_box_augment_valid_edge_c_result.npz differ diff --git a/tests/ut/data/dataset/golden/bounding_box_augment_valid_ratio_c_result.npz b/tests/ut/data/dataset/golden/bounding_box_augment_valid_ratio_c_result.npz index 71e58406ac..9a6ae1cb99 100644 Binary files a/tests/ut/data/dataset/golden/bounding_box_augment_valid_ratio_c_result.npz and b/tests/ut/data/dataset/golden/bounding_box_augment_valid_ratio_c_result.npz differ diff --git a/tests/ut/data/dataset/golden/cache_map_01_result.npz b/tests/ut/data/dataset/golden/cache_map_01_result.npz new file mode 100644 index 0000000000..7cff9ded88 Binary 
files /dev/null and b/tests/ut/data/dataset/golden/cache_map_01_result.npz differ diff --git a/tests/ut/data/dataset/golden/cache_map_02_result.npz b/tests/ut/data/dataset/golden/cache_map_02_result.npz new file mode 100644 index 0000000000..7cff9ded88 Binary files /dev/null and b/tests/ut/data/dataset/golden/cache_map_02_result.npz differ diff --git a/tests/ut/data/dataset/golden/random_crop_with_bbox_01_c_result.npz b/tests/ut/data/dataset/golden/random_crop_with_bbox_01_c_result.npz index 0c220fd09d..bb33f1bece 100644 Binary files a/tests/ut/data/dataset/golden/random_crop_with_bbox_01_c_result.npz and b/tests/ut/data/dataset/golden/random_crop_with_bbox_01_c_result.npz differ diff --git a/tests/ut/data/dataset/golden/random_horizontal_flip_with_bbox_01_c_result.npz b/tests/ut/data/dataset/golden/random_horizontal_flip_with_bbox_01_c_result.npz index d360bb98ec..416223ff4d 100644 Binary files a/tests/ut/data/dataset/golden/random_horizontal_flip_with_bbox_01_c_result.npz and b/tests/ut/data/dataset/golden/random_horizontal_flip_with_bbox_01_c_result.npz differ diff --git a/tests/ut/data/dataset/golden/random_resize_with_bbox_op_01_c_coco_result.npz b/tests/ut/data/dataset/golden/random_resize_with_bbox_op_01_c_coco_result.npz new file mode 100644 index 0000000000..db62d6509e Binary files /dev/null and b/tests/ut/data/dataset/golden/random_resize_with_bbox_op_01_c_coco_result.npz differ diff --git a/tests/ut/data/dataset/golden/random_resize_with_bbox_op_01_c_voc_result.npz b/tests/ut/data/dataset/golden/random_resize_with_bbox_op_01_c_voc_result.npz new file mode 100644 index 0000000000..75f4447ded Binary files /dev/null and b/tests/ut/data/dataset/golden/random_resize_with_bbox_op_01_c_voc_result.npz differ diff --git a/tests/ut/data/dataset/golden/random_resized_crop_with_bbox_01_c_result.npz b/tests/ut/data/dataset/golden/random_resized_crop_with_bbox_01_c_result.npz index a909cbe88c..aa9778bd39 100644 Binary files 
a/tests/ut/data/dataset/golden/random_resized_crop_with_bbox_01_c_result.npz and b/tests/ut/data/dataset/golden/random_resized_crop_with_bbox_01_c_result.npz differ diff --git a/tests/ut/data/dataset/golden/random_vertical_flip_with_bbox_01_c_result.npz b/tests/ut/data/dataset/golden/random_vertical_flip_with_bbox_01_c_result.npz index aba6fe97b0..e0e0eb2823 100644 Binary files a/tests/ut/data/dataset/golden/random_vertical_flip_with_bbox_01_c_result.npz and b/tests/ut/data/dataset/golden/random_vertical_flip_with_bbox_01_c_result.npz differ diff --git a/tests/ut/data/dataset/golden/repeat_list_result.npz b/tests/ut/data/dataset/golden/repeat_list_result.npz index c0240c6e21..883ac58be8 100644 Binary files a/tests/ut/data/dataset/golden/repeat_list_result.npz and b/tests/ut/data/dataset/golden/repeat_list_result.npz differ diff --git a/tests/ut/data/dataset/golden/repeat_result.npz b/tests/ut/data/dataset/golden/repeat_result.npz index 73b0a24b20..2df787cef8 100644 Binary files a/tests/ut/data/dataset/golden/repeat_result.npz and b/tests/ut/data/dataset/golden/repeat_result.npz differ diff --git a/tests/ut/data/dataset/golden/resize_with_bbox_op_01_c_coco_result.npz b/tests/ut/data/dataset/golden/resize_with_bbox_op_01_c_coco_result.npz new file mode 100644 index 0000000000..999c15e5f3 Binary files /dev/null and b/tests/ut/data/dataset/golden/resize_with_bbox_op_01_c_coco_result.npz differ diff --git a/tests/ut/data/dataset/golden/resize_with_bbox_op_01_c_voc_result.npz b/tests/ut/data/dataset/golden/resize_with_bbox_op_01_c_voc_result.npz new file mode 100644 index 0000000000..ca64884937 Binary files /dev/null and b/tests/ut/data/dataset/golden/resize_with_bbox_op_01_c_voc_result.npz differ diff --git a/tests/ut/data/dataset/golden/tf_file_no_schema.npz b/tests/ut/data/dataset/golden/tf_file_no_schema.npz deleted file mode 100644 index b823998521..0000000000 Binary files a/tests/ut/data/dataset/golden/tf_file_no_schema.npz and /dev/null differ diff --git 
a/tests/ut/data/dataset/golden/tf_file_padBytes10.npz b/tests/ut/data/dataset/golden/tf_file_padBytes10.npz deleted file mode 100644 index e3d6d9934b..0000000000 Binary files a/tests/ut/data/dataset/golden/tf_file_padBytes10.npz and /dev/null differ diff --git a/tests/ut/data/dataset/golden/tfreader_result.npz b/tests/ut/data/dataset/golden/tfreader_result.npz deleted file mode 100644 index 10cad9f2b0..0000000000 Binary files a/tests/ut/data/dataset/golden/tfreader_result.npz and /dev/null differ diff --git a/tests/ut/data/dataset/golden/tfrecord_files_basic.npz b/tests/ut/data/dataset/golden/tfrecord_files_basic.npz new file mode 100644 index 0000000000..810182faf9 Binary files /dev/null and b/tests/ut/data/dataset/golden/tfrecord_files_basic.npz differ diff --git a/tests/ut/data/dataset/golden/tfrecord_no_schema.npz b/tests/ut/data/dataset/golden/tfrecord_no_schema.npz new file mode 100644 index 0000000000..bda2807e89 Binary files /dev/null and b/tests/ut/data/dataset/golden/tfrecord_no_schema.npz differ diff --git a/tests/ut/data/dataset/golden/tfrecord_pad_bytes10.npz b/tests/ut/data/dataset/golden/tfrecord_pad_bytes10.npz new file mode 100644 index 0000000000..580e19de64 Binary files /dev/null and b/tests/ut/data/dataset/golden/tfrecord_pad_bytes10.npz differ diff --git a/tests/ut/data/dataset/imagefolder/ExpectedBoundingBoxAugmentOp0.jpg b/tests/ut/data/dataset/imagefolder/ExpectedBoundingBoxAugmentOp0.jpg new file mode 100644 index 0000000000..242559f276 Binary files /dev/null and b/tests/ut/data/dataset/imagefolder/ExpectedBoundingBoxAugmentOp0.jpg differ diff --git a/tests/ut/data/dataset/imagefolder/ExpectedRandomCropWithBBox_C0.jpg b/tests/ut/data/dataset/imagefolder/ExpectedRandomCropWithBBox_C0.jpg new file mode 100644 index 0000000000..362d841170 Binary files /dev/null and b/tests/ut/data/dataset/imagefolder/ExpectedRandomCropWithBBox_C0.jpg differ diff --git a/tests/ut/data/dataset/imagefolder/ExpectedRandomHorizontalFlipWithBBox0.jpg 
b/tests/ut/data/dataset/imagefolder/ExpectedRandomHorizontalFlipWithBBox0.jpg new file mode 100644 index 0000000000..3210a7b1fe Binary files /dev/null and b/tests/ut/data/dataset/imagefolder/ExpectedRandomHorizontalFlipWithBBox0.jpg differ diff --git a/tests/ut/data/dataset/imagefolder/ExpectedRandomResizeWithBBox_C0.jpg b/tests/ut/data/dataset/imagefolder/ExpectedRandomResizeWithBBox_C0.jpg new file mode 100644 index 0000000000..235516d75f Binary files /dev/null and b/tests/ut/data/dataset/imagefolder/ExpectedRandomResizeWithBBox_C0.jpg differ diff --git a/tests/ut/data/dataset/imagefolder/ExpectedRandomResizedCropWithBBox_C0.jpg b/tests/ut/data/dataset/imagefolder/ExpectedRandomResizedCropWithBBox_C0.jpg new file mode 100644 index 0000000000..d7666adb9b Binary files /dev/null and b/tests/ut/data/dataset/imagefolder/ExpectedRandomResizedCropWithBBox_C0.jpg differ diff --git a/tests/ut/data/dataset/imagefolder/ExpectedRandomVerticalFlipWithBBox_C0.jpg b/tests/ut/data/dataset/imagefolder/ExpectedRandomVerticalFlipWithBBox_C0.jpg new file mode 100644 index 0000000000..c5fe8ff540 Binary files /dev/null and b/tests/ut/data/dataset/imagefolder/ExpectedRandomVerticalFlipWithBBox_C0.jpg differ diff --git a/tests/ut/data/dataset/imagefolder/ExpectedResizeWithBBox_C0.jpg b/tests/ut/data/dataset/imagefolder/ExpectedResizeWithBBox_C0.jpg new file mode 100644 index 0000000000..f6dfd85547 Binary files /dev/null and b/tests/ut/data/dataset/imagefolder/ExpectedResizeWithBBox_C0.jpg differ diff --git a/tests/ut/data/dataset/testCifar100Data/datasetSchema.json b/tests/ut/data/dataset/testCifar100Data/datasetSchema.json deleted file mode 100644 index 474a806bf2..0000000000 --- a/tests/ut/data/dataset/testCifar100Data/datasetSchema.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "datasetType": "CIFAR100", - "numRows": 100, - "columns": { - "image": { - "type": "uint8", - "rank": 1, - "t_impl": "cvmat" - }, - "coarse_label" : { - "type": "uint32", - "rank": 1, - "t_impl": "flex" - }, - 
"fine_label" : { - "type": "uint32", - "rank": 1, - "t_impl": "flex" - } - } -} diff --git a/tests/ut/data/dataset/testCifar100Data/datasetSchemaTestRepeat.json b/tests/ut/data/dataset/testCifar100Data/datasetSchemaTestRepeat.json deleted file mode 100644 index a90edb342b..0000000000 --- a/tests/ut/data/dataset/testCifar100Data/datasetSchemaTestRepeat.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "datasetType": "CIFAR100", - "numRows": 33, - "columns": { - "image": { - "type": "uint8", - "rank": 1, - "t_impl": "cvmat" - }, - "coarse_label" : { - "type": "uint32", - "rank": 1, - "t_impl": "flex" - }, - "fine_label" : { - "type": "uint32", - "rank": 1, - "t_impl": "flex" - } - } -} diff --git a/tests/ut/data/dataset/testCifar10Data/data_batch_1.bin b/tests/ut/data/dataset/testCifar10Data/data_batch_1.bin index 7964f0952c..b3ec462f79 100644 Binary files a/tests/ut/data/dataset/testCifar10Data/data_batch_1.bin and b/tests/ut/data/dataset/testCifar10Data/data_batch_1.bin differ diff --git a/tests/ut/data/dataset/testCifar10Data/datasetDistributionAll.json b/tests/ut/data/dataset/testCifar10Data/datasetDistributionAll.json deleted file mode 100644 index 9234a6e033..0000000000 --- a/tests/ut/data/dataset/testCifar10Data/datasetDistributionAll.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "deviceNum" : 3, - "deviceId" : 1, - "shardConfig" : "ALL", - "shuffle" : "ON", - "seed" : 0, - "epoch" : 2 -} - diff --git a/tests/ut/data/dataset/testCifar10Data/datasetDistributionRandom.json b/tests/ut/data/dataset/testCifar10Data/datasetDistributionRandom.json deleted file mode 100644 index 3f61c582a5..0000000000 --- a/tests/ut/data/dataset/testCifar10Data/datasetDistributionRandom.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "deviceNum" : 3, - "deviceId" : 1, - "shardConfig" : "RANDOM", - "shuffle" : "ON", - "seed" : 0, - "epoch" : 1 -} - diff --git a/tests/ut/data/dataset/testCifar10Data/datasetDistributionUnique.json b/tests/ut/data/dataset/testCifar10Data/datasetDistributionUnique.json deleted 
file mode 100644 index 99e685132b..0000000000 --- a/tests/ut/data/dataset/testCifar10Data/datasetDistributionUnique.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "deviceNum" : 3, - "deviceId" : 1, - "shardConfig" : "UNIQUE", - "shuffle" : "ON", - "seed" : 0, - "epoch" : 3 -} - diff --git a/tests/ut/data/dataset/testCifar10Data/datasetSchema.json b/tests/ut/data/dataset/testCifar10Data/datasetSchema.json deleted file mode 100644 index 1a04b9af59..0000000000 --- a/tests/ut/data/dataset/testCifar10Data/datasetSchema.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "datasetType": "CIFAR10", - "numRows": 60000, - "columns": { - "image": { - "type": "uint8", - "rank": 1, - "t_impl": "cvmat" - }, - "label" : { - "type": "uint32", - "rank": 1, - "t_impl": "flex" - } - } -} diff --git a/tests/ut/data/dataset/testCifar10Data/datasetSchemaTestRepeat.json b/tests/ut/data/dataset/testCifar10Data/datasetSchemaTestRepeat.json deleted file mode 100644 index c25e11c30f..0000000000 --- a/tests/ut/data/dataset/testCifar10Data/datasetSchemaTestRepeat.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "datasetType": "CIFAR10", - "numRows": 33, - "columns": { - "image": { - "type": "uint8", - "rank": 1, - "t_impl": "cvmat" - }, - "label" : { - "type": "uint32", - "rank": 1, - "t_impl": "flex" - } - } -} diff --git a/tests/ut/data/dataset/test_tf_file_3_images_1/datasetSchema.json b/tests/ut/data/dataset/test_tf_file_3_images_1/datasetSchema.json deleted file mode 100644 index 0aa5a4577a..0000000000 --- a/tests/ut/data/dataset/test_tf_file_3_images_1/datasetSchema.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "datasetType": "TF", - "numRows": 3, - "columns": { - "label": { - "type": "int64", - "rank": 1, - "t_impl": "flex" - } - } -} diff --git a/tests/ut/data/dataset/test_tf_file_3_images_1/train-0000-of-0001.data b/tests/ut/data/dataset/test_tf_file_3_images_1/train-0000-of-0001.data deleted file mode 100644 index 829e8d70cb..0000000000 Binary files 
a/tests/ut/data/dataset/test_tf_file_3_images_1/train-0000-of-0001.data and /dev/null differ diff --git a/tests/ut/data/dataset/test_tf_file_3_images_2/datasetSchema.json b/tests/ut/data/dataset/test_tf_file_3_images_2/datasetSchema.json deleted file mode 100644 index b7b3cb9ea3..0000000000 --- a/tests/ut/data/dataset/test_tf_file_3_images_2/datasetSchema.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "datasetType": "TF", - "numRows": 3, - "columns": { - "image": { - "type": "uint8", - "rank": 1, - "t_impl": "cvmat" - } - } -} diff --git a/tests/ut/data/dataset/test_tf_file_3_images_2/train-0000-of-0001.data b/tests/ut/data/dataset/test_tf_file_3_images_2/train-0000-of-0001.data deleted file mode 100644 index 829e8d70cb..0000000000 Binary files a/tests/ut/data/dataset/test_tf_file_3_images_2/train-0000-of-0001.data and /dev/null differ diff --git a/tests/ut/data/mindrecord/testGraphData/testdata b/tests/ut/data/mindrecord/testGraphData/testdata index e206469ac6..5235973469 100644 Binary files a/tests/ut/data/mindrecord/testGraphData/testdata and b/tests/ut/data/mindrecord/testGraphData/testdata differ diff --git a/tests/ut/data/mindrecord/testGraphData/testdata.db b/tests/ut/data/mindrecord/testGraphData/testdata.db index 541da0e998..0f022589f4 100644 Binary files a/tests/ut/data/mindrecord/testGraphData/testdata.db and b/tests/ut/data/mindrecord/testGraphData/testdata.db differ diff --git a/tests/ut/python/automl/case.py b/tests/ut/python/automl/case.py new file mode 100644 index 0000000000..745376277c --- /dev/null +++ b/tests/ut/python/automl/case.py @@ -0,0 +1,41 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Test case.""" +import numpy as np + +import mindspore +import mindspore.nn as nn +from mindspore import Tensor, context + + +class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + self.conv1 = nn.Conv2d(1, 3, 3) + self.conv2 = nn.Conv2d(1, 3, 5, has_bias=True) + self.layers = (self.conv1, self.conv2) + + def construct(self, x, index): + x = self.layers[index](x) + y = self.conv1(x) + return x + y + + +def test_case(): + context.set_context(mode=context.GRAPH_MODE, save_graphs=True) + net = Net() + data = Tensor(np.ones((1, 1, 224, 224)), mindspore.float32) + idx = Tensor(1, mindspore.int32) + net(data, idx) diff --git a/tests/ut/python/dataset/test_basic_tokenizer.py b/tests/ut/python/dataset/test_basic_tokenizer.py deleted file mode 100644 index 45c9f94da4..0000000000 --- a/tests/ut/python/dataset/test_basic_tokenizer.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -""" -Testing BasicTokenizer op in DE -""" -import numpy as np -import mindspore.dataset as ds -from mindspore import log as logger -import mindspore.dataset.text as nlp - -BASIC_TOKENIZER_FILE = "../data/dataset/testTokenizerData/basic_tokenizer.txt" - -test_paras = [ - dict( - first=1, - last=6, - expected_tokens= - [['Welcome', 'to', 'Beijing', '北', '京', '欢', '迎', '您'], - ['長', '風', '破', '浪', '會', '有', '時', ',', '直', '掛', '雲', '帆', '濟', '滄', '海'], - ['😀', '嘿', '嘿', '😃', '哈', '哈', '😄', '大', '笑', '😁', '嘻', '嘻'], - ['明', '朝', '(', '1368', '—', '1644', '年', ')', '和', '清', '朝', - '(', '1644', '—', '1911', '年', ')', ',', '是', '中', '国', '封', - '建', '王', '朝', '史', '上', '最', '后', '两', '个', '朝', '代'], - ['明', '代', '(', '1368', '-', '1644', ')', 'と', '清', '代', - '(', '1644', '-', '1911', ')', 'は', '、', '中', '国', 'の', '封', - '建', '王', '朝', 'の', '歴', '史', 'における', '最', '後', 'の2つの', '王', '朝', 'でした'], - ['명나라', '(', '1368', '-', '1644', ')', '와', '청나라', '(', '1644', '-', '1911', ')', '는', - '중국', '봉건', '왕조의', '역사에서', '마지막', '두', '왕조였다']] - ), - dict( - first=7, - last=7, - expected_tokens=[['this', 'is', 'a', 'funky', 'string']], - lower_case=True - ), -] - - -def check_basic_tokenizer(first, last, expected_tokens, lower_case=False, keep_whitespace=False, - normalization_form=nlp.utils.NormalizeForm.NONE, preserve_unused_token=False): - dataset = ds.TextFileDataset(BASIC_TOKENIZER_FILE, shuffle=False) - if first > 1: - dataset = dataset.skip(first - 1) - if last >= first: - dataset = dataset.take(last - first + 1) - - basic_tokenizer = nlp.BasicTokenizer(lower_case=lower_case, - keep_whitespace=keep_whitespace, - normalization_form=normalization_form, - preserve_unused_token=preserve_unused_token) - - dataset = dataset.map(operations=basic_tokenizer) - count = 0 - for i in dataset.create_dict_iterator(): - text = nlp.to_str(i['text']) - logger.info("Out:", text) - logger.info("Exp:", 
expected_tokens[count]) - np.testing.assert_array_equal(text, expected_tokens[count]) - count = count + 1 - - -def test_basic_tokenizer(): - """ - Test BasicTokenizer - """ - for paras in test_paras: - check_basic_tokenizer(**paras) - - -if __name__ == '__main__': - test_basic_tokenizer() diff --git a/tests/ut/python/dataset/test_bounding_box_augment.py b/tests/ut/python/dataset/test_bounding_box_augment.py index fbcb56514f..8924af968c 100644 --- a/tests/ut/python/dataset/test_bounding_box_augment.py +++ b/tests/ut/python/dataset/test_bounding_box_augment.py @@ -15,36 +15,21 @@ """ Testing the bounding box augment op in DE """ -from util import visualize_with_bounding_boxes, InvalidBBoxType, check_bad_bbox, \ - config_get_set_seed, config_get_set_num_parallel_workers, save_and_check_md5 + import numpy as np import mindspore.log as logger import mindspore.dataset as ds import mindspore.dataset.transforms.vision.c_transforms as c_vision +from util import visualize_with_bounding_boxes, InvalidBBoxType, check_bad_bbox, \ + config_get_set_seed, config_get_set_num_parallel_workers, save_and_check_md5 + GENERATE_GOLDEN = False +# updated VOC dataset with correct annotations DATA_DIR = "../data/dataset/testVOC2012_2" - - -def fix_annotate(bboxes): - """ - Fix annotations to format followed by mindspore. 
- :param bboxes: in [label, x_min, y_min, w, h, truncate, difficult] format - :return: annotation in [x_min, y_min, w, h, label, truncate, difficult] format - """ - for bbox in bboxes: - if bbox.size == 7: - tmp = bbox[0] - bbox[0] = bbox[1] - bbox[1] = bbox[2] - bbox[2] = bbox[3] - bbox[3] = bbox[4] - bbox[4] = tmp - else: - print("ERROR: Invalid Bounding Box size provided") - break - return bboxes +DATA_DIR_2 = ["../data/dataset/testCOCO/train/", + "../data/dataset/testCOCO/annotations/train.json"] # DATA_DIR, ANNOTATION_DIR def test_bounding_box_augment_with_rotation_op(plot_vis=False): @@ -63,13 +48,6 @@ def test_bounding_box_augment_with_rotation_op(plot_vis=False): # Ratio is set to 1 to apply rotation on all bounding boxes. test_op = c_vision.BoundingBoxAugment(c_vision.RandomRotation(90), 1) - # maps to fix annotations to minddata standard - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -100,22 +78,15 @@ def test_bounding_box_augment_with_crop_op(plot_vis=False): """ logger.info("test_bounding_box_augment_with_crop_op") - original_seed = config_get_set_seed(1) + original_seed = config_get_set_seed(0) original_num_parallel_workers = config_get_set_num_parallel_workers(1) dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) - # Ratio is set to 1 to apply rotation on all bounding boxes. 
- test_op = c_vision.BoundingBoxAugment(c_vision.RandomCrop(90), 1) - - # maps to fix annotations to minddata standard - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) + # Ratio is set to 0.9 to apply RandomCrop of size (50, 50) on 90% of the bounding boxes. + test_op = c_vision.BoundingBoxAugment(c_vision.RandomCrop(50), 0.9) + # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -154,13 +125,6 @@ def test_bounding_box_augment_valid_ratio_c(plot_vis=False): test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 0.9) - # maps to fix annotations to minddata standard - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -183,6 +147,36 @@ def test_bounding_box_augment_valid_ratio_c(plot_vis=False): ds.config.set_num_parallel_workers(original_num_parallel_workers) +def test_bounding_box_augment_op_coco_c(plot_vis=False): + """ + Prints images and bboxes side by side with and without BoundingBoxAugment Op applied, + Testing with COCO dataset + """ + logger.info("test_bounding_box_augment_op_coco_c") + + dataCoco1 = ds.CocoDataset(DATA_DIR_2[0], annotation_file=DATA_DIR_2[1], task="Detection", + decode=True, shuffle=False) + + dataCoco2 = ds.CocoDataset(DATA_DIR_2[0], annotation_file=DATA_DIR_2[1], task="Detection", + decode=True, shuffle=False) + + test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 1) + + dataCoco2 = dataCoco2.map(input_columns=["image", "bbox"], + 
output_columns=["image", "bbox"], + columns_order=["image", "bbox"], + operations=[test_op]) + + unaugSamp, augSamp = [], [] + + for unAug, Aug in zip(dataCoco1.create_dict_iterator(), dataCoco2.create_dict_iterator()): + unaugSamp.append(unAug) + augSamp.append(Aug) + + if plot_vis: + visualize_with_bounding_boxes(unaugSamp, augSamp, "bbox") + + def test_bounding_box_augment_valid_edge_c(plot_vis=False): """ Test BoundingBoxAugment op (testing with valid edge case, box covering full image). @@ -198,25 +192,18 @@ def test_bounding_box_augment_valid_edge_c(plot_vis=False): test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 1) - # maps to fix annotations to minddata standard - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops # Add column for "annotation" dataVoc1 = dataVoc1.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], columns_order=["image", "annotation"], operations=lambda img, bbox: - (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.uint32))) + (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32))) dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], columns_order=["image", "annotation"], operations=lambda img, bbox: - (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.uint32))) + (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32))) dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], columns_order=["image", "annotation"], @@ -249,10 +236,6 @@ def test_bounding_box_augment_invalid_ratio_c(): try: # ratio range is from 0 - 1 test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 1.5) - # maps to 
fix annotations to minddata standard - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -260,7 +243,7 @@ def test_bounding_box_augment_invalid_ratio_c(): operations=[test_op]) # Add column for "annotation" except ValueError as error: logger.info("Got an exception in DE: {}".format(str(error))) - assert "Input is not" in str(error) + assert "Input ratio is not within the required interval of (0.0 to 1.0)." in str(error) def test_bounding_box_augment_invalid_bounds_c(): @@ -286,6 +269,7 @@ if __name__ == "__main__": # set to false to not show plots test_bounding_box_augment_with_rotation_op(plot_vis=False) test_bounding_box_augment_with_crop_op(plot_vis=False) + test_bounding_box_augment_op_coco_c(plot_vis=False) test_bounding_box_augment_valid_ratio_c(plot_vis=False) test_bounding_box_augment_valid_edge_c(plot_vis=False) test_bounding_box_augment_invalid_ratio_c() diff --git a/tests/ut/python/dataset/test_bucket_batch_by_length.py b/tests/ut/python/dataset/test_bucket_batch_by_length.py index febcc6483f..405b874110 100644 --- a/tests/ut/python/dataset/test_bucket_batch_by_length.py +++ b/tests/ut/python/dataset/test_bucket_batch_by_length.py @@ -17,6 +17,7 @@ import pytest import numpy as np import mindspore.dataset as ds + # generates 1 column [0], [0, 1], ..., [0, ..., n-1] def generate_sequential(n): for i in range(n): @@ -44,6 +45,7 @@ def test_bucket_batch_invalid_input(): bucket_boundaries = [1, 2, 3] empty_bucket_boundaries = [] invalid_bucket_boundaries = ["1", "2", "3"] + zero_start_bucket_boundaries = [0, 2, 3] negative_bucket_boundaries = [1, 2, -3] decreasing_bucket_boundaries = [3, 2, 1] non_increasing_bucket_boundaries = [1, 2, 2] @@ -58,7 +60,7 @@ def test_bucket_batch_invalid_input(): with pytest.raises(TypeError) as info: _ = 
dataset.bucket_batch_by_length(invalid_column_names, bucket_boundaries, bucket_batch_sizes) - assert "column_names should be a list of str" in str(info.value) + assert "Argument column_names[0] with value 1 is not of type (,)." in str(info.value) with pytest.raises(ValueError) as info: _ = dataset.bucket_batch_by_length(column_names, empty_bucket_boundaries, bucket_batch_sizes) @@ -68,9 +70,13 @@ def test_bucket_batch_invalid_input(): _ = dataset.bucket_batch_by_length(column_names, invalid_bucket_boundaries, bucket_batch_sizes) assert "bucket_boundaries should be a list of int" in str(info.value) + with pytest.raises(ValueError) as info: + _ = dataset.bucket_batch_by_length(column_names, zero_start_bucket_boundaries, bucket_batch_sizes) + assert "bucket_boundaries must only contain positive numbers." in str(info.value) + with pytest.raises(ValueError) as info: _ = dataset.bucket_batch_by_length(column_names, negative_bucket_boundaries, bucket_batch_sizes) - assert "bucket_boundaries cannot contain any negative numbers" in str(info.value) + assert "bucket_boundaries must only contain positive numbers." in str(info.value) with pytest.raises(ValueError) as info: _ = dataset.bucket_batch_by_length(column_names, decreasing_bucket_boundaries, bucket_batch_sizes) @@ -99,12 +105,12 @@ def test_bucket_batch_invalid_input(): with pytest.raises(TypeError) as info: _ = dataset.bucket_batch_by_length(column_names, bucket_boundaries, bucket_batch_sizes, None, None, invalid_type_pad_to_bucket_boundary) - assert "Wrong input type for pad_to_bucket_boundary, should be " in str(info.value) + assert "Argument pad_to_bucket_boundary with value \"\" is not of type (,)." 
in str(info.value) with pytest.raises(TypeError) as info: _ = dataset.bucket_batch_by_length(column_names, bucket_boundaries, bucket_batch_sizes, None, None, False, invalid_type_drop_remainder) - assert "Wrong input type for drop_remainder, should be " in str(info.value) + assert "Argument drop_remainder with value \"\" is not of type (,)." in str(info.value) def test_bucket_batch_multi_bucket_no_padding(): @@ -272,7 +278,6 @@ def test_bucket_batch_default_pad(): [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]]] - output = [] for data in dataset.create_dict_iterator(): output.append(data["col1"].tolist()) diff --git a/tests/ut/python/dataset/test_cache_map.py b/tests/ut/python/dataset/test_cache_map.py new file mode 100644 index 0000000000..0e42b422aa --- /dev/null +++ b/tests/ut/python/dataset/test_cache_map.py @@ -0,0 +1,157 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +""" +Testing cache operator with mappable datasets +""" +import mindspore.dataset as ds +import mindspore.dataset.transforms.vision.c_transforms as c_vision +from mindspore import log as logger +from util import save_and_check_md5 + +DATA_DIR = "../data/dataset/testImageNetData/train/" + +GENERATE_GOLDEN = False + +def test_cache_map_basic1(): + """ + Test mappable leaf with cache op right over the leaf + + Repeat + | + Map(decode) + | + Cache + | + ImageFolder + """ + + logger.info("Test cache map basic 1") + + some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) + + # This DATA_DIR only has 2 images in it + ds1 = ds.ImageFolderDatasetV2(dataset_dir=DATA_DIR, cache=some_cache) + decode_op = c_vision.Decode() + ds1 = ds1.map(input_columns=["image"], operations=decode_op) + ds1 = ds1.repeat(4) + + filename = "cache_map_01_result.npz" + save_and_check_md5(ds1, filename, generate_golden=GENERATE_GOLDEN) + + logger.info("test_cache_map_basic1 Ended.\n") + + +def test_cache_map_basic2(): + """ + Test mappable leaf with the cache op later in the tree above the map(decode) + + Repeat + | + Cache + | + Map(decode) + | + ImageFolder + """ + + logger.info("Test cache map basic 2") + + some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) + + # This DATA_DIR only has 2 images in it + ds1 = ds.ImageFolderDatasetV2(dataset_dir=DATA_DIR) + decode_op = c_vision.Decode() + ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache) + ds1 = ds1.repeat(4) + + filename = "cache_map_02_result.npz" + save_and_check_md5(ds1, filename, generate_golden=GENERATE_GOLDEN) + + logger.info("test_cache_map_basic2 Ended.\n") + + +def test_cache_map_basic3(): + """ + Test a repeat under mappable cache + + Cache + | + Map(decode) + | + Repeat + | + ImageFolder + """ + + logger.info("Test cache basic 3") + + some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) 
+ + # This DATA_DIR only has 2 images in it + ds1 = ds.ImageFolderDatasetV2(dataset_dir=DATA_DIR) + decode_op = c_vision.Decode() + ds1 = ds1.repeat(4) + ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache) + + num_iter = 0 + for _ in ds1.create_dict_iterator(): + num_iter += 1 + + logger.info("Number of data in ds1: {} ".format(num_iter)) + assert num_iter == 8 + logger.info('test_cache_basic3 Ended.\n') + + +def test_cache_map_failure1(): + """ + Test nested cache (failure) + + Repeat + | + Cache + | + Map(decode) + | + Cache + | + ImageFolder + + """ + logger.info("Test cache failure 1") + + some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) + + # This DATA_DIR only has 2 images in it + ds1 = ds.ImageFolderDatasetV2(dataset_dir=DATA_DIR, cache=some_cache) + decode_op = c_vision.Decode() + ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache) + ds1 = ds1.repeat(4) + + try: + num_iter = 0 + for _ in ds1.create_dict_iterator(): + num_iter += 1 + except RuntimeError as e: + logger.info("Got an exception in DE: {}".format(str(e))) + assert "Nested cache operations is not supported!" in str(e) + + assert num_iter == 0 + logger.info('test_cache_failure1 Ended.\n') + +if __name__ == '__main__': + test_cache_map_basic1() + test_cache_map_basic2() + test_cache_map_basic3() + test_cache_map_failure1() diff --git a/tests/ut/python/dataset/test_cache_nomap.py b/tests/ut/python/dataset/test_cache_nomap.py new file mode 100644 index 0000000000..39e00c0621 --- /dev/null +++ b/tests/ut/python/dataset/test_cache_nomap.py @@ -0,0 +1,429 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +""" +Testing cache operator with non-mappable datasets +""" +import mindspore.common.dtype as mstype +import mindspore.dataset as ds +import mindspore.dataset.transforms.vision.c_transforms as c_vision +from mindspore import log as logger + +DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"] +SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json" + +GENERATE_GOLDEN = False + +def test_cache_nomap_basic1(): + """ + A random dataset (a non mappable dataset) with a cache over it just after the leaf + """ + + logger.info("Test cache nomap basic 1") + + schema = ds.Schema() + schema.add_column('image', de_type=mstype.uint8, + shape=[640, 480, 3]) # 921600 bytes (a bit less than 1 MB per image) + schema.add_column('label', de_type=mstype.uint8, shape=[1]) + + # create a cache. 
arbitrary session_id for now + some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) + + # User-created sampler here + ds1 = ds.RandomDataset(schema=schema, total_rows=10, num_parallel_workers=4, cache=some_cache) + ds1 = ds1.repeat(4) + + num_iter = 0 + for data in ds1.create_dict_iterator(): + logger.info("printing the label: {}".format(data["label"])) + num_iter += 1 + + logger.info("Number of data in ds1: {} ".format(num_iter)) + assert num_iter == 40 + logger.info("test_cache_nomap_basic1 Ended.\n") + + +def test_cache_nomap_basic2(): + """ + A random dataset (a non mappable dataset) with a cache over it just after the leaf + """ + + logger.info("Test cache nomap basic 2") + + schema = ds.Schema() + schema.add_column('image', de_type=mstype.uint8, + shape=[640, 480, 3]) # 921600 bytes (a bit less than 1 MB per image) + schema.add_column('label', de_type=mstype.uint8, shape=[1]) + + # create a cache. arbitrary session_id for now + some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) + + # sampler arg not given directly, however any of these args will auto-generate an appropriate sampler: + # num_samples, shuffle, num_shards, shard_id + # In this case, the presence of num_samples chooses a sampler. 
+ ds1 = ds.RandomDataset(schema=schema, total_rows=20, num_samples=20, num_parallel_workers=4, cache=some_cache) + ds1 = ds1.repeat(2) + + num_iter = 0 + for data in ds1.create_dict_iterator(): + logger.info("printing the label: {}".format(data["label"])) + num_iter += 1 + + logger.info("Number of data in ds1: {} ".format(num_iter)) + assert num_iter == 40 + logger.info("test_cache_nomap_basic2 Ended.\n") + + +def test_cache_nomap_basic3(): + """ + A TF reader dataset (a non mappable dataset) with a cache over it just after the leaf + + Repeat + | + Map(decode) + | + Cache + | + TFReader + """ + + logger.info("Test cache nomap basic 3") + + some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) + ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False, cache=some_cache) + decode_op = c_vision.Decode() + ds1 = ds1.map(input_columns=["image"], operations=decode_op) + ds1 = ds1.repeat(4) + + num_iter = 0 + for _ in ds1.create_dict_iterator(): + num_iter += 1 + + logger.info("Number of data in ds1: {} ".format(num_iter)) + assert num_iter == 12 + logger.info("test_cache_nomap_basic3 Ended.\n") + + +def test_cache_nomap_basic4(): + """ + A TF reader dataset (a non mappable dataset) with a map decode and cache after it + Since a global shuffle is used for the tf reader, it will inject a shuffle op over the tf. + But, if there's a cache later, that shuffle becomes invalid and should be removed. + + Repeat + | + Cache + | + Map(decode) + | + TFReader + """ + + logger.info("Test cache nomap basic 4") + + # This dataset has 3 records in it only + some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) + # With shuffle not being set, TF defaults to a "global" shuffle when there is no cache + # in the picture. This causes a shuffle-injection over the TF. For clarify, this test will + # explicitly give the global option, even though it's the default in python. 
+ # But, when caching is added in the ascendent tree above TF, we do global shuffling + # through the sampler over the cache, not by the shuffle op. In that case, tree prepare + # will remove the shuffle op that got injected by the initial tree creation. + ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=ds.Shuffle.GLOBAL) + decode_op = c_vision.Decode() + + ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache) + ds1 = ds1.repeat(4) + + num_iter = 0 + for _ in ds1.create_dict_iterator(): + num_iter += 1 + + logger.info("Number of data in ds1: {} ".format(num_iter)) + assert num_iter == 12 + logger.info("test_cache_nomap_basic4 Ended.\n") + + +def test_cache_nomap_basic5(): + """ + A TF reader dataset (a non mappable dataset) with a cache over it just after the leaf + Same as test 3, but this one does not have shuffle arg, causing tf to default to global + shuffle which attempts to inject a shuffle operator. However, since there is a cache + we do not need global shuffle, so the shuffle will not be built. 
It ends up being + identical to test basic 3, however we arrive at the same tree in different codepaths + (if there was no cache, then the shuffle IS built) + + Repeat + | + Map(decode) + | + Cache + | + TFReader + """ + + logger.info("Test cache nomap basic 5") + + # This dataset has 3 records in it only + some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) + ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], cache=some_cache) + decode_op = c_vision.Decode() + ds1 = ds1.map(input_columns=["image"], operations=decode_op) + ds1 = ds1.repeat(4) + + num_iter = 0 + for _ in ds1.create_dict_iterator(): + num_iter += 1 + + logger.info("Number of data in ds1: {} ".format(num_iter)) + assert num_iter == 12 + logger.info("test_cache_nomap_basic5 Ended.\n") + + +def test_cache_nomap_basic6(): + """ + A TF reader dataset (a non mappable dataset) with a cache over it just after the leaf + In this one, the tf dataset will be given sharding configuration, however since a cache is + used, the tree prepare should undo the sharding configuration and instead, a distributed + sampler will be chosen with the same shard config. + + Repeat + | + Map(decode) + | + Cache + | + TFReader + """ + + logger.info("Test cache nomap basic 6") + + # This dataset has 3 records in it only + some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) + + # With only 3 records shard into 3, we expect only 1 record returned for this shard + # However, the sharding will be done by the sampler, not by the tf record leaf node + # In this case, it is a row-based sharding, not the file-based sharding that would happen if + # there was not any cache. 
+ ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_shards=3, shard_id=1, cache=some_cache) + decode_op = c_vision.Decode() + ds1 = ds1.map(input_columns=["image"], operations=decode_op) + ds1 = ds1.repeat(4) + + num_iter = 0 + for _ in ds1.create_dict_iterator(): + num_iter += 1 + + logger.info("Number of data in ds1: {} ".format(num_iter)) + assert num_iter == 4 + logger.info("test_cache_nomap_basic6 Ended.\n") + + +def test_cache_nomap_basic7(): + """ + A TF reader dataset (a non mappable dataset) that uses global shuffle, and is cached followed by + map. + In this one, the tf dataset with global shuffle might want to inject a shuffle op over top of the + tf reader, but since a cache is given, it will choose not to. + + Repeat + | + Map(decode) + | + cache + | + TFReader + """ + + logger.info("Test cache nomap basic 7") + + # This dataset has 3 records in it only + some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) + + ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=ds.Shuffle.GLOBAL, cache=some_cache) + decode_op = c_vision.Decode() + ds1 = ds1.map(input_columns=["image"], operations=decode_op) + ds1 = ds1.repeat(4) + + num_iter = 0 + for _ in ds1.create_dict_iterator(): + num_iter += 1 + + logger.info("Number of data in ds1: {} ".format(num_iter)) + assert num_iter == 12 + logger.info("test_cache_nomap_basic7 Ended.\n") + + +def test_cache_nomap_allowed_share1(): + """ + It is allowed to share the cache between the following two trees: + + Repeat Shuffle + | | + Cache Cache + | | + TFReader TFReader + """ + + logger.info("Test cache nomap allowed share 1") + + ds.config.set_seed(1) + # This dataset has 3 records in it only + some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) + ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False, cache=some_cache) + ds1 = ds1.repeat(4) + + ds2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], 
shuffle=False, cache=some_cache) + ds2 = ds2.shuffle(buffer_size=2) + + num_iter = 0 + for _ in ds1.create_dict_iterator(): + num_iter += 1 + assert num_iter == 12 + logger.info("Number of data in ds1: {} ".format(num_iter)) + + num_iter = 0 + for _ in ds2.create_dict_iterator(): + num_iter += 1 + assert num_iter == 3 + logger.info("test_cache_nomap_allowed_share1 Ended.\n") + + +def test_cache_nomap_allowed_share2(): + """ + It is allowed to share the cache between the following two trees (with map decode): + + Repeat Shuffle + | | + Cache Cache + | | + Map(decode) Map(decode) + | | + TFReader TFReader + """ + + logger.info("Test cache nomap allowed share 2") + + ds.config.set_seed(1) + # This dataset has 3 records in it only + some_cache = ds.DatasetCache(session_id=2, size=0, spilling=True) + decode_op = c_vision.Decode() + + ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) + ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache) + ds1 = ds1.repeat(4) + + ds2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) + ds2 = ds2.map(input_columns=["image"], operations=decode_op, cache=some_cache) + ds2 = ds2.shuffle(buffer_size=2) + + num_iter = 0 + for _ in ds1.create_dict_iterator(): + num_iter += 1 + logger.info("Number of data in ds1: {} ".format(num_iter)) + assert num_iter == 12 + + num_iter = 0 + for _ in ds2.create_dict_iterator(): + num_iter += 1 + assert num_iter == 3 + logger.info("test_cache_nomap_allowed_share2 Ended.\n") + + +def test_cache_nomap_allowed_share3(): + """ + It is allowed to share the cache between the following two trees (different shard ids): + + Repeat Repeat + | | + Cache Cache + | | + TFReader(shard_id = 0) TFReader(shard_id = 1) + """ + + logger.info("Test cache nomap allowed share 3") + + some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) + + tf_files = ["../data/dataset/tf_file_dataset/test1.data", 
"../data/dataset/tf_file_dataset/test2.data"] + ds1 = ds.TFRecordDataset(tf_files, num_shards=2, shard_id=0, num_samples=3, shuffle=False, cache=some_cache) + ds1 = ds1.repeat(4) + + ds2 = ds.TFRecordDataset(tf_files, num_shards=2, shard_id=1, num_samples=3, shuffle=False, cache=some_cache) + ds2 = ds2.repeat(4) + + num_iter = 0 + for _ in ds1.create_dict_iterator(): + num_iter += 1 + logger.info("Number of data in ds1: {} ".format(num_iter)) + assert num_iter == 12 + + num_iter = 0 + for _ in ds2.create_dict_iterator(): + num_iter += 1 + assert num_iter == 12 + logger.info("test_cache_nomap_allowed_share3 Ended.\n") + + +def test_cache_nomap_disallowed_share1(): + """ + It is not allowed to share the cache between the following two trees: + + Cache Cache + | | + Map(decode) Map(rescale) + | | + TFReader TFReader + """ + + logger.info("Test cache nomap disallowed share1") + + # This dataset has 3 records in it only + some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) + decode_op = c_vision.Decode() + rescale_op = c_vision.Rescale(1.0 / 255.0, -1.0) + + ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) + ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache) + + ds2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) + ds2 = ds2.map(input_columns=["image"], operations=rescale_op, cache=some_cache) + + num_iter = 0 + for _ in ds1.create_dict_iterator(): + num_iter += 1 + logger.info("Number of data in ds1: {} ".format(num_iter)) + assert num_iter == 3 + + try: + sum([1 for _ in ds2]) + except RuntimeError as e: + logger.info("Got an exception in DE: {}".format(str(e))) + assert "Attempt to re-use a cache for a different tree!" 
in str(e) + + logger.info("test_cache_nomap_disallowed_share1 Ended.\n") + + +if __name__ == '__main__': + test_cache_nomap_basic1() + test_cache_nomap_basic2() + test_cache_nomap_basic3() + test_cache_nomap_basic4() + test_cache_nomap_basic5() + test_cache_nomap_basic6() + test_cache_nomap_basic7() + test_cache_nomap_allowed_share1() + test_cache_nomap_allowed_share2() + test_cache_nomap_allowed_share3() + test_cache_nomap_disallowed_share1() diff --git a/tests/ut/python/dataset/test_cifarop.py b/tests/ut/python/dataset/test_cifarop.py deleted file mode 100644 index e944f8703d..0000000000 --- a/tests/ut/python/dataset/test_cifarop.py +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -import os - -import numpy as np - -import mindspore.dataset as ds -from mindspore import log as logger - -# Data for CIFAR and MNIST are not part of build tree -# They need to be downloaded directly -# prep_data.py can be executed or code below -# import sys -# sys.path.insert(0,"../../data") -# import prep_data -# prep_data.download_all_for_test("../../data") -DATA_DIR_10 = "../data/dataset/testCifar10Data" -DATA_DIR_100 = "../data/dataset/testCifar100Data" - - -def load_cifar(path): - raw = np.empty(0, dtype=np.uint8) - for file_name in os.listdir(path): - if file_name.endswith(".bin"): - with open(os.path.join(path, file_name), mode='rb') as file: - raw = np.append(raw, np.fromfile(file, dtype=np.uint8), axis=0) - raw = raw.reshape(-1, 3073) - labels = raw[:, 0] - images = raw[:, 1:] - images = images.reshape(-1, 3, 32, 32) - images = images.transpose(0, 2, 3, 1) - return images, labels - - -def test_case_dataset_cifar10(): - """ - dataset parameter - """ - logger.info("Test dataset parameter") - # apply dataset operations - data1 = ds.Cifar10Dataset(DATA_DIR_10, 100) - - num_iter = 0 - for _ in data1.create_dict_iterator(): - # in this example, each dictionary has keys "image" and "label" - num_iter += 1 - assert num_iter == 100 - - -def test_case_dataset_cifar100(): - """ - dataset parameter - """ - logger.info("Test dataset parameter") - # apply dataset operations - data1 = ds.Cifar100Dataset(DATA_DIR_100, 100) - - num_iter = 0 - for _ in data1.create_dict_iterator(): - # in this example, each dictionary has keys "image" and "label" - num_iter += 1 - assert num_iter == 100 - - -def test_reading_cifar10(): - """ - Validate CIFAR10 image readings - """ - data1 = ds.Cifar10Dataset(DATA_DIR_10, 100, shuffle=False) - images, labels = load_cifar(DATA_DIR_10) - for i, d in enumerate(data1.create_dict_iterator()): - np.testing.assert_array_equal(d["image"], images[i]) - 
np.testing.assert_array_equal(d["label"], labels[i]) - - -if __name__ == '__main__': - test_case_dataset_cifar10() - test_case_dataset_cifar100() - test_reading_cifar10() diff --git a/tests/ut/python/dataset/test_concatenate_op.py b/tests/ut/python/dataset/test_concatenate_op.py index d04ff49724..f7a432e471 100644 --- a/tests/ut/python/dataset/test_concatenate_op.py +++ b/tests/ut/python/dataset/test_concatenate_op.py @@ -108,7 +108,7 @@ def test_concatenate_op_type_mismatch(): with pytest.raises(RuntimeError) as error_info: for _ in data: pass - assert "Tensor types do not match" in repr(error_info.value) + assert "Tensor types do not match" in str(error_info.value) def test_concatenate_op_type_mismatch2(): @@ -123,7 +123,7 @@ def test_concatenate_op_type_mismatch2(): with pytest.raises(RuntimeError) as error_info: for _ in data: pass - assert "Tensor types do not match" in repr(error_info.value) + assert "Tensor types do not match" in str(error_info.value) def test_concatenate_op_incorrect_dim(): @@ -138,13 +138,13 @@ def test_concatenate_op_incorrect_dim(): with pytest.raises(RuntimeError) as error_info: for _ in data: pass - assert "Only 1D tensors supported" in repr(error_info.value) + assert "Only 1D tensors supported" in str(error_info.value) def test_concatenate_op_wrong_axis(): with pytest.raises(ValueError) as error_info: data_trans.Concatenate(2) - assert "only 1D concatenation supported." in repr(error_info.value) + assert "only 1D concatenation supported." 
in str(error_info.value) def test_concatenate_op_negative_axis(): @@ -163,18 +163,11 @@ def test_concatenate_op_negative_axis(): def test_concatenate_op_incorrect_input_dim(): - def gen(): - yield (np.array(["ss", "ad"], dtype='S'),) - prepend_tensor = np.array([["ss", "ad"], ["ss", "ad"]], dtype='S') - data = ds.GeneratorDataset(gen, column_names=["col"]) - concatenate_op = data_trans.Concatenate(0, prepend_tensor) - data = data.map(input_columns=["col"], operations=concatenate_op) - with pytest.raises(RuntimeError) as error_info: - for _ in data: - pass - assert "Only 1D tensors supported" in repr(error_info.value) + with pytest.raises(ValueError) as error_info: + data_trans.Concatenate(0, prepend_tensor) + assert "can only prepend 1D arrays." in str(error_info.value) if __name__ == "__main__": diff --git a/tests/ut/python/dataset/test_config.py b/tests/ut/python/dataset/test_config.py index 259f42d948..6783eea2fd 100644 --- a/tests/ut/python/dataset/test_config.py +++ b/tests/ut/python/dataset/test_config.py @@ -245,17 +245,17 @@ def test_deterministic_run_distribution(): # First dataset data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - random_crop_op = c_vision.RandomHorizontalFlip(0.1) + random_horizontal_flip_op = c_vision.RandomHorizontalFlip(0.1) decode_op = c_vision.Decode() data1 = data1.map(input_columns=["image"], operations=decode_op) - data1 = data1.map(input_columns=["image"], operations=random_crop_op) + data1 = data1.map(input_columns=["image"], operations=random_horizontal_flip_op) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = data2.map(input_columns=["image"], operations=decode_op) # If seed is set up on constructor, so the two ops output deterministic sequence - random_crop_op2 = c_vision.RandomHorizontalFlip(0.1) - data2 = data2.map(input_columns=["image"], operations=random_crop_op2) + random_horizontal_flip_op2 = 
c_vision.RandomHorizontalFlip(0.1) + data2 = data2.map(input_columns=["image"], operations=random_horizontal_flip_op2) for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): np.testing.assert_equal(item1["image"], item2["image"]) diff --git a/tests/ut/python/dataset/test_dataset_numpy_slices.py b/tests/ut/python/dataset/test_dataset_numpy_slices.py index 4cd4e26a33..791a567408 100644 --- a/tests/ut/python/dataset/test_dataset_numpy_slices.py +++ b/tests/ut/python/dataset/test_dataset_numpy_slices.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== +import sys +import pytest import numpy as np +import pandas as pd import mindspore.dataset as de from mindspore import log as logger import mindspore.dataset.transforms.vision.c_transforms as vision -import pandas as pd def test_numpy_slices_list_1(): @@ -172,8 +174,26 @@ def test_numpy_slices_distributed_sampler(): assert sum([1 for _ in ds]) == 2 -def test_numpy_slices_sequential_sampler(): +def test_numpy_slices_distributed_shard_limit(): + logger.info("Test Slicing a 1D list.") + + np_data = [1, 2, 3] + num = sys.maxsize + with pytest.raises(ValueError) as err: + de.NumpySlicesDataset(np_data, num_shards=num, shard_id=0, shuffle=False) + assert "Input num_shards is not within the required interval of (1 to 2147483647)." in str(err.value) + +def test_numpy_slices_distributed_zero_shard(): + logger.info("Test Slicing a 1D list.") + + np_data = [1, 2, 3] + with pytest.raises(ValueError) as err: + de.NumpySlicesDataset(np_data, num_shards=0, shard_id=0, shuffle=False) + assert "Input num_shards is not within the required interval of (1 to 2147483647)." 
in str(err.value) + + +def test_numpy_slices_sequential_sampler(): logger.info("Test numpy_slices_dataset with SequentialSampler and repeat.") np_data = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]] @@ -183,6 +203,42 @@ def test_numpy_slices_sequential_sampler(): assert np.equal(data[0], np_data[i % 8]).all() +def test_numpy_slices_invalid_column_names_type(): + logger.info("Test incorrect column_names input") + np_data = [1, 2, 3] + + with pytest.raises(TypeError) as err: + de.NumpySlicesDataset(np_data, column_names=[1], shuffle=False) + assert "Argument column_names[0] with value 1 is not of type (,)." in str(err.value) + + +def test_numpy_slices_invalid_column_names_string(): + logger.info("Test incorrect column_names input") + np_data = [1, 2, 3] + + with pytest.raises(ValueError) as err: + de.NumpySlicesDataset(np_data, column_names=[""], shuffle=False) + assert "column_names[0] should not be empty" in str(err.value) + + +def test_numpy_slices_invalid_empty_column_names(): + logger.info("Test incorrect column_names input") + np_data = [1, 2, 3] + + with pytest.raises(ValueError) as err: + de.NumpySlicesDataset(np_data, column_names=[], shuffle=False) + assert "column_names should not be empty" in str(err.value) + + +def test_numpy_slices_invalid_empty_data_column(): + logger.info("Test incorrect column_names input") + np_data = [] + + with pytest.raises(ValueError) as err: + de.NumpySlicesDataset(np_data, shuffle=False) + assert "Argument data cannot be empty" in str(err.value) + + if __name__ == "__main__": test_numpy_slices_list_1() test_numpy_slices_list_2() @@ -196,4 +252,10 @@ if __name__ == "__main__": test_numpy_slices_csv_dict() test_numpy_slices_num_samplers() test_numpy_slices_distributed_sampler() + test_numpy_slices_distributed_shard_limit() + test_numpy_slices_distributed_zero_shard() test_numpy_slices_sequential_sampler() + test_numpy_slices_invalid_column_names_type() + test_numpy_slices_invalid_column_names_string() + 
test_numpy_slices_invalid_empty_column_names() + test_numpy_slices_invalid_empty_data_column() diff --git a/tests/ut/python/dataset/test_datasets_cifarop.py b/tests/ut/python/dataset/test_datasets_cifarop.py new file mode 100644 index 0000000000..d6d3029b53 --- /dev/null +++ b/tests/ut/python/dataset/test_datasets_cifarop.py @@ -0,0 +1,387 @@ +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +""" +Test Cifar10 and Cifar100 dataset operators +""" +import os +import pytest +import numpy as np +import matplotlib.pyplot as plt +import mindspore.dataset as ds +from mindspore import log as logger + +DATA_DIR_10 = "../data/dataset/testCifar10Data" +DATA_DIR_100 = "../data/dataset/testCifar100Data" + + +def load_cifar(path, kind="cifar10"): + """ + load Cifar10/100 data + """ + raw = np.empty(0, dtype=np.uint8) + for file_name in os.listdir(path): + if file_name.endswith(".bin"): + with open(os.path.join(path, file_name), mode='rb') as file: + raw = np.append(raw, np.fromfile(file, dtype=np.uint8), axis=0) + if kind == "cifar10": + raw = raw.reshape(-1, 3073) + labels = raw[:, 0] + images = raw[:, 1:] + elif kind == "cifar100": + raw = raw.reshape(-1, 3074) + labels = raw[:, :2] + images = raw[:, 2:] + else: + raise ValueError("Invalid parameter value") + images = images.reshape(-1, 3, 32, 32) + images = images.transpose(0, 2, 3, 1) + return images, 
labels + + +def visualize_dataset(images, labels): + """ + Helper function to visualize the dataset samples + """ + num_samples = len(images) + for i in range(num_samples): + plt.subplot(1, num_samples, i + 1) + plt.imshow(images[i]) + plt.title(labels[i]) + plt.show() + + +### Testcases for Cifar10Dataset Op ### + + +def test_cifar10_content_check(): + """ + Validate Cifar10Dataset image readings + """ + logger.info("Test Cifar10Dataset Op with content check") + data1 = ds.Cifar10Dataset(DATA_DIR_10, num_samples=100, shuffle=False) + images, labels = load_cifar(DATA_DIR_10) + num_iter = 0 + # in this example, each dictionary has keys "image" and "label" + for i, d in enumerate(data1.create_dict_iterator()): + np.testing.assert_array_equal(d["image"], images[i]) + np.testing.assert_array_equal(d["label"], labels[i]) + num_iter += 1 + assert num_iter == 100 + + +def test_cifar10_basic(): + """ + Validate CIFAR10 + """ + logger.info("Test Cifar10Dataset Op") + + # case 1: test num_samples + data1 = ds.Cifar10Dataset(DATA_DIR_10, num_samples=100) + num_iter1 = 0 + for _ in data1.create_dict_iterator(): + num_iter1 += 1 + assert num_iter1 == 100 + + # case 2: test num_parallel_workers + data2 = ds.Cifar10Dataset(DATA_DIR_10, num_samples=50, num_parallel_workers=1) + num_iter2 = 0 + for _ in data2.create_dict_iterator(): + num_iter2 += 1 + assert num_iter2 == 50 + + # case 3: test repeat + data3 = ds.Cifar10Dataset(DATA_DIR_10, num_samples=100) + data3 = data3.repeat(3) + num_iter3 = 0 + for _ in data3.create_dict_iterator(): + num_iter3 += 1 + assert num_iter3 == 300 + + # case 4: test batch with drop_remainder=False + data4 = ds.Cifar10Dataset(DATA_DIR_10, num_samples=100) + assert data4.get_dataset_size() == 100 + assert data4.get_batch_size() == 1 + data4 = data4.batch(batch_size=7) # drop_remainder is default to be False + assert data4.get_dataset_size() == 15 + assert data4.get_batch_size() == 7 + num_iter4 = 0 + for _ in data4.create_dict_iterator(): + num_iter4 
+= 1 + assert num_iter4 == 15 + + # case 5: test batch with drop_remainder=True + data5 = ds.Cifar10Dataset(DATA_DIR_10, num_samples=100) + assert data5.get_dataset_size() == 100 + assert data5.get_batch_size() == 1 + data5 = data5.batch(batch_size=7, drop_remainder=True) # the rest of incomplete batch will be dropped + assert data5.get_dataset_size() == 14 + assert data5.get_batch_size() == 7 + num_iter5 = 0 + for _ in data5.create_dict_iterator(): + num_iter5 += 1 + assert num_iter5 == 14 + + +def test_cifar10_pk_sampler(): + """ + Test Cifar10Dataset with PKSampler + """ + logger.info("Test Cifar10Dataset Op with PKSampler") + golden = [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, + 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9] + sampler = ds.PKSampler(3) + data = ds.Cifar10Dataset(DATA_DIR_10, sampler=sampler) + num_iter = 0 + label_list = [] + for item in data.create_dict_iterator(): + label_list.append(item["label"]) + num_iter += 1 + np.testing.assert_array_equal(golden, label_list) + assert num_iter == 30 + + +def test_cifar10_sequential_sampler(): + """ + Test Cifar10Dataset with SequentialSampler + """ + logger.info("Test Cifar10Dataset Op with SequentialSampler") + num_samples = 30 + sampler = ds.SequentialSampler(num_samples=num_samples) + data1 = ds.Cifar10Dataset(DATA_DIR_10, sampler=sampler) + data2 = ds.Cifar10Dataset(DATA_DIR_10, shuffle=False, num_samples=num_samples) + num_iter = 0 + for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + np.testing.assert_equal(item1["label"], item2["label"]) + num_iter += 1 + assert num_iter == num_samples + + +def test_cifar10_exception(): + """ + Test error cases for Cifar10Dataset + """ + logger.info("Test error cases for Cifar10Dataset") + error_msg_1 = "sampler and shuffle cannot be specified at the same time" + with pytest.raises(RuntimeError, match=error_msg_1): + ds.Cifar10Dataset(DATA_DIR_10, shuffle=False, sampler=ds.PKSampler(3)) + + error_msg_2 = "sampler and sharding 
cannot be specified at the same time" + with pytest.raises(RuntimeError, match=error_msg_2): + ds.Cifar10Dataset(DATA_DIR_10, sampler=ds.PKSampler(3), num_shards=2, shard_id=0) + + error_msg_3 = "num_shards is specified and currently requires shard_id as well" + with pytest.raises(RuntimeError, match=error_msg_3): + ds.Cifar10Dataset(DATA_DIR_10, num_shards=10) + + error_msg_4 = "shard_id is specified but num_shards is not" + with pytest.raises(RuntimeError, match=error_msg_4): + ds.Cifar10Dataset(DATA_DIR_10, shard_id=0) + + error_msg_5 = "Input shard_id is not within the required interval" + with pytest.raises(ValueError, match=error_msg_5): + ds.Cifar10Dataset(DATA_DIR_10, num_shards=2, shard_id=-1) + with pytest.raises(ValueError, match=error_msg_5): + ds.Cifar10Dataset(DATA_DIR_10, num_shards=2, shard_id=5) + + error_msg_6 = "num_parallel_workers exceeds" + with pytest.raises(ValueError, match=error_msg_6): + ds.Cifar10Dataset(DATA_DIR_10, shuffle=False, num_parallel_workers=0) + with pytest.raises(ValueError, match=error_msg_6): + ds.Cifar10Dataset(DATA_DIR_10, shuffle=False, num_parallel_workers=88) + + +def test_cifar10_visualize(plot=False): + """ + Visualize Cifar10Dataset results + """ + logger.info("Test Cifar10Dataset visualization") + + data1 = ds.Cifar10Dataset(DATA_DIR_10, num_samples=10, shuffle=False) + num_iter = 0 + image_list, label_list = [], [] + for item in data1.create_dict_iterator(): + image = item["image"] + label = item["label"] + image_list.append(image) + label_list.append("label {}".format(label)) + assert isinstance(image, np.ndarray) + assert image.shape == (32, 32, 3) + assert image.dtype == np.uint8 + assert label.dtype == np.uint32 + num_iter += 1 + assert num_iter == 10 + if plot: + visualize_dataset(image_list, label_list) + + +### Testcases for Cifar100Dataset Op ### + +def test_cifar100_content_check(): + """ + Validate Cifar100Dataset image readings + """ + logger.info("Test Cifar100Dataset with content check") + data1 = 
ds.Cifar100Dataset(DATA_DIR_100, num_samples=100, shuffle=False) + images, labels = load_cifar(DATA_DIR_100, kind="cifar100") + num_iter = 0 + # in this example, each dictionary has keys "image", "coarse_label" and "fine_image" + for i, d in enumerate(data1.create_dict_iterator()): + np.testing.assert_array_equal(d["image"], images[i]) + np.testing.assert_array_equal(d["coarse_label"], labels[i][0]) + np.testing.assert_array_equal(d["fine_label"], labels[i][1]) + num_iter += 1 + assert num_iter == 100 + + +def test_cifar100_basic(): + """ + Test Cifar100Dataset + """ + logger.info("Test Cifar100Dataset") + + # case 1: test num_samples + data1 = ds.Cifar100Dataset(DATA_DIR_100, num_samples=100) + num_iter1 = 0 + for _ in data1.create_dict_iterator(): + num_iter1 += 1 + assert num_iter1 == 100 + + # case 2: test repeat + data1 = data1.repeat(2) + num_iter2 = 0 + for _ in data1.create_dict_iterator(): + num_iter2 += 1 + assert num_iter2 == 200 + + # case 3: test num_parallel_workers + data2 = ds.Cifar100Dataset(DATA_DIR_100, num_samples=100, num_parallel_workers=1) + num_iter3 = 0 + for _ in data2.create_dict_iterator(): + num_iter3 += 1 + assert num_iter3 == 100 + + # case 4: test batch with drop_remainder=False + data3 = ds.Cifar100Dataset(DATA_DIR_100, num_samples=100) + assert data3.get_dataset_size() == 100 + assert data3.get_batch_size() == 1 + data3 = data3.batch(batch_size=3) + assert data3.get_dataset_size() == 34 + assert data3.get_batch_size() == 3 + num_iter4 = 0 + for _ in data3.create_dict_iterator(): + num_iter4 += 1 + assert num_iter4 == 34 + + # case 4: test batch with drop_remainder=True + data4 = ds.Cifar100Dataset(DATA_DIR_100, num_samples=100) + data4 = data4.batch(batch_size=3, drop_remainder=True) + assert data4.get_dataset_size() == 33 + assert data4.get_batch_size() == 3 + num_iter5 = 0 + for _ in data4.create_dict_iterator(): + num_iter5 += 1 + assert num_iter5 == 33 + + +def test_cifar100_pk_sampler(): + """ + Test Cifar100Dataset with 
PKSampler + """ + logger.info("Test Cifar100Dataset with PKSampler") + golden = [i for i in range(20)] + sampler = ds.PKSampler(1) + data = ds.Cifar100Dataset(DATA_DIR_100, sampler=sampler) + num_iter = 0 + label_list = [] + for item in data.create_dict_iterator(): + label_list.append(item["coarse_label"]) + num_iter += 1 + np.testing.assert_array_equal(golden, label_list) + assert num_iter == 20 + + +def test_cifar100_exception(): + """ + Test error cases for Cifar100Dataset + """ + logger.info("Test error cases for Cifar100Dataset") + error_msg_1 = "sampler and shuffle cannot be specified at the same time" + with pytest.raises(RuntimeError, match=error_msg_1): + ds.Cifar100Dataset(DATA_DIR_100, shuffle=False, sampler=ds.PKSampler(3)) + + error_msg_2 = "sampler and sharding cannot be specified at the same time" + with pytest.raises(RuntimeError, match=error_msg_2): + ds.Cifar100Dataset(DATA_DIR_100, sampler=ds.PKSampler(3), num_shards=2, shard_id=0) + + error_msg_3 = "num_shards is specified and currently requires shard_id as well" + with pytest.raises(RuntimeError, match=error_msg_3): + ds.Cifar100Dataset(DATA_DIR_100, num_shards=10) + + error_msg_4 = "shard_id is specified but num_shards is not" + with pytest.raises(RuntimeError, match=error_msg_4): + ds.Cifar100Dataset(DATA_DIR_100, shard_id=0) + + error_msg_5 = "Input shard_id is not within the required interval" + with pytest.raises(ValueError, match=error_msg_5): + ds.Cifar100Dataset(DATA_DIR_100, num_shards=2, shard_id=-1) + with pytest.raises(ValueError, match=error_msg_5): + ds.Cifar10Dataset(DATA_DIR_100, num_shards=2, shard_id=5) + + error_msg_6 = "num_parallel_workers exceeds" + with pytest.raises(ValueError, match=error_msg_6): + ds.Cifar100Dataset(DATA_DIR_100, shuffle=False, num_parallel_workers=0) + with pytest.raises(ValueError, match=error_msg_6): + ds.Cifar100Dataset(DATA_DIR_100, shuffle=False, num_parallel_workers=88) + + +def test_cifar100_visualize(plot=False): + """ + Visualize 
Cifar100Dataset results + """ + logger.info("Test Cifar100Dataset visualization") + + data1 = ds.Cifar100Dataset(DATA_DIR_100, num_samples=10, shuffle=False) + num_iter = 0 + image_list, label_list = [], [] + for item in data1.create_dict_iterator(): + image = item["image"] + coarse_label = item["coarse_label"] + fine_label = item["fine_label"] + image_list.append(image) + label_list.append("coarse_label {}\nfine_label {}".format(coarse_label, fine_label)) + assert isinstance(image, np.ndarray) + assert image.shape == (32, 32, 3) + assert image.dtype == np.uint8 + assert coarse_label.dtype == np.uint32 + assert fine_label.dtype == np.uint32 + num_iter += 1 + assert num_iter == 10 + if plot: + visualize_dataset(image_list, label_list) + + +if __name__ == '__main__': + test_cifar10_content_check() + test_cifar10_basic() + test_cifar10_pk_sampler() + test_cifar10_sequential_sampler() + test_cifar10_exception() + test_cifar10_visualize(plot=False) + + test_cifar100_content_check() + test_cifar100_basic() + test_cifar100_pk_sampler() + test_cifar100_exception() + test_cifar100_visualize(plot=False) diff --git a/tests/ut/python/dataset/test_datasets_imagenet.py b/tests/ut/python/dataset/test_datasets_imagenet.py deleted file mode 100644 index a6e2afa65a..0000000000 --- a/tests/ut/python/dataset/test_datasets_imagenet.py +++ /dev/null @@ -1,204 +0,0 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -import mindspore.dataset as ds -import mindspore.dataset.transforms.c_transforms as data_trans -import mindspore.dataset.transforms.vision.c_transforms as vision -from mindspore import log as logger - -DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"] -SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json" - - -def test_case_repeat(): - """ - a simple repeat operation. - """ - logger.info("Test Simple Repeat") - # define parameters - repeat_count = 2 - - # apply dataset operations - data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) - data1 = data1.repeat(repeat_count) - - num_iter = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary - # in this example, each dictionary has keys "image" and "label" - logger.info("image is: {}".format(item["image"])) - logger.info("label is: {}".format(item["label"])) - num_iter += 1 - - logger.info("Number of data in data1: {}".format(num_iter)) - - -def test_case_shuffle(): - """ - a simple shuffle operation. 
- """ - logger.info("Test Simple Shuffle") - # define parameters - buffer_size = 8 - seed = 10 - - # apply dataset operations - data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) - ds.config.set_seed(seed) - data1 = data1.shuffle(buffer_size=buffer_size) - - for item in data1.create_dict_iterator(): - logger.info("image is: {}".format(item["image"])) - logger.info("label is: {}".format(item["label"])) - - -def test_case_0(): - """ - Test Repeat then Shuffle - """ - logger.info("Test Repeat then Shuffle") - # define parameters - repeat_count = 2 - buffer_size = 7 - seed = 9 - - # apply dataset operations - data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) - data1 = data1.repeat(repeat_count) - ds.config.set_seed(seed) - data1 = data1.shuffle(buffer_size=buffer_size) - - num_iter = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary - # in this example, each dictionary has keys "image" and "label" - logger.info("image is: {}".format(item["image"])) - logger.info("label is: {}".format(item["label"])) - num_iter += 1 - - logger.info("Number of data in data1: {}".format(num_iter)) - - -def test_case_0_reverse(): - """ - Test Shuffle then Repeat - """ - logger.info("Test Shuffle then Repeat") - # define parameters - repeat_count = 2 - buffer_size = 10 - seed = 9 - - # apply dataset operations - data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) - ds.config.set_seed(seed) - data1 = data1.shuffle(buffer_size=buffer_size) - data1 = data1.repeat(repeat_count) - - num_iter = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary - # in this example, each dictionary has keys "image" and "label" - logger.info("image is: {}".format(item["image"])) - logger.info("label is: {}".format(item["label"])) - num_iter += 1 - - logger.info("Number of data in data1: {}".format(num_iter)) - - -def test_case_3(): - """ - Test Map - """ - logger.info("Test Map Rescale and Resize, then Shuffle") - data1 = 
ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) - # define data augmentation parameters - rescale = 1.0 / 255.0 - shift = 0.0 - resize_height, resize_width = 224, 224 - - # define map operations - decode_op = vision.Decode() - rescale_op = vision.Rescale(rescale, shift) - # resize_op = vision.Resize(resize_height, resize_width, - # InterpolationMode.DE_INTER_LINEAR) # Bilinear mode - resize_op = vision.Resize((resize_height, resize_width)) - - # apply map operations on images - data1 = data1.map(input_columns=["image"], operations=decode_op) - data1 = data1.map(input_columns=["image"], operations=rescale_op) - data1 = data1.map(input_columns=["image"], operations=resize_op) - - # # apply ont-hot encoding on labels - num_classes = 4 - one_hot_encode = data_trans.OneHot(num_classes) # num_classes is input argument - data1 = data1.map(input_columns=["label"], operations=one_hot_encode) - # - # # apply Datasets - buffer_size = 100 - seed = 10 - batch_size = 2 - ds.config.set_seed(seed) - data1 = data1.shuffle(buffer_size=buffer_size) # 10000 as in imageNet train script - data1 = data1.batch(batch_size, drop_remainder=True) - - num_iter = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary - # in this example, each dictionary has keys "image" and "label" - logger.info("image is: {}".format(item["image"])) - logger.info("label is: {}".format(item["label"])) - num_iter += 1 - - logger.info("Number of data in data1: {}".format(num_iter)) - - -if __name__ == '__main__': - logger.info('===========now test Repeat============') - # logger.info('Simple Repeat') - test_case_repeat() - logger.info('\n') - - logger.info('===========now test Shuffle===========') - # logger.info('Simple Shuffle') - test_case_shuffle() - logger.info('\n') - - # Note: cannot work with different shapes, hence not for image - # logger.info('===========now test Batch=============') - # # logger.info('Simple Batch') - # test_case_batch() - # logger.info('\n') - - 
logger.info('===========now test case 0============') - # logger.info('Repeat then Shuffle') - test_case_0() - logger.info('\n') - - logger.info('===========now test case 0 reverse============') - # # logger.info('Shuffle then Repeat') - test_case_0_reverse() - logger.info('\n') - - # logger.info('===========now test case 1============') - # # logger.info('Repeat with Batch') - # test_case_1() - # logger.info('\n') - - # logger.info('===========now test case 2============') - # # logger.info('Batch with Shuffle') - # test_case_2() - # logger.info('\n') - - # for image augmentation only - logger.info('===========now test case 3============') - logger.info('Map then Shuffle') - test_case_3() - logger.info('\n') diff --git a/tests/ut/python/dataset/test_datasets_imagenet_distribution.py b/tests/ut/python/dataset/test_datasets_imagenet_distribution.py deleted file mode 100644 index 92bdb68dc5..0000000000 --- a/tests/ut/python/dataset/test_datasets_imagenet_distribution.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -import mindspore.dataset as ds -from mindspore import log as logger - -DATA_DIR = ["../data/dataset/test_tf_file_3_images2/train-0000-of-0001.data", - "../data/dataset/test_tf_file_3_images2/train-0000-of-0002.data", - "../data/dataset/test_tf_file_3_images2/train-0000-of-0003.data", - "../data/dataset/test_tf_file_3_images2/train-0000-of-0004.data"] - -SCHEMA_DIR = "../data/dataset/test_tf_file_3_images2/datasetSchema.json" - - -def test_tf_file_normal(): - # apply dataset operations - data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) - data1 = data1.repeat(1) - num_iter = 0 - for _ in data1.create_dict_iterator(): # each data is a dictionary - num_iter += 1 - - logger.info("Number of data in data1: {}".format(num_iter)) - assert num_iter == 12 - - -if __name__ == '__main__': - logger.info('=======test normal=======') - test_tf_file_normal() diff --git a/tests/ut/python/dataset/test_datasets_voc.py b/tests/ut/python/dataset/test_datasets_voc.py index 8db65e9734..37f4a8c123 100644 --- a/tests/ut/python/dataset/test_datasets_voc.py +++ b/tests/ut/python/dataset/test_datasets_voc.py @@ -37,7 +37,7 @@ def test_voc_detection(): for item in data1.create_dict_iterator(): assert item["image"].shape[0] == IMAGE_SHAPE[num] for bbox in item["annotation"]: - count[bbox[0]] += 1 + count[int(bbox[6])] += 1 num += 1 assert num == 9 assert count == [3, 2, 1, 2, 4, 3] @@ -55,8 +55,8 @@ def test_voc_class_index(): count = [0, 0, 0, 0, 0, 0] for item in data1.create_dict_iterator(): for bbox in item["annotation"]: - assert (bbox[0] == 0 or bbox[0] == 1 or bbox[0] == 5) - count[bbox[0]] += 1 + assert (int(bbox[6]) == 0 or int(bbox[6]) == 1 or int(bbox[6]) == 5) + count[int(bbox[6])] += 1 num += 1 assert num == 6 assert count == [3, 2, 0, 0, 0, 3] @@ -73,8 +73,9 @@ def test_voc_get_class_indexing(): count = [0, 0, 0, 0, 0, 0] for item in data1.create_dict_iterator(): for bbox in 
item["annotation"]: - assert (bbox[0] == 0 or bbox[0] == 1 or bbox[0] == 2 or bbox[0] == 3 or bbox[0] == 4 or bbox[0] == 5) - count[bbox[0]] += 1 + assert (int(bbox[6]) == 0 or int(bbox[6]) == 1 or int(bbox[6]) == 2 or int(bbox[6]) == 3 + or int(bbox[6]) == 4 or int(bbox[6]) == 5) + count[int(bbox[6])] += 1 num += 1 assert num == 9 assert count == [3, 2, 1, 2, 4, 3] diff --git a/tests/ut/python/dataset/test_exceptions.py b/tests/ut/python/dataset/test_exceptions.py index cbfa402bb0..253eb564ae 100644 --- a/tests/ut/python/dataset/test_exceptions.py +++ b/tests/ut/python/dataset/test_exceptions.py @@ -28,9 +28,9 @@ def test_exception_01(): """ logger.info("test_exception_01") data = ds.TFRecordDataset(DATA_DIR, columns_list=["image"]) - with pytest.raises(ValueError) as info: - data = data.map(input_columns=["image"], operations=vision.Resize(100, 100)) - assert "Invalid interpolation mode." in str(info.value) + with pytest.raises(TypeError) as info: + data.map(input_columns=["image"], operations=vision.Resize(100, 100)) + assert "Argument interpolation with value 100 is not of type (,)" in str(info.value) def test_exception_02(): @@ -40,8 +40,8 @@ def test_exception_02(): logger.info("test_exception_02") num_samples = -1 with pytest.raises(ValueError) as info: - data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_samples=num_samples) - assert "num_samples cannot be less than 0" in str(info.value) + ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_samples=num_samples) + assert 'Input num_samples is not within the required interval of (0 to 2147483647).' 
in str(info.value) num_samples = 1 data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_samples=num_samples) diff --git a/tests/ut/python/dataset/test_fill_op.py b/tests/ut/python/dataset/test_fill_op.py index f138dd15ec..657a529723 100644 --- a/tests/ut/python/dataset/test_fill_op.py +++ b/tests/ut/python/dataset/test_fill_op.py @@ -82,9 +82,9 @@ def test_fillop_error_handling(): data = data.map(input_columns=["col"], operations=fill_op) with pytest.raises(RuntimeError) as error_info: - for data_row in data: - print(data_row) - assert "Types do not match" in repr(error_info.value) + for _ in data: + pass + assert "Types do not match" in str(error_info.value) if __name__ == "__main__": diff --git a/tests/ut/python/dataset/test_from_dataset.py b/tests/ut/python/dataset/test_from_dataset.py index 207a6be6a1..983052ea08 100644 --- a/tests/ut/python/dataset/test_from_dataset.py +++ b/tests/ut/python/dataset/test_from_dataset.py @@ -23,9 +23,10 @@ import mindspore.dataset.text as text def test_demo_basic_from_dataset(): """ this is a tutorial on how from_dataset should be used in a normal use case""" data = ds.TextFileDataset("../data/dataset/testVocab/words.txt", shuffle=False) - vocab = text.Vocab.from_dataset(data, "text", freq_range=None, top_k=None, special_tokens=["", ""], + vocab = text.Vocab.from_dataset(data, "text", freq_range=None, top_k=None, + special_tokens=["", ""], special_first=True) - data = data.map(input_columns=["text"], operations=text.Lookup(vocab)) + data = data.map(input_columns=["text"], operations=text.Lookup(vocab, "")) res = [] for d in data.create_dict_iterator(): res.append(d["text"].item()) @@ -38,7 +39,7 @@ def test_demo_basic_from_dataset_with_tokenizer(): data = data.map(input_columns=["text"], operations=text.UnicodeCharTokenizer()) vocab = text.Vocab.from_dataset(data, None, freq_range=None, top_k=None, special_tokens=["", ""], special_first=True) - data = data.map(input_columns=["text"], 
operations=text.Lookup(vocab)) + data = data.map(input_columns=["text"], operations=text.Lookup(vocab, "")) res = [] for d in data.create_dict_iterator(): res.append(list(d["text"])) @@ -59,7 +60,7 @@ def test_from_dataset(): corpus_dataset = ds.GeneratorDataset(gen_corpus, column_names=["text"]) vocab = text.Vocab.from_dataset(corpus_dataset, None, freq_range, top_k, special_tokens=["", ""], special_first=True) - corpus_dataset = corpus_dataset.map(input_columns="text", operations=text.Lookup(vocab)) + corpus_dataset = corpus_dataset.map(input_columns="text", operations=text.Lookup(vocab, "")) res = [] for d in corpus_dataset.create_dict_iterator(): res.append(list(d["text"])) @@ -107,7 +108,7 @@ def test_from_dataset_special_token(): corpus_dataset = ds.GeneratorDataset(gen_corpus, column_names=["text"]) vocab = text.Vocab.from_dataset(corpus_dataset, None, None, top_k, special_tokens, special_first) data = ds.GeneratorDataset(gen_input(texts), column_names=["text"]) - data = data.map(input_columns="text", operations=text.Lookup(vocab)) + data = data.map(input_columns="text", operations=text.Lookup(vocab, "")) res = [] for d in data.create_dict_iterator(): res.append(d["text"].item()) @@ -127,15 +128,16 @@ def test_from_dataset_exceptions(): data = ds.TextFileDataset("../data/dataset/testVocab/words.txt", shuffle=False) vocab = text.Vocab.from_dataset(data, columns, freq_range, top_k) assert isinstance(vocab.text.Vocab) - except ValueError as e: + except (TypeError, ValueError) as e: assert s in str(e), str(e) - test_config("text", (), 1, "freq_range needs to be either None or a tuple of 2 integers") - test_config("text", (2, 3), 1.2345, "top_k needs to be a positive integer") - test_config(23, (2, 3), 1.2345, "columns need to be a list of strings") - test_config("text", (100, 1), 12, "frequency range [a,b] should be 0 <= a <= b") - test_config("text", (2, 3), 0, "top_k needs to be a positive integer") - test_config([123], (2, 3), 0, "columns need to be a list of 
strings") + test_config("text", (), 1, "freq_range needs to be a tuple of 2 integers or an int and a None.") + test_config("text", (2, 3), 1.2345, + "Argument top_k with value 1.2345 is not of type (, )") + test_config(23, (2, 3), 1.2345, "Argument col_0 with value 23 is not of type (,)") + test_config("text", (100, 1), 12, "frequency range [a,b] should be 0 <= a <= b (a,b are inclusive)") + test_config("text", (2, 3), 0, "top_k must be greater than 0") + test_config([123], (2, 3), -1, "top_k must be greater than 0") if __name__ == '__main__': diff --git a/tests/ut/python/dataset/test_graphdata.py b/tests/ut/python/dataset/test_graphdata.py index 4083336623..0f78cfd03a 100644 --- a/tests/ut/python/dataset/test_graphdata.py +++ b/tests/ut/python/dataset/test_graphdata.py @@ -23,6 +23,10 @@ SOCIAL_DATA_FILE = "../data/mindrecord/testGraphData/sns" def test_graphdata_getfullneighbor(): + """ + Test get all neighbors + """ + logger.info('test get all neighbors.\n') g = ds.GraphData(DATASET_FILE, 2) nodes = g.get_all_nodes(1) assert len(nodes) == 10 @@ -33,6 +37,10 @@ def test_graphdata_getfullneighbor(): def test_graphdata_getnodefeature_input_check(): + """ + Test get node feature input check + """ + logger.info('test getnodefeature input check.\n') g = ds.GraphData(DATASET_FILE) with pytest.raises(TypeError): input_list = [1, [1, 1]] @@ -80,6 +88,10 @@ def test_graphdata_getnodefeature_input_check(): def test_graphdata_getsampledneighbors(): + """ + Test sampled neighbors + """ + logger.info('test get sampled neighbors.\n') g = ds.GraphData(DATASET_FILE, 1) edges = g.get_all_edges(0) nodes = g.get_nodes_from_edges(edges) @@ -90,6 +102,10 @@ def test_graphdata_getsampledneighbors(): def test_graphdata_getnegsampledneighbors(): + """ + Test neg sampled neighbors + """ + logger.info('test get negative sampled neighbors.\n') g = ds.GraphData(DATASET_FILE, 2) nodes = g.get_all_nodes(1) assert len(nodes) == 10 @@ -98,6 +114,10 @@ def 
test_graphdata_getnegsampledneighbors(): def test_graphdata_graphinfo(): + """ + Test graph info + """ + logger.info('test graph info.\n') g = ds.GraphData(DATASET_FILE, 2) graph_info = g.graph_info() assert graph_info['node_type'] == [1, 2] @@ -105,7 +125,7 @@ def test_graphdata_graphinfo(): assert graph_info['node_num'] == {1: 10, 2: 10} assert graph_info['edge_num'] == {0: 40} assert graph_info['node_feature_type'] == [1, 2, 3, 4] - assert graph_info['edge_feature_type'] == [] + assert graph_info['edge_feature_type'] == [1, 2] class RandomBatchedSampler(ds.Sampler): @@ -155,6 +175,10 @@ class GNNGraphDataset(): def test_graphdata_generatordataset(): + """ + Test generator dataset + """ + logger.info('test generator dataset.\n') g = ds.GraphData(DATASET_FILE) batch_num = 2 edge_num = g.graph_info()['edge_num'][0] @@ -173,10 +197,13 @@ def test_graphdata_generatordataset(): assert i == 40 -def test_graphdata_randomwalk(): +def test_graphdata_randomwalkdefault(): + """ + Test random walk defaults + """ + logger.info('test randomwalk with default parameters.\n') g = ds.GraphData(SOCIAL_DATA_FILE, 1) nodes = g.get_all_nodes(1) - print(len(nodes)) assert len(nodes) == 33 meta_path = [1 for _ in range(39)] @@ -184,18 +211,39 @@ def test_graphdata_randomwalk(): assert walks.shape == (33, 40) +def test_graphdata_randomwalk(): + """ + Test random walk + """ + logger.info('test random walk with given parameters.\n') + g = ds.GraphData(SOCIAL_DATA_FILE, 1) + nodes = g.get_all_nodes(1) + assert len(nodes) == 33 + + meta_path = [1 for _ in range(39)] + walks = g.random_walk(nodes, meta_path, 2.0, 0.5, -1) + assert walks.shape == (33, 40) + + +def test_graphdata_getedgefeature(): + """ + Test get edge feature + """ + logger.info('test get_edge_feature.\n') + g = ds.GraphData(DATASET_FILE) + edges = g.get_all_edges(0) + features = g.get_edge_feature(edges, [1, 2]) + assert features[0].shape == (40,) + assert features[1].shape == (40,) + + if __name__ == '__main__': 
test_graphdata_getfullneighbor() - logger.info('test_graphdata_getfullneighbor Ended.\n') test_graphdata_getnodefeature_input_check() - logger.info('test_graphdata_getnodefeature_input_check Ended.\n') test_graphdata_getsampledneighbors() - logger.info('test_graphdata_getsampledneighbors Ended.\n') test_graphdata_getnegsampledneighbors() - logger.info('test_graphdata_getnegsampledneighbors Ended.\n') test_graphdata_graphinfo() - logger.info('test_graphdata_graphinfo Ended.\n') test_graphdata_generatordataset() - logger.info('test_graphdata_generatordataset Ended.\n') + test_graphdata_randomwalkdefault() test_graphdata_randomwalk() - logger.info('test_graphdata_randomwalk Ended.\n') + test_graphdata_getedgefeature() diff --git a/tests/ut/python/dataset/test_linear_transformation.py b/tests/ut/python/dataset/test_linear_transformation.py index 0dd25a4da1..f932916ed8 100644 --- a/tests/ut/python/dataset/test_linear_transformation.py +++ b/tests/ut/python/dataset/test_linear_transformation.py @@ -73,6 +73,7 @@ def test_linear_transformation_op(plot=False): if plot: visualize_list(image, image_transformed) + def test_linear_transformation_md5(): """ Test LinearTransformation op: valid params (transformation_matrix, mean_vector) @@ -102,6 +103,7 @@ def test_linear_transformation_md5(): filename = "linear_transformation_01_result.npz" save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN) + def test_linear_transformation_exception_01(): """ Test LinearTransformation op: transformation_matrix is not provided @@ -126,9 +128,10 @@ def test_linear_transformation_exception_01(): ] transform = py_vision.ComposeOp(transforms) data1 = data1.map(input_columns=["image"], operations=transform()) - except ValueError as e: + except TypeError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "not provided" in str(e) + assert "Argument transformation_matrix with value None is not of type (,)" in str(e) + def test_linear_transformation_exception_02(): 
""" @@ -154,9 +157,10 @@ def test_linear_transformation_exception_02(): ] transform = py_vision.ComposeOp(transforms) data1 = data1.map(input_columns=["image"], operations=transform()) - except ValueError as e: + except TypeError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "not provided" in str(e) + assert "Argument mean_vector with value None is not of type (,)" in str(e) + def test_linear_transformation_exception_03(): """ @@ -187,6 +191,7 @@ def test_linear_transformation_exception_03(): logger.info("Got an exception in DE: {}".format(str(e))) assert "square matrix" in str(e) + def test_linear_transformation_exception_04(): """ Test LinearTransformation op: mean_vector does not match dimension of transformation_matrix @@ -199,7 +204,7 @@ def test_linear_transformation_exception_04(): weight = 50 dim = 3 * height * weight transformation_matrix = np.ones([dim, dim]) - mean_vector = np.zeros(dim-1) + mean_vector = np.zeros(dim - 1) # Generate dataset data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) @@ -216,6 +221,7 @@ def test_linear_transformation_exception_04(): logger.info("Got an exception in DE: {}".format(str(e))) assert "should match" in str(e) + if __name__ == '__main__': test_linear_transformation_op(plot=True) test_linear_transformation_md5() diff --git a/tests/ut/python/dataset/test_minddataset_exception.py b/tests/ut/python/dataset/test_minddataset_exception.py index b15944d76b..0b4d0dfc8f 100644 --- a/tests/ut/python/dataset/test_minddataset_exception.py +++ b/tests/ut/python/dataset/test_minddataset_exception.py @@ -184,24 +184,26 @@ def test_minddataset_invalidate_num_shards(): create_cv_mindrecord(1) columns_list = ["data", "label"] num_readers = 4 - with pytest.raises(Exception, match="shard_id is invalid, "): + with pytest.raises(Exception) as error_info: data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, True, 1, 2) num_iter = 0 for _ in 
data_set.create_dict_iterator(): num_iter += 1 + assert 'Input shard_id is not within the required interval of (0 to 0).' in str(error_info) + os.remove(CV_FILE_NAME) os.remove("{}.db".format(CV_FILE_NAME)) - def test_minddataset_invalidate_shard_id(): create_cv_mindrecord(1) columns_list = ["data", "label"] num_readers = 4 - with pytest.raises(Exception, match="shard_id is invalid, "): + with pytest.raises(Exception) as error_info: data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, True, 1, -1) num_iter = 0 for _ in data_set.create_dict_iterator(): num_iter += 1 + assert 'Input shard_id is not within the required interval of (0 to 0).' in str(error_info) os.remove(CV_FILE_NAME) os.remove("{}.db".format(CV_FILE_NAME)) @@ -210,17 +212,19 @@ def test_minddataset_shard_id_bigger_than_num_shard(): create_cv_mindrecord(1) columns_list = ["data", "label"] num_readers = 4 - with pytest.raises(Exception, match="shard_id is invalid, "): + with pytest.raises(Exception) as error_info: data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, True, 2, 2) num_iter = 0 for _ in data_set.create_dict_iterator(): num_iter += 1 + assert 'Input shard_id is not within the required interval of (0 to 1).' in str(error_info) - with pytest.raises(Exception, match="shard_id is invalid, "): + with pytest.raises(Exception) as error_info: data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, True, 2, 5) num_iter = 0 for _ in data_set.create_dict_iterator(): num_iter += 1 + assert 'Input shard_id is not within the required interval of (0 to 1).' 
in str(error_info) os.remove(CV_FILE_NAME) os.remove("{}.db".format(CV_FILE_NAME)) diff --git a/tests/ut/python/dataset/test_ngram_op.py b/tests/ut/python/dataset/test_ngram_op.py index 73b2702378..777fca8764 100644 --- a/tests/ut/python/dataset/test_ngram_op.py +++ b/tests/ut/python/dataset/test_ngram_op.py @@ -15,9 +15,9 @@ """ Testing Ngram in mindspore.dataset """ +import numpy as np import mindspore.dataset as ds import mindspore.dataset.text as text -import numpy as np def test_multiple_ngrams(): @@ -61,7 +61,7 @@ def test_simple_ngram(): yield (np.array(line.split(" "), dtype='S'),) dataset = ds.GeneratorDataset(gen(plates_mottos), column_names=["text"]) - dataset = dataset.map(input_columns=["text"], operations=text.Ngram(3, separator=None)) + dataset = dataset.map(input_columns=["text"], operations=text.Ngram(3, separator=" ")) i = 0 for data in dataset.create_dict_iterator(): @@ -72,43 +72,36 @@ def test_simple_ngram(): def test_corner_cases(): """ testing various corner cases and exceptions""" - def test_config(input_line, output_line, n, l_pad=None, r_pad=None, sep=None): + def test_config(input_line, n, l_pad=("", 0), r_pad=("", 0), sep=" "): def gen(texts): yield (np.array(texts.split(" "), dtype='S'),) - dataset = ds.GeneratorDataset(gen(input_line), column_names=["text"]) - dataset = dataset.map(input_columns=["text"], operations=text.Ngram(n, l_pad, r_pad, separator=sep)) - for data in dataset.create_dict_iterator(): - assert [d.decode("utf8") for d in data["text"]] == output_line, output_line + try: + dataset = ds.GeneratorDataset(gen(input_line), column_names=["text"]) + dataset = dataset.map(input_columns=["text"], operations=text.Ngram(n, l_pad, r_pad, separator=sep)) + for data in dataset.create_dict_iterator(): + return [d.decode("utf8") for d in data["text"]] + except (ValueError, TypeError) as e: + return str(e) # test tensor length smaller than n - test_config("Lone Star", ["Lone Star", "", "", ""], [2, 3, 4, 5]) + assert test_config("Lone 
Star", [2, 3, 4, 5]) == ["Lone Star", "", "", ""] # test empty separator - test_config("Beautiful British Columbia", ['BeautifulBritish', 'BritishColumbia'], 2, sep="") + assert test_config("Beautiful British Columbia", 2, sep="") == ['BeautifulBritish', 'BritishColumbia'] # test separator with longer length - test_config("Beautiful British Columbia", ['Beautiful^-^British^-^Columbia'], 3, sep="^-^") + assert test_config("Beautiful British Columbia", 3, sep="^-^") == ['Beautiful^-^British^-^Columbia'] # test left pad != right pad - test_config("Lone Star", ['The Lone Star State'], 4, ("The", 1), ("State", 1)) + assert test_config("Lone Star", 4, ("The", 1), ("State", 1)) == ['The Lone Star State'] # test invalid n - try: - test_config("Yours to Discover", "", [0, [1]]) - except Exception as e: - assert "ngram needs to be a positive number" in str(e) - # test empty n - try: - test_config("Yours to Discover", "", []) - except Exception as e: - assert "n needs to be a non-empty list" in str(e) - # test invalid pad - try: - test_config("Yours to Discover", "", [1], ("str", -1)) - except Exception as e: - assert "padding width need to be positive numbers" in str(e) + assert "gram[1] with value [1] is not of type (,)" in test_config("Yours to Discover", [1, [1]]) + assert "n needs to be a non-empty list" in test_config("Yours to Discover", []) # test invalid pad - try: - test_config("Yours to Discover", "", [1], ("str", "rts")) - except Exception as e: - assert "pad needs to be a tuple of (str, int)" in str(e) + assert "padding width need to be positive numbers" in test_config("Yours to Discover", [1], ("str", -1)) + assert "pad needs to be a tuple of (str, int)" in test_config("Yours to Discover", [1], ("str", "rts")) + # test 0 as in valid input + assert "gram_0 must be greater than 0" in test_config("Yours to Discover", 0) + assert "gram_0 must be greater than 0" in test_config("Yours to Discover", [0]) + assert "gram_1 must be greater than 0" in test_config("Yours to 
Discover", [1, 0]) if __name__ == '__main__': diff --git a/tests/ut/python/dataset/test_nlp.py b/tests/ut/python/dataset/test_nlp.py index 6b44cfc80b..cb517160a1 100644 --- a/tests/ut/python/dataset/test_nlp.py +++ b/tests/ut/python/dataset/test_nlp.py @@ -34,13 +34,32 @@ def test_on_tokenized_line(): jieba_op.add_word(word) data = data.map(input_columns=["text"], operations=jieba_op) vocab = text.Vocab.from_file(VOCAB_FILE, ",", special_tokens=["", ""]) - lookup = text.Lookup(vocab) + lookup = text.Lookup(vocab, "") data = data.map(input_columns=["text"], operations=lookup) res = np.array([[10, 1, 11, 1, 12, 1, 15, 1, 13, 1, 14], [11, 1, 12, 1, 10, 1, 14, 1, 13, 1, 15]], dtype=np.int32) for i, d in enumerate(data.create_dict_iterator()): - _ = (np.testing.assert_array_equal(d["text"], res[i]), i) + np.testing.assert_array_equal(d["text"], res[i]) + + +def test_on_tokenized_line_with_no_special_tokens(): + data = ds.TextFileDataset("../data/dataset/testVocab/lines.txt", shuffle=False) + jieba_op = text.JiebaTokenizer(HMM_FILE, MP_FILE, mode=text.JiebaMode.MP) + with open(VOCAB_FILE, 'r') as f: + for line in f: + word = line.split(',')[0] + jieba_op.add_word(word) + + data = data.map(input_columns=["text"], operations=jieba_op) + vocab = text.Vocab.from_file(VOCAB_FILE, ",") + lookup = text.Lookup(vocab, "not") + data = data.map(input_columns=["text"], operations=lookup) + res = np.array([[8, 0, 9, 0, 10, 0, 13, 0, 11, 0, 12], + [9, 0, 10, 0, 8, 0, 12, 0, 11, 0, 13]], dtype=np.int32) + for i, d in enumerate(data.create_dict_iterator()): + np.testing.assert_array_equal(d["text"], res[i]) if __name__ == '__main__': test_on_tokenized_line() + test_on_tokenized_line_with_no_special_tokens() diff --git a/tests/ut/python/dataset/test_nlp_jieop.py b/tests/ut/python/dataset/test_nlp_jieop.py deleted file mode 100644 index 1ab53205d0..0000000000 --- a/tests/ut/python/dataset/test_nlp_jieop.py +++ /dev/null @@ -1,238 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# 
-# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -import numpy as np -import mindspore.dataset as ds -from mindspore.dataset.text import JiebaTokenizer -from mindspore.dataset.text import JiebaMode, to_str - -DATA_FILE = "../data/dataset/testJiebaDataset/3.txt" -DATA_ALL_FILE = "../data/dataset/testJiebaDataset/*" - -HMM_FILE = "../data/dataset/jiebadict/hmm_model.utf8" -MP_FILE = "../data/dataset/jiebadict/jieba.dict.utf8" - - -def test_jieba_1(): - """Test jieba tokenizer with MP mode""" - data = ds.TextFileDataset(DATA_FILE) - jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) - data = data.map(input_columns=["text"], - operations=jieba_op, num_parallel_workers=1) - expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] - ret = [] - for i in data.create_dict_iterator(): - ret = to_str(i["text"]) - for index, item in enumerate(ret): - assert item == expect[index] - - -def test_jieba_1_1(): - """Test jieba tokenizer with HMM mode""" - data = ds.TextFileDataset(DATA_FILE) - jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.HMM) - data = data.map(input_columns=["text"], - operations=jieba_op, num_parallel_workers=1) - expect = ['今天', '天气', '太', '好', '了', '我们', '一起', '去', '外面', '玩', '吧'] - for i in data.create_dict_iterator(): - ret = to_str(i["text"]) - for index, item in enumerate(ret): - assert item == expect[index] - - -def test_jieba_1_2(): - """Test jieba tokenizer 
with HMM MIX""" - data = ds.TextFileDataset(DATA_FILE) - jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MIX) - data = data.map(input_columns=["text"], - operations=jieba_op, num_parallel_workers=1) - expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] - for i in data.create_dict_iterator(): - ret = to_str(i["text"]) - for index, item in enumerate(ret): - assert item == expect[index] - - -def test_jieba_2(): - """Test add_word""" - DATA_FILE4 = "../data/dataset/testJiebaDataset/4.txt" - data = ds.TextFileDataset(DATA_FILE4) - jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) - jieba_op.add_word("男默女泪") - expect = ['男默女泪', '市', '长江大桥'] - data = data.map(input_columns=["text"], - operations=jieba_op, num_parallel_workers=2) - for i in data.create_dict_iterator(): - ret = to_str(i["text"]) - for index, item in enumerate(ret): - assert item == expect[index] - - -def test_jieba_2_1(): - """Test add_word with freq""" - DATA_FILE4 = "../data/dataset/testJiebaDataset/4.txt" - data = ds.TextFileDataset(DATA_FILE4) - jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) - jieba_op.add_word("男默女泪", 10) - data = data.map(input_columns=["text"], - operations=jieba_op, num_parallel_workers=2) - expect = ['男默女泪', '市', '长江大桥'] - for i in data.create_dict_iterator(): - ret = to_str(i["text"]) - for index, item in enumerate(ret): - assert item == expect[index] - - -def test_jieba_2_2(): - """Test add_word with invalid None Input""" - jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) - try: - jieba_op.add_word(None) - except ValueError: - pass - - -def test_jieba_2_3(): - """Test add_word with freq, the value of freq affects the result of segmentation""" - DATA_FILE4 = "../data/dataset/testJiebaDataset/6.txt" - data = ds.TextFileDataset(DATA_FILE4) - jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) - jieba_op.add_word("江大桥", 20000) - data = data.map(input_columns=["text"], - operations=jieba_op, 
num_parallel_workers=2) - expect = ['江州', '市长', '江大桥', '参加', '了', '长江大桥', '的', '通车', '仪式'] - for i in data.create_dict_iterator(): - ret = to_str(i["text"]) - for index, item in enumerate(ret): - assert item == expect[index] - - -def test_jieba_3(): - """Test add_dict with dict""" - DATA_FILE4 = "../data/dataset/testJiebaDataset/4.txt" - user_dict = { - "男默女泪": 10 - } - data = ds.TextFileDataset(DATA_FILE4) - jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) - jieba_op.add_dict(user_dict) - data = data.map(input_columns=["text"], - operations=jieba_op, num_parallel_workers=1) - expect = ['男默女泪', '市', '长江大桥'] - for i in data.create_dict_iterator(): - ret = to_str(i["text"]) - for index, item in enumerate(ret): - assert item == expect[index] - - -def test_jieba_3_1(): - """Test add_dict with dict""" - DATA_FILE4 = "../data/dataset/testJiebaDataset/4.txt" - user_dict = { - "男默女泪": 10, - "江大桥": 20000 - } - data = ds.TextFileDataset(DATA_FILE4) - jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) - jieba_op.add_dict(user_dict) - data = data.map(input_columns=["text"], - operations=jieba_op, num_parallel_workers=1) - expect = ['男默女泪', '市长', '江大桥'] - for i in data.create_dict_iterator(): - ret = to_str(i["text"]) - for index, item in enumerate(ret): - assert item == expect[index] - - -def test_jieba_4(): - DATA_FILE4 = "../data/dataset/testJiebaDataset/3.txt" - DICT_FILE = "../data/dataset/testJiebaDataset/user_dict.txt" - - data = ds.TextFileDataset(DATA_FILE4) - jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) - jieba_op.add_dict(DICT_FILE) - data = data.map(input_columns=["text"], - operations=jieba_op, num_parallel_workers=1) - expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] - for i in data.create_dict_iterator(): - ret = to_str(i["text"]) - for index, item in enumerate(ret): - assert item == expect[index] - - -def test_jieba_4_1(): - """Test add dict with invalid file path""" - DICT_FILE = "" - jieba_op = 
JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) - try: - jieba_op.add_dict(DICT_FILE) - except ValueError: - pass - - -def test_jieba_5(): - """Test add dict with file path""" - DATA_FILE4 = "../data/dataset/testJiebaDataset/6.txt" - - data = ds.TextFileDataset(DATA_FILE4) - jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) - jieba_op.add_word("江大桥", 20000) - data = data.map(input_columns=["text"], - operations=jieba_op, num_parallel_workers=1) - expect = ['江州', '市长', '江大桥', '参加', '了', '长江大桥', '的', '通车', '仪式'] - for i in data.create_dict_iterator(): - ret = to_str(i["text"]) - for index, item in enumerate(ret): - assert item == expect[index] - - -def gen(): - text = np.array("今天天气太好了我们一起去外面玩吧".encode("UTF8"), dtype='S') - yield (text,) - - -def pytoken_op(input_data): - te = str(to_str(input_data)) - tokens = [] - tokens.append(te[:5].encode("UTF8")) - tokens.append(te[5:10].encode("UTF8")) - tokens.append(te[10:].encode("UTF8")) - return np.array(tokens, dtype='S') - - -def test_jieba_6(): - data = ds.GeneratorDataset(gen, column_names=["text"]) - data = data.map(input_columns=["text"], - operations=pytoken_op, num_parallel_workers=1) - expect = ['今天天气太', '好了我们一', '起去外面玩吧'] - for i in data.create_dict_iterator(): - ret = to_str(i["text"]) - for index, item in enumerate(ret): - assert item == expect[index] - - -if __name__ == "__main__": - test_jieba_1() - test_jieba_1_1() - test_jieba_1_2() - test_jieba_2() - test_jieba_2_1() - test_jieba_2_2() - test_jieba_3() - test_jieba_3_1() - test_jieba_4() - test_jieba_4_1() - test_jieba_5() - test_jieba_5() - test_jieba_6() diff --git a/tests/ut/python/dataset/test_noop_mode.py b/tests/ut/python/dataset/test_noop_mode.py new file mode 100644 index 0000000000..0ea9673200 --- /dev/null +++ b/tests/ut/python/dataset/test_noop_mode.py @@ -0,0 +1,45 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in 
compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +""" +Test No-op mode support with Dummy Iterator +""" +import os +import mindspore.dataset as ds + +DATA_DIR = "../data/dataset/testVOC2012" + +def test_noop_pserver(): + os.environ['MS_ROLE'] = 'MS_PSERVER' + data1 = ds.VOCDataset(DATA_DIR, task="Segmentation", mode="train", decode=True, shuffle=False) + num = 0 + for _ in data1.create_dict_iterator(): + num += 1 + assert num == 0 + del os.environ['MS_ROLE'] + + +def test_noop_sched(): + os.environ['MS_ROLE'] = 'MS_SCHED' + data1 = ds.VOCDataset(DATA_DIR, task="Segmentation", mode="train", decode=True, shuffle=False) + num = 0 + for _ in data1.create_dict_iterator(): + num += 1 + assert num == 0 + del os.environ['MS_ROLE'] + + +if __name__ == '__main__': + test_noop_pserver() + test_noop_sched() diff --git a/tests/ut/python/dataset/test_normalizeOp.py b/tests/ut/python/dataset/test_normalizeOp.py index af97ee0c08..d5ebc799f9 100644 --- a/tests/ut/python/dataset/test_normalizeOp.py +++ b/tests/ut/python/dataset/test_normalizeOp.py @@ -279,7 +279,7 @@ def test_normalize_exception_invalid_range_py(): _ = py_vision.Normalize([0.75, 1.25, 0.5], [0.1, 0.18, 1.32]) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "Input is not within the required range" in str(e) + assert "Input mean_value is not within the required interval of (0.0 to 1.0)." 
in str(e) def test_normalize_grayscale_md5_01(): diff --git a/tests/ut/python/dataset/test_onehot_op.py b/tests/ut/python/dataset/test_onehot_op.py index 500f770b9b..44d98b0ae0 100644 --- a/tests/ut/python/dataset/test_onehot_op.py +++ b/tests/ut/python/dataset/test_onehot_op.py @@ -13,12 +13,13 @@ # limitations under the License. # ============================================================================== """ -Testing the one_hot op in DE +Testing the OneHot Op """ import numpy as np import mindspore.dataset as ds import mindspore.dataset.transforms.c_transforms as data_trans +import mindspore.dataset.transforms.vision.c_transforms as c_vision from mindspore import log as logger from util import diff_mse @@ -37,15 +38,15 @@ def one_hot(index, depth): def test_one_hot(): """ - Test one_hot + Test OneHot Tensor Operator """ - logger.info("Test one_hot") + logger.info("test_one_hot") depth = 10 # First dataset data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) - one_hot_op = data_trans.OneHot(depth) + one_hot_op = data_trans.OneHot(num_classes=depth) data1 = data1.map(input_columns=["label"], operations=one_hot_op, columns_order=["label"]) # Second dataset @@ -58,8 +59,54 @@ def test_one_hot(): label2 = one_hot(item2["label"][0], depth) mse = diff_mse(label1, label2) logger.info("DE one_hot: {}, Numpy one_hot: {}, diff: {}".format(label1, label2, mse)) + assert mse == 0 num_iter += 1 + assert num_iter == 3 + +def test_one_hot_post_aug(): + """ + Test One Hot Encoding after Multiple Data Augmentation Operators + """ + logger.info("test_one_hot_post_aug") + data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) + + # Define data augmentation parameters + rescale = 1.0 / 255.0 + shift = 0.0 + resize_height, resize_width = 224, 224 + + # Define map operations + decode_op = c_vision.Decode() + rescale_op = c_vision.Rescale(rescale, shift) + resize_op = c_vision.Resize((resize_height, resize_width)) + + # Apply map operations on images + data1 = 
data1.map(input_columns=["image"], operations=decode_op) + data1 = data1.map(input_columns=["image"], operations=rescale_op) + data1 = data1.map(input_columns=["image"], operations=resize_op) + + # Apply one-hot encoding on labels + depth = 4 + one_hot_encode = data_trans.OneHot(depth) + data1 = data1.map(input_columns=["label"], operations=one_hot_encode) + + # Apply datasets ops + buffer_size = 100 + seed = 10 + batch_size = 2 + ds.config.set_seed(seed) + data1 = data1.shuffle(buffer_size=buffer_size) + data1 = data1.batch(batch_size, drop_remainder=True) + + num_iter = 0 + for item in data1.create_dict_iterator(): + logger.info("image is: {}".format(item["image"])) + logger.info("label is: {}".format(item["label"])) + num_iter += 1 + + assert num_iter == 1 if __name__ == "__main__": test_one_hot() + test_one_hot_post_aug() diff --git a/tests/ut/python/dataset/test_pad_end_op.py b/tests/ut/python/dataset/test_pad_end_op.py index 5742d73665..c25d6b9a95 100644 --- a/tests/ut/python/dataset/test_pad_end_op.py +++ b/tests/ut/python/dataset/test_pad_end_op.py @@ -61,6 +61,10 @@ def test_pad_end_exceptions(): pad_compare([3, 4, 5], ["2"], 1, []) assert "a value in the list is not an integer." in str(info.value) + with pytest.raises(TypeError) as info: + pad_compare([1, 2], 3, -1, [1, 2, -1]) + assert "Argument pad_end with value 3 is not of type (,)" in str(info.value) + if __name__ == "__main__": test_pad_end_basics() diff --git a/tests/ut/python/dataset/test_random_affine.py b/tests/ut/python/dataset/test_random_affine.py index b856684ed1..ec829eb53a 100644 --- a/tests/ut/python/dataset/test_random_affine.py +++ b/tests/ut/python/dataset/test_random_affine.py @@ -103,7 +103,7 @@ def test_random_affine_exception_negative_degrees(): _ = py_vision.RandomAffine(degrees=-15) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert str(e) == "If degrees is a single number, it cannot be negative." 
+ assert str(e) == "Input degrees is not within the required interval of (0 to inf)." def test_random_affine_exception_translation_range(): @@ -115,7 +115,7 @@ def test_random_affine_exception_translation_range(): _ = py_vision.RandomAffine(degrees=15, translate=(0.1, 1.5)) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert str(e) == "translation values should be between 0 and 1" + assert str(e) == "Input translate at 1 is not within the required interval of (0.0 to 1.0)." def test_random_affine_exception_scale_value(): @@ -127,7 +127,7 @@ def test_random_affine_exception_scale_value(): _ = py_vision.RandomAffine(degrees=15, scale=(0.0, 1.1)) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert str(e) == "scale values should be positive" + assert str(e) == "Input scale[0] must be greater than 0." def test_random_affine_exception_shear_value(): @@ -139,7 +139,7 @@ def test_random_affine_exception_shear_value(): _ = py_vision.RandomAffine(degrees=15, shear=-5) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert str(e) == "If shear is a single number, it must be positive." + assert str(e) == "Input shear must be greater than 0." def test_random_affine_exception_degrees_size(): @@ -165,7 +165,9 @@ def test_random_affine_exception_translate_size(): _ = py_vision.RandomAffine(degrees=15, translate=(0.1)) except TypeError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert str(e) == "translate should be a list or tuple of length 2." + assert str( + e) == "Argument translate with value 0.1 is not of type (," \ + " )." def test_random_affine_exception_scale_size(): @@ -178,7 +180,8 @@ def test_random_affine_exception_scale_size(): _ = py_vision.RandomAffine(degrees=15, scale=(0.5)) except TypeError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert str(e) == "scale should be a list or tuple of length 2." 
+ assert str(e) == "Argument scale with value 0.5 is not of type (," \ + " )." def test_random_affine_exception_shear_size(): @@ -191,7 +194,7 @@ def test_random_affine_exception_shear_size(): _ = py_vision.RandomAffine(degrees=15, shear=(-5, 5, 10)) except TypeError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert str(e) == "shear should be a list or tuple and it must be of length 2 or 4." + assert str(e) == "shear must be of length 2 or 4." if __name__ == "__main__": diff --git a/tests/ut/python/dataset/test_random_color.py b/tests/ut/python/dataset/test_random_color.py index 45847ba653..0015e8498f 100644 --- a/tests/ut/python/dataset/test_random_color.py +++ b/tests/ut/python/dataset/test_random_color.py @@ -97,7 +97,7 @@ def test_random_color_md5(): data = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) transforms = F.ComposeOp([F.Decode(), - F.RandomColor((0.5, 1.5)), + F.RandomColor((0.1, 1.9)), F.ToTensor()]) data = data.map(input_columns="image", operations=transforms()) diff --git a/tests/ut/python/dataset/test_random_crop_and_resize.py b/tests/ut/python/dataset/test_random_crop_and_resize.py index de039e6d82..486d2cd5ed 100644 --- a/tests/ut/python/dataset/test_random_crop_and_resize.py +++ b/tests/ut/python/dataset/test_random_crop_and_resize.py @@ -232,7 +232,7 @@ def test_random_crop_and_resize_04_c(): data = data.map(input_columns=["image"], operations=random_crop_and_resize_op) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "Input range is not valid" in str(e) + assert "Input is not within the required interval of (0 to 16777216)." 
in str(e) def test_random_crop_and_resize_04_py(): @@ -255,7 +255,7 @@ def test_random_crop_and_resize_04_py(): data = data.map(input_columns=["image"], operations=transform()) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "Input range is not valid" in str(e) + assert "Input is not within the required interval of (0 to 16777216)." in str(e) def test_random_crop_and_resize_05_c(): @@ -275,7 +275,7 @@ def test_random_crop_and_resize_05_c(): data = data.map(input_columns=["image"], operations=random_crop_and_resize_op) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "Input range is not valid" in str(e) + assert "Input is not within the required interval of (0 to 16777216)." in str(e) def test_random_crop_and_resize_05_py(): @@ -298,7 +298,7 @@ def test_random_crop_and_resize_05_py(): data = data.map(input_columns=["image"], operations=transform()) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "Input range is not valid" in str(e) + assert "Input is not within the required interval of (0 to 16777216)." in str(e) def test_random_crop_and_resize_comp(plot=False): diff --git a/tests/ut/python/dataset/test_random_crop_and_resize_with_bbox.py b/tests/ut/python/dataset/test_random_crop_and_resize_with_bbox.py index b13dc466f7..599acc9560 100644 --- a/tests/ut/python/dataset/test_random_crop_and_resize_with_bbox.py +++ b/tests/ut/python/dataset/test_random_crop_and_resize_with_bbox.py @@ -25,34 +25,16 @@ from util import visualize_with_bounding_boxes, InvalidBBoxType, check_bad_bbox, GENERATE_GOLDEN = False -# updated VOC dataset with correct annotations -DATA_DIR = "../data/dataset/testVOC2012_2" - - -def fix_annotate(bboxes): - """ - Fix annotations to format followed by mindspore. 
- :param bboxes: in [label, x_min, y_min, w, h, truncate, difficult] format - :return: annotation in [x_min, y_min, w, h, label, truncate, difficult] format - """ - for bbox in bboxes: - if bbox.size == 7: - tmp = bbox[0] - bbox[0] = bbox[1] - bbox[1] = bbox[2] - bbox[2] = bbox[3] - bbox[3] = bbox[4] - bbox[4] = tmp - else: - print("ERROR: Invalid Bounding Box size provided") - break - return bboxes +# Updated VOC dataset with correct annotations - DATA_DIR +DATA_DIR_VOC = "../data/dataset/testVOC2012_2" +# COCO dataset - DATA_DIR, ANNOTATION_DIR +DATA_DIR_COCO = ["../data/dataset/testCOCO/train/", "../data/dataset/testCOCO/annotations/train.json"] def test_random_resized_crop_with_bbox_op_c(plot_vis=False): """ - Prints images and bboxes side by side with and without RandomResizedCropWithBBox Op applied, - tests with MD5 check, expected to pass + Prints images and bboxes side by side with and without RandomResizedCropWithBBox Op applied, + tests with MD5 check, expected to pass """ logger.info("test_random_resized_crop_with_bbox_op_c") @@ -60,22 +42,16 @@ def test_random_resized_crop_with_bbox_op_c(plot_vis=False): original_num_parallel_workers = config_get_set_num_parallel_workers(1) # Load dataset - dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) - dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + dataVoc1 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) + dataVoc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) test_op = c_vision.RandomResizedCropWithBBox((256, 512), (0.5, 0.5), (0.5, 0.5)) - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", 
"annotation"], output_columns=["image", "annotation"], columns_order=["image", "annotation"], - operations=[test_op]) # Add column for "annotation" + operations=[test_op]) filename = "random_resized_crop_with_bbox_01_c_result.npz" save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN) @@ -94,26 +70,49 @@ def test_random_resized_crop_with_bbox_op_c(plot_vis=False): ds.config.set_num_parallel_workers(original_num_parallel_workers) +def test_random_resized_crop_with_bbox_op_coco_c(plot_vis=False): + """ + Prints images and bboxes side by side with and without RandomResizedCropWithBBox Op applied, + Testing with Coco dataset + """ + logger.info("test_random_resized_crop_with_bbox_op_coco_c") + # load dataset + dataCoco1 = ds.CocoDataset(DATA_DIR_COCO[0], annotation_file=DATA_DIR_COCO[1], task="Detection", + decode=True, shuffle=False) + + dataCoco2 = ds.CocoDataset(DATA_DIR_COCO[0], annotation_file=DATA_DIR_COCO[1], task="Detection", + decode=True, shuffle=False) + + test_op = c_vision.RandomResizedCropWithBBox((512, 512), (0.5, 1), (0.5, 1)) + + dataCoco2 = dataCoco2.map(input_columns=["image", "bbox"], + output_columns=["image", "bbox"], + columns_order=["image", "bbox"], + operations=[test_op]) + + unaugSamp, augSamp = [], [] + + for unAug, Aug in zip(dataCoco1.create_dict_iterator(), dataCoco2.create_dict_iterator()): + unaugSamp.append(unAug) + augSamp.append(Aug) + + if plot_vis: + visualize_with_bounding_boxes(unaugSamp, augSamp, "bbox") + + def test_random_resized_crop_with_bbox_op_edge_c(plot_vis=False): """ - Prints images and bboxes side by side with and without RandomResizedCropWithBBox Op applied, - tests on dynamically generated edge case, expected to pass + Prints images and bboxes side by side with and without RandomResizedCropWithBBox Op applied, + tests on dynamically generated edge case, expected to pass """ logger.info("test_random_resized_crop_with_bbox_op_edge_c") # Load dataset - dataVoc1 = ds.VOCDataset(DATA_DIR, 
task="Detection", mode="train", decode=True, shuffle=False) - dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + dataVoc1 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) + dataVoc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) test_op = c_vision.RandomResizedCropWithBBox((256, 512), (0.5, 0.5), (0.5, 0.5)) - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - # maps to convert data into valid edge case data dataVoc1 = dataVoc1.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -138,20 +137,17 @@ def test_random_resized_crop_with_bbox_op_edge_c(plot_vis=False): def test_random_resized_crop_with_bbox_op_invalid_c(): """ - Tests RandomResizedCropWithBBox on invalid constructor parameters, expected to raise ValueError + Tests RandomResizedCropWithBBox on invalid constructor parameters, expected to raise ValueError """ logger.info("test_random_resized_crop_with_bbox_op_invalid_c") # Load dataset, only Augmented Dataset as test will raise ValueError - dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + dataVoc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) try: # If input range of scale is not in the order of (min, max), ValueError will be raised. 
test_op = c_vision.RandomResizedCropWithBBox((256, 512), (1, 0.5), (0.5, 0.5)) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -163,7 +159,7 @@ def test_random_resized_crop_with_bbox_op_invalid_c(): except ValueError as err: logger.info("Got an exception in DE: {}".format(str(err))) - assert "Input range is not valid" in str(err) + assert "Input is not within the required interval of (0 to 16777216)." in str(err) def test_random_resized_crop_with_bbox_op_invalid2_c(): @@ -172,15 +168,12 @@ def test_random_resized_crop_with_bbox_op_invalid2_c(): """ logger.info("test_random_resized_crop_with_bbox_op_invalid2_c") # Load dataset # only loading the to AugDataset as test will fail on this - dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + dataVoc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) try: # If input range of ratio is not in the order of (min, max), ValueError will be raised. test_op = c_vision.RandomResizedCropWithBBox((256, 512), (1, 1), (1, 0.5)) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -192,7 +185,7 @@ def test_random_resized_crop_with_bbox_op_invalid2_c(): except ValueError as err: logger.info("Got an exception in DE: {}".format(str(err))) - assert "Input range is not valid" in str(err) + assert "Input is not within the required interval of (0 to 16777216)." 
in str(err) def test_random_resized_crop_with_bbox_op_bad_c(): @@ -202,18 +195,19 @@ def test_random_resized_crop_with_bbox_op_bad_c(): logger.info("test_random_resized_crop_with_bbox_op_bad_c") test_op = c_vision.RandomResizedCropWithBBox((256, 512), (0.5, 0.5), (0.5, 0.5)) - data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + data_voc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) check_bad_bbox(data_voc2, test_op, InvalidBBoxType.WidthOverflow, "bounding boxes is out of bounds of the image") - data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + data_voc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) check_bad_bbox(data_voc2, test_op, InvalidBBoxType.HeightOverflow, "bounding boxes is out of bounds of the image") - data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + data_voc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) check_bad_bbox(data_voc2, test_op, InvalidBBoxType.NegativeXY, "min_x") - data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + data_voc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) check_bad_bbox(data_voc2, test_op, InvalidBBoxType.WrongShape, "4 features") if __name__ == "__main__": test_random_resized_crop_with_bbox_op_c(plot_vis=True) + test_random_resized_crop_with_bbox_op_coco_c(plot_vis=True) test_random_resized_crop_with_bbox_op_edge_c(plot_vis=True) test_random_resized_crop_with_bbox_op_invalid_c() test_random_resized_crop_with_bbox_op_invalid2_c() diff --git a/tests/ut/python/dataset/test_random_crop_with_bbox.py b/tests/ut/python/dataset/test_random_crop_with_bbox.py index 9262dfd65d..b93c638f41 100644 --- a/tests/ut/python/dataset/test_random_crop_with_bbox.py +++ 
b/tests/ut/python/dataset/test_random_crop_with_bbox.py @@ -26,49 +26,25 @@ from util import visualize_with_bounding_boxes, InvalidBBoxType, check_bad_bbox, GENERATE_GOLDEN = False -# updated VOC dataset with correct annotations -DATA_DIR = "../data/dataset/testVOC2012_2" - - -def fix_annotate(bboxes): - """ - Fix annotations to format followed by mindspore. - :param bboxes: in [label, x_min, y_min, w, h, truncate, difficult] format - :return: annotation in [x_min, y_min, w, h, label, truncate, difficult] format - """ - for bbox in bboxes: - if bbox.size == 7: - tmp = bbox[0] - bbox[0] = bbox[1] - bbox[1] = bbox[2] - bbox[2] = bbox[3] - bbox[3] = bbox[4] - bbox[4] = tmp - else: - print("ERROR: Invalid Bounding Box size provided") - break - return bboxes +# Updated VOC dataset with correct annotations - DATA_DIR +DATA_DIR_VOC = "../data/dataset/testVOC2012_2" +# COCO dataset - DATA_DIR, ANNOTATION_DIR +DATA_DIR_COCO = ["../data/dataset/testCOCO/train/", "../data/dataset/testCOCO/annotations/train.json"] def test_random_crop_with_bbox_op_c(plot_vis=False): """ - Prints images and bboxes side by side with and without RandomCropWithBBox Op applied + Prints images and bboxes side by side with and without RandomCropWithBBox Op applied """ logger.info("test_random_crop_with_bbox_op_c") # Load dataset - dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) - dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + dataVoc1 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) + dataVoc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) # define test OP with values to match existing Op UT test_op = c_vision.RandomCropWithBBox([512, 512], [200, 200, 200, 200]) - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = 
dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -85,33 +61,57 @@ def test_random_crop_with_bbox_op_c(plot_vis=False): visualize_with_bounding_boxes(unaugSamp, augSamp) +def test_random_crop_with_bbox_op_coco_c(plot_vis=False): + """ + Prints images and bboxes side by side with and without RandomCropWithBBox Op applied, + Testing with Coco dataset + """ + logger.info("test_random_crop_with_bbox_op_coco_c") + # load dataset + dataCoco1 = ds.CocoDataset(DATA_DIR_COCO[0], annotation_file=DATA_DIR_COCO[1], task="Detection", + decode=True, shuffle=False) + + dataCoco2 = ds.CocoDataset(DATA_DIR_COCO[0], annotation_file=DATA_DIR_COCO[1], task="Detection", + decode=True, shuffle=False) + + test_op = c_vision.RandomCropWithBBox([512, 512], [200, 200, 200, 200]) + + dataCoco2 = dataCoco2.map(input_columns=["image", "bbox"], + output_columns=["image", "bbox"], + columns_order=["image", "bbox"], + operations=[test_op]) + + unaugSamp, augSamp = [], [] + + for unAug, Aug in zip(dataCoco1.create_dict_iterator(), dataCoco2.create_dict_iterator()): + unaugSamp.append(unAug) + augSamp.append(Aug) + + if plot_vis: + visualize_with_bounding_boxes(unaugSamp, augSamp, "bbox") + + def test_random_crop_with_bbox_op2_c(plot_vis=False): """ - Prints images and bboxes side by side with and without RandomCropWithBBox Op applied, - with md5 check, expected to pass + Prints images and bboxes side by side with and without RandomCropWithBBox Op applied, + with md5 check, expected to pass """ logger.info("test_random_crop_with_bbox_op2_c") original_seed = config_get_set_seed(593447) original_num_parallel_workers = config_get_set_num_parallel_workers(1) # Load dataset - dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) - dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", 
mode="train", decode=True, shuffle=False) + dataVoc1 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) + dataVoc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) # define test OP with values to match existing Op unit - test test_op = c_vision.RandomCropWithBBox(512, [200, 200, 200, 200], fill_value=(255, 255, 255)) - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], columns_order=["image", "annotation"], - operations=[test_op]) # Add column for "annotation" + operations=[test_op]) filename = "random_crop_with_bbox_01_c_result.npz" save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN) @@ -132,29 +132,23 @@ def test_random_crop_with_bbox_op2_c(plot_vis=False): def test_random_crop_with_bbox_op3_c(plot_vis=False): """ - Prints images and bboxes side by side with and without RandomCropWithBBox Op applied, - with Padding Mode explicitly passed + Prints images and bboxes side by side with and without RandomCropWithBBox Op applied, + with Padding Mode explicitly passed """ logger.info("test_random_crop_with_bbox_op3_c") # Load dataset - dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) - dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + dataVoc1 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) + dataVoc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) # define test OP with values to match existing Op unit - test test_op = c_vision.RandomCropWithBBox(512, [200, 200, 200, 200], padding_mode=mode.Border.EDGE) 
- dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], columns_order=["image", "annotation"], - operations=[test_op]) # Add column for "annotation" + operations=[test_op]) unaugSamp, augSamp = [], [] @@ -168,25 +162,18 @@ def test_random_crop_with_bbox_op3_c(plot_vis=False): def test_random_crop_with_bbox_op_edge_c(plot_vis=False): """ - Prints images and bboxes side by side with and without RandomCropWithBBox Op applied, - applied on dynamically generated edge case, expected to pass + Prints images and bboxes side by side with and without RandomCropWithBBox Op applied, + applied on dynamically generated edge case, expected to pass """ logger.info("test_random_crop_with_bbox_op_edge_c") # Load dataset - dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) - dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + dataVoc1 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) + dataVoc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) # define test OP with values to match existing Op unit - test test_op = c_vision.RandomCropWithBBox(512, [200, 200, 200, 200], padding_mode=mode.Border.EDGE) - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - # maps to convert data into valid edge case data dataVoc1 = dataVoc1.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -216,16 +203,12 @@ def 
test_random_crop_with_bbox_op_invalid_c(): logger.info("test_random_crop_with_bbox_op_invalid_c") # Load dataset - dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + dataVoc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) try: # define test OP with values to match existing Op unit - test test_op = c_vision.RandomCropWithBBox([512, 512, 375]) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -246,18 +229,19 @@ def test_random_crop_with_bbox_op_bad_c(): logger.info("test_random_crop_with_bbox_op_bad_c") test_op = c_vision.RandomCropWithBBox([512, 512], [200, 200, 200, 200]) - data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + data_voc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) check_bad_bbox(data_voc2, test_op, InvalidBBoxType.WidthOverflow, "bounding boxes is out of bounds of the image") - data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + data_voc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) check_bad_bbox(data_voc2, test_op, InvalidBBoxType.HeightOverflow, "bounding boxes is out of bounds of the image") - data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + data_voc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) check_bad_bbox(data_voc2, test_op, InvalidBBoxType.NegativeXY, "min_x") - data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + data_voc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) check_bad_bbox(data_voc2, test_op, 
InvalidBBoxType.WrongShape, "4 features") if __name__ == "__main__": test_random_crop_with_bbox_op_c(plot_vis=True) + test_random_crop_with_bbox_op_coco_c(plot_vis=True) test_random_crop_with_bbox_op2_c(plot_vis=True) test_random_crop_with_bbox_op3_c(plot_vis=True) test_random_crop_with_bbox_op_edge_c(plot_vis=True) diff --git a/tests/ut/python/dataset/test_random_dataset.py b/tests/ut/python/dataset/test_random_dataset.py index 4d50be254c..56a2a93113 100644 --- a/tests/ut/python/dataset/test_random_dataset.py +++ b/tests/ut/python/dataset/test_random_dataset.py @@ -16,17 +16,16 @@ import mindspore.common.dtype as mstype import mindspore.dataset as ds from mindspore import log as logger - # just a basic test with parallel random data op def test_randomdataset_basic1(): - logger.info("Test randomdataset basic") + logger.info("Test randomdataset basic 1") schema = ds.Schema() schema.add_column('image', de_type=mstype.uint8, shape=[2]) schema.add_column('label', de_type=mstype.uint8, shape=[1]) # apply dataset operations - ds1 = ds.RandomDataset(schema=schema, num_samples=50, num_parallel_workers=4) + ds1 = ds.RandomDataset(schema=schema, total_rows=50, num_parallel_workers=4) ds1 = ds1.repeat(4) num_iter = 0 @@ -36,8 +35,9 @@ def test_randomdataset_basic1(): logger.info("{} label: {}".format(num_iter, data["label"])) num_iter += 1 - logger.info("Number of data in ds1: ", num_iter) + logger.info("Number of data in ds1: {}".format(num_iter)) assert num_iter == 200 + logger.info("Test randomdataset basic 1 complete") # Another simple test @@ -49,10 +49,8 @@ def test_randomdataset_basic2(): shape=[640, 480, 3]) # 921600 bytes (a bit less than 1 MB per image) schema.add_column('label', de_type=mstype.uint8, shape=[1]) - # Make up about 10 samples - ds1 = ds.RandomDataset(schema=schema, num_samples=10, num_parallel_workers=1) - - # cache size allows for about 4 images since each image just a bit less than 1MB, after that we will have to spill + # Make up 10 rows + ds1 = 
ds.RandomDataset(schema=schema, total_rows=10, num_parallel_workers=1) ds1 = ds1.repeat(4) num_iter = 0 @@ -62,11 +60,31 @@ def test_randomdataset_basic2(): logger.info("printing the label: {}".format(data["label"])) num_iter += 1 - logger.info("Number of data in ds1: ", num_iter) + logger.info("Number of data in ds1: {}".format(num_iter)) assert num_iter == 40 + logger.info("Test randomdataset basic 2 complete") + + +# Another simple test +def test_randomdataset_basic3(): + logger.info("Test randomdataset basic 3") + + # Make up 10 samples, but here even the schema is randomly created + # The columns are named like this "c0", "c1", "c2" etc + # But, we will use a tuple iterator instead of dict iterator so the column names + # are not needed to iterate + ds1 = ds.RandomDataset(total_rows=10, num_parallel_workers=1) + ds1 = ds1.repeat(2) + + num_iter = 0 + for _ in ds1.create_tuple_iterator(): + num_iter += 1 + logger.info("Number of data in ds1: {}".format(num_iter)) + assert num_iter == 20 + logger.info("Test randomdataset basic 3 Complete") if __name__ == '__main__': test_randomdataset_basic1() test_randomdataset_basic2() - logger.info('test_randomdataset_basic Ended.\n') + test_randomdataset_basic3() diff --git a/tests/ut/python/dataset/test_random_grayscale.py b/tests/ut/python/dataset/test_random_grayscale.py index 83514a55f6..4cb25c3a3a 100644 --- a/tests/ut/python/dataset/test_random_grayscale.py +++ b/tests/ut/python/dataset/test_random_grayscale.py @@ -179,7 +179,7 @@ def test_random_grayscale_invalid_param(): data = data.map(input_columns=["image"], operations=transform()) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "Input is not within the required range" in str(e) + assert "Input prob is not within the required interval of (0.0 to 1.0)." 
in str(e) if __name__ == "__main__": test_random_grayscale_valid_prob(True) diff --git a/tests/ut/python/dataset/test_random_horizontal_flip.py b/tests/ut/python/dataset/test_random_horizontal_flip.py index 1272148e4f..ef4f5b8eb6 100644 --- a/tests/ut/python/dataset/test_random_horizontal_flip.py +++ b/tests/ut/python/dataset/test_random_horizontal_flip.py @@ -141,7 +141,7 @@ def test_random_horizontal_invalid_prob_c(): data = data.map(input_columns=["image"], operations=random_horizontal_op) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "Input is not" in str(e) + assert "Input prob is not within the required interval of (0.0 to 1.0)." in str(e) def test_random_horizontal_invalid_prob_py(): @@ -164,7 +164,7 @@ def test_random_horizontal_invalid_prob_py(): data = data.map(input_columns=["image"], operations=transform()) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "Input is not" in str(e) + assert "Input prob is not within the required interval of (0.0 to 1.0)." in str(e) def test_random_horizontal_comp(plot=False): diff --git a/tests/ut/python/dataset/test_random_horizontal_flip_with_bbox.py b/tests/ut/python/dataset/test_random_horizontal_flip_with_bbox.py index 94ab843ce1..4fd51a7a03 100644 --- a/tests/ut/python/dataset/test_random_horizontal_flip_with_bbox.py +++ b/tests/ut/python/dataset/test_random_horizontal_flip_with_bbox.py @@ -24,33 +24,15 @@ from util import visualize_with_bounding_boxes, InvalidBBoxType, check_bad_bbox, GENERATE_GOLDEN = False +# updated VOC dataset with correct annotations DATA_DIR = "../data/dataset/testVOC2012_2" - - -def fix_annotate(bboxes): - """ - Fix annotations to format followed by mindspore. 
- :param bboxes: in [label, x_min, y_min, w, h, truncate, difficult] format - :return: annotation in [x_min, y_min, w, h, label, truncate, difficult] format - """ - for bbox in bboxes: - if bbox.size == 7: - tmp = bbox[0] - bbox[0] = bbox[1] - bbox[1] = bbox[2] - bbox[2] = bbox[3] - bbox[3] = bbox[4] - bbox[4] = tmp - else: - print("ERROR: Invalid Bounding Box size provided") - break - return bboxes +DATA_DIR_2 = ["../data/dataset/testCOCO/train/", + "../data/dataset/testCOCO/annotations/train.json"] # DATA_DIR, ANNOTATION_DIR def test_random_horizontal_flip_with_bbox_op_c(plot_vis=False): """ - Prints images side by side with and without Aug applied + bboxes to - compare and test + Prints images and bboxes side by side with and without RandomHorizontalFlipWithBBox Op applied """ logger.info("test_random_horizontal_flip_with_bbox_op_c") @@ -63,14 +45,6 @@ def test_random_horizontal_flip_with_bbox_op_c(plot_vis=False): test_op = c_vision.RandomHorizontalFlipWithBBox(1) - # maps to fix annotations to minddata standard - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], columns_order=["image", "annotation"], @@ -86,7 +60,37 @@ def test_random_horizontal_flip_with_bbox_op_c(plot_vis=False): visualize_with_bounding_boxes(unaugSamp, augSamp) -def test_random_horizontal_bbox_with_bbox_valid_rand_c(plot_vis=False): +def test_random_horizontal_flip_with_bbox_op_coco_c(plot_vis=False): + """ + Prints images and bboxes side by side with and without RandomHorizontalFlipWithBBox Op applied, + Testing with COCO dataset + """ + logger.info("test_random_horizontal_flip_with_bbox_op_coco_c") + + dataCoco1 = ds.CocoDataset(DATA_DIR_2[0], annotation_file=DATA_DIR_2[1], 
task="Detection", + decode=True, shuffle=False) + + dataCoco2 = ds.CocoDataset(DATA_DIR_2[0], annotation_file=DATA_DIR_2[1], task="Detection", + decode=True, shuffle=False) + + test_op = c_vision.RandomHorizontalFlipWithBBox(1) + + dataCoco2 = dataCoco2.map(input_columns=["image", "bbox"], + output_columns=["image", "bbox"], + columns_order=["image", "bbox"], + operations=[test_op]) + + unaugSamp, augSamp = [], [] + + for unAug, Aug in zip(dataCoco1.create_dict_iterator(), dataCoco2.create_dict_iterator()): + unaugSamp.append(unAug) + augSamp.append(Aug) + + if plot_vis: + visualize_with_bounding_boxes(unaugSamp, augSamp, "bbox") + + +def test_random_horizontal_flip_with_bbox_valid_rand_c(plot_vis=False): """ Uses a valid non-default input, expect to pass Prints images side by side with and without Aug applied + bboxes to @@ -106,13 +110,6 @@ def test_random_horizontal_bbox_with_bbox_valid_rand_c(plot_vis=False): test_op = c_vision.RandomHorizontalFlipWithBBox(0.6) - # maps to fix annotations to minddata standard - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -148,25 +145,18 @@ def test_random_horizontal_flip_with_bbox_valid_edge_c(plot_vis=False): test_op = c_vision.RandomHorizontalFlipWithBBox(1) - # maps to fix annotations to minddata standard - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops # Add column for "annotation" dataVoc1 = dataVoc1.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], columns_order=["image", "annotation"], 
operations=lambda img, bbox: - (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.uint32))) + (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32))) dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], columns_order=["image", "annotation"], operations=lambda img, bbox: - (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.uint32))) + (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32))) dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], columns_order=["image", "annotation"], @@ -193,9 +183,6 @@ def test_random_horizontal_flip_with_bbox_invalid_prob_c(): try: # Note: Valid range of prob should be [0.0, 1.0] test_op = c_vision.RandomHorizontalFlipWithBBox(1.5) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -203,7 +190,7 @@ def test_random_horizontal_flip_with_bbox_invalid_prob_c(): operations=[test_op]) # Add column for "annotation" except ValueError as error: logger.info("Got an exception in DE: {}".format(str(error))) - assert "Input is not" in str(error) + assert "Input prob is not within the required interval of (0.0 to 1.0)." 
in str(error) def test_random_horizontal_flip_with_bbox_invalid_bounds_c(): @@ -227,7 +214,8 @@ def test_random_horizontal_flip_with_bbox_invalid_bounds_c(): if __name__ == "__main__": # set to false to not show plots test_random_horizontal_flip_with_bbox_op_c(plot_vis=False) - test_random_horizontal_bbox_with_bbox_valid_rand_c(plot_vis=False) + test_random_horizontal_flip_with_bbox_op_coco_c(plot_vis=False) + test_random_horizontal_flip_with_bbox_valid_rand_c(plot_vis=False) test_random_horizontal_flip_with_bbox_valid_edge_c(plot_vis=False) test_random_horizontal_flip_with_bbox_invalid_prob_c() test_random_horizontal_flip_with_bbox_invalid_bounds_c() diff --git a/tests/ut/python/dataset/test_random_perspective.py b/tests/ut/python/dataset/test_random_perspective.py index 507c9cdb80..992bf2b222 100644 --- a/tests/ut/python/dataset/test_random_perspective.py +++ b/tests/ut/python/dataset/test_random_perspective.py @@ -67,7 +67,7 @@ def test_random_perspective_op(plot=False): visualize_list(image_original, image_perspective) -def test_random_perspective_md5(): +def skip_test_random_perspective_md5(): """ Test RandomPerspective with md5 comparison """ @@ -107,7 +107,7 @@ def test_random_perspective_exception_distortion_scale_range(): _ = py_vision.RandomPerspective(distortion_scale=1.5) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert str(e) == "Input is not within the required range" + assert str(e) == "Input distortion_scale is not within the required interval of (0.0 to 1.0)." def test_random_perspective_exception_prob_range(): @@ -119,11 +119,11 @@ def test_random_perspective_exception_prob_range(): _ = py_vision.RandomPerspective(prob=1.2) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert str(e) == "Input is not within the required range" + assert str(e) == "Input prob is not within the required interval of (0.0 to 1.0)." 
if __name__ == "__main__": test_random_perspective_op(plot=True) - test_random_perspective_md5() + skip_test_random_perspective_md5() test_random_perspective_exception_distortion_scale_range() test_random_perspective_exception_prob_range() diff --git a/tests/ut/python/dataset/test_random_resize_with_bbox.py b/tests/ut/python/dataset/test_random_resize_with_bbox.py index 4aadf9ef01..94f9d12427 100644 --- a/tests/ut/python/dataset/test_random_resize_with_bbox.py +++ b/tests/ut/python/dataset/test_random_resize_with_bbox.py @@ -26,32 +26,18 @@ from util import visualize_with_bounding_boxes, InvalidBBoxType, check_bad_bbox, GENERATE_GOLDEN = False DATA_DIR = "../data/dataset/testVOC2012_2" +DATA_DIR_2 = ["../data/dataset/testCOCO/train/", + "../data/dataset/testCOCO/annotations/train.json"] # DATA_DIR, ANNOTATION_DIR -def fix_annotate(bboxes): +def test_random_resize_with_bbox_op_voc_c(plot_vis=False): """ - Fix annotations to format followed by mindspore. - :param bboxes: in [label, x_min, y_min, w, h, truncate, difficult] format - :return: annotation in [x_min, y_min, w, h, label, truncate, difficult] format + Prints images and bboxes side by side with and without RandomResizeWithBBox Op applied + testing with VOC dataset """ - for (i, box) in enumerate(bboxes): - if box.size == 7: - bboxes[i] = np.roll(box, -1) - else: - print("ERROR: Invalid Bounding Box size provided") - break - return bboxes - - -def test_random_resize_with_bbox_op_rand_c(plot_vis=False): - """ - Prints images and bboxes side by side with and without RandomResizeWithBBox Op applied, - tests with MD5 check, expected to pass - """ - logger.info("test_random_resize_with_bbox_rand_c") - original_seed = config_get_set_seed(1) + logger.info("test_random_resize_with_bbox_op_voc_c") + original_seed = config_get_set_seed(123) original_num_parallel_workers = config_get_set_num_parallel_workers(1) - # Load dataset dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) 
@@ -59,21 +45,15 @@ def test_random_resize_with_bbox_op_rand_c(plot_vis=False): dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) - test_op = c_vision.RandomResizeWithBBox(200) + test_op = c_vision.RandomResizeWithBBox(100) - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], columns_order=["image", "annotation"], operations=[test_op]) - filename = "random_resize_with_bbox_op_01_c_result.npz" + filename = "random_resize_with_bbox_op_01_c_voc_result.npz" save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN) unaugSamp, augSamp = [], [] @@ -90,6 +70,49 @@ def test_random_resize_with_bbox_op_rand_c(plot_vis=False): ds.config.set_num_parallel_workers(original_num_parallel_workers) +def test_random_resize_with_bbox_op_rand_coco_c(plot_vis=False): + """ + Prints images and bboxes side by side with and without RandomResizeWithBBox Op applied, + tests with MD5 check, expected to pass + testing with COCO dataset + """ + logger.info("test_random_resize_with_bbox_op_rand_coco_c") + original_seed = config_get_set_seed(231) + original_num_parallel_workers = config_get_set_num_parallel_workers(1) + + # Load dataset + dataCoco1 = ds.CocoDataset(DATA_DIR_2[0], annotation_file=DATA_DIR_2[1], task="Detection", + decode=True, shuffle=False) + + dataCoco2 = ds.CocoDataset(DATA_DIR_2[0], annotation_file=DATA_DIR_2[1], task="Detection", + decode=True, shuffle=False) + + test_op = c_vision.RandomResizeWithBBox(200) + + # map to apply ops + + dataCoco2 = dataCoco2.map(input_columns=["image", "bbox"], + output_columns=["image", "bbox"], + columns_order=["image", "bbox"], + operations=[test_op]) + + filename = 
"random_resize_with_bbox_op_01_c_coco_result.npz" + save_and_check_md5(dataCoco2, filename, generate_golden=GENERATE_GOLDEN) + + unaugSamp, augSamp = [], [] + + for unAug, Aug in zip(dataCoco1.create_dict_iterator(), dataCoco2.create_dict_iterator()): + unaugSamp.append(unAug) + augSamp.append(Aug) + + if plot_vis: + visualize_with_bounding_boxes(unaugSamp, augSamp, annot_name="bbox") + + # Restore config setting + ds.config.set_seed(original_seed) + ds.config.set_num_parallel_workers(original_num_parallel_workers) + + def test_random_resize_with_bbox_op_edge_c(plot_vis=False): """ Prints images and bboxes side by side with and without RandomresizeWithBBox Op applied, @@ -105,13 +128,6 @@ def test_random_resize_with_bbox_op_edge_c(plot_vis=False): test_op = c_vision.RandomResizeWithBBox(500) - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - # maps to convert data into valid edge case data dataVoc1 = dataVoc1.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -147,7 +163,7 @@ def test_random_resize_with_bbox_op_invalid_c(): except ValueError as err: logger.info("Got an exception in DE: {}".format(str(err))) - assert "Input is not" in str(err) + assert "Input is not within the required interval of (1 to 16777216)." in str(err) try: # one of the size values is zero @@ -155,7 +171,7 @@ def test_random_resize_with_bbox_op_invalid_c(): except ValueError as err: logger.info("Got an exception in DE: {}".format(str(err))) - assert "Input is not" in str(err) + assert "Input size at dim 0 is not within the required interval of (1 to 2147483647)." 
in str(err) try: # negative value for resize @@ -163,7 +179,7 @@ def test_random_resize_with_bbox_op_invalid_c(): except ValueError as err: logger.info("Got an exception in DE: {}".format(str(err))) - assert "Input is not" in str(err) + assert "Input is not within the required interval of (1 to 16777216)." in str(err) try: # invalid input shape @@ -192,7 +208,8 @@ def test_random_resize_with_bbox_op_bad_c(): if __name__ == "__main__": - test_random_resize_with_bbox_op_rand_c(plot_vis=False) + test_random_resize_with_bbox_op_voc_c(plot_vis=False) + test_random_resize_with_bbox_op_rand_coco_c(plot_vis=False) test_random_resize_with_bbox_op_edge_c(plot_vis=False) test_random_resize_with_bbox_op_invalid_c() test_random_resize_with_bbox_op_bad_c() diff --git a/tests/ut/python/dataset/test_random_sharpness.py b/tests/ut/python/dataset/test_random_sharpness.py index d8207ff099..22e5c66f1a 100644 --- a/tests/ut/python/dataset/test_random_sharpness.py +++ b/tests/ut/python/dataset/test_random_sharpness.py @@ -97,7 +97,7 @@ def test_random_sharpness_md5(): # define map operations transforms = [ F.Decode(), - F.RandomSharpness((0.5, 1.5)), + F.RandomSharpness((0.1, 1.9)), F.ToTensor() ] transform = F.ComposeOp(transforms) diff --git a/tests/ut/python/dataset/test_random_vertical_flip.py b/tests/ut/python/dataset/test_random_vertical_flip.py index 2fc9b12774..a3d02959fd 100644 --- a/tests/ut/python/dataset/test_random_vertical_flip.py +++ b/tests/ut/python/dataset/test_random_vertical_flip.py @@ -141,7 +141,7 @@ def test_random_vertical_invalid_prob_c(): data = data.map(input_columns=["image"], operations=random_horizontal_op) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "Input is not" in str(e) + assert 'Input prob is not within the required interval of (0.0 to 1.0).' 
in str(e) def test_random_vertical_invalid_prob_py(): @@ -163,7 +163,7 @@ def test_random_vertical_invalid_prob_py(): data = data.map(input_columns=["image"], operations=transform()) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "Input is not" in str(e) + assert 'Input prob is not within the required interval of (0.0 to 1.0).' in str(e) def test_random_vertical_comp(plot=False): diff --git a/tests/ut/python/dataset/test_random_vertical_flip_with_bbox.py b/tests/ut/python/dataset/test_random_vertical_flip_with_bbox.py index f746bd50b0..490dc3e419 100644 --- a/tests/ut/python/dataset/test_random_vertical_flip_with_bbox.py +++ b/tests/ut/python/dataset/test_random_vertical_flip_with_bbox.py @@ -25,50 +25,26 @@ from util import visualize_with_bounding_boxes, InvalidBBoxType, check_bad_bbox, GENERATE_GOLDEN = False -# updated VOC dataset with correct annotations -DATA_DIR = "../data/dataset/testVOC2012_2" - - -def fix_annotate(bboxes): - """ - Fix annotations to format followed by mindspore. 
- :param bboxes: in [label, x_min, y_min, w, h, truncate, difficult] format - :return: annotation in [x_min, y_min, w, h, label, truncate, difficult] format - """ - for bbox in bboxes: - if bbox.size == 7: - tmp = bbox[0] - bbox[0] = bbox[1] - bbox[1] = bbox[2] - bbox[2] = bbox[3] - bbox[3] = bbox[4] - bbox[4] = tmp - else: - print("ERROR: Invalid Bounding Box size provided") - break - return bboxes +# Updated VOC dataset with correct annotations - DATA_DIR +DATA_DIR_VOC = "../data/dataset/testVOC2012_2" +# COCO dataset - DATA_DIR, ANNOTATION_DIR +DATA_DIR_COCO = ["../data/dataset/testCOCO/train/", "../data/dataset/testCOCO/annotations/train.json"] def test_random_vertical_flip_with_bbox_op_c(plot_vis=False): """ - Prints images and bboxes side by side with and without RandomVerticalFlipWithBBox Op applied + Prints images and bboxes side by side with and without RandomVerticalFlipWithBBox Op applied """ logger.info("test_random_vertical_flip_with_bbox_op_c") # Load dataset - dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", + dataVoc1 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) - dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", + dataVoc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) test_op = c_vision.RandomVerticalFlipWithBBox(1) - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -84,31 +60,56 @@ def test_random_vertical_flip_with_bbox_op_c(plot_vis=False): if plot_vis: visualize_with_bounding_boxes(unaugSamp, augSamp) +def test_random_vertical_flip_with_bbox_op_coco_c(plot_vis=False): + """ + Prints images and bboxes side by side with and 
without RandomVerticalFlipWithBBox Op applied, + Testing with Coco dataset + """ + logger.info("test_random_vertical_flip_with_bbox_op_coco_c") + # load dataset + dataCoco1 = ds.CocoDataset(DATA_DIR_COCO[0], annotation_file=DATA_DIR_COCO[1], task="Detection", + decode=True, shuffle=False) + + dataCoco2 = ds.CocoDataset(DATA_DIR_COCO[0], annotation_file=DATA_DIR_COCO[1], task="Detection", + decode=True, shuffle=False) + + test_op = c_vision.RandomVerticalFlipWithBBox(1) + + dataCoco2 = dataCoco2.map(input_columns=["image", "bbox"], + output_columns=["image", "bbox"], + columns_order=["image", "bbox"], + operations=[test_op]) + + test_op = c_vision.RandomVerticalFlipWithBBox(1) + + unaugSamp, augSamp = [], [] + + for unAug, Aug in zip(dataCoco1.create_dict_iterator(), dataCoco2.create_dict_iterator()): + unaugSamp.append(unAug) + augSamp.append(Aug) + + if plot_vis: + visualize_with_bounding_boxes(unaugSamp, augSamp, "bbox") + def test_random_vertical_flip_with_bbox_op_rand_c(plot_vis=False): """ - Prints images and bboxes side by side with and without RandomVerticalFlipWithBBox Op applied, - tests with MD5 check, expected to pass + Prints images and bboxes side by side with and without RandomVerticalFlipWithBBox Op applied, + tests with MD5 check, expected to pass """ logger.info("test_random_vertical_flip_with_bbox_op_rand_c") original_seed = config_get_set_seed(29847) original_num_parallel_workers = config_get_set_num_parallel_workers(1) # Load dataset - dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", + dataVoc1 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) - dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", + dataVoc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) test_op = c_vision.RandomVerticalFlipWithBBox(0.8) - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = 
dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -134,25 +135,18 @@ def test_random_vertical_flip_with_bbox_op_rand_c(plot_vis=False): def test_random_vertical_flip_with_bbox_op_edge_c(plot_vis=False): """ - Prints images and bboxes side by side with and without RandomVerticalFlipWithBBox Op applied, + Prints images and bboxes side by side with and without RandomVerticalFlipWithBBox Op applied, applied on dynamically generated edge case, expected to pass """ logger.info("test_random_vertical_flip_with_bbox_op_edge_c") - dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", + dataVoc1 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) - dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", + dataVoc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) test_op = c_vision.RandomVerticalFlipWithBBox(1) - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - # maps to convert data into valid edge case data dataVoc1 = dataVoc1.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -177,17 +171,15 @@ def test_random_vertical_flip_with_bbox_op_edge_c(plot_vis=False): def test_random_vertical_flip_with_bbox_op_invalid_c(): """ - Test RandomVerticalFlipWithBBox Op on invalid constructor parameters, expected to raise ValueError + Test RandomVerticalFlipWithBBox Op on invalid constructor parameters, expected to raise ValueError """ logger.info("test_random_vertical_flip_with_bbox_op_invalid_c") - dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", + dataVoc2 = 
ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) try: test_op = c_vision.RandomVerticalFlipWithBBox(2) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) + # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -199,7 +191,7 @@ def test_random_vertical_flip_with_bbox_op_invalid_c(): except ValueError as err: logger.info("Got an exception in DE: {}".format(str(err))) - assert "Input is not" in str(err) + assert "Input prob is not within the required interval of (0.0 to 1.0)." in str(err) def test_random_vertical_flip_with_bbox_op_bad_c(): @@ -209,18 +201,19 @@ def test_random_vertical_flip_with_bbox_op_bad_c(): logger.info("test_random_vertical_flip_with_bbox_op_bad_c") test_op = c_vision.RandomVerticalFlipWithBBox(1) - data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + data_voc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) check_bad_bbox(data_voc2, test_op, InvalidBBoxType.WidthOverflow, "bounding boxes is out of bounds of the image") - data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + data_voc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) check_bad_bbox(data_voc2, test_op, InvalidBBoxType.HeightOverflow, "bounding boxes is out of bounds of the image") - data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + data_voc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) check_bad_bbox(data_voc2, test_op, InvalidBBoxType.NegativeXY, "min_x") - data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + data_voc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, 
shuffle=False) check_bad_bbox(data_voc2, test_op, InvalidBBoxType.WrongShape, "4 features") if __name__ == "__main__": test_random_vertical_flip_with_bbox_op_c(plot_vis=True) + test_random_vertical_flip_with_bbox_op_coco_c(plot_vis=True) test_random_vertical_flip_with_bbox_op_rand_c(plot_vis=True) test_random_vertical_flip_with_bbox_op_edge_c(plot_vis=True) test_random_vertical_flip_with_bbox_op_invalid_c() diff --git a/tests/ut/python/dataset/test_repeat.py b/tests/ut/python/dataset/test_repeat.py index 4bdde7beeb..ca4702ff8c 100644 --- a/tests/ut/python/dataset/test_repeat.py +++ b/tests/ut/python/dataset/test_repeat.py @@ -12,25 +12,24 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== +""" +Test Repeat Op +""" import numpy as np -from util import save_and_check import mindspore.dataset as ds import mindspore.dataset.transforms.vision.c_transforms as vision from mindspore import log as logger +from util import save_and_check_dict DATA_DIR_TF = ["../data/dataset/testTFTestAllTypes/test.data"] SCHEMA_DIR_TF = "../data/dataset/testTFTestAllTypes/datasetSchema.json" -COLUMNS_TF = ["col_1d", "col_2d", "col_3d", "col_binary", "col_float", - "col_sint16", "col_sint32", "col_sint64"] -GENERATE_GOLDEN = False - -IMG_DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"] -IMG_SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json" DATA_DIR_TF2 = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"] SCHEMA_DIR_TF2 = "../data/dataset/test_tf_file_3_images/datasetSchema.json" +GENERATE_GOLDEN = False + def test_tf_repeat_01(): """ @@ -39,14 +38,13 @@ def test_tf_repeat_01(): logger.info("Test Simple Repeat") # define parameters repeat_count = 2 - parameters = {"params": {'repeat_count': repeat_count}} # apply dataset operations data1 = ds.TFRecordDataset(DATA_DIR_TF, SCHEMA_DIR_TF, 
shuffle=False) data1 = data1.repeat(repeat_count) filename = "repeat_result.npz" - save_and_check(data1, parameters, filename, generate_golden=GENERATE_GOLDEN) + save_and_check_dict(data1, filename, generate_golden=GENERATE_GOLDEN) def test_tf_repeat_02(): @@ -99,14 +97,13 @@ def test_tf_repeat_04(): logger.info("Test Simple Repeat Column List") # define parameters repeat_count = 2 - parameters = {"params": {'repeat_count': repeat_count}} columns_list = ["col_sint64", "col_sint32"] # apply dataset operations data1 = ds.TFRecordDataset(DATA_DIR_TF, SCHEMA_DIR_TF, columns_list=columns_list, shuffle=False) data1 = data1.repeat(repeat_count) filename = "repeat_list_result.npz" - save_and_check(data1, parameters, filename, generate_golden=GENERATE_GOLDEN) + save_and_check_dict(data1, filename, generate_golden=GENERATE_GOLDEN) def generator(): @@ -115,6 +112,7 @@ def generator(): def test_nested_repeat1(): + logger.info("test_nested_repeat1") data = ds.GeneratorDataset(generator, ["data"]) data = data.repeat(2) data = data.repeat(3) @@ -126,6 +124,7 @@ def test_nested_repeat1(): def test_nested_repeat2(): + logger.info("test_nested_repeat2") data = ds.GeneratorDataset(generator, ["data"]) data = data.repeat(1) data = data.repeat(1) @@ -137,6 +136,7 @@ def test_nested_repeat2(): def test_nested_repeat3(): + logger.info("test_nested_repeat3") data = ds.GeneratorDataset(generator, ["data"]) data = data.repeat(1) data = data.repeat(2) @@ -148,6 +148,7 @@ def test_nested_repeat3(): def test_nested_repeat4(): + logger.info("test_nested_repeat4") data = ds.GeneratorDataset(generator, ["data"]) data = data.repeat(2) data = data.repeat(1) @@ -159,6 +160,7 @@ def test_nested_repeat4(): def test_nested_repeat5(): + logger.info("test_nested_repeat5") data = ds.GeneratorDataset(generator, ["data"]) data = data.batch(3) data = data.repeat(2) @@ -171,6 +173,7 @@ def test_nested_repeat5(): def test_nested_repeat6(): + logger.info("test_nested_repeat6") data = 
ds.GeneratorDataset(generator, ["data"]) data = data.repeat(2) data = data.batch(3) @@ -183,6 +186,7 @@ def test_nested_repeat6(): def test_nested_repeat7(): + logger.info("test_nested_repeat7") data = ds.GeneratorDataset(generator, ["data"]) data = data.repeat(2) data = data.repeat(3) @@ -195,6 +199,7 @@ def test_nested_repeat7(): def test_nested_repeat8(): + logger.info("test_nested_repeat8") data = ds.GeneratorDataset(generator, ["data"]) data = data.batch(2, drop_remainder=False) data = data.repeat(2) @@ -210,6 +215,7 @@ def test_nested_repeat8(): def test_nested_repeat9(): + logger.info("test_nested_repeat9") data = ds.GeneratorDataset(generator, ["data"]) data = data.repeat() data = data.repeat(3) @@ -221,6 +227,7 @@ def test_nested_repeat9(): def test_nested_repeat10(): + logger.info("test_nested_repeat10") data = ds.GeneratorDataset(generator, ["data"]) data = data.repeat(3) data = data.repeat() @@ -232,6 +239,7 @@ def test_nested_repeat10(): def test_nested_repeat11(): + logger.info("test_nested_repeat11") data = ds.GeneratorDataset(generator, ["data"]) data = data.repeat(2) data = data.repeat(3) diff --git a/tests/ut/python/dataset/test_resize_with_bbox.py b/tests/ut/python/dataset/test_resize_with_bbox.py index 06f3937958..3bb731ee97 100644 --- a/tests/ut/python/dataset/test_resize_with_bbox.py +++ b/tests/ut/python/dataset/test_resize_with_bbox.py @@ -26,29 +26,16 @@ from util import visualize_with_bounding_boxes, InvalidBBoxType, check_bad_bbox, GENERATE_GOLDEN = False DATA_DIR = "../data/dataset/testVOC2012_2" +DATA_DIR_2 = ["../data/dataset/testCOCO/train/", + "../data/dataset/testCOCO/annotations/train.json"] # DATA_DIR, ANNOTATION_DIR -def fix_annotate(bboxes): +def test_resize_with_bbox_op_voc_c(plot_vis=False): """ - Fix annotations to format followed by mindspore. 
- :param bboxes: in [label, x_min, y_min, w, h, truncate, difficult] format - :return: annotation in [x_min, y_min, w, h, label, truncate, difficult] format + Prints images and bboxes side by side with and without ResizeWithBBox Op applied + testing with VOC dataset """ - for (i, box) in enumerate(bboxes): - if box.size == 7: - bboxes[i] = np.roll(box, -1) - else: - print("ERROR: Invalid Bounding Box size provided") - break - return bboxes - - -def test_resize_with_bbox_op_c(plot_vis=False): - """ - Prints images and bboxes side by side with and without ResizeWithBBox Op applied, - tests with MD5 check, expected to pass - """ - logger.info("test_resize_with_bbox_op_c") + logger.info("test_resize_with_bbox_op_voc_c") # Load dataset dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", @@ -57,21 +44,15 @@ def test_resize_with_bbox_op_c(plot_vis=False): dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) - test_op = c_vision.ResizeWithBBox(200) + test_op = c_vision.ResizeWithBBox(100) - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], columns_order=["image", "annotation"], operations=[test_op]) - filename = "resize_with_bbox_op_01_c_result.npz" + filename = "resize_with_bbox_op_01_c_voc_result.npz" save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN) unaugSamp, augSamp = [], [] @@ -84,6 +65,43 @@ def test_resize_with_bbox_op_c(plot_vis=False): visualize_with_bounding_boxes(unaugSamp, augSamp) +def test_resize_with_bbox_op_coco_c(plot_vis=False): + """ + Prints images and bboxes side by side with and without ResizeWithBBox Op applied, + tests with MD5 check, expected to pass + Testing with COCO 
dataset + """ + logger.info("test_resize_with_bbox_op_coco_c") + + # Load dataset + dataCOCO1 = ds.CocoDataset(DATA_DIR_2[0], annotation_file=DATA_DIR_2[1], task="Detection", + decode=True, shuffle=False) + + dataCOCO2 = ds.CocoDataset(DATA_DIR_2[0], annotation_file=DATA_DIR_2[1], task="Detection", + decode=True, shuffle=False) + + test_op = c_vision.ResizeWithBBox(200) + + # map to apply ops + + dataCOCO2 = dataCOCO2.map(input_columns=["image", "bbox"], + output_columns=["image", "bbox"], + columns_order=["image", "bbox"], + operations=[test_op]) + + filename = "resize_with_bbox_op_01_c_coco_result.npz" + save_and_check_md5(dataCOCO2, filename, generate_golden=GENERATE_GOLDEN) + + unaugSamp, augSamp = [], [] + + for unAug, Aug in zip(dataCOCO1.create_dict_iterator(), dataCOCO2.create_dict_iterator()): + unaugSamp.append(unAug) + augSamp.append(Aug) + + if plot_vis: + visualize_with_bounding_boxes(unaugSamp, augSamp, annot_name="bbox") + + def test_resize_with_bbox_op_edge_c(plot_vis=False): """ Prints images and bboxes side by side with and without ResizeWithBBox Op applied, @@ -99,13 +117,6 @@ def test_resize_with_bbox_op_edge_c(plot_vis=False): test_op = c_vision.ResizeWithBBox(500) - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - # maps to convert data into valid edge case data dataVoc1 = dataVoc1.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -113,7 +124,6 @@ def test_resize_with_bbox_op_edge_c(plot_vis=False): operations=[lambda img, bboxes: ( img, np.array([[0, 0, img.shape[1], img.shape[0]]]).astype(bboxes.dtype))]) - # Test Op added to list of Operations here dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], columns_order=["image", "annotation"], @@ -140,7 +150,7 @@ def 
test_resize_with_bbox_op_invalid_c(): # invalid interpolation value c_vision.ResizeWithBBox(400, interpolation="invalid") - except ValueError as err: + except TypeError as err: logger.info("Got an exception in DE: {}".format(str(err))) assert "interpolation" in str(err) @@ -163,7 +173,8 @@ def test_resize_with_bbox_op_bad_c(): if __name__ == "__main__": - test_resize_with_bbox_op_c(plot_vis=False) + test_resize_with_bbox_op_voc_c(plot_vis=False) + test_resize_with_bbox_op_coco_c(plot_vis=False) test_resize_with_bbox_op_edge_c(plot_vis=False) test_resize_with_bbox_op_invalid_c() test_resize_with_bbox_op_bad_c() diff --git a/tests/ut/python/dataset/test_shuffle.py b/tests/ut/python/dataset/test_shuffle.py index 56cc65a23b..460c491ca1 100644 --- a/tests/ut/python/dataset/test_shuffle.py +++ b/tests/ut/python/dataset/test_shuffle.py @@ -154,7 +154,7 @@ def test_shuffle_exception_01(): except Exception as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "buffer_size" in str(e) + assert "Input buffer_size is not within the required interval of (2 to 2147483647)" in str(e) def test_shuffle_exception_02(): @@ -172,7 +172,7 @@ def test_shuffle_exception_02(): except Exception as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "buffer_size" in str(e) + assert "Input buffer_size is not within the required interval of (2 to 2147483647)" in str(e) def test_shuffle_exception_03(): @@ -190,7 +190,7 @@ def test_shuffle_exception_03(): except Exception as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "buffer_size" in str(e) + assert "Input buffer_size is not within the required interval of (2 to 2147483647)" in str(e) def test_shuffle_exception_05(): diff --git a/tests/ut/python/dataset/test_sync_wait.py b/tests/ut/python/dataset/test_sync_wait.py index a5727a2991..eb2261a5d3 100644 --- a/tests/ut/python/dataset/test_sync_wait.py +++ b/tests/ut/python/dataset/test_sync_wait.py @@ -14,7 +14,7 @@ # 
============================================================================== import numpy as np - +import pytest import mindspore.dataset as ds from mindspore import log as logger @@ -163,7 +163,6 @@ def test_sync_exception_01(): """ logger.info("test_sync_exception_01") shuffle_size = 4 - batch_size = 10 dataset = ds.GeneratorDataset(gen, column_names=["input"]) @@ -171,11 +170,9 @@ def test_sync_exception_01(): dataset = dataset.sync_wait(condition_name="policy", callback=aug.update) dataset = dataset.map(input_columns=["input"], operations=[aug.preprocess]) - try: - dataset = dataset.shuffle(shuffle_size) - except Exception as e: - assert "shuffle" in str(e) - dataset = dataset.batch(batch_size) + with pytest.raises(RuntimeError) as e: + dataset.shuffle(shuffle_size) + assert "No shuffle after sync operators" in str(e.value) def test_sync_exception_02(): @@ -183,7 +180,6 @@ def test_sync_exception_02(): Test sync: with duplicated condition name """ logger.info("test_sync_exception_02") - batch_size = 6 dataset = ds.GeneratorDataset(gen, column_names=["input"]) @@ -192,11 +188,9 @@ def test_sync_exception_02(): dataset = dataset.map(input_columns=["input"], operations=[aug.preprocess]) - try: - dataset = dataset.sync_wait(num_batch=2, condition_name="every batch") - except Exception as e: - assert "name" in str(e) - dataset = dataset.batch(batch_size) + with pytest.raises(RuntimeError) as e: + dataset.sync_wait(num_batch=2, condition_name="every batch") + assert "Condition name is already in use" in str(e.value) def test_sync_exception_03(): @@ -209,12 +203,9 @@ def test_sync_exception_03(): aug = Augment(0) # try to create dataset with batch_size < 0 - try: - dataset = dataset.sync_wait(condition_name="every batch", num_batch=-1, callback=aug.update) - except Exception as e: - assert "num_batch" in str(e) - - dataset = dataset.map(input_columns=["input"], operations=[aug.preprocess]) + with pytest.raises(ValueError) as e: + 
dataset.sync_wait(condition_name="every batch", num_batch=-1, callback=aug.update) + assert "num_batch need to be greater than 0." in str(e.value) def test_sync_exception_04(): @@ -230,14 +221,13 @@ def test_sync_exception_04(): dataset = dataset.sync_wait(condition_name="every batch", callback=aug.update) dataset = dataset.map(input_columns=["input"], operations=[aug.preprocess]) count = 0 - try: + with pytest.raises(RuntimeError) as e: for _ in dataset.create_dict_iterator(): count += 1 data = {"loss": count} - # dataset.disable_sync() dataset.sync_update(condition_name="every batch", num_batch=-1, data=data) - except Exception as e: - assert "batch" in str(e) + assert "Sync_update batch size can only be positive" in str(e.value) + def test_sync_exception_05(): """ @@ -251,15 +241,15 @@ def test_sync_exception_05(): # try to create dataset with batch_size < 0 dataset = dataset.sync_wait(condition_name="every batch", callback=aug.update) dataset = dataset.map(input_columns=["input"], operations=[aug.preprocess]) - try: + with pytest.raises(RuntimeError) as e: for _ in dataset.create_dict_iterator(): dataset.disable_sync() count += 1 data = {"loss": count} dataset.disable_sync() dataset.sync_update(condition_name="every", data=data) - except Exception as e: - assert "name" in str(e) + assert "Condition name not found" in str(e.value) + if __name__ == "__main__": test_simple_sync_wait() diff --git a/tests/ut/python/dataset/test_ten_crop.py b/tests/ut/python/dataset/test_ten_crop.py index 7bffea5cc9..d196bc05cf 100644 --- a/tests/ut/python/dataset/test_ten_crop.py +++ b/tests/ut/python/dataset/test_ten_crop.py @@ -62,7 +62,7 @@ def util_test_ten_crop(crop_size, vertical_flip=False, plot=False): logger.info("dtype of image_2: {}".format(image_2.dtype)) if plot: - visualize_list(np.array([image_1]*10), (image_2 * 255).astype(np.uint8).transpose(0, 2, 3, 1)) + visualize_list(np.array([image_1] * 10), (image_2 * 255).astype(np.uint8).transpose(0, 2, 3, 1)) # The output 
data should be of a 4D tensor shape, a stack of 10 images. assert len(image_2.shape) == 4 @@ -144,7 +144,7 @@ def test_ten_crop_invalid_size_error_msg(): vision.TenCrop(0), lambda images: np.stack([vision.ToTensor()(image) for image in images]) # 4D stack of 10 images ] - error_msg = "Input is not within the required range" + error_msg = "Input is not within the required interval of (1 to 16777216)." assert error_msg == str(info.value) with pytest.raises(ValueError) as info: diff --git a/tests/ut/python/dataset/test_text_basic_tokenizer.py b/tests/ut/python/dataset/test_text_basic_tokenizer.py new file mode 100644 index 0000000000..822790fd60 --- /dev/null +++ b/tests/ut/python/dataset/test_text_basic_tokenizer.py @@ -0,0 +1,138 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +""" +Testing BasicTokenizer op in DE +""" +import numpy as np +import mindspore.dataset as ds +from mindspore import log as logger +import mindspore.dataset.text as text + +BASIC_TOKENIZER_FILE = "../data/dataset/testTokenizerData/basic_tokenizer.txt" + +test_paras = [ + dict( + first=1, + last=6, + expected_tokens= + [['Welcome', 'to', 'Beijing', '北', '京', '欢', '迎', '您'], + ['長', '風', '破', '浪', '會', '有', '時', ',', '直', '掛', '雲', '帆', '濟', '滄', '海'], + ['😀', '嘿', '嘿', '😃', '哈', '哈', '😄', '大', '笑', '😁', '嘻', '嘻'], + ['明', '朝', '(', '1368', '—', '1644', '年', ')', '和', '清', '朝', + '(', '1644', '—', '1911', '年', ')', ',', '是', '中', '国', '封', + '建', '王', '朝', '史', '上', '最', '后', '两', '个', '朝', '代'], + ['明', '代', '(', '1368', '-', '1644', ')', 'と', '清', '代', + '(', '1644', '-', '1911', ')', 'は', '、', '中', '国', 'の', '封', + '建', '王', '朝', 'の', '歴', '史', 'における', '最', '後', 'の2つの', '王', '朝', 'でした'], + ['명나라', '(', '1368', '-', '1644', ')', '와', '청나라', '(', '1644', '-', '1911', ')', '는', + '중국', '봉건', '왕조의', '역사에서', '마지막', '두', '왕조였다']], + expected_offsets_start=[[0, 8, 11, 18, 21, 24, 27, 30], + [0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42], + [0, 4, 7, 10, 14, 17, 20, 24, 27, 30, 34, 37], + [0, 3, 6, 9, 13, 16, 20, 23, 26, 29, 32, 35, 38, 42, 45, 49, + 52, 55, 58, 61, 64, 67, 70, 73, 76, 79, 82, 85, 88, 91, 94, 97, 100], + [0, 3, 6, 9, 13, 14, 18, 21, 24, 27, 30, 33, 37, 38, 42, 45, 48, 51, + 54, 57, 60, 63, 66, 69, 72, 75, 78, 81, 93, 96, 99, 109, 112, 115], + [0, 10, 11, 15, 16, 20, 21, 25, 35, 36, 40, 41, 45, 46, 50, 57, 64, 74, 87, 97, 101]], + expected_offsets_limit=[[7, 10, 18, 21, 24, 27, 30, 33], + [3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45], + [4, 7, 10, 14, 17, 20, 24, 27, 30, 34, 37, 40], + [3, 6, 9, 13, 16, 20, 23, 26, 29, 32, 35, 38, 42, 45, 49, 52, 55, 58, + 61, 64, 67, 70, 73, 76, 79, 82, 85, 88, 91, 94, 97, 100, 103], + [3, 6, 9, 13, 14, 18, 21, 24, 27, 
30, 33, 37, 38, 42, 45, 48, 51, 54, + 57, 60, 63, 66, 69, 72, 75, 78, 81, 93, 96, 99, 109, 112, 115, 124], + [9, 11, 15, 16, 20, 21, 24, 34, 36, 40, 41, 45, 46, 49, 56, 63, 73, 86, 96, 100, 113]] + ), + dict( + first=7, + last=7, + expected_tokens=[['this', 'is', 'a', 'funky', 'string']], + expected_offsets_start=[[0, 5, 8, 10, 16]], + expected_offsets_limit=[[4, 7, 9, 15, 22]], + lower_case=True + ), +] + + +def check_basic_tokenizer_default(first, last, expected_tokens, expected_offsets_start, expected_offsets_limit, + lower_case=False, keep_whitespace=False, + normalization_form=text.utils.NormalizeForm.NONE, preserve_unused_token=False): + dataset = ds.TextFileDataset(BASIC_TOKENIZER_FILE, shuffle=False) + if first > 1: + dataset = dataset.skip(first - 1) + if last >= first: + dataset = dataset.take(last - first + 1) + + basic_tokenizer = text.BasicTokenizer(lower_case=lower_case, + keep_whitespace=keep_whitespace, + normalization_form=normalization_form, + preserve_unused_token=preserve_unused_token) + + dataset = dataset.map(operations=basic_tokenizer) + count = 0 + for i in dataset.create_dict_iterator(): + token = text.to_str(i['text']) + logger.info("Out:", token) + logger.info("Exp:", expected_tokens[count]) + np.testing.assert_array_equal(token, expected_tokens[count]) + count = count + 1 + + +def check_basic_tokenizer_with_offsets(first, last, expected_tokens, expected_offsets_start, expected_offsets_limit, + lower_case=False, keep_whitespace=False, + normalization_form=text.utils.NormalizeForm.NONE, preserve_unused_token=False): + dataset = ds.TextFileDataset(BASIC_TOKENIZER_FILE, shuffle=False) + if first > 1: + dataset = dataset.skip(first - 1) + if last >= first: + dataset = dataset.take(last - first + 1) + + basic_tokenizer = text.BasicTokenizer(lower_case=lower_case, + keep_whitespace=keep_whitespace, + normalization_form=normalization_form, + preserve_unused_token=preserve_unused_token, + with_offsets=True) + + dataset = 
dataset.map(input_columns=['text'], output_columns=['token', 'offsets_start', 'offsets_limit'], + columns_order=['token', 'offsets_start', 'offsets_limit'], operations=basic_tokenizer) + count = 0 + for i in dataset.create_dict_iterator(): + token = text.to_str(i['token']) + logger.info("Out:", token) + logger.info("Exp:", expected_tokens[count]) + np.testing.assert_array_equal(token, expected_tokens[count]) + np.testing.assert_array_equal(i['offsets_start'], expected_offsets_start[count]) + np.testing.assert_array_equal(i['offsets_limit'], expected_offsets_limit[count]) + count = count + 1 + +def test_basic_tokenizer_with_offsets(): + """ + Test BasicTokenizer + """ + for paras in test_paras: + check_basic_tokenizer_with_offsets(**paras) + + +def test_basic_tokenizer_default(): + """ + Test BasicTokenizer + """ + for paras in test_paras: + check_basic_tokenizer_default(**paras) + + +if __name__ == '__main__': + test_basic_tokenizer_default() + test_basic_tokenizer_with_offsets() diff --git a/tests/ut/python/dataset/test_bert_tokenizer.py b/tests/ut/python/dataset/test_text_bert_tokenizer.py similarity index 51% rename from tests/ut/python/dataset/test_bert_tokenizer.py rename to tests/ut/python/dataset/test_text_bert_tokenizer.py index ba487343a0..b29f94eb32 100644 --- a/tests/ut/python/dataset/test_bert_tokenizer.py +++ b/tests/ut/python/dataset/test_text_bert_tokenizer.py @@ -18,7 +18,7 @@ Testing BertTokenizer op in DE import numpy as np import mindspore.dataset as ds from mindspore import log as logger -import mindspore.dataset.text as nlp +import mindspore.dataset.text as text BERT_TOKENIZER_FILE = "../data/dataset/testTokenizerData/bert_tokenizer.txt" @@ -39,6 +39,14 @@ test_paras = [ ['疑', '是', '地', '上', '霜'], ['举', '头', '望', '明', '月'], ['低', '头', '思', '故', '乡']], + expected_offsets_start=[[0, 3, 6, 9, 12], + [0, 3, 6, 9, 12], + [0, 3, 6, 9, 12], + [0, 3, 6, 9, 12]], + expected_offsets_limit=[[3, 6, 9, 12, 15], + [3, 6, 9, 12, 15], + [3, 6, 9, 12, 15], + 
[3, 6, 9, 12, 15]], vocab_list=vocab_bert ), # test english text @@ -46,6 +54,8 @@ test_paras = [ first=5, last=5, expect_str=[['i', 'am', 'mak', '##ing', 'small', 'mistake', '##s', 'during', 'work', '##ing', 'hour', '##s']], + expected_offsets_start=[[0, 2, 5, 8, 12, 18, 25, 27, 34, 38, 42, 46]], + expected_offsets_limit=[[1, 4, 8, 11, 17, 25, 26, 33, 38, 41, 46, 47]], lower_case=True, vocab_list=vocab_bert ), @@ -53,6 +63,8 @@ test_paras = [ first=5, last=5, expect_str=[['I', "am", 'mak', '##ing', 'small', 'mistake', '##s', 'during', 'work', '##ing', 'hour', '##s']], + expected_offsets_start=[[0, 2, 5, 8, 12, 18, 25, 27, 34, 38, 42, 46]], + expected_offsets_limit=[[1, 4, 8, 11, 17, 25, 26, 33, 38, 41, 46, 47]], lower_case=False, vocab_list=vocab_bert ), @@ -63,7 +75,9 @@ test_paras = [ expect_str=[ ['😀', '嘿', '嘿', '😃', '哈', '哈', '😄', '大', '笑', '😁', '嘻', '嘻'], ['繁', '體', '字']], - normalization_form=nlp.utils.NormalizeForm.NFKC, + expected_offsets_start=[[0, 4, 7, 10, 14, 17, 20, 24, 27, 30, 34, 37], [0, 3, 6]], + expected_offsets_limit=[[4, 7, 10, 14, 17, 20, 24, 27, 30, 34, 37, 40], [3, 6, 9]], + normalization_form=text.utils.NormalizeForm.NFKC, vocab_list=vocab_bert ), # test preserved tokens @@ -79,6 +93,8 @@ test_paras = [ ['[unused1]'], ['[unused10]'] ], + expected_offsets_start=[[0, 7], [0, 7], [0, 7], [0, 7], [0, 7], [0], [0]], + expected_offsets_limit=[[6, 12], [6, 12], [6, 12], [6, 12], [6, 13], [9], [10]], lower_case=False, vocab_list=vocab_bert, preserve_unused_token=True, @@ -95,6 +111,8 @@ test_paras = [ ['[unused1]'], ['[unused10]'] ], + expected_offsets_start=[[0, 7], [0, 7], [0, 7], [0, 7], [0, 7], [0], [0]], + expected_offsets_limit=[[6, 12], [6, 12], [6, 12], [6, 12], [6, 13], [9], [10]], lower_case=True, vocab_list=vocab_bert, preserve_unused_token=True, @@ -104,6 +122,8 @@ test_paras = [ first=15, last=15, expect_str=[['12', '+', '/', '-', '28', '=', '40', '/', '-', '16']], + expected_offsets_start=[[0, 2, 3, 4, 5, 7, 8, 10, 11, 12]], + 
expected_offsets_limit=[[2, 3, 4, 5, 7, 8, 10, 11, 12, 14]], preserve_unused_token=True, vocab_list=vocab_bert ), @@ -112,6 +132,8 @@ test_paras = [ first=8, last=8, expect_str=[['[UNK]', ' ', '[CLS]']], + expected_offsets_start=[[0, 6, 7]], + expected_offsets_limit=[[6, 7, 12]], lower_case=False, vocab_list=vocab_bert, preserve_unused_token=True, @@ -121,6 +143,8 @@ test_paras = [ first=8, last=8, expect_str=[['unused', ' ', '[CLS]']], + expected_offsets_start=[[0, 6, 7]], + expected_offsets_limit=[[6, 7, 12]], lower_case=False, vocab_list=vocab_bert, preserve_unused_token=True, @@ -131,6 +155,8 @@ test_paras = [ first=8, last=8, expect_str=[['unused', ' ', '[', 'CLS', ']']], + expected_offsets_start=[[0, 6, 7, 8, 11]], + expected_offsets_limit=[[6, 7, 8, 11, 12]], lower_case=False, vocab_list=vocab_bert, preserve_unused_token=False, @@ -140,20 +166,20 @@ test_paras = [ ] -def check_bert_tokenizer(first, last, expect_str, - vocab_list, - suffix_indicator='##', - max_bytes_per_token=100, unknown_token='[UNK]', - lower_case=False, keep_whitespace=False, - normalization_form=nlp.utils.NormalizeForm.NONE, - preserve_unused_token=False): +def check_bert_tokenizer_default(first, last, expect_str, + expected_offsets_start, expected_offsets_limit, + vocab_list, suffix_indicator='##', + max_bytes_per_token=100, unknown_token='[UNK]', + lower_case=False, keep_whitespace=False, + normalization_form=text.utils.NormalizeForm.NONE, + preserve_unused_token=False): dataset = ds.TextFileDataset(BERT_TOKENIZER_FILE, shuffle=False) if first > 1: dataset = dataset.skip(first - 1) if last >= first: dataset = dataset.take(last - first + 1) - vocab = nlp.Vocab.from_list(vocab_list) - tokenizer_op = nlp.BertTokenizer( + vocab = text.Vocab.from_list(vocab_list) + tokenizer_op = text.BertTokenizer( vocab=vocab, suffix_indicator=suffix_indicator, max_bytes_per_token=max_bytes_per_token, unknown_token=unknown_token, lower_case=lower_case, keep_whitespace=keep_whitespace, @@ -162,20 +188,59 
@@ def check_bert_tokenizer(first, last, expect_str, dataset = dataset.map(operations=tokenizer_op) count = 0 for i in dataset.create_dict_iterator(): - text = nlp.to_str(i['text']) - logger.info("Out:", text) + token = text.to_str(i['text']) + logger.info("Out:", token) logger.info("Exp:", expect_str[count]) - np.testing.assert_array_equal(text, expect_str[count]) + np.testing.assert_array_equal(token, expect_str[count]) count = count + 1 -def test_bert_tokenizer(): +def check_bert_tokenizer_with_offsets(first, last, expect_str, + expected_offsets_start, expected_offsets_limit, + vocab_list, suffix_indicator='##', + max_bytes_per_token=100, unknown_token='[UNK]', + lower_case=False, keep_whitespace=False, + normalization_form=text.utils.NormalizeForm.NONE, + preserve_unused_token=False): + dataset = ds.TextFileDataset(BERT_TOKENIZER_FILE, shuffle=False) + if first > 1: + dataset = dataset.skip(first - 1) + if last >= first: + dataset = dataset.take(last - first + 1) + vocab = text.Vocab.from_list(vocab_list) + tokenizer_op = text.BertTokenizer( + vocab=vocab, suffix_indicator=suffix_indicator, max_bytes_per_token=max_bytes_per_token, + unknown_token=unknown_token, lower_case=lower_case, keep_whitespace=keep_whitespace, + normalization_form=normalization_form, preserve_unused_token=preserve_unused_token, with_offsets=True) + dataset = dataset.map(input_columns=['text'], output_columns=['token', 'offsets_start', 'offsets_limit'], + columns_order=['token', 'offsets_start', 'offsets_limit'], operations=tokenizer_op) + count = 0 + for i in dataset.create_dict_iterator(): + token = text.to_str(i['token']) + logger.info("Out:", token) + logger.info("Exp:", expect_str[count]) + np.testing.assert_array_equal(token, expect_str[count]) + np.testing.assert_array_equal(i['offsets_start'], expected_offsets_start[count]) + np.testing.assert_array_equal(i['offsets_limit'], expected_offsets_limit[count]) + count = count + 1 + + +def test_bert_tokenizer_default(): + """ + Test 
WordpieceTokenizer when with_offsets=False + """ + for paras in test_paras: + check_bert_tokenizer_default(**paras) + + +def test_bert_tokenizer_with_offsets(): """ - Test WordpieceTokenizer + Test WordpieceTokenizer when with_offsets=True """ for paras in test_paras: - check_bert_tokenizer(**paras) + check_bert_tokenizer_with_offsets(**paras) if __name__ == '__main__': - test_bert_tokenizer() + test_bert_tokenizer_default() + test_bert_tokenizer_with_offsets() diff --git a/tests/ut/python/dataset/test_text_jieba_tokenizer.py b/tests/ut/python/dataset/test_text_jieba_tokenizer.py new file mode 100644 index 0000000000..66665b61e6 --- /dev/null +++ b/tests/ut/python/dataset/test_text_jieba_tokenizer.py @@ -0,0 +1,471 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +import numpy as np +import mindspore.dataset as ds +from mindspore.dataset.text import JiebaTokenizer +from mindspore.dataset.text import JiebaMode, to_str + +DATA_FILE = "../data/dataset/testJiebaDataset/3.txt" +DATA_ALL_FILE = "../data/dataset/testJiebaDataset/*" + +HMM_FILE = "../data/dataset/jiebadict/hmm_model.utf8" +MP_FILE = "../data/dataset/jiebadict/jieba.dict.utf8" + + +def test_jieba_1(): + """Test jieba tokenizer with MP mode""" + data = ds.TextFileDataset(DATA_FILE) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) + data = data.map(input_columns=["text"], + operations=jieba_op, num_parallel_workers=1) + expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] + ret = [] + for i in data.create_dict_iterator(): + ret = to_str(i["text"]) + for index, item in enumerate(ret): + assert item == expect[index] + + +def test_jieba_1_1(): + """Test jieba tokenizer with HMM mode""" + data = ds.TextFileDataset(DATA_FILE) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.HMM) + data = data.map(input_columns=["text"], + operations=jieba_op, num_parallel_workers=1) + expect = ['今天', '天气', '太', '好', '了', '我们', '一起', '去', '外面', '玩', '吧'] + for i in data.create_dict_iterator(): + ret = to_str(i["text"]) + for index, item in enumerate(ret): + assert item == expect[index] + + +def test_jieba_1_2(): + """Test jieba tokenizer with HMM MIX""" + data = ds.TextFileDataset(DATA_FILE) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MIX) + data = data.map(input_columns=["text"], + operations=jieba_op, num_parallel_workers=1) + expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] + for i in data.create_dict_iterator(): + ret = to_str(i["text"]) + for index, item in enumerate(ret): + assert item == expect[index] + + +def test_jieba_2(): + """Test add_word""" + DATA_FILE4 = "../data/dataset/testJiebaDataset/4.txt" + data = ds.TextFileDataset(DATA_FILE4) + 
jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) + jieba_op.add_word("男默女泪") + expect = ['男默女泪', '市', '长江大桥'] + data = data.map(input_columns=["text"], + operations=jieba_op, num_parallel_workers=2) + for i in data.create_dict_iterator(): + ret = to_str(i["text"]) + for index, item in enumerate(ret): + assert item == expect[index] + + +def test_jieba_2_1(): + """Test add_word with freq""" + DATA_FILE4 = "../data/dataset/testJiebaDataset/4.txt" + data = ds.TextFileDataset(DATA_FILE4) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) + jieba_op.add_word("男默女泪", 10) + data = data.map(input_columns=["text"], + operations=jieba_op, num_parallel_workers=2) + expect = ['男默女泪', '市', '长江大桥'] + for i in data.create_dict_iterator(): + ret = to_str(i["text"]) + for index, item in enumerate(ret): + assert item == expect[index] + + +def test_jieba_2_2(): + """Test add_word with invalid None Input""" + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) + try: + jieba_op.add_word(None) + except ValueError: + pass + + +def test_jieba_2_3(): + """Test add_word with freq, the value of freq affects the result of segmentation""" + DATA_FILE4 = "../data/dataset/testJiebaDataset/6.txt" + data = ds.TextFileDataset(DATA_FILE4) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) + jieba_op.add_word("江大桥", 20000) + data = data.map(input_columns=["text"], + operations=jieba_op, num_parallel_workers=2) + expect = ['江州', '市长', '江大桥', '参加', '了', '长江大桥', '的', '通车', '仪式'] + for i in data.create_dict_iterator(): + ret = to_str(i["text"]) + for index, item in enumerate(ret): + assert item == expect[index] + + +def test_jieba_3(): + """Test add_dict with dict""" + DATA_FILE4 = "../data/dataset/testJiebaDataset/4.txt" + user_dict = { + "男默女泪": 10 + } + data = ds.TextFileDataset(DATA_FILE4) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) + jieba_op.add_dict(user_dict) + data = data.map(input_columns=["text"], + 
operations=jieba_op, num_parallel_workers=1) + expect = ['男默女泪', '市', '长江大桥'] + for i in data.create_dict_iterator(): + ret = to_str(i["text"]) + for index, item in enumerate(ret): + assert item == expect[index] + + +def test_jieba_3_1(): + """Test add_dict with dict""" + DATA_FILE4 = "../data/dataset/testJiebaDataset/4.txt" + user_dict = { + "男默女泪": 10, + "江大桥": 20000 + } + data = ds.TextFileDataset(DATA_FILE4) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) + jieba_op.add_dict(user_dict) + data = data.map(input_columns=["text"], + operations=jieba_op, num_parallel_workers=1) + expect = ['男默女泪', '市长', '江大桥'] + for i in data.create_dict_iterator(): + ret = to_str(i["text"]) + for index, item in enumerate(ret): + assert item == expect[index] + + +def test_jieba_4(): + DATA_FILE4 = "../data/dataset/testJiebaDataset/3.txt" + DICT_FILE = "../data/dataset/testJiebaDataset/user_dict.txt" + + data = ds.TextFileDataset(DATA_FILE4) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) + jieba_op.add_dict(DICT_FILE) + data = data.map(input_columns=["text"], + operations=jieba_op, num_parallel_workers=1) + expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] + for i in data.create_dict_iterator(): + ret = to_str(i["text"]) + for index, item in enumerate(ret): + assert item == expect[index] + + +def test_jieba_4_1(): + """Test add dict with invalid file path""" + DICT_FILE = "" + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) + try: + jieba_op.add_dict(DICT_FILE) + except ValueError: + pass + + +def test_jieba_5(): + """Test add dict with file path""" + DATA_FILE4 = "../data/dataset/testJiebaDataset/6.txt" + + data = ds.TextFileDataset(DATA_FILE4) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) + jieba_op.add_word("江大桥", 20000) + data = data.map(input_columns=["text"], + operations=jieba_op, num_parallel_workers=1) + expect = ['江州', '市长', '江大桥', '参加', '了', '长江大桥', '的', '通车', '仪式'] + for i in 
data.create_dict_iterator(): + ret = to_str(i["text"]) + for index, item in enumerate(ret): + assert item == expect[index] + + +def test_jieba_with_offsets_1(): + """Test jieba tokenizer with MP mode""" + data = ds.TextFileDataset(DATA_FILE) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=True) + data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + columns_order=["token", "offsets_start", "offsets_limit"], + operations=jieba_op, num_parallel_workers=1) + expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] + expected_offsets_start = [0, 12, 21, 27, 33, 36, 42] + expected_offsets_limit = [12, 21, 27, 33, 36, 42, 48] + ret = [] + for i in data.create_dict_iterator(): + ret = to_str(i["token"]) + for index, item in enumerate(ret): + assert item == expect[index] + for index, item in enumerate(i["offsets_start"]): + assert item == expected_offsets_start[index] + for index, item in enumerate(i["offsets_limit"]): + assert item == expected_offsets_limit[index] + + +def test_jieba_with_offsets_1_1(): + """Test jieba tokenizer with HMM mode""" + data = ds.TextFileDataset(DATA_FILE) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.HMM, with_offsets=True) + data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + columns_order=["token", "offsets_start", "offsets_limit"], + operations=jieba_op, num_parallel_workers=1) + expect = ['今天', '天气', '太', '好', '了', '我们', '一起', '去', '外面', '玩', '吧'] + expected_offsets_start = [0, 6, 12, 15, 18, 21, 27, 33, 36, 42, 45] + expected_offsets_limit = [6, 12, 15, 18, 21, 27, 33, 36, 42, 45, 48] + for i in data.create_dict_iterator(): + ret = to_str(i["token"]) + for index, item in enumerate(ret): + assert item == expect[index] + for index, item in enumerate(i["offsets_start"]): + assert item == expected_offsets_start[index] + for index, item in enumerate(i["offsets_limit"]): + assert item == 
expected_offsets_limit[index] + + +def test_jieba_with_offsets_1_2(): + """Test jieba tokenizer with HMM MIX""" + data = ds.TextFileDataset(DATA_FILE) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MIX, with_offsets=True) + data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + columns_order=["token", "offsets_start", "offsets_limit"], + operations=jieba_op, num_parallel_workers=1) + expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] + expected_offsets_start = [0, 12, 21, 27, 33, 36, 42] + expected_offsets_limit = [12, 21, 27, 33, 36, 42, 48] + for i in data.create_dict_iterator(): + ret = to_str(i["token"]) + for index, item in enumerate(ret): + assert item == expect[index] + for index, item in enumerate(i["offsets_start"]): + assert item == expected_offsets_start[index] + for index, item in enumerate(i["offsets_limit"]): + assert item == expected_offsets_limit[index] + + +def test_jieba_with_offsets_2(): + """Test add_word""" + DATA_FILE4 = "../data/dataset/testJiebaDataset/4.txt" + data = ds.TextFileDataset(DATA_FILE4) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=True) + jieba_op.add_word("男默女泪") + expect = ['男默女泪', '市', '长江大桥'] + data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + columns_order=["token", "offsets_start", "offsets_limit"], + operations=jieba_op, num_parallel_workers=2) + expected_offsets_start = [0, 12, 15] + expected_offsets_limit = [12, 15, 27] + for i in data.create_dict_iterator(): + ret = to_str(i["token"]) + for index, item in enumerate(ret): + assert item == expect[index] + for index, item in enumerate(i["offsets_start"]): + assert item == expected_offsets_start[index] + for index, item in enumerate(i["offsets_limit"]): + assert item == expected_offsets_limit[index] + + +def test_jieba_with_offsets_2_1(): + """Test add_word with freq""" + DATA_FILE4 = "../data/dataset/testJiebaDataset/4.txt" 
+ data = ds.TextFileDataset(DATA_FILE4) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=True) + jieba_op.add_word("男默女泪", 10) + data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + columns_order=["token", "offsets_start", "offsets_limit"], + operations=jieba_op, num_parallel_workers=2) + expect = ['男默女泪', '市', '长江大桥'] + expected_offsets_start = [0, 12, 15] + expected_offsets_limit = [12, 15, 27] + for i in data.create_dict_iterator(): + ret = to_str(i["token"]) + for index, item in enumerate(ret): + assert item == expect[index] + for index, item in enumerate(i["offsets_start"]): + assert item == expected_offsets_start[index] + for index, item in enumerate(i["offsets_limit"]): + assert item == expected_offsets_limit[index] + + +def test_jieba_with_offsets_2_2(): + """Test add_word with freq, the value of freq affects the result of segmentation""" + DATA_FILE4 = "../data/dataset/testJiebaDataset/6.txt" + data = ds.TextFileDataset(DATA_FILE4) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=True) + jieba_op.add_word("江大桥", 20000) + data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + columns_order=["token", "offsets_start", "offsets_limit"], + operations=jieba_op, num_parallel_workers=2) + expect = ['江州', '市长', '江大桥', '参加', '了', '长江大桥', '的', '通车', '仪式'] + expected_offsets_start = [0, 6, 12, 21, 27, 30, 42, 45, 51] + expected_offsets_limit = [6, 12, 21, 27, 30, 42, 45, 51, 57] + for i in data.create_dict_iterator(): + ret = to_str(i["token"]) + for index, item in enumerate(ret): + assert item == expect[index] + for index, item in enumerate(i["offsets_start"]): + assert item == expected_offsets_start[index] + for index, item in enumerate(i["offsets_limit"]): + assert item == expected_offsets_limit[index] + + +def test_jieba_with_offsets_3(): + """Test add_dict with dict""" + DATA_FILE4 = 
"../data/dataset/testJiebaDataset/4.txt" + user_dict = { + "男默女泪": 10 + } + data = ds.TextFileDataset(DATA_FILE4) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=True) + jieba_op.add_dict(user_dict) + data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + columns_order=["token", "offsets_start", "offsets_limit"], + operations=jieba_op, num_parallel_workers=1) + expect = ['男默女泪', '市', '长江大桥'] + expected_offsets_start = [0, 12, 15] + expected_offsets_limit = [12, 15, 27] + for i in data.create_dict_iterator(): + ret = to_str(i["token"]) + for index, item in enumerate(ret): + assert item == expect[index] + for index, item in enumerate(i["offsets_start"]): + assert item == expected_offsets_start[index] + for index, item in enumerate(i["offsets_limit"]): + assert item == expected_offsets_limit[index] + + +def test_jieba_with_offsets_3_1(): + """Test add_dict with dict""" + DATA_FILE4 = "../data/dataset/testJiebaDataset/4.txt" + user_dict = { + "男默女泪": 10, + "江大桥": 20000 + } + data = ds.TextFileDataset(DATA_FILE4) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=True) + jieba_op.add_dict(user_dict) + data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + columns_order=["token", "offsets_start", "offsets_limit"], + operations=jieba_op, num_parallel_workers=1) + expect = ['男默女泪', '市长', '江大桥'] + expected_offsets_start = [0, 12, 18] + expected_offsets_limit = [12, 18, 27] + for i in data.create_dict_iterator(): + ret = to_str(i["token"]) + for index, item in enumerate(ret): + assert item == expect[index] + for index, item in enumerate(i["offsets_start"]): + assert item == expected_offsets_start[index] + for index, item in enumerate(i["offsets_limit"]): + assert item == expected_offsets_limit[index] + + +def test_jieba_with_offsets_4(): + DATA_FILE4 = "../data/dataset/testJiebaDataset/3.txt" + DICT_FILE = 
"../data/dataset/testJiebaDataset/user_dict.txt" + + data = ds.TextFileDataset(DATA_FILE4) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=True) + jieba_op.add_dict(DICT_FILE) + data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + columns_order=["token", "offsets_start", "offsets_limit"], + operations=jieba_op, num_parallel_workers=1) + expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] + expected_offsets_start = [0, 12, 21, 27, 33, 36, 42] + expected_offsets_limit = [12, 21, 27, 33, 36, 42, 48] + for i in data.create_dict_iterator(): + ret = to_str(i["token"]) + for index, item in enumerate(ret): + assert item == expect[index] + for index, item in enumerate(i["offsets_start"]): + assert item == expected_offsets_start[index] + for index, item in enumerate(i["offsets_limit"]): + assert item == expected_offsets_limit[index] + + +def test_jieba_with_offsets_5(): + """Test add dict with file path""" + DATA_FILE4 = "../data/dataset/testJiebaDataset/6.txt" + + data = ds.TextFileDataset(DATA_FILE4) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=True) + jieba_op.add_word("江大桥", 20000) + data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + columns_order=["token", "offsets_start", "offsets_limit"], + operations=jieba_op, num_parallel_workers=1) + expect = ['江州', '市长', '江大桥', '参加', '了', '长江大桥', '的', '通车', '仪式'] + expected_offsets_start = [0, 6, 12, 21, 27, 30, 42, 45, 51] + expected_offsets_limit = [6, 12, 21, 27, 30, 42, 45, 51, 57] + for i in data.create_dict_iterator(): + ret = to_str(i["token"]) + for index, item in enumerate(ret): + assert item == expect[index] + for index, item in enumerate(i["offsets_start"]): + assert item == expected_offsets_start[index] + for index, item in enumerate(i["offsets_limit"]): + assert item == expected_offsets_limit[index] + +def gen(): + text = 
np.array("今天天气太好了我们一起去外面玩吧".encode("UTF8"), dtype='S') + yield (text,) + + +def pytoken_op(input_data): + te = str(to_str(input_data)) + tokens = [] + tokens.append(te[:5].encode("UTF8")) + tokens.append(te[5:10].encode("UTF8")) + tokens.append(te[10:].encode("UTF8")) + return np.array(tokens, dtype='S') + + +def test_jieba_6(): + data = ds.GeneratorDataset(gen, column_names=["text"]) + data = data.map(input_columns=["text"], + operations=pytoken_op, num_parallel_workers=1) + expect = ['今天天气太', '好了我们一', '起去外面玩吧'] + for i in data.create_dict_iterator(): + ret = to_str(i["text"]) + for index, item in enumerate(ret): + assert item == expect[index] + + +if __name__ == "__main__": + test_jieba_1() + test_jieba_1_1() + test_jieba_1_2() + test_jieba_2() + test_jieba_2_1() + test_jieba_2_2() + test_jieba_3() + test_jieba_3_1() + test_jieba_4() + test_jieba_4_1() + test_jieba_5() + test_jieba_5() + test_jieba_6() + test_jieba_with_offsets_1() + test_jieba_with_offsets_1_1() + test_jieba_with_offsets_1_2() + test_jieba_with_offsets_2() + test_jieba_with_offsets_2_1() + test_jieba_with_offsets_2_2() + test_jieba_with_offsets_3() + test_jieba_with_offsets_3_1() + test_jieba_with_offsets_4() + test_jieba_with_offsets_5() diff --git a/tests/ut/python/dataset/test_text_tokenizer.py b/tests/ut/python/dataset/test_text_tokenizer.py new file mode 100644 index 0000000000..2e2b7b741d --- /dev/null +++ b/tests/ut/python/dataset/test_text_tokenizer.py @@ -0,0 +1,380 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Testing tokenizer text ops in DE: UnicodeCharTokenizer, WhitespaceTokenizer,
UnicodeScriptTokenizer, CaseFold, NormalizeUTF8, RegexReplace and RegexTokenizer.
"""
import numpy as np
import mindspore.dataset as ds
from mindspore import log as logger
import mindspore.dataset.text as text

# Test fixtures: small text files with known contents used by all cases below.
DATA_FILE = "../data/dataset/testTokenizerData/1.txt"
NORMALIZE_FILE = "../data/dataset/testTokenizerData/normalize.txt"
REGEX_REPLACE_FILE = "../data/dataset/testTokenizerData/regex_replace.txt"
REGEX_TOKENIZER_FILE = "../data/dataset/testTokenizerData/regex_tokenizer.txt"


def split_by_unicode_char(input_strs):
    """
    Split utf-8 strings into lists of their unicode characters.
    """
    out = []
    for s in input_strs:
        out.append([c for c in s])
    return out


def test_unicode_char_tokenizer_default():
    """
    Test UnicodeCharTokenizer with default parameters (no offsets).
    """
    input_strs = ("Welcome to Beijing!", "北京欢迎您!", "我喜欢English!", "  ")
    dataset = ds.TextFileDataset(DATA_FILE, shuffle=False)
    tokenizer = text.UnicodeCharTokenizer()
    dataset = dataset.map(operations=tokenizer)
    tokens = []
    for i in dataset.create_dict_iterator():
        token = text.to_str(i['text']).tolist()
        tokens.append(token)
    logger.info("The out tokens is : {}".format(tokens))
    assert split_by_unicode_char(input_strs) == tokens


def test_unicode_char_tokenizer_with_offsets():
    """
    Test UnicodeCharTokenizer with with_offsets=True; verifies tokens plus
    byte offsets (start/limit) of every character.
    """
    input_strs = ("Welcome to Beijing!", "北京欢迎您!", "我喜欢English!", "  ")
    dataset = ds.TextFileDataset(DATA_FILE, shuffle=False)
    tokenizer = text.UnicodeCharTokenizer(with_offsets=True)
    dataset = dataset.map(input_columns=['text'], output_columns=['token', 'offsets_start', 'offsets_limit'],
                          columns_order=['token', 'offsets_start', 'offsets_limit'], operations=tokenizer)
    tokens = []
    # Offsets are in bytes, so each CJK character advances by 3 (utf-8).
    expected_offsets_start = [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
                              [0, 3, 6, 9, 12, 15], [0, 3, 6, 9, 10, 11, 12, 13, 14, 15, 16],
                              [0, 1]]
    expected_offsets_limit = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
                              [3, 6, 9, 12, 15, 18], [3, 6, 9, 10, 11, 12, 13, 14, 15, 16, 17], [1, 2]]
    count = 0
    for i in dataset.create_dict_iterator():
        token = text.to_str(i['token']).tolist()
        tokens.append(token)
        np.testing.assert_array_equal(i['offsets_start'], expected_offsets_start[count])
        np.testing.assert_array_equal(i['offsets_limit'], expected_offsets_limit[count])
        count += 1
    logger.info("The out tokens is : {}".format(tokens))
    assert split_by_unicode_char(input_strs) == tokens


def test_whitespace_tokenizer_default():
    """
    Test WhitespaceTokenizer with default parameters (no offsets).
    """
    whitespace_strs = [["Welcome", "to", "Beijing!"],
                       ["北京欢迎您!"],
                       ["我喜欢English!"],
                       [""]]
    dataset = ds.TextFileDataset(DATA_FILE, shuffle=False)
    tokenizer = text.WhitespaceTokenizer()
    dataset = dataset.map(operations=tokenizer)
    tokens = []
    for i in dataset.create_dict_iterator():
        token = text.to_str(i['text']).tolist()
        tokens.append(token)
    logger.info("The out tokens is : {}".format(tokens))
    assert whitespace_strs == tokens


def test_whitespace_tokenizer_with_offsets():
    """
    Test WhitespaceTokenizer with with_offsets=True; verifies tokens plus
    byte offsets of every whitespace-delimited token.
    """
    whitespace_strs = [["Welcome", "to", "Beijing!"],
                       ["北京欢迎您!"],
                       ["我喜欢English!"],
                       [""]]
    dataset = ds.TextFileDataset(DATA_FILE, shuffle=False)
    tokenizer = text.WhitespaceTokenizer(with_offsets=True)
    dataset = dataset.map(input_columns=['text'], output_columns=['token', 'offsets_start', 'offsets_limit'],
                          columns_order=['token', 'offsets_start', 'offsets_limit'], operations=tokenizer)
    tokens = []
    expected_offsets_start = [[0, 8, 11], [0], [0], [0]]
    expected_offsets_limit = [[7, 10, 19], [18], [17], [0]]
    count = 0
    for i in dataset.create_dict_iterator():
        token = text.to_str(i['token']).tolist()
        tokens.append(token)
        np.testing.assert_array_equal(i['offsets_start'], expected_offsets_start[count])
        np.testing.assert_array_equal(i['offsets_limit'], expected_offsets_limit[count])
        count += 1

    logger.info("The out tokens is : {}".format(tokens))
    assert whitespace_strs == tokens


def test_unicode_script_tokenizer_default():
    """
    Test UnicodeScriptTokenizer with keep_whitespace=False.
    """
    unicode_script_strs = [["Welcome", "to", "Beijing", "!"],
                           ["北京欢迎您", "!"],
                           ["我喜欢", "English", "!"],
                           [""]]
    dataset = ds.TextFileDataset(DATA_FILE, shuffle=False)
    tokenizer = text.UnicodeScriptTokenizer(keep_whitespace=False)
    dataset = dataset.map(operations=tokenizer)

    tokens = []
    for i in dataset.create_dict_iterator():
        token = text.to_str(i['text']).tolist()
        tokens.append(token)
    logger.info("The out tokens is : {}".format(tokens))
    assert unicode_script_strs == tokens


def test_unicode_script_tokenizer_default2():
    """
    Test UnicodeScriptTokenizer with keep_whitespace=True.
    """
    unicode_script_strs2 = [["Welcome", " ", "to", " ", "Beijing", "!"],
                            ["北京欢迎您", "!"],
                            ["我喜欢", "English", "!"],
                            ["  "]]
    dataset = ds.TextFileDataset(DATA_FILE, shuffle=False)
    tokenizer = text.UnicodeScriptTokenizer(keep_whitespace=True)
    dataset = dataset.map(operations=tokenizer)
    tokens = []
    for i in dataset.create_dict_iterator():
        token = text.to_str(i['text']).tolist()
        tokens.append(token)
    logger.info("The out tokens is : {}".format(tokens))
    assert unicode_script_strs2 == tokens


def test_unicode_script_tokenizer_with_offsets():
    """
    Test UnicodeScriptTokenizer with keep_whitespace=False and with_offsets=True.
    """
    unicode_script_strs = [["Welcome", "to", "Beijing", "!"],
                           ["北京欢迎您", "!"],
                           ["我喜欢", "English", "!"],
                           [""]]
    dataset = ds.TextFileDataset(DATA_FILE, shuffle=False)
    tokenizer = text.UnicodeScriptTokenizer(keep_whitespace=False, with_offsets=True)
    dataset = dataset.map(input_columns=['text'], output_columns=['token', 'offsets_start', 'offsets_limit'],
                          columns_order=['token', 'offsets_start', 'offsets_limit'], operations=tokenizer)
    tokens = []
    expected_offsets_start = [[0, 8, 11, 18], [0, 15], [0, 9, 16], [0]]
    expected_offsets_limit = [[7, 10, 18, 19], [15, 18], [9, 16, 17], [0]]
    count = 0
    for i in dataset.create_dict_iterator():
        token = text.to_str(i['token']).tolist()
        tokens.append(token)
        np.testing.assert_array_equal(i['offsets_start'], expected_offsets_start[count])
        np.testing.assert_array_equal(i['offsets_limit'], expected_offsets_limit[count])
        count += 1
    logger.info("The out tokens is : {}".format(tokens))
    assert unicode_script_strs == tokens


def test_unicode_script_tokenizer_with_offsets2():
    """
    Test UnicodeScriptTokenizer with keep_whitespace=True and with_offsets=True.
    """
    unicode_script_strs2 = [["Welcome", " ", "to", " ", "Beijing", "!"],
                            ["北京欢迎您", "!"],
                            ["我喜欢", "English", "!"],
                            ["  "]]
    dataset = ds.TextFileDataset(DATA_FILE, shuffle=False)
    tokenizer = text.UnicodeScriptTokenizer(keep_whitespace=True, with_offsets=True)
    dataset = dataset.map(input_columns=['text'], output_columns=['token', 'offsets_start', 'offsets_limit'],
                          columns_order=['token', 'offsets_start', 'offsets_limit'], operations=tokenizer)
    tokens = []
    expected_offsets_start = [[0, 7, 8, 10, 11, 18], [0, 15], [0, 9, 16], [0]]
    expected_offsets_limit = [[7, 8, 10, 11, 18, 19], [15, 18], [9, 16, 17], [2]]
    count = 0
    for i in dataset.create_dict_iterator():
        token = text.to_str(i['token']).tolist()
        tokens.append(token)
        np.testing.assert_array_equal(i['offsets_start'], expected_offsets_start[count])
        np.testing.assert_array_equal(i['offsets_limit'], expected_offsets_limit[count])
        count += 1
    logger.info("The out tokens is : {}".format(tokens))
    assert unicode_script_strs2 == tokens


def test_case_fold():
    """
    Test CaseFold: every input line is lower-cased; non-cased characters pass through.
    """
    expect_strs = ["welcome to beijing!", "北京欢迎您!", "我喜欢english!", "  "]
    dataset = ds.TextFileDataset(DATA_FILE, shuffle=False)
    op = text.CaseFold()
    dataset = dataset.map(operations=op)

    lower_strs = []
    for i in dataset.create_dict_iterator():
        token = text.to_str(i['text']).tolist()
        lower_strs.append(token)
    assert lower_strs == expect_strs


def test_normalize_utf8():
    """
    Test NormalizeUTF8 for the four unicode normalization forms
    (NFC, NFKC, NFD, NFKD); compares raw output bytes.
    """

    def normalize(normalize_form):
        # Run the whole normalize fixture file through one normalization form
        # and return the raw output bytes per row.
        dataset = ds.TextFileDataset(NORMALIZE_FILE, shuffle=False)
        normalize_op = text.NormalizeUTF8(normalize_form=normalize_form)
        dataset = dataset.map(operations=normalize_op)
        out_bytes = []
        out_texts = []
        for i in dataset.create_dict_iterator():
            out_bytes.append(i['text'])
            out_texts.append(text.to_str(i['text']).tolist())
        logger.info("The out bytes is : {}".format(out_bytes))
        logger.info("The out texts is: {}".format(out_texts))
        return out_bytes

    expect_normalize_data = [
        # NFC
        [b'\xe1\xb9\xa9', b'\xe1\xb8\x8d\xcc\x87', b'q\xcc\xa3\xcc\x87',
         b'\xef\xac\x81', b'2\xe2\x81\xb5', b'\xe1\xba\x9b\xcc\xa3'],
        # NFKC
        [b'\xe1\xb9\xa9', b'\xe1\xb8\x8d\xcc\x87', b'q\xcc\xa3\xcc\x87',
         b'fi', b'25', b'\xe1\xb9\xa9'],
        # NFD
        [b's\xcc\xa3\xcc\x87', b'd\xcc\xa3\xcc\x87', b'q\xcc\xa3\xcc\x87',
         b'\xef\xac\x81', b'2\xe2\x81\xb5', b'\xc5\xbf\xcc\xa3\xcc\x87'],
        # NFKD
        [b's\xcc\xa3\xcc\x87', b'd\xcc\xa3\xcc\x87', b'q\xcc\xa3\xcc\x87',
         b'fi', b'25', b's\xcc\xa3\xcc\x87']
    ]
    assert normalize(text.utils.NormalizeForm.NFC) == expect_normalize_data[0]
    assert normalize(text.utils.NormalizeForm.NFKC) == expect_normalize_data[1]
    assert normalize(text.utils.NormalizeForm.NFD) == expect_normalize_data[2]
    assert normalize(text.utils.NormalizeForm.NFKD) == expect_normalize_data[3]


def test_regex_replace():
    """
    Test RegexReplace on selected line ranges of the fixture file.
    """

    def regex_replace(first, last, expect_str, pattern, replace):
        # skip/take select the 1-based inclusive line range [first, last].
        dataset = ds.TextFileDataset(REGEX_REPLACE_FILE, shuffle=False)
        if first > 1:
            dataset = dataset.skip(first - 1)
        if last >= first:
            dataset = dataset.take(last - first + 1)
        replace_op = text.RegexReplace(pattern, replace)
        dataset = dataset.map(operations=replace_op)
        out_text = []
        for i in dataset.create_dict_iterator():
            token = text.to_str(i['text']).tolist()
            out_text.append(token)
        logger.info("Out: {}".format(out_text))
        logger.info("Exp: {}".format(expect_str))
        assert expect_str == out_text

    regex_replace(1, 2, ['H____ W____', "L__'_ G_"], "\\p{Ll}", '_')
    regex_replace(3, 5, ['hello', 'world', '31:beijing'], "^(\\d:|b:)", "")
    regex_replace(6, 6, ["WelcometoChina!"], "\\s+", "")
    regex_replace(7, 8, ['我不想长大', 'WelcometoShenzhen!'], "\\p{Cc}|\\p{Cf}|\\s+", "")


def test_regex_tokenizer_default():
    """
    Test RegexTokenizer (no offsets) with several delim/keep-delim patterns.
    """

    def regex_tokenizer(first, last, expect_str, delim_pattern, keep_delim_pattern):
        # skip/take select the 1-based inclusive line range [first, last].
        dataset = ds.TextFileDataset(REGEX_TOKENIZER_FILE, shuffle=False)
        if first > 1:
            dataset = dataset.skip(first - 1)
        if last >= first:
            dataset = dataset.take(last - first + 1)
        tokenizer_op = text.RegexTokenizer(delim_pattern, keep_delim_pattern)
        dataset = dataset.map(operations=tokenizer_op)
        out_text = []
        count = 0
        for i in dataset.create_dict_iterator():
            token = text.to_str(i['text']).tolist()
            np.testing.assert_array_equal(token, expect_str[count])
            count += 1
            out_text.append(token)
        logger.info("Out: {}".format(out_text))
        logger.info("Exp: {}".format(expect_str))

    regex_tokenizer(1, 1, [['Welcome', 'to', 'Shenzhen!']], "\\s+", "")
    regex_tokenizer(1, 1, [['Welcome', ' ', 'to', ' ', 'Shenzhen!']], "\\s+", "\\s+")
    regex_tokenizer(2, 2, [['北', '京', '欢', '迎', '您', '!Welcome to Beijing!']], r"\p{Han}", r"\p{Han}")
    regex_tokenizer(3, 3, [['12', '¥+', '36', '¥=?']], r"[\p{P}|\p{S}]+", r"[\p{P}|\p{S}]+")
    regex_tokenizer(3, 3, [['12', '36']], r"[\p{P}|\p{S}]+", "")
    regex_tokenizer(3, 3, [['¥+', '¥=?']], r"[\p{N}]+", "")


def test_regex_tokenizer_with_offsets():
    """
    Test RegexTokenizer with with_offsets=True; verifies tokens plus byte offsets.
    """

    def regex_tokenizer(first, last, expect_str, expected_offsets_start, expected_offsets_limit, delim_pattern,
                        keep_delim_pattern):
        # skip/take select the 1-based inclusive line range [first, last].
        dataset = ds.TextFileDataset(REGEX_TOKENIZER_FILE, shuffle=False)
        if first > 1:
            dataset = dataset.skip(first - 1)
        if last >= first:
            dataset = dataset.take(last - first + 1)
        tokenizer_op = text.RegexTokenizer(delim_pattern, keep_delim_pattern, with_offsets=True)
        dataset = dataset.map(input_columns=['text'], output_columns=['token', 'offsets_start', 'offsets_limit'],
                              columns_order=['token', 'offsets_start', 'offsets_limit'], operations=tokenizer_op)
        out_text = []
        count = 0
        for i in dataset.create_dict_iterator():
            token = text.to_str(i['token']).tolist()
            np.testing.assert_array_equal(token, expect_str[count])
            np.testing.assert_array_equal(i['offsets_start'], expected_offsets_start[count])
            np.testing.assert_array_equal(i['offsets_limit'], expected_offsets_limit[count])
            count += 1
            out_text.append(token)
        logger.info("Out: {}".format(out_text))
        logger.info("Exp: {}".format(expect_str))

    regex_tokenizer(1, 1, [['Welcome', 'to', 'Shenzhen!']], [[0, 8, 11]], [[7, 10, 20]], "\\s+", "")
    regex_tokenizer(1, 1, [['Welcome', ' ', 'to', ' ', 'Shenzhen!']], [[0, 7, 8, 10, 11]], [[7, 8, 10, 11, 20]],
                    "\\s+", "\\s+")
    regex_tokenizer(2, 2, [['北', '京', '欢', '迎', '您', '!Welcome to Beijing!']], [[0, 3, 6, 9, 12, 15]],
                    [[3, 6, 9, 12, 15, 35]], r"\p{Han}", r"\p{Han}")
    regex_tokenizer(3, 3, [['12', '¥+', '36', '¥=?']], [[0, 2, 6, 8]], [[2, 6, 8, 13]],
                    r"[\p{P}|\p{S}]+", r"[\p{P}|\p{S}]+")
    regex_tokenizer(3, 3, [['12', '36']], [[0, 6]], [[2, 8]], r"[\p{P}|\p{S}]+", "")
    regex_tokenizer(3, 3, [['¥+', '¥=?']], [[2, 8]], [[6, 13]], r"[\p{N}]+", "")


if __name__ == '__main__':
    test_unicode_char_tokenizer_default()
    test_unicode_char_tokenizer_with_offsets()
    test_whitespace_tokenizer_default()
    test_whitespace_tokenizer_with_offsets()
    test_unicode_script_tokenizer_default()
    test_unicode_script_tokenizer_default2()
    test_unicode_script_tokenizer_with_offsets()
    test_unicode_script_tokenizer_with_offsets2()
    test_case_fold()
    test_normalize_utf8()
    test_regex_replace()
    test_regex_tokenizer_default()
    test_regex_tokenizer_with_offsets()
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Testing WordpieceTokenizer op in DE
"""
import numpy as np
import mindspore.dataset as ds
from mindspore import log as logger
import mindspore.dataset.text as text

WORDPIECE_TOKENIZER_FILE = "../data/dataset/testTokenizerData/wordpiece_tokenizer.txt"

# English wordpiece vocab; "##"-prefixed entries are sub-word continuations.
vocab_english = [
    "book", "cholera", "era", "favor", "##ite", "my", "is", "love", "dur", "##ing", "the"
]

# Single-character Chinese vocab.
vocab_chinese = [
    "我", '最', '喜', '欢', '的', '书', '是', '霍', '乱', '时', '期', '爱', '情'
]

vocab_mix = vocab_chinese + vocab_english

# Parameter sets shared by both check helpers; each dict selects a line range
# [first, last] of the fixture file and declares the expected tokens/offsets.
test_paras = [
    dict(
        first=1,
        last=10,
        expect_str=[['my'], ['favor', '##ite'], ['book'], ['is'], ['love'], ['dur', '##ing'], ['the'], ['cholera'],
                    ['era'], ['[UNK]']],
        expected_offsets_start=[[0], [0, 5], [0], [0], [0], [0, 3], [0], [0], [0], [0]],
        expected_offsets_limit=[[2], [5, 8], [4], [2], [4], [3, 6], [3], [7], [3], [4]],
        vocab_list=vocab_english
    ),
    dict(
        first=1,
        last=10,
        expect_str=[['my'], ['favor', '##ite'], ['book'], ['is'], ['love'], ['dur', '##ing'], ['the'], ['cholera'],
                    ['era'], ['what']],
        expected_offsets_start=[[0], [0, 5], [0], [0], [0], [0, 3], [0], [0], [0], [0]],
        expected_offsets_limit=[[2], [5, 8], [4], [2], [4], [3, 6], [3], [7], [3], [4]],
        vocab_list=vocab_english,
        unknown_token=""
    ),
    dict(
        first=1,
        last=10,
        expect_str=[['my'], ['[UNK]'], ['book'], ['is'], ['love'], ['[UNK]'], ['the'], ['[UNK]'], ['era'], ['[UNK]']],
        expected_offsets_start=[[0], [0], [0], [0], [0], [0], [0], [0], [0], [0]],
        expected_offsets_limit=[[2], [5], [4], [2], [4], [5], [3], [5], [3], [4]],
        vocab_list=vocab_english,
        max_bytes_per_token=4
    ),
    dict(
        first=11,
        last=25,
        expect_str=[['我'], ['最'], ['喜'], ['欢'], ['的'], ['书'], ['是'], ['霍'], ['乱'], ['时'], ['期'], ['的'], ['爱'], ['情'],
                    ['[UNK]']],
        expected_offsets_start=[[0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0]],
        expected_offsets_limit=[[3], [3], [3], [3], [3], [3], [3], [3], [3], [3], [3], [3], [3], [3], [3]],
        vocab_list=vocab_chinese,
    ),
    dict(
        first=25,
        last=25,
        expect_str=[['您']],
        expected_offsets_start=[[0]],
        expected_offsets_limit=[[3]],
        vocab_list=vocab_chinese,
        unknown_token=""
    ),
    dict(
        first=1,
        last=25,
        expect_str=[
            ['my'], ['favor', '##ite'], ['book'], ['is'], ['love'], ['dur', '##ing'], ['the'], ['cholera'], ['era'],
            ['[UNK]'],
            ['我'], ['最'], ['喜'], ['欢'], ['的'], ['书'], ['是'], ['霍'], ['乱'], ['时'], ['期'], ['的'], ['爱'], ['情'],
            ['[UNK]']],
        expected_offsets_start=[[0], [0, 5], [0], [0], [0], [0, 3], [0], [0], [0], [0],
                                [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0]],
        expected_offsets_limit=[[2], [5, 8], [4], [2], [4], [3, 6], [3], [7], [3], [4],
                                [3], [3], [3], [3], [3], [3], [3], [3], [3], [3], [3], [3], [3], [3], [3]],
        vocab_list=vocab_mix,
    ),
]


def check_wordpiece_tokenizer_default(first, last, expect_str, expected_offsets_start, expected_offsets_limit,
                                      vocab_list, unknown_token='[UNK]', max_bytes_per_token=100):
    """
    Run WordpieceTokenizer without offsets and compare tokens to expect_str.
    expected_offsets_start/limit are accepted (the paras dicts are shared with
    the offsets variant) but intentionally not checked here.
    """
    dataset = ds.TextFileDataset(WORDPIECE_TOKENIZER_FILE, shuffle=False)
    if first > 1:
        dataset = dataset.skip(first - 1)
    if last >= first:
        dataset = dataset.take(last - first + 1)
    vocab = text.Vocab.from_list(vocab_list)
    tokenizer_op = text.WordpieceTokenizer(vocab=vocab, unknown_token=unknown_token,
                                           max_bytes_per_token=max_bytes_per_token)
    dataset = dataset.map(operations=tokenizer_op)
    count = 0
    for i in dataset.create_dict_iterator():
        token = text.to_str(i['text'])
        logger.info("Out: {}".format(token))
        logger.info("Exp: {}".format(expect_str[count]))
        np.testing.assert_array_equal(token, expect_str[count])
        count = count + 1


def check_wordpiece_tokenizer_with_offsets(first, last, expect_str, expected_offsets_start, expected_offsets_limit,
                                           vocab_list, unknown_token='[UNK]', max_bytes_per_token=100):
    """
    Run WordpieceTokenizer with with_offsets=True and compare tokens plus the
    byte offsets of every produced sub-word.
    """
    dataset = ds.TextFileDataset(WORDPIECE_TOKENIZER_FILE, shuffle=False)
    if first > 1:
        dataset = dataset.skip(first - 1)
    if last >= first:
        dataset = dataset.take(last - first + 1)
    vocab = text.Vocab.from_list(vocab_list)
    tokenizer_op = text.WordpieceTokenizer(vocab=vocab, with_offsets=True, unknown_token=unknown_token,
                                           max_bytes_per_token=max_bytes_per_token)
    dataset = dataset.map(input_columns=['text'], output_columns=['token', 'offsets_start', 'offsets_limit'],
                          columns_order=['token', 'offsets_start', 'offsets_limit'], operations=tokenizer_op)
    count = 0
    for i in dataset.create_dict_iterator():
        token = text.to_str(i['token'])
        logger.info("Out: {}".format(token))
        logger.info("Exp: {}".format(expect_str[count]))
        np.testing.assert_array_equal(token, expect_str[count])
        np.testing.assert_array_equal(i['offsets_start'], expected_offsets_start[count])
        np.testing.assert_array_equal(i['offsets_limit'], expected_offsets_limit[count])
        count = count + 1


def test_wordpiece_tokenizer_default():
    """
    Test WordpieceTokenizer (no offsets) over all shared parameter sets.
    """
    for paras in test_paras:
        check_wordpiece_tokenizer_default(**paras)


def test_wordpiece_tokenizer_with_offsets():
    """
    Test WordpieceTokenizer with offsets over all shared parameter sets.
    """
    for paras in test_paras:
        check_wordpiece_tokenizer_with_offsets(**paras)


if __name__ == '__main__':
    test_wordpiece_tokenizer_default()
    test_wordpiece_tokenizer_with_offsets()
ds.TFRecordDataset(FILES, schema_file) assert ds1.get_dataset_size() == 12 @@ -46,7 +56,8 @@ def test_case_tf_read_all_dataset(): assert count == 12 -def test_case_num_samples(): +def test_tfrecord_num_samples(): + logger.info("test_tfrecord_num_samples") schema_file = "../data/dataset/testTFTestAllTypes/datasetSchema7Rows.json" ds1 = ds.TFRecordDataset(FILES, schema_file, num_samples=8) assert ds1.get_dataset_size() == 8 @@ -56,7 +67,8 @@ def test_case_num_samples(): assert count == 8 -def test_case_num_samples2(): +def test_tfrecord_num_samples2(): + logger.info("test_tfrecord_num_samples2") schema_file = "../data/dataset/testTFTestAllTypes/datasetSchema7Rows.json" ds1 = ds.TFRecordDataset(FILES, schema_file) assert ds1.get_dataset_size() == 7 @@ -66,42 +78,41 @@ def test_case_num_samples2(): assert count == 7 -def test_case_tf_shape_2(): +def test_tfrecord_shape2(): + logger.info("test_tfrecord_shape2") ds1 = ds.TFRecordDataset(FILES, SCHEMA_FILE) ds1 = ds1.batch(2) output_shape = ds1.output_shapes() assert len(output_shape[-1]) == 2 -def test_case_tf_file(): - logger.info("reading data from: {}".format(FILES[0])) - parameters = {"params": {}} +def test_tfrecord_files_basic(): + logger.info("test_tfrecord_files_basic") data = ds.TFRecordDataset(FILES, SCHEMA_FILE, shuffle=ds.Shuffle.FILES) - filename = "tfreader_result.npz" - save_and_check(data, parameters, filename, generate_golden=GENERATE_GOLDEN) + filename = "tfrecord_files_basic.npz" + save_and_check_dict(data, filename, generate_golden=GENERATE_GOLDEN) -def test_case_tf_file_no_schema(): - logger.info("reading data from: {}".format(FILES[0])) - parameters = {"params": {}} +def test_tfrecord_no_schema(): + logger.info("test_tfrecord_no_schema") data = ds.TFRecordDataset(FILES, shuffle=ds.Shuffle.FILES) - filename = "tf_file_no_schema.npz" - save_and_check(data, parameters, filename, generate_golden=GENERATE_GOLDEN) + filename = "tfrecord_no_schema.npz" + save_and_check_dict(data, filename, 
generate_golden=GENERATE_GOLDEN) -def test_case_tf_file_pad(): - logger.info("reading data from: {}".format(FILES[0])) - parameters = {"params": {}} +def test_tfrecord_pad(): + logger.info("test_tfrecord_pad") schema_file = "../data/dataset/testTFTestAllTypes/datasetSchemaPadBytes10.json" data = ds.TFRecordDataset(FILES, schema_file, shuffle=ds.Shuffle.FILES) - filename = "tf_file_padBytes10.npz" - save_and_check(data, parameters, filename, generate_golden=GENERATE_GOLDEN) + filename = "tfrecord_pad_bytes10.npz" + save_and_check_dict(data, filename, generate_golden=GENERATE_GOLDEN) -def test_tf_files(): +def test_tfrecord_read_files(): + logger.info("test_tfrecord_read_files") pattern = DATASET_ROOT + "/test.data" data = ds.TFRecordDataset(pattern, SCHEMA_FILE, shuffle=ds.Shuffle.FILES) assert sum([1 for _ in data]) == 12 @@ -123,7 +134,19 @@ def test_tf_files(): assert sum([1 for _ in data]) == 24 -def test_tf_record_schema(): +def test_tfrecord_multi_files(): + logger.info("test_tfrecord_multi_files") + data1 = ds.TFRecordDataset(DATA_FILES2, SCHEMA_FILE2, shuffle=False) + data1 = data1.repeat(1) + num_iter = 0 + for _ in data1.create_dict_iterator(): + num_iter += 1 + + assert num_iter == 12 + + +def test_tfrecord_schema(): + logger.info("test_tfrecord_schema") schema = ds.Schema() schema.add_column('col_1d', de_type=mstype.int64, shape=[2]) schema.add_column('col_2d', de_type=mstype.int64, shape=[2, 2]) @@ -142,7 +165,8 @@ def test_tf_record_schema(): assert np.array_equal(t1, t2) -def test_tf_record_shuffle(): +def test_tfrecord_shuffle(): + logger.info("test_tfrecord_shuffle") ds.config.set_seed(1) data1 = ds.TFRecordDataset(FILES, schema=SCHEMA_FILE, shuffle=ds.Shuffle.GLOBAL) data2 = ds.TFRecordDataset(FILES, schema=SCHEMA_FILE, shuffle=ds.Shuffle.FILES) @@ -153,7 +177,8 @@ def test_tf_record_shuffle(): assert np.array_equal(t1, t2) -def test_tf_record_shard(): +def test_tfrecord_shard(): + logger.info("test_tfrecord_shard") tf_files = 
["../data/dataset/tf_file_dataset/test1.data", "../data/dataset/tf_file_dataset/test2.data", "../data/dataset/tf_file_dataset/test3.data", "../data/dataset/tf_file_dataset/test4.data"] @@ -181,7 +206,8 @@ def test_tf_record_shard(): assert set(worker2_res) == set(worker1_res) -def test_tf_shard_equal_rows(): +def test_tfrecord_shard_equal_rows(): + logger.info("test_tfrecord_shard_equal_rows") tf_files = ["../data/dataset/tf_file_dataset/test1.data", "../data/dataset/tf_file_dataset/test2.data", "../data/dataset/tf_file_dataset/test3.data", "../data/dataset/tf_file_dataset/test4.data"] @@ -209,7 +235,8 @@ def test_tf_shard_equal_rows(): assert len(worker4_res) == 40 -def test_case_tf_file_no_schema_columns_list(): +def test_tfrecord_no_schema_columns_list(): + logger.info("test_tfrecord_no_schema_columns_list") data = ds.TFRecordDataset(FILES, shuffle=False, columns_list=["col_sint16"]) row = data.create_dict_iterator().get_next() assert row["col_sint16"] == [-32768] @@ -219,7 +246,8 @@ def test_case_tf_file_no_schema_columns_list(): assert "col_sint32" in str(info.value) -def test_tf_record_schema_columns_list(): +def test_tfrecord_schema_columns_list(): + logger.info("test_tfrecord_schema_columns_list") schema = ds.Schema() schema.add_column('col_1d', de_type=mstype.int64, shape=[2]) schema.add_column('col_2d', de_type=mstype.int64, shape=[2, 2]) @@ -238,7 +266,8 @@ def test_tf_record_schema_columns_list(): assert "col_sint32" in str(info.value) -def test_case_invalid_files(): +def test_tfrecord_invalid_files(): + logger.info("test_tfrecord_invalid_files") valid_file = "../data/dataset/testTFTestAllTypes/test.data" invalid_file = "../data/dataset/testTFTestAllTypes/invalidFile.txt" files = [invalid_file, valid_file, SCHEMA_FILE] @@ -266,19 +295,20 @@ def test_case_invalid_files(): if __name__ == '__main__': - test_case_tf_shape() - test_case_tf_read_all_dataset() - test_case_num_samples() - test_case_num_samples2() - test_case_tf_shape_2() - test_case_tf_file() - 
test_case_tf_file_no_schema() - test_case_tf_file_pad() - test_tf_files() - test_tf_record_schema() - test_tf_record_shuffle() - test_tf_record_shard() - test_tf_shard_equal_rows() - test_case_tf_file_no_schema_columns_list() - test_tf_record_schema_columns_list() - test_case_invalid_files() + test_tfrecord_shape() + test_tfrecord_read_all_dataset() + test_tfrecord_num_samples() + test_tfrecord_num_samples2() + test_tfrecord_shape2() + test_tfrecord_files_basic() + test_tfrecord_no_schema() + test_tfrecord_pad() + test_tfrecord_read_files() + test_tfrecord_multi_files() + test_tfrecord_schema() + test_tfrecord_shuffle() + test_tfrecord_shard() + test_tfrecord_shard_equal_rows() + test_tfrecord_no_schema_columns_list() + test_tfrecord_schema_columns_list() + test_tfrecord_invalid_files() diff --git a/tests/ut/python/dataset/test_tokenizer.py b/tests/ut/python/dataset/test_tokenizer.py deleted file mode 100644 index 2ec988d8dc..0000000000 --- a/tests/ut/python/dataset/test_tokenizer.py +++ /dev/null @@ -1,233 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -""" -Testing UnicodeCharTokenizer op in DE -""" -import numpy as np -import mindspore.dataset as ds -from mindspore import log as logger -import mindspore.dataset.text as nlp - -DATA_FILE = "../data/dataset/testTokenizerData/1.txt" -NORMALIZE_FILE = "../data/dataset/testTokenizerData/normalize.txt" -REGEX_REPLACE_FILE = "../data/dataset/testTokenizerData/regex_replace.txt" -REGEX_TOKENIZER_FILE = "../data/dataset/testTokenizerData/regex_tokenizer.txt" - - -def split_by_unicode_char(input_strs): - """ - Split utf-8 strings to unicode characters - """ - out = [] - for s in input_strs: - out.append([c for c in s]) - return out - - -def test_unicode_char_tokenizer(): - """ - Test UnicodeCharTokenizer - """ - input_strs = ("Welcome to Beijing!", "北京欢迎您!", "我喜欢English!", " ") - dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) - tokenizer = nlp.UnicodeCharTokenizer() - dataset = dataset.map(operations=tokenizer) - tokens = [] - for i in dataset.create_dict_iterator(): - text = nlp.to_str(i['text']).tolist() - tokens.append(text) - logger.info("The out tokens is : {}".format(tokens)) - assert split_by_unicode_char(input_strs) == tokens - - -def test_whitespace_tokenizer(): - """ - Test WhitespaceTokenizer - """ - whitespace_strs = [["Welcome", "to", "Beijing!"], - ["北京欢迎您!"], - ["我喜欢English!"], - [""]] - dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) - tokenizer = nlp.WhitespaceTokenizer() - dataset = dataset.map(operations=tokenizer) - tokens = [] - for i in dataset.create_dict_iterator(): - text = nlp.to_str(i['text']).tolist() - tokens.append(text) - logger.info("The out tokens is : {}".format(tokens)) - assert whitespace_strs == tokens - - -def test_unicode_script_tokenizer(): - """ - Test UnicodeScriptTokenizer when para keep_whitespace=False - """ - unicode_script_strs = [["Welcome", "to", "Beijing", "!"], - ["北京欢迎您", "!"], - ["我喜欢", "English", "!"], - [""]] - dataset = 
ds.TextFileDataset(DATA_FILE, shuffle=False) - tokenizer = nlp.UnicodeScriptTokenizer(keep_whitespace=False) - dataset = dataset.map(operations=tokenizer) - - tokens = [] - for i in dataset.create_dict_iterator(): - text = nlp.to_str(i['text']).tolist() - tokens.append(text) - logger.info("The out tokens is : {}".format(tokens)) - assert unicode_script_strs == tokens - - -def test_unicode_script_tokenizer2(): - """ - Test UnicodeScriptTokenizer when para keep_whitespace=True - """ - unicode_script_strs2 = [["Welcome", " ", "to", " ", "Beijing", "!"], - ["北京欢迎您", "!"], - ["我喜欢", "English", "!"], - [" "]] - dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) - tokenizer = nlp.UnicodeScriptTokenizer(keep_whitespace=True) - dataset = dataset.map(operations=tokenizer) - tokens = [] - for i in dataset.create_dict_iterator(): - text = nlp.to_str(i['text']).tolist() - tokens.append(text) - logger.info("The out tokens is :", tokens) - assert unicode_script_strs2 == tokens - - -def test_case_fold(): - """ - Test CaseFold - """ - expect_strs = ["welcome to beijing!", "北京欢迎您!", "我喜欢english!", " "] - dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) - op = nlp.CaseFold() - dataset = dataset.map(operations=op) - - lower_strs = [] - for i in dataset.create_dict_iterator(): - text = nlp.to_str(i['text']).tolist() - lower_strs.append(text) - assert lower_strs == expect_strs - - -def test_normalize_utf8(): - """ - Test NormalizeUTF8 - """ - - def normalize(normalize_form): - dataset = ds.TextFileDataset(NORMALIZE_FILE, shuffle=False) - normalize = nlp.NormalizeUTF8(normalize_form=normalize_form) - dataset = dataset.map(operations=normalize) - out_bytes = [] - out_texts = [] - for i in dataset.create_dict_iterator(): - out_bytes.append(i['text']) - out_texts.append(nlp.to_str(i['text']).tolist()) - logger.info("The out bytes is : ", out_bytes) - logger.info("The out texts is: ", out_texts) - return out_bytes - - expect_normlize_data = [ - # NFC - [b'\xe1\xb9\xa9', 
b'\xe1\xb8\x8d\xcc\x87', b'q\xcc\xa3\xcc\x87', - b'\xef\xac\x81', b'2\xe2\x81\xb5', b'\xe1\xba\x9b\xcc\xa3'], - # NFKC - [b'\xe1\xb9\xa9', b'\xe1\xb8\x8d\xcc\x87', b'q\xcc\xa3\xcc\x87', - b'fi', b'25', b'\xe1\xb9\xa9'], - # NFD - [b's\xcc\xa3\xcc\x87', b'd\xcc\xa3\xcc\x87', b'q\xcc\xa3\xcc\x87', - b'\xef\xac\x81', b'2\xe2\x81\xb5', b'\xc5\xbf\xcc\xa3\xcc\x87'], - # NFKD - [b's\xcc\xa3\xcc\x87', b'd\xcc\xa3\xcc\x87', b'q\xcc\xa3\xcc\x87', - b'fi', b'25', b's\xcc\xa3\xcc\x87'] - ] - assert normalize(nlp.utils.NormalizeForm.NFC) == expect_normlize_data[0] - assert normalize(nlp.utils.NormalizeForm.NFKC) == expect_normlize_data[1] - assert normalize(nlp.utils.NormalizeForm.NFD) == expect_normlize_data[2] - assert normalize(nlp.utils.NormalizeForm.NFKD) == expect_normlize_data[3] - - -def test_regex_replace(): - """ - Test RegexReplace - """ - - def regex_replace(first, last, expect_str, pattern, replace): - dataset = ds.TextFileDataset(REGEX_REPLACE_FILE, shuffle=False) - if first > 1: - dataset = dataset.skip(first - 1) - if last >= first: - dataset = dataset.take(last - first + 1) - replace_op = nlp.RegexReplace(pattern, replace) - dataset = dataset.map(operations=replace_op) - out_text = [] - for i in dataset.create_dict_iterator(): - text = nlp.to_str(i['text']).tolist() - out_text.append(text) - logger.info("Out:", out_text) - logger.info("Exp:", expect_str) - assert expect_str == out_text - - regex_replace(1, 2, ['H____ W____', "L__'_ G_"], "\\p{Ll}", '_') - regex_replace(3, 5, ['hello', 'world', '31:beijing'], "^(\\d:|b:)", "") - regex_replace(6, 6, ["WelcometoChina!"], "\\s+", "") - regex_replace(7, 8, ['我不想长大', 'WelcometoShenzhen!'], "\\p{Cc}|\\p{Cf}|\\s+", "") - - -def test_regex_tokenizer(): - """ - Test RegexTokenizer - """ - - def regex_tokenizer(first, last, expect_str, delim_pattern, keep_delim_pattern): - dataset = ds.TextFileDataset(REGEX_TOKENIZER_FILE, shuffle=False) - if first > 1: - dataset = dataset.skip(first - 1) - if last >= first: - dataset = 
dataset.take(last - first + 1) - tokenizer_op = nlp.RegexTokenizer(delim_pattern, keep_delim_pattern) - dataset = dataset.map(operations=tokenizer_op) - out_text = [] - count = 0 - for i in dataset.create_dict_iterator(): - text = nlp.to_str(i['text']).tolist() - np.testing.assert_array_equal(text, expect_str[count]) - count += 1 - out_text.append(text) - logger.info("Out:", out_text) - logger.info("Exp:", expect_str) - - regex_tokenizer(1, 1, [['Welcome', 'to', 'Shenzhen!']], "\\s+", "") - regex_tokenizer(1, 1, [['Welcome', ' ', 'to', ' ', 'Shenzhen!']], "\\s+", "\\s+") - regex_tokenizer(2, 2, [['北', '京', '欢', '迎', '您', '!Welcome to Beijing!']], r"\p{Han}", r"\p{Han}") - regex_tokenizer(3, 3, [['12', '¥+', '36', '¥=?']], r"[\p{P}|\p{S}]+", r"[\p{P}|\p{S}]+") - regex_tokenizer(3, 3, [['12', '36']], r"[\p{P}|\p{S}]+", "") - regex_tokenizer(3, 3, [['¥+', '¥=?']], r"[\p{N}]+", "") - - -if __name__ == '__main__': - test_unicode_char_tokenizer() - test_whitespace_tokenizer() - test_unicode_script_tokenizer() - test_unicode_script_tokenizer2() - test_case_fold() - test_normalize_utf8() - test_regex_replace() - test_regex_tokenizer() diff --git a/tests/ut/python/dataset/test_uniform_augment.py b/tests/ut/python/dataset/test_uniform_augment.py index a26b647265..e5b66696ea 100644 --- a/tests/ut/python/dataset/test_uniform_augment.py +++ b/tests/ut/python/dataset/test_uniform_augment.py @@ -16,6 +16,7 @@ Testing UniformAugment in DE """ import numpy as np +import pytest import mindspore.dataset.engine as de import mindspore.dataset.transforms.vision.c_transforms as C @@ -164,12 +165,13 @@ def test_cpp_uniform_augment_exception_pyops(num_ops=2): C.RandomRotation(degrees=45), F.Invert()] - try: + with pytest.raises(TypeError) as e: _ = C.UniformAugment(operations=transforms_ua, num_ops=num_ops) - except Exception as e: - logger.info("Got an exception in DE: {}".format(str(e))) - assert "operations" in str(e) + logger.info("Got an exception in DE: {}".format(str(e))) + assert 
"Argument tensor_op_5 with value" \ + " ,)" in str(e.value) def test_cpp_uniform_augment_exception_large_numops(num_ops=6): @@ -209,7 +211,7 @@ def test_cpp_uniform_augment_exception_nonpositive_numops(num_ops=0): except Exception as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "num_ops" in str(e) + assert "Input num_ops must be greater than 0" in str(e) def test_cpp_uniform_augment_exception_float_numops(num_ops=2.5): @@ -229,7 +231,7 @@ def test_cpp_uniform_augment_exception_float_numops(num_ops=2.5): except Exception as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "integer" in str(e) + assert "Argument num_ops with value 2.5 is not of type (,)" in str(e) def test_cpp_uniform_augment_random_crop_badinput(num_ops=1): diff --git a/tests/ut/python/dataset/test_vocab.py b/tests/ut/python/dataset/test_vocab.py index 35411e5c80..0545181360 100644 --- a/tests/ut/python/dataset/test_vocab.py +++ b/tests/ut/python/dataset/test_vocab.py @@ -26,7 +26,7 @@ SIMPLE_VOCAB_FILE = "../data/dataset/testVocab/simple_vocab_list.txt" def test_from_list_tutorial(): vocab = text.Vocab.from_list("home IS behind the world ahead !".split(" "), ["", ""], True) - lookup = text.Lookup(vocab) + lookup = text.Lookup(vocab, "") data = ds.TextFileDataset(DATA_FILE, shuffle=False) data = data.map(input_columns=["text"], operations=lookup) ind = 0 @@ -50,7 +50,7 @@ def test_from_file_tutorial(): def test_from_dict_tutorial(): vocab = text.Vocab.from_dict({"home": 3, "behind": 2, "the": 4, "world": 5, "": 6}) - lookup = text.Lookup(vocab, 6) # default value is -1 + lookup = text.Lookup(vocab, "") # any unknown token will be mapped to the id of data = ds.TextFileDataset(DATA_FILE, shuffle=False) data = data.map(input_columns=["text"], operations=lookup) res = [3, 6, 2, 4, 5, 6] @@ -60,33 +60,51 @@ def test_from_dict_tutorial(): ind += 1 +def test_from_dict_exception(): + try: + vocab = text.Vocab.from_dict({"home": -1, "behind": 0}) + if not vocab: + 
raise ValueError("Vocab is None") + except ValueError as e: + assert "is not within the required interval" in str(e) + + def test_from_list(): def gen(texts): for word in texts.split(" "): yield (np.array(word, dtype='S'),) - def test_config(lookup_str, vocab_input, special_tokens, special_first): + def test_config(lookup_str, vocab_input, special_tokens, special_first, unknown_token): try: vocab = text.Vocab.from_list(vocab_input, special_tokens, special_first) data = ds.GeneratorDataset(gen(lookup_str), column_names=["text"]) - data = data.map(input_columns=["text"], operations=text.Lookup(vocab)) + data = data.map(input_columns=["text"], operations=text.Lookup(vocab, unknown_token)) res = [] for d in data.create_dict_iterator(): res.append(d["text"].item()) return res - except ValueError as e: + except (ValueError, RuntimeError, TypeError) as e: return str(e) + # test basic default config, special_token=None, unknown_token=None + assert test_config("w1 w2 w3", ["w1", "w2", "w3"], None, True, None) == [0, 1, 2] # test normal operations - assert test_config("w1 w2 w3 s1 s2", ["w1", "w2", "w3"], ["s1", "s2"], True) == [2, 3, 4, 0, 1] - assert test_config("w1 w2 w3 s1 s2", ["w1", "w2", "w3"], ["s1", "s2"], False) == [0, 1, 2, 3, 4] - assert test_config("w3 w2 w1", ["w1", "w2", "w3"], None, True) == [2, 1, 0] - assert test_config("w3 w2 w1", ["w1", "w2", "w3"], None, False) == [2, 1, 0] + assert test_config("w1 w2 w3 s1 s2 ephemeral", ["w1", "w2", "w3"], ["s1", "s2"], True, "s2") == [2, 3, 4, 0, 1, 1] + assert test_config("w1 w2 w3 s1 s2", ["w1", "w2", "w3"], ["s1", "s2"], False, "s2") == [0, 1, 2, 3, 4] + assert test_config("w3 w2 w1", ["w1", "w2", "w3"], None, True, "w1") == [2, 1, 0] + assert test_config("w3 w2 w1", ["w1", "w2", "w3"], None, False, "w1") == [2, 1, 0] + # test unknown token lookup + assert test_config("w1 un1 w3 un2", ["w1", "w2", "w3"], ["", ""], True, "") == [2, 1, 4, 1] + assert test_config("w1 un1 w3 un2", ["w1", "w2", "w3"], ["", ""], False, 
"") == [0, 4, 2, 4] # test exceptions - assert "word_list contains duplicate" in test_config("w1", ["w1", "w1"], [], True) - assert "special_tokens contains duplicate" in test_config("w1", ["w1", "w2"], ["s1", "s1"], True) - assert "special_tokens and word_list contain duplicate" in test_config("w1", ["w1", "w2"], ["s1", "w1"], True) + assert "doesn't exist in vocab." in test_config("un1", ["w1"], [], False, "unk") + assert "doesn't exist in vocab and no unknown token is specified." in test_config("un1", ["w1"], [], False, None) + assert "doesn't exist in vocab" in test_config("un1", ["w1"], [], False, None) + assert "word_list contains duplicate" in test_config("w1", ["w1", "w1"], [], True, "w1") + assert "special_tokens contains duplicate" in test_config("w1", ["w1", "w2"], ["s1", "s1"], True, "w1") + assert "special_tokens and word_list contain duplicate" in test_config("w1", ["w1", "w2"], ["s1", "w1"], True, "w1") + assert "is not of type" in test_config("w1", ["w1", "w2"], ["s1"], True, 123) def test_from_file(): @@ -99,7 +117,7 @@ def test_from_file(): vocab = text.Vocab.from_file(SIMPLE_VOCAB_FILE, vocab_size=vocab_size, special_tokens=special_tokens, special_first=special_first) data = ds.GeneratorDataset(gen(lookup_str), column_names=["text"]) - data = data.map(input_columns=["text"], operations=text.Lookup(vocab)) + data = data.map(input_columns=["text"], operations=text.Lookup(vocab, "s2")) res = [] for d in data.create_dict_iterator(): res.append(d["text"].item()) @@ -118,6 +136,7 @@ def test_from_file(): if __name__ == '__main__': + test_from_dict_exception() test_from_list_tutorial() test_from_file_tutorial() test_from_dict_tutorial() diff --git a/tests/ut/python/dataset/test_wordpiece_tokenizer.py b/tests/ut/python/dataset/test_wordpiece_tokenizer.py deleted file mode 100644 index 7934884740..0000000000 --- a/tests/ut/python/dataset/test_wordpiece_tokenizer.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# 
Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -""" -Testing WordpieceTokenizer op in DE -""" -import numpy as np -import mindspore.dataset as ds -from mindspore import log as logger -import mindspore.dataset.text as nlp - -WORDPIECE_TOKENIZER_FILE = "../data/dataset/testTokenizerData/wordpiece_tokenizer.txt" - -vocab_english = [ - "book", "cholera", "era", "favor", "##ite", "my", "is", "love", "dur", "##ing", "the" -] - -vocab_chinese = [ - "我", '最', '喜', '欢', '的', '书', '是', '霍', '乱', '时', '期', '爱', '情' -] - -vocab_mix = vocab_chinese + vocab_english - -test_paras = [ - dict( - first=1, - last=10, - expect_str=[['my'], ['favor', '##ite'], ['book'], ['is'], ['love'], ['dur', '##ing'], ['the'], ['cholera'], - ['era'], ['[UNK]']], - vocab_list=vocab_english - ), - dict( - first=1, - last=10, - expect_str=[['my'], ['favor', '##ite'], ['book'], ['is'], ['love'], ['dur', '##ing'], ['the'], ['cholera'], - ['era'], ['what']], - vocab_list=vocab_english, - unknown_token="" - ), - dict( - first=1, - last=10, - expect_str=[['my'], ['[UNK]'], ['book'], ['is'], ['love'], ['[UNK]'], ['the'], ['[UNK]'], ['era'], ['[UNK]']], - vocab_list=vocab_english, - max_bytes_per_token=4 - ), - dict( - first=11, - last=25, - expect_str=[['我'], ['最'], ['喜'], ['欢'], ['的'], ['书'], ['是'], ['霍'], ['乱'], ['时'], ['期'], ['的'], ['爱'], ['情'], - ['[UNK]']], - vocab_list=vocab_chinese, - ), - dict( - first=25, - last=25, - 
expect_str=[['您']], - vocab_list=vocab_chinese, - unknown_token="" - ), - dict( - first=1, - last=25, - expect_str=[ - ['my'], ['favor', '##ite'], ['book'], ['is'], ['love'], ['dur', '##ing'], ['the'], ['cholera'], ['era'], - ['[UNK]'], - ['我'], ['最'], ['喜'], ['欢'], ['的'], ['书'], ['是'], ['霍'], ['乱'], ['时'], ['期'], ['的'], ['爱'], ['情'], - ['[UNK]']], - vocab_list=vocab_mix, - ), -] - - -def check_wordpiece_tokenizer(first, last, expect_str, vocab_list, unknown_token='[UNK]', max_bytes_per_token=100): - dataset = ds.TextFileDataset(WORDPIECE_TOKENIZER_FILE, shuffle=False) - if first > 1: - dataset = dataset.skip(first - 1) - if last >= first: - dataset = dataset.take(last - first + 1) - vocab = nlp.Vocab.from_list(vocab_list) - tokenizer_op = nlp.WordpieceTokenizer(vocab=vocab, unknown_token=unknown_token, - max_bytes_per_token=max_bytes_per_token) - dataset = dataset.map(operations=tokenizer_op) - count = 0 - for i in dataset.create_dict_iterator(): - text = nlp.to_str(i['text']) - logger.info("Out:", text) - logger.info("Exp:", expect_str[count]) - np.testing.assert_array_equal(text, expect_str[count]) - count = count + 1 - - -def test_wordpiece_tokenizer(): - """ - Test WordpieceTokenizer - """ - for paras in test_paras: - check_wordpiece_tokenizer(**paras) - - -if __name__ == '__main__': - test_wordpiece_tokenizer() diff --git a/tests/ut/python/dataset/util.py b/tests/ut/python/dataset/util.py index 2a8e93cd0b..11c5735406 100644 --- a/tests/ut/python/dataset/util.py +++ b/tests/ut/python/dataset/util.py @@ -288,12 +288,13 @@ def config_get_set_num_parallel_workers(num_parallel_workers_new): return num_parallel_workers_original -def visualize_with_bounding_boxes(orig, aug, plot_rows=3): +def visualize_with_bounding_boxes(orig, aug, annot_name="annotation", plot_rows=3): """ Take a list of un-augmented and augmented images with "annotation" bounding boxes Plot images to compare test correct BBox augment functionality :param orig: list of original images and bboxes 
(without aug) :param aug: list of augmented images and bboxes + :param annot_name: the dict key for bboxes in data, e.g "bbox" (COCO) / "annotation" (VOC) :param plot_rows: number of rows on plot (rows = samples on one plot) :return: None """ @@ -301,9 +302,10 @@ def visualize_with_bounding_boxes(orig, aug, plot_rows=3): def add_bounding_boxes(ax, bboxes): for bbox in bboxes: rect = patches.Rectangle((bbox[0], bbox[1]), - bbox[2], bbox[3], - linewidth=1, edgecolor='r', facecolor='none') + bbox[2]*0.997, bbox[3]*0.997, + linewidth=1.80, edgecolor='r', facecolor='none') # Add the patch to the Axes + # Params to Rectangle slightly modified to prevent drawing overflow ax.add_patch(rect) # Quick check to confirm correct input parameters @@ -312,14 +314,15 @@ def visualize_with_bounding_boxes(orig, aug, plot_rows=3): if len(orig) != len(aug) or not orig: return - batch_size = int(len(orig)/plot_rows) # creates batches of images to plot together + batch_size = int(len(orig) / plot_rows) # creates batches of images to plot together split_point = batch_size * plot_rows orig, aug = np.array(orig), np.array(aug) if len(orig) > plot_rows: # Create batches of required size and add remainder to last batch - orig = np.split(orig[:split_point], batch_size) + ([orig[split_point:]] if (split_point < orig.shape[0]) else []) # check to avoid empty arrays being added + orig = np.split(orig[:split_point], batch_size) + ( + [orig[split_point:]] if (split_point < orig.shape[0]) else []) # check to avoid empty arrays being added aug = np.split(aug[:split_point], batch_size) + ([aug[split_point:]] if (split_point < aug.shape[0]) else []) else: orig = [orig] @@ -334,18 +337,19 @@ def visualize_with_bounding_boxes(orig, aug, plot_rows=3): for x, (dataA, dataB) in enumerate(zip(allData[0], allData[1])): cur_ix = base_ix + x - (axA, axB) = (axs[x, 0], axs[x, 1]) if (curPlot > 1) else (axs[0], axs[1]) # select plotting axes based on number of image rows on plot - else case when 1 row + # select 
plotting axes based on number of image rows on plot - else case when 1 row + (axA, axB) = (axs[x, 0], axs[x, 1]) if (curPlot > 1) else (axs[0], axs[1]) axA.imshow(dataA["image"]) - add_bounding_boxes(axA, dataA["annotation"]) + add_bounding_boxes(axA, dataA[annot_name]) axA.title.set_text("Original" + str(cur_ix+1)) axB.imshow(dataB["image"]) - add_bounding_boxes(axB, dataB["annotation"]) + add_bounding_boxes(axB, dataB[annot_name]) axB.title.set_text("Augmented" + str(cur_ix+1)) - logger.info("Original **\n{} : {}".format(str(cur_ix+1), dataA["annotation"])) - logger.info("Augmented **\n{} : {}\n".format(str(cur_ix+1), dataB["annotation"])) + logger.info("Original **\n{} : {}".format(str(cur_ix+1), dataA[annot_name])) + logger.info("Augmented **\n{} : {}\n".format(str(cur_ix+1), dataB[annot_name])) plt.show() @@ -381,19 +385,19 @@ def check_bad_bbox(data, test_op, invalid_bbox_type, expected_error): width = img.shape[1] if invalid_bbox_type_ == InvalidBBoxType.WidthOverflow: # use box that overflows on width - return img, np.array([[0, 0, width + 1, height, 0, 0, 0]]).astype(np.uint32) + return img, np.array([[0, 0, width + 1, height, 0, 0, 0]]).astype(np.float32) if invalid_bbox_type_ == InvalidBBoxType.HeightOverflow: # use box that overflows on height - return img, np.array([[0, 0, width, height + 1, 0, 0, 0]]).astype(np.uint32) + return img, np.array([[0, 0, width, height + 1, 0, 0, 0]]).astype(np.float32) if invalid_bbox_type_ == InvalidBBoxType.NegativeXY: # use box with negative xy - return img, np.array([[-10, -10, width, height, 0, 0, 0]]).astype(np.uint32) + return img, np.array([[-10, -10, width, height, 0, 0, 0]]).astype(np.float32) if invalid_bbox_type_ == InvalidBBoxType.WrongShape: # use box that has incorrect shape - return img, np.array([[0, 0, width - 1]]).astype(np.uint32) + return img, np.array([[0, 0, width - 1]]).astype(np.float32) return img, bboxes try: diff --git a/tests/ut/python/ir/test_indexed_slices.py 
b/tests/ut/python/ir/test_indexed_slices.py index 8690183090..36dfe464cb 100644 --- a/tests/ut/python/ir/test_indexed_slices.py +++ b/tests/ut/python/ir/test_indexed_slices.py @@ -36,6 +36,8 @@ from mindspore._checkparam import Rel from mindspore.nn import Optimizer from mindspore.nn import TrainOneStepCell, WithLossCell +context.set_context(mode=context.GRAPH_MODE, enable_sparse=True) + reduce_sum = P.ReduceSum() unsorted_segment_sum = P.UnsortedSegmentSum() transpose = P.Transpose() @@ -44,7 +46,6 @@ reshape = P.Reshape() size_op = P.Size() invert_permutation = P.InvertPermutation() logical_and = P.LogicalAnd() -context.set_context(mode=context.GRAPH_MODE, enable_sparse=True) @constexpr def _generate_shape_index(out_shape, indices_shape, axis): @@ -103,10 +104,15 @@ def get_bprop_sparse_gather_v2(self): adam_opt_for_map = C.MultitypeFuncGraph("adam_opt_for_map") @adam_opt_for_map.register("Tensor", "Tensor", "Tensor", "Tensor", "Tensor", - "Tensor", "Tensor", "Tensor", "Undetermined", "Bool") -def _update_run_op_for_map(beta1, beta2, eps, lr, weight_decay_tensor, param, m, v, gradient, decay_flag): - if gradient.is_indexed_slices(): - return gradient.values() + "Tensor", "Tensor", "Tensor", "IndexedSlices", "Bool") +def _update_run_op_for_map_indexed_slices(beta1, beta2, eps, lr, weight_decay_tensor, param, + m, v, gradient, decay_flag): + return gradient.values() + +@adam_opt_for_map.register("Tensor", "Tensor", "Tensor", "Tensor", "Tensor", + "Tensor", "Tensor", "Tensor", "Tensor", "Bool") +def _update_run_op_for_map_tensor(beta1, beta2, eps, lr, weight_decay_tensor, param, + m, v, gradient, decay_flag): op_mul = P.Mul() op_square = P.Square() op_sqrt = P.Sqrt() @@ -182,7 +188,7 @@ def test_indexed_slices_make_indexed_slices(): self.dense_shape = (3, 4) def construct(self, indices, values): ret = (IndexedSlices(indices, values, self.dense_shape),) - return ret[0].is_indexed_slices() + return ret[0] indices = Tensor([[0, 0], [1, 2]]) values = Tensor([1, 2], 
dtype=ms.float32) MakeIndexedSlices()(indices, values) @@ -209,7 +215,7 @@ def test_indexed_slices_sparse_gatherv2_grad_all(): self.network = network def construct(self, x, y): grad = grad_all(self.network)(x, y) - return grad, grad[0].is_indexed_slices(), grad[1].is_indexed_slices() + return grad, grad[0], grad[1] class SparseGatherV2(nn.Cell): def __init__(self): super(SparseGatherV2, self).__init__() @@ -233,14 +239,13 @@ def test_indexed_slices_sparse_gatherv2_grad_with_pram(): weights = self.weights grad = grad_by_list(self.network, weights)(x) x = grad[0] - return x.is_indexed_slices(), x.values(), x.indices(), x.dense_shape() + return x, x.values(), x.indices(), x.dense_shape() class SparseGatherV2(nn.Cell): def __init__(self): super(SparseGatherV2, self).__init__() self.sparse_gatherv2 = MySparseGatherV2() self.axis = 0 - self.params = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.int32)), - name="params", has_indexed_slices_grad=True) + self.params = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.int32)), name="params") def construct(self, indices): return self.sparse_gatherv2(self.params, indices, self.axis) indices = Tensor(np.array([0, 1]).astype(np.int32)) @@ -248,20 +253,6 @@ def test_indexed_slices_sparse_gatherv2_grad_with_pram(): network(indices) -def test_indexed_slices_is_indexed_slices(): - class MakeIndexedSlices(nn.Cell): - def __init__(self): - super(MakeIndexedSlices, self).__init__() - self.dense_shape = (3, 4) - def construct(self, indices, values): - indexed_slices = IndexedSlices(indices, values, self.dense_shape) - ret = indexed_slices.is_indexed_slices() - return ret - indices = Tensor([[0, 0], [1, 2]]) - values = Tensor([1, 2], dtype=ms.float32) - MakeIndexedSlices()(indices, values) - - def test_indexed_slices_env_get(): class Loss(nn.Cell): def __init__(self): @@ -271,7 +262,7 @@ def test_indexed_slices_env_get(): class NetWithSparseGatherV2(nn.Cell): def __init__(self): super(NetWithSparseGatherV2, self).__init__() - self.w1 = 
Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="w1", has_indexed_slices_grad=True) + self.w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="w1") self.w2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="w2") self.gatherv2 = MySparseGatherV2() self.axis = 0 diff --git a/tests/ut/python/model/resnet.py b/tests/ut/python/model/resnet.py new file mode 100644 index 0000000000..001e1db0cf --- /dev/null +++ b/tests/ut/python/model/resnet.py @@ -0,0 +1,282 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""ResNet.""" +import numpy as np +import mindspore.nn as nn +from mindspore.ops import operations as P +from mindspore.common.tensor import Tensor + + +def _weight_variable(shape, factor=0.01): + init_value = np.random.randn(*shape).astype(np.float32) * factor + return Tensor(init_value) + + +def _conv3x3(in_channel, out_channel, stride=1): + weight_shape = (out_channel, in_channel, 3, 3) + weight = _weight_variable(weight_shape) + return nn.Conv2d(in_channel, out_channel, + kernel_size=3, stride=stride, padding=0, pad_mode='same', weight_init=weight) + + +def _conv1x1(in_channel, out_channel, stride=1): + weight_shape = (out_channel, in_channel, 1, 1) + weight = _weight_variable(weight_shape) + return nn.Conv2d(in_channel, out_channel, + kernel_size=1, stride=stride, padding=0, pad_mode='same', weight_init=weight) + + +def _conv7x7(in_channel, out_channel, stride=1): + weight_shape = (out_channel, in_channel, 7, 7) + weight = _weight_variable(weight_shape) + return nn.Conv2d(in_channel, out_channel, + kernel_size=7, stride=stride, padding=0, pad_mode='same', weight_init=weight) + + +def _bn(channel): + return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9, + gamma_init=1, beta_init=0, moving_mean_init=0, moving_var_init=1) + + +def _bn_last(channel): + return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9, + gamma_init=0, beta_init=0, moving_mean_init=0, moving_var_init=1) + + +def _fc(in_channel, out_channel): + weight_shape = (out_channel, in_channel) + weight = _weight_variable(weight_shape) + return nn.Dense(in_channel, out_channel, has_bias=True, weight_init=weight, bias_init=0) + + +class ResidualBlock(nn.Cell): + """ + ResNet V1 residual block definition. + + Args: + in_channel (int): Input channel. + out_channel (int): Output channel. + stride (int): Stride size for the first convolutional layer. Default: 1. + + Returns: + Tensor, output tensor. 
+ + Examples: + >>> ResidualBlock(3, 256, stride=2) + """ + expansion = 4 + + def __init__(self, + in_channel, + out_channel, + stride=1): + super(ResidualBlock, self).__init__() + + channel = out_channel // self.expansion + self.conv1 = _conv1x1(in_channel, channel, stride=1) + self.bn1 = _bn(channel) + + self.conv2 = _conv3x3(channel, channel, stride=stride) + self.bn2 = _bn(channel) + + self.conv3 = _conv1x1(channel, out_channel, stride=1) + self.bn3 = _bn_last(out_channel) + + self.relu = nn.ReLU() + + self.down_sample = False + + if stride != 1 or in_channel != out_channel: + self.down_sample = True + self.down_sample_layer = None + + if self.down_sample: + self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channel, out_channel, stride), + _bn(out_channel)]) + self.add = P.TensorAdd() + + def construct(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.down_sample: + identity = self.down_sample_layer(identity) + + out = self.add(out, identity) + out = self.relu(out) + + return out + + +class ResNet(nn.Cell): + """ + ResNet architecture. + + Args: + block (Cell): Block for network. + layer_nums (list): Numbers of block in different layers. + in_channels (list): Input channel in each layer. + out_channels (list): Output channel in each layer. + strides (list): Stride size in each layer. + num_classes (int): The number of classes that the training images are belonging to. + Returns: + Tensor, output tensor. 
+ + Examples: + >>> ResNet(ResidualBlock, + >>> [3, 4, 6, 3], + >>> [64, 256, 512, 1024], + >>> [256, 512, 1024, 2048], + >>> [1, 2, 2, 2], + >>> 10) + """ + + def __init__(self, + block, + layer_nums, + in_channels, + out_channels, + strides, + num_classes): + super(ResNet, self).__init__() + + if not len(layer_nums) == len(in_channels) == len(out_channels) == 4: + raise ValueError("the length of layer_num, in_channels, out_channels list must be 4!") + + self.conv1 = _conv7x7(3, 64, stride=2) + self.bn1 = _bn(64) + self.relu = P.ReLU() + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same") + + self.layer1 = self._make_layer(block, + layer_nums[0], + in_channel=in_channels[0], + out_channel=out_channels[0], + stride=strides[0]) + self.layer2 = self._make_layer(block, + layer_nums[1], + in_channel=in_channels[1], + out_channel=out_channels[1], + stride=strides[1]) + self.layer3 = self._make_layer(block, + layer_nums[2], + in_channel=in_channels[2], + out_channel=out_channels[2], + stride=strides[2]) + self.layer4 = self._make_layer(block, + layer_nums[3], + in_channel=in_channels[3], + out_channel=out_channels[3], + stride=strides[3]) + + self.mean = P.ReduceMean(keep_dims=True) + self.flatten = nn.Flatten() + self.end_point = _fc(out_channels[3], num_classes) + + def _make_layer(self, block, layer_num, in_channel, out_channel, stride): + """ + Make stage network of ResNet. + + Args: + block (Cell): Resnet block. + layer_num (int): Layer number. + in_channel (int): Input channel. + out_channel (int): Output channel. + stride (int): Stride size for the first convolutional layer. + + Returns: + SequentialCell, the output layer. 
+ + Examples: + >>> _make_layer(ResidualBlock, 3, 128, 256, 2) + """ + layers = [] + + resnet_block = block(in_channel, out_channel, stride=stride) + layers.append(resnet_block) + + for _ in range(1, layer_num): + resnet_block = block(out_channel, out_channel, stride=1) + layers.append(resnet_block) + + return nn.SequentialCell(layers) + + def construct(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + c1 = self.maxpool(x) + + c2 = self.layer1(c1) + c3 = self.layer2(c2) + c4 = self.layer3(c3) + c5 = self.layer4(c4) + + out = self.mean(c5, (2, 3)) + out = self.flatten(out) + out = self.end_point(out) + + return out + + +def resnet50(class_num=10): + """ + Get ResNet50 neural network. + + Args: + class_num (int): Class number. + + Returns: + Cell, cell instance of ResNet50 neural network. + + Examples: + >>> net = resnet50(10) + """ + return ResNet(ResidualBlock, + [3, 4, 6, 3], + [64, 256, 512, 1024], + [256, 512, 1024, 2048], + [1, 2, 2, 2], + class_num) + +def resnet101(class_num=1001): + """ + Get ResNet101 neural network. + + Args: + class_num (int): Class number. + + Returns: + Cell, cell instance of ResNet101 neural network. 
+ + Examples: + >>> net = resnet101(1001) + """ + return ResNet(ResidualBlock, + [3, 4, 23, 3], + [64, 256, 512, 1024], + [256, 512, 1024, 2048], + [1, 2, 2, 2], + class_num) diff --git a/tests/ut/python/model/test_mix_precision.py b/tests/ut/python/model/test_mix_precision.py index d0e77f901a..f1fc2cc2f7 100644 --- a/tests/ut/python/model/test_mix_precision.py +++ b/tests/ut/python/model/test_mix_precision.py @@ -219,3 +219,31 @@ def test_dict_cast(): y = Tensor(np.array([4, 5.5, 6.5]), mstype.float32) net = FirstNet() net(x, y) + + +def test_kwarg_cast(): + class FirstNet(nn.Cell): + def __init__(self): + super(FirstNet, self).__init__() + self.net = SecondNet().add_flags_recursive(fp16=True) + self.add = P.TensorAdd() + + def construct(self, tensor_a, tensor_b): + tensor_c = self.add(tensor_a, tensor_b) + dictionary = {"key": tensor_a} + result = self.net(key1=tensor_c, key2=dictionary) + return result + + class SecondNet(nn.Cell): + def __init__(self): + super(SecondNet, self).__init__() + self.add = P.TensorAdd() + + def construct(self, key1=1, key2=2): + tensor_d = self.add(key1, key2["key"]) + return tensor_d + + x = Tensor(np.array([1, 2.5, 3.5]), mstype.float32) + y = Tensor(np.array([4, 5.5, 6.5]), mstype.float32) + net = FirstNet() + net(x, y) diff --git a/tests/ut/python/nn/optim/test_adam.py b/tests/ut/python/nn/optim/test_adam.py index b435bf65b9..03a73893c5 100644 --- a/tests/ut/python/nn/optim/test_adam.py +++ b/tests/ut/python/nn/optim/test_adam.py @@ -17,12 +17,13 @@ import numpy as np import pytest import mindspore.nn as nn -from mindspore import Tensor, Parameter +from mindspore import Tensor, Parameter, context from mindspore.common.api import _executor from mindspore.nn import TrainOneStepCell, WithLossCell from mindspore.nn.optim import Adam, AdamWeightDecay, AdamWeightDecayDynamicLR from mindspore.ops import operations as P +context.set_context(enable_sparse=True) class Net(nn.Cell): """ Net definition """ @@ -53,8 +54,7 @@ class 
NetWithSparseGatherV2(nn.Cell): """ NetWithSparseGatherV2 definition """ def __init__(self): super(NetWithSparseGatherV2, self).__init__() - self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), - name="weight1", sparse_grad="sparse_key_w1") + self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1") self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2") self.axis = 0 self.gather = P.SparseGatherV2() diff --git a/tests/ut/python/nn/optim/test_adam_with_tuple_grad.py b/tests/ut/python/nn/optim/test_adam_with_tuple_grad.py index 7f9f341a93..23aad24c47 100644 --- a/tests/ut/python/nn/optim/test_adam_with_tuple_grad.py +++ b/tests/ut/python/nn/optim/test_adam_with_tuple_grad.py @@ -27,6 +27,7 @@ from mindspore.ops import functional as F from mindspore._checkparam import Validator as validator from mindspore._checkparam import Rel +context.set_context(enable_sparse=True) adam_opt_for_map = C.MultitypeFuncGraph("adam_opt_for_map") @adam_opt_for_map.register("Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", @@ -154,7 +155,7 @@ def test_AdamWeightDecaySparse(): class NetWithSparseGatherV2(nn.Cell): def __init__(self): super(NetWithSparseGatherV2, self).__init__() - self.w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="w1", sparse_grad="sparse_key_w1") + self.w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="w1") self.w2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="w2") self.gatherv2 = P.SparseGatherV2() self.axis = 0 diff --git a/tests/ut/python/nn/optim/test_ftrl.py b/tests/ut/python/nn/optim/test_ftrl.py index de59dfdbad..670bebc92d 100644 --- a/tests/ut/python/nn/optim/test_ftrl.py +++ b/tests/ut/python/nn/optim/test_ftrl.py @@ -17,12 +17,13 @@ import numpy as np import mindspore.nn as nn -from mindspore import Tensor, Parameter +from mindspore import Tensor, Parameter, context from mindspore.common.api import _executor 
from mindspore.nn import TrainOneStepCell, WithLossCell from mindspore.nn.optim import FTRL from mindspore.ops import operations as P +context.set_context(enable_sparse=True) class Net(nn.Cell): def __init__(self): @@ -41,8 +42,7 @@ class NetWithSparseGatherV2(nn.Cell): """ NetWithSparseGatherV2 definition """ def __init__(self): super(NetWithSparseGatherV2, self).__init__() - self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), - name="weight1", sparse_grad="sparse_key_w1") + self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1") self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2") self.axis = 0 self.gather = P.SparseGatherV2() diff --git a/tests/ut/python/nn/optim/test_lazyadam.py b/tests/ut/python/nn/optim/test_lazyadam.py index ce66b404e2..7769597140 100644 --- a/tests/ut/python/nn/optim/test_lazyadam.py +++ b/tests/ut/python/nn/optim/test_lazyadam.py @@ -17,12 +17,13 @@ import numpy as np import pytest import mindspore.nn as nn -from mindspore import Tensor, Parameter +from mindspore import Tensor, Parameter, context from mindspore.common.api import _executor from mindspore.nn import TrainOneStepCell, WithLossCell from mindspore.nn.optim import LazyAdam from mindspore.ops import operations as P +context.set_context(enable_sparse=True) class Net(nn.Cell): """ Net definition """ @@ -43,8 +44,7 @@ class NetWithSparseGatherV2(nn.Cell): """ NetWithSparseGatherV2 definition """ def __init__(self): super(NetWithSparseGatherV2, self).__init__() - self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), - name="weight1", sparse_grad="sparse_key_w1") + self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1") self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2") self.axis = 0 self.gather = P.SparseGatherV2() diff --git a/tests/ut/python/nn/optim/test_proximal_ada_grad.py 
b/tests/ut/python/nn/optim/test_proximal_ada_grad.py index c7e6d3f88a..3077896fed 100644 --- a/tests/ut/python/nn/optim/test_proximal_ada_grad.py +++ b/tests/ut/python/nn/optim/test_proximal_ada_grad.py @@ -17,12 +17,13 @@ import numpy as np import mindspore.nn as nn -from mindspore import Tensor, Parameter +from mindspore import Tensor, Parameter, context from mindspore.common.api import _executor from mindspore.nn import TrainOneStepCell, WithLossCell from mindspore.nn.optim import ProximalAdagrad from mindspore.ops import operations as P +context.set_context(enable_sparse=True) class Net(nn.Cell): def __init__(self): @@ -40,8 +41,7 @@ class NetWithSparseGatherV2(nn.Cell): """ NetWithSparseGatherV2 definition """ def __init__(self): super(NetWithSparseGatherV2, self).__init__() - self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", - sparse_grad="sparse_key_w1") + self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1") self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="weight2") self.axis = 0 self.gather = P.SparseGatherV2() diff --git a/tests/ut/python/nn/test_distribution.py b/tests/ut/python/nn/test_distribution.py new file mode 100644 index 0000000000..845c64a110 --- /dev/null +++ b/tests/ut/python/nn/test_distribution.py @@ -0,0 +1,369 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +""" +Test nn.Distribution. + +Including Normal Distribution and Bernoulli Distribution. +""" +import pytest +import numpy as np + +import mindspore.nn as nn +from mindspore import dtype +from mindspore import Tensor + +def test_normal_shape_errpr(): + """ + Invalid shapes. + """ + with pytest.raises(ValueError): + nn.Normal([[2.], [1.]], [[2.], [3.], [4.]], dtype=dtype.float32) + +def test_no_arguments(): + """ + No args passed in during initialization. + """ + n = nn.Normal() + assert isinstance(n, nn.Distribution) + b = nn.Bernoulli() + assert isinstance(b, nn.Distribution) + +def test_with_arguments(): + """ + Args passed in during initialization. + """ + n = nn.Normal([3.0], [4.0], dtype=dtype.float32) + assert isinstance(n, nn.Distribution) + b = nn.Bernoulli([0.3, 0.5], dtype=dtype.int32) + assert isinstance(b, nn.Distribution) + +class NormalProb(nn.Cell): + """ + Normal distribution: initialize with mean/sd. + """ + def __init__(self): + super(NormalProb, self).__init__() + self.normal = nn.Normal(3.0, 4.0, dtype=dtype.float32) + + def construct(self, value): + x = self.normal('prob', value) + y = self.normal('log_prob', value) + return x, y + +def test_normal_prob(): + """ + Test pdf/log_pdf: passing value through construct. + """ + net = NormalProb() + value = Tensor([0.5, 1.0], dtype=dtype.float32) + pdf, log_pdf = net(value) + assert isinstance(pdf, Tensor) + assert isinstance(log_pdf, Tensor) + +class NormalProb1(nn.Cell): + """ + Normal distribution: initialize without mean/sd. + """ + def __init__(self): + super(NormalProb1, self).__init__() + self.normal = nn.Normal() + + def construct(self, value, mean, sd): + x = self.normal('prob', value, mean, sd) + y = self.normal('log_prob', value, mean, sd) + return x, y + +def test_normal_prob1(): + """ + Test pdf/logpdf: passing mean/sd, value through construct. 
+ """ + net = NormalProb1() + value = Tensor([0.5, 1.0], dtype=dtype.float32) + mean = Tensor([0.0], dtype=dtype.float32) + sd = Tensor([1.0], dtype=dtype.float32) + pdf, log_pdf = net(value, mean, sd) + assert isinstance(pdf, Tensor) + assert isinstance(log_pdf, Tensor) + +class NormalProb2(nn.Cell): + """ + Normal distribution: initialize with mean/sd. + """ + def __init__(self): + super(NormalProb2, self).__init__() + self.normal = nn.Normal(3.0, 4.0, dtype=dtype.float32) + + def construct(self, value, mean, sd): + x = self.normal('prob', value, mean, sd) + y = self.normal('log_prob', value, mean, sd) + return x, y + +def test_normal_prob2(): + """ + Test pdf/log_pdf: passing mean/sd through construct. + Overwrite original mean/sd. + """ + net = NormalProb2() + value = Tensor([0.5, 1.0], dtype=dtype.float32) + mean = Tensor([0.0], dtype=dtype.float32) + sd = Tensor([1.0], dtype=dtype.float32) + pdf, log_pdf = net(value, mean, sd) + assert isinstance(pdf, Tensor) + assert isinstance(log_pdf, Tensor) + +class BernoulliProb(nn.Cell): + """ + Bernoulli distribution: initialize with probs. + """ + def __init__(self): + super(BernoulliProb, self).__init__() + self.bernoulli = nn.Bernoulli(0.5, dtype=dtype.int32) + + def construct(self, value): + return self.bernoulli('prob', value) + +class BernoulliLogProb(nn.Cell): + """ + Bernoulli distribution: initialize with probs. + """ + def __init__(self): + super(BernoulliLogProb, self).__init__() + self.bernoulli = nn.Bernoulli(0.5, dtype=dtype.int32) + + def construct(self, value): + return self.bernoulli('log_prob', value) + + +def test_bernoulli_prob(): + """ + Test pmf/log_pmf: passing value through construct. + """ + net = BernoulliProb() + value = Tensor([1, 0, 1, 0, 1], dtype=dtype.float32) + pmf = net(value) + assert isinstance(pmf, Tensor) + +def test_bernoulli_log_prob(): + """ + Test pmf/log_pmf: passing value through construct. 
+ """ + net = BernoulliLogProb() + value = Tensor([1, 0, 1, 0, 1], dtype=dtype.float32) + log_pmf = net(value) + assert isinstance(log_pmf, Tensor) + +class BernoulliProb1(nn.Cell): + """ + Bernoulli distribution: initialize without probs. + """ + def __init__(self): + super(BernoulliProb1, self).__init__() + self.bernoulli = nn.Bernoulli() + + def construct(self, value, probs): + return self.bernoulli('prob', value, probs) + +class BernoulliLogProb1(nn.Cell): + """ + Bernoulli distribution: initialize without probs. + """ + def __init__(self): + super(BernoulliLogProb1, self).__init__() + self.bernoulli = nn.Bernoulli() + + def construct(self, value, probs): + return self.bernoulli('log_prob', value, probs) + + +def test_bernoulli_prob1(): + """ + Test pmf/log_pmf: passing probs through construct. + """ + net = BernoulliProb1() + value = Tensor([1, 0, 1, 0, 1], dtype=dtype.float32) + probs = Tensor([0.3], dtype=dtype.float32) + pmf = net(value, probs) + assert isinstance(pmf, Tensor) + +def test_bernoulli_log_prob1(): + """ + Test pmf/log_pmf: passing probs through construct. + """ + net = BernoulliLogProb1() + value = Tensor([1, 0, 1, 0, 1], dtype=dtype.float32) + probs = Tensor([0.3], dtype=dtype.float32) + log_pmf = net(value, probs) + assert isinstance(log_pmf, Tensor) + +class BernoulliProb2(nn.Cell): + """ + Bernoulli distribution: initialize with probs. + """ + def __init__(self): + super(BernoulliProb2, self).__init__() + self.bernoulli = nn.Bernoulli(0.5) + + def construct(self, value, probs): + return self.bernoulli('prob', value, probs) + +class BernoulliLogProb2(nn.Cell): + """ + Bernoulli distribution: initialize with probs. + """ + def __init__(self): + super(BernoulliLogProb2, self).__init__() + self.bernoulli = nn.Bernoulli(0.5) + + def construct(self, value, probs): + return self.bernoulli('log_prob', value, probs) + + +def test_bernoulli_prob2(): + """ + Test pmf/log_pmf: passing probs/value through construct. + Overwrite original probs. 
+ """ + net = BernoulliProb2() + value = Tensor([1, 0, 1, 0, 1], dtype=dtype.float32) + probs = Tensor([0.3], dtype=dtype.float32) + pmf = net(value, probs) + assert isinstance(pmf, Tensor) + +def test_bernoulli_log_prob2(): + """ + Test pmf/log_pmf: passing probs/value through construct. + Overwrite original probs. + """ + net = BernoulliLogProb2() + value = Tensor([1, 0, 1, 0, 1], dtype=dtype.float32) + probs = Tensor([0.3], dtype=dtype.float32) + log_pmf = net(value, probs) + assert isinstance(log_pmf, Tensor) + + +class NormalKl(nn.Cell): + """ + Test class: kl_loss of Normal distribution. + """ + def __init__(self): + super(NormalKl, self).__init__() + self.n = nn.Normal(np.array([3.0]), np.array([4.0]), dtype=dtype.float32) + + def construct(self, x_, y_): + return self.n('kl_loss', 'Normal', x_, y_) + +class BernoulliKl(nn.Cell): + """ + Test class: kl_loss between Bernoulli distributions. + """ + def __init__(self): + super(BernoulliKl, self).__init__() + self.b = nn.Bernoulli(0.7, dtype=dtype.int32) + + def construct(self, x_): + return self.b('kl_loss', 'Bernoulli', x_) + +def test_kl(): + """ + Test kl_loss function. + """ + nor_net = NormalKl() + mean_b = np.array([1.0]).astype(np.float32) + sd_b = np.array([1.0]).astype(np.float32) + mean = Tensor(mean_b, dtype=dtype.float32) + sd = Tensor(sd_b, dtype=dtype.float32) + loss = nor_net(mean, sd) + assert isinstance(loss, Tensor) + + ber_net = BernoulliKl() + probs_b = Tensor([0.3], dtype=dtype.float32) + loss = ber_net(probs_b) + assert isinstance(loss, Tensor) + + +class NormalKlNoArgs(nn.Cell): + """ + Test class: kl_loss of Normal distribution. + No args during initialization. + """ + def __init__(self): + super(NormalKlNoArgs, self).__init__() + self.n = nn.Normal(dtype=dtype.float32) + + def construct(self, x_, y_, w_, v_): + return self.n('kl_loss', 'Normal', x_, y_, w_, v_) + +class BernoulliKlNoArgs(nn.Cell): + """ + Test class: kl_loss between Bernoulli distributions. 
+ No args during initialization. + """ + def __init__(self): + super(BernoulliKlNoArgs, self).__init__() + self.b = nn.Bernoulli(dtype=dtype.int32) + + def construct(self, x_, y_): + return self.b('kl_loss', 'Bernoulli', x_, y_) + +def test_kl_no_args(): + """ + Test kl_loss function. + """ + nor_net = NormalKlNoArgs() + mean_b = np.array([1.0]).astype(np.float32) + sd_b = np.array([1.0]).astype(np.float32) + mean_a = np.array([2.0]).astype(np.float32) + sd_a = np.array([3.0]).astype(np.float32) + mean_b = Tensor(mean_b, dtype=dtype.float32) + sd_b = Tensor(sd_b, dtype=dtype.float32) + mean_a = Tensor(mean_a, dtype=dtype.float32) + sd_a = Tensor(sd_a, dtype=dtype.float32) + loss = nor_net(mean_b, sd_b, mean_a, sd_a) + assert isinstance(loss, Tensor) + + ber_net = BernoulliKlNoArgs() + probs_b = Tensor([0.3], dtype=dtype.float32) + probs_a = Tensor([0.7], dtype=dtype.float32) + loss = ber_net(probs_b, probs_a) + assert isinstance(loss, Tensor) + + + +class NormalBernoulli(nn.Cell): + """ + Test class: basic mean/sd function. + """ + def __init__(self): + super(NormalBernoulli, self).__init__() + self.n = nn.Normal(3.0, 4.0, dtype=dtype.float32) + self.b = nn.Bernoulli(0.5, dtype=dtype.int32) + + def construct(self): + normal_mean = self.n('mean') + normal_sd = self.n('sd') + bernoulli_mean = self.b('mean') + bernoulli_sd = self.b('sd') + return normal_mean, normal_sd, bernoulli_mean, bernoulli_sd + +def test_bascis(): + """ + Test mean/sd functionality of Normal and Bernoulli. 
+ """ + net = NormalBernoulli() + normal_mean, normal_sd, bernoulli_mean, bernoulli_sd = net() + assert isinstance(normal_mean, Tensor) + assert isinstance(normal_sd, Tensor) + assert isinstance(bernoulli_mean, Tensor) + assert isinstance(bernoulli_sd, Tensor) diff --git a/tests/ut/python/nn/test_msssim.py b/tests/ut/python/nn/test_msssim.py new file mode 100644 index 0000000000..b85d13c927 --- /dev/null +++ b/tests/ut/python/nn/test_msssim.py @@ -0,0 +1,135 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +""" +test msssim +""" +import numpy as np +import pytest + +import mindspore.common.dtype as mstype +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.common.api import _executor + +_MSSSIM_WEIGHTS = (0.0448, 0.2856, 0.3001, 0.2363, 0.1333) + +class MSSSIMNet(nn.Cell): + def __init__(self, max_val=1.0, power_factors=_MSSSIM_WEIGHTS, filter_size=11, filter_sigma=1.5, k1=0.01, k2=0.03): + super(MSSSIMNet, self).__init__() + self.net = nn.MSSSIM(max_val, power_factors, filter_size, filter_sigma, k1, k2) + + def construct(self, img1, img2): + return self.net(img1, img2) + + +def test_compile(): + factors = (0.033, 0.033, 0.033) + net = MSSSIMNet(power_factors=factors) + img1 = Tensor(np.random.random((8, 3, 128, 128))) + img2 = Tensor(np.random.random((8, 3, 128, 128))) + _executor.compile(net, img1, img2) + + +def test_compile_grayscale(): + max_val = 255 + factors = (0.033, 0.033, 0.033) + net = MSSSIMNet(max_val=max_val, power_factors=factors) + img1 = Tensor(np.random.randint(0, 256, (8, 3, 128, 128), np.uint8)) + img2 = Tensor(np.random.randint(0, 256, (8, 3, 128, 128), np.uint8)) + _executor.compile(net, img1, img2) + + +def test_msssim_max_val_negative(): + max_val = -1 + with pytest.raises(ValueError): + _ = MSSSIMNet(max_val) + + +def test_msssim_max_val_bool(): + max_val = True + with pytest.raises(TypeError): + _ = MSSSIMNet(max_val) + + +def test_msssim_max_val_zero(): + max_val = 0 + with pytest.raises(ValueError): + _ = MSSSIMNet(max_val) + + +def test_msssim_power_factors_set(): + with pytest.raises(TypeError): + _ = MSSSIMNet(power_factors={0.033, 0.033, 0.033}) + + +def test_msssim_filter_size_float(): + with pytest.raises(TypeError): + _ = MSSSIMNet(filter_size=1.1) + + +def test_msssim_filter_size_zero(): + with pytest.raises(ValueError): + _ = MSSSIMNet(filter_size=0) + + +def test_msssim_filter_sigma_zero(): + with pytest.raises(ValueError): + _ = 
MSSSIMNet(filter_sigma=0.0) + + +def test_msssim_filter_sigma_negative(): + with pytest.raises(ValueError): + _ = MSSSIMNet(filter_sigma=-0.1) + + +def test_msssim_different_shape(): + shape_1 = (8, 3, 128, 128) + shape_2 = (8, 3, 256, 256) + factors = (0.033, 0.033, 0.033) + img1 = Tensor(np.random.random(shape_1)) + img2 = Tensor(np.random.random(shape_2)) + net = MSSSIMNet(power_factors=factors) + with pytest.raises(ValueError): + _executor.compile(net, img1, img2) + + +def test_msssim_different_dtype(): + dtype_1 = mstype.float32 + dtype_2 = mstype.float16 + factors = (0.033, 0.033, 0.033) + img1 = Tensor(np.random.random((8, 3, 128, 128)), dtype=dtype_1) + img2 = Tensor(np.random.random((8, 3, 128, 128)), dtype=dtype_2) + net = MSSSIMNet(power_factors=factors) + with pytest.raises(TypeError): + _executor.compile(net, img1, img2) + + +def test_msssim_invalid_5d_input(): + shape_1 = (8, 3, 128, 128) + shape_2 = (8, 3, 256, 256) + invalid_shape = (8, 3, 128, 128, 1) + factors = (0.033, 0.033, 0.033) + img1 = Tensor(np.random.random(shape_1)) + invalid_img1 = Tensor(np.random.random(invalid_shape)) + img2 = Tensor(np.random.random(shape_2)) + invalid_img2 = Tensor(np.random.random(invalid_shape)) + + net = MSSSIMNet(power_factors=factors) + with pytest.raises(ValueError): + _executor.compile(net, invalid_img1, img2) + with pytest.raises(ValueError): + _executor.compile(net, img1, invalid_img2) + with pytest.raises(ValueError): + _executor.compile(net, invalid_img1, invalid_img2) diff --git a/tests/ut/python/nn/test_ssim.py b/tests/ut/python/nn/test_ssim.py index 5cf1b0c94c..8b7e441014 100644 --- a/tests/ut/python/nn/test_ssim.py +++ b/tests/ut/python/nn/test_ssim.py @@ -78,26 +78,6 @@ def test_ssim_filter_sigma_negative(): _ = SSIMNet(filter_sigma=-0.1) -def test_ssim_k1_k2_wrong_value(): - with pytest.raises(ValueError): - _ = SSIMNet(k1=1.1) - with pytest.raises(ValueError): - _ = SSIMNet(k1=1.0) - with pytest.raises(ValueError): - _ = SSIMNet(k1=0.0) - with 
pytest.raises(ValueError): - _ = SSIMNet(k1=-1.0) - - with pytest.raises(ValueError): - _ = SSIMNet(k2=1.1) - with pytest.raises(ValueError): - _ = SSIMNet(k2=1.0) - with pytest.raises(ValueError): - _ = SSIMNet(k2=0.0) - with pytest.raises(ValueError): - _ = SSIMNet(k2=-1.0) - - def test_ssim_different_shape(): shape_1 = (8, 3, 16, 16) shape_2 = (8, 3, 8, 8) diff --git a/tests/ut/python/ops/test_control_ops.py b/tests/ut/python/ops/test_control_ops.py index 064512b19a..53b42b8f66 100644 --- a/tests/ut/python/ops/test_control_ops.py +++ b/tests/ut/python/ops/test_control_ops.py @@ -600,3 +600,42 @@ def test_while_tensor(): x = Tensor(np.ones([6, 8, 10], np.int32)) y = Tensor(np.ones([6, 8, 10], np.int32)) out = net(x, y) + + +def test_large_for_loop(): + class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + self.flatten = P.ReLU() #nn.Flatten() + + def construct(self, x): + for elem in range(1, 19000): + x = self.flatten(x + elem) + return x + + t = Tensor(np.ones([2, 3], dtype=np.float32)) + net = Net() + net(t) + + +def test_large_for_loop_with_continue_break(): + class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + self.flatten = P.ReLU() #nn.Flatten() + + def construct(self, x): + idx = 0 + for elem1 in range(200): + idx = idx + 1 + if idx < 10: + x = x + 0.5 + continue + if idx > 500: + break + x = self.flatten(x + elem1) + return x + + t = Tensor(np.ones([2, 3], dtype=np.float32)) + net = Net() + net(t) diff --git a/tests/ut/python/ops/test_ops.py b/tests/ut/python/ops/test_ops.py index 029d49fe1c..31ca540f74 100755 --- a/tests/ut/python/ops/test_ops.py +++ b/tests/ut/python/ops/test_ops.py @@ -649,6 +649,15 @@ def test_strided_slice_const(): assert (ret.asnumpy() == np.array([], np.float32).reshape([0, 1, 7, 8, 9, 3, 1])).all() +class ParallelConcatNet(nn.Cell): + def __init__(self): + super(ParallelConcatNet, self).__init__() + self.parallel_concat = P.ParallelConcat() + + def construct(self, x1, x2): + return 
self.parallel_concat((x1, x2)) + + test_case_math_ops = [ ('BitwiseAnd', { 'block': P.BitwiseAnd(), @@ -1391,6 +1400,11 @@ test_case_nn_ops = [ 'desc_const': [4], 'desc_inputs': [[3, 2, 1, 3], Tensor(np.array([1, 2, 3]).astype(np.int32))], 'desc_bprop': [[4, 2, 1, 3]]}), + ('UnsortedSegmentProd', { + 'block': P.UnsortedSegmentProd(), + 'desc_const': [4], + 'desc_inputs': [[3, 2, 1, 3], Tensor(np.array([0, 1, 0]).astype(np.int32))], + 'desc_bprop': [[4, 2, 1, 3]]}), ('DropoutGenMask', { 'block': P.DropoutGenMask(), 'desc_const': [(2, 2), Tensor(0.5, mstype.float32)], @@ -1948,6 +1962,12 @@ test_case_array_ops = [ 'desc_inputs': [[1, 3, 24, 24]], 'desc_bprop': [[1, 12, 24, 24]], }), + ('ParallelConcat', { + 'block': ParallelConcatNet(), + 'desc_inputs': [Tensor([[1, 2]], mstype.float32), + Tensor([[5, 6]], mstype.float32)], + 'skip': ['backward'], + }), ] test_case_other_ops = [ @@ -2216,7 +2236,10 @@ test_case_other_ops = [ 'desc_inputs': [Tensor(np.array([1.1]).astype(np.float32)), Tensor(np.array([1.2]).astype(np.float32))], 'skip': ['backward']}), - + ('PopulationCount', { + 'block': P.PopulationCount(), + 'desc_inputs': [Tensor(np.array([1, 2, 3]).astype(np.int16))], + 'skip': ['backward']}), ] test_case_quant_ops = [ diff --git a/tests/ut/python/optimizer/test_python_pass.py b/tests/ut/python/optimizer/test_python_pass.py new file mode 100644 index 0000000000..c3ce3d6c4e --- /dev/null +++ b/tests/ut/python/optimizer/test_python_pass.py @@ -0,0 +1,64 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import numpy as np + +import mindspore +import mindspore.nn as nn +from mindspore import context +from mindspore.common.tensor import Tensor +from mindspore.ops import operations as P +from mindspore.common.python_pass_register import registe_pass, PyPassManager +from mindspore.common.api import _generate_pip_args +from mindspore._c_expression import generate_key, Executor_ + +context.set_context(mode=context.GRAPH_MODE) + +def get_func_graph(obj, *args, phase="predict"): + args_names, args_list = _generate_pip_args(obj, *args) + dic = dict(zip(args_names, args_list)) + key = generate_key(phase, dic) + phase_prefix = str(key[1]) + if phase == 'export': + phase = phase + '.' + phase_prefix + '.' + str(obj.create_time) + else: + phase = phase_prefix + phase + '.' + str(obj.create_time) + _executor = Executor_.get_instance() + _executor.compile(obj, args_list, phase, False) + return _executor.get_func_graph(phase) + +def test_softmax_relu(): + """ + Use python pass to transform from Softmax to ReLU. 
+ """ + inputs = Tensor(np.ones([42]), mindspore.float16) + softmax_model = nn.Softmax() + + @registe_pass(run_only_once=True) + def softmax_relu_pass(): + softmax = P.Softmax() + relu = P.ReLU() + def pattern(x): + x = softmax(x) + return x + def target(x): + x = relu(x) + return x + return pattern, target + + transformed_repr = get_func_graph(softmax_model, inputs).get_return().expanded_str(2) + ppm = PyPassManager() + ppm.unregiste(softmax_relu_pass) + assert "ReLU" in transformed_repr + assert "Softmax" not in transformed_repr diff --git a/tests/ut/python/parallel/test_embeddinglookup.py b/tests/ut/python/parallel/test_embeddinglookup.py index 4ab5f5f878..db84ab26eb 100644 --- a/tests/ut/python/parallel/test_embeddinglookup.py +++ b/tests/ut/python/parallel/test_embeddinglookup.py @@ -19,7 +19,6 @@ import mindspore.nn as nn from mindspore.common.api import _executor from mindspore.ops import operations as P from mindspore.ops import composite as C -from mindspore.ops.operations import _inner_ops as inner from mindspore import Tensor, context from tests.ut.python.ops.test_math_ops import VirtualLoss @@ -42,17 +41,15 @@ class NetWithLoss(nn.Cell): return self.loss(predict) class Net(nn.Cell): - def __init__(self, shape, offset, reduce_scatter_flag, split_num): + def __init__(self, shape, offset, strategy1=None, strategy2=None, target="Device"): super().__init__() self.index = Tensor(np.ones(shape), dtype=ms.int32) self.offset = offset - self.reduce_scatter_flag = reduce_scatter_flag - self.split_num = split_num - self.elu = inner.EmbeddingLookup() - self.mm = P.BatchMatMul() + self.elu = P.EmbeddingLookup().set_strategy(strategy1).add_prim_attr("primitive_target", target) + self.mm = P.BatchMatMul().set_strategy(strategy2) def construct(self, x, y): - out = self.elu(x, self.index, self.offset, self.reduce_scatter_flag, self.split_num) + out = self.elu(x, self.index, self.offset) out = self.mm(out, y) return out @@ -60,9 +57,7 @@ class Net(nn.Cell): def 
test_embeddinglookup_reducescatter_false(): shape = [8, 8] offset = 8 - reduce_scatter_flag = False - split_num = 1 - net = NetWithLoss(Net(shape, offset, reduce_scatter_flag, split_num)) + net = NetWithLoss(Net(shape, offset)) net.set_auto_parallel() x = Tensor(np.ones([64, 32]), dtype=ms.float32) @@ -71,11 +66,9 @@ def test_embeddinglookup_reducescatter_false(): def test_embeddinglookup_reducescatter_true(): - shape = [64, 8] + shape = [8, 8] offset = 8 - reduce_scatter_flag = True - split_num = 8 - net = NetWithLoss(Net(shape, offset, reduce_scatter_flag, split_num)) + net = NetWithLoss(Net(shape, offset)) net.set_auto_parallel() x = Tensor(np.ones([64, 32]), dtype=ms.float32) @@ -86,9 +79,7 @@ def test_embeddinglookup_reducescatter_true(): def test_embeddinglookup_reducescatter_false_grad(): shape = [8, 8] offset = 8 - reduce_scatter_flag = False - split_num = 1 - net = GradWrap(NetWithLoss(Net(shape, offset, reduce_scatter_flag, split_num))) + net = GradWrap(NetWithLoss(Net(shape, offset))) net.set_auto_parallel() x = Tensor(np.ones([64, 32]), dtype=ms.float32) @@ -98,13 +89,39 @@ def test_embeddinglookup_reducescatter_false_grad(): def test_embeddinglookup_reducescatter_true_grad(): context.set_context(save_graphs=True) - shape = [64, 8] + shape = [8, 8] offset = 8 - reduce_scatter_flag = True - split_num = 8 - net = GradWrap(NetWithLoss(Net(shape, offset, reduce_scatter_flag, split_num))) + net = GradWrap(NetWithLoss(Net(shape, offset))) net.set_auto_parallel() x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([8, 32, 8]), dtype=ms.float32) _executor.compile(net, x, y) + + +def test_embeddinglookup_semi_auto1(): + context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel") + shape = [64, 32] + offset = 0 + strategy1 = ((8, 1), (1, 1)) + strategy2 = ((4, 1, 2), (4, 2, 1)) + net = GradWrap(NetWithLoss(Net(shape, offset, strategy1, strategy2, "CPU"))) + + net.set_auto_parallel() + x = Tensor(np.ones([64, 
64]), dtype=ms.float32) + y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) + _executor.compile(net, x, y) + + +def test_embeddinglookup_semi_auto2(): + context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel") + shape = [64, 32] + offset = 0 + strategy1 = ((1, 8), (1, 1)) + strategy2 = ((4, 1, 2), (4, 2, 1)) + net = GradWrap(NetWithLoss(Net(shape, offset, strategy1, strategy2, "CPU"))) + + net.set_auto_parallel() + x = Tensor(np.ones([64, 64]), dtype=ms.float32) + y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) + _executor.compile(net, x, y) diff --git a/tests/ut/python/parallel/test_gather_v2.py b/tests/ut/python/parallel/test_gather_v2.py index 5d52089cbe..2e853875bf 100644 --- a/tests/ut/python/parallel/test_gather_v2.py +++ b/tests/ut/python/parallel/test_gather_v2.py @@ -13,7 +13,6 @@ # limitations under the License. # ============================================================================ import numpy as np - import mindspore as ms import mindspore.nn as nn from mindspore import Tensor @@ -182,39 +181,3 @@ def test_gatherv2_auto1(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) _executor.compile(net, x, y) - - -def test_gatherv2_cpu0(): - context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel") - strategy1 = ((8, 1), (1, 1)) - strategy2 = ((4, 2, 1), (4, 2, 1)) - net = NetWithLoss(Net(0, strategy1, strategy2, None, "CPU")) - net.set_auto_parallel() - - x = Tensor(np.ones([64, 64]), dtype=ms.float32) - y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) - _executor.compile(net, x, y) - - -def test_gatherv2_cpu1(): - context.set_auto_parallel_context(device_num=16, global_rank=0, parallel_mode="semi_auto_parallel") - strategy1 = ((16, 1), (1, 1)) - strategy2 = ((4, 2, 1), (4, 2, 1)) - net = NetWithLoss(Net(0, strategy1, strategy2, None, "CPU")) - net.set_auto_parallel() - - x = Tensor(np.ones([64, 64]), 
dtype=ms.float32) - y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) - _executor.compile(net, x, y) - - -def test_gatherv2_cpu2(): - context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel") - strategy1 = ((1, 8), (1, 1)) - strategy2 = ((4, 2, 1), (4, 2, 1)) - net = NetWithLoss(Net(0, strategy1, strategy2, None, "CPU")) - net.set_auto_parallel() - - x = Tensor(np.ones([64, 64]), dtype=ms.float32) - y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) - _executor.compile(net, x, y) diff --git a/tests/ut/python/parallel/test_manual_gatherv2.py b/tests/ut/python/parallel/test_manual_gatherv2.py new file mode 100644 index 0000000000..21d25ae720 --- /dev/null +++ b/tests/ut/python/parallel/test_manual_gatherv2.py @@ -0,0 +1,61 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import numpy as np +import mindspore as ms +from mindspore import context, Tensor, Parameter +from mindspore.common.api import _executor +from mindspore.nn import Cell, TrainOneStepCell, Momentum +from mindspore.ops import operations as P +from mindspore.common.initializer import initializer + +class Net(Cell): + def __init__(self, strategy1=None, strategy2=None, strategy3=None): + super().__init__() + self.gatherv2 = P.GatherV2().set_strategy(strategy1) + self.gatherv2.add_prim_attr("manual_split", ((1, 0), (7, 1))) + self.mul = P.Mul().set_strategy(strategy2) + self.reshape = P.Reshape() + self.matmul = P.MatMul().set_strategy(strategy3) + self.matmul.add_prim_attr("forward_reduce_scatter", True) + self.param = Parameter(initializer("ones", (8, 64), ms.float32), name="gatherv2_param") + self.mul_weight = Parameter(initializer("ones", (2, 4, 64), ms.float32), name="mul_weight") + self.matmul_weight = Parameter(initializer("ones", (256, 16), ms.float32), name="matmul_weight") + + def construct(self, x, b): + out = self.gatherv2(self.param, x, 0) + out = self.mul(out, self.mul_weight) + out = self.reshape(out, (2, 256)) + out = self.matmul(out, self.matmul_weight) + return out + +_x = Tensor(np.ones([2, 4]), dtype=ms.int32) +_b = Tensor(np.ones([64, 8]), dtype=ms.float32) + +def compile_net(net): + optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) + train_net = TrainOneStepCell(net, optimizer) + train_net.set_auto_parallel() + _executor.compile(train_net, _x, _b) + context.reset_auto_parallel_context() + +def test_neg_data_parallel(): + context.set_context(save_graphs=True) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=2, global_rank=0) + strategy1 = ((2, 1), (1, 2)) + strategy2 = ((1, 2, 1), (1, 2, 1)) + strategy3 = ((1, 2), (2, 1)) + net = Net(strategy1, strategy2, strategy3) + compile_net(net) diff --git 
a/tests/ut/python/parallel/test_sparse_gather_v2.py b/tests/ut/python/parallel/test_sparse_gather_v2.py index dd0517a08e..2d4d0c2bf2 100644 --- a/tests/ut/python/parallel/test_sparse_gather_v2.py +++ b/tests/ut/python/parallel/test_sparse_gather_v2.py @@ -13,6 +13,7 @@ # limitations under the License. # ============================================================================ import numpy as np +import pytest import mindspore as ms import mindspore.nn as nn @@ -184,6 +185,7 @@ def test_gatherv2_auto1(): _executor.compile(net, x, y) +@pytest.mark.skip(reason="The transition from GatherV2 to EmbeddingLookup needs adjusting. by lichen") def test_gatherv2_cpu0(): context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel") strategy1 = ((8, 1), (1, 1)) @@ -196,6 +198,7 @@ def test_gatherv2_cpu0(): _executor.compile(net, x, y) +@pytest.mark.skip(reason="The transition from GatherV2 to EmbeddingLookup needs adjusting. by lichen") def test_gatherv2_cpu1(): context.set_auto_parallel_context(device_num=16, global_rank=0, parallel_mode="semi_auto_parallel") strategy1 = ((16, 1), (1, 1)) @@ -208,6 +211,7 @@ def test_gatherv2_cpu1(): _executor.compile(net, x, y) +@pytest.mark.skip(reason="The transition from GatherV2 to EmbeddingLookup needs adjusting. 
by lichen") def test_gatherv2_cpu2(): context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel") strategy1 = ((1, 8), (1, 1)) diff --git a/tests/ut/python/parameter_feature/test_var_grad.py b/tests/ut/python/parameter_feature/test_var_grad.py index 7a332b1c3b..f0358394e7 100644 --- a/tests/ut/python/parameter_feature/test_var_grad.py +++ b/tests/ut/python/parameter_feature/test_var_grad.py @@ -22,7 +22,7 @@ from mindspore.common.parameter import ParameterTuple from mindspore.nn import Cell from mindspore.ops import operations as P -context.set_context(mode=context.GRAPH_MODE) +context.set_context(mode=context.GRAPH_MODE, save_graphs=True) def test_net_vargs_expand(): @@ -184,6 +184,27 @@ def test_grad_var_args_with_sens(): _ = grad_net(x, y, sens) +def test_grad_with_param_sens(): + """"test grad_with_sens parameter""" + + class GradNet(Cell): + def __init__(self, net): + super(GradNet, self).__init__() + self.weights = ParameterTuple(net.trainable_params()) + self.net = net + self.sens = Parameter(Tensor(np.ones([3, 4, 5]), dtype=mstype.float32), name='sens', requires_grad=False) + self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True) + + def construct(self, x, y): + return self.grad(self.net, self.weights)(x, y, self.sens) + + x = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) + y = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) + net = SecondNet() + grad_net = GradNet(net) + _ = grad_net(x, y) + + def test_var_args_grad(): class VarNet(Cell): def __init__(self, net): diff --git a/tests/ut/python/pipeline/infer/test_hypermap_specialize.py b/tests/ut/python/pipeline/infer/test_hypermap_specialize.py index 1f669f7355..c292e3662d 100644 --- a/tests/ut/python/pipeline/infer/test_hypermap_specialize.py +++ b/tests/ut/python/pipeline/infer/test_hypermap_specialize.py @@ -53,4 +53,4 @@ def test_hypermap_specialize_param(): expected_ret = (Tensor(np.full(1, 5).astype(np.int32)), Tensor(np.full(2, 
5).astype(np.int32))) ret = hypermap_specialize_param() - assert ret == (expected_ret, expected_ret) + assert ret == (expected_ret, list(expected_ret)) diff --git a/tests/ut/python/pipeline/infer/test_net_infer.py b/tests/ut/python/pipeline/infer/test_net_infer.py index 6b32a7617d..9c19f213f5 100644 --- a/tests/ut/python/pipeline/infer/test_net_infer.py +++ b/tests/ut/python/pipeline/infer/test_net_infer.py @@ -45,6 +45,7 @@ def test_net_infer(): def test_assign_in_while(): + context.set_context(device_target="Ascend") context.set_context(mode=context.GRAPH_MODE) class Net(nn.Cell): def __init__(self, input_shape): diff --git a/tests/ut/python/pynative_mode/test_cell_bprop.py b/tests/ut/python/pipeline/parse/test_cell_bprop.py similarity index 95% rename from tests/ut/python/pynative_mode/test_cell_bprop.py rename to tests/ut/python/pipeline/parse/test_cell_bprop.py index 09a096a090..e896ddc9ac 100644 --- a/tests/ut/python/pynative_mode/test_cell_bprop.py +++ b/tests/ut/python/pipeline/parse/test_cell_bprop.py @@ -16,6 +16,7 @@ import numpy as np import pytest +import mindspore as ms import mindspore.common.dtype as mstype import mindspore.nn as nn from mindspore import Parameter @@ -24,12 +25,14 @@ from mindspore.common.initializer import initializer from mindspore.common.tensor import Tensor from mindspore.ops import composite as C from mindspore.ops import operations as P -from ....mindspore_test_framework.utils.bprop_util import bprop +from .....mindspore_test_framework.utils.bprop_util import bprop def setup_module(module): - context.set_context(mode=context.PYNATIVE_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="CPU") +def teardown_module(module): + context.set_context(device_target="Ascend") class MulAdd(nn.Cell): def __init__(self): @@ -45,7 +48,9 @@ class MulAdd(nn.Cell): def test_grad_mul_add(): mul_add = MulAdd() - assert C.grad_all(mul_add)(1, 2) == (2, 4) + x = Tensor(1, dtype=ms.int32) + y = Tensor(2, dtype=ms.int32) + assert 
C.grad_all(mul_add)(x, y) == (2, 4) class InlineMulADD(nn.Cell): @@ -60,7 +65,9 @@ class InlineMulADD(nn.Cell): def test_grad_inline_mul_add(): inline_mul_add = InlineMulADD() - assert C.grad_all(inline_mul_add)(1, 2) == (3, 6) + x = Tensor(1, dtype=ms.int32) + y = Tensor(2, dtype=ms.int32) + assert C.grad_all(inline_mul_add)(x, y) == (3, 6) class WithParameter(nn.Cell): @@ -93,7 +100,9 @@ class WithNoBprop(nn.Cell): def test_with_no_bprop(): with_no_bprop = WithNoBprop() - assert C.grad_all(with_no_bprop)(1, 2) == (2, 1) + x = Tensor(1, dtype=ms.int32) + y = Tensor(2, dtype=ms.int32) + assert C.grad_all(with_no_bprop)(x, y) == (2, 1) def test_grad_in_bprop_1(): diff --git a/tests/ut/python/pipeline/parse/test_enumerate.py b/tests/ut/python/pipeline/parse/test_enumerate.py index cd808696f1..37f9c603df 100644 --- a/tests/ut/python/pipeline/parse/test_enumerate.py +++ b/tests/ut/python/pipeline/parse/test_enumerate.py @@ -91,6 +91,7 @@ def test_enumerate_tuple_parameter(): index_sum += i ret += (j,) return index_sum, ret + x = Tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))) net = Net() net(x, x, x) @@ -127,10 +128,12 @@ def test_enumerate_tuple_parameter_1(): index_sum += i[0] ret += (i[1],) return index_sum, ret + x = Tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))) net = Net() net(x, x, x) + def test_enumerate_tuple_const_2(): class Net(nn.Cell): def __init__(self): @@ -162,20 +165,37 @@ def test_enumerate_tuple_parameter_2(): index_sum += i[0] ret += (i[1],) return index_sum, ret + x = Tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))) net = Net() net(x, x, x) -def test_enumerate_parameter_type_error(): +def test_enumerate_first_input_type_error(): class Net(nn.Cell): def __init__(self): super(Net, self).__init__() def construct(self, x): return enumerate(x) + x = Tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))) net = Net() with pytest.raises(TypeError) as ex: net(x) - assert "For 'enumerate', the input parameter should be tuple or list" in str(ex.value) + assert "For 
'enumerate', the 'first input'" in str(ex.value) + + +def test_enumerate_start_type_error(): + class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + + def construct(self, x): + return enumerate(x, start=1.2) + + x = Tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))) + net = Net() + with pytest.raises(TypeError) as ex: + net((x, x)) + assert "For 'enumerate', the 'start'" in str(ex.value) diff --git a/tests/ut/python/pipeline/parse/test_for_stmt.py b/tests/ut/python/pipeline/parse/test_for_stmt.py index 4930dae796..748c73e873 100644 --- a/tests/ut/python/pipeline/parse/test_for_stmt.py +++ b/tests/ut/python/pipeline/parse/test_for_stmt.py @@ -17,6 +17,9 @@ from dataclasses import dataclass import numpy as np from mindspore import Tensor, Model, context +from mindspore.ops import operations as P +from mindspore.ops import composite as C +from mindspore.ops import functional as F from mindspore.nn import Cell from mindspore.nn import ReLU from ...ut_filter import non_graph_engine @@ -66,3 +69,58 @@ def function_access_base(number): def test_access_0040(): """ test_access_0040 """ function_access_base(2) + + +class OpSeqNet(Cell): + def __init__(self, loop_count=1): + super().__init__() + self.loop_count = loop_count + self.op_seq = (P.Sqrt(), P.Reciprocal(), P.Square()) + + def construct(self, x): + t = x + for op in self.op_seq: + t = op(t) + return t + + +def test_op_seq_test(): + context.set_context(mode=context.GRAPH_MODE) + net = OpSeqNet() + input_np = np.random.randn(2, 3, 4, 5).astype(np.float32) + input_me = Tensor(input_np) + net(input_me) + + +_grad_fusion = C.MultitypeFuncGraph("grad_fushion") + + +@_grad_fusion.register("Tensor", "Function") +def tensor_grad_scale(x, op): + return op(x) + + +class AllReduceTest(Cell): + def __init__(self, loop_count=1): + super().__init__() + self.op_list = () + self.fushion_flag = [0, 1, 1, 0, 1, 0] + for i in self.fushion_flag: + op = P.AllReduce().add_prim_attr('fusion', i) + self.op_list = 
self.op_list + (op,) + self.hyper_map = C.HyperMap() + + def construct(self, x): + ret = () + for _ in self.fushion_flag: + ret = ret + (x,) + fushion_res = self.hyper_map(F.partial(_grad_fusion), ret, self.op_list) + return fushion_res + + +def test_allreduce_fushio_test(): + context.set_context(mode=context.GRAPH_MODE) + net = AllReduceTest() + input_np = np.random.randn(2, 3, 4, 5).astype(np.float32) + input_me = Tensor(input_np) + net(input_me) diff --git a/tests/ut/python/pipeline/parse/test_parse.py b/tests/ut/python/pipeline/parse/test_parse.py index bbc32d0728..b295adcbec 100644 --- a/tests/ut/python/pipeline/parse/test_parse.py +++ b/tests/ut/python/pipeline/parse/test_parse.py @@ -19,21 +19,27 @@ @Desc : """ import logging +import pytest import numpy as np import mindspore as ms import mindspore.nn as nn from mindspore import Tensor +from mindspore import context +from mindspore.ops import composite as C from mindspore.common.api import ms_function, _executor +from mindspore.ops._grad.grad_base import bprop_getters +from mindspore.ops.primitive import prim_attr_register, PrimitiveWithInfer from mindspore.ops.functional import tensor_add from ...ut_filter import non_graph_engine -# pylint: disable=W0613 +# pylint: disable=W0613,W0612 # W0613: unused-argument log = logging.getLogger("test") log.setLevel(level=logging.ERROR) +context.set_context(mode=context.GRAPH_MODE) # Test case: use the parse obj interface use default parameter @@ -135,3 +141,113 @@ def test_net_with_ndarray(): input_data = np.array([[1.2, 2.1], [2.2, 3.2]]).astype('float32') net(ms.Tensor(input_data)) + + +def test_bprop_with_wrong_output_num(): + context.set_context(check_bprop=True) + class BpropWithWrongOutputNum(PrimitiveWithInfer): + @prim_attr_register + def __init__(self): + super(BpropWithWrongOutputNum, self).__init__('BpropWithWrongOutputNum') + + def __call__(self, x, y): + return x + + def infer_shape(self, x_shape, yshape): + return x_shape + + def infer_dtype(self, x_type, 
y_type): + return x_type + + @bprop_getters.register(BpropWithWrongOutputNum) + def get_bprop_with_wrong_output_num(self): + """Generate bprop for BpropWithWrongOutputNum""" + + def bprop(x, y, out, dout): + return (dout,) + + return bprop + + class BpropWithWrongOutputNumCell(nn.Cell): + def __init__(self): + super(BpropWithWrongOutputNumCell, self).__init__() + + def construct(self, x, y): + return BpropWithWrongOutputNum()(x, y) + + with pytest.raises(TypeError): + C.grad_all(BpropWithWrongOutputNumCell())(1, 2) + +def test_bprop_with_wrong_output_type(): + context.set_context(check_bprop=True) + class BpropWithWrongOutputType(PrimitiveWithInfer): + @prim_attr_register + def __init__(self): + super(BpropWithWrongOutputType, self).__init__('BpropWithWrongOutputType') + + def __call__(self, x): + return x + + def infer_shape(self, x_shape): + return x_shape + + def infer_dtype(self, x_type): + return x_type + + @bprop_getters.register(BpropWithWrongOutputType) + def get_bprop_with_wrong_output_type(self): + """Generate bprop for BpropWithWrongOutputType""" + + def bprop(x, out, dout): + return (1,) + + return bprop + + class BpropWithWrongOutputTypeCell(nn.Cell): + def __init__(self): + super(BpropWithWrongOutputTypeCell, self).__init__() + + def construct(self, x): + return BpropWithWrongOutputType()(x) + + with pytest.raises(TypeError): + C.grad_all(BpropWithWrongOutputTypeCell())(Tensor(np.ones([64, 10]).astype(np.int32))) + + +def test_bprop_with_wrong_output_shape(): + context.set_context(check_bprop=True) + class BpropWithWrongOutputShape(PrimitiveWithInfer): + @prim_attr_register + def __init__(self): + super(BpropWithWrongOutputShape, self).__init__('BpropWithWrongOutputShape') + + def __call__(self, x): + return x + + def infer_shape(self, x_shape): + return x_shape + + def infer_dtype(self, x_type): + return x_type + + @bprop_getters.register(BpropWithWrongOutputShape) + def get_bprop_with_wrong_output_shape(self): + """Generate bprop for 
BpropWithWrongOutputShape""" + ones = Tensor(np.ones([2,]).astype(np.int32)) + + def bprop(x, out, dout): + return (ones,) + + return bprop + + class BpropWithWrongOutputShapeCell(nn.Cell): + def __init__(self): + super(BpropWithWrongOutputShapeCell, self).__init__() + + def construct(self, x): + return BpropWithWrongOutputShape()(x) + + with pytest.raises(TypeError): + net = BpropWithWrongOutputShapeCell() + net.set_grad() + C.grad_all(net)(Tensor(np.ones([64, 10]).astype(np.int32))) diff --git a/tests/ut/python/pynative_mode/nn/test_tensor_operation.py b/tests/ut/python/pynative_mode/nn/test_tensor_operation.py index 306ba63c9f..eb8610bdf1 100644 --- a/tests/ut/python/pynative_mode/nn/test_tensor_operation.py +++ b/tests/ut/python/pynative_mode/nn/test_tensor_operation.py @@ -78,3 +78,9 @@ def test_tensor_imul(): y = Tensor(np.ones([3, 3, 3, 3]).astype(np.float32)) x *= y assert x.asnumpy()[0][0][0][0] == 1.0 + + +def test_tensor_pow(): + x = Tensor(np.ones([3, 3, 3, 3]).astype(np.float32) * 2) + y = x ** 3 + assert y.asnumpy()[0][0][0][0] == 8.0 diff --git a/tests/ut/python/pynative_mode/ops/test_grad.py b/tests/ut/python/pynative_mode/ops/test_grad.py index 8d880a86d9..f028e91beb 100644 --- a/tests/ut/python/pynative_mode/ops/test_grad.py +++ b/tests/ut/python/pynative_mode/ops/test_grad.py @@ -89,7 +89,11 @@ def test_scalar_cast_grad(): output = F.scalar_cast(x, input_t) return output - gfn = C.grad(fx_cast)(input_x) + @ms_function + def grad_fx_cast(input_x): + return C.grad(fx_cast)(input_x) + + gfn = grad_fx_cast(input_x) expect_dx = 1 assert gfn == expect_dx @@ -133,25 +137,6 @@ def test_transpose_grad(): assert np.all(gout[0].asnumpy() == expect) -@non_graph_engine -def test_squeeze_grad(): - """ test_squeeze_grad """ - input_tensor = Tensor(np.ones(shape=[3, 2, 1])) - squeeze = P.Squeeze(2) - - def fn(x): - output = squeeze(x) - return output - - out = fn(input_tensor) - gfn = grad_all_with_sens(fn) - sens = Tensor(np.ones_like(out.asnumpy())) - args = 
[input_tensor, sens] - gout = gfn(*args) - expect = np.ones([3, 2, 1]) - assert np.all(gout[0].asnumpy() == expect) - - def test_select_grad(): """ test_select_grad """ select = P.Select() @@ -176,6 +161,25 @@ def test_select_grad(): assert np.all(gout[2].asnumpy() == expect_y) +@non_graph_engine +def test_squeeze_grad(): + """ test_squeeze_grad """ + input_tensor = Tensor(np.ones(shape=[3, 2, 1])) + squeeze = P.Squeeze(2) + + def fn(x): + output = squeeze(x) + return output + + out = fn(input_tensor) + gfn = grad_all_with_sens(fn) + sens = Tensor(np.ones_like(out.asnumpy())) + args = [input_tensor, sens] + gout = gfn(*args) + expect = np.ones([3, 2, 1]) + assert np.all(gout[0].asnumpy() == expect) + + def test_SubGrad(): """ test_SubGrad """ input_x = Tensor(np.array([[2, 2]])) diff --git a/tests/ut/python/pynative_mode/test_context.py b/tests/ut/python/pynative_mode/test_context.py index 66dc0a4f58..e2d4e31412 100644 --- a/tests/ut/python/pynative_mode/test_context.py +++ b/tests/ut/python/pynative_mode/test_context.py @@ -118,6 +118,12 @@ def test_variable_memory_max_size(): context.set_context(variable_memory_max_size="3GB") +def test_print_file_path(): + """test_print_file_path""" + with pytest.raises(IOError): + context.set_context(print_file_path="./") + + def test_set_context(): """ test_set_context """ context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", diff --git a/tests/ut/python/pynative_mode/test_framstruct.py b/tests/ut/python/pynative_mode/test_framstruct.py index 39a4c97ab9..3b99d0dc5f 100644 --- a/tests/ut/python/pynative_mode/test_framstruct.py +++ b/tests/ut/python/pynative_mode/test_framstruct.py @@ -16,6 +16,7 @@ import numpy as np import pytest +import mindspore as ms import mindspore.nn as nn from mindspore import context from mindspore.common import dtype as mstype @@ -23,8 +24,6 @@ from mindspore.common.parameter import Parameter, ParameterTuple from mindspore.common.tensor import Tensor from mindspore.ops import composite 
as C from mindspore.ops import operations as P -from mindspore.ops._grad.grad_base import bprop_getters -from mindspore.ops.primitive import prim_attr_register, PrimitiveWithInfer from ..ut_filter import non_graph_engine from ....mindspore_test_framework.utils.check_gradient import ( ms_function, check_jacobian, Tensor, NNGradChecker, @@ -156,14 +155,14 @@ def test_if_always_true(): @non_graph_engine def test_f(): """ test_f """ - res = mainf(3, 2) + res = mainf(Tensor(3, dtype=ms.int32), Tensor(2, dtype=ms.int32)) assert res == (2, 3) @non_graph_engine def test_grad_add_mul(): """ test_grad_add_mul """ - res = grad_add_mul(3, 2) + res = grad_add_mul(Tensor(3, dtype=ms.int32), Tensor(2, dtype=ms.int32)) assert res == (2, 7) @@ -262,17 +261,19 @@ def test_if_tensor(): assert res == Tensor(np.ones([1]).astype(np.int32) * 4) -@ms_function def rec(x): """ rec """ if x > 0: return rec(x - 1) return x +@ms_function +def grad_rec(input_x): + return C.grad(rec)(input_x) def test_grad_rec(): """ test_grad_rec """ - res = C.grad(rec)(10) + res = grad_rec(3) assert res == 1 @@ -282,7 +283,6 @@ def test_me_rec(): assert res == 0 -@ms_function def t2_while(x, y): out = y - x i = 0 @@ -298,8 +298,10 @@ def test_while2(): def test_grad_while2(): - res = C.grad(t2_while)(2, 3) - assert res == 3 + @ms_function + def df_t2_while(input_x, input_y): + return C.grad(t2_while)(input_x, input_y) + assert df_t2_while(2, 3) == 3 def if_test(a, b): @@ -316,7 +318,7 @@ def grad_if(x, y): def test_grad_if(): """ test_grad_if """ - assert grad_if(5, 4) == (3, 0) + assert grad_if(Tensor(5, dtype=ms.int32), Tensor(4, dtype=ms.int32)) == (3, 0) # While loop is not unrolled in forward and backward graphs. 
@@ -421,7 +423,7 @@ def grad_while(x): def test_grad_while(): """ test_grad_while """ - assert grad_while(5) == (60,) + assert grad_while(Tensor(5, dtype=ms.int32)) == (60,) @ms_function @@ -438,8 +440,10 @@ def test_factorial(): def test_grad_factorial(): - res = C.grad(factorial)(3) - assert res == 11 + @ms_function + def df_factorial(x): + return C.grad(factorial)(x) + assert df_factorial(3) == 11 @ms_function @@ -513,7 +517,7 @@ def _for(x): ret = ret * i return ret - +@ms_function def grad_for(x): """ grad_for """ return C.grad_all(_for)(x) @@ -786,7 +790,10 @@ def multi_outputs(x, y): def test_grad_multi_outputs(): - assert C.grad_all_with_sens(multi_outputs)(2, 3, (1, 1)) == (4, 4) + @ms_function + def df_multi_outputs(x, y): + return C.grad_all_with_sens(multi_outputs)(x, y, (1, 1)) + assert df_multi_outputs(2, 3) == (4, 4) @ms_function @@ -813,7 +820,7 @@ def grad_refactor_simple_1(x, y): def test_grad_refactor_simple_1(): - assert C.grad_all(grad_refactor_simple_1)(2, 1) == (4, 2) + assert C.grad_all(grad_refactor_simple_1)(Tensor(2, dtype=ms.int32), Tensor(1, dtype=ms.int32)) == (4, 2) def grad_refactor_simple_2(x, y, z): @@ -822,7 +829,10 @@ def grad_refactor_simple_2(x, y, z): def test_grad_refactor_simple_2(): - assert C.grad_all(grad_refactor_simple_2)(2, 3, 0) == (7, 4, 7) + x = Tensor(2, dtype=ms.int32) + y = Tensor(3, dtype=ms.int32) + z = Tensor(0, dtype=ms.int32) + assert C.grad_all(grad_refactor_simple_2)(x, y, z) == (7, 4, 7) def grad_refactor_1(a, b): @@ -835,7 +845,7 @@ def grad_refactor_1(a, b): def test_grad_refactor_1(): - assert C.grad_all(grad_refactor_1)(2, 3) == (3, 2) + assert C.grad_all(grad_refactor_1)(Tensor(2, dtype=ms.int32), Tensor(3, dtype=ms.int32)) == (3, 2) def grad_refactor_2(a, b): @@ -848,7 +858,7 @@ def grad_refactor_2(a, b): def test_grad_refactor_2(): - assert C.grad_all(grad_refactor_2)(2, 3) == (27, 54) + assert C.grad_all(grad_refactor_2)(Tensor(2, dtype=ms.int32), Tensor(3, dtype=ms.int32)) == (27, 54) def 
grad_refactor_3(a): @@ -859,7 +869,10 @@ def grad_refactor_3(a): def test_grad_refactor_3(): - assert C.grad_all(grad_refactor_3)(3) == (3,) + @ms_function + def df_refactor_3(x): + return C.grad_all(grad_refactor_3)(x) + assert df_refactor_3(3) == (3,) def grad_refactor_4(a): @@ -870,7 +883,7 @@ def grad_refactor_4(a): def test_grad_refactor_4(): - assert C.grad_all(grad_refactor_4)(4) == (3,) + assert C.grad_all(grad_refactor_4)(Tensor(4, dtype=ms.int32)) == (3,) def grad_refactor_5(a): @@ -881,7 +894,10 @@ def grad_refactor_5(a): def test_grad_refactor_5(): - assert C.grad_all(grad_refactor_5)(1) == (1,) + @ms_function + def df_refactor_5(x): + return C.grad_all(grad_refactor_5)(x) + assert df_refactor_5(1) == (1,) def grad_refactor_6(a, b): @@ -892,7 +908,7 @@ def grad_refactor_6(a, b): def test_grad_refactor_6(): - assert C.grad_all(grad_refactor_6)(3, 2) == (3, 1) + assert C.grad_all(grad_refactor_6)(Tensor(3, dtype=ms.int32), Tensor(2, dtype=ms.int32)) == (3, 1) def grad_refactor_while(x): @@ -904,7 +920,10 @@ def grad_refactor_while(x): def test_grad_refactor_9(): - assert C.grad_all(grad_refactor_while)(3) == (6,) + @ms_function + def df_refactor_while(input_x): + return C.grad_all(grad_refactor_while)(input_x) + assert df_refactor_while(3) == (6,) def grad_refactor__while_1(x): @@ -919,7 +938,7 @@ def grad_refactor__while_1(x): def test_grad_refactor_10(): """ test_grad_while """ - assert C.grad_all(grad_refactor__while_1)(5) == (60,) + assert C.grad_all(grad_refactor__while_1)(Tensor(5, dtype=ms.int32)) == (60,) def test_grad_refactor_11(): @@ -985,7 +1004,10 @@ def grad_refactor_14(a, b): def test_grad_refactor_14(): - assert C.grad_all(grad_refactor_14)(2, 3) == (3, 9) + @ms_function + def df_refactor_14(x, y): + return C.grad_all(grad_refactor_14)(x, y) + assert df_refactor_14(2, 3) == (3, 9) # pylint: disable=using-constant-test @@ -1011,109 +1033,11 @@ def test_grad_if_defer_inline(): assert grads == (Tensor(np.full([128, 96], 0.6, 
dtype=np.float32)),) -def test_bprop_with_wrong_output_num(): - context.set_context(check_bprop=True) - class BpropWithWrongOutputNum(PrimitiveWithInfer): - @prim_attr_register - def __init__(self): - super(BpropWithWrongOutputNum, self).__init__('BpropWithWrongOutputNum') - - def __call__(self, x, y): - return x - - def infer_shape(self, x_shape, yshape): - return x_shape - - def infer_dtype(self, x_type, y_type): - return x_type - - @bprop_getters.register(BpropWithWrongOutputNum) - def get_bprop_with_wrong_output_num(self): - """Generate bprop for BpropWithWrongOutputNum""" - - def bprop(x, y, out, dout): - return (dout,) - - return bprop - - class BpropWithWrongOutputNumCell(nn.Cell): - def __init__(self): - super(BpropWithWrongOutputNumCell, self).__init__() - - def construct(self, x, y): - return BpropWithWrongOutputNum()(x, y) - - with pytest.raises(TypeError): - C.grad_all(BpropWithWrongOutputNumCell())(1, 2) - -def test_bprop_with_wrong_output_type(): - context.set_context(check_bprop=True) - class BpropWithWrongOutputType(PrimitiveWithInfer): - @prim_attr_register - def __init__(self): - super(BpropWithWrongOutputType, self).__init__('BpropWithWrongOutputType') - - def __call__(self, x): - return x - - def infer_shape(self, x_shape): - return x_shape - - def infer_dtype(self, x_type): - return x_type - - @bprop_getters.register(BpropWithWrongOutputType) - def get_bprop_with_wrong_output_type(self): - """Generate bprop for BpropWithWrongOutputType""" - - def bprop(x, out, dout): - return (1,) - - return bprop - - class BpropWithWrongOutputTypeCell(nn.Cell): - def __init__(self): - super(BpropWithWrongOutputTypeCell, self).__init__() - - def construct(self, x): - return BpropWithWrongOutputType()(x) - - with pytest.raises(TypeError): - C.grad_all(BpropWithWrongOutputTypeCell())(Tensor(np.ones([64, 10]).astype(np.int32))) - - -def test_bprop_with_wrong_output_shape(): - context.set_context(check_bprop=True) - class 
BpropWithWrongOutputShape(PrimitiveWithInfer): - @prim_attr_register - def __init__(self): - super(BpropWithWrongOutputShape, self).__init__('BpropWithWrongOutputShape') - - def __call__(self, x): - return x - - def infer_shape(self, x_shape): - return x_shape - - def infer_dtype(self, x_type): - return x_type - - @bprop_getters.register(BpropWithWrongOutputShape) - def get_bprop_with_wrong_output_shape(self): - """Generate bprop for BpropWithWrongOutputShape""" - ones = Tensor(np.ones([2,]).astype(np.int32)) - - def bprop(x, out, dout): - return (ones,) - - return bprop - - class BpropWithWrongOutputShapeCell(nn.Cell): +def test_dict_const(): + class Net(nn.Cell): def __init__(self): - super(BpropWithWrongOutputShapeCell, self).__init__() - - def construct(self, x): - return BpropWithWrongOutputShape()(x) - - with pytest.raises(TypeError): - C.grad_all(BpropWithWrongOutputShapeCell())(Tensor(np.ones([64, 10]).astype(np.int32))) + super(Net, self).__init__() + self.res = {'1': 10} + def construct(self): + return self.res + Net()() diff --git a/tests/ut/python/pynative_mode/test_hook.py b/tests/ut/python/pynative_mode/test_hook.py index 07a7a7ad8b..f34a81ab5c 100644 --- a/tests/ut/python/pynative_mode/test_hook.py +++ b/tests/ut/python/pynative_mode/test_hook.py @@ -13,6 +13,7 @@ # limitations under the License. 
# ============================================================================ import numpy as np +import pytest import mindspore.nn as nn import mindspore.ops.operations as P @@ -154,22 +155,47 @@ def test_hook(): print(loss_output.asnumpy().shape) +bprop_debug = False + class MulAdd(nn.Cell): def __init__(self): super(MulAdd, self).__init__() def construct(self, x, y): - return 2 * x + y + return 2 * x * x + y * y def bprop(self, x, y, out, dout): - assert (x == 1) - assert (y == 2) - assert (out == 4) - assert (dout == 1) - return 3 * dout, 2 * y + global bprop_debug + bprop_debug = True + return dout, 2 * y def test_custom_bprop(): mul_add = MulAdd() mul_add.bprop_debug = True - assert C.grad_all(mul_add)(1, 2) == (3, 4) + x = Tensor(np.array([1, 2, 3]).astype(np.int32)) + y = Tensor(np.array([2, 3, 4]).astype(np.int32)) + C.grad_all(mul_add)(x, y) + assert bprop_debug + + +class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + + def construct(self, x, y): + return 2 * x * x + y * y + +def test_grad_all(): + net = Net() + x = Tensor(np.array([1, 2, 3]).astype(np.int32)) + y = Tensor(np.array([2, 3, 4]).astype(np.int32)) + res = C.grad_all(net)(x, y) + print(res) + +def test_check_input(): + net = Net() + x = np.array([1, 2, 3]) + y = np.array([2, 3, 4]) + with pytest.raises(TypeError): + net(x, y) diff --git a/tests/ut/python/pynative_mode/test_implicit_conversion.py b/tests/ut/python/pynative_mode/test_implicit_conversion.py new file mode 100644 index 0000000000..ecaffd87f2 --- /dev/null +++ b/tests/ut/python/pynative_mode/test_implicit_conversion.py @@ -0,0 +1,204 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" test implicit conversion """ +import numpy as np + +from mindspore import Tensor, nn +from mindspore.ops import composite as C + + +def test_float_tensor_and_int_add(): + x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32)) + y = 2 + ret_actual = x + y + ret_expect = Tensor(np.array([[2.1, 2.2, 2.3], [2.4, 2.5, 2.6]], dtype=np.float32)) + assert ret_actual.dtype == ret_expect.dtype + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() + + +def test_bool_tensor_and_float_add(): + x = Tensor(np.array([[True, False], [False, True]], dtype=np.bool_)) + y = 3.3 + ret_actual = x + y + ret_expect = Tensor(np.array([[4.3, 3.3], [3.3, 4.3]], dtype=np.float32)) + assert ret_actual.dtype == ret_expect.dtype + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() + + +def test_bool_tensor_and_int_add(): + x = Tensor(np.array([[True, False], [False, True]], dtype=np.bool_)) + y = 3 + ret_actual = x + y + ret_expect = Tensor(np.array([[4, 3], [3, 4]], dtype=np.int32)) + assert ret_actual.dtype == ret_expect.dtype + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() + + +def test_bool_and_int_tensor_add(): + x = True + y = Tensor(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32)) + ret_actual = x + y + ret_expect = Tensor(np.array([[2, 3, 4], [5, 6, 7]], dtype=np.int32)) + assert ret_actual.dtype == ret_expect.dtype + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() + + +def test_float_tensor_and_int_tensor_add(): + x = 
Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32)) + y = Tensor(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32)) + ret_actual = x + y + ret_expect = Tensor(np.array([[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]], dtype=np.float32)) + assert ret_actual.dtype == ret_expect.dtype + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() + + +def test_float_tensor_and_float_tensor_add(): + x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32)) + y = Tensor(np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float16)) + ret_actual = x + y + ret_expect = Tensor(np.array([[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]], dtype=np.float32)) + assert ret_actual.dtype == ret_expect.dtype + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() + + +def test_int_tensor_and_int_tensor_add(): + x = Tensor(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int8)) + y = Tensor(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32)) + ret_actual = x + y + ret_expect = Tensor(np.array([[2, 4, 6], [8, 10, 12]], dtype=np.int32)) + assert ret_actual.dtype == ret_expect.dtype + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() + + +def test_float_tensor_and_bool_tensors_add(): + x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32)) + y = Tensor(np.array([[True, True, True], [False, False, False]], dtype=np.bool_)) + ret_actual = x + y + ret_expect = Tensor(np.array([[1.1, 1.2, 1.3], [0.4, 0.5, 0.6]], dtype=np.float32)) + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() + + +def test_float_tensor_and_bool_tensors_add_grad(): + class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + + def construct(self, x, y): + return x + y + + class GradNet(nn.Cell): + def __init__(self, net): + super(GradNet, self).__init__() + self.net = net + + def construct(self, x, y, sens): + + return C.grad_all_with_sens(self.net)(x, y, sens) + + x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32)) + y = 
Tensor(np.array([[True, True, True], [False, False, False]], dtype=np.bool_)) + sens = Tensor(np.array([[1.0, 2.0, 0.0], [0.0, 3.0, 4.0]], dtype=np.float32)) + net = Net() + grad_net = GradNet(net) + ret = grad_net(x, y, sens) + assert ret[0].dtype == x.dtype + assert ret[1].dtype == y.dtype + assert (ret[0].asnumpy() == sens.asnumpy()).all() + assert (ret[1].asnumpy() == sens.asnumpy().astype(np.bool_)).all() + + +def test_float_tensor_and_int_tensors_sub_grad(): + class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + + def construct(self, x, y): + return x - y + + class GradNet(nn.Cell): + def __init__(self, net): + super(GradNet, self).__init__() + self.net = net + + def construct(self, x, y, sens): + + return C.grad_all_with_sens(self.net)(x, y, sens) + + x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32)) + y = Tensor(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32)) + sens = Tensor(np.array([[1.0, 2.0, 0.0], [0.0, 3.0, 4.0]], dtype=np.float32)) + net = Net() + grad_net = GradNet(net) + ret = grad_net(x, y, sens) + print(ret) + assert ret[0].dtype == x.dtype + assert ret[1].dtype == y.dtype + assert (ret[0].asnumpy() == sens.asnumpy()).all() + assert (ret[1].asnumpy() == sens.asnumpy() * -1).all() + + +def test_float16_tensor_and_float32_tensors_sub_grad(): + class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + + def construct(self, x, y): + return x - y + + class GradNet(nn.Cell): + def __init__(self, net): + super(GradNet, self).__init__() + self.net = net + + def construct(self, x, y, sens): + + return C.grad_all_with_sens(self.net)(x, y, sens) + + x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.int32)) + y = Tensor(np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float32)) + sens = Tensor(np.array([[1.0, 2.0, 0.0], [0.0, 3.0, 4.0]], dtype=np.float32)) + net = Net() + grad_net = GradNet(net) + ret = grad_net(x, y, sens) + print(ret) + assert ret[0].dtype == x.dtype + 
assert ret[1].dtype == y.dtype + assert (ret[0].asnumpy() == sens.asnumpy()).all() + assert (ret[1].asnumpy() == sens.asnumpy() * -1).all() + + +def test_float_tensor_and_int_add_grad(): + class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + + def construct(self, x): + return x + 2 + + class GradNet(nn.Cell): + def __init__(self, net): + super(GradNet, self).__init__() + self.net = net + + def construct(self, x, sens): + return C.grad_all_with_sens(self.net)(x, sens) + + x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32)) + sens = Tensor(np.array([[1.0, 2.0, 0.0], [0.0, 3.0, 4.0]], dtype=np.float32)) + net = Net() + grad_net = GradNet(net) + ret = grad_net(x, sens) + assert ret[0].dtype == x.dtype + assert (ret[0].asnumpy() == sens.asnumpy()).all() diff --git a/tests/ut/python/pynative_mode/test_insert_grad_of.py b/tests/ut/python/pynative_mode/test_insert_grad_of.py index 0a28bbbb63..218a4ee253 100644 --- a/tests/ut/python/pynative_mode/test_insert_grad_of.py +++ b/tests/ut/python/pynative_mode/test_insert_grad_of.py @@ -46,6 +46,7 @@ def test_InsertGradientOf_1(): c = x * y return c + @ms_function def f(x, y): return C.grad_all(stop_test)(x, y) @@ -80,6 +81,7 @@ def test_InsertGradientOf_2(): def f(x, y): return clip_test(x, y) + @ms_function def fd(x, y): return C.grad_all(clip_test)(x, y) diff --git a/tests/ut/python/pynative_mode/test_stop_gradient.py b/tests/ut/python/pynative_mode/test_stop_gradient.py index a94f80adf0..09e4f25c54 100644 --- a/tests/ut/python/pynative_mode/test_stop_gradient.py +++ b/tests/ut/python/pynative_mode/test_stop_gradient.py @@ -16,6 +16,7 @@ import numpy as np import pytest +import mindspore as ms import mindspore.common.dtype as mstype import mindspore.nn as nn from mindspore import Parameter, ParameterTuple @@ -81,16 +82,24 @@ def stop_test4(x, y): return e +@ms_function def grad_stop_test(x, y): """ grad_stop_test """ return C.grad_all(stop_test2)(x, y) +@ms_function def 
grad_stop_test1(x, y): """ grad_stop_test1 """ return C.grad_all(stop_test3)(x, y) +@ms_function +def grad_stop_test5(x, y): + """ grad_stop_test5 """ + return C.grad_all(stop_test5)(x, y) + + def test_stop(): """ test_stop """ print("test_stop:", grad_stop_test(1, 1)) @@ -103,7 +112,7 @@ def test_stop1(): def test_stop5(): """ test_stop1 """ - print("test_stop5:", C.grad_all(stop_test5)(2, 3)) + print("test_stop5:", grad_stop_test5(2, 3)) class GradWrap(nn.Cell): @@ -247,7 +256,7 @@ def test_stop_gradient_4(): def stop_test(x): return stop_gradient(x) - assert C.grad_all(stop_test)(1) == (0,) + assert C.grad_all(stop_test)(Tensor(1, dtype=ms.int32)) == (0,) def test_stop_gradient_5(): @@ -257,7 +266,7 @@ def test_stop_gradient_5(): ret = x + y return ret - assert C.grad_all(stop_test)(1) == (1,) + assert C.grad_all(stop_test)(Tensor(1, dtype=ms.int32)) == (1,) def test_stop_gradient_6(): @@ -266,7 +275,7 @@ def test_stop_gradient_6(): ret = stop_gradient(ret) return ret - assert C.grad_all(stop_test)(1, 3) == (0, 0) + assert C.grad_all(stop_test)(Tensor(1, dtype=ms.int32), Tensor(3, dtype=ms.int32)) == (0, 0) class PrimWithMultiOutputs(PrimitiveWithInfer): diff --git a/tests/ut/python/train/quant/mobilenetv2.py b/tests/ut/python/train/quant/mobilenetv2.py deleted file mode 100644 index 163b230e1e..0000000000 --- a/tests/ut/python/train/quant/mobilenetv2.py +++ /dev/null @@ -1,115 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""MobileNetV2""" -from mindspore import nn -from mindspore.ops import operations as P - - -def make_divisible(input_x, div_by=8): - return int((input_x + div_by) // div_by) - - -def _conv_bn(in_channel, - out_channel, - ksize, - stride=1): - """Get a conv2d batchnorm and relu layer.""" - return nn.SequentialCell( - [nn.Conv2d(in_channel, - out_channel, - kernel_size=ksize, - stride=stride), - nn.BatchNorm2d(out_channel)]) - - -class InvertedResidual(nn.Cell): - def __init__(self, inp, oup, stride, expend_ratio): - super(InvertedResidual, self).__init__() - self.stride = stride - assert stride in [1, 2] - - hidden_dim = int(inp * expend_ratio) - self.use_res_connect = self.stride == 1 and inp == oup - if expend_ratio == 1: - self.conv = nn.SequentialCell([ - nn.Conv2d(hidden_dim, hidden_dim, 3, stride, group=hidden_dim), - nn.BatchNorm2d(hidden_dim), - nn.ReLU6(), - nn.Conv2d(hidden_dim, oup, 1, 1), - nn.BatchNorm2d(oup) - ]) - else: - self.conv = nn.SequentialCell([ - nn.Conv2d(inp, hidden_dim, 1, 1), - nn.BatchNorm2d(hidden_dim), - nn.ReLU6(), - - nn.Conv2d(hidden_dim, hidden_dim, 3, stride, group=hidden_dim), - nn.BatchNorm2d(hidden_dim), - nn.ReLU6(), - - nn.Conv2d(hidden_dim, oup, 1, 1), - nn.BatchNorm2d(oup) - ]) - - def construct(self, input_x): - out = self.conv(input_x) - if self.use_res_connect: - out = input_x + out - return out - - -class MobileNetV2(nn.Cell): - def __init__(self, num_class=1000, input_size=224, width_mul=1.): - super(MobileNetV2, self).__init__() - _ = input_size - block = InvertedResidual - input_channel = 32 - last_channel = 1280 - inverted_residual_setting = [ - [1, 16, 1, 1], - [6, 24, 2, 2], - [6, 32, 3, 2], - [6, 64, 4, 2], - [6, 96, 3, 1], - [6, 160, 3, 2], - [6, 230, 1, 1], - ] - if width_mul > 1.0: - last_channel = make_divisible(last_channel * 
width_mul) - self.last_channel = last_channel - features = [_conv_bn(3, input_channel, 3, 2)] - - for t, c, n, s in inverted_residual_setting: - out_channel = make_divisible(c * width_mul) if t > 1 else c - for i in range(n): - if i == 0: - features.append(block(input_channel, out_channel, s, t)) - else: - features.append(block(input_channel, out_channel, 1, t)) - input_channel = out_channel - - features.append(_conv_bn(input_channel, self.last_channel, 1)) - - self.features = nn.SequentialCell(features) - self.mean = P.ReduceMean(keep_dims=False) - self.classifier = nn.Dense(self.last_channel, num_class) - - def construct(self, input_x): - out = input_x - out = self.features(out) - out = self.mean(out, (2, 3)) - out = self.classifier(out) - return out diff --git a/tests/ut/python/train/quant/mobilenetv2_combined.py b/tests/ut/python/train/quant/mobilenetv2_combined.py deleted file mode 100644 index 51916192d8..0000000000 --- a/tests/ut/python/train/quant/mobilenetv2_combined.py +++ /dev/null @@ -1,122 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -"""mobile net v2""" -from mindspore import nn -from mindspore.ops import operations as P - - -def make_divisible(input_x, div_by=8): - return int((input_x + div_by) // div_by) - - -def _conv_bn(in_channel, - out_channel, - ksize, - stride=1): - """Get a conv2d batchnorm and relu layer.""" - return nn.SequentialCell( - [nn.Conv2dBnAct(in_channel, - out_channel, - kernel_size=ksize, - stride=stride, - has_bn=True)]) - - -class InvertedResidual(nn.Cell): - def __init__(self, inp, oup, stride, expend_ratio): - super(InvertedResidual, self).__init__() - self.stride = stride - assert stride in [1, 2] - - hidden_dim = int(inp * expend_ratio) - self.use_res_connect = self.stride == 1 and inp == oup - if expend_ratio == 1: - self.conv = nn.SequentialCell([ - nn.Conv2dBnAct(hidden_dim, - hidden_dim, - 3, - stride, - group=hidden_dim, - has_bn=True, - activation='relu6'), - nn.Conv2dBnAct(hidden_dim, oup, 1, 1, - has_bn=True) - ]) - else: - self.conv = nn.SequentialCell([ - nn.Conv2dBnAct(inp, hidden_dim, 1, 1, - has_bn=True, - activation='relu6'), - nn.Conv2dBnAct(hidden_dim, - hidden_dim, - 3, - stride, - group=hidden_dim, - has_bn=True, - activation='relu6'), - nn.Conv2dBnAct(hidden_dim, oup, 1, 1, - has_bn=True) - ]) - self.add = P.TensorAdd() - - def construct(self, input_x): - out = self.conv(input_x) - if self.use_res_connect: - out = self.add(input_x, out) - return out - - -class MobileNetV2(nn.Cell): - def __init__(self, num_class=1000, input_size=224, width_mul=1.): - super(MobileNetV2, self).__init__() - _ = input_size - block = InvertedResidual - input_channel = 32 - last_channel = 1280 - inverted_residual_setting = [ - [1, 16, 1, 1], - [6, 24, 2, 2], - [6, 32, 3, 2], - [6, 64, 4, 2], - [6, 96, 3, 1], - [6, 160, 3, 2], - [6, 230, 1, 1], - ] - if width_mul > 1.0: - last_channel = make_divisible(last_channel * width_mul) - self.last_channel = last_channel - features = [_conv_bn(3, 
input_channel, 3, 2)] - - for t, c, n, s in inverted_residual_setting: - out_channel = make_divisible(c * width_mul) if t > 1 else c - for i in range(n): - if i == 0: - features.append(block(input_channel, out_channel, s, t)) - else: - features.append(block(input_channel, out_channel, 1, t)) - input_channel = out_channel - - features.append(_conv_bn(input_channel, self.last_channel, 1)) - - self.features = nn.SequentialCell(features) - self.mean = P.ReduceMean(keep_dims=False) - self.classifier = nn.DenseBnAct(self.last_channel, num_class) - - def construct(self, input_x): - out = input_x - out = self.features(out) - out = self.mean(out, (2, 3)) - out = self.classifier(out) - return out diff --git a/tests/ut/python/train/quant/test_quant.py b/tests/ut/python/train/quant/test_quant.py index 1a21bc2c02..39e887170c 100644 --- a/tests/ut/python/train/quant/test_quant.py +++ b/tests/ut/python/train/quant/test_quant.py @@ -20,7 +20,7 @@ import mindspore.context as context from mindspore import Tensor from mindspore import nn from mindspore.train.quant import quant as qat -from mobilenetv2_combined import MobileNetV2 +from model_zoo.mobilenetv2_quant.src.mobilenetV2 import mobilenetV2 context.set_context(mode=context.GRAPH_MODE, device_target="GPU") @@ -42,7 +42,7 @@ class LeNet5(nn.Cell): def __init__(self, num_class=10): super(LeNet5, self).__init__() self.num_class = num_class - self.conv1 = nn.Conv2dBnAct(1, 6, kernel_size=5, has_bn=True, activation='relu6', pad_mode="valid") + self.conv1 = nn.Conv2dBnAct(1, 6, kernel_size=5, has_bn=True, activation='relu', pad_mode="valid") self.conv2 = nn.Conv2dBnAct(6, 16, kernel_size=5, activation='relu', pad_mode="valid") self.fc1 = nn.DenseBnAct(16 * 5 * 5, 120, activation='relu') self.fc2 = nn.DenseBnAct(120, 84, activation='relu') @@ -67,20 +67,19 @@ def test_qat_lenet(): img = Tensor(np.ones((32, 1, 32, 32)).astype(np.float32)) net = LeNet5() net = qat.convert_quant_network( - net, freeze_bn=10000, num_bits=8) + net, 
bn_fold=True, per_channel=[True, False], symmetric=[True, False]) # should load the checkpoint. mock here for param in net.get_parameters(): param.init_data() - qat.export_geir(net, img, file_name="quant.pb") + qat.export(net, img, file_name="quant.pb") @pytest.mark.skip(reason="no `te.lang.cce` in ut env") def test_qat_mobile(): - net = MobileNetV2() + network = mobilenetV2(num_classes=1000) img = Tensor(np.ones((1, 3, 224, 224)).astype(np.float32)) - net = qat.convert_quant_network( - net, quant_delay=0, bn_fold=True, freeze_bn=10000, num_bits=8) + network = qat.convert_quant_network(network, bn_fold=True, per_channel=[True, False], symmetric=[True, False]) # should load the checkpoint. mock here - for param in net.get_parameters(): + for param in network.get_parameters(): param.init_data() - qat.export_geir(net, img, file_name="quant.pb") + qat.export(network, img, file_name="quant.pb") diff --git a/tests/ut/python/train/test_amp.py b/tests/ut/python/train/test_amp.py index c7befb6c2b..6bb4ec5464 100644 --- a/tests/ut/python/train/test_amp.py +++ b/tests/ut/python/train/test_amp.py @@ -22,10 +22,10 @@ from mindspore import amp from mindspore import nn from mindspore.train import Model, ParallelMode from mindspore.common import dtype as mstype -from mindspore.model_zoo.resnet import resnet50 from ....dataset_mock import MindData from mindspore.parallel._auto_parallel_context import auto_parallel_context from mindspore.communication.management import init +from tests.ut.python.model.resnet import resnet50 def setup_module(module): _ = module diff --git a/tests/ut/python/utils/test_serialize.py b/tests/ut/python/utils/test_serialize.py index 035ea87845..7f85695a19 100644 --- a/tests/ut/python/utils/test_serialize.py +++ b/tests/ut/python/utils/test_serialize.py @@ -34,7 +34,7 @@ from mindspore.train.serialization import save_checkpoint, load_checkpoint, load _exec_save_checkpoint, export, _save_graph from ..ut_filter import non_graph_engine 
-context.set_context(mode=context.GRAPH_MODE, print_file_path="print.pb") +context.set_context(mode=context.GRAPH_MODE, print_file_path="print/print.pb") class Net(nn.Cell): @@ -374,10 +374,13 @@ def test_print(): def teardown_module(): - files = ['parameters.ckpt', 'new_ckpt.ckpt', 'empty.ckpt', 'print.pb'] + files = ['parameters.ckpt', 'new_ckpt.ckpt', 'empty.ckpt'] for item in files: file_name = './' + item if not os.path.exists(file_name): continue os.chmod(file_name, stat.S_IWRITE) os.remove(file_name) + import shutil + if os.path.exists('./print'): + shutil.rmtree('./print') diff --git a/tests/vm_impl/vm_me.py b/tests/vm_impl/vm_me.py index 89cc1569a9..7216ec613b 100644 --- a/tests/vm_impl/vm_me.py +++ b/tests/vm_impl/vm_me.py @@ -441,7 +441,7 @@ def max_pool_grad(x, dout, pool_h, pool_w, stride): """Grad of max pooling.""" dout = dout.transpose(0, 2, 3, 1) pool_size = pool_h * pool_w - dmax = np.zeros((dout.size, pool_size)) + dmax = np.zeros((dout.size, pool_size), dout.dtype) col = im2col(x, pool_h, pool_w, stride) col = col.reshape(-1, pool_h * pool_w) arg_max = np.argmax(col, axis=1) @@ -456,7 +456,7 @@ def max_pool_grad_with_argmax(x, dout, arg_max, pool_h, pool_w, stride): """Grad of max pooling with argmax.""" dout = dout.transpose(0, 2, 3, 1) pool_size = pool_h * pool_w - dmax = np.zeros((dout.size, pool_size)) + dmax = np.zeros((dout.size, pool_size), dout.dtype) dmax[np.arange(arg_max.size), arg_max.flatten()] = dout.flatten() dmax = dmax.reshape(dout.shape + (pool_size,)) dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1) diff --git a/third_party/icu4c/filter.json b/third_party/icu4c/filter.json deleted file mode 100644 index b3decad8fb..0000000000 --- a/third_party/icu4c/filter.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "strategy": "additive", - "featureFilters": { - "normalization": "include" - } -} \ No newline at end of file diff --git a/third_party/patch/pslite/ps_lite.patch001 b/third_party/patch/pslite/ps_lite.patch001 
index bdc7b11a4b..e2e51e93c8 100644 --- a/third_party/patch/pslite/ps_lite.patch001 +++ b/third_party/patch/pslite/ps_lite.patch001 @@ -12,16 +12,7 @@ diff -Npur ps-lite-master/include/dmlc/base.h ps-lite-master-new/include/dmlc/ba /*! diff -Npur ps-lite-master/include/dmlc/logging.h ps-lite-master-new/include/dmlc/logging.h --- ps-lite-master/include/dmlc/logging.h 2020-02-29 13:59:55.000000000 +0800 -+++ ps-lite-master-new/include/dmlc/logging.h 2020-07-01 11:58:00.015919207 +0800 -@@ -13,7 +13,7 @@ - #include - #include - #include --#include "./base.h" -+//#include "./base.h" - - #if DMLC_LOG_STACK_TRACE - #include ++++ ps-lite-master-new/include/dmlc/logging.h 2020-07-08 21:35:33.334584767 +0800 @@ -52,7 +52,7 @@ struct Error : public std::runtime_error namespace dmlc {