Compare commits


206 Commits
master ... r0.6

Author SHA1 Message Date
mindspore-ci-bot a2edfcb09b !8171 [Lightweight PR]: update RELEASE.md.
5 years ago
mindspore-ci-bot bbff1828ba !8185 fix securec download links due to mistakes made by openeuler community
5 years ago
yanghaoran 94c644cb52 update graphengine, fix securec download links
5 years ago
shenwei41 df90cf1538 update RELEASE.md.
5 years ago
mindspore-ci-bot 4ca658319b !6531 [MD] r0.6 branch: MD5 value update in the file icu4c.cmake of branch r0.6
6 years ago
mayang aa89c9f33c MD5 value update in the file icu4c.cmake of branch r0.6
6 years ago
mindspore-ci-bot d1b1a626c2 !5447 Support manual conversion to quantized network of resnet
6 years ago
chenfei d27f7bf88b add manual quantized network of resnet
6 years ago
mindspore-ci-bot 50d7480a4e !4457 modify yolov3_quant eval script
6 years ago
chengxianbin ef9e3a5360 modify yolov3_darknet53
6 years ago
mindspore-ci-bot 04a6612baf !4424 modify quant DenseBnAct API
6 years ago
chengxianbin 7bc5b71b44 modify quant DenseBnAct code
6 years ago
mindspore-ci-bot 30452899ec !4351 modify yolov3-darknet quant net codes
6 years ago
chengxianbin 59863abcd3 modify yolov3-darknet53 quant code
6 years ago
mindspore-ci-bot a15ae5238d !4304 upload yolov3-darknet quant net codes
6 years ago
chengxianbin c80a1da8ac upload yolov3-darknet53 quant code
6 years ago
mindspore-ci-bot 7d483cd09c !4115 runpackage sync C75B050 for mindspore r0.6
6 years ago
wuweikang 9f3dcd7ab9 runpackage sync C75B050 for r0.6
6 years ago
mindspore-ci-bot 801660ef08 !3912 fix numpyslice bug
6 years ago
YangLuo 16f54c900b fix numpyslice bug
6 years ago
mindspore-ci-bot 5465525f09 !3812 upgrade dockerfile version to 0.6.0-beta
6 years ago
mindspore-ci-bot d9320b1606 !3805 modify release note for 0.6
6 years ago
yanghaoran 34f2e94bd4 update mindspore version to 0.6.0-beta
6 years ago
changzherui a0e575a17d modify release
6 years ago
lujiale dc4e15d32c update RELEASE.md.
6 years ago
lujiale 917a7e227f update RELEASE.md.
6 years ago
lujiale 1b7daf777a update build.sh.
6 years ago
mindspore-ci-bot 7d6160516f !3761 simplify googlenet
6 years ago
panfengfeng ca881ec03e add maxpool_with_argmax/grad cuda kernel
6 years ago
mindspore-ci-bot 983437feaf !3757 debug mindspore hub
6 years ago
chenzomi a059e8910f debug mindspore hub
6 years ago
mindspore-ci-bot 9dc23eeb98 !3602 Delete hard code in pull node
6 years ago
ZPaC 78e3cb4bc4 Delete hard code in pull kernel.
6 years ago
mindspore-ci-bot 0db3ff5773 !3742 fix GetInputReshapeType reports ERROR
6 years ago
mindspore-ci-bot c9583ad3a4 !3730 fix bug of cast dtype when using mix_precision in pynative mode
6 years ago
mindspore-ci-bot 294520e1fd !3548 Pass optimizer attributes to push kernel and parameter server.
6 years ago
mindspore-ci-bot 4621565258 !3733 block trans data to change format
6 years ago
mindspore-ci-bot b3b71e1d3f !3724 modify readme and TimeMonitor steps
6 years ago
liubuyu 7d5e523743 fix set/get reshape type bug
6 years ago
mindspore-ci-bot 0fb669190a !3703 Enlarge the threshold of resnet50 performance st in pynative
6 years ago
jinyaohui db216a077a fix bug of cast dtype when using mix_precision in pynative mode
6 years ago
lvchangquan f298e55072 block use trans data to change format
6 years ago
mindspore-ci-bot dcd471eb96 !3718 add mindspore hub for download ckpt file
6 years ago
wanghua c9a675f4e5 modify readme and TimeMonitor steps
6 years ago
mindspore-ci-bot fdc183ad36 !3704 [r0.6][bug][auto_mixed_precision]fix amp bug in eval
6 years ago
lvliang 937c5b5d8e enlarge the threshold of resnet50 performance in pynative
6 years ago
chenzomi 783b823a25 add mindspore hub for download ckpt file
6 years ago
mindspore-ci-bot 30ffcd8a1f !3681 modelzoo: support vgg16 in GPU
6 years ago
mindspore-ci-bot 9ab94fa076 !3685 add tinybert scripts
6 years ago
mindspore-ci-bot 944929f980 !3682 add googlenet gpu
6 years ago
mindspore-ci-bot 09dd4128d5 !3689 fix cpu multi graph mem error
6 years ago
Wei Luning ca4b2f6c0b fix eval in amp
6 years ago
mindspore-ci-bot 7f3926429b !3628 fix log bug
6 years ago
kswang 7360a2fa07 fix cpu multi graph mem error
6 years ago
mindspore-ci-bot 10f0f0d5a5 !3673 fix serving input numbers
6 years ago
mindspore-ci-bot 6b81f9f7f7 !3683 Modify patches and alerts
6 years ago
mindspore-ci-bot 5a36b19e80 !3666 Modify the order of init and open of TDT
6 years ago
mindspore-ci-bot 6944af09ee !3596 fix batchnorm issue under mix precision in pynative mode
6 years ago
ms_yan e497117b74 init add vgg16 gpu version
6 years ago
mindspore-ci-bot 78375e104a !3680 lowering value checking threshold to fix bug of pass eps
6 years ago
mindspore-ci-bot abd346e84b !3649 modify setup.py version number for r0.6
6 years ago
mindspore-ci-bot 9156775655 !3677 support multi-node training in deeplabv3
6 years ago
mindspore-ci-bot df7f0c8a7c !3659 modify readme for maskrcnn
6 years ago
wanghua 9da1c96c4a add tinybert scripts
6 years ago
panfengfeng 7d5a67e9f0 googlenet-gpu
6 years ago
kingfo fc92598881 fix batchnorm issue in pynative auto mix precision
6 years ago
mindspore-ci-bot e3fe1d76ca !3558 Fix a racing condition in CacheMergeOp when the leaf hits an error and exit too early
6 years ago
mindspore-ci-bot b429a8421f !3586 fix python api doc for mindspore.dataset
6 years ago
mindspore-ci-bot bb4339e3ca !3584 Fix a DatasetCache sharing scenario
6 years ago
shenwei41 e49a2f83e7 Modify patches and alerts
6 years ago
mindspore-ci-bot 1ec63700c7 !3632 Fix resource not released bug
6 years ago
wangnan39@huawei.com fc5d419422 Lowering value checking threshold to fix the bug of pass add eps
6 years ago
mindspore-ci-bot d4b5cda934 !3604 Fix minor errors in probabilistic programming
6 years ago
ZPaC d6a56cd6fd Pass optimizer attributes to push nodes.
6 years ago
mindspore-ci-bot f04243b1f1 !3663 Fix multi worker
6 years ago
mindspore-ci-bot 6b57b4f0e1 !3652 add epoch_num description
6 years ago
zhouyaqiang b096a6cbe9 support multi-node training and remove code
6 years ago
hanjun996 c718774538 modify tdt
6 years ago
mindspore-ci-bot 68128f87a9 !3634 Split unsupported transdata
6 years ago
mindspore-ci-bot 22dbd1a233 !3646 [MD] fix minddataset core dump when file list size is greater than 1000.
6 years ago
hexia 52776820d8 fix_input_check
6 years ago
guansongsong 5b15f40598 Fix a DatasetCache sharing scenario
6 years ago
cristoval bf74164df3 fix sync sgd under multi-worker
6 years ago
meixiaowei e5b9776b86 modify readme
6 years ago
panfengfeng 8803c6258d add epoch_num
6 years ago
changzherui 614841aa39 modify setup version number
6 years ago
wuyongkang 983cb9b23d Fix resource not released bug
6 years ago
guansongsong 68f27eb62b fix python api doc for mindspore.dataset
6 years ago
mindspore-ci-bot 924a34acb8 !3639 fix GeneratorDataset time out
6 years ago
mindspore-ci-bot db01f3eafe !3640 support bprop for const in pynative and develop stridedslice and isinstance
6 years ago
liyong 66d8395fea fix core dump when the number of files in the list is more than 1000.
6 years ago
mindspore-ci-bot e33b5e435e !3633 fix dataset & train gil lock of gpu process
6 years ago
mindspore-ci-bot 477bf42fe5 !3641 Update submodule akg to r0.6 branch
6 years ago
WilliamLian edba641ddb split unsupported transdata
6 years ago
mindspore-ci-bot 338a225410 !3623 [r0.6][bug][auto_mixed_precision]fix amp doc and eval network build
6 years ago
looop5 13d8bedbf4 update submodule akg to r0.6 branch
6 years ago
mindspore-ci-bot 9a43468fee !3626 fix: tdt hang when device is occupied
6 years ago
buxue 6beb8071d7 support bprop for const in pynative and develop stridedslice and isinstance.
6 years ago
mindspore-ci-bot cc233f66ab !3629 Fix numpyslice issue
6 years ago
yanghaitao 248130e5d1 fix generator time out
6 years ago
mindspore-ci-bot 8f6eafdfcd !3589 fix the description of cache
6 years ago
xiefangqi 30ed5a25ce fix numpyslice issue to r0.6
6 years ago
panfengfeng 4eea891730 fix dataset train gil of gpu
6 years ago
gukecai fe29a2501f fix log bug
6 years ago
jonyguo 0d375bbaa3 fix: tdt hang when device is occupied
6 years ago
mindspore-ci-bot 4f1e586ee3 !3579 fix maskrcnn dataset rescale bug
6 years ago
Wei Luning dd26d85caf fix doc and eval network build in amp
6 years ago
peixu_ren 49cdeb3f78 Fix minor errors in probabilistic programming
6 years ago
mindspore-ci-bot d9ca3f2e88 !3566 dataset: api format problem in totype, totensor, slice
6 years ago
mindspore-ci-bot c5f8b6b0c7 !3599 merge fix sparse doc to r0.6
6 years ago
panyifeng 3714a07d71 fix sparse api doc
6 years ago
mindspore-ci-bot 950367c102 !3595 add desc about sink_size
6 years ago
jinyaohui 40b859395d add description about sink_size
6 years ago
mindspore-ci-bot d7caa7955b !3582 Fix minddata cache include flatbuffer header problem
6 years ago
mindspore-ci-bot 552490326f !3572 [MD] fix save pydoc and log
6 years ago
guansongsong 543b75f366 fix the description of cache
6 years ago
mindspore-ci-bot 3d87436bb0 !3580 fix allreduce fusion case in grad reducer
6 years ago
ms_yan 47efc83bcd repair api format problem in totype, totensor, slice
6 years ago
xiefangqi 0e4065f0ef fix flatbuffer header to r0.6
6 years ago
Ziyan fdb21ecf74 update
6 years ago
meixiaowei 7df05b1da7 fix rescale dataset bug
6 years ago
mindspore-ci-bot c617a07dff !3533 modify serving readme
6 years ago
liyong f52859a2fc fix save op pydoc and log
6 years ago
mindspore-ci-bot 2a6884d97c !3564 [Auto parallel] Cost model for GPU
6 years ago
dinghao b54fc35cde modify serving readme
6 years ago
Xiaoda Zhang ab676ba81a add costmodel for gpu
6 years ago
Jesse Lee f118869869 Fix a merge_op timing hole
6 years ago
mindspore-ci-bot c31c1c808a !3530 Fix a bug for Parameter
6 years ago
mindspore-ci-bot 67600c1d8c !3539 Change at-most collected tensor summary from 50 to 20 when auto-calculated
6 years ago
mindspore-ci-bot 49e8727d37 !3518 fix python import r0.6
6 years ago
mindspore-ci-bot 36c2bbdbcc !3501 fix sparse feature bug for auto parallel
6 years ago
mindspore-ci-bot a536e922c2 !3524 add bert ci script to r0.6 branch
6 years ago
Li Hongzhang d86668d216 change at-most collected tensor from 50 to 20
6 years ago
mindspore-ci-bot bcba696a62 !3482 `max_file_size` includes metadata and drops the last step
6 years ago
He Wei 1f6771256d Fix a bug for Parameter
6 years ago
yoonlee666 1dcf9abf6a add bert ci script
6 years ago
hexia 5fb1280e12 fix python import
6 years ago
mindspore-ci-bot dfab48d532 !3492 Change readme.txt in WarpCTC and checkpoint directory
6 years ago
Li Hongzhang 5a517f3a49 max_file_size includes metadata length and drops the last step
6 years ago
mindspore-ci-bot 62cf01fc7b !3509 Add parameter server model_zoo case and CI test cases.
6 years ago
ZPaC b109e6f643 Add parameter server model_zoo case and CI test cases.
6 years ago
mindspore-ci-bot fdf198eee9 !3493 Modify comment of register_backward_hook [r0.6]
6 years ago
mindspore-ci-bot 7f6f140d94 !3498 Fix getting output address of internal output
6 years ago
mindspore-ci-bot ec3e7269ba !3505 merge eager mode enable sparse to r0.6
6 years ago
panyifeng 032c5e0fdc eager mode enable sparse
6 years ago
mindspore-ci-bot 9626532e0b !3499 Delete parameter name hard code for embedding-lookup
6 years ago
mindspore-ci-bot 304ae51a25 !3470 Init CSV column default list when it's empty r0.6
6 years ago
yangyongjie 2241017e3f fix word missing in readme.txt
6 years ago
ZPaC c1b36c3d4f Delete parameter name hard code for embedding table.
6 years ago
lirongzhen1 8af4a16d9d fix sparse feature bug for auto parallel
6 years ago
yujianfeng 67ed5451ad Fix getting output address of internal output
6 years ago
mindspore-ci-bot ac564a9e86 !3466 fix cpu nonop net fp16 error
6 years ago
mindspore-ci-bot 375078cf55 !3471 Fixing Bug with AutoContrast/Equalize supporting uint8 dtype/mnist
6 years ago
simson 63bb52b408 Modify comment of register_backward_hook
6 years ago
mindspore-ci-bot c9f25d0d5c !3477 upload maskrcnn scripts
6 years ago
mindspore-ci-bot b0cb13d265 !3463 [MD] Fix Segmentation Fault when SentencepieceTokenizer Op is before ZipOp and ConcatOp
6 years ago
mindspore-ci-bot 14ce0afab3 !3478 Add Warpctc GPU network
6 years ago
mindspore-ci-bot 26733198e9 !3458 fix get dataset size error r0.6
6 years ago
mindspore-ci-bot 73f58dc937 !3480 Graceful shutdown for ps modules
6 years ago
cristoval c1332c03e5 support graceful shutdown for ps components
6 years ago
meixiaowei 10c74de9b6 upload maskrcnn scripts
6 years ago
yangyongjie 28b9074e9b add warpctc GPU
6 years ago
mindspore-ci-bot 63442d563f !3402 [AutoParallel]Fix autoparallel gpu bug
6 years ago
islam_amin b0e83c5a06 Fixing AutoContrast/Equalize Bug
6 years ago
kswang 9f5315fc80 fix cpu nonop net fp16 error
6 years ago
panfengfeng 4e7cb1a7a4 fix get dataset size error
6 years ago
jiangzhiwen d408cdf0e0 init column_default_list_ when it is empty
6 years ago
mindspore-ci-bot c5e6cfebe7 !3436 fix mix precision operator issue
6 years ago
cristoval aac2275d1b support graceful shutdown for ps components
6 years ago
mindspore-ci-bot 70aee2fe7a !3401 cpp client example
6 years ago
xulei2020 c43bc92d7c add code
6 years ago
kingfo 5916da1763 fix mix precision operator issue
6 years ago
mindspore-ci-bot 50e20e4042 !3443 Restore the code to collect the graph network
6 years ago
Li Hongzhang 2373e94384 restore the ability to collect network graph
6 years ago
mindspore-ci-bot cda920b21b !3432 add single quotes and modify parameters
6 years ago
mindspore-ci-bot af4b4fb36d !3417 fix bug of group lr when save ckpt
6 years ago
mindspore-ci-bot 927a52fdf8 !3388 Transfer tuple getitem's control to new added memcpy_async
6 years ago
mindspore-ci-bot 0f8c4d6794 !3428 modify annotation: change wegith_decay to weight_decay
6 years ago
李嘉琪 8feb9450f2 add single quotes and modify parameters
6 years ago
lilei f304fe9614 modify weight_decay annotation
6 years ago
mindspore-ci-bot e62137f7c0 !3406 fix optimizer parallel problems
6 years ago
mindspore-ci-bot c005dfd803 !3389 merge sparse hot fix to r0.6
6 years ago
mindspore-ci-bot a051d7c5dc !3410 [bug][ci] fix bug when removing the phis
6 years ago
wangnan39@huawei.com 3c93ff3385 fix_bug_of_group_lr_when_save_ckpt
6 years ago
Wei Luning 43d4f80428 fix bug in remove phi: phi should replace the inner ones first
6 years ago
Ziyan 9f264b6e55 fix optimizer parallel problems
6 years ago
panyifeng 2cebc62bbf fix sparse related issues
6 years ago
mindspore-ci-bot f9aec99c01 !3379 modify the vgg16/lstm path to official/{cv/nlp}
6 years ago
CaoJian 80a655099a modify the vgg16/lstm path to official/{cv/nlp}
6 years ago
hexia f14974392c cpp_client_example_r0.6
6 years ago
huanghui 3901c0414f handle tuple getitem control for newly added memcpy
6 years ago
lichenever 12738ceda7 fix auto parallel gpu bug
6 years ago
mindspore-ci-bot fe0348b3d7 !3380 Fix visit depend node
6 years ago
mindspore-ci-bot 93ce266ae5 !3373 support calling super when class is defined in test_case
6 years ago
WilliamLian 35b466f8f7 fix visit depend node
6 years ago
buxue 15487759ff support calling super when class is defined in test_case.
6 years ago
mindspore-ci-bot 251fba00f5 !3363 fix cloner when funcgraph return is null
6 years ago
mindspore-ci-bot 984be47299 !3365 restructure client example
6 years ago
mindspore-ci-bot 45d8a9eea3 !3354 improve performance of bert by adding order parameters
6 years ago
mindspore-ci-bot 5cdfbf0e82 !3359 fix cpu nonop net
6 years ago
hexia 9daa8a890b restructure client example
6 years ago
leopz 61bf0c5d99 fix cloner when funcgraph is null
6 years ago
mindspore-ci-bot 27982ebbe8 !3347 Fix internal multiple outputs check
6 years ago
kswang 926120ef95 cpu support nonop net
6 years ago
shibeiji 1ae2d2d6c8 add order params for bert to improve performance
6 years ago
yujianfeng 16035dc62c Fix internal multiple outputs check
6 years ago

@@ -106,6 +106,7 @@ endif() # NOT ENABLE_ACL
if (ENABLE_SERVING)
add_subdirectory(serving)
add_subdirectory(serving/example/cpp_client)
endif()
if (NOT ENABLE_ACL)

@@ -75,7 +75,7 @@ For installation using `pip`, take `CPU` and `Ubuntu-x86` build version as an example
1. Download whl from [MindSpore download page](https://www.mindspore.cn/versions/en), and install the package.
```
pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.5.0-beta/MindSpore/cpu/ubuntu_x86/mindspore-0.5.0-cp37-cp37m-linux_x86_64.whl
pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.6.0-beta/MindSpore/cpu/ubuntu_x86/mindspore-0.6.0-cp37-cp37m-linux_x86_64.whl
```
2. Run the following command to verify the install.
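
A typical check (an assumption here — the README's exact command is cut off in this view) imports the package and prints its version:

```python
# Hypothetical verification snippet; not necessarily the README's verbatim command.
import mindspore
print(mindspore.__version__)  # expect 0.6.0 for the 0.6.0-beta wheel
```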

File diff suppressed because one or more lines are too long

akg

@@ -1 +1 @@
Subproject commit f60af9df4220bf3db5de2b224418953c0dc1f625
Subproject commit 5c0e3d2ffb6ba7650453c3b11163237a43d206d6

@@ -491,9 +491,9 @@ build_predict()
cd "${BASEPATH}/predict/output/"
if [[ "$PREDICT_PLATFORM" == "x86_64" ]]; then
tar -cf MSPredict-0.5.0-linux_x86_64.tar.gz include/ lib/ --warning=no-file-changed
tar -cf MSPredict-0.6.0-linux_x86_64.tar.gz include/ lib/ --warning=no-file-changed
elif [[ "$PREDICT_PLATFORM" == "arm64" ]]; then
tar -cf MSPredict-0.5.0-linux_aarch64.tar.gz include/ lib/ --warning=no-file-changed
tar -cf MSPredict-0.6.0-linux_aarch64.tar.gz include/ lib/ --warning=no-file-changed
fi
echo "success to build predict project!"
}

@@ -8,7 +8,7 @@ else()
VER 67.1
LIBS ${LIB_ICU_COMMON} ${LIB_ICU_DATA} ${LIB_ICU_I18N}
URL https://github.com/unicode-org/icu/archive/release-67-1.tar.gz
MD5 0c2662a2b0bc80b0eb56495205247c8f
MD5 fd525fb47d8827b0b7da78b51dd2d93f
CONFIGURE_COMMAND ${CMAKE_SOURCE_DIR}/scripts/build_icu4c.sh
)
include_directories(${icu4c_INC})
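
The MD5 line above pins the expected digest of the downloaded icu4c release tarball; the commit updates the pin because the upstream archive changed. A minimal Python sketch of the check this field implies (file path and digest are illustrative placeholders, not the build system's variables):

```python
# Sketch of the digest comparison implied by the MD5 field above.
import hashlib

def md5_matches(path, expected):
    h = hashlib.md5()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest() == expected

# e.g. md5_matches("release-67-1.tar.gz", "fd525fb47d8827b0b7da78b51dd2d93f")
```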

@@ -12,6 +12,7 @@ mindspore_add_pkg(jpeg_turbo
URL https://github.com/libjpeg-turbo/libjpeg-turbo/archive/2.0.4.tar.gz
MD5 44c43e4a9fb352f47090804529317c88
CMAKE_OPTION -DCMAKE_BUILD_TYPE=Release -DCMAKE_SKIP_RPATH=TRUE
PATCHES ${CMAKE_SOURCE_DIR}/third_party/patch/jpeg_turbo/jpeg_turbo.patch001
)
include_directories(${jpeg_turbo_INC})
add_library(mindspore::jpeg_turbo ALIAS jpeg_turbo::jpeg)

@@ -278,6 +278,13 @@ if (ENABLE_SERVING)
COMPONENT mindspore
)
file(GLOB MS_SERVING_PY_LIST ${CMAKE_SOURCE_DIR}/serving/*.py)
install(
FILES ${MS_SERVING_PY_LIST}
DESTINATION ${INSTALL_PY_DIR}
COMPONENT mindspore
)
install(
TARGETS inference
DESTINATION ${INSTALL_LIB_DIR}

@@ -0,0 +1,67 @@
FROM ubuntu:18.04
MAINTAINER leonwanghui <leon.wanghui@huawei.com>
# Set env
ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5
ENV PATH /usr/local/bin:$PATH
# Install base tools
RUN apt update \
&& DEBIAN_FRONTEND=noninteractive apt install -y \
vim \
wget \
curl \
xz-utils \
net-tools \
openssh-client \
git \
ntpdate \
tzdata \
tcl \
sudo \
bash-completion
# Install compile tools
RUN DEBIAN_FRONTEND=noninteractive apt install -y \
gcc \
g++ \
zlibc \
make \
libgmp-dev \
patch \
autoconf \
libtool \
automake \
flex
# Set bash
RUN echo "dash dash/sh boolean false" | debconf-set-selections
RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash
# Install python (v3.7.5)
RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
libgdbm-dev libgdbm-compat-dev liblzma-dev libreadline-dev libsqlite3-dev \
&& cd /tmp \
&& wget https://github.com/python/cpython/archive/v3.7.5.tar.gz \
&& tar -xvf v3.7.5.tar.gz \
&& cd /tmp/cpython-3.7.5 \
&& mkdir -p ${PYTHON_ROOT_PATH} \
&& ./configure --prefix=${PYTHON_ROOT_PATH} \
&& make -j4 \
&& make install -j4 \
&& rm -f /usr/local/bin/python \
&& rm -f /usr/local/bin/pip \
&& ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \
&& ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \
&& rm -rf /tmp/cpython-3.7.5 \
&& rm -f /tmp/v3.7.5.tar.gz
# Set pip source
RUN mkdir -pv /root/.pip \
&& echo "[global]" > /root/.pip/pip.conf \
&& echo "trusted-host=mirrors.aliyun.com" >> /root/.pip/pip.conf \
&& echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf
# Install MindSpore cpu whl package
RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.6.0-beta/MindSpore/cpu/ubuntu_x86/mindspore-0.6.0-cp37-cp37m-linux_x86_64.whl

@@ -0,0 +1,83 @@
FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04
MAINTAINER leonwanghui <leon.wanghui@huawei.com>
# Set env
ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5
ENV OMPI_ROOT_PATH /usr/local/openmpi-3.1.5
ENV PATH ${OMPI_ROOT_PATH}/bin:/usr/local/bin:$PATH
ENV LD_LIBRARY_PATH ${OMPI_ROOT_PATH}/lib:$LD_LIBRARY_PATH
# Install base tools
RUN apt update \
&& DEBIAN_FRONTEND=noninteractive apt install -y \
vim \
wget \
curl \
xz-utils \
net-tools \
openssh-client \
git \
ntpdate \
tzdata \
tcl \
sudo \
bash-completion
# Install compile tools
RUN DEBIAN_FRONTEND=noninteractive apt install -y \
gcc \
g++ \
zlibc \
make \
libgmp-dev \
patch \
autoconf \
libtool \
automake \
flex \
libnccl2=2.4.8-1+cuda10.1 \
libnccl-dev=2.4.8-1+cuda10.1
# Set bash
RUN echo "dash dash/sh boolean false" | debconf-set-selections
RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash
# Install python (v3.7.5)
RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
libgdbm-dev libgdbm-compat-dev liblzma-dev libreadline-dev libsqlite3-dev \
&& cd /tmp \
&& wget https://github.com/python/cpython/archive/v3.7.5.tar.gz \
&& tar -xvf v3.7.5.tar.gz \
&& cd /tmp/cpython-3.7.5 \
&& mkdir -p ${PYTHON_ROOT_PATH} \
&& ./configure --prefix=${PYTHON_ROOT_PATH} \
&& make -j4 \
&& make install -j4 \
&& rm -f /usr/local/bin/python \
&& rm -f /usr/local/bin/pip \
&& ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \
&& ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \
&& rm -rf /tmp/cpython-3.7.5 \
&& rm -f /tmp/v3.7.5.tar.gz
# Set pip source
RUN mkdir -pv /root/.pip \
&& echo "[global]" > /root/.pip/pip.conf \
&& echo "trusted-host=mirrors.aliyun.com" >> /root/.pip/pip.conf \
&& echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf
# Install openmpi (v3.1.5)
RUN cd /tmp \
&& wget https://download.open-mpi.org/release/open-mpi/v3.1/openmpi-3.1.5.tar.gz \
&& tar -xvf openmpi-3.1.5.tar.gz \
&& cd /tmp/openmpi-3.1.5 \
&& mkdir -p ${OMPI_ROOT_PATH} \
&& ./configure --prefix=${OMPI_ROOT_PATH} \
&& make -j4 \
&& make install -j4 \
&& rm -rf /tmp/openmpi-3.1.5 \
&& rm -f /tmp/openmpi-3.1.5.tar.gz
# Install MindSpore cuda-10.1 whl package
RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.6.0-beta/MindSpore/gpu/ubuntu_x86/cuda-10.1/mindspore_gpu-0.6.0-cp37-cp37m-linux_x86_64.whl

@@ -1 +1 @@
Subproject commit 103f2d1019dc50d781d7a964551d9f1f50b3b009
Subproject commit 885af56694eff438a4ea079c0c34de30993f1473

@@ -14,7 +14,10 @@
# ============================================================================
"""builtin_operations"""
import numpy as np
from mindspore.ops import functional as F
from mindspore.ops import composite as C
from mindspore.common.tensor import Tensor
import mindspore.common.dtype as mstype
from mindspore.common.dtype import dtype_to_nptype, get_py_obj_dtype
@@ -113,6 +116,7 @@ def bool_or(x, y):
"""Implement `bool_or`."""
return x or y
def vm_compare(*args):
"""Implement `vm_compare` for tensor."""
obj_str = args[-1]
@@ -141,10 +145,12 @@ def list_len(x):
"""Implement `list_len`."""
return len(x)
def Depend(value, expr):
"""Implement `Depend`."""
return value
# only used in PyNative mode
def make_ref(key, value, ref):
return value
@@ -171,3 +177,16 @@ def tuple_to_array(x):
def stop_gradient(x):
"""Implement `stop_gradient`."""
return x
hyper_map = C.HyperMap()
def mixed_precision_cast(dst_type, x):
"""Implement `mixed_precision_cast`."""
def cast_inner(data):
if isinstance(data, Tensor) and data.dtype in (mstype.float32, mstype.float16):
return F.cast(data, dst_type)
return data
return hyper_map(cast_inner, x)
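
The `mixed_precision_cast` added above applies `cast_inner` to every leaf of a possibly nested input via `C.HyperMap`. A plain-Python sketch of that traversal (NumPy stands in for `Tensor`; the recursive walk is an assumption about what `HyperMap` does here):

```python
# Plain-Python sketch of HyperMap-style traversal: apply fn to every leaf of
# a nested tuple/list and rebuild the same structure. NumPy arrays stand in
# for mindspore Tensors.
import numpy as np

def hyper_map(fn, x):
    if isinstance(x, (tuple, list)):
        return type(x)(hyper_map(fn, item) for item in x)
    return fn(x)

def mixed_precision_cast(dst_type, x):
    def cast_inner(data):
        if isinstance(data, np.ndarray) and data.dtype in (np.float32, np.float16):
            return data.astype(dst_type)
        return data
    return hyper_map(cast_inner, x)

# mixed_precision_cast(np.float16, (np.ones(2, np.float32), [np.zeros(3, np.float32), 1]))
```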

@@ -459,27 +459,27 @@ class Parser:
logger.debug("ops info = %r", ops_info)
return ops_info
def analyze_super(self, father_class_node, subclass_instance):
def analyze_super(self, class_type_node, subclass_instance):
"""Analyze super and return a class instance."""
father_class = None
if father_class_node is None:
father_class = type(subclass_instance)
if isinstance(father_class_node, ast.Name):
father_class_name = getattr(father_class_node, 'id')
father_class = self.global_namespace[father_class_name]
if isinstance(father_class_node, ast.Attribute):
value = getattr(father_class_node, 'value')
attr = getattr(father_class_node, 'attr')
module_name = getattr(value, 'id')
father_class_module = self.global_namespace[module_name]
father_class = getattr(father_class_module, attr)
if father_class is None:
raise ValueError("When call 'super', the father class is None.")
if not isinstance(subclass_instance, father_class):
sub_class = type(subclass_instance)
if class_type_node is None:
return super(sub_class, subclass_instance)
if isinstance(class_type_node, ast.Name):
class_name = getattr(class_type_node, 'id')
elif isinstance(class_type_node, ast.Attribute):
class_name = getattr(class_type_node, 'attr')
else:
raise ValueError(f"When call 'super', the first arg should be a class type, "
f"but got {class_type_node.__class__.__name__}.")
target_father_class = None
for class_element in sub_class.mro():
if class_element.__name__ == class_name:
target_father_class = class_element
break
if target_father_class is None:
raise ValueError("When call 'super', the second arg should be an instance of first arg.")
target_class_instance = super(father_class, subclass_instance)
return target_class_instance
return super(target_father_class, subclass_instance)
def get_location(self, node):
"""

@@ -132,7 +132,9 @@ def while_cond(x):
@constexpr
def check_type_same(x_type, base_type):
"""Check x_type is same as base_type."""
return mstype.issubclass_(x_type, base_type)
if mstype.issubclass_(x_type, base_type):
return True
raise TypeError(f"The arg 'x' should be a {base_type}, but got {x_type}.")
@constexpr

@@ -31,8 +31,9 @@ class PServerKernel {
~PServerKernel() = default;
PServerKernel(const PServerKernel &) = delete;
PServerKernel &operator=(const PServerKernel &) = delete;
virtual void InitKernel(const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &) {}
virtual void InitKernel(const CNodePtr &cnode,
const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &) {}
virtual void ReInit(const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &) {}
virtual bool Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) = 0;

@@ -33,8 +33,9 @@ class PullKernel : public CPUKernel {
~PullKernel() override = default;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, const std::vector<AddressPtr> &) {
// If the parameter is an embedding table, don't Pull from PServer.
if (param_name_.find("embedding") == std::string::npos && param_name_.find("wide_w") == std::string::npos) {
bool init_in_server = mindspore::parallel::ps::Worker<float>::GetInstance().GetParamInitInServer(param_name_);
// If init_in_server, forward kernel should run in server too.
if (!init_in_server) {
parallel::ps::Worker<T>::GetInstance().Pull(key_, inputs[1]->addr, inputs[1]->size);
}
return true;

@@ -43,7 +43,10 @@ class PushKernel : public CPUKernel {
sizes.push_back(SizeToInt(input->size) / sizeof(T));
}
parallel::ps::Worker<T>::GetInstance().Push(keys, addrs, sizes);
memcpy_s(outputs[0]->addr, sizeof(size_t), &key_, sizeof(size_t));
auto ret = memcpy_s(outputs[0]->addr, sizeof(size_t), &key_, sizeof(size_t));
if (ret != EOK) {
MS_LOG(EXCEPTION) << "Lookup id memcpy failed.";
}
return true;
}

@@ -23,7 +23,7 @@ namespace mindspore {
namespace kernel {
namespace ps {
void SparseApplyAdamPSKernel::InitKernel(
const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes) {
const CNodePtr &cnode, const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes) {
const std::vector<std::shared_ptr<std::vector<size_t>>> &shape_vec = *shapes;
std::vector<size_t> &var_shape = *(shape_vec[0]);
std::vector<size_t> &m_shape = *(shape_vec[1]);
@@ -55,11 +55,9 @@ void SparseApplyAdamPSKernel::InitKernel(
if (grad_shape[0] != indices_size_) {
MS_LOG(ERROR) << "The first dimension of grad shape must be equal to indices";
}
/*
if (AnfAlgo::HasNodeAttr(USE_NESTEROV, kernel_node)) {
use_nesterov_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "use_nesterov");
if (AnfAlgo::HasNodeAttr(USE_NESTEROV, cnode)) {
use_nesterov_ = AnfAlgo::GetNodeAttr<bool>(cnode, "use_nesterov");
}
*/
workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float));
workspace_size_list_.emplace_back(indices_size_ * sizeof(int));
workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float));

@@ -30,7 +30,8 @@ class SparseApplyAdamPSKernel : public SparseApplyAdamCPUKernel, public PServerK
SparseApplyAdamPSKernel(size_t rank_id, size_t pserver_num) : PServerKernel(rank_id, pserver_num) {}
~SparseApplyAdamPSKernel() override = default;
void InitKernel(const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &) override;
void InitKernel(const CNodePtr &cnode,
const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &) override;
void ReInit(const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &) override;
bool Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

@@ -20,7 +20,7 @@ namespace mindspore {
namespace kernel {
namespace ps {
void SparseApplyFtrlPSKernel::InitKernel(
const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes) {
const CNodePtr &cnode, const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes) {
const std::vector<std::shared_ptr<std::vector<size_t>>> &shape_vec = *shapes;
std::vector<size_t> var_shape = *(shape_vec[0]);
std::vector<size_t> accum_shape = *(shape_vec[1]);
@@ -46,10 +46,22 @@ void SparseApplyFtrlPSKernel::InitKernel(
if (grad_shape[0] != indices_size_) {
MS_LOG(EXCEPTION) << "The first dimension of grad shape must be equal to indices";
}
lr_ = 0.01;
l1_ = 1e-8;
l2_ = 1e-8;
lr_power_ = -0.5;
lr_ = AnfAlgo::GetNodeAttr<float>(cnode, "lr");
if (lr_ <= 0) {
MS_LOG(EXCEPTION) << "lr should be a positive scalar";
}
l1_ = AnfAlgo::GetNodeAttr<float>(cnode, "l1");
if (l1_ < 0) {
MS_LOG(EXCEPTION) << "l1 should be a non-negative scalar";
}
l2_ = AnfAlgo::GetNodeAttr<float>(cnode, "l2");
if (l2_ < 0) {
MS_LOG(EXCEPTION) << "l2 should be a non-negative scalar";
}
lr_power_ = AnfAlgo::GetNodeAttr<float>(cnode, "lr_power");
if (lr_power_ > 0) {
MS_LOG(EXCEPTION) << "lr_power should be a non-positive scalar";
}
workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float));
workspace_size_list_.emplace_back(indices_size_ * sizeof(int));
workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float));
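
The hunk above stops hard-coding the FTRL hyperparameters and reads them from the node's attributes instead, rejecting invalid values. The same constraints restated as a plain-Python check:

```python
# Plain-Python restatement of the validation added above: lr positive,
# l1/l2 non-negative, lr_power non-positive.
def check_ftrl_attrs(lr, l1, l2, lr_power):
    if lr <= 0:
        raise ValueError("lr should be a positive scalar")
    if l1 < 0:
        raise ValueError("l1 should be a non-negative scalar")
    if l2 < 0:
        raise ValueError("l2 should be a non-negative scalar")
    if lr_power > 0:
        raise ValueError("lr_power should be a non-positive scalar")

check_ftrl_attrs(lr=0.01, l1=1e-8, l2=1e-8, lr_power=-0.5)  # the old defaults pass
```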

@@ -30,7 +30,8 @@ class SparseApplyFtrlPSKernel : public SparseApplyFtrlCPUKernel, public PServerK
SparseApplyFtrlPSKernel(size_t rank_id, size_t pserver_num) : PServerKernel(rank_id, pserver_num) {}
~SparseApplyFtrlPSKernel() override = default;
void InitKernel(const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &) override;
void InitKernel(const CNodePtr &cnode,
const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &) override;
void ReInit(const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &) override;
bool Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,

@@ -23,7 +23,7 @@ namespace mindspore {
namespace kernel {
namespace ps {
void SparseApplyLazyAdamPSKernel::InitKernel(
const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes) {
const CNodePtr &cnode, const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes) {
const std::vector<std::shared_ptr<std::vector<size_t>>> &shape_vec = *shapes;
std::vector<size_t> &var_shape = *(shape_vec[0]);
std::vector<size_t> &m_shape = *(shape_vec[1]);
@@ -55,11 +55,9 @@ void SparseApplyLazyAdamPSKernel::InitKernel(
if (grad_shape[0] != indices_size_) {
MS_LOG(ERROR) << "The first dimension of grad shape must be equal to indices";
}
/*
if (AnfAlgo::HasNodeAttr(USE_NESTEROV, kernel_node)) {
use_nesterov_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "use_nesterov");
if (AnfAlgo::HasNodeAttr(USE_NESTEROV, cnode)) {
use_nesterov_ = AnfAlgo::GetNodeAttr<bool>(cnode, "use_nesterov");
}
*/
workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float));
workspace_size_list_.emplace_back(indices_size_ * sizeof(int));
workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float));

@@ -30,7 +30,8 @@ class SparseApplyLazyAdamPSKernel : public SparseApplyLazyAdamCPUKernel, public
SparseApplyLazyAdamPSKernel(size_t rank_id, size_t pserver_num) : PServerKernel(rank_id, pserver_num) {}
~SparseApplyLazyAdamPSKernel() override = default;
void InitKernel(const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &) override;
void InitKernel(const CNodePtr &cnode,
const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &) override;
void ReInit(const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &) override;
bool Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

@@ -0,0 +1,226 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <algorithm>
#include "maxpool_with_argmax_grad_impl.cuh"
#include "runtime/device/gpu/cuda_common.h"
#include "include/cuda_fp16.h"
template <typename T, typename S>
__global__ void MaxPoolWithArgmaxGrad(const T* x,
const T* dy,
const S* index,
const int n,
const int c,
const int xHeight,
const int xWidth,
const int dyHeight,
const int dyWidth,
const int windowHeight,
const int windowWidth,
const int strideHeight,
const int strideWidth,
const int padTop,
const int padLeft,
const int xNCHW,
const int xCHW,
const int xHW,
const int dyCHW,
const int dyHW,
T* dx) {
for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x;
pos < (xNCHW);
pos += blockDim.x * gridDim.x) {
const int posn = pos / xCHW;
const int posc = pos / xHW % c;
const int posh = pos / xWidth % xHeight;
const int posw = pos % xWidth;
const S posIdx = posh*xWidth + posw;
int hstart = posh+padTop;
if (hstart < windowHeight) {
hstart = 0;
} else {
hstart = (hstart-windowHeight)/strideHeight + 1;
}
int wstart = posw+padLeft;
if (wstart < windowWidth) {
wstart = 0;
} else {
wstart = (wstart-windowWidth)/strideWidth + 1;
}
const int hend = min((posh+padTop)/strideHeight +1, dyHeight);
const int wend = min((posw+padLeft)/strideWidth +1, dyWidth);
const int channelStart = posn*dyCHW + posc*dyHW;
T dySum = static_cast<T>(0.0);
for (int hcur = hstart; hcur < hend; ++hcur) {
for (int wcur = wstart; wcur < wend; ++wcur) {
const int curIdx = hcur*dyWidth + wcur;
S maxIdx = index[channelStart+curIdx];
if (maxIdx == posIdx) {
dySum += dy[channelStart+curIdx];
}
}
}
dx[pos] = dySum;
}
return;
}
template <>
__global__ void MaxPoolWithArgmaxGrad(const half* x,
const half* dy,
const int* index,
const int n,
const int c,
const int xHeight,
const int xWidth,
const int dyHeight,
const int dyWidth,
const int windowHeight,
const int windowWidth,
const int strideHeight,
const int strideWidth,
const int padTop,
const int padLeft,
const int xNCHW,
const int xCHW,
const int xHW,
const int dyCHW,
const int dyHW,
half* dx) {
for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x;
pos < (xNCHW);
pos += blockDim.x * gridDim.x) {
const int posn = pos / xCHW;
const int posc = pos / xHW % c;
const int posh = pos / xWidth % xHeight;
const int posw = pos % xWidth;
const int posIdx = posh*xWidth + posw;
int hstart = posh+padTop;
if (hstart < windowHeight) {
hstart = 0;
} else {
hstart = (hstart-windowHeight)/strideHeight + 1;
}
int wstart = posw+padLeft;
if (wstart < windowWidth) {
wstart = 0;
} else {
wstart = (wstart-windowWidth)/strideWidth + 1;
}
const int hend = min((posh+padTop)/strideHeight +1, dyHeight);
const int wend = min((posw+padLeft)/strideWidth +1, dyWidth);
const int channelStart = posn*dyCHW + posc*dyHW;
float dySum = 0.0f;
for (int hcur = hstart; hcur < hend; ++hcur) {
for (int wcur = wstart; wcur < wend; ++wcur) {
const int curIdx = hcur*dyWidth + wcur;
int maxIdx = index[channelStart+curIdx];
if (maxIdx == posIdx) {
dySum += __half2float(dy[channelStart+curIdx]);
}
}
}
dx[pos] = __float2half(dySum);
}
return;
}
template <typename T, typename S>
void CalMaxPoolWithArgmaxGrad(const T* x,
const T* dy,
const S* index,
const int n,
const int c,
const int xHeight,
const int xWidth,
const int dyHeight,
const int dyWidth,
const int windowHeight,
const int windowWidth,
const int strideHeight,
const int strideWidth,
const int padTop,
const int padLeft,
T* dx,
cudaStream_t cuda_stream) {
const int xHW = xHeight*xWidth;
const int xCHW = c*xHW;
const int xNCHW = n*xCHW;
const int dyHW = dyHeight*dyWidth;
const int dyCHW = c*dyHW;
MaxPoolWithArgmaxGrad<<<GET_BLOCKS(xNCHW),
GET_THREADS,
0,
cuda_stream>>>(
x,
dy,
index,
n,
c,
xHeight,
xWidth,
dyHeight,
dyWidth,
windowHeight,
windowWidth,
strideHeight,
strideWidth,
padTop,
padLeft,
xNCHW,
xCHW,
xHW,
dyCHW,
dyHW,
dx);
return;
}
template void CalMaxPoolWithArgmaxGrad<float, int>(const float* x,
const float* dy,
const int* index,
const int n,
const int c,
const int xHeight,
const int xWidth,
const int dyHeight,
const int dyWidth,
const int windowHeight,
const int windowWidth,
const int strideHeight,
const int strideWidth,
const int padTop,
const int padLeft,
float* dx,
cudaStream_t cuda_stream);
template void CalMaxPoolWithArgmaxGrad<half, int>(const half* x,
const half* dy,
const int* index,
const int n,
const int c,
const int xHeight,
const int xWidth,
const int dyHeight,
const int dyWidth,
const int windowHeight,
const int windowWidth,
const int strideHeight,
const int strideWidth,
const int padTop,
const int padLeft,
half* dx,
cudaStream_t cuda_stream);
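
A NumPy reference for what the kernel above computes: the gradient of max-pooling-with-argmax routes each output gradient back to the input position recorded in the argmax index (single channel for brevity; indices are assumed flattened as h*W + w within the channel, matching `posIdx` above):

```python
# NumPy reference for the CUDA kernel above, one channel at a time: dx at a
# flattened input position accumulates every dy whose recorded argmax index
# equals that position. np.add.at scatter-adds duplicate indices correctly.
import numpy as np

def maxpool_argmax_grad_2d(dy, index, x_height, x_width):
    dx = np.zeros(x_height * x_width, dtype=dy.dtype)
    np.add.at(dx, index.ravel(), dy.ravel())
    return dx.reshape(x_height, x_width)

# 2x2 input pooled to 1x1: the whole gradient lands on the argmax cell (1, 1).
dy = np.array([[1.0]], dtype=np.float32)
index = np.array([[3]])
print(maxpool_argmax_grad_2d(dy, index, 2, 2))
```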

@@ -0,0 +1,25 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_MAXPOOLWITHARGMAX_GRAD_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_MAXPOOLWITHARGMAX_GRAD_H_
template <typename T, typename S>
void CalMaxPoolWithArgmaxGrad(const T* x, const T* dy, const S* index, const int n, const int c, const int xHeight,
const int xWidth, const int dyHeight, const int dyWidth, const int windowHeight,
const int windowWidth, const int strideHeight, const int strideWidth, const int padTop,
const int padLeft, T* dx, cudaStream_t cuda_stream);
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_MAXPOOLWITHARGMAX_GRAD_H_

Some files were not shown because too many files have changed in this diff.
