From 769243673a8dc9a16289f18c43bc59ad1cb60b1d Mon Sep 17 00:00:00 2001
From: zhangxinfeng3
Date: Mon, 8 Mar 2021 17:24:39 +0800
Subject: [PATCH] Add some modules of Sponge

---
 mindspore/ccsrc/CMakeLists.txt                     |    3 +-
 .../cuda_impl/sponge/common/getcenter_impl.cu      |   45 +
 .../sponge/common/getcenter_impl.cuh               |   26 +
 .../sponge/common/mdtemperature_impl.cu            |   51 +
 .../sponge/common/mdtemperature_impl.cuh           |   25 +
 .../gpu/cuda_impl/sponge/common_sponge.cuh         |  163 +-
 .../dihedral/dihedral_atom_energy_impl.cu          |    2 -
 .../sponge/dihedral/dihedral_energy_impl.cu        |    2 -
 .../sponge/dihedral/dihedral_force_impl.cu         |    1 -
 .../gpu/cuda_impl/sponge/lj/lj_energy_impl.cu      |  102 +
 .../cuda_impl/sponge/lj/lj_energy_impl.cuh         |   27 +
 .../gpu/cuda_impl/sponge/lj/lj_force_impl.cu       |  116 +
 .../gpu/cuda_impl/sponge/lj/lj_force_impl.cuh      |   27 +
 .../lj/lj_force_with_pme_direct_force_impl.cu      |  132 +
 .../lj_force_with_pme_direct_force_impl.cuh        |   28 +
 .../nb14/dihedral_14_cf_atom_energy_impl.cu        |   80 +
 .../nb14/dihedral_14_cf_atom_energy_impl.cuh       |   25 +
 .../sponge/nb14/dihedral_14_cf_energy_impl.cu      |   80 +
 .../nb14/dihedral_14_cf_energy_impl.cuh            |   25 +
 .../nb14/dihedral_14_lj_atom_energy_impl.cu        |  102 +
 .../nb14/dihedral_14_lj_atom_energy_impl.cuh       |   26 +
 ...al_14_lj_cf_force_with_atom_energy_impl.cu      |  140 +
 ...l_14_lj_cf_force_with_atom_energy_impl.cuh      |   27 +
 .../sponge/nb14/dihedral_14_lj_energy_impl.cu      |  102 +
 .../nb14/dihedral_14_lj_energy_impl.cuh            |   27 +
 .../sponge/nb14/dihedral_14_lj_force_impl.cu       |  111 +
 .../sponge/nb14/dihedral_14_lj_force_impl.cuh      |   26 +
 ...ihedral_14_lj_force_with_direct_cf_impl.cu      |  124 +
 ...hedral_14_lj_force_with_direct_cf_impl.cuh      |   26 +
 .../neighbor_list/neighbor_list_impl.cu            |  419 +++
 .../neighbor_list/neighbor_list_impl.cuh           |   58 +
 .../nvtit/md_iteration_leap_frog_impl.cu           |  139 +
 .../nvtit/md_iteration_leap_frog_impl.cuh          |   27 +
 .../gpu/cuda_impl/sponge/pme/pme_common.cuh        |  230 ++
 .../cuda_impl/sponge/pme/pme_energy_impl.cu        |  234 ++
 .../cuda_impl/sponge/pme/pme_energy_impl.cuh       |   30 +
 .../sponge/pme/pme_excluded_force_impl.cu          |  102 +
 .../sponge/pme/pme_excluded_force_impl.cuh         |   26 +
 .../sponge/pme/pme_reciprocal_force_impl.cu        |  204 ++
 .../sponge/pme/pme_reciprocal_force_impl.cuh       |   28 +
 .../gpu/sponge/common/getcenter_kernel.cc          |   27 +
 .../gpu/sponge/common/getcenter_kernel.h           |   89 +
 .../gpu/sponge/common/mdtemperature_kernel.cc      |   31 +
 .../gpu/sponge/common/mdtemperature_kernel.h       |   96 +
 .../gpu/sponge/lj/lj_energy_kernel.cc              |   34 +
 .../gpu/sponge/lj/lj_energy_kernel.h               |  130 +
 .../gpu/sponge/lj/lj_force_kernel.cc               |   34 +
 .../gpu/sponge/lj/lj_force_kernel.h                |  129 +
 .../lj_force_with_pme_direct_force_kernel.cc       |   34 +
 .../lj_force_with_pme_direct_force_kernel.h        |  133 +
 .../nb14/dihedral_14_cf_atom_energy_kernel.cc      |   34 +
 .../nb14/dihedral_14_cf_atom_energy_kernel.h       |  114 +
 .../nb14/dihedral_14_cf_energy_kernel.cc           |   34 +
 .../nb14/dihedral_14_cf_energy_kernel.h            |  114 +
 .../nb14/dihedral_14_lj_atom_energy_kernel.cc      |   36 +
 .../nb14/dihedral_14_lj_atom_energy_kernel.h       |  123 +
 ..._14_lj_cf_force_with_atom_energy_kernel.cc      |   38 +
 ...l_14_lj_cf_force_with_atom_energy_kernel.h      |  132 +
 .../nb14/dihedral_14_lj_energy_kernel.cc           |   36 +
 .../nb14/dihedral_14_lj_energy_kernel.h            |  124 +
 .../nb14/dihedral_14_lj_force_gpu_kernel.cc        |   36 +
 .../nb14/dihedral_14_lj_force_gpu_kernel.h         |  122 +
 ...edral_14_lj_force_with_direct_cf_kernel.cc      |   37 +
 ...hedral_14_lj_force_with_direct_cf_kernel.h      |  130 +
 .../neighbor_list_update_kernel.cc                 |   45 +
 .../neighbor_list_update_kernel.h                  |  170 +
 .../nvtit/md_iteration_leap_frog_kernel.cc         |   32 +
 .../nvtit/md_iteration_leap_frog_kernel.h          |  115 +
 .../gpu/sponge/pme/pme_energy_kernel.cc            |   38 +
 .../gpu/sponge/pme/pme_energy_kernel.h             |  147 +
 .../sponge/pme/pme_excluded_force_kernel.cc        |   32 +
 .../sponge/pme/pme_excluded_force_kernel.h         |   95 +
 .../sponge/pme/pme_reciprocal_force_kernel.cc      |   29 +
 .../sponge/pme/pme_reciprocal_force_kernel.h       |  119 +
 mindspore/ccsrc/cxx_api/CMakeLists.txt             |    3 +-
 mindspore/ops/operations/__init__.py               |   27 +-
 mindspore/ops/operations/sponge_ops.py             | 2890 ++++++++++++-----
 77 files changed, 7843 insertions(+), 914 deletions(-)
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/getcenter_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/getcenter_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/mdtemperature_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/mdtemperature_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_energy_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_energy_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_with_pme_direct_force_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_with_pme_direct_force_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_atom_energy_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_atom_energy_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_with_direct_cf_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_with_direct_cf_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_common.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_excluded_force_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_excluded_force_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/getcenter_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/getcenter_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/mdtemperature_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/mdtemperature_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_energy_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_energy_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_with_pme_direct_force_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_with_pme_direct_force_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_atom_energy_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_atom_energy_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_energy_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_energy_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_atom_energy_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_atom_energy_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_energy_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_energy_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_gpu_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_gpu_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_with_direct_cf_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_with_direct_cf_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nvtit/md_iteration_leap_frog_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nvtit/md_iteration_leap_frog_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_energy_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_energy_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_kernel.h

diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt
index 95ccf58cb8..9fd1c55c94 100644
--- a/mindspore/ccsrc/CMakeLists.txt
+++ b/mindspore/ccsrc/CMakeLists.txt
@@ -383,7 +383,8 @@ if(ENABLE_GPU)
             ${CUDNN_LIBRARY_PATH}
             ${CUDA_PATH}/lib64/libcudart.so
             ${CUDA_PATH}/lib64/stubs/libcuda.so
-            ${CUDA_PATH}/lib64/libcusolver.so)
+            ${CUDA_PATH}/lib64/libcusolver.so
+            ${CUDA_PATH}/lib64/libcufft.so)
     if(ENABLE_MPI)
         set_target_properties(_ms_mpi PROPERTIES INSTALL_RPATH ${MINDSPORE_RPATH})
     endif()
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/getcenter_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/getcenter_impl.cu
new file mode 100644
index 0000000000..2e283d297e
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/getcenter_impl.cu
@@ -0,0 +1,45 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common/getcenter_impl.cuh"
+
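+// One thread per listed atom: each thread adds its atom's coordinate,
+// pre-scaled by 1/center_numbers, into center_of_geometry[0] with atomicAdd,
+// so the geometric center is accumulated in a single pass.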
+__global__ void GetCenterOfGeometryKernel(const int center_numbers, float center_numbers_inverse,
+                                          const int *center_atoms, const VECTOR *crd, VECTOR *center_of_geometry) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < center_numbers) {
+    int atom_i = center_atoms[i];
+    VECTOR temp = center_numbers_inverse * crd[atom_i];
+    atomicAdd(&center_of_geometry[0].x, temp.x);
+    atomicAdd(&center_of_geometry[0].y, temp.y);
+    atomicAdd(&center_of_geometry[0].z, temp.z);
+  }
+}
+
+void GetCenterOfGeometry(const int center_numbers, float center_numbers_inverse, const int *center_atoms,
+                         const float *crd_f, float *center_of_geometry_f, cudaStream_t stream) {
+  VECTOR *crd = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(crd_f));
+  VECTOR *center_of_geometry = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(center_of_geometry_f));
+  GetCenterOfGeometryKernel<<<ceilf(static_cast<float>(center_numbers) / 32), 32, 0, stream>>>(
+    center_numbers, center_numbers_inverse, center_atoms, crd, center_of_geometry);
+
+  cudaStreamSynchronize(stream);
+
+  return;
+}
+
+void GetCenterOfGeometry(const int center_numbers, float center_numbers_inverse, const int *center_atoms, float *crd_f,
+                         float *center_of_geometry_f, cudaStream_t stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/getcenter_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/getcenter_impl.cuh
new file mode 100644
index 0000000000..57a9dce898
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/getcenter_impl.cuh
@@ -0,0 +1,26 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_GETCENTER_IMPL_H_
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_GETCENTER_IMPL_H_
+
+#include <curand_kernel.h>
+#include "runtime/device/gpu/cuda_common.h"
+
+void GetCenterOfGeometry(const int center_numbers, float center_numbers_inverse, const int *center_atoms,
+                         const float *crd_f, float *center_of_geometry_f, cudaStream_t stream);
+
+#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_GETCENTER_IMPL_H_
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/mdtemperature_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/mdtemperature_impl.cu
new file mode 100644
index 0000000000..e2f9b921e4
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/mdtemperature_impl.cu
@@ -0,0 +1,51 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common/mdtemperature_impl.cuh"
+
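+// One thread per residue: sum the residue's atomic momenta, then convert the
+// center-of-mass kinetic energy p^2/(2M) into a temperature contribution via
+// T = 2*KE/(3*kB), averaged over residue_numbers.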
+__global__ void MDTemperatureKernel(const int residue_numbers, const int *start, const int *end,
+                                    const VECTOR *atom_vel, const float *atom_mass, float *ek) {
+  int residue_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (residue_i < residue_numbers) {
+    VECTOR momentum = {0., 0., 0.};
+    float res_mass = 0.;
+    int s = start[residue_i];
+    int e = end[residue_i];
+    float mass_lin;
+    for (int atom_i = s; atom_i < e; atom_i = atom_i + 1) {
+      mass_lin = atom_mass[atom_i];
+
+      momentum.x = momentum.x + mass_lin * atom_vel[atom_i].x;
+      momentum.y = momentum.y + mass_lin * atom_vel[atom_i].y;
+      momentum.z = momentum.z + mass_lin * atom_vel[atom_i].z;
+      res_mass = res_mass + mass_lin;
+    }
+    ek[residue_i] = 0.5 * (momentum.x * momentum.x + momentum.y * momentum.y + momentum.z * momentum.z) / res_mass *
+                    2. / 3. / CONSTANT_kB / residue_numbers;
+  }
+}
+
+void MDTemperature(const int residue_numbers, const int *start, const int *end, const float *atom_vel_f,
+                   const float *atom_mass, float *ek, cudaStream_t stream) {
+  VECTOR *atom_vel = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(atom_vel_f));
+  MDTemperatureKernel<<<ceilf(static_cast<float>(residue_numbers) / 32), 32, 0, stream>>>(residue_numbers, start, end,
+                                                                                          atom_vel, atom_mass, ek);
+  cudaStreamSynchronize(stream);
+
+  return;
+}
+void MDTemperature(const int residue_numbers, const int *start, const int *end, const float *atom_vel_f,
+                   const float *atom_mass, float *ek, cudaStream_t stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/mdtemperature_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/mdtemperature_impl.cuh
new file mode 100644
index 0000000000..93eb79d57f
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/mdtemperature_impl.cuh
@@ -0,0 +1,25 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_MDTEMPERATURE_IMPL_H_
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_MDTEMPERATURE_IMPL_H_
+
+#include <curand_kernel.h>
+#include "runtime/device/gpu/cuda_common.h"
+
+void MDTemperature(const int residue_numbers, const int *start, const int *end, const float *atom_vel_f,
+                   const float *atom_mass, float *ek, cudaStream_t stream);
+#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_MDTEMPERATURE_IMPL_H_
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh
index d63f930862..20fedff136 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh
@@ -14,31 +14,59 @@
  * limitations under the License.
  */

-#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_SPONGE_COMMONHW_H_
-#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_SPONGE_COMMONHW_H_
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_SPONGE_H_
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_SPONGE_H_

 #include
 #include
 #include
 #include
+
 #include
 #include
 #include
+#include <cufft.h>
 #include "runtime/device/gpu/cuda_common.h"

 #define CONSTANT_Pi 3.1415926535897932
+#define TWO_DIVIDED_BY_SQRT_PI 1.1283791670218446
+#define CONSTANT_kB 0.00198716
+static dim3 thread_LJ(8, 32);
 struct VECTOR {
   float x;
   float y;
   float z;
 };
+struct INT_VECTOR {
+  int int_x;
+  int int_y;
+  int int_z;
+};
 struct UNSIGNED_INT_VECTOR {
   unsigned int uint_x;
   unsigned int uint_y;
   unsigned int uint_z;
 };
+struct NEIGHBOR_LIST {
+  int atom_numbers;
+  int *atom_serial;
+};
+struct UINT_VECTOR_LJ_TYPE {
+  unsigned int uint_x;
+  unsigned int uint_y;
+  unsigned int uint_z;
+  int LJ_type;
+  float charge;
+};
+
+struct GRID_BUCKET {
+  int *atom_serial;
+};
+struct GRID_POINTER {
+  int *grid_serial;
+};
 __device__ __host__ static inline VECTOR Get_Periodic_Displacement(const UNSIGNED_INT_VECTOR uvec_a,
                                                                    const UNSIGNED_INT_VECTOR uvec_b,
                                                                    const VECTOR scaler) {
@@ -48,6 +76,15 @@ __device__ __host__ static inline VECTOR Get_Periodic_Displacement(const UNSIGNE
   dr.x = (static_cast<int>(uvec_a.uint_x - uvec_b.uint_x)) * scaler.x;
   dr.y = (static_cast<int>(uvec_a.uint_y - uvec_b.uint_y)) * scaler.y;
   dr.z = (static_cast<int>(uvec_a.uint_z - uvec_b.uint_z)) * scaler.z;
   return dr;
 }
+__device__ __host__ static inline VECTOR Get_Periodic_Displacement(const UINT_VECTOR_LJ_TYPE uvec_a,
+                                                                   const UINT_VECTOR_LJ_TYPE uvec_b,
+                                                                   const VECTOR scaler) {
+  VECTOR dr;
+  dr.x = (static_cast<int>(uvec_a.uint_x - uvec_b.uint_x)) * scaler.x;
+  dr.y = (static_cast<int>(uvec_a.uint_y - uvec_b.uint_y)) * scaler.y;
+  dr.z = (static_cast<int>(uvec_a.uint_z - uvec_b.uint_z)) * scaler.z;
+  return dr;
+}

 __device__ __host__ static inline VECTOR operator+(const VECTOR &veca, const VECTOR &vecb) {
   VECTOR vec;
@@ -91,4 +128,124 @@ __device__ __host__ static inline VECTOR operator^(const VECTOR &veca, const VEC
   return vec;
 }

-#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_SPONGE_COMMON_H_
+__global__ static void construct_neighbor_list_kernel(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers,
+                                                      int *nl_atom_serial, NEIGHBOR_LIST *nl) {
+  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) {
+    nl[i].atom_numbers = nl_atom_numbers[i];
+    nl[i].atom_serial = nl_atom_serial + i * max_neighbor_numbers;
+  }
+}
+
+static inline bool Malloc_Safely(void **address, size_t size) {
+  address[0] = NULL;
+  address[0] = reinterpret_cast<void *>(malloc(size));
+  if (address[0] != NULL) {
+    return true;
+  } else {
+    printf("malloc failed!\n");
+    getchar();
+    return false;
+  }
+}
+static inline bool Cuda_Malloc_Safely(void **address, size_t size) {
+  cudaError_t cuda_error = cudaMalloc(&address[0], size);
+  if (cuda_error == 0) {
+    return true;
+  } else {
+    printf("cudaMalloc failed! error %d\n", cuda_error);
+    getchar();
+    return false;
+  }
+}
+
+__global__ static void Copy_Crd_To_New_Crd_Start(const int atom_numbers, const UNSIGNED_INT_VECTOR *crd,
+                                                 UINT_VECTOR_LJ_TYPE *new_crd, const int *LJ_type,
+                                                 const float *charge) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    new_crd[atom_i].uint_x = crd[atom_i].uint_x;
+    new_crd[atom_i].uint_y = crd[atom_i].uint_y;
+    new_crd[atom_i].uint_z = crd[atom_i].uint_z;
+    new_crd[atom_i].LJ_type = LJ_type[atom_i];
+    new_crd[atom_i].charge = charge[atom_i];
+  }
+}
+
+__global__ static void Rand_Normal(const int float4_numbers, curandStatePhilox4_32_10_t *rand_state,
+                                   float4 *rand_float4) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < float4_numbers) {
+    rand_float4[i] = curand_normal4(&rand_state[i]);
+  }
+}
+
+__global__ static void Setup_Rand_Normal_Kernel(const int float4_numbers, curandStatePhilox4_32_10_t *rand_state,
+                                                const int seed) {
+  int id = threadIdx.x + blockIdx.x * blockDim.x;
+  /* Each thread gets same seed, a different sequence
+     number, no offset */
+  if (id < float4_numbers) {
+    curand_init(seed, id, 0, &rand_state[id]);
+  }
+}
+
+__global__ static void Reset_List(const int element_numbers, int *list, const int replace_element) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < element_numbers) {
+    list[i] = replace_element;
+  }
+}
+
+__global__ static void Reset_List(const int element_numbers, float *list, const float replace_element) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < element_numbers) {
+    list[i] = replace_element;
+  }
+}
+
+__global__ static void Sum_Of_List(const int element_numbers, const float *list, float *sum) {
+  if (threadIdx.x == 0) {
+    sum[0] = 0.;
+  }
+  __syncthreads();
+  float lin = 0.;
+  for (int i = threadIdx.x; i < element_numbers; i = i + blockDim.x) {
+    lin = lin + list[i];
+  }
+  atomicAdd(sum, lin);
+}
+
+__global__ static void Scale_List(const int element_numbers, float *list, float scaler) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < element_numbers) {
+    list[i] = list[i] * scaler;
+  }
+}
+
+__global__ static void Copy_List(const int element_numbers, const int *origin_list, int *list) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < element_numbers) {
+    list[i] = origin_list[i];
+  }
+}
+__global__ static void Copy_List(const int element_numbers, const float *origin_list, float *list) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < element_numbers) {
+    list[i] = origin_list[i];
+  }
+}
+
+__global__ static void Print(const size_t size, const float *input_x) {
+  for (size_t i = 0; i < size; i++) {
+    printf("%f\n", input_x[i]);
+  }
+  return;
+}
+__global__ static void Print(const size_t size, const int *input_x) {
+  for (size_t i = 0; i < size; i++) {
+    printf("%d\n", input_x[i]);
+  }
+  return;
+}
+
+#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_SPONGE_H_
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/dihedral/dihedral_atom_energy_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/dihedral/dihedral_atom_energy_impl.cu
index c4e6051d5f..a3a5a2e4d2 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/dihedral/dihedral_atom_energy_impl.cu
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/dihedral/dihedral_atom_energy_impl.cu
@@ -29,8 +29,6 @@ __global__ void DihedralAtomEnergyKernel(int dihedral_numbers, const UNSIGNED_IN
     int atom_k = atom_c[dihedral_i];
     int atom_l = atom_d[dihedral_i];

-    int temp_ipn = ipn[dihedral_i];
-
     float temp_pk = pk[dihedral_i];
     float temp_pn = pn[dihedral_i];
     float temp_gamc = gamc[dihedral_i];
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/dihedral/dihedral_energy_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/dihedral/dihedral_energy_impl.cu
index 7e24136002..4bf4127248 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/dihedral/dihedral_energy_impl.cu
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/dihedral/dihedral_energy_impl.cu
@@ -29,8 +29,6 @@ __global__ void DihedralEnergyKernel(int dihedral_numbers, const UNSIGNED_INT_VE
     int atom_k = atom_c[dihedral_i];
     int atom_l = atom_d[dihedral_i];

-    int temp_ipn = ipn[dihedral_i];
-
     float temp_pk = pk[dihedral_i];
     float temp_pn = pn[dihedral_i];
     float temp_gamc = gamc[dihedral_i];
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/dihedral/dihedral_force_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/dihedral/dihedral_force_impl.cu
index 959bca09d2..f7463c717a 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/dihedral/dihedral_force_impl.cu
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/dihedral/dihedral_force_impl.cu
@@ -31,7 +31,6 @@ __global__ void DihedralForceKernel(int dihedral_numbers, const UNSIGNED_INT_VEC
     int temp_ipn = ipn[dihedral_i];

-    float temp_pk = pk[dihedral_i];
     float temp_pn = pn[dihedral_i];
     float temp_gamc = gamc[dihedral_i];
     float temp_gams = gams[dihedral_i];
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_energy_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_energy_impl.cu
new file mode 100644
index 0000000000..5538410eb5
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_energy_impl.cu
@@ -0,0 +1,102 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_energy_impl.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
+
+__global__ void LJ_Energy_CUDA(const int atom_numbers, const NEIGHBOR_LIST *nl, const UINT_VECTOR_LJ_TYPE *uint_crd,
+                               const VECTOR *boxlength, const float *LJ_type_A, const float *LJ_type_B,
+                               const float cutoff_square, float *lj_ene) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    NEIGHBOR_LIST nl_i = nl[atom_i];
+    int N = nl_i.atom_numbers;
+    int atom_j;
+    int int_x;
+    int int_y;
+    int int_z;
+    UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i], r2;
+    VECTOR dr;
+    float dr2;
+    float dr_2;
+    float dr_4;
+    float dr_6;
+    float ene_lin = 0.;
+
+    int x, y;
+    int atom_pair_LJ_type;
+    for (int j = threadIdx.y; j < N; j = j + blockDim.y) {
+      atom_j = nl_i.atom_serial[j];
+      r2 = uint_crd[atom_j];
+
+      int_x = r2.uint_x - r1.uint_x;
+      int_y = r2.uint_y - r1.uint_y;
+      int_z = r2.uint_z - r1.uint_z;
+      dr.x = boxlength[0].x * int_x;
+      dr.y = boxlength[0].y * int_y;
+      dr.z = boxlength[0].z * int_z;
+
+      dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
+      if (dr2 < cutoff_square) {
+        dr_2 = 1. / dr2;
+        dr_4 = dr_2 * dr_2;
+        dr_6 = dr_4 * dr_2;
+
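+        // Branch-free lookup of the (LJ_type_i, LJ_type_j) pair index in the
+        // upper-triangular coefficient table: y becomes |ti - tj| via the sign
+        // bit, then index = max * (max + 1) / 2 + min.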
+        y = (r2.LJ_type - r1.LJ_type);
+        x = y >> 31;
+        y = (y ^ x) - x;
+        x = r2.LJ_type + r1.LJ_type;
+        r2.LJ_type = (x + y) >> 1;
+        x = (x - y) >> 1;
+        atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;
+
+        dr_2 = (0.083333333 * LJ_type_A[atom_pair_LJ_type] * dr_6 - 0.166666666 * LJ_type_B[atom_pair_LJ_type]) * dr_6;
+        ene_lin = ene_lin + dr_2;
+      }
+    }
+    atomicAdd(&lj_ene[atom_i], ene_lin);
+  }
+}
+
+void LJEnergy(const int atom_numbers, const float cutoff_square, const int *uint_crd_f, const int *LJtype,
+              const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers,
+              int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B, float *d_LJ_energy_atom,
+              cudaStream_t stream) {
+  VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
+  int max_neighbor_numbers = 800;
+  NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl);
+  construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
+    atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a);
+
+  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ_a = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ);
+
+  UNSIGNED_INT_VECTOR *uint_crd =
+    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
+
+  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, uint_crd, uint_crd_with_LJ_a, LJtype, charge);
+
+  Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, d_LJ_energy_atom, 0.);
+
+  LJ_Energy_CUDA<<<ceilf(static_cast<float>(atom_numbers) / 8), thread_LJ, 0, stream>>>(
+    atom_numbers, nl_a, uint_crd_with_LJ_a, scaler, d_LJ_A, d_LJ_B, cutoff_square, d_LJ_energy_atom);
+
+  return;
+}
+void LJEnergy(const int atom_numbers, const float cutoff_square, const int *uint_crd_f, const int *LJtype,
+              const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers,
+              int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B, float *d_LJ_energy_atom,
+              cudaStream_t stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_energy_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_energy_impl.cuh
new file mode 100644
index 0000000000..4c0b6f1805
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_energy_impl.cuh
@@ -0,0 +1,27 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_ENERGY_IMPL_H_
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_ENERGY_IMPL_H_
+
+#include <curand_kernel.h>
+#include "runtime/device/gpu/cuda_common.h"
+
+void LJEnergy(const int atom_numbers, const float cutoff_square, const int *uint_crd_f, const int *LJtype,
+              const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers,
+              int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B, float *d_LJ_energy_atom,
+              cudaStream_t stream);
+#endif
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_impl.cu
new file mode 100644
index 0000000000..dd3011526c
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_impl.cu
@@ -0,0 +1,116 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_impl.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
+
+__global__ void LJ_Force_CUDA(const int atom_numbers, const NEIGHBOR_LIST *nl, const UINT_VECTOR_LJ_TYPE *uint_crd,
+                              const VECTOR *boxlength, const float *LJ_type_A, const float *LJ_type_B,
+                              const float cutoff_square, VECTOR *frc) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    NEIGHBOR_LIST nl_i = nl[atom_i];
+    int N = nl_i.atom_numbers;
+    int B = ceilf(static_cast<float>(N) / blockDim.y);
+    int atom_j;
+    int int_x;
+    int int_y;
+    int int_z;
+    UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i], r2;
+    VECTOR dr;
+    float dr2;
+    float dr_2;
+    float dr_4;
+    float dr_8;
+    float dr_14;
+    float frc_abs = 0.;
+    VECTOR frc_lin;
+    VECTOR frc_record = {0., 0., 0.};
+
+    int x, y;
+    int atom_pair_LJ_type;
+    for (int j = threadIdx.y * B; j < (threadIdx.y + 1) * B; j = j + 1) {
+      if (j < N) {
+        atom_j = nl_i.atom_serial[j];
+        r2 = uint_crd[atom_j];
+        int_x = r2.uint_x - r1.uint_x;
+        int_y = r2.uint_y - r1.uint_y;
+        int_z = r2.uint_z - r1.uint_z;
+        dr.x = boxlength[0].x * int_x;
+        dr.y = boxlength[0].y * int_y;
+        dr.z = boxlength[0].z * int_z;
+        dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
+        if (dr2 < cutoff_square) {
+          dr_2 = 1. / dr2;
+          dr_4 = dr_2 * dr_2;
+          dr_8 = dr_4 * dr_4;
+          dr_14 = dr_8 * dr_4 * dr_2;
+
+          y = (r2.LJ_type - r1.LJ_type);
+          x = y >> 31;
+          y = (y ^ x) - x;
+          x = r2.LJ_type + r1.LJ_type;
+          r2.LJ_type = (x + y) >> 1;
+          x = (x - y) >> 1;
+          atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;
+
+          frc_abs = -LJ_type_A[atom_pair_LJ_type] * dr_14 + LJ_type_B[atom_pair_LJ_type] * dr_8;
+          frc_lin.x = frc_abs * dr.x;
+          frc_lin.y = frc_abs * dr.y;
+          frc_lin.z = frc_abs * dr.z;
+
+          frc_record.x = frc_record.x + frc_lin.x;
+          frc_record.y = frc_record.y + frc_lin.y;
+          frc_record.z = frc_record.z + frc_lin.z;
+
+          atomicAdd(&frc[atom_j].x, -frc_lin.x);
+          atomicAdd(&frc[atom_j].y, -frc_lin.y);
+          atomicAdd(&frc[atom_j].z, -frc_lin.z);
+        }
+      }
+    }
+    atomicAdd(&frc[atom_i].x, frc_record.x);
+    atomicAdd(&frc[atom_i].y, frc_record.y);
+    atomicAdd(&frc[atom_i].z, frc_record.z);
+  }
+}
+
+void LJForce(const int atom_numbers, const float cutoff_square, const int *uint_crd_f, const int *LJtype,
+             const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers,
+             int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B, float *frc_f,
+             cudaStream_t stream) {
+  VECTOR *frc = reinterpret_cast<VECTOR *>(frc_f);
+  VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
+  int max_neighbor_numbers = 800;
+  NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl);
+  construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
+    atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a);
+
+  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ_a = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ);
+
+  UNSIGNED_INT_VECTOR *uint_crd =
+    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
+
+  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, uint_crd, uint_crd_with_LJ_a, LJtype, charge);
+
+  LJ_Force_CUDA<<<ceilf(static_cast<float>(atom_numbers) / 8), thread_LJ, 0, stream>>>(
+    atom_numbers, nl_a, uint_crd_with_LJ_a, scaler, d_LJ_A, d_LJ_B, cutoff_square, frc);
+  return;
+}
+void LJForce(const int atom_numbers, const float cutoff_square, const int *uint_crd_f, const int *LJtype,
+             const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers,
+             int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B, float *frc_f, cudaStream_t stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_impl.cuh
new file mode 100644
index 0000000000..401b5d3362
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_impl.cuh
@@ -0,0 +1,27 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_FORCE_IMPL_H_
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_FORCE_IMPL_H_
+
+#include <curand_kernel.h>
+#include "runtime/device/gpu/cuda_common.h"
+
+void LJForce(const int atom_numbers, const float cutoff_square, const int *uint_crd_f, const int *LJtype,
+             const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers,
+             int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B, float *frc_f, cudaStream_t stream);
+
+#endif
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_with_pme_direct_force_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_with_pme_direct_force_impl.cu
new file mode 100644
index 0000000000..22315b2dc7
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_with_pme_direct_force_impl.cu
@@ -0,0 +1,132 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_with_pme_direct_force_impl.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
+
+__global__ void LJ_Force_With_Direct_CF_CUDA(const int atom_numbers, const NEIGHBOR_LIST *nl,
+                                             const UINT_VECTOR_LJ_TYPE *uint_crd, const VECTOR *boxlength,
+                                             const float *LJ_type_A, const float *LJ_type_B, const float cutoff,
+                                             VECTOR *frc, const float pme_beta, const float sqrt_pi) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    NEIGHBOR_LIST nl_i = nl[atom_i];
+    int N = nl_i.atom_numbers;
+    int atom_j;
+    int int_x;
+    int int_y;
+    int int_z;
+    UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i], r2;
+    VECTOR dr;
+    float dr_2;
+    float dr_4;
+    float dr_8;
+    float dr_6;
+    float frc_abs = 0.;
+    VECTOR frc_lin;
+    VECTOR frc_record = {0., 0., 0.};
+
+    float charge_i = r1.charge;
+    float charge_j;
+    float dr_abs;
+    float dr_1;
+    float beta_dr;
+    float frc_cf_abs;
+
+    int x, y;
+    int atom_pair_LJ_type;
+    for (int j = threadIdx.y; j < N; j = j + blockDim.y) {
+      atom_j = nl_i.atom_serial[j];
+      r2 = uint_crd[atom_j];
+      charge_j = r2.charge;
+
+      int_x = r2.uint_x - r1.uint_x;
+      int_y = r2.uint_y - r1.uint_y;
+      int_z = r2.uint_z - r1.uint_z;
+      dr.x = boxlength[0].x * int_x;
+      dr.y = boxlength[0].y * int_y;
+      dr.z = boxlength[0].z * int_z;
+      dr_abs = norm3df(dr.x, dr.y, dr.z);
+      if (dr_abs < cutoff) {
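+        // Direct-space PME pair force. The screened-Coulomb magnitude is
+        // -d/dr[qi*qj*erfc(beta*r)/r] / r
+        //   = qi*qj*(2*beta*r/sqrt(pi)*exp(-(beta*r)^2) + erfc(beta*r)) / r^3;
+        // sqrt_pi actually carries 2/sqrt(pi) (TWO_DIVIDED_BY_SQRT_PI).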
+        dr_1 = 1. / dr_abs;
+        dr_2 = dr_1 * dr_1;
+        dr_4 = dr_2 * dr_2;
+        dr_8 = dr_4 * dr_4;
+        dr_6 = dr_4 * dr_2;
+
+        y = (r2.LJ_type - r1.LJ_type);
+        x = y >> 31;
+        y = (y ^ x) - x;
+        x = r2.LJ_type + r1.LJ_type;
+        r2.LJ_type = (x + y) >> 1;
+        x = (x - y) >> 1;
+        atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;
+
+        frc_abs = (-LJ_type_A[atom_pair_LJ_type] * dr_6 + LJ_type_B[atom_pair_LJ_type]) * dr_8;
+        beta_dr = pme_beta * dr_abs;
+        frc_cf_abs = beta_dr * sqrt_pi * expf(-beta_dr * beta_dr) + erfcf(beta_dr);
+        frc_cf_abs = frc_cf_abs * dr_2 * dr_1;
+        frc_cf_abs = charge_i * charge_j * frc_cf_abs;
+
+        frc_abs = frc_abs - frc_cf_abs;
+
+        frc_lin.x = frc_abs * dr.x;
+        frc_lin.y = frc_abs * dr.y;
+        frc_lin.z = frc_abs * dr.z;
+
+        frc_record.x = frc_record.x + frc_lin.x;
+        frc_record.y = frc_record.y + frc_lin.y;
+        frc_record.z = frc_record.z + frc_lin.z;
+
+        atomicAdd(&frc[atom_j].x, -frc_lin.x);
+        atomicAdd(&frc[atom_j].y, -frc_lin.y);
+        atomicAdd(&frc[atom_j].z, -frc_lin.z);
+      }
+    }
+    atomicAdd(&frc[atom_i].x, frc_record.x);
+    atomicAdd(&frc[atom_i].y, frc_record.y);
+    atomicAdd(&frc[atom_i].z, frc_record.z);
+  }
+}
+
+void LJForceWithPMEDirectForce(const int atom_numbers, const float cutoff, const float pme_beta, const int *uint_crd_f,
+                               const int *LJtype, const float *charge, const float *scaler_f, float *uint_crd_with_LJ,
+                               int *nl_atom_numbers, int *nl_atom_serial, int *nl, const float *d_LJ_A,
+                               const float *d_LJ_B, float *frc_f, cudaStream_t stream) {
+  VECTOR *frc = reinterpret_cast<VECTOR *>(frc_f);
+  VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
+  int max_neighbor_numbers = 800;
+  NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl);
+  construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
+    atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a);
+
+  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ_a = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ);
+
+  UNSIGNED_INT_VECTOR *uint_crd =
+    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
+
+  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, uint_crd, uint_crd_with_LJ_a, LJtype, charge);
+
+  LJ_Force_With_Direct_CF_CUDA<<<ceilf(static_cast<float>(atom_numbers) / 8), thread_LJ, 0, stream>>>(
+    atom_numbers, nl_a, uint_crd_with_LJ_a, scaler, d_LJ_A, d_LJ_B, cutoff, frc, pme_beta, TWO_DIVIDED_BY_SQRT_PI);
+  return;
+}
+
+void LJForceWithPMEDirectForce(const int atom_numbers, const float cutoff, const float pme_beta, const int *uint_crd_f,
+                               const int *LJtype, const float *charge, const float *scaler_f, float *uint_crd_with_LJ,
+                               int *nl_atom_numbers, int *nl_atom_serial, int *nl, const float *d_LJ_A,
+                               const float *d_LJ_B, float *frc_f, cudaStream_t stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_with_pme_direct_force_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_with_pme_direct_force_impl.cuh
new file mode 100644
index 0000000000..826d4223bb
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_with_pme_direct_force_impl.cuh
@@ -0,0 +1,28 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_FORCE_WITH_PME_DIRECT_FORCE_IMPL_H_
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_FORCE_WITH_PME_DIRECT_FORCE_IMPL_H_
+
+#include <curand_kernel.h>
+#include "runtime/device/gpu/cuda_common.h"
+
+void LJForceWithPMEDirectForce(const int atom_numbers, const float cutoff, const float pme_beta, const int *uint_crd_f,
+                               const int *LJtype, const float *charge, const float *scaler_f, float *uint_crd_with_LJ,
+                               int *nl_atom_numbers, int *nl_atom_serial, int *nl, const float *d_LJ_A,
+                               const float *d_LJ_B, float *frc_f, cudaStream_t stream);
+
+#endif
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_atom_energy_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_atom_energy_impl.cu
new file mode 100644
index 0000000000..44219d215e
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_atom_energy_impl.cu
@@ -0,0 +1,80 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_atom_energy_impl.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
+
+__global__ void Dihedral14CFAtomEnergyKernel(const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd,
+                                             const VECTOR *boxlength, const int *a_14, const int *b_14,
+                                             const float *cf_scale_factor, float *ene) {
+  int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (dihedral_14_i < dihedral_14_numbers) {
+    int atom_i = a_14[dihedral_14_i];
+    int atom_j = b_14[dihedral_14_i];
+
+    UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i];
+    UINT_VECTOR_LJ_TYPE r2 = uint_crd[atom_j];
+
+    int int_x;
+    int int_y;
+    int int_z;
+    VECTOR dr;
+    float r_1;
+    float ene_lin = 0.;
+
+    int_x = r2.uint_x - r1.uint_x;
+    int_y = r2.uint_y - r1.uint_y;
+    int_z = r2.uint_z - r1.uint_z;
+    dr.x = boxlength[0].x * int_x;
+    dr.y = boxlength[0].y * int_y;
+    dr.z = boxlength[0].z * int_z;
+    r_1 = rnorm3df(dr.x, dr.y, dr.z);
+
+    ene_lin = r1.charge * r2.charge * r_1;
+
+    ene_lin *= cf_scale_factor[dihedral_14_i];
+
+    atomicAdd(&ene[atom_i], ene_lin);
+  }
+}
+
+void Dihedral14CFAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
+                            const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
+                            const int *b_14, const float *cf_scale_factor, float *ene, cudaStream_t stream) {
+  size_t thread_per_block = 128;
+  size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
+  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL;
+  Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers);
+
+  UNSIGNED_INT_VECTOR *uint_crd =
+    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
+
+  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
+
+  VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
+  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(atom_numbers, ene, 0.);
+  Dihedral14CFAtomEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
+    dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, cf_scale_factor, ene);
+
+  cudaStreamSynchronize(stream);
+
+  return;
+}
+
+void Dihedral14CFAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
+                            const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
+                            const int *b_14, const float *cf_scale_factor, float *ene, cudaStream_t stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_atom_energy_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_atom_energy_impl.cuh
new file mode 100644
index 0000000000..7d11c10793
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_atom_energy_impl.cuh
@@ -0,0 +1,25 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ATOM_ENERGY_IMPL_H
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ATOM_ENERGY_IMPL_H
+
+#include <curand_kernel.h>
+#include "runtime/device/gpu/cuda_common.h"
+
+void Dihedral14CFAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
+                            const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
+                            const int *b_14, const float *cf_scale_factor, float *ene, cudaStream_t stream);
+#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ENERGY_IMPL_H
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cu
new file mode 100644
index 0000000000..1bbde8bfa8
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cu
@@ -0,0 +1,80 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
+
+__global__ void Dihedral14CFEnergyKernel(const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd,
+                                         const VECTOR *boxlength, const int *a_14, const int *b_14,
+                                         const float *cf_scale_factor, float *ene) {
+  int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (dihedral_14_i < dihedral_14_numbers) {
+    int atom_i = a_14[dihedral_14_i];
+    int atom_j = b_14[dihedral_14_i];
+
+    UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i];
+    UINT_VECTOR_LJ_TYPE r2 = uint_crd[atom_j];
+
+    int int_x;
+    int int_y;
+    int int_z;
+    VECTOR dr;
+    float r_1;
+    float ene_lin = 0.;
+
+    int_x = r2.uint_x - r1.uint_x;
+    int_y = r2.uint_y - r1.uint_y;
+    int_z = r2.uint_z - r1.uint_z;
+    dr.x = boxlength[0].x * int_x;
+    dr.y = boxlength[0].y * int_y;
+    dr.z = boxlength[0].z * int_z;
+    r_1 = rnorm3df(dr.x, dr.y, dr.z);
+
+    ene_lin = r1.charge * r2.charge * r_1;
+
+    ene_lin *= cf_scale_factor[dihedral_14_i];
+
+    ene[dihedral_14_i] = ene_lin;
+  }
+}
+
+void Dihedral14CFEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype,
+                        const float *charge, const float *boxlength_f, const int *a_14, const int *b_14,
+                        const float *cf_scale_factor, float *ene, cudaStream_t stream) {
+  size_t thread_per_block = 128;
+  size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
+  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL;
+  Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers);
+
+  UNSIGNED_INT_VECTOR *uint_crd =
+    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
+
+  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
+
+  VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
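+  // Zero the output buffer, then compute one scaled 1-4 Coulomb energy per
+  // dihedral pair (the kernel writes ene[pair], indexed by the 1-4 pair).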
Reset_List<<(3. * atom_numbers) / 128), 128>>>(atom_numbers, ene, 0.); + Dihedral14CFEnergyKernel<<>>( + dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, cf_scale_factor, ene); + + cudaStreamSynchronize(stream); + + return; +} + +void Dihedral14CFEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype, + const float *charge, const float *boxlength_f, const int *a_14, const int *b_14, + const float *cf_scale_factor, float *ene, cudaStream_t stream); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cuh new file mode 100644 index 0000000000..0e14bd7534 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cuh @@ -0,0 +1,25 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ENERGY_IMPL_H +#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ENERGY_IMPL_H + +#include +#include "runtime/device/gpu/cuda_common.h" + +void Dihedral14CFEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype, + const float *charge, const float *boxlength, const int *a_14, const int *b_14, + const float *cf_scale_factor, float *ene, cudaStream_t stream); +#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ENERGY_IMPL_H diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cu new file mode 100644 index 0000000000..ccd4ba0595 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cu @@ -0,0 +1,102 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cu
new file mode 100644
index 0000000000..ccd4ba0595
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cu
@@ -0,0 +1,102 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
+
+__global__ void Dihedral14LJAtomEnergyKernel(const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd,
+                                             const VECTOR *boxlength, const int *a_14, const int *b_14,
+                                             const float *lj_scale_factor, const float *LJ_type_A,
+                                             const float *LJ_type_B, float *ene) {
+  int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (dihedral_14_i < dihedral_14_numbers) {
+    int atom_i = a_14[dihedral_14_i];
+    int atom_j = b_14[dihedral_14_i];
+
+    UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i];
+    UINT_VECTOR_LJ_TYPE r2 = uint_crd[atom_j];
+
+    int int_x;
+    int int_y;
+    int int_z;
+    VECTOR dr;
+    float dr2;
+    float dr_2;
+    float dr_4;
+    float dr_6;
+    float dr_12;
+    float ene_lin = 0.;
+    int x, y;
+    int atom_pair_LJ_type;
+
+    int_x = r2.uint_x - r1.uint_x;
+    int_y = r2.uint_y - r1.uint_y;
+    int_z = r2.uint_z - r1.uint_z;
+    dr.x = boxlength[0].x * int_x;
+    dr.y = boxlength[0].y * int_y;
+    dr.z = boxlength[0].z * int_z;
+    dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
+
+    dr_2 = 1. / dr2;
+    dr_4 = dr_2 * dr_2;
+    dr_6 = dr_4 * dr_2;
+    dr_12 = dr_6 * dr_6;
+
+    y = (r2.LJ_type - r1.LJ_type);
+    x = y >> 31;
+    y = (y ^ x) - x;
+    x = r2.LJ_type + r1.LJ_type;
+    r2.LJ_type = (x + y) >> 1;
+    x = (x - y) >> 1;
+    atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;
+
+    ene_lin = 0.08333333 * LJ_type_A[atom_pair_LJ_type] * dr_12 -
+              0.1666666 * LJ_type_B[atom_pair_LJ_type] * dr_6;  // LJ A and B are pre-multiplied by 12 and 6,
+                                                                // so divide the factors back out here
+    ene_lin *= lj_scale_factor[dihedral_14_i];
+
+    atomicAdd(&ene[atom_i], ene_lin);
+  }
+}
+
+void Dihedral14LJAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
+                            const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
+                            const int *b_14, const float *lj_scale_factor, const float *LJ_type_A,
+                            const float *LJ_type_B, float *ene, cudaStream_t stream) {
+  size_t thread_per_block = 128;
+  size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
+  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL;
+  Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers);
+
+  UNSIGNED_INT_VECTOR *uint_crd =
+    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
+
+  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
+
+  VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
+  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(atom_numbers, ene, 0.);
+  Dihedral14LJAtomEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
+    dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, LJ_type_A, LJ_type_B, ene);
+
+  cudaStreamSynchronize(stream);
+
+  return;
+}
+
+void Dihedral14LJAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
+                            const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
+                            const int *b_14, const float *lj_scale_factor, const float *LJ_type_A,
+                            const float *LJ_type_B, float *ene, cudaStream_t stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cuh
new file mode 100644
index 0000000000..d27035a67a
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cuh
@@ -0,0 +1,26 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ATOM_ENERGY_IMPL_H
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ATOM_ENERGY_IMPL_H
+
+#include <cuda_runtime_api.h>
+#include "runtime/device/gpu/cuda_common.h"
+
+void Dihedral14LJAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
+                            const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
+                            const int *b_14, const float *lj_scale_factor, const float *LJ_type_A,
+                            const float *LJ_type_B, float *ene, cudaStream_t stream);
+#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ATOM_ENERGY_IMPL_H
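The bit manipulation on `LJ_type` in the kernel above computes max and min of the two atom types branchlessly, then indexes a packed lower-triangular table of pair coefficients. An equivalent, readable formulation with a self-check (illustrative only, not part of the patch; it assumes arithmetic right shift on signed ints, as the CUDA kernels do):

#include <algorithm>
#include <cassert>

int PairIndex(int type_i, int type_j) {
  int hi = std::max(type_i, type_j);
  int lo = std::min(type_i, type_j);
  // Row hi of a packed lower-triangular matrix starts at hi*(hi+1)/2.
  return (hi * (hi + 1) >> 1) + lo;
}

int PairIndexBranchless(int ti, int tj) {
  int y = tj - ti;
  int x = y >> 31;        // all ones if y < 0, else zero (arithmetic shift)
  y = (y ^ x) - x;        // |tj - ti|
  x = tj + ti;
  int hi = (x + y) >> 1;  // max(ti, tj)
  int lo = (x - y) >> 1;  // min(ti, tj)
  return (hi * (hi + 1) >> 1) + lo;
}

int main() {
  for (int i = 0; i < 8; ++i)
    for (int j = 0; j < 8; ++j) assert(PairIndex(i, j) == PairIndexBranchless(i, j));
  return 0;
}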
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cu
new file mode 100644
index 0000000000..dd757caab7
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cu
@@ -0,0 +1,140 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
+
+__global__ void Dihedral14LJCFForceWithAtomEnergyKernel(const int dihedral_14_numbers,
+                                                        const UINT_VECTOR_LJ_TYPE *uint_crd, const VECTOR *boxlength,
+                                                        const int *a_14, const int *b_14, const float *lj_scale_factor,
+                                                        const float *cf_scale_factor, const float *LJ_type_A,
+                                                        const float *LJ_type_B, VECTOR *frc, float *atom_energy) {
+  int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (dihedral_14_i < dihedral_14_numbers) {
+    int int_x;
+    int int_y;
+    int int_z;
+    UINT_VECTOR_LJ_TYPE r1, r2;
+    VECTOR dr;
+    float dr_abs;
+    float dr2;
+    float dr_1;
+    float dr_2;
+    float dr_4;
+    float dr_8;
+    float dr_14;
+    float frc_abs = 0.;
+    VECTOR temp_frc;
+
+    float ene_lin;
+    float ene_lin2;
+
+    int x, y;
+    int atom_pair_LJ_type;
+
+    int atom_i = a_14[dihedral_14_i];
+    int atom_j = b_14[dihedral_14_i];
+
+    r1 = uint_crd[atom_i];
+    r2 = uint_crd[atom_j];
+    int_x = r2.uint_x - r1.uint_x;
+    int_y = r2.uint_y - r1.uint_y;
+    int_z = r2.uint_z - r1.uint_z;
+    dr.x = boxlength[0].x * int_x;
+    dr.y = boxlength[0].y * int_y;
+    dr.z = boxlength[0].z * int_z;
+    dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
+
+    dr_2 = 1.0 / dr2;
+    dr_4 = dr_2 * dr_2;
+    dr_8 = dr_4 * dr_4;
+    dr_14 = dr_8 * dr_4 * dr_2;
+    dr_abs = norm3df(dr.x, dr.y, dr.z);
+    dr_1 = 1. / dr_abs;
+
+    float charge_i = r1.charge;
+    float charge_j = r2.charge;
+    float frc_cf_abs;
+    frc_cf_abs = cf_scale_factor[dihedral_14_i] * dr_2 * dr_1;
+    frc_cf_abs = -charge_i * charge_j * frc_cf_abs;
+
+    y = (r2.LJ_type - r1.LJ_type);
+    x = y >> 31;
+    y = (y ^ x) - x;
+    x = r2.LJ_type + r1.LJ_type;
+    r2.LJ_type = (x + y) >> 1;
+    x = (x - y) >> 1;
+    atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;
+
+    frc_abs = -LJ_type_A[atom_pair_LJ_type] * dr_14 + LJ_type_B[atom_pair_LJ_type] * dr_8;
+    frc_abs *= lj_scale_factor[dihedral_14_i];
+
+    frc_abs += frc_cf_abs;
+    temp_frc.x = frc_abs * dr.x;
+    temp_frc.y = frc_abs * dr.y;
+    temp_frc.z = frc_abs * dr.z;
+
+    atomicAdd(&frc[atom_j].x, -temp_frc.x);
+    atomicAdd(&frc[atom_j].y, -temp_frc.y);
+    atomicAdd(&frc[atom_j].z, -temp_frc.z);
+    atomicAdd(&frc[atom_i].x, temp_frc.x);
+    atomicAdd(&frc[atom_i].y, temp_frc.y);
+    atomicAdd(&frc[atom_i].z, temp_frc.z);
+
+    ene_lin = r1.charge * r2.charge * dr_1;
+    ene_lin *= cf_scale_factor[dihedral_14_i];
+    ene_lin2 = 0.08333333 * LJ_type_A[atom_pair_LJ_type] * dr_4 * dr_8 -
+               0.1666666 * LJ_type_B[atom_pair_LJ_type] * dr_4 * dr_2;  // LJ A and B are pre-multiplied by 12 and 6,
+                                                                        // so divide the factors back out here
+    ene_lin2 *= lj_scale_factor[dihedral_14_i];
+
+    atomicAdd(&atom_energy[atom_i], ene_lin + ene_lin2);
+  }
+}
+
+void Dihedral14LJCFForceWithAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
+                                       const int *LJtype, const float *charge, const float *boxlength_f,
+                                       const int *a_14, const int *b_14, const float *lj_scale_factor,
+                                       const float *cf_scale_factor, const float *LJ_type_A, const float *LJ_type_B,
+                                       float *frc_f, float *atom_energy, cudaStream_t stream) {
+  size_t thread_per_block = 128;
+  size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
+  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL;
+  Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers);
+
+  UNSIGNED_INT_VECTOR *uint_crd =
+    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
+
+  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
+  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, frc_f, 0.);
+  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(atom_numbers, atom_energy, 0.);
+  VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
+  VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
+
+  Dihedral14LJCFForceWithAtomEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
+    dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, cf_scale_factor, LJ_type_A,
+    LJ_type_B, frc, atom_energy);
+
+  cudaStreamSynchronize(stream);
+
+  return;
+}
+
+void Dihedral14LJCFForceWithAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
+                                       const int *LJtype, const float *charge, const float *boxlength_f,
+                                       const int *a_14, const int *b_14, const float *lj_scale_factor,
+                                       const float *cf_scale_factor, const float *LJ_type_A, const float *LJ_type_B,
+                                       float *frc_f, float *atom_energy, cudaStream_t stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cuh
new file mode 100644
index 0000000000..4ea8262b3c
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cuh
@@ -0,0 +1,27 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_IMPL_H
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_IMPL_H
+
+#include <cuda_runtime_api.h>
+#include "runtime/device/gpu/cuda_common.h"
+
+void Dihedral14LJCFForceWithAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
+                                       const int *LJtype, const float *charge, const float *boxlength_f,
+                                       const int *a_14, const int *b_14, const float *lj_scale_factor,
+                                       const float *cf_scale_factor, const float *LJ_type_A, const float *LJ_type_B,
+                                       float *frc, float *atom_energy, cudaStream_t stream);
+#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_IMPL_H
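Why the force kernels can use `-A*r^-14 + B*r^-8` while the energy kernels multiply by 1/12 and 1/6: with the convention above, `LJ_type_A = 12a` and `LJ_type_B = 6b` for E(r) = a·r^-12 − b·r^-6, so `frc_abs` equals (dE/dr)/r and `frc_abs * dr` is the correct force on atom i (dr pointing from i to j). A small numeric consistency check of that identity (coefficients here are arbitrary examples, not from the patch):

#include <cassert>
#include <cmath>

int main() {
  const double a = 2.5, b = 1.3;           // plain LJ coefficients
  const double A = 12.0 * a, B = 6.0 * b;  // pre-scaled convention stored in LJ_type_A/B
  const double r = 1.7;

  double energy = A / 12.0 * std::pow(r, -12) - B / 6.0 * std::pow(r, -6);  // kernel's 0.0833*A, 0.1667*B
  double frc_over_r = -A * std::pow(r, -14) + B * std::pow(r, -8);          // kernel's frc_abs

  // Compare against a central-difference derivative of the energy.
  double h = 1e-6;
  double dEdr = ((A / 12.0 * std::pow(r + h, -12) - B / 6.0 * std::pow(r + h, -6)) -
                 (A / 12.0 * std::pow(r - h, -12) - B / 6.0 * std::pow(r - h, -6))) /
                (2 * h);
  assert(std::fabs(frc_over_r - dEdr / r) < 1e-5);
  (void)energy;
  return 0;
}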
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cu
new file mode 100644
index 0000000000..3436c35613
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cu
@@ -0,0 +1,102 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
+
+__global__ void Dihedral14LJEnergyKernel(const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd,
+                                         const VECTOR *boxlength, const int *a_14, const int *b_14,
+                                         const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B,
+                                         float *ene) {
+  int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (dihedral_14_i < dihedral_14_numbers) {
+    int atom_i = a_14[dihedral_14_i];
+    int atom_j = b_14[dihedral_14_i];
+
+    UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i];
+    UINT_VECTOR_LJ_TYPE r2 = uint_crd[atom_j];
+
+    int int_x;
+    int int_y;
+    int int_z;
+    VECTOR dr;
+    float dr2;
+    float dr_2;
+    float dr_4;
+    float dr_6;
+    float dr_12;
+    float ene_lin = 0.;
+    int x, y;
+    int atom_pair_LJ_type;
+
+    int_x = r2.uint_x - r1.uint_x;
+    int_y = r2.uint_y - r1.uint_y;
+    int_z = r2.uint_z - r1.uint_z;
+    dr.x = boxlength[0].x * int_x;
+    dr.y = boxlength[0].y * int_y;
+    dr.z = boxlength[0].z * int_z;
+    dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
+
+    dr_2 = 1. / dr2;
+    dr_4 = dr_2 * dr_2;
+    dr_6 = dr_4 * dr_2;
+    dr_12 = dr_6 * dr_6;
+
+    y = (r2.LJ_type - r1.LJ_type);
+    x = y >> 31;
+    y = (y ^ x) - x;
+    x = r2.LJ_type + r1.LJ_type;
+    r2.LJ_type = (x + y) >> 1;
+    x = (x - y) >> 1;
+    atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;
+
+    ene_lin = 0.08333333 * LJ_type_A[atom_pair_LJ_type] * dr_12 -
+              0.1666666 * LJ_type_B[atom_pair_LJ_type] * dr_6;  // LJ A and B are pre-multiplied by 12 and 6,
+                                                                // so divide the factors back out here
+    ene_lin *= lj_scale_factor[dihedral_14_i];
+
+    ene[dihedral_14_i] = ene_lin;
+  }
+}
+
+void Dihedral14LJEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
+                        const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
+                        const int *b_14, const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B,
+                        float *ene, cudaStream_t stream) {
+  size_t thread_per_block = 128;
+  size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
+  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL;
+  Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers);
+
+  UNSIGNED_INT_VECTOR *uint_crd =
+    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
+
+  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
+  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(dihedral_14_numbers, ene, 0.);
+  VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
+
+  Dihedral14LJEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
+    dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, LJ_type_A, LJ_type_B, ene);
+
+  cudaStreamSynchronize(stream);
+
+  return;
+}
+
+void Dihedral14LJEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
+                        const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
+                        const int *b_14, const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B,
+                        float *ene, cudaStream_t stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cuh
new file mode 100644
index 0000000000..4a132438b9
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cuh
@@ -0,0 +1,27 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ENERGY_IMPL_H
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ENERGY_IMPL_H
+
+#include <cuda_runtime_api.h>
+#include "runtime/device/gpu/cuda_common.h"
+
+void Dihedral14LJEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
+                        const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
+                        const int *b_14, const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B,
+                        float *ene, cudaStream_t stream);
+
+#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ENERGY_IMPL_H
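A minimal host-side sketch of how a caller might drive one of these wrappers, using Dihedral14LJEnergy as the example. The buffer sizes follow the parameter names above; the harness itself (one pair, one LJ type, left unfilled) is purely illustrative and not part of this patch:

#include <cuda_runtime.h>

extern void Dihedral14LJEnergy(const int, const int, const int *, const int *, const float *, const float *,
                               const int *, const int *, const float *, const float *, const float *, float *,
                               cudaStream_t);

int main() {
  const int atom_numbers = 2, dihedral_14_numbers = 1;
  int *uint_crd, *lj_type, *a_14, *b_14;
  float *charge, *boxlength, *lj_scale, *A, *B, *ene;
  cudaMalloc(&uint_crd, sizeof(int) * 3 * atom_numbers);  // packed UNSIGNED_INT_VECTOR coordinates
  cudaMalloc(&lj_type, sizeof(int) * atom_numbers);
  cudaMalloc(&charge, sizeof(float) * atom_numbers);
  cudaMalloc(&boxlength, sizeof(float) * 3);  // per-axis uint->dr conversion factors
  cudaMalloc(&a_14, sizeof(int) * dihedral_14_numbers);
  cudaMalloc(&b_14, sizeof(int) * dihedral_14_numbers);
  cudaMalloc(&lj_scale, sizeof(float) * dihedral_14_numbers);
  cudaMalloc(&A, sizeof(float) * 1);  // triangular pair table: one type -> one entry
  cudaMalloc(&B, sizeof(float) * 1);
  cudaMalloc(&ene, sizeof(float) * dihedral_14_numbers);
  // ... fill the device buffers with cudaMemcpy before launching ...
  cudaStream_t stream;
  cudaStreamCreate(&stream);
  Dihedral14LJEnergy(dihedral_14_numbers, atom_numbers, uint_crd, lj_type, charge, boxlength, a_14, b_14, lj_scale, A,
                     B, ene, stream);
  cudaStreamSynchronize(stream);
  return 0;
}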
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_impl.cu
new file mode 100644
index 0000000000..74f7a06758
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_impl.cu
@@ -0,0 +1,111 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_impl.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
+
+__global__ void Dihedral14LJForceKernel(const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd,
+                                        const VECTOR *boxlength, const int *a_14, const int *b_14,
+                                        const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B,
+                                        VECTOR *frc) {
+  int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (dihedral_14_i < dihedral_14_numbers) {
+    int int_x;
+    int int_y;
+    int int_z;
+    UINT_VECTOR_LJ_TYPE r1, r2;
+    VECTOR dr;
+    float dr2;
+    float dr_2;
+    float dr_4;
+    float dr_8;
+    float dr_14;
+    float frc_abs = 0.;
+    VECTOR temp_frc;
+    int x, y;
+    int atom_pair_LJ_type;
+
+    int atom_i = a_14[dihedral_14_i];
+    int atom_j = b_14[dihedral_14_i];
+
+    r1 = uint_crd[atom_i];
+    r2 = uint_crd[atom_j];
+
+    int_x = r2.uint_x - r1.uint_x;
+    int_y = r2.uint_y - r1.uint_y;
+    int_z = r2.uint_z - r1.uint_z;
+    dr.x = boxlength[0].x * int_x;
+    dr.y = boxlength[0].y * int_y;
+    dr.z = boxlength[0].z * int_z;
+    dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
+
+    dr_2 = 1.0 / dr2;
+    dr_4 = dr_2 * dr_2;
+    dr_8 = dr_4 * dr_4;
+    dr_14 = dr_8 * dr_4 * dr_2;
+
+    y = (r2.LJ_type - r1.LJ_type);
+    x = y >> 31;
+    y = (y ^ x) - x;
+    x = r2.LJ_type + r1.LJ_type;
+    r2.LJ_type = (x + y) >> 1;
+    x = (x - y) >> 1;
+    atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;
+
+    frc_abs = -LJ_type_A[atom_pair_LJ_type] * dr_14 + LJ_type_B[atom_pair_LJ_type] * dr_8;
+    frc_abs *= lj_scale_factor[dihedral_14_i];
+    temp_frc.x = frc_abs * dr.x;
+    temp_frc.y = frc_abs * dr.y;
+    temp_frc.z = frc_abs * dr.z;
+
+    atomicAdd(&frc[atom_j].x, -temp_frc.x);
+    atomicAdd(&frc[atom_j].y, -temp_frc.y);
+    atomicAdd(&frc[atom_j].z, -temp_frc.z);
+    atomicAdd(&frc[atom_i].x, temp_frc.x);
+    atomicAdd(&frc[atom_i].y, temp_frc.y);
+    atomicAdd(&frc[atom_i].z, temp_frc.z);
+  }
+}
+
+void Dihedral14LJForce(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype,
+                       const float *charge, const float *boxlength_f, const int *a_14, const int *b_14,
+                       const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B, float *frc_f,
+                       cudaStream_t stream) {
+  size_t thread_per_block = 128;
+  size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
+  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL;
+  Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers);
+
+  UNSIGNED_INT_VECTOR *uint_crd =
+    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
+
+  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
+  cudaStreamSynchronize(stream);
+  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
+  VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
+  VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
+
+  Dihedral14LJForceKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
+    dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, LJ_type_A, LJ_type_B, frc);
+  cudaStreamSynchronize(stream);
+  return;
+}
+
+void Dihedral14LJForce(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype,
+                       const float *charge, const float *boxlength_f, const int *a_14, const int *b_14,
+                       const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B, float *frc_f,
+                       cudaStream_t stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_impl.cuh
new file mode 100644
index 0000000000..7ea476ee2c
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_impl.cuh
@@ -0,0 +1,26 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_IMPL_H
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_IMPL_H
+
+#include <cuda_runtime_api.h>
+#include "runtime/device/gpu/cuda_common.h"
+
+void Dihedral14LJForce(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype,
+                       const float *charge, const float *boxlength_f, const int *a_14, const int *b_14,
+                       const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B, float *frc_f,
+                       cudaStream_t stream);
+#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_IMPL_H
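The force kernels above scatter into `frc` with atomicAdd because two 1-4 pairs can share an atom, so plain `+=` from different threads would race; each pair also deposits equal and opposite contributions (Newton's third law). A distilled, runnable CUDA sketch of that pattern (names and values are illustrative):

#include <cstdio>

__global__ void ScatterPairForce(const int pair_numbers, const int *a, const int *b, const float *f_pair, float *frc) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i < pair_numbers) {
    atomicAdd(&frc[a[i]], f_pair[i]);   // action on atom a
    atomicAdd(&frc[b[i]], -f_pair[i]);  // equal and opposite reaction on atom b
  }
}

int main() {
  int h_a[2] = {0, 0}, h_b[2] = {1, 2};  // both pairs touch atom 0 -> contention
  float h_f[2] = {1.0f, 2.0f}, h_frc[3] = {0.f, 0.f, 0.f};
  int *a, *b;
  float *f, *frc;
  cudaMalloc(&a, sizeof(h_a));
  cudaMalloc(&b, sizeof(h_b));
  cudaMalloc(&f, sizeof(h_f));
  cudaMalloc(&frc, sizeof(h_frc));
  cudaMemcpy(a, h_a, sizeof(h_a), cudaMemcpyHostToDevice);
  cudaMemcpy(b, h_b, sizeof(h_b), cudaMemcpyHostToDevice);
  cudaMemcpy(f, h_f, sizeof(h_f), cudaMemcpyHostToDevice);
  cudaMemcpy(frc, h_frc, sizeof(h_frc), cudaMemcpyHostToDevice);
  ScatterPairForce<<<1, 2>>>(2, a, b, f, frc);
  cudaMemcpy(h_frc, frc, sizeof(h_frc), cudaMemcpyDeviceToHost);
  printf("frc = {%f, %f, %f}\n", h_frc[0], h_frc[1], h_frc[2]);  // expected {3, -1, -2}
  return 0;
}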
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_with_direct_cf_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_with_direct_cf_impl.cu
new file mode 100644
index 0000000000..592d3a5206
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_with_direct_cf_impl.cu
@@ -0,0 +1,124 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_with_direct_cf_impl.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
+
+__global__ void Dihedral14LJForceWithDirectCFKernel(const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd,
+                                                    const VECTOR *boxlength, const int *a_14, const int *b_14,
+                                                    const float *lj_scale_factor, const float *cf_scale_factor,
+                                                    const float *LJ_type_A, const float *LJ_type_B, VECTOR *frc) {
+  int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (dihedral_14_i < dihedral_14_numbers) {
+    int int_x;
+    int int_y;
+    int int_z;
+    UINT_VECTOR_LJ_TYPE r1, r2;
+    VECTOR dr;
+    float dr_abs;
+    float dr2;
+    float dr_1;
+    float dr_2;
+    float dr_4;
+    float dr_8;
+    float dr_14;
+    float frc_abs = 0.;
+    VECTOR temp_frc;
+
+    int x, y;
+    int atom_pair_LJ_type;
+
+    int atom_i = a_14[dihedral_14_i];
+    int atom_j = b_14[dihedral_14_i];
+
+    r1 = uint_crd[atom_i];
+    r2 = uint_crd[atom_j];
+    int_x = r2.uint_x - r1.uint_x;
+    int_y = r2.uint_y - r1.uint_y;
+    int_z = r2.uint_z - r1.uint_z;
+    dr.x = boxlength[0].x * int_x;
+    dr.y = boxlength[0].y * int_y;
+    dr.z = boxlength[0].z * int_z;
+    dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
+
+    dr_2 = 1.0 / dr2;
+    dr_4 = dr_2 * dr_2;
+    dr_8 = dr_4 * dr_4;
+    dr_14 = dr_8 * dr_4 * dr_2;
+    dr_abs = norm3df(dr.x, dr.y, dr.z);
+    dr_1 = 1. / dr_abs;
+
+    float charge_i = r1.charge;
+    float charge_j = r2.charge;
+    float frc_cf_abs;
+    frc_cf_abs = cf_scale_factor[dihedral_14_i] * dr_2 * dr_1;
+    frc_cf_abs = -charge_i * charge_j * frc_cf_abs;
+    // LJ
+    y = (r2.LJ_type - r1.LJ_type);
+    x = y >> 31;
+    y = (y ^ x) - x;
+    x = r2.LJ_type + r1.LJ_type;
+    r2.LJ_type = (x + y) >> 1;
+    x = (x - y) >> 1;
+    atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;
+
+    frc_abs = -LJ_type_A[atom_pair_LJ_type] * dr_14 + LJ_type_B[atom_pair_LJ_type] * dr_8;
+    frc_abs *= lj_scale_factor[dihedral_14_i];
+
+    frc_abs += frc_cf_abs;
+    temp_frc.x = frc_abs * dr.x;
+    temp_frc.y = frc_abs * dr.y;
+    temp_frc.z = frc_abs * dr.z;
+
+    atomicAdd(&frc[atom_j].x, -temp_frc.x);
+    atomicAdd(&frc[atom_j].y, -temp_frc.y);
+    atomicAdd(&frc[atom_j].z, -temp_frc.z);
+    atomicAdd(&frc[atom_i].x, temp_frc.x);
+    atomicAdd(&frc[atom_i].y, temp_frc.y);
+    atomicAdd(&frc[atom_i].z, temp_frc.z);
+  }
+}
+
+void Dihedral14LJForceWithDirectCF(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
+                                   const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
+                                   const int *b_14, const float *lj_scale_factor, const float *cf_scale_factor,
+                                   const float *LJ_type_A, const float *LJ_type_B, float *frc_f, cudaStream_t stream) {
+  size_t thread_per_block = 128;
+  size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
+  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL;
+  Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers);
+
+  UNSIGNED_INT_VECTOR *uint_crd =
+    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
+
+  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
+  cudaStreamSynchronize(stream);
+  VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
+  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, frc_f, 0.);
+  VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
+
+  Dihedral14LJForceWithDirectCFKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
+    dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, cf_scale_factor, LJ_type_A,
+    LJ_type_B, frc);
+
+  return;
+}
+
+void Dihedral14LJForceWithDirectCF(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
+                                   const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
+                                   const int *b_14, const float *lj_scale_factor, const float *cf_scale_factor,
+                                   const float *LJ_type_A, const float *LJ_type_B, float *frc_f, cudaStream_t stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_with_direct_cf_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_with_direct_cf_impl.cuh
new file mode 100644
index 0000000000..8e4d95b856
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_with_direct_cf_impl.cuh
@@ -0,0 +1,26 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_WITH_DIRECT_CF_IMPL_H
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_WITH_DIRECT_CF_IMPL_H
+
+#include <cuda_runtime_api.h>
+#include "runtime/device/gpu/cuda_common.h"
+
+void Dihedral14LJForceWithDirectCF(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
+                                   const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
+                                   const int *b_14, const float *lj_scale_factor, const float *cf_scale_factor,
+                                   const float *LJ_type_A, const float *LJ_type_B, float *frc_f, cudaStream_t stream);
+#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_WITH_DIRECT_CF_IMPL_H
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cu
new file mode 100644
index 0000000000..6c514879bd
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cu
@@ -0,0 +1,419 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh"
+
+__global__ void Copy_List(const int element_numbers, const int *origin_list, int *list) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < element_numbers) {
+    list[i] = origin_list[i];
+  }
+}
+__global__ void Copy_List(const int element_numbers, const float *origin_list, float *list) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < element_numbers) {
+    list[i] = origin_list[i];
+  }
+}
+
+// Scale float coordinates into the unsigned 32-bit range; the final left-shift
+// doubles them, matching the halved conversion factors produced by Mul_half.
+__global__ void Crd_To_Uint_Crd(const int atom_numbers, float *scale_factor, const VECTOR *crd,
+                                UNSIGNED_INT_VECTOR *uint_crd) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    uint_crd[atom_i].uint_x = crd[atom_i].x * scale_factor[0];
+    uint_crd[atom_i].uint_y = crd[atom_i].y * scale_factor[1];
+    uint_crd[atom_i].uint_z = crd[atom_i].z * scale_factor[2];
+    /*uint_crd[atom_i].uint_x = 2 * uint_crd[atom_i].uint_x;
+    uint_crd[atom_i].uint_y = 2 * uint_crd[atom_i].uint_y;
+    uint_crd[atom_i].uint_z = 2 * uint_crd[atom_i].uint_z;*/
+    uint_crd[atom_i].uint_x = uint_crd[atom_i].uint_x << 1;
+    uint_crd[atom_i].uint_y = uint_crd[atom_i].uint_y << 1;
+    uint_crd[atom_i].uint_z = uint_crd[atom_i].uint_z << 1;
+  }
+}
+
+__global__ void Vector_Translation(const int vector_numbers, VECTOR *vec_list, const VECTOR translation_vec) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < vector_numbers) {
+    vec_list[i].x = vec_list[i].x + translation_vec.x;
+    vec_list[i].y = vec_list[i].y + translation_vec.y;
+    vec_list[i].z = vec_list[i].z + translation_vec.z;
+  }
+}
+__global__ void Vector_Translation(const int vector_numbers, VECTOR *vec_list, const VECTOR *translation_vec) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < vector_numbers) {
+    vec_list[i].x = vec_list[i].x + translation_vec[0].x;
+    vec_list[i].y = vec_list[i].y + translation_vec[0].y;
+    vec_list[i].z = vec_list[i].z + translation_vec[0].z;
+  }
+}
+
+// Wrap each coordinate back into [0, box_length); atoms drift by at most one
+// box length between refreshes, so a single correction suffices.
+__global__ void Crd_Periodic_Map(const int atom_numbers, VECTOR *crd, const float *box_length) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    if (crd[atom_i].x >= box_length[0]) {
+      crd[atom_i].x = crd[atom_i].x - box_length[0];
+    } else if (crd[atom_i].x < 0) {
+      crd[atom_i].x = crd[atom_i].x + box_length[0];
+    }
+
+    if (crd[atom_i].y >= box_length[1]) {
+      crd[atom_i].y = crd[atom_i].y - box_length[1];
+    } else if (crd[atom_i].y < 0) {
+      crd[atom_i].y = crd[atom_i].y + box_length[1];
+    }
+
+    if (crd[atom_i].z >= box_length[2]) {
+      crd[atom_i].z = crd[atom_i].z - box_length[2];
+    } else if (crd[atom_i].z < 0) {
+      crd[atom_i].z = crd[atom_i].z + box_length[2];
+    }
+  }
+}
+
+__global__ void Clear_Grid_Bucket(const int grid_numbers, int *atom_numbers_in_grid_bucket, GRID_BUCKET *bucket) {
+  int grid_serial = blockDim.x * blockIdx.x + threadIdx.x;
+  if (grid_serial < grid_numbers) {
+    GRID_BUCKET bucket_i = bucket[grid_serial];
+    for (int i = 0; i < atom_numbers_in_grid_bucket[grid_serial]; i = i + 1) {
+      bucket_i.atom_serial[i] = -1;  // -1 marks an empty slot
+    }
+    atom_numbers_in_grid_bucket[grid_serial] = 0;
+  }
+}
+
+__global__ void Find_Atom_In_Grid_Serial(const int atom_numbers, const float *grid_length_inverse, const VECTOR *crd,
+                                         const int *grid_N, const int gridxy, int *atom_in_grid_serial) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    int Nx = static_cast<int>(crd[atom_i].x * grid_length_inverse[0]);  // crd.x must be < boxlength.x
+    int Ny = static_cast<int>(crd[atom_i].y * grid_length_inverse[1]);
+    int Nz = static_cast<int>(crd[atom_i].z * grid_length_inverse[2]);
+    Nx = Nx & ((Nx - grid_N[0]) >> 31);  // clamp out-of-range indices to 0
+    Ny = Ny & ((Ny - grid_N[1]) >> 31);
+    Nz = Nz & ((Nz - grid_N[2]) >> 31);
+    atom_in_grid_serial[atom_i] = Nz * gridxy + Ny * grid_N[0] + Nx;
+  }
+}
+
+__global__ void Put_Atom_In_Grid_Bucket(const int atom_numbers, const int *atom_in_grid_serial, GRID_BUCKET *bucket,
+                                        int *atom_numbers_in_grid_bucket) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    int grid_serial = atom_in_grid_serial[atom_i];
+    GRID_BUCKET bucket_i = bucket[grid_serial];
+    int a = atom_numbers_in_grid_bucket[grid_serial];
+    atomicCAS(&bucket_i.atom_serial[a], -1, atom_i);
+    if (bucket_i.atom_serial[a] != atom_i) {
+      while (true) {
+        a = a + 1;
+        atomicCAS(&bucket_i.atom_serial[a], -1, atom_i);
+        if (bucket_i.atom_serial[a] == atom_i) {
+          atomicAdd(&atom_numbers_in_grid_bucket[grid_serial], 1);
+          break;
+        }
+      }
+    } else {
+      atomicAdd(&atom_numbers_in_grid_bucket[grid_serial], 1);
+    }
+  }
+}
+__global__ void Find_atom_neighbors(const int atom_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
+                                    const float *uint_dr_to_dr_cof, const int *atom_in_grid_serial,
+                                    const GRID_POINTER *gpointer, const GRID_BUCKET *bucket,
+                                    const int *atom_numbers_in_grid_bucket, NEIGHBOR_LIST *nl,
+                                    const float cutoff_skin_square) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    int grid_serial = atom_in_grid_serial[atom_i];
+    int grid_serial2;
+    int atom_numbers_in_nl_lin = 0;
+    int atom_j;
+    int int_x;
+    int int_y;
+    int int_z;
+    UNSIGNED_INT_VECTOR uint_crd_i = uint_crd[atom_i];
+    NEIGHBOR_LIST nl_i = nl[atom_i];
+    GRID_POINTER gpointer_i = gpointer[grid_serial];
+    VECTOR dr;
+    float dr2;
+    for (int grid_cycle = 0; grid_cycle < 125; grid_cycle = grid_cycle + 1) {
+      grid_serial2 = gpointer_i.grid_serial[grid_cycle];
+      GRID_BUCKET bucket_i = bucket[grid_serial2];
+      for (int i = 0; i < atom_numbers_in_grid_bucket[grid_serial2]; i = i + 1) {
+        atom_j = bucket_i.atom_serial[i];
+        if (atom_j > atom_i) {
+          int_x = uint_crd[atom_j].uint_x - uint_crd_i.uint_x;
+          int_y = uint_crd[atom_j].uint_y - uint_crd_i.uint_y;
+          int_z = uint_crd[atom_j].uint_z - uint_crd_i.uint_z;
+          dr.x = uint_dr_to_dr_cof[0] * int_x;
+          dr.y = uint_dr_to_dr_cof[1] * int_y;
+          dr.z = uint_dr_to_dr_cof[2] * int_z;
+          dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
+          if (dr2 < cutoff_skin_square) {
+            nl_i.atom_serial[atom_numbers_in_nl_lin] = atom_j;
+            atom_numbers_in_nl_lin = atom_numbers_in_nl_lin + 1;
+          }
+        }
+      }
+    }  // 125 grid cycles
+    nl[atom_i].atom_numbers = atom_numbers_in_nl_lin;
+  }
+}
+
+__global__ void Is_need_refresh_neighbor_list_cuda(const int atom_numbers, const VECTOR *crd, const VECTOR *old_crd,
+                                                   const float half_skin_square, int *need_refresh_flag) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < atom_numbers) {
+    VECTOR r1 = crd[i];
+    VECTOR r2 = old_crd[i];
+    r1.x = r1.x - r2.x;
+    r1.y = r1.y - r2.y;
+    r1.z = r1.z - r2.z;
+    float r1_2 = r1.x * r1.x + r1.y * r1.y + r1.z * r1.z;
+    if (r1_2 > half_skin_square) {
+      atomicExch(&need_refresh_flag[0], 1);
+    }
+  }
+}
+
+__global__ void Delete_Excluded_Atoms_Serial_In_Neighbor_List(const int atom_numbers, NEIGHBOR_LIST *nl,
+                                                              const int *excluded_list_start, const int *excluded_list,
+                                                              const int *excluded_atom_numbers) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    int excluded_number = excluded_atom_numbers[atom_i];
+    if (excluded_number > 0) {
+      int list_start = excluded_list_start[atom_i];
+      int atom_min = excluded_list[list_start];
+      int list_end = list_start + excluded_number;
+      int atom_max = excluded_list[list_end - 1];
+      NEIGHBOR_LIST nl_i = nl[atom_i];
+      int atomnumbers_in_nl_lin = nl_i.atom_numbers;
+      int atom_j;
+      int excluded_atom_numbers_lin = list_end - list_start;
+      int excluded_atom_numbers_count = 0;
+      for (int i = 0; i < atomnumbers_in_nl_lin; i = i + 1) {
+        atom_j = nl_i.atom_serial[i];
+        if (atom_j < atom_min || atom_j > atom_max) {
+          continue;  // atom_j cannot be in the (sorted) excluded range
+        }
+        for (int j = list_start; j < list_end; j = j + 1) {
+          if (atom_j == excluded_list[j]) {
+            // remove atom_j by swapping in the last neighbor
+            atomnumbers_in_nl_lin = atomnumbers_in_nl_lin - 1;
+            nl_i.atom_serial[i] = nl_i.atom_serial[atomnumbers_in_nl_lin];
+            excluded_atom_numbers_count = excluded_atom_numbers_count + 1;
+            i = i - 1;
+          }
+        }
+        if (excluded_atom_numbers_count >= excluded_atom_numbers_lin) {
+          break;  // every excluded atom has already been removed
+        }
+      }  // cycle over neighbors
+      nl[atom_i].atom_numbers = atomnumbers_in_nl_lin;
+    }  // if exclusions are needed
+  }
+}
+
+void Refresh_Neighbor_List(int *refresh_sign, const int thread, const int atom_numbers, VECTOR *crd, VECTOR *old_crd,
+                           UNSIGNED_INT_VECTOR *uint_crd, float *crd_to_uint_crd_cof, float *uint_dr_to_dr_cof,
+                           int *atom_in_grid_serial, const float skin, float *box_length, const GRID_POINTER *gpointer,
+                           GRID_BUCKET *bucket, int *atom_numbers_in_grid_bucket, NEIGHBOR_LIST *d_nl,
+                           int *excluded_list_start, int *excluded_list, int *excluded_numbers,
+                           float cutoff_skin_square, int grid_numbers, float *grid_length_inverse, int *grid_N, int Nxy,
+                           cudaStream_t stream) {
+  if (refresh_sign[0] == 1) {
+    VECTOR trans_vec = {-skin, -skin, -skin};
+    Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / thread), thread, 0, stream>>>(
+      grid_numbers, atom_numbers_in_grid_bucket, bucket);
+
+    Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(atom_numbers, crd,
+                                                                                                trans_vec);
+
+    Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(atom_numbers, crd,
+                                                                                              box_length);
+
+    Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
+      atom_numbers, grid_length_inverse, crd, grid_N, Nxy, atom_in_grid_serial);
+
+    trans_vec.x = -trans_vec.x;
+    trans_vec.y = -trans_vec.y;
+    trans_vec.z = -trans_vec.z;
+
+    Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(atom_numbers, crd,
+                                                                                                trans_vec);
+
+    Copy_List<<<ceilf(static_cast<float>(3. * atom_numbers) / thread), thread, 0, stream>>>(
+      3 * atom_numbers, reinterpret_cast<float *>(crd), reinterpret_cast<float *>(old_crd));
+
+    Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
+      atom_numbers, atom_in_grid_serial, bucket, atom_numbers_in_grid_bucket);
+
+    Crd_To_Uint_Crd<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
+      atom_numbers, crd_to_uint_crd_cof, crd, uint_crd);
+
+    Find_atom_neighbors<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
+      atom_numbers, uint_crd, uint_dr_to_dr_cof, atom_in_grid_serial, gpointer, bucket, atom_numbers_in_grid_bucket,
+      d_nl, cutoff_skin_square);
+
+    Delete_Excluded_Atoms_Serial_In_Neighbor_List<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0,
+                                                    stream>>>(atom_numbers, d_nl, excluded_list_start, excluded_list,
+                                                              excluded_numbers);
+    refresh_sign[0] = 0;
+  }
+}
+
+void Refresh_Neighbor_List_First_Time(int *refresh_sign, const int thread, const int atom_numbers, VECTOR *crd,
+                                      VECTOR *old_crd, UNSIGNED_INT_VECTOR *uint_crd, float *crd_to_uint_crd_cof,
+                                      float *uint_dr_to_dr_cof, int *atom_in_grid_serial, const float skin,
+                                      float *box_length, const GRID_POINTER *gpointer, GRID_BUCKET *bucket,
+                                      int *atom_numbers_in_grid_bucket, NEIGHBOR_LIST *d_nl, int *excluded_list_start,
+                                      int *excluded_list, int *excluded_numbers, float cutoff_skin_square,
+                                      int grid_numbers, float *grid_length_inverse, int *grid_N, int Nxy,
+                                      cudaStream_t stream) {
+  VECTOR trans_vec = {skin, skin, skin};
+  Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / 32), 32, 0, stream>>>(
+    grid_numbers, atom_numbers_in_grid_bucket, bucket);
+  Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, box_length);
+  Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, grid_length_inverse, crd, grid_N, Nxy, atom_in_grid_serial);
+  Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, trans_vec);
+  Copy_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 32), 32, 0, stream>>>(
+    3 * atom_numbers, reinterpret_cast<float *>(crd), reinterpret_cast<float *>(old_crd));
+  Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, atom_in_grid_serial, bucket, atom_numbers_in_grid_bucket);
+  Crd_To_Uint_Crd<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd_to_uint_crd_cof,
+                                                                                   crd, uint_crd);
+
+  Find_atom_neighbors<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
+    atom_numbers, uint_crd, uint_dr_to_dr_cof, atom_in_grid_serial, gpointer, bucket, atom_numbers_in_grid_bucket, d_nl,
+    cutoff_skin_square);
+  Delete_Excluded_Atoms_Serial_In_Neighbor_List<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0,
+                                                  stream>>>(atom_numbers, d_nl, excluded_list_start, excluded_list,
+                                                            excluded_numbers);
+}
+
+__global__ void construct_neighbor_list_kernel(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers,
+                                               int *nl_atom_serial, NEIGHBOR_LIST *nl) {
+  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) {
+    nl[i].atom_numbers = nl_atom_numbers[i];
+    nl[i].atom_serial = nl_atom_serial + i * max_neighbor_numbers;
+  }
+}
+
+void Construct_Neighbor_List(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers, int *nl_atom_serial,
+                             NEIGHBOR_LIST *nl, cudaStream_t stream) {
+  construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
+    atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl);
+}
+
+void Refresh_Neighbor_List_No_Check(int grid_numbers, int atom_numbers, float skin, int Nxy, float cutoff_skin_square,
+                                    int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket,
+                                    float *grid_length_inverse, int *atom_in_grid_serial, GRID_BUCKET *bucket,
+                                    VECTOR *crd, VECTOR *old_crd, float *crd_to_uint_crd_cof,
+                                    UNSIGNED_INT_VECTOR *uint_crd, float *uint_dr_to_dr_cof, GRID_POINTER *gpointer,
+                                    NEIGHBOR_LIST *d_nl, int *excluded_list_start, int *excluded_list,
+                                    int *excluded_numbers, cudaStream_t stream) {
+  VECTOR trans_vec = {-skin, -skin, -skin};
+
+  Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / 32), 32, 0, stream>>>(
+    grid_numbers, atom_numbers_in_grid_bucket, bucket);
+
+  Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, trans_vec);
+
+  Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, box_length);
+
+  Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, grid_length_inverse, crd, grid_N, Nxy, atom_in_grid_serial);
+  trans_vec.x = -trans_vec.x;
+  trans_vec.y = -trans_vec.y;
+  trans_vec.z = -trans_vec.z;
+  Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, trans_vec);
+
+  cudaMemcpyAsync(old_crd, crd, sizeof(VECTOR) * atom_numbers, cudaMemcpyDeviceToDevice, stream);
+
+  Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, atom_in_grid_serial, bucket, atom_numbers_in_grid_bucket);
+
+  Crd_To_Uint_Crd<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd_to_uint_crd_cof,
+                                                                                   crd, uint_crd);
+
+  Find_atom_neighbors<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, uint_crd, uint_dr_to_dr_cof, atom_in_grid_serial, gpointer, bucket, atom_numbers_in_grid_bucket, d_nl,
+    cutoff_skin_square);
+
+  Delete_Excluded_Atoms_Serial_In_Neighbor_List<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, d_nl, excluded_list_start, excluded_list, excluded_numbers);
+}
+
+// Halve the crd->uint conversion factors (uint coordinates are stored pre-doubled).
+__global__ void Mul_half(float *src, float *dst) {
+  int index = threadIdx.x;
+  if (index < 3) {
+    dst[index] = src[index] * 0.5;
+  }
+}
+
+void Neighbor_List_Update(int grid_numbers, int atom_numbers, int refresh_count, int refresh_interval,
+                          int not_first_time, float skin, int Nxy, float cutoff_square, float cutoff_with_skin_square,
+                          int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket, float *grid_length_inverse,
+                          int *atom_in_grid_serial, GRID_BUCKET *bucket, float *crd, float *old_crd,
+                          float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd,
+                          float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl,
+                          int *excluded_list_start, int *excluded_list, int *excluded_numbers, float half_skin_square,
+                          int *is_need_refresh_neighbor_list, cudaStream_t stream) {
+  if (not_first_time) {
+    if (refresh_interval > 0) {
+      if (refresh_count % refresh_interval == 0) {
+        Mul_half<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
+        Refresh_Neighbor_List_No_Check(
+          grid_numbers, atom_numbers, skin, Nxy, cutoff_square, grid_N, box_length, atom_numbers_in_grid_bucket,
+          grid_length_inverse, atom_in_grid_serial, bucket, reinterpret_cast<VECTOR *>(crd),
+          reinterpret_cast<VECTOR *>(old_crd), crd_to_uint_crd_cof, reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
+          uint_dr_to_dr_cof, gpointer, d_nl, excluded_list_start, excluded_list, excluded_numbers, stream);
+      }
+      refresh_count += 1;
+    } else {
+      Is_need_refresh_neighbor_list_cuda<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
+        atom_numbers, reinterpret_cast<VECTOR *>(crd), reinterpret_cast<VECTOR *>(old_crd), half_skin_square,
+        is_need_refresh_neighbor_list);
+      Mul_half<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
+      Refresh_Neighbor_List(is_need_refresh_neighbor_list, 32, atom_numbers, reinterpret_cast<VECTOR *>(crd),
+                            reinterpret_cast<VECTOR *>(old_crd), reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
+                            half_crd_to_uint_crd_cof, uint_dr_to_dr_cof, atom_in_grid_serial, skin, box_length,
+                            gpointer, bucket, atom_numbers_in_grid_bucket, d_nl, excluded_list_start, excluded_list,
+                            excluded_numbers, cutoff_with_skin_square, grid_numbers, grid_length_inverse, grid_N, Nxy,
+                            stream);
+    }
+  } else {
+    Mul_half<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
+    Refresh_Neighbor_List_First_Time(
+      is_need_refresh_neighbor_list, 32, atom_numbers, reinterpret_cast<VECTOR *>(crd),
+      reinterpret_cast<VECTOR *>(old_crd), reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd), half_crd_to_uint_crd_cof,
+      uint_dr_to_dr_cof, atom_in_grid_serial, skin, box_length, gpointer, bucket, atom_numbers_in_grid_bucket, d_nl,
+      excluded_list_start, excluded_list, excluded_numbers, cutoff_with_skin_square, grid_numbers, grid_length_inverse,
+      grid_N, Nxy, stream);
+  }
+}
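Put_Atom_In_Grid_Bucket above claims a bucket slot with atomicCAS on the sentinel value -1, probing forward from the current occupancy count as a hint. A distilled, compilable CUDA sketch of the same pattern; unlike the original (which re-reads the slot after the CAS), this uses the CAS return value directly, which is the equivalent but tighter form (all names here are illustrative):

// Empty slots hold -1; a thread probes linearly from a hint until its CAS wins.
__global__ void ClaimSlots(const int n, const int *hint, int *slots, int *counter) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i < n) {
    int a = hint[i];  // likely-free position; may already be taken
    while (atomicCAS(&slots[a], -1, i) != -1) {
      a = a + 1;  // occupied by another thread: probe the next slot
    }
    atomicAdd(counter, 1);  // slot claimed; bump the occupancy count
  }
}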
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh
new file mode 100644
index 0000000000..c6c6db415d
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh
@@ -0,0 +1,58 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NEIGHBOR_LIST_IMPL_H_
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NEIGHBOR_LIST_IMPL_H_
+
+struct VECTOR {
+  float x;
+  float y;
+  float z;
+};
+struct INT_VECTOR {
+  int int_x;
+  int int_y;
+  int int_z;
+};
+struct UNSIGNED_INT_VECTOR {
+  unsigned int uint_x;
+  unsigned int uint_y;
+  unsigned int uint_z;
+};
+struct NEIGHBOR_LIST {
+  int atom_numbers;
+  int *atom_serial;
+};
+struct GRID_BUCKET {
+  int *atom_serial;
+};
+struct GRID_POINTER {
+  int *grid_serial;
+};
+
+void Construct_Neighbor_List(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers, int *nl_atom_serial,
+                             NEIGHBOR_LIST *nl, cudaStream_t stream);
+
+void Neighbor_List_Update(int grid_numbers, int atom_numbers, int refresh_count, int refresh_interval,
+                          int not_first_time, float skin, int Nxy, float cutoff_square, float cutoff_with_skin_square,
+                          int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket, float *grid_length_inverse,
+                          int *atom_in_grid_serial, GRID_BUCKET *bucket, float *crd, float *old_crd,
+                          float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd,
+                          float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl,
+                          int *excluded_list_start, int *excluded_list, int *excluded_numbers, float half_skin_square,
+                          int *is_need_refresh_neighbor_list, cudaStream_t stream);
+
+#endif
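NEIGHBOR_LIST is an array-of-structs whose `atom_serial` members all alias rows of one flat [atom_numbers x max_neighbor_numbers] buffer, which is what construct_neighbor_list_kernel wires up. A host-side analogue of that stitching (illustrative only; the real buffers live in device memory):

#include <vector>

struct HOST_NEIGHBOR_LIST {  // hypothetical host mirror of NEIGHBOR_LIST
  int atom_numbers;
  int *atom_serial;
};

int main() {
  const int atom_numbers = 4, max_neighbor_numbers = 800;
  std::vector<int> nl_atom_numbers(atom_numbers, 0);
  std::vector<int> nl_atom_serial(static_cast<size_t>(atom_numbers) * max_neighbor_numbers, -1);
  std::vector<HOST_NEIGHBOR_LIST> nl(atom_numbers);
  for (int i = 0; i < atom_numbers; ++i) {
    nl[i].atom_numbers = nl_atom_numbers[i];
    // One allocation backs every per-atom list; entry i points at row i.
    nl[i].atom_serial = nl_atom_serial.data() + static_cast<size_t>(i) * max_neighbor_numbers;
  }
  return 0;
}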
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cu
new file mode 100644
index 0000000000..47e3e454f2
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cu
@@ -0,0 +1,139 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
+
+__global__ void MD_Iteration_Leap_Frog_With_LiuJian(const int atom_numbers, const float half_dt, const float dt,
+                                                    const float exp_gamma, const float *inverse_mass,
+                                                    const float *sqrt_mass_inverse, VECTOR *vel, VECTOR *crd,
+                                                    VECTOR *frc, VECTOR *acc, VECTOR *random_frc) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < atom_numbers) {
+    acc[i].x = inverse_mass[i] * frc[i].x;
+    acc[i].y = inverse_mass[i] * frc[i].y;
+    acc[i].z = inverse_mass[i] * frc[i].z;
+
+    vel[i].x = vel[i].x + dt * acc[i].x;
+    vel[i].y = vel[i].y + dt * acc[i].y;
+    vel[i].z = vel[i].z + dt * acc[i].z;
+
+    crd[i].x = crd[i].x + half_dt * vel[i].x;
+    crd[i].y = crd[i].y + half_dt * vel[i].y;
+    crd[i].z = crd[i].z + half_dt * vel[i].z;
+
+    vel[i].x = exp_gamma * vel[i].x + sqrt_mass_inverse[i] * random_frc[i].x;
+    vel[i].y = exp_gamma * vel[i].y + sqrt_mass_inverse[i] * random_frc[i].y;
+    vel[i].z = exp_gamma * vel[i].z + sqrt_mass_inverse[i] * random_frc[i].z;
+
+    crd[i].x = crd[i].x + half_dt * vel[i].x;
+    crd[i].y = crd[i].y + half_dt * vel[i].y;
+    crd[i].z = crd[i].z + half_dt * vel[i].z;
+
+    frc[i].x = 0.;
+    frc[i].y = 0.;
+    frc[i].z = 0.;
+  }
+}
+
+__global__ void MD_Iteration_Leap_Frog_With_LiuJian_With_Max_Velocity(const int atom_numbers, const float half_dt,
+                                                                      const float dt, const float exp_gamma,
+                                                                      const float *inverse_mass,
+                                                                      const float *sqrt_mass_inverse, VECTOR *vel,
+                                                                      VECTOR *crd, VECTOR *frc, VECTOR *acc,
+                                                                      VECTOR *random_frc, const float max_vel) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  float abs_vel;
+  if (i < atom_numbers) {
+    acc[i].x = inverse_mass[i] * frc[i].x;
+    acc[i].y = inverse_mass[i] * frc[i].y;
+    acc[i].z = inverse_mass[i] * frc[i].z;
+
+    vel[i].x = vel[i].x + dt * acc[i].x;
+    vel[i].y = vel[i].y + dt * acc[i].y;
+    vel[i].z = vel[i].z + dt * acc[i].z;
+
+    abs_vel = norm3df(vel[i].x, vel[i].y, vel[i].z);
+    if (abs_vel >= max_vel) {
+      // rescale so the speed never exceeds max_vel
+      abs_vel = max_vel / abs_vel;
+      vel[i].x = abs_vel * vel[i].x;
+      vel[i].y = abs_vel * vel[i].y;
+      vel[i].z = abs_vel * vel[i].z;
+    }
+
+    crd[i].x = crd[i].x + half_dt * vel[i].x;
+    crd[i].y = crd[i].y + half_dt * vel[i].y;
+    crd[i].z = crd[i].z + half_dt * vel[i].z;
+
+    vel[i].x = exp_gamma * vel[i].x + sqrt_mass_inverse[i] * random_frc[i].x;
+    vel[i].y = exp_gamma * vel[i].y + sqrt_mass_inverse[i] * random_frc[i].y;
+    vel[i].z = exp_gamma * vel[i].z + sqrt_mass_inverse[i] * random_frc[i].z;
+
+    crd[i].x = crd[i].x + half_dt * vel[i].x;
+    crd[i].y = crd[i].y + half_dt * vel[i].y;
+    crd[i].z = crd[i].z + half_dt * vel[i].z;
+
+    frc[i].x = 0.;
+    frc[i].y = 0.;
+    frc[i].z = 0.;
+  }
+}
+
+void MDIterationLeapFrog(const int float4_numbers, const int atom_numbers, const float half_dt, const float dt,
+                         const float exp_gamma, const int is_max_velocity, const float max_velocity,
+                         const float *d_mass_inverse, const float *d_sqrt_mass, float *vel_f, float *crd_f,
+                         float *frc_f, float *acc_f, cudaStream_t stream) {
+  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, vel_f, 0.);
+  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, crd_f, 0.);
+  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, frc_f, 0.);
+  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, acc_f, 0.);
+
+  VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
+  VECTOR *vel = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(vel_f));
+  VECTOR *acc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(acc_f));
+  VECTOR *crd = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(crd_f));
+
+  curandStatePhilox4_32_10_t *rand_state;
+  VECTOR *random_force;
+
+  Cuda_Malloc_Safely(reinterpret_cast<void **>(&random_force), sizeof(float4) * float4_numbers);
+  Cuda_Malloc_Safely(reinterpret_cast<void **>(&rand_state), sizeof(curandStatePhilox4_32_10_t) * float4_numbers);
+  Setup_Rand_Normal_Kernel<<<ceilf(static_cast<float>(float4_numbers) / 32.), 32>>>(float4_numbers, rand_state, 1);
+  Rand_Normal<<<ceilf(static_cast<float>(float4_numbers) / 32.), 32, 0, stream>>>(
+    float4_numbers, rand_state, reinterpret_cast<float4 *>(random_force));
+
+  if (!is_max_velocity) {
+    MD_Iteration_Leap_Frog_With_LiuJian<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+      atom_numbers, half_dt, dt, exp_gamma, d_mass_inverse, d_sqrt_mass, vel, crd, frc, acc, random_force);
+  } else {
+    MD_Iteration_Leap_Frog_With_LiuJian_With_Max_Velocity<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0,
+                                                            stream>>>(atom_numbers, half_dt, dt, exp_gamma,
+                                                                      d_mass_inverse, d_sqrt_mass, vel, crd, frc, acc,
+                                                                      random_force, max_velocity);
+  }
+
+  cudaStreamSynchronize(stream);
+  cudaFree(random_force);
+  cudaFree(rand_state);
+
+  return;
+}
+
+void MDIterationLeapFrog(const int float4_numbers, const int atom_numbers, const float half_dt, const float dt,
+                         const float exp_gamma, const int is_max_velocity, const float max_velocity,
+                         const float *d_mass_inverse, const float *d_sqrt_mass, float *vel_f, float *crd_f,
+                         float *frc_f, float *acc_f, cudaStream_t stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cuh
new file mode 100644
index 0000000000..1db936bda9
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cuh
@@ -0,0 +1,27 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NVTIT_MD_ITERATION_LEAP_FROG_IMPL_H
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NVTIT_MD_ITERATION_LEAP_FROG_IMPL_H
+
+#include <cuda_runtime_api.h>
+#include "runtime/device/gpu/cuda_common.h"
+
+void MDIterationLeapFrog(const int float4_numbers, const int atom_numbers, const float half_dt, const float dt,
+                         const float exp_gamma, const int is_max_velocity, const float max_velocity,
+                         const float *d_mass_inverse, const float *d_sqrt_mass, float *vel_f, float *crd_f,
+                         float *frc_f, float *acc_f, cudaStream_t stream);
+
+#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NVTIT_MD_ITERATION_LEAP_FROG_IMPL_H
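A scalar, single-atom reference of one integration step as implemented by MD_Iteration_Leap_Frog_With_LiuJian above: full velocity update from the force, half-step drift, Langevin-style velocity mixing (friction factor exp_gamma plus a mass-scaled random kick), then a second half-step drift. All numbers below are arbitrary examples, not values from the patch:

#include <cstdio>

int main() {
  float dt = 0.001f, half_dt = 0.0005f, exp_gamma = 0.9f;
  float inverse_mass = 1.0f, sqrt_mass_inverse = 1.0f;
  float vel = 0.1f, crd = 0.0f, frc = 2.0f, random_frc = 0.05f;

  float acc = inverse_mass * frc;                          // a = F/m
  vel += dt * acc;                                         // full velocity step
  crd += half_dt * vel;                                    // first half drift
  vel = exp_gamma * vel + sqrt_mass_inverse * random_frc;  // friction + thermal noise
  crd += half_dt * vel;                                    // second half drift
  frc = 0.f;                                               // forces are re-accumulated next step

  printf("crd=%f vel=%f\n", crd, vel);
  return 0;
}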
+ */ +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NVTIT_MD_ITERATION_LEAP_FROG_IMPL_H +#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NVTIT_MD_ITERATION_LEAP_FROG_IMPL_H + +#include +#include "runtime/device/gpu/cuda_common.h" + +void MDIterationLeapFrog(const int float4_numbers, const int atom_numbers, const float half_dt, const float dt, + const float exp_gamma, const int is_max_velocity, const float max_velocity, + const float *d_mass_inverse, const float *d_sqrt_mass, float *vel_f, float *crd_f, + float *frc_f, float *acc_f, cudaStream_t stream); + +#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NVTIT_MD_ITERATION_LEAP_FROG_IMPL_H diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_common.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_common.cuh new file mode 100644 index 0000000000..427e63e73e --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_common.cuh @@ -0,0 +1,230 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_COMMON_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_COMMON_H_ +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh" +__constant__ float PME_Ma[4] = {1.0 / 6.0, -0.5, 0.5, -1.0 / 6.0}; +__constant__ float PME_Mb[4] = {0, 0.5, -1, 0.5}; +__constant__ float PME_Mc[4] = {0, 0.5, 0, -0.5}; +__constant__ float PME_Md[4] = {0, 1.0 / 6.0, 4.0 / 6.0, 1.0 / 6.0}; +__constant__ float PME_dMa[4] = {0.5, -1.5, 1.5, -0.5}; +__constant__ float PME_dMb[4] = {0, 1, -2, 1}; +__constant__ float PME_dMc[4] = {0, 0.5, 0, -0.5}; +#define PI 3.1415926 +const float periodic_factor_inverse = 2.32830643e-10; +static dim3 thread_PME; + +const float cutoff = 10.0; +const float tolerance = 0.00001; + +static float M_(float u, int n) { + if (n == 2) { + if (u > 2 || u < 0) return 0; + return 1 - abs(u - 1); + } else { + return u / (n - 1) * M_(u, n - 1) + (n - u) / (n - 1) * M_(u - 1, n - 1); + } +} + +static float Get_Beta(float cutoff, float tolerance) { + float beta, low, high, tempf; + int ilow, ihigh; + + high = 1.0; + ihigh = 1; + + while (1) { + tempf = erfc(high * cutoff) / cutoff; + if (tempf <= tolerance) break; + high *= 2; + ihigh++; + } + + ihigh += 50; + low = 0.0; + for (ilow = 1; ilow < ihigh; ilow++) { + beta = (low + high) / 2; + tempf = erfc(beta * cutoff) / cutoff; + if (tempf >= tolerance) + low = beta; + else + high = beta; + } + return beta; +} + +static cufftComplex expc(cufftComplex z) { + cufftComplex res; + float t = expf(z.x); + sincosf(z.y, &res.y, &res.x); + res.x *= t; + res.y *= t; + return res; +} + +static float getb(int k, int NFFT, int B_order) { + cufftComplex tempc, tempc2, res; + float tempf; + tempc2.x = 0; + tempc2.y = 0; + + tempc.x = 0; + tempc.y = 2 * (B_order - 1) * PI * k / NFFT; + res = expc(tempc); + + for (int kk = 0; kk < (B_order - 1); kk++) { + tempc.x = 0; + tempc.y = 
2 * PI * k / NFFT * kk; + tempc = expc(tempc); + tempf = M_(kk + 1, B_order); + tempc2.x += tempf * tempc.x; + tempc2.y += tempf * tempc.y; + } + res = cuCdivf(res, tempc2); + return res.x * res.x + res.y * res.y; +} + +__global__ static void PME_Atom_Near(const UNSIGNED_INT_VECTOR *uint_crd, int *PME_atom_near, const int PME_Nin, + const float periodic_factor_inverse_x, const float periodic_factor_inverse_y, + const float periodic_factor_inverse_z, const int atom_numbers, const int fftx, + const int ffty, const int fftz, const UNSIGNED_INT_VECTOR *PME_kxyz, + UNSIGNED_INT_VECTOR *PME_uxyz, VECTOR *PME_frxyz) { + int atom = blockDim.x * blockIdx.x + threadIdx.x; + if (atom < atom_numbers) { + UNSIGNED_INT_VECTOR *temp_uxyz = &PME_uxyz[atom]; + int k, tempux, tempuy, tempuz; + float tempf; + tempf = static_cast (uint_crd[atom].uint_x) * periodic_factor_inverse_x; + tempux = static_cast (tempf); + PME_frxyz[atom].x = tempf - tempux; + + tempf = static_cast (uint_crd[atom].uint_y) * periodic_factor_inverse_y; + tempuy = static_cast (tempf); + PME_frxyz[atom].y = tempf - tempuy; + + tempf = static_cast (uint_crd[atom].uint_z) * periodic_factor_inverse_z; + tempuz = static_cast (tempf); + PME_frxyz[atom].z = tempf - tempuz; + + if (tempux != (*temp_uxyz).uint_x || tempuy != (*temp_uxyz).uint_y || tempuz != (*temp_uxyz).uint_z) { + (*temp_uxyz).uint_x = tempux; + (*temp_uxyz).uint_y = tempuy; + (*temp_uxyz).uint_z = tempuz; + int *temp_near = PME_atom_near + atom * 64; + int kx, ky, kz; + for (k = 0; k < 64; k++) { + UNSIGNED_INT_VECTOR temp_kxyz = PME_kxyz[k]; + kx = tempux - temp_kxyz.uint_x; + if (kx < 0) kx += fftx; + ky = tempuy - temp_kxyz.uint_y; + if (ky < 0) ky += ffty; + kz = tempuz - temp_kxyz.uint_z; + if (kz < 0) kz += fftz; + temp_near[k] = kx * PME_Nin + ky * fftz + kz; + } + } + } +} + +__global__ static void PME_Q_Spread(int *PME_atom_near, const float *charge, const VECTOR *PME_frxyz, float *PME_Q, + const UNSIGNED_INT_VECTOR *PME_kxyz, const int atom_numbers) { + int atom = blockDim.x * blockIdx.x + threadIdx.x; + + if (atom < atom_numbers) { + int k; + float tempf, tempQ, tempf2; + + int *temp_near = PME_atom_near + atom * 64; + VECTOR temp_frxyz = PME_frxyz[atom]; + float tempcharge = charge[atom]; + + UNSIGNED_INT_VECTOR temp_kxyz; + unsigned int kx; + + for (k = threadIdx.y; k < 64; k = k + blockDim.y) { + temp_kxyz = PME_kxyz[k]; + kx = temp_kxyz.uint_x; + tempf = (temp_frxyz.x); + tempf2 = tempf * tempf; + tempf = PME_Ma[kx] * tempf * tempf2 + PME_Mb[kx] * tempf2 + PME_Mc[kx] * tempf + PME_Md[kx]; + + tempQ = tempcharge * tempf; + + kx = temp_kxyz.uint_y; + tempf = (temp_frxyz.y); + tempf2 = tempf * tempf; + tempf = PME_Ma[kx] * tempf * tempf2 + PME_Mb[kx] * tempf2 + PME_Mc[kx] * tempf + PME_Md[kx]; + + tempQ = tempQ * tempf; + + kx = temp_kxyz.uint_z; + tempf = (temp_frxyz.z); + tempf2 = tempf * tempf; + tempf = PME_Ma[kx] * tempf * tempf2 + PME_Mb[kx] * tempf2 + PME_Mc[kx] * tempf + PME_Md[kx]; + tempQ = tempQ * tempf; + + atomicAdd(&PME_Q[temp_near[k]], tempQ); + } + } +} + +__global__ static void PME_Direct_Energy(const int atom_numbers, const NEIGHBOR_LIST *nl, + const UNSIGNED_INT_VECTOR *uint_crd, const VECTOR *boxlength, + const float *charge, const float beta, const float cutoff_square, + float *direct_ene) { + int atom_i = blockDim.x * blockIdx.x + threadIdx.x; + if (atom_i < atom_numbers) { + NEIGHBOR_LIST nl_i = nl[atom_i]; + int N = nl_i.atom_numbers; + int atom_j; + int int_x; + int int_y; + int int_z; + UNSIGNED_INT_VECTOR r1 = uint_crd[atom_i], r2; + 
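+    // Direct-space part of the PME sum: every neighbour inside the cutoff
+    // contributes q_i * q_j * erfc(beta * r_ij) / r_ij; the smooth erf()
+    // remainder of the Coulomb interaction is recovered in reciprocal space.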
VECTOR dr; + float dr2; + float dr_abs; + // float dr_inverse; + float ene_temp; + float charge_i = charge[atom_i]; + float ene_lin = 0.; + + // int x, y; + // int atom_pair_LJ_type; + for (int j = threadIdx.y; j < N; j = j + blockDim.y) { + atom_j = nl_i.atom_serial[j]; + r2 = uint_crd[atom_j]; + + int_x = r2.uint_x - r1.uint_x; + int_y = r2.uint_y - r1.uint_y; + int_z = r2.uint_z - r1.uint_z; + dr.x = boxlength[0].x * int_x; + dr.y = boxlength[0].y * int_y; + dr.z = boxlength[0].z * int_z; + + dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z; + if (dr2 < cutoff_square) { + dr_abs = norm3df(dr.x, dr.y, dr.z); + ene_temp = charge_i * charge[atom_j] * erfcf(beta * dr_abs) / dr_abs; + ene_lin = ene_lin + ene_temp; + } + } + atomicAdd(direct_ene, ene_lin); + } +} + +#endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_impl.cu new file mode 100644 index 0000000000..7ef1132eff --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_impl.cu @@ -0,0 +1,234 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_impl.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_common.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh" + +__global__ void PME_Energy_Product(const int element_number, const float *list1, const float *list2, float *sum) { + if (threadIdx.x == 0) { + sum[0] = 0.; + } + __syncthreads(); + float lin = 0.0; + for (int i = threadIdx.x; i < element_number; i = i + blockDim.x) { + lin = lin + list1[i] * list2[i]; + } + atomicAdd(sum, lin); +} + +__global__ void PME_Energy_Reciprocal(const int element_number, const cufftComplex *FQ, const float *BC, float *sum) { + if (threadIdx.x == 0) { + sum[0] = 0.; + } + __syncthreads(); + float lin = 0.0; + cufftComplex FQ_i; + for (int i = threadIdx.x; i < element_number; i = i + blockDim.x) { + FQ_i = FQ[i]; + lin = lin + (FQ_i.x * FQ_i.x + FQ_i.y * FQ_i.y) * BC[i]; + } + atomicAdd(sum, lin); +} + +__global__ void PME_Excluded_Energy_Correction(const int atom_numbers, const UNSIGNED_INT_VECTOR *uint_crd, + const VECTOR *sacler, const float *charge, const float pme_beta, + const float sqrt_pi, const int *excluded_list_start, + const int *excluded_list, const int *excluded_atom_numbers, float *ene) { + int atom_i = blockDim.x * blockIdx.x + threadIdx.x; + if (atom_i < atom_numbers) { + int excluded_number = excluded_atom_numbers[atom_i]; + if (excluded_number > 0) { + int list_start = excluded_list_start[atom_i]; + // int atom_min = excluded_list[list_start]; + int list_end = list_start + excluded_number; + int atom_j; + int int_x; + int int_y; + int int_z; + + float charge_i = charge[atom_i]; + float charge_j; + float dr_abs; + float beta_dr; + + UNSIGNED_INT_VECTOR r1 = uint_crd[atom_i], r2; + VECTOR dr; + 
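+      // Bonded (excluded) pairs still appear in the reciprocal-space sum, so
+      // the loop below subtracts q_i * q_j * erf(beta * r) / r for each such
+      // pair, leaving only genuinely non-bonded electrostatics in the total.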
float dr2; + + float ene_lin = 0.; + + for (int i = list_start; i < list_end; i = i + 1) { + atom_j = excluded_list[i]; + r2 = uint_crd[atom_j]; + charge_j = charge[atom_j]; + + int_x = r2.uint_x - r1.uint_x; + int_y = r2.uint_y - r1.uint_y; + int_z = r2.uint_z - r1.uint_z; + dr.x = sacler[0].x * int_x; + dr.y = sacler[0].y * int_y; + dr.z = sacler[0].z * int_z; + dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z; + + dr_abs = sqrtf(dr2); + beta_dr = pme_beta * dr_abs; + + ene_lin -= charge_i * charge_j * erff(beta_dr) / dr_abs; + } + atomicAdd(ene, ene_lin); + } + } +} + +void PMEEnergy(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *box_length_f, float *PME_BC, + int *pme_uxyz, float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz, + const int *uint_crd_f, const float *charge, int *nl_atom_numbers, int *nl_atom_serial, int *nl, + const float *scaler_f, const int *excluded_list_start, const int *excluded_list, + const int *excluded_atom_numbers, float *d_reciprocal_ene, float *d_self_ene, float *d_direct_ene, + float *d_correction_ene, cudaStream_t stream) { + UNSIGNED_INT_VECTOR *uint_crd = + const_cast(reinterpret_cast(uint_crd_f)); + VECTOR *scaler = const_cast(reinterpret_cast(scaler_f)); + int max_neighbor_numbers = 800; + NEIGHBOR_LIST *nl_a = reinterpret_cast(nl); + construct_neighbor_list_kernel<<(atom_numbers) / 128), 128, 0, stream>>>( + atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a); + std::vector h_box_length(3); + cudaMemcpyAsync(h_box_length.data(), box_length_f, sizeof(float) * h_box_length.size(), cudaMemcpyDeviceToHost, + stream); + cudaStreamSynchronize(stream); + VECTOR *box_length = reinterpret_cast(h_box_length.data()); + + UNSIGNED_INT_VECTOR *PME_uxyz = reinterpret_cast(pme_uxyz); + UNSIGNED_INT_VECTOR *PME_kxyz = reinterpret_cast(pme_kxyz); + VECTOR *PME_frxyz = reinterpret_cast(pme_frxyz); + cufftComplex *PME_FQ = reinterpret_cast(pme_fq); + cufftHandle PME_plan_r2c; + cufftHandle PME_plan_c2r; + cufftPlan3d(&PME_plan_r2c, fftx, ffty, fftz, CUFFT_R2C); + cufftPlan3d(&PME_plan_c2r, fftx, ffty, fftz, CUFFT_C2R); + cufftSetStream(PME_plan_r2c, stream); + cufftSetStream(PME_plan_c2r, stream); + thread_PME.x = 8; + thread_PME.y = 8; + int PME_Nin = ffty * fftz; + int PME_Nfft = fftx * ffty * (fftz / 2 + 1); + int PME_Nall = fftx * ffty * fftz; + float volume = box_length[0].x * box_length[0].y * box_length[0].z; + + UNSIGNED_INT_VECTOR *PME_kxyz_cpu; + Malloc_Safely(reinterpret_cast(&PME_kxyz_cpu), sizeof(UNSIGNED_INT_VECTOR) * 64); + + int kx, ky, kz, kxrp, kyrp, kzrp, index; + for (kx = 0; kx < 4; kx++) { + for (ky = 0; ky < 4; ky++) { + for (kz = 0; kz < 4; kz++) { + index = kx * 16 + ky * 4 + kz; + PME_kxyz_cpu[index].uint_x = kx; + PME_kxyz_cpu[index].uint_y = ky; + PME_kxyz_cpu[index].uint_z = kz; + } + } + } + cudaMemcpyAsync(PME_kxyz, PME_kxyz_cpu, sizeof(UNSIGNED_INT_VECTOR) * 64, cudaMemcpyHostToDevice, stream); + cudaStreamSynchronize(stream); + free(PME_kxyz_cpu); + + // initial start + float *B1, *B2, *B3, *PME_BC0; + B1 = reinterpret_cast(malloc(sizeof(float) * fftx)); + B2 = reinterpret_cast(malloc(sizeof(float) * ffty)); + B3 = reinterpret_cast(malloc(sizeof(float) * fftz)); + PME_BC0 = reinterpret_cast(malloc(sizeof(float) * PME_Nfft)); + + for (kx = 0; kx < fftx; kx++) { + B1[kx] = getb(kx, fftx, 4); + } + + for (ky = 0; ky < ffty; ky++) { + B2[ky] = getb(ky, ffty, 4); + } + + for (kz = 0; kz < fftz; kz++) { + B3[kz] = getb(kz, fftz, 4); + } + float mprefactor = PI * PI / -beta / 
beta; + + float msq; + for (kx = 0; kx < fftx; kx++) { + kxrp = kx; + if (kx > fftx / 2) kxrp = fftx - kx; + for (ky = 0; ky < ffty; ky++) { + kyrp = ky; + if (ky > ffty / 2) kyrp = ffty - ky; + for (kz = 0; kz <= fftz / 2; kz++) { + kzrp = kz; + + msq = kxrp * kxrp / box_length[0].x / box_length[0].x + kyrp * kyrp / box_length[0].y / box_length[0].y + + kzrp * kzrp / box_length[0].z / box_length[0].z; + index = kx * ffty * (fftz / 2 + 1) + ky * (fftz / 2 + 1) + kz; + if ((kx + ky + kz) == 0) { + PME_BC0[index] = 0; + } else { + PME_BC0[index] = 1.0 / PI / msq * exp(mprefactor * msq) / volume; + } + + PME_BC0[index] *= B1[kx] * B2[ky] * B3[kz]; + } + } + } + + cudaMemcpyAsync(PME_BC, PME_BC0, sizeof(float) * PME_Nfft, cudaMemcpyHostToDevice, stream); + cudaStreamSynchronize(stream); + free(B1); + free(B2); + free(B3); + free(PME_BC0); + + Reset_List<<<3 * atom_numbers / 32 + 1, 32, 0, stream>>>(3 * atom_numbers, reinterpret_cast(PME_uxyz), + 1 << 30); + PME_Atom_Near<<>>( + uint_crd, PME_atom_near, PME_Nin, periodic_factor_inverse * fftx, periodic_factor_inverse * ffty, + periodic_factor_inverse * fftz, atom_numbers, fftx, ffty, fftz, PME_kxyz, PME_uxyz, PME_frxyz); + + Reset_List<<>>(PME_Nall, PME_Q, 0); + + PME_Q_Spread<<>>(PME_atom_near, charge, PME_frxyz, PME_Q, + PME_kxyz, atom_numbers); + + cufftExecR2C(PME_plan_r2c, reinterpret_cast(PME_Q), reinterpret_cast(PME_FQ)); + + PME_Energy_Reciprocal<<<1, 1024, 0, stream>>>(PME_Nfft, PME_FQ, PME_BC, d_reciprocal_ene); + + PME_Energy_Product<<<1, 1024, 0, stream>>>(atom_numbers, charge, charge, d_self_ene); + Scale_List<<<1, 1, 0, stream>>>(1, d_self_ene, -beta / sqrtf(PI)); + + Reset_List<<<1, 1, 0, stream>>>(1, d_direct_ene, 0.0); + PME_Direct_Energy<<>>( + atom_numbers, nl_a, uint_crd, scaler, charge, beta, cutoff * cutoff, d_direct_ene); + + Reset_List<<<1, 1, 0, stream>>>(1, d_correction_ene, 0.0); + PME_Excluded_Energy_Correction<<>>( + atom_numbers, uint_crd, scaler, charge, beta, sqrtf(PI), excluded_list_start, excluded_list, excluded_atom_numbers, + d_correction_ene); + return; +} +void PMEEnergy(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *box_length_f, float *PME_BC, + int *pme_uxyz, float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz, + const int *uint_crd_f, const float *charge, int *nl_atom_numbers, int *nl_atom_serial, int *nl, + const float *scaler_f, const int *excluded_list_start, const int *excluded_list, + const int *excluded_atom_numbers, float *d_reciprocal_ene, float *d_self_ene, float *d_direct_ene, + float *d_correction_ene, cudaStream_t stream); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_impl.cuh new file mode 100644 index 0000000000..90457f1c90 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_impl.cuh @@ -0,0 +1,30 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_ENERGY_IMPL_H_
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_ENERGY_IMPL_H_
+
+#include <curand_kernel.h>
+#include <cufft.h>
+#include "runtime/device/gpu/cuda_common.h"
+
+void PMEEnergy(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *box_length_f, float *PME_BC,
+               int *pme_uxyz, float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz,
+               const int *uint_crd_f, const float *charge, int *nl_atom_numbers, int *nl_atom_serial, int *nl,
+               const float *scaler_f, const int *excluded_list_start, const int *excluded_list,
+               const int *excluded_atom_numbers, float *d_reciprocal_ene, float *d_self_ene, float *d_direct_ene,
+               float *d_correction_ene, cudaStream_t stream);
+
+#endif
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_excluded_force_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_excluded_force_impl.cu
new file mode 100644
index 0000000000..30657d5b53
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_excluded_force_impl.cu
@@ -0,0 +1,102 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_excluded_force_impl.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_common.cuh"
+
+__global__ void PME_Excluded_Force_Correction(const int atom_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
+                                              const VECTOR *sacler, const float *charge, const float pme_beta,
+                                              const float sqrt_pi, const int *excluded_list_start,
+                                              const int *excluded_list, const int *excluded_atom_numbers,
+                                              VECTOR *frc) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    int excluded_numbers = excluded_atom_numbers[atom_i];
+    if (excluded_numbers > 0) {
+      int list_start = excluded_list_start[atom_i];
+      // int atom_min = excluded_list[list_start];
+      int list_end = list_start + excluded_numbers;
+      int atom_j;
+      int int_x;
+      int int_y;
+      int int_z;
+
+      float charge_i = charge[atom_i];
+      float charge_j;
+      float dr_abs;
+      float beta_dr;
+
+      UNSIGNED_INT_VECTOR r1 = uint_crd[atom_i], r2;
+      VECTOR dr;
+      float dr2;
+
+      float frc_abs = 0.;
+      VECTOR frc_lin;
+      VECTOR frc_record = {0., 0., 0.};
+
+      for (int i = list_start; i < list_end; i = i + 1) {
+        atom_j = excluded_list[i];
+        r2 = uint_crd[atom_j];
+        charge_j = charge[atom_j];
+
+        int_x = r2.uint_x - r1.uint_x;
+        int_y = r2.uint_y - r1.uint_y;
+        int_z = r2.uint_z - r1.uint_z;
+        dr.x = sacler[0].x * int_x;
+        dr.y = sacler[0].y * int_y;
+        dr.z = sacler[0].z * int_z;
+        dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
+
+        dr_abs = sqrtf(dr2);
+        beta_dr = pme_beta * dr_abs;
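+        // With erfc(x) - 1 = -erf(x) and sqrt_pi = 2/sqrt(pi), the statements
+        // below evaluate
+        //   frc_abs = -q_i * q_j * (1/r) * d/dr[ erf(beta * r) / r ],
+        // the force that cancels the reciprocal-space contribution of this
+        // excluded (bonded) pair.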
+        // sqrt_pi = 2/sqrt(3.141592654);
+        frc_abs = beta_dr * sqrt_pi * expf(-beta_dr * beta_dr) + erfcf(beta_dr);
+        frc_abs = (frc_abs - 1.) / dr2 / dr_abs;
+        frc_abs = -charge_i * charge_j * frc_abs;
+        frc_lin.x = frc_abs * dr.x;
+        frc_lin.y = frc_abs * dr.y;
+        frc_lin.z = frc_abs * dr.z;
+
+        frc_record.x = frc_record.x + frc_lin.x;
+        frc_record.y = frc_record.y + frc_lin.y;
+        frc_record.z = frc_record.z + frc_lin.z;
+
+        atomicAdd(&frc[atom_j].x, -frc_lin.x);
+        atomicAdd(&frc[atom_j].y, -frc_lin.y);
+        atomicAdd(&frc[atom_j].z, -frc_lin.z);
+      }  // atom_j cycle
+      atomicAdd(&frc[atom_i].x, frc_record.x);
+      atomicAdd(&frc[atom_i].y, frc_record.y);
+      atomicAdd(&frc[atom_i].z, frc_record.z);
+    }  // if need excluded
+  }
+}
+
+void PMEExcludedForce(const int atom_numbers, const float pme_beta, const int *uint_crd_f, const float *sacler_f,
+                      const float *charge, const int *excluded_list_start, const int *excluded_list,
+                      const int *excluded_atom_numbers, float *frc_f, cudaStream_t stream) {
+  UNSIGNED_INT_VECTOR *uint_crd =
+    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
+  VECTOR *frc = reinterpret_cast<VECTOR *>(frc_f);
+  VECTOR *sacler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(sacler_f));
+
+  PME_Excluded_Force_Correction<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
+    atom_numbers, uint_crd, sacler, charge, pme_beta, TWO_DIVIDED_BY_SQRT_PI, excluded_list_start, excluded_list,
+    excluded_atom_numbers, frc);
+  return;
+}
+
+void PMEExcludedForce(const int atom_numbers, const float pme_beta, const int *uint_crd_f, const float *sacler_f,
+                      const float *charge, const int *excluded_list_start, const int *excluded_list,
+                      const int *excluded_atom_numbers, float *frc_f, cudaStream_t stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_excluded_force_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_excluded_force_impl.cuh
new file mode 100644
index 0000000000..b14888962a
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_excluded_force_impl.cuh
@@ -0,0 +1,26 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_EXCLUDED_FORCE_IMPL_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_EXCLUDED_FORCE_IMPL_H_ + +#include +#include "runtime/device/gpu/cuda_common.h" + +void PMEExcludedForce(const int atom_numbers, const float pme_beta, const int *uint_crd_f, const float *sacler_f, + const float *charge, const int *excluded_list_start, const int *excluded_list, + const int *excluded_atom_numbers, float *frc_f, cudaStream_t stream); + +#endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cu new file mode 100644 index 0000000000..b064a7df1f --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cu @@ -0,0 +1,204 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_common.cuh" + +__global__ void PME_BCFQ(cufftComplex *PME_FQ, float *PME_BC, int PME_Nfft) { + int index = blockDim.x * blockIdx.x + threadIdx.x; + if (index < PME_Nfft) { + float tempf = PME_BC[index]; + cufftComplex tempc = PME_FQ[index]; + PME_FQ[index].x = tempc.x * tempf; + PME_FQ[index].y = tempc.y * tempf; + } +} + +__global__ void PME_Final(int *PME_atom_near, const float *charge, const float *PME_Q, VECTOR *force, + const VECTOR *PME_frxyz, const UNSIGNED_INT_VECTOR *PME_kxyz, + const VECTOR PME_inverse_box_vector, const int atom_numbers) { + int atom = blockDim.x * blockIdx.x + threadIdx.x; + if (atom < atom_numbers) { + int k, kx; + float tempdQx, tempdQy, tempdQz, tempdx, tempdy, tempdz, tempx, tempy, tempz, tempdQf; + float tempf, tempf2; + float temp_charge = charge[atom]; + int *temp_near = PME_atom_near + atom * 64; + UNSIGNED_INT_VECTOR temp_kxyz; + VECTOR temp_frxyz = PME_frxyz[atom]; + for (k = threadIdx.y; k < 64; k = k + blockDim.y) { + temp_kxyz = PME_kxyz[k]; + tempdQf = -PME_Q[temp_near[k]] * temp_charge; + + kx = temp_kxyz.uint_x; + tempf = (temp_frxyz.x); + tempf2 = tempf * tempf; + tempx = PME_Ma[kx] * tempf * tempf2 + PME_Mb[kx] * tempf2 + PME_Mc[kx] * tempf + PME_Md[kx]; + tempdx = PME_dMa[kx] * tempf2 + PME_dMb[kx] * tempf + PME_dMc[kx]; + + kx = temp_kxyz.uint_y; + tempf = (temp_frxyz.y); + tempf2 = tempf * tempf; + tempy = PME_Ma[kx] * tempf * tempf2 + PME_Mb[kx] * tempf2 + PME_Mc[kx] * tempf + PME_Md[kx]; + tempdy = PME_dMa[kx] * tempf2 + PME_dMb[kx] * tempf + PME_dMc[kx]; + + kx = temp_kxyz.uint_z; + tempf = (temp_frxyz.z); + tempf2 = tempf * tempf; + tempz = PME_Ma[kx] * tempf * tempf2 + PME_Mb[kx] * tempf2 + PME_Mc[kx] * tempf + PME_Md[kx]; + tempdz = PME_dMa[kx] * tempf2 + PME_dMb[kx] * tempf + PME_dMc[kx]; + + tempdQx = tempdx * tempy * tempz * PME_inverse_box_vector.x; + tempdQy = tempdy * tempx * tempz * 
PME_inverse_box_vector.y; + tempdQz = tempdz * tempx * tempy * PME_inverse_box_vector.z; + + atomicAdd(&force[atom].x, tempdQf * tempdQx); + atomicAdd(&force[atom].y, tempdQf * tempdQy); + atomicAdd(&force[atom].z, tempdQf * tempdQz); + } + } +} + +void PMEReciprocalForce(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC, int *pme_uxyz, + float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz, + const float *box_length_f, const int *uint_crd_f, const float *charge, float *force, + cudaStream_t stream) { + UNSIGNED_INT_VECTOR *uint_crd = + const_cast(reinterpret_cast(uint_crd_f)); + UNSIGNED_INT_VECTOR *PME_uxyz = reinterpret_cast(pme_uxyz); + UNSIGNED_INT_VECTOR *PME_kxyz = reinterpret_cast(pme_kxyz); + Reset_List<<<3 * atom_numbers / 32 + 1, 32, 0, stream>>>(3 * atom_numbers, reinterpret_cast(PME_uxyz), + 1 << 30); + + VECTOR *PME_frxyz = reinterpret_cast(pme_frxyz); + VECTOR *frc = reinterpret_cast(force); + + std::vector h_box_length(3); + cudaMemcpyAsync(h_box_length.data(), box_length_f, sizeof(float) * h_box_length.size(), cudaMemcpyDeviceToHost, + stream); + cudaStreamSynchronize(stream); + VECTOR *box_length = const_cast(reinterpret_cast(h_box_length.data())); + cufftComplex *PME_FQ = reinterpret_cast(pme_fq); + + VECTOR PME_inverse_box_vector; + PME_inverse_box_vector.x = static_cast(fftx) / box_length[0].x; + PME_inverse_box_vector.y = static_cast(ffty) / box_length[0].y; + PME_inverse_box_vector.z = static_cast(fftz) / box_length[0].z; + cufftHandle PME_plan_r2c; + cufftHandle PME_plan_c2r; + cufftPlan3d(&PME_plan_r2c, fftx, ffty, fftz, CUFFT_R2C); + cufftPlan3d(&PME_plan_c2r, fftx, ffty, fftz, CUFFT_C2R); + cufftSetStream(PME_plan_r2c, stream); + cufftSetStream(PME_plan_c2r, stream); + thread_PME.x = 8; + thread_PME.y = 8; + int PME_Nin = ffty * fftz; + int PME_Nfft = fftx * ffty * (fftz / 2 + 1); + int PME_Nall = fftx * ffty * fftz; + float volume = box_length[0].x * box_length[0].y * box_length[0].z; + + UNSIGNED_INT_VECTOR *PME_kxyz_cpu; + Malloc_Safely(reinterpret_cast(&PME_kxyz_cpu), sizeof(UNSIGNED_INT_VECTOR) * 64); + + int kx, ky, kz, kxrp, kyrp, kzrp, index; + for (kx = 0; kx < 4; kx++) { + for (ky = 0; ky < 4; ky++) { + for (kz = 0; kz < 4; kz++) { + index = kx * 16 + ky * 4 + kz; + PME_kxyz_cpu[index].uint_x = kx; + PME_kxyz_cpu[index].uint_y = ky; + PME_kxyz_cpu[index].uint_z = kz; + } + } + } + cudaMemcpyAsync(PME_kxyz, PME_kxyz_cpu, sizeof(UNSIGNED_INT_VECTOR) * 64, cudaMemcpyHostToDevice, stream); + cudaStreamSynchronize(stream); + free(PME_kxyz_cpu); + + // initial start + float *B1, *B2, *B3, *PME_BC0; + B1 = reinterpret_cast(malloc(sizeof(float) * fftx)); + B2 = reinterpret_cast(malloc(sizeof(float) * ffty)); + B3 = reinterpret_cast(malloc(sizeof(float) * fftz)); + PME_BC0 = reinterpret_cast(malloc(sizeof(float) * PME_Nfft)); + + for (kx = 0; kx < fftx; kx++) { + B1[kx] = getb(kx, fftx, 4); + } + + for (ky = 0; ky < ffty; ky++) { + B2[ky] = getb(ky, ffty, 4); + } + + for (kz = 0; kz < fftz; kz++) { + B3[kz] = getb(kz, fftz, 4); + } + float mprefactor = PI * PI / -beta / beta; + float msq; + for (kx = 0; kx < fftx; kx++) { + kxrp = kx; + if (kx > fftx / 2) kxrp = fftx - kx; + for (ky = 0; ky < ffty; ky++) { + kyrp = ky; + if (ky > ffty / 2) kyrp = ffty - ky; + for (kz = 0; kz <= fftz / 2; kz++) { + kzrp = kz; + + msq = kxrp * kxrp / box_length[0].x / box_length[0].x + kyrp * kyrp / box_length[0].y / box_length[0].y + + kzrp * kzrp / box_length[0].z / box_length[0].z; + index = kx * ffty * (fftz / 2 + 1) + ky * 
(fftz / 2 + 1) + kz; + if ((kx + ky + kz) == 0) { + PME_BC0[index] = 0; + } else { + PME_BC0[index] = 1.0 / PI / msq * exp(mprefactor * msq) / volume; + } + + PME_BC0[index] *= B1[kx] * B2[ky] * B3[kz]; + } + } + } + + cudaMemcpyAsync(PME_BC, PME_BC0, sizeof(float) * PME_Nfft, cudaMemcpyHostToDevice, stream); + cudaStreamSynchronize(stream); + free(B1); + free(B2); + free(B3); + free(PME_BC0); + + // initial end + Reset_List<<(3. * atom_numbers) / 128), 128, 0, stream>>>( + 3 * atom_numbers, reinterpret_cast(frc), 0.); + PME_Atom_Near<<>>( + uint_crd, PME_atom_near, PME_Nin, periodic_factor_inverse * fftx, periodic_factor_inverse * ffty, + periodic_factor_inverse * fftz, atom_numbers, fftx, ffty, fftz, PME_kxyz, PME_uxyz, PME_frxyz); + Reset_List<<>>(PME_Nall, PME_Q, 0); + + PME_Q_Spread<<>>(PME_atom_near, charge, PME_frxyz, PME_Q, + PME_kxyz, atom_numbers); + + cufftExecR2C(PME_plan_r2c, reinterpret_cast(PME_Q), reinterpret_cast(PME_FQ)); + PME_BCFQ<<>>(PME_FQ, PME_BC, PME_Nfft); + + cufftExecC2R(PME_plan_c2r, reinterpret_cast(PME_FQ), reinterpret_cast(PME_Q)); + + PME_Final<<>>(PME_atom_near, charge, PME_Q, frc, PME_frxyz, + PME_kxyz, PME_inverse_box_vector, atom_numbers); + return; +} + +void PMEReciprocalForce(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC, int *pme_uxyz, + float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz, + const float *box_length_f, const int *uint_crd_f, const float *charge, float *force, + cudaStream_t stream); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cuh new file mode 100644 index 0000000000..360ae6711f --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cuh @@ -0,0 +1,28 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_RECIPROCAL_FORCE_IMPL_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_RECIPROCAL_FORCE_IMPL_H_ + +#include +#include +#include "runtime/device/gpu/cuda_common.h" + +void PMEReciprocalForce(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC, int *pme_uxyz, + float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz, + const float *box_length_f, const int *uint_crd_f, const float *charge, float *force, + cudaStream_t stream); + +#endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/getcenter_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/getcenter_kernel.cc new file mode 100644 index 0000000000..76275ef9ba --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/getcenter_kernel.cc @@ -0,0 +1,27 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/kernel_compiler/gpu/sponge/common/getcenter_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO( + GetCenterOfGeometry, + KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + GetCenterOfGeometryGpuKernel, float, int) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/getcenter_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/getcenter_kernel.h new file mode 100644 index 0000000000..493c6d23c2 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/getcenter_kernel.h @@ -0,0 +1,89 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_COMMON_GETCENTER_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_COMMON_GETCENTER_KERNEL_H_ + +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common/getcenter_impl.cuh" + +namespace mindspore { +namespace kernel { + +template +class GetCenterOfGeometryGpuKernel : public GpuKernel { + public: + GetCenterOfGeometryGpuKernel() : ele_center_atoms(1) {} + ~GetCenterOfGeometryGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + center_numbers = static_cast(GetAttr(kernel_node, "center_numbers")); + center_numbers_inverse = static_cast(GetAttr(kernel_node, "center_numbers_inverse")); + + auto shape_center_atoms = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto shape_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + + for (size_t i = 0; i < shape_center_atoms.size(); i++) ele_center_atoms *= shape_center_atoms[i]; + for (size_t i = 0; i < shape_crd.size(); i++) ele_crd *= shape_crd[i]; + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) override { + auto center_atoms = GetDeviceAddress(inputs, 0); + auto crd = GetDeviceAddress(inputs, 1); + + auto center_of_geometry = GetDeviceAddress(outputs, 0); + + GetCenterOfGeometry(center_numbers, center_numbers_inverse, center_atoms, crd, center_of_geometry, + reinterpret_cast(stream_ptr)); + + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(ele_center_atoms * sizeof(T1)); + input_size_list_.push_back(ele_crd * sizeof(T)); + + output_size_list_.push_back(3 * sizeof(T)); + } + + private: + size_t ele_center_atoms = 1; + size_t ele_crd = 1; + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int center_numbers; + float center_numbers_inverse; +}; +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_COMMON_GETCENTER_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/mdtemperature_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/mdtemperature_kernel.cc new file mode 100644 index 0000000000..ada4cbe675 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/mdtemperature_kernel.cc @@ -0,0 +1,31 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "backend/kernel_compiler/gpu/sponge/common/mdtemperature_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(MDTemperature, + KernelAttr() + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + MDTemperatureGpuKernel, float, int) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/mdtemperature_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/mdtemperature_kernel.h new file mode 100644 index 0000000000..7fae9a2245 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/mdtemperature_kernel.h @@ -0,0 +1,96 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_COMMON_MDTEMPERATURE_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_COMMON_MDTEMPERATURE_KERNEL_H_ + +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common/mdtemperature_impl.cuh" + +namespace mindspore { +namespace kernel { + +template +class MDTemperatureGpuKernel : public GpuKernel { + public: + MDTemperatureGpuKernel() : ele_start(1) {} + ~MDTemperatureGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + residue_numbers = static_cast(GetAttr(kernel_node, "residue_numbers")); + + auto shape_start = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto shape_end = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + auto shape_atom_vel = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + auto shape_atom_mass = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); + + for (size_t i = 0; i < shape_start.size(); i++) ele_start *= shape_start[i]; + for (size_t i = 0; i < shape_end.size(); i++) ele_end *= shape_end[i]; + for (size_t i = 0; i < shape_atom_vel.size(); i++) ele_atom_vel *= shape_atom_vel[i]; + for (size_t i = 0; i < shape_atom_mass.size(); i++) ele_atom_mass *= shape_atom_mass[i]; + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) override { + auto start = GetDeviceAddress(inputs, 0); + auto end = GetDeviceAddress(inputs, 1); + auto atom_vel_f = GetDeviceAddress(inputs, 2); + auto atom_mass = GetDeviceAddress(inputs, 3); + + auto ek = GetDeviceAddress(outputs, 0); + + 
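+    // The kernel below reduces the velocities and masses of the atoms in
+    // [start[i], end[i]) to an effective temperature, writing one value per
+    // residue into ek.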
MDTemperature(residue_numbers, start, end, atom_vel_f, atom_mass, ek, reinterpret_cast(stream_ptr)); + + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(ele_start * sizeof(T1)); + input_size_list_.push_back(ele_end * sizeof(T1)); + input_size_list_.push_back(ele_atom_vel * sizeof(T)); + input_size_list_.push_back(ele_atom_mass * sizeof(T)); + + output_size_list_.push_back(residue_numbers * sizeof(T)); + } + + private: + size_t ele_start = 1; + size_t ele_end = 1; + size_t ele_atom_vel = 1; + size_t ele_atom_mass = 1; + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int residue_numbers; +}; +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_COMMON_MDTEMPERATURE_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_energy_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_energy_kernel.cc new file mode 100644 index 0000000000..84455f13d3 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_energy_kernel.cc @@ -0,0 +1,34 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "backend/kernel_compiler/gpu/sponge/lj/lj_energy_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(LJEnergy, + KernelAttr() + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + LJEnergyGpuKernel, float, int) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_energy_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_energy_kernel.h new file mode 100644 index 0000000000..8ef49930b0 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_energy_kernel.h @@ -0,0 +1,130 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_LJ_LJ_ENERGY_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_LJ_LJ_ENERGY_KERNEL_H_ +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_energy_impl.cuh" +namespace mindspore { +namespace kernel { +template +class LJEnergyGpuKernel : public GpuKernel { + public: + LJEnergyGpuKernel() : ele_uint_crd(1) {} + ~LJEnergyGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + cutoff_square = static_cast(GetAttr(kernel_node, "cutoff_square")); + + auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + auto shape_scaler = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); + auto shape_nl_numbers = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); + auto shape_nl_serial = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); + auto shape_d_LJ_a = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); + auto shape_d_LJ_b = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); + + for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; + for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; + for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; + for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; + // for (size_t i = 0; i < shape_nl.size(); i++) ele_nl *= shape_nl[i]; + for (size_t i = 0; i < shape_d_LJ_a.size(); i++) ele_d_LJ_a *= shape_d_LJ_a[i]; + for (size_t i = 0; i < shape_d_LJ_b.size(); i++) ele_d_LJ_b *= shape_d_LJ_b[i]; + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) override { + auto uint_crd = GetDeviceAddress(inputs, 0); + auto LJtype = GetDeviceAddress(inputs, 1); + auto charge = GetDeviceAddress(inputs, 2); + auto scaler = GetDeviceAddress(inputs, 3); + auto nl_numbers = GetDeviceAddress(inputs, 4); + auto nl_serial = GetDeviceAddress(inputs, 5); + auto d_LJ_a = GetDeviceAddress(inputs, 6); + auto d_LJ_b = GetDeviceAddress(inputs, 7); + + auto uint_crd_with_LJ = GetDeviceAddress(workspace, 0); + auto nl = GetDeviceAddress(workspace, 1); + + auto d_LJ_energy_atom = GetDeviceAddress(outputs, 0); + LJEnergy(atom_numbers, cutoff_square, uint_crd, LJtype, charge, scaler, uint_crd_with_LJ, nl_numbers, nl_serial, nl, + d_LJ_a, d_LJ_b, d_LJ_energy_atom, reinterpret_cast(stream_ptr)); + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(ele_uint_crd * sizeof(T1)); + input_size_list_.push_back(ele_LJtype * sizeof(T1)); + input_size_list_.push_back(ele_charge * sizeof(T)); + input_size_list_.push_back(ele_scaler * sizeof(T)); + input_size_list_.push_back(atom_numbers * 
sizeof(T1)); + input_size_list_.push_back(max_nl_numbers * sizeof(T1)); + input_size_list_.push_back(ele_d_LJ_a * sizeof(T)); + input_size_list_.push_back(ele_d_LJ_b * sizeof(T)); + + workspace_size_list_.push_back(atom_numbers * max_nl_numbers * sizeof(T1)); + workspace_size_list_.push_back(atom_numbers * sizeof(UINT_VECTOR_LJ_TYPE)); + + output_size_list_.push_back(atom_numbers * sizeof(T)); + } + + private: + size_t ele_uint_crd = 1; + size_t ele_LJtype = 1; + size_t ele_charge = 1; + size_t ele_scaler = 1; + size_t ele_nl = 1; + size_t ele_d_LJ_a = 1; + size_t ele_d_LJ_b = 1; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int atom_numbers; + float cutoff_square; + int max_nl_numbers = 800; + struct UINT_VECTOR_LJ_TYPE { + unsigned int uint_x; + unsigned int uint_y; + unsigned int uint_z; + int LJ_type; + float charge; + }; + struct NEIGHBOR_LIST { + int atom_numbers; + int *atom_serial; + }; +}; +} // namespace kernel +} // namespace mindspore +#endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_kernel.cc new file mode 100644 index 0000000000..9c7c83c639 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_kernel.cc @@ -0,0 +1,34 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "backend/kernel_compiler/gpu/sponge/lj/lj_force_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(LJForce, + KernelAttr() + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + LJForceGpuKernel, float, int) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_kernel.h new file mode 100644 index 0000000000..da0a8f06db --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_kernel.h @@ -0,0 +1,129 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_LJ_LJ_FORCE_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_LJ_LJ_FORCE_KERNEL_H_ +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_impl.cuh" +namespace mindspore { +namespace kernel { +template +class LJForceGpuKernel : public GpuKernel { + public: + LJForceGpuKernel() : ele_uint_crd(1) {} + ~LJForceGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + cutoff_square = static_cast(GetAttr(kernel_node, "cutoff_square")); + + auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + auto shape_scaler = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); + auto shape_nl_numbers = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); + auto shape_nl_serial = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); + auto shape_d_LJ_a = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); + auto shape_d_LJ_b = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); + + for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; + for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; + for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; + for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; + // for (size_t i = 0; i < shape_nl.size(); i++) ele_nl *= shape_nl[i]; + for (size_t i = 0; i < shape_d_LJ_a.size(); i++) ele_d_LJ_a *= shape_d_LJ_a[i]; + for (size_t i = 0; i < shape_d_LJ_b.size(); i++) ele_d_LJ_b *= shape_d_LJ_b[i]; + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) override { + auto uint_crd = GetDeviceAddress(inputs, 0); + auto LJtype = GetDeviceAddress(inputs, 1); + auto charge = GetDeviceAddress(inputs, 2); + auto scaler = GetDeviceAddress(inputs, 3); + auto nl_numbers = GetDeviceAddress(inputs, 4); + auto nl_serial = GetDeviceAddress(inputs, 5); + auto d_LJ_a = GetDeviceAddress(inputs, 6); + auto d_LJ_b = GetDeviceAddress(inputs, 7); + + auto uint_crd_with_LJ = GetDeviceAddress(workspace, 0); + auto nl = GetDeviceAddress(workspace, 1); + + auto frc = GetDeviceAddress(outputs, 0); + LJForce(atom_numbers, cutoff_square, uint_crd, LJtype, charge, scaler, uint_crd_with_LJ, nl_numbers, nl_serial, nl, + d_LJ_a, d_LJ_b, frc, reinterpret_cast(stream_ptr)); + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(ele_uint_crd * sizeof(T1)); + input_size_list_.push_back(ele_LJtype * sizeof(T1)); + input_size_list_.push_back(ele_charge * sizeof(T)); + input_size_list_.push_back(ele_scaler * sizeof(T)); + input_size_list_.push_back(atom_numbers * sizeof(T1)); + 
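+    // One neighbour count per atom; the serial buffer that follows is
+    // bounded by max_nl_numbers (800) entries, the same neighbour-list
+    // layout the LJEnergy kernel above declares.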
input_size_list_.push_back(max_nl_numbers * sizeof(T1)); + input_size_list_.push_back(ele_d_LJ_a * sizeof(T)); + input_size_list_.push_back(ele_d_LJ_b * sizeof(T)); + + workspace_size_list_.push_back(atom_numbers * max_nl_numbers * sizeof(T1)); + workspace_size_list_.push_back(atom_numbers * sizeof(UINT_VECTOR_LJ_TYPE)); + + output_size_list_.push_back(atom_numbers * 3 * sizeof(T)); + } + + private: + size_t ele_uint_crd = 1; + size_t ele_LJtype = 1; + size_t ele_charge = 1; + size_t ele_scaler = 1; + size_t ele_d_LJ_a = 1; + size_t ele_d_LJ_b = 1; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int atom_numbers; + float cutoff_square; + int max_nl_numbers = 800; + struct UINT_VECTOR_LJ_TYPE { + unsigned int uint_x; + unsigned int uint_y; + unsigned int uint_z; + int LJ_type; + float charge; + }; + struct NEIGHBOR_LIST { + int atom_numbers; + int *atom_serial; + }; +}; +} // namespace kernel +} // namespace mindspore +#endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_with_pme_direct_force_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_with_pme_direct_force_kernel.cc new file mode 100644 index 0000000000..e4930691b2 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_with_pme_direct_force_kernel.cc @@ -0,0 +1,34 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "backend/kernel_compiler/gpu/sponge/lj/lj_force_with_pme_direct_force_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(LJForceWithPMEDirectForce, + KernelAttr() + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + LJForceWithPMEDirectForceGpuKernel, float, int) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_with_pme_direct_force_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_with_pme_direct_force_kernel.h new file mode 100644 index 0000000000..f7850a05d8 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_with_pme_direct_force_kernel.h @@ -0,0 +1,133 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
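// NOTE: the LJForce wrapper above only stages device buffers; the pair math lives in
// lj_force_impl.cu. For orientation, a self-contained CUDA sketch of a Lennard-Jones
// force pass over a per-atom neighbor list, using plain float coordinates instead of
// SPONGE's unsigned fixed-point crd, and a flat per-atom LJ table instead of the real
// pairwise A/B lookup (illustrative only, not the shipped kernel):
#include <cuda_runtime.h>

__global__ void LJForceSketch(int atom_numbers, float cutoff_square, const float3 *crd,
                              const int *lj_type, const float *d_lj_a, const float *d_lj_b,
                              const int *nl_numbers, const int *nl_serial, int max_neighbors,
                              float3 *frc) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i >= atom_numbers) return;
  float3 fi = make_float3(0.0f, 0.0f, 0.0f);
  for (int n = 0; n < nl_numbers[i]; ++n) {
    int j = nl_serial[i * max_neighbors + n];
    float3 dr = make_float3(crd[j].x - crd[i].x, crd[j].y - crd[i].y, crd[j].z - crd[i].z);
    float r2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
    if (r2 >= cutoff_square) continue;  // the same cutoff_square test the attr above configures
    float inv_r2 = 1.0f / r2;
    float inv_r6 = inv_r2 * inv_r2 * inv_r2;
    int t = lj_type[i];  // simplification: real kernels index a table by (type_i, type_j)
    // For E = A/r^12 - B/r^6, the force on atom i is -(12A/r^14 - 6B/r^8) * dr.
    float coef = (12.0f * d_lj_a[t] * inv_r6 - 6.0f * d_lj_b[t]) * inv_r6 * inv_r2;
    fi.x -= coef * dr.x;
    fi.y -= coef * dr.y;
    fi.z -= coef * dr.z;
  }
  frc[i] = fi;  // half-list schemes would instead atomicAdd the opposite force onto atom j
}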
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_LJ_LJ_FORCE_WITH_PME_DIRECT_FORCE_KERNEL_H_
+#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_LJ_LJ_FORCE_WITH_PME_DIRECT_FORCE_KERNEL_H_
+#include <cuda_runtime_api.h>
+#include <map>
+#include <string>
+#include <vector>
+#include "backend/kernel_compiler/gpu/gpu_kernel.h"
+#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
+#include "runtime/device/gpu/cuda_common.h"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_with_pme_direct_force_impl.cuh"
+namespace mindspore {
+namespace kernel {
+template <typename T, typename T1>
+class LJForceWithPMEDirectForceGpuKernel : public GpuKernel {
+ public:
+  LJForceWithPMEDirectForceGpuKernel() : ele_uint_crd(1) {}
+  ~LJForceWithPMEDirectForceGpuKernel() override = default;
+
+  bool Init(const CNodePtr &kernel_node) override {
+    kernel_node_ = kernel_node;
+    atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));
+    cutoff = static_cast<float>(GetAttr<float>(kernel_node, "cutoff"));
+    pme_beta = static_cast<float>(GetAttr<float>(kernel_node, "pme_beta"));
+
+    auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+    auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
+    auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
+    auto shape_scaler = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
+    auto shape_nl_numbers = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4);
+    auto shape_nl_serial = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5);
+    auto shape_d_LJ_a = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6);
+    auto shape_d_LJ_b = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7);
+
+    for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i];
+    for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i];
+    for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i];
+    for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i];
+    // for (size_t i = 0; i < shape_nl.size(); i++) ele_nl *= shape_nl[i];
+    for (size_t i = 0; i < shape_d_LJ_a.size(); i++) ele_d_LJ_a *= shape_d_LJ_a[i];
+    for (size_t i = 0; i < shape_d_LJ_b.size(); i++) ele_d_LJ_b *= shape_d_LJ_b[i];
+
+    InitSizeLists();
+    return true;
+  }
+
+  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
+  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
+  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
+
+  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
+              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    auto uint_crd = GetDeviceAddress<T1>(inputs, 0);
+    auto LJtype = GetDeviceAddress<T1>(inputs, 1);
+    auto charge = GetDeviceAddress<T>(inputs, 2);
+    auto scaler = GetDeviceAddress<T>(inputs, 3);
+    auto nl_numbers = GetDeviceAddress<T1>(inputs, 4);
+    auto nl_serial = GetDeviceAddress<T1>(inputs, 5);
+    auto d_LJ_a = GetDeviceAddress<T>(inputs, 6);
+    auto d_LJ_b = GetDeviceAddress<T>(inputs, 7);
+
+    auto uint_crd_with_LJ = GetDeviceAddress<UINT_VECTOR_LJ_TYPE>(workspace, 0);
+    auto nl = GetDeviceAddress<NEIGHBOR_LIST>(workspace, 1);
+
+    auto frc = GetDeviceAddress<T>(outputs, 0);
+    LJForceWithPMEDirectForce(atom_numbers, cutoff, pme_beta, uint_crd, LJtype, charge, scaler, uint_crd_with_LJ,
+                              nl_numbers, nl_serial, nl, d_LJ_a, d_LJ_b, frc,
+                              reinterpret_cast<cudaStream_t>(stream_ptr));
+    return true;
+  }
+
+ protected:
+  void InitSizeLists() override {
+    input_size_list_.push_back(ele_uint_crd * sizeof(T1));
+    input_size_list_.push_back(ele_LJtype * sizeof(T1));
+    input_size_list_.push_back(ele_charge * sizeof(T));
+    input_size_list_.push_back(ele_scaler * sizeof(T));
+    input_size_list_.push_back(atom_numbers * sizeof(T1));
+    input_size_list_.push_back(max_nl_numbers * sizeof(T1));
+    input_size_list_.push_back(ele_d_LJ_a * sizeof(T));
+    input_size_list_.push_back(ele_d_LJ_b * sizeof(T));
+
+    workspace_size_list_.push_back(atom_numbers * max_nl_numbers * sizeof(T1));
+    workspace_size_list_.push_back(atom_numbers * sizeof(UINT_VECTOR_LJ_TYPE));
+
+    output_size_list_.push_back(atom_numbers * 3 * sizeof(T));
+  }
+
+ private:
+  size_t ele_uint_crd = 1;
+  size_t ele_LJtype = 1;
+  size_t ele_charge = 1;
+  size_t ele_scaler = 1;
+  size_t ele_nl = 1;
+  size_t ele_d_LJ_a = 1;
+  size_t ele_d_LJ_b = 1;
+
+  std::vector<size_t> input_size_list_;
+  std::vector<size_t> output_size_list_;
+  std::vector<size_t> workspace_size_list_;
+  int atom_numbers;
+  float pme_beta;
+  float cutoff;
+  int max_nl_numbers = 800;
+  struct UINT_VECTOR_LJ_TYPE {
+    unsigned int uint_x;
+    unsigned int uint_y;
+    unsigned int uint_z;
+    int LJ_type;
+    float charge;
+  };
+  struct NEIGHBOR_LIST {
+    int atom_numbers;
+    int *atom_serial;
+  };
+};
+}  // namespace kernel
+}  // namespace mindspore
+#endif
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_atom_energy_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_atom_energy_kernel.cc
new file mode 100644
index 0000000000..131babc923
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_atom_energy_kernel.cc
@@ -0,0 +1,34 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_atom_energy_kernel.h"
+
+namespace mindspore {
+namespace kernel {
+MS_REG_GPU_KERNEL_TWO(Dihedral14CFAtomEnergy,
+                      KernelAttr()
+                        .AddInputAttr(kNumberTypeUInt32)
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddInputAttr(kNumberTypeFloat32)
+                        .AddInputAttr(kNumberTypeFloat32)
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddInputAttr(kNumberTypeFloat32)
+                        .AddOutputAttr(kNumberTypeFloat32),
+                      Dihedral14CFAtomEnergyGpuKernel, float, int)
+
+}  // namespace kernel
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_atom_energy_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_atom_energy_kernel.h
new file mode 100644
index 0000000000..ef0341331a
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_atom_energy_kernel.h
@@ -0,0 +1,114 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
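// NOTE: Dihedral14CFAtomEnergy accumulates the scaled 1-4 Coulomb energy onto the atoms
// of each 1-4 pair. A compact CUDA sketch of that accumulation, one thread per pair,
// with plain float coordinates standing in for the uint_crd/boxlength inputs above:
#include <cuda_runtime.h>

__global__ void Dihedral14CFAtomEnergySketch(int dihedral_14_numbers, const float3 *crd,
                                             const float *charge, const int *a_14,
                                             const int *b_14, const float *cf_scale_factor,
                                             float *atom_ene) {
  int pair = blockIdx.x * blockDim.x + threadIdx.x;
  if (pair >= dihedral_14_numbers) return;
  int i = a_14[pair];
  int j = b_14[pair];
  float dx = crd[j].x - crd[i].x;
  float dy = crd[j].y - crd[i].y;
  float dz = crd[j].z - crd[i].z;
  float r = sqrtf(dx * dx + dy * dy + dz * dz);
  // 1-4 Coulomb term q_i*q_j/r, damped by the per-pair scale factor (1/1.2 in Amber-style
  // force fields); charges are assumed pre-multiplied by the Coulomb constant.
  float ene = cf_scale_factor[pair] * charge[i] * charge[j] / r;
  // An atom participates in many 1-4 pairs, so per-atom accumulation must be atomic.
  atomicAdd(&atom_ene[i], ene);
}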
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_CF_ATOM_ENERGY_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_CF_ATOM_ENERGY_KERNEL_H_ + +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_atom_energy_impl.cuh" + +namespace mindspore { +namespace kernel { + +template +class Dihedral14CFAtomEnergyGpuKernel : public GpuKernel { + public: + Dihedral14CFAtomEnergyGpuKernel() : ele_uint_crd(1) {} + ~Dihedral14CFAtomEnergyGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + dihedral_14_numbers = static_cast(GetAttr(kernel_node, "dihedral_14_numbers")); + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + + auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + auto shape_boxlength_f = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); + auto shape_a_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); + auto shape_b_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); + auto shape_cf_scale_factor = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); + + for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; + for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; + for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; + for (size_t i = 0; i < shape_boxlength_f.size(); i++) ele_boxlength_f *= shape_boxlength_f[i]; + for (size_t i = 0; i < shape_a_14.size(); i++) ele_a_14 *= shape_a_14[i]; + for (size_t i = 0; i < shape_b_14.size(); i++) ele_b_14 *= shape_b_14[i]; + for (size_t i = 0; i < shape_cf_scale_factor.size(); i++) ele_cf_scale_factor *= shape_cf_scale_factor[i]; + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) override { + auto uint_crd_f = GetDeviceAddress(inputs, 0); + auto LJtype = GetDeviceAddress(inputs, 1); + auto charge = GetDeviceAddress(inputs, 2); + auto boxlength_f = GetDeviceAddress(inputs, 3); + auto a_14 = GetDeviceAddress(inputs, 4); + auto b_14 = GetDeviceAddress(inputs, 5); + auto cf_scale_factor = GetDeviceAddress(inputs, 6); + auto ene = GetDeviceAddress(outputs, 0); + + Dihedral14CFAtomEnergy(dihedral_14_numbers, atom_numbers, uint_crd_f, LJtype, charge, boxlength_f, a_14, b_14, + cf_scale_factor, ene, reinterpret_cast(stream_ptr)); + + return true; + 
} + + protected: + void InitSizeLists() override { + input_size_list_.push_back(ele_uint_crd * sizeof(T1)); + input_size_list_.push_back(ele_LJtype * sizeof(T1)); + input_size_list_.push_back(ele_charge * sizeof(T)); + input_size_list_.push_back(ele_boxlength_f * sizeof(T)); + input_size_list_.push_back(ele_a_14 * sizeof(T1)); + input_size_list_.push_back(ele_b_14 * sizeof(T1)); + input_size_list_.push_back(ele_cf_scale_factor * sizeof(T)); + + output_size_list_.push_back(atom_numbers * sizeof(T)); + } + + private: + size_t ele_uint_crd = 1; + size_t ele_LJtype = 1; + size_t ele_charge = 1; + size_t ele_boxlength_f = 1; + size_t ele_a_14 = 1; + size_t ele_b_14 = 1; + size_t ele_cf_scale_factor = 1; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int dihedral_14_numbers; + int atom_numbers; +}; +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_CF_ATOM_ENERGY_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_energy_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_energy_kernel.cc new file mode 100644 index 0000000000..5685e1dab7 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_energy_kernel.cc @@ -0,0 +1,34 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_energy_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(Dihedral14CFEnergy, + KernelAttr() + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + Dihedral14CFEnergyGpuKernel, float, int) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_energy_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_energy_kernel.h new file mode 100644 index 0000000000..3e38cf13ab --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_energy_kernel.h @@ -0,0 +1,114 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
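// NOTE: every wrapper in this patch follows the same three-step GpuKernel contract:
// Init() reads attributes and flattens input shapes into element counts, InitSizeLists()
// turns those counts into byte sizes the runtime allocates, and Launch() receives the
// device buffers in that same order. A stripped-down sketch of the flow with
// hypothetical types (the real interface lives in gpu_kernel.h):
#include <cstddef>
#include <vector>

struct Address {
  void *addr;
  size_t size;
};

class SketchKernel {
 public:
  bool Init(const std::vector<std::vector<size_t>> &input_shapes) {
    for (const auto &shape : input_shapes) {
      size_t elems = 1;
      for (size_t dim : shape) elems *= dim;  // flatten, exactly like the ele_* loops above
      input_size_list_.push_back(elems * sizeof(float));
    }
    return true;
  }
  bool Launch(const std::vector<Address> &inputs) {
    // The runtime hands back one buffer per entry of input_size_list_, in order; the
    // wrappers then reinterpret each pointer to the dtype promised at registration.
    return inputs.size() == input_size_list_.size();
  }

 private:
  std::vector<size_t> input_size_list_;
};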
+ */ + +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_CF_ENERGY_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_CF_ENERGY_KERNEL_H_ + +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cuh" + +namespace mindspore { +namespace kernel { + +template +class Dihedral14CFEnergyGpuKernel : public GpuKernel { + public: + Dihedral14CFEnergyGpuKernel() : ele_uint_crd(1) {} + ~Dihedral14CFEnergyGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + dihedral_14_numbers = static_cast(GetAttr(kernel_node, "dihedral_14_numbers")); + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + + auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + auto shape_boxlength_f = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); + auto shape_a_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); + auto shape_b_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); + auto shape_cf_scale_factor = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); + + for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; + for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; + for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; + for (size_t i = 0; i < shape_boxlength_f.size(); i++) ele_boxlength_f *= shape_boxlength_f[i]; + for (size_t i = 0; i < shape_a_14.size(); i++) ele_a_14 *= shape_a_14[i]; + for (size_t i = 0; i < shape_b_14.size(); i++) ele_b_14 *= shape_b_14[i]; + for (size_t i = 0; i < shape_cf_scale_factor.size(); i++) ele_cf_scale_factor *= shape_cf_scale_factor[i]; + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) override { + auto uint_crd_f = GetDeviceAddress(inputs, 0); + auto LJtype = GetDeviceAddress(inputs, 1); + auto charge = GetDeviceAddress(inputs, 2); + auto boxlength_f = GetDeviceAddress(inputs, 3); + auto a_14 = GetDeviceAddress(inputs, 4); + auto b_14 = GetDeviceAddress(inputs, 5); + auto cf_scale_factor = GetDeviceAddress(inputs, 6); + auto ene = GetDeviceAddress(outputs, 0); + + Dihedral14CFEnergy(dihedral_14_numbers, atom_numbers, uint_crd_f, LJtype, charge, boxlength_f, a_14, b_14, + cf_scale_factor, ene, reinterpret_cast(stream_ptr)); + + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(ele_uint_crd * sizeof(T1)); + input_size_list_.push_back(ele_LJtype * sizeof(T1)); + input_size_list_.push_back(ele_charge * sizeof(T)); + input_size_list_.push_back(ele_boxlength_f * sizeof(T)); + input_size_list_.push_back(ele_a_14 * sizeof(T1)); + input_size_list_.push_back(ele_b_14 * sizeof(T1)); + input_size_list_.push_back(ele_cf_scale_factor * sizeof(T)); + + 
+    output_size_list_.push_back(atom_numbers * sizeof(T));
+  }
+
+ private:
+  size_t ele_uint_crd = 1;
+  size_t ele_LJtype = 1;
+  size_t ele_charge = 1;
+  size_t ele_boxlength_f = 1;
+  size_t ele_a_14 = 1;
+  size_t ele_b_14 = 1;
+  size_t ele_cf_scale_factor = 1;
+
+  std::vector<size_t> input_size_list_;
+  std::vector<size_t> output_size_list_;
+  std::vector<size_t> workspace_size_list_;
+  int dihedral_14_numbers;
+  int atom_numbers;
+};
+}  // namespace kernel
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_CF_ENERGY_KERNEL_H_
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_atom_energy_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_atom_energy_kernel.cc
new file mode 100644
index 0000000000..44631e4ae4
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_atom_energy_kernel.cc
@@ -0,0 +1,36 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_atom_energy_kernel.h"
+
+namespace mindspore {
+namespace kernel {
+MS_REG_GPU_KERNEL_TWO(Dihedral14LJAtomEnergy,
+                      KernelAttr()
+                        .AddInputAttr(kNumberTypeUInt32)
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddInputAttr(kNumberTypeFloat32)
+                        .AddInputAttr(kNumberTypeFloat32)
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddInputAttr(kNumberTypeFloat32)
+                        .AddInputAttr(kNumberTypeFloat32)
+                        .AddInputAttr(kNumberTypeFloat32)
+                        .AddOutputAttr(kNumberTypeFloat32),
+                      Dihedral14LJAtomEnergyGpuKernel, float, int)
+
+}  // namespace kernel
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_atom_energy_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_atom_energy_kernel.h
new file mode 100644
index 0000000000..8cd37d512f
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_atom_energy_kernel.h
@@ -0,0 +1,123 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
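// NOTE: the 1-4 Lennard-Jones terms reuse the A/r^12 - B/r^6 form, with A and B taken
// from the pair's LJ types and damped by lj_scale_factor. A sketch of the per-pair
// energy under the textbook convention (SPONGE's own scaling of the A/B tables and its
// triangular type indexing may differ; pair_type below is a hypothetical precomputed
// index):
#include <cuda_runtime.h>

__global__ void Dihedral14LJEnergySketch(int dihedral_14_numbers, const float3 *crd,
                                         const int *a_14, const int *b_14, const int *pair_type,
                                         const float *lj_type_A, const float *lj_type_B,
                                         const float *lj_scale_factor, float *ene) {
  int pair = blockIdx.x * blockDim.x + threadIdx.x;
  if (pair >= dihedral_14_numbers) return;
  int i = a_14[pair];
  int j = b_14[pair];
  float dx = crd[j].x - crd[i].x;
  float dy = crd[j].y - crd[i].y;
  float dz = crd[j].z - crd[i].z;
  float inv_r2 = 1.0f / (dx * dx + dy * dy + dz * dz);
  float inv_r6 = inv_r2 * inv_r2 * inv_r2;
  int t = pair_type[pair];
  // E = scale * (A/r^12 - B/r^6), one energy slot per 1-4 pair.
  ene[pair] = lj_scale_factor[pair] * (lj_type_A[t] * inv_r6 * inv_r6 - lj_type_B[t] * inv_r6);
}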
+ */ + +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_ATOM_ENERGY_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_ATOM_ENERGY_KERNEL_H_ + +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cuh" + +namespace mindspore { +namespace kernel { + +template +class Dihedral14LJAtomEnergyGpuKernel : public GpuKernel { + public: + Dihedral14LJAtomEnergyGpuKernel() : ele_uint_crd(1) {} + ~Dihedral14LJAtomEnergyGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + dihedral_14_numbers = static_cast(GetAttr(kernel_node, "dihedral_14_numbers")); + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + + auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + auto shape_boxlength_f = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); + auto shape_a_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); + auto shape_b_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); + auto shape_lj_scale_factor = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); + auto shape_LJ_type_A = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); + auto shape_LJ_type_B = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8); + + for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; + for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; + for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; + for (size_t i = 0; i < shape_boxlength_f.size(); i++) ele_boxlength_f *= shape_boxlength_f[i]; + for (size_t i = 0; i < shape_a_14.size(); i++) ele_a_14 *= shape_a_14[i]; + for (size_t i = 0; i < shape_b_14.size(); i++) ele_b_14 *= shape_b_14[i]; + for (size_t i = 0; i < shape_lj_scale_factor.size(); i++) ele_lj_scale_factor *= shape_lj_scale_factor[i]; + for (size_t i = 0; i < shape_LJ_type_A.size(); i++) ele_LJ_type_A *= shape_LJ_type_A[i]; + for (size_t i = 0; i < shape_LJ_type_B.size(); i++) ele_LJ_type_B *= shape_LJ_type_B[i]; + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) override { + auto uint_crd_f = GetDeviceAddress(inputs, 0); + auto LJtype = GetDeviceAddress(inputs, 1); + auto charge = GetDeviceAddress(inputs, 2); + auto boxlength_f = GetDeviceAddress(inputs, 3); + auto a_14 = GetDeviceAddress(inputs, 4); + auto b_14 = GetDeviceAddress(inputs, 5); + auto lj_scale_factor = GetDeviceAddress(inputs, 6); + auto LJ_type_A = GetDeviceAddress(inputs, 7); + auto LJ_type_B = GetDeviceAddress(inputs, 8); + auto ene = GetDeviceAddress(outputs, 0); + + Dihedral14LJAtomEnergy(dihedral_14_numbers, atom_numbers, uint_crd_f, LJtype, charge, boxlength_f, a_14, b_14, + lj_scale_factor, LJ_type_A, LJ_type_B, ene, 
reinterpret_cast(stream_ptr)); + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(ele_uint_crd * sizeof(T1)); + input_size_list_.push_back(ele_LJtype * sizeof(T1)); + input_size_list_.push_back(ele_charge * sizeof(T)); + input_size_list_.push_back(ele_boxlength_f * sizeof(T)); + input_size_list_.push_back(ele_a_14 * sizeof(T1)); + input_size_list_.push_back(ele_b_14 * sizeof(T1)); + input_size_list_.push_back(ele_lj_scale_factor * sizeof(T)); + input_size_list_.push_back(ele_LJ_type_A * sizeof(T)); + input_size_list_.push_back(ele_LJ_type_B * sizeof(T)); + + output_size_list_.push_back(atom_numbers * sizeof(T)); + } + + private: + size_t ele_uint_crd = 1; + size_t ele_LJtype = 1; + size_t ele_charge = 1; + size_t ele_boxlength_f = 1; + size_t ele_a_14 = 1; + size_t ele_b_14 = 1; + size_t ele_lj_scale_factor = 1; + size_t ele_LJ_type_A = 1; + size_t ele_LJ_type_B = 1; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int dihedral_14_numbers; + int atom_numbers; +}; +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_ATOM_ENERGY_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_kernel.cc new file mode 100644 index 0000000000..aa47797a95 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_kernel.cc @@ -0,0 +1,38 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(Dihedral14LJCFForceWithAtomEnergy, + KernelAttr() + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + Dihedral14LJCFForceWithAtomEnergyGpuKernel, float, int) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_kernel.h new file mode 100644 index 0000000000..cf3e3b8313 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_kernel.h @@ -0,0 +1,132 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
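// NOTE: Dihedral14LJCFForceWithAtomEnergy is a fusion -- one pass over the 1-4 pairs
// fills both outputs registered above: the force array (3*N floats) and the per-atom
// energy (N floats), so coordinates and parameters are read once instead of twice.
// A sketch of the fused per-pair update, Coulomb part only for brevity:
#include <cuda_runtime.h>

__global__ void FusedForceEnergySketch(int pair_numbers, const int *a_14, const int *b_14,
                                       const float3 *crd, const float *charge,
                                       const float *cf_scale, float *frc, float *atom_energy) {
  int p = blockIdx.x * blockDim.x + threadIdx.x;
  if (p >= pair_numbers) return;
  int i = a_14[p];
  int j = b_14[p];
  float dx = crd[j].x - crd[i].x;
  float dy = crd[j].y - crd[i].y;
  float dz = crd[j].z - crd[i].z;
  float inv_r = rsqrtf(dx * dx + dy * dy + dz * dz);
  float ene = cf_scale[p] * charge[i] * charge[j] * inv_r;  // E = scaled q_i*q_j/r
  float coef = ene * inv_r * inv_r;                         // |dE/dr|/r for this E
  // The same distance work feeds both outputs; frc is laid out [x0 y0 z0 x1 y1 z1 ...].
  atomicAdd(&frc[3 * i + 0], -coef * dx);
  atomicAdd(&frc[3 * i + 1], -coef * dy);
  atomicAdd(&frc[3 * i + 2], -coef * dz);
  atomicAdd(&frc[3 * j + 0], coef * dx);
  atomicAdd(&frc[3 * j + 1], coef * dy);
  atomicAdd(&frc[3 * j + 2], coef * dz);
  atomicAdd(&atom_energy[i], ene);
}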
+ */ + +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_KERNEL_H_ + +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cuh" + +namespace mindspore { +namespace kernel { + +template +class Dihedral14LJCFForceWithAtomEnergyGpuKernel : public GpuKernel { + public: + Dihedral14LJCFForceWithAtomEnergyGpuKernel() : ele_uint_crd(1) {} + ~Dihedral14LJCFForceWithAtomEnergyGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + dihedral_14_numbers = static_cast(GetAttr(kernel_node, "dihedral_14_numbers")); + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + + auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + auto shape_boxlength_f = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); + auto shape_a_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); + auto shape_b_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); + auto shape_lj_scale_factor = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); + auto shape_cf_scale_factor = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); + auto shape_LJ_type_A = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8); + auto shape_LJ_type_B = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 9); + + for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; + for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; + for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; + for (size_t i = 0; i < shape_boxlength_f.size(); i++) ele_boxlength_f *= shape_boxlength_f[i]; + for (size_t i = 0; i < shape_a_14.size(); i++) ele_a_14 *= shape_a_14[i]; + for (size_t i = 0; i < shape_b_14.size(); i++) ele_b_14 *= shape_b_14[i]; + for (size_t i = 0; i < shape_lj_scale_factor.size(); i++) ele_lj_scale_factor *= shape_lj_scale_factor[i]; + for (size_t i = 0; i < shape_cf_scale_factor.size(); i++) ele_cf_scale_factor *= shape_cf_scale_factor[i]; + for (size_t i = 0; i < shape_LJ_type_A.size(); i++) ele_LJ_type_A *= shape_LJ_type_A[i]; + for (size_t i = 0; i < shape_LJ_type_B.size(); i++) ele_LJ_type_B *= shape_LJ_type_B[i]; + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) override { + auto uint_crd_f = GetDeviceAddress(inputs, 0); + auto LJtype = GetDeviceAddress(inputs, 1); + auto charge = GetDeviceAddress(inputs, 2); + auto boxlength_f = GetDeviceAddress(inputs, 3); + auto a_14 = GetDeviceAddress(inputs, 4); + auto b_14 = GetDeviceAddress(inputs, 5); + auto lj_scale_factor = GetDeviceAddress(inputs, 6); + auto cf_scale_factor = 
GetDeviceAddress(inputs, 7); + auto LJ_type_A = GetDeviceAddress(inputs, 8); + auto LJ_type_B = GetDeviceAddress(inputs, 9); + auto frc_f = GetDeviceAddress(outputs, 0); + auto atom_energy = GetDeviceAddress(outputs, 1); + + Dihedral14LJCFForceWithAtomEnergy(dihedral_14_numbers, atom_numbers, uint_crd_f, LJtype, charge, boxlength_f, a_14, + b_14, lj_scale_factor, cf_scale_factor, LJ_type_A, LJ_type_B, frc_f, atom_energy, + reinterpret_cast(stream_ptr)); + + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(ele_uint_crd * sizeof(T1)); + input_size_list_.push_back(ele_LJtype * sizeof(T1)); + input_size_list_.push_back(ele_charge * sizeof(T)); + input_size_list_.push_back(ele_boxlength_f * sizeof(T)); + input_size_list_.push_back(ele_a_14 * sizeof(T1)); + input_size_list_.push_back(ele_b_14 * sizeof(T1)); + input_size_list_.push_back(ele_lj_scale_factor * sizeof(T)); + input_size_list_.push_back(ele_cf_scale_factor * sizeof(T)); + input_size_list_.push_back(ele_LJ_type_A * sizeof(T)); + input_size_list_.push_back(ele_LJ_type_B * sizeof(T)); + + output_size_list_.push_back(3 * atom_numbers * sizeof(T)); + output_size_list_.push_back(atom_numbers * sizeof(T)); + } + + private: + size_t ele_uint_crd = 1; + size_t ele_LJtype = 1; + size_t ele_charge = 1; + size_t ele_boxlength_f = 1; + size_t ele_a_14 = 1; + size_t ele_b_14 = 1; + size_t ele_lj_scale_factor = 1; + size_t ele_cf_scale_factor = 1; + size_t ele_LJ_type_A = 1; + size_t ele_LJ_type_B = 1; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int dihedral_14_numbers; + int atom_numbers; +}; +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_energy_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_energy_kernel.cc new file mode 100644 index 0000000000..443784541b --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_energy_kernel.cc @@ -0,0 +1,36 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_energy_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(Dihedral14LJEnergy, + KernelAttr() + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + Dihedral14LJEnergyGpuKernel, float, int) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_energy_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_energy_kernel.h new file mode 100644 index 0000000000..95f1ca5b98 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_energy_kernel.h @@ -0,0 +1,124 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_ENERGY_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_ENERGY_KERNEL_H_ + +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cuh" + +namespace mindspore { +namespace kernel { + +template +class Dihedral14LJEnergyGpuKernel : public GpuKernel { + public: + Dihedral14LJEnergyGpuKernel() : ele_uint_crd(1) {} + ~Dihedral14LJEnergyGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + dihedral_14_numbers = static_cast(GetAttr(kernel_node, "dihedral_14_numbers")); + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + + auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + auto shape_boxlength_f = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); + auto shape_a_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); + auto shape_b_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); + auto shape_lj_scale_factor = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); + auto shape_LJ_type_A = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); + auto shape_LJ_type_B = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8); + + for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; + for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; + for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; + for (size_t i 
= 0; i < shape_boxlength_f.size(); i++) ele_boxlength_f *= shape_boxlength_f[i]; + for (size_t i = 0; i < shape_a_14.size(); i++) ele_a_14 *= shape_a_14[i]; + for (size_t i = 0; i < shape_b_14.size(); i++) ele_b_14 *= shape_b_14[i]; + for (size_t i = 0; i < shape_lj_scale_factor.size(); i++) ele_lj_scale_factor *= shape_lj_scale_factor[i]; + for (size_t i = 0; i < shape_LJ_type_A.size(); i++) ele_LJ_type_A *= shape_LJ_type_A[i]; + for (size_t i = 0; i < shape_LJ_type_B.size(); i++) ele_LJ_type_B *= shape_LJ_type_B[i]; + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) override { + auto uint_crd_f = GetDeviceAddress(inputs, 0); + auto LJtype = GetDeviceAddress(inputs, 1); + auto charge = GetDeviceAddress(inputs, 2); + auto boxlength_f = GetDeviceAddress(inputs, 3); + auto a_14 = GetDeviceAddress(inputs, 4); + auto b_14 = GetDeviceAddress(inputs, 5); + auto lj_scale_factor = GetDeviceAddress(inputs, 6); + auto LJ_type_A = GetDeviceAddress(inputs, 7); + auto LJ_type_B = GetDeviceAddress(inputs, 8); + auto ene = GetDeviceAddress(outputs, 0); + + Dihedral14LJEnergy(dihedral_14_numbers, atom_numbers, uint_crd_f, LJtype, charge, boxlength_f, a_14, b_14, + lj_scale_factor, LJ_type_A, LJ_type_B, ene, reinterpret_cast(stream_ptr)); + + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(ele_uint_crd * sizeof(T1)); + input_size_list_.push_back(ele_LJtype * sizeof(T1)); + input_size_list_.push_back(ele_charge * sizeof(T)); + input_size_list_.push_back(ele_boxlength_f * sizeof(T)); + input_size_list_.push_back(ele_a_14 * sizeof(T1)); + input_size_list_.push_back(ele_b_14 * sizeof(T1)); + input_size_list_.push_back(ele_lj_scale_factor * sizeof(T)); + input_size_list_.push_back(ele_LJ_type_A * sizeof(T)); + input_size_list_.push_back(ele_LJ_type_B * sizeof(T)); + + output_size_list_.push_back(atom_numbers * sizeof(T)); + } + + private: + size_t ele_uint_crd = 1; + size_t ele_LJtype = 1; + size_t ele_charge = 1; + size_t ele_boxlength_f = 1; + size_t ele_a_14 = 1; + size_t ele_b_14 = 1; + size_t ele_lj_scale_factor = 1; + size_t ele_LJ_type_A = 1; + size_t ele_LJ_type_B = 1; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int dihedral_14_numbers; + int atom_numbers; +}; +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_ENERGY_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_gpu_kernel.cc new file mode 100644 index 0000000000..e1b6f059a6 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_gpu_kernel.cc @@ -0,0 +1,36 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
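// NOTE: the uint_crd/scaler pair consumed by all of these kernels encodes fractional
// coordinates across the full 32-bit unsigned range, so the minimum-image displacement
// falls out of unsigned wrap-around followed by a signed reinterpretation. A sketch of
// the idea as inferred from the input types here (not a quote of common_sponge.cuh):
#include <cuda_runtime.h>

__device__ float3 PeriodicDisplacementSketch(uint3 crd_i, uint3 crd_j, float3 scaler) {
  // Unsigned subtraction wraps modulo 2^32; casting to int picks the short way around
  // the box, because any separation beyond half the range turns negative.
  int dx = static_cast<int>(crd_j.x - crd_i.x);
  int dy = static_cast<int>(crd_j.y - crd_i.y);
  int dz = static_cast<int>(crd_j.z - crd_i.z);
  // scaler converts integer lattice units back to physical length: box_length / 2^32.
  return make_float3(scaler.x * dx, scaler.y * dy, scaler.z * dz);
}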
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_gpu_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(Dihedral14LJForce, + KernelAttr() + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + Dihedral14LJForceGpuKernel, float, int) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_gpu_kernel.h new file mode 100644 index 0000000000..a1fdb3d3a4 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_gpu_kernel.h @@ -0,0 +1,122 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
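// NOTE: each wrapper forwards a cudaStream_t plus a work count to its .cu counterpart,
// which typically launches one thread per pair or per atom on a ceil-divided grid.
// A sketch of that host-side launch pattern (the block size is an illustrative default;
// the real choices live in the *_impl.cu files):
#include <cuda_runtime.h>

__global__ void PerItemSketch(int n, float *out) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) out[i] = 0.0f;  // placeholder per-item work
}

inline void LaunchPerItem(int n, float *out, cudaStream_t stream) {
  constexpr int kThreadsPerBlock = 128;
  int blocks = (n + kThreadsPerBlock - 1) / kThreadsPerBlock;  // ceil(n / kThreadsPerBlock)
  PerItemSketch<<<blocks, kThreadsPerBlock, 0, stream>>>(n, out);
}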
+ */ + +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_FORCE_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_FORCE_KERNEL_H_ + +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_impl.cuh" + +namespace mindspore { +namespace kernel { + +template +class Dihedral14LJForceGpuKernel : public GpuKernel { + public: + Dihedral14LJForceGpuKernel() : ele_uint_crd(1) {} + ~Dihedral14LJForceGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + dihedral_14_numbers = static_cast(GetAttr(kernel_node, "dihedral_14_numbers")); + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + + auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + auto shape_boxlength_f = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); + auto shape_a_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); + auto shape_b_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); + auto shape_lj_scale_factor = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); + auto shape_LJ_type_A = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); + auto shape_LJ_type_B = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8); + + for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; + for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; + for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; + for (size_t i = 0; i < shape_boxlength_f.size(); i++) ele_boxlength_f *= shape_boxlength_f[i]; + for (size_t i = 0; i < shape_a_14.size(); i++) ele_a_14 *= shape_a_14[i]; + for (size_t i = 0; i < shape_b_14.size(); i++) ele_b_14 *= shape_b_14[i]; + for (size_t i = 0; i < shape_lj_scale_factor.size(); i++) ele_lj_scale_factor *= shape_lj_scale_factor[i]; + for (size_t i = 0; i < shape_LJ_type_A.size(); i++) ele_LJ_type_A *= shape_LJ_type_A[i]; + for (size_t i = 0; i < shape_LJ_type_B.size(); i++) ele_LJ_type_B *= shape_LJ_type_B[i]; + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) override { + auto uint_crd_f = GetDeviceAddress(inputs, 0); + auto LJtype = GetDeviceAddress(inputs, 1); + auto charge = GetDeviceAddress(inputs, 2); + auto boxlength_f = GetDeviceAddress(inputs, 3); + auto a_14 = GetDeviceAddress(inputs, 4); + auto b_14 = GetDeviceAddress(inputs, 5); + auto lj_scale_factor = GetDeviceAddress(inputs, 6); + auto LJ_type_A = GetDeviceAddress(inputs, 7); + auto LJ_type_B = GetDeviceAddress(inputs, 8); + auto frc_f = GetDeviceAddress(outputs, 0); + Dihedral14LJForce(dihedral_14_numbers, atom_numbers, uint_crd_f, LJtype, charge, boxlength_f, a_14, b_14, + lj_scale_factor, LJ_type_A, LJ_type_B, frc_f, reinterpret_cast(stream_ptr)); + return true; + } + + 
protected: + void InitSizeLists() override { + input_size_list_.push_back(ele_uint_crd * sizeof(T1)); + input_size_list_.push_back(ele_LJtype * sizeof(T1)); + input_size_list_.push_back(ele_charge * sizeof(T)); + input_size_list_.push_back(ele_boxlength_f * sizeof(T)); + input_size_list_.push_back(ele_a_14 * sizeof(T1)); + input_size_list_.push_back(ele_b_14 * sizeof(T1)); + input_size_list_.push_back(ele_lj_scale_factor * sizeof(T)); + input_size_list_.push_back(ele_LJ_type_A * sizeof(T)); + input_size_list_.push_back(ele_LJ_type_B * sizeof(T)); + + output_size_list_.push_back(atom_numbers * 3 * sizeof(T)); + } + + private: + size_t ele_uint_crd = 1; + size_t ele_LJtype = 1; + size_t ele_charge = 1; + size_t ele_boxlength_f = 1; + size_t ele_a_14 = 1; + size_t ele_b_14 = 1; + size_t ele_lj_scale_factor = 1; + size_t ele_LJ_type_A = 1; + size_t ele_LJ_type_B = 1; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int dihedral_14_numbers; + int atom_numbers; +}; +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_FORCE_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_with_direct_cf_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_with_direct_cf_kernel.cc new file mode 100644 index 0000000000..36ef602f48 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_with_direct_cf_kernel.cc @@ -0,0 +1,37 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_with_direct_cf_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(Dihedral14LJForceWithDirectCF, + KernelAttr() + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + Dihedral14LJForceWithDirectCFGpuKernel, float, int) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_with_direct_cf_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_with_direct_cf_kernel.h new file mode 100644 index 0000000000..d0911d0889 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_with_direct_cf_kernel.h @@ -0,0 +1,130 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
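// NOTE: both LJForceWithPMEDirectForce earlier in this patch and the "DirectCF" fusion
// here pair short-range LJ work with the direct-space part of an Ewald/PME Coulomb sum,
// which screens 1/r with erfc(pme_beta * r). A sketch of the per-pair direct-space
// force, derived from E = q_i*q_j*erfc(beta*r)/r (illustrative, not the shipped kernel):
#include <cuda_runtime.h>
#include <math.h>

__device__ float3 PmeDirectForceSketch(float3 dr, float qi, float qj, float beta) {
  const float kTwoOverSqrtPi = 1.1283791670955126f;  // 2/sqrt(pi)
  float r2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
  float r = sqrtf(r2);
  // F_i = -qi*qj*(erfc(beta*r)/r^3 + (2*beta/sqrt(pi))*exp(-beta^2*r^2)/r^2) * dr
  float coef = -qi * qj * (erfcf(beta * r) / (r2 * r) +
                           kTwoOverSqrtPi * beta * expf(-beta * beta * r2) / r2);
  return make_float3(coef * dr.x, coef * dr.y, coef * dr.z);
}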
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_FORCE_WITH_DIRECT_CF_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_FORCE_WITH_DIRECT_CF_KERNEL_H_ + +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_with_direct_cf_impl.cuh" + +namespace mindspore { +namespace kernel { + +template +class Dihedral14LJForceWithDirectCFGpuKernel : public GpuKernel { + public: + Dihedral14LJForceWithDirectCFGpuKernel() : ele_uint_crd(1) {} + ~Dihedral14LJForceWithDirectCFGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + dihedral_14_numbers = static_cast(GetAttr(kernel_node, "dihedral_14_numbers")); + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + + auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + auto shape_boxlength_f = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); + auto shape_a_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); + auto shape_b_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); + auto shape_lj_scale_factor = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); + auto shape_cf_scale_factor = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); + auto shape_LJ_type_A = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8); + auto shape_LJ_type_B = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 9); + + for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; + for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; + for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; + for (size_t i = 0; i < shape_boxlength_f.size(); i++) ele_boxlength_f *= shape_boxlength_f[i]; + for (size_t i = 0; i < shape_a_14.size(); i++) ele_a_14 *= shape_a_14[i]; + for (size_t i = 0; i < shape_b_14.size(); i++) ele_b_14 *= shape_b_14[i]; + for (size_t i = 0; i < shape_lj_scale_factor.size(); i++) ele_lj_scale_factor *= shape_lj_scale_factor[i]; + for (size_t i = 0; i < shape_cf_scale_factor.size(); i++) ele_cf_scale_factor *= shape_cf_scale_factor[i]; + for (size_t i = 0; i < shape_LJ_type_A.size(); i++) ele_LJ_type_A *= shape_LJ_type_A[i]; + for (size_t i = 0; i < shape_LJ_type_B.size(); i++) ele_LJ_type_B *= shape_LJ_type_B[i]; + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void 
*stream_ptr) override { + auto uint_crd_f = GetDeviceAddress(inputs, 0); + auto LJtype = GetDeviceAddress(inputs, 1); + auto charge = GetDeviceAddress(inputs, 2); + auto boxlength_f = GetDeviceAddress(inputs, 3); + auto a_14 = GetDeviceAddress(inputs, 4); + auto b_14 = GetDeviceAddress(inputs, 5); + auto lj_scale_factor = GetDeviceAddress(inputs, 6); + auto cf_scale_factor = GetDeviceAddress(inputs, 7); + auto LJ_type_A = GetDeviceAddress(inputs, 8); + auto LJ_type_B = GetDeviceAddress(inputs, 9); + auto frc_f = GetDeviceAddress(outputs, 0); + + Dihedral14LJForceWithDirectCF(dihedral_14_numbers, atom_numbers, uint_crd_f, LJtype, charge, boxlength_f, a_14, + b_14, lj_scale_factor, cf_scale_factor, LJ_type_A, LJ_type_B, frc_f, + reinterpret_cast(stream_ptr)); + + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(ele_uint_crd * sizeof(T1)); + input_size_list_.push_back(ele_LJtype * sizeof(T1)); + input_size_list_.push_back(ele_charge * sizeof(T)); + input_size_list_.push_back(ele_boxlength_f * sizeof(T)); + input_size_list_.push_back(ele_a_14 * sizeof(T1)); + input_size_list_.push_back(ele_b_14 * sizeof(T1)); + input_size_list_.push_back(ele_lj_scale_factor * sizeof(T)); + input_size_list_.push_back(ele_cf_scale_factor * sizeof(T)); + input_size_list_.push_back(ele_LJ_type_A * sizeof(T)); + input_size_list_.push_back(ele_LJ_type_B * sizeof(T)); + + output_size_list_.push_back(atom_numbers * 3 * sizeof(T)); + } + + private: + size_t ele_uint_crd = 1; + size_t ele_LJtype = 1; + size_t ele_charge = 1; + size_t ele_boxlength_f = 1; + size_t ele_a_14 = 1; + size_t ele_b_14 = 1; + size_t ele_lj_scale_factor = 1; + size_t ele_cf_scale_factor = 1; + size_t ele_LJ_type_A = 1; + size_t ele_LJ_type_B = 1; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int dihedral_14_numbers; + int atom_numbers; +}; +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_FORCE_WITH_DIRECT_CF_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.cc new file mode 100644 index 0000000000..f0e097c7a4 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.cc @@ -0,0 +1,45 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
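Every kernel in this patch follows the same size bookkeeping seen above: Init() multiplies out each input's inferred shape into an ele_* element count, and InitSizeLists() converts those counts into byte sizes for the framework's allocator. A standalone sketch of that pattern:

#include <cstddef>
#include <cstdio>
#include <functional>
#include <numeric>
#include <vector>

// Element count of a tensor shape, exactly what the ele_* loops in Init() compute.
static size_t ElementCount(const std::vector<size_t> &shape) {
  return std::accumulate(shape.begin(), shape.end(), static_cast<size_t>(1),
                         std::multiplies<size_t>());
}

int main() {
  std::vector<size_t> shape_uint_crd = {128, 3};  // hypothetical [N, 3] coordinate tensor
  std::vector<size_t> shape_a_14 = {300};         // hypothetical [M] pair-index tensor
  std::printf("uint_crd_f bytes: %zu\n", ElementCount(shape_uint_crd) * sizeof(unsigned int));
  std::printf("a_14 bytes:       %zu\n", ElementCount(shape_a_14) * sizeof(int));
  return 0;
}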
+ */ + +#include "backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(NeighborListUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeFloat32), + NeighborListUpdateGpuKernel, int, float) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.h new file mode 100644 index 0000000000..198a04dd7e --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.h @@ -0,0 +1,170 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
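NeighborListUpdate is driven almost entirely by attributes rather than tensor inputs. The squared thresholds it reads (cutoff_square, half_skin_square, cutoff_with_skin_square) look like precomputed derivatives of the skin and cutoff defaults set in the constructor below; the relations in this sketch are inferred from the attribute names and standard Verlet-list practice, not stated anywhere in the patch.

#include <cstdio>

// Hypothetical helper deriving the squared thresholds NeighborListUpdate receives
// as attributes. Defaults match the kernel's constructor (skin = 2.0, cutoff = 10.0);
// the formulas are assumptions based on the names.
struct NeighborListConfig {
  float skin = 2.0f;
  float cutoff = 10.0f;
  float CutoffSquare() const { return cutoff * cutoff; }
  float HalfSkinSquare() const { return 0.25f * skin * skin; }  // (skin/2)^2 refresh test
  float CutoffWithSkin() const { return cutoff + skin; }
  float CutoffWithSkinSquare() const { float c = CutoffWithSkin(); return c * c; }
};

int main() {
  NeighborListConfig cfg;
  std::printf("cutoff_square=%.1f half_skin_square=%.1f cutoff_with_skin_square=%.1f\n",
              cfg.CutoffSquare(), cfg.HalfSkinSquare(), cfg.CutoffWithSkinSquare());
  return 0;
}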
+ */ + +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_NEIGHBOR_LIST_UPDATE_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_NEIGHBOR_LIST_UPDATE_KERNEL_H_ + +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh" + +namespace mindspore { +namespace kernel { +template +class NeighborListUpdateGpuKernel : public GpuKernel { + public: + NeighborListUpdateGpuKernel() : skin(2.0), cutoff(10.0), max_atom_in_grid_numbers(64), max_neighbor_numbers(800) {} + ~NeighborListUpdateGpuKernel() override = default; + bool Init(const CNodePtr &kernel_node) override { + grid_numbers = static_cast(GetAttr(kernel_node, "grid_numbers")); + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + refresh_count = static_cast(GetAttr(kernel_node, "refresh_count")); + refresh_interval = static_cast(GetAttr(kernel_node, "refresh_interval")); + not_first_time = static_cast(GetAttr(kernel_node, "not_first_time")); + Nxy = static_cast(GetAttr(kernel_node, "Nxy")); + excluded_atom_numbers = static_cast(GetAttr(kernel_node, "excluded_atom_numbers")); + + cutoff_square = static_cast(GetAttr(kernel_node, "cutoff_square")); + half_skin_square = static_cast(GetAttr(kernel_node, "half_skin_square")); + cutoff_with_skin = static_cast(GetAttr(kernel_node, "cutoff_with_skin")); + half_cutoff_with_skin = static_cast(GetAttr(kernel_node, "half_cutoff_with_skin")); + cutoff_with_skin_square = static_cast(GetAttr(kernel_node, "cutoff_with_skin_square")); + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &workspaces, + const std::vector &outputs, void *stream_ptr) override { + auto atom_numbers_in_grid_bucket = GetDeviceAddress(inputs, 0); + auto bucket = GetDeviceAddress(inputs, 1); + auto crd = GetDeviceAddress(inputs, 2); + auto box_length = GetDeviceAddress(inputs, 3); + auto grid_N = GetDeviceAddress(inputs, 4); + auto grid_length_inverse = GetDeviceAddress(inputs, 5); + auto atom_in_grid_serial = GetDeviceAddress(inputs, 6); + auto old_crd = GetDeviceAddress(inputs, 7); + auto crd_to_uint_crd_cof = GetDeviceAddress(inputs, 8); + auto uint_crd = GetDeviceAddress(inputs, 9); + auto gpointer = GetDeviceAddress(inputs, 10); + auto nl_atom_numbers = GetDeviceAddress(inputs, 11); + auto nl_atom_serial = GetDeviceAddress(inputs, 12); + auto uint_dr_to_dr_cof = GetDeviceAddress(inputs, 13); + auto excluded_list_start = GetDeviceAddress(inputs, 14); + auto excluded_list = GetDeviceAddress(inputs, 15); + auto excluded_numbers = GetDeviceAddress(inputs, 16); + auto need_refresh_flag = GetDeviceAddress(inputs, 17); + + GRID_BUCKET *d_bucket = reinterpret_cast(GetDeviceAddress(workspaces, 0)); + GRID_POINTER *d_gpointer = reinterpret_cast(GetDeviceAddress(workspaces, 1)); + NEIGHBOR_LIST *nl = GetDeviceAddress(workspaces, 2); + float *half_crd_to_uint_crd_cof = GetDeviceAddress(workspaces, 3); + + std::vector h_bucket(grid_numbers); + for (size_t i = 0; i < h_bucket.size(); i += 1) { + h_bucket[i].atom_serial = bucket + i * max_atom_in_grid_numbers; + } 
+ std::vector h_gpointer(grid_numbers); + for (size_t i = 0; i < h_gpointer.size(); i += 1) { + h_gpointer[i].grid_serial = gpointer + i * 125; + } + + cudaMemcpyAsync(d_bucket, h_bucket.data(), sizeof(GRID_BUCKET) * grid_numbers, cudaMemcpyHostToDevice, + reinterpret_cast(stream_ptr)); + cudaMemcpyAsync(d_gpointer, h_gpointer.data(), sizeof(GRID_POINTER) * grid_numbers, cudaMemcpyHostToDevice, + reinterpret_cast(stream_ptr)); + Construct_Neighbor_List(atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl, + reinterpret_cast(stream_ptr)); + + Neighbor_List_Update(grid_numbers, atom_numbers, refresh_count, refresh_interval, not_first_time, skin, Nxy, + cutoff_square, cutoff_with_skin_square, grid_N, box_length, atom_numbers_in_grid_bucket, + grid_length_inverse, atom_in_grid_serial, d_bucket, crd, old_crd, crd_to_uint_crd_cof, + half_crd_to_uint_crd_cof, uint_crd, uint_dr_to_dr_cof, d_gpointer, nl, excluded_list_start, + excluded_list, excluded_numbers, half_skin_square, need_refresh_flag, + reinterpret_cast(stream_ptr)); + + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(sizeof(int) * grid_numbers); + input_size_list_.push_back(sizeof(int) * max_atom_in_grid_numbers * grid_numbers); + input_size_list_.push_back(sizeof(VECTOR) * atom_numbers); + input_size_list_.push_back(sizeof(VECTOR)); + + input_size_list_.push_back(sizeof(INT_VECTOR)); + input_size_list_.push_back(sizeof(VECTOR)); + input_size_list_.push_back(sizeof(int) * atom_numbers); + + input_size_list_.push_back(sizeof(VECTOR) * atom_numbers); + input_size_list_.push_back(sizeof(VECTOR)); + input_size_list_.push_back(sizeof(UNSIGNED_INT_VECTOR) * atom_numbers); + + input_size_list_.push_back(sizeof(int) * grid_numbers * 125); + input_size_list_.push_back(sizeof(int) * atom_numbers); + input_size_list_.push_back(sizeof(int) * atom_numbers * max_neighbor_numbers); + input_size_list_.push_back(sizeof(VECTOR)); + + input_size_list_.push_back(sizeof(int) * atom_numbers); + input_size_list_.push_back(sizeof(int) * excluded_atom_numbers); + input_size_list_.push_back(sizeof(int) * atom_numbers); + + input_size_list_.push_back(sizeof(int)); + + workspace_size_list_.push_back(sizeof(GRID_BUCKET) * grid_numbers); + workspace_size_list_.push_back(sizeof(GRID_POINTER) * grid_numbers); + workspace_size_list_.push_back(sizeof(NEIGHBOR_LIST) * atom_numbers); + workspace_size_list_.push_back(sizeof(float) * 3); + + output_size_list_.push_back(sizeof(float)); + } + + private: + float skin; + float cutoff; + int not_first_time; + int atom_numbers; + int grid_numbers; + int refresh_count; + int refresh_interval; + int Nxy; + int max_atom_in_grid_numbers; + int max_neighbor_numbers; + int excluded_atom_numbers; + float half_skin_square; + float cutoff_square; + float cutoff_with_skin; + float half_cutoff_with_skin; + float cutoff_with_skin_square; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; +}; +} // namespace kernel +} // namespace mindspore + +#endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nvtit/md_iteration_leap_frog_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nvtit/md_iteration_leap_frog_kernel.cc new file mode 100644 index 0000000000..f55a5c2690 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nvtit/md_iteration_leap_frog_kernel.cc @@ -0,0 +1,32 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the 
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/kernel_compiler/gpu/sponge/nvtit/md_iteration_leap_frog_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_ONE(MDIterationLeapFrog, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + MDIterationLeapFrogGpuKernel, float) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nvtit/md_iteration_leap_frog_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nvtit/md_iteration_leap_frog_kernel.h new file mode 100644 index 0000000000..c7a121ce1d --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nvtit/md_iteration_leap_frog_kernel.h @@ -0,0 +1,115 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NVTIT_MD_ITERATION_LEAP_FROG_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NVTIT_MD_ITERATION_LEAP_FROG_KERNEL_H_ + +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cuh" + +namespace mindspore { +namespace kernel { + +template +class MDIterationLeapFrogGpuKernel : public GpuKernel { + public: + MDIterationLeapFrogGpuKernel() : ele_mass_inverse(1) {} + ~MDIterationLeapFrogGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + float4_numbers = static_cast(GetAttr(kernel_node, "float4_numbers")); + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + half_dt = static_cast(GetAttr(kernel_node, "half_dt")); + dt = static_cast(GetAttr(kernel_node, "dt")); + exp_gamma = static_cast(GetAttr(kernel_node, "exp_gamma")); + is_max_velocity = static_cast(GetAttr(kernel_node, "is_max_velocity")); + max_velocity = static_cast(GetAttr(kernel_node, "max_velocity")); + + auto shape_mass_inverse = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto shape_sqrt_mass = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + + for (size_t i = 0; i < shape_mass_inverse.size(); i++) ele_mass_inverse *= shape_mass_inverse[i]; + for (size_t i = 0; i < shape_sqrt_mass.size(); i++) ele_sqrt_mass *= shape_sqrt_mass[i]; + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) override { + auto d_mass_inverse = GetDeviceAddress(inputs, 0); + auto d_sqrt_mass = GetDeviceAddress(inputs, 1); + + auto vel_f = GetDeviceAddress(outputs, 0); + auto crd_f = GetDeviceAddress(outputs, 1); + auto frc_f = GetDeviceAddress(outputs, 2); + auto acc_f = GetDeviceAddress(outputs, 3); + + MDIterationLeapFrog(float4_numbers, atom_numbers, half_dt, dt, exp_gamma, is_max_velocity, max_velocity, + d_mass_inverse, d_sqrt_mass, vel_f, crd_f, frc_f, acc_f, + reinterpret_cast(stream_ptr)); + + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(ele_mass_inverse * sizeof(T)); + input_size_list_.push_back(ele_sqrt_mass * sizeof(T)); + + output_size_list_.push_back(3 * atom_numbers * sizeof(T)); + output_size_list_.push_back(3 * atom_numbers * sizeof(T)); + output_size_list_.push_back(3 * atom_numbers * sizeof(T)); + output_size_list_.push_back(3 * atom_numbers * sizeof(T)); + } + + private: + size_t ele_mass_inverse = 1; + size_t ele_sqrt_mass = 1; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int float4_numbers; + int atom_numbers; + float half_dt; + float dt; + float exp_gamma; + int
is_max_velocity; + float max_velocity; +}; +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NVTIT_MD_ITERATION_LEAP_FROG_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_energy_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_energy_kernel.cc new file mode 100644 index 0000000000..94e01ff08d --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_energy_kernel.cc @@ -0,0 +1,38 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "backend/kernel_compiler/gpu/sponge/pme/pme_energy_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(PMEEnergy, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + PMEEnergyGpuKernel, float, int) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_energy_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_energy_kernel.h new file mode 100644 index 0000000000..37834b078c --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_energy_kernel.h @@ -0,0 +1,147 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
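The attribute set read in Init() above (half_dt, dt, exp_gamma, is_max_velocity, max_velocity) points to a velocity-damped leap-frog step with an optional speed clamp. One plausible reading of the per-atom update, offered purely as a sketch since the .cu implementation is not reproduced here:

#include <cmath>
#include <cstdio>

int main() {
  // Hypothetical single-atom state; the real kernel runs this per atom (or per float4 packet).
  float vel[3] = {0.10f, -0.20f, 0.05f};
  float crd[3] = {1.0f, 2.0f, 3.0f};
  float frc[3] = {0.30f, 0.00f, -0.10f};
  float inverse_mass = 1.0f / 12.0f;
  float dt = 0.001f;
  float exp_gamma = 0.98f;  // assumed exp(-gamma*dt)-style damping factor
  bool is_max_velocity = true;
  float max_velocity = 20.0f;

  for (int d = 0; d < 3; ++d) {
    float acc = frc[d] * inverse_mass;       // a = F / m
    vel[d] = exp_gamma * vel[d] + dt * acc;  // damped velocity update
    crd[d] += dt * vel[d];                   // leap-frog position update
  }
  if (is_max_velocity) {
    float v = std::sqrt(vel[0] * vel[0] + vel[1] * vel[1] + vel[2] * vel[2]);
    if (v > max_velocity)
      for (int d = 0; d < 3; ++d) vel[d] *= max_velocity / v;  // clamp speed
  }
  std::printf("crd = (%f, %f, %f)\n", crd[0], crd[1], crd[2]);
  return 0;
}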
+ */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_ENERGY_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_ENERGY_KERNEL_H_ +#include +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_impl.cuh" +namespace mindspore { +namespace kernel { +template +class PMEEnergyGpuKernel : public GpuKernel { + public: + PMEEnergyGpuKernel() : ele_uint_crd(1) {} + ~PMEEnergyGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + beta = static_cast(GetAttr(kernel_node, "beta")); + fftx = static_cast(GetAttr(kernel_node, "fftx")); + ffty = static_cast(GetAttr(kernel_node, "ffty")); + fftz = static_cast(GetAttr(kernel_node, "fftz")); + PME_Nall = fftx * ffty * fftz; + PME_Nfft = fftx * ffty * (fftz / 2 + 1); + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) override { + auto boxlength = GetDeviceAddress(inputs, 0); + auto uint_crd = GetDeviceAddress(inputs, 1); + auto charge = GetDeviceAddress(inputs, 2); + auto nl_numbers = GetDeviceAddress(inputs, 3); + auto nl_serial = GetDeviceAddress(inputs, 4); + auto scaler = GetDeviceAddress(inputs, 5); + auto excluded_list_start = GetDeviceAddress(inputs, 6); + auto excluded_list = GetDeviceAddress(inputs, 7); + auto excluded_atom_numbers = GetDeviceAddress(inputs, 8); + + auto pme_uxyz = GetDeviceAddress(workspace, 0); // workspace + auto pme_frxyz = GetDeviceAddress(workspace, 1); // workspace + auto pme_q = GetDeviceAddress(workspace, 2); // workspace + auto pme_fq = GetDeviceAddress(workspace, 3); // workspace + auto pme_atom_near = GetDeviceAddress(workspace, 4); // workspace + auto pme_bc = GetDeviceAddress(workspace, 5); // workspace + auto pme_kxyz = GetDeviceAddress(workspace, 6); // workspace + auto nl = GetDeviceAddress(workspace, 7); + + auto reciprocal_ene = GetDeviceAddress(outputs, 0); + auto self_ene = GetDeviceAddress(outputs, 1); + auto direct_ene = GetDeviceAddress(outputs, 2); + auto correction_ene = GetDeviceAddress(outputs, 3); + + PMEEnergy(fftx, ffty, fftz, atom_numbers, beta, boxlength, pme_bc, pme_uxyz, pme_frxyz, pme_q, pme_fq, + pme_atom_near, pme_kxyz, uint_crd, charge, nl_numbers, nl_serial, nl, scaler, excluded_list_start, + excluded_list, excluded_atom_numbers, reciprocal_ene, self_ene, direct_ene, correction_ene, + reinterpret_cast(stream_ptr)); + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(sizeof(VECTOR)); + input_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR)); + input_size_list_.push_back(atom_numbers * sizeof(VECTOR)); + input_size_list_.push_back(atom_numbers * sizeof(T1)); + input_size_list_.push_back(max_nl_numbers * sizeof(T1)); + input_size_list_.push_back(atom_numbers * sizeof(VECTOR)); + + input_size_list_.push_back(atom_numbers * sizeof(T1)); + input_size_list_.push_back(atom_numbers * 
sizeof(T1)); + input_size_list_.push_back(atom_numbers * sizeof(T1)); + + workspace_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR)); + workspace_size_list_.push_back(atom_numbers * sizeof(VECTOR)); + workspace_size_list_.push_back(PME_Nall * sizeof(T)); + workspace_size_list_.push_back(PME_Nfft * sizeof(cufftComplex)); + workspace_size_list_.push_back(atom_numbers * 64 * sizeof(int)); + workspace_size_list_.push_back(PME_Nfft * sizeof(float)); + workspace_size_list_.push_back(64 * sizeof(UNSIGNED_INT_VECTOR)); + workspace_size_list_.push_back(atom_numbers * max_nl_numbers * sizeof(T1)); + + output_size_list_.push_back(sizeof(T)); + output_size_list_.push_back(sizeof(T)); + output_size_list_.push_back(sizeof(T)); + output_size_list_.push_back(sizeof(T)); + } + + private: + size_t ele_uint_crd = 1; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int atom_numbers; + int max_nl_numbers = 800; + int fftx; + int ffty; + int fftz; + float beta; + int PME_Nall; + int PME_Nfft; + struct VECTOR { + float x; + float y; + float z; + }; + + struct UNSIGNED_INT_VECTOR { + unsigned int uint_x; + unsigned int uint_y; + unsigned int uint_z; + }; + + struct NEIGHBOR_LIST { + int atom_numbers; + int *atom_serial; + }; +}; +} // namespace kernel +} // namespace mindspore +#endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_kernel.cc new file mode 100644 index 0000000000..822ccb881c --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_kernel.cc @@ -0,0 +1,32 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(PMEExcludedForce, + KernelAttr() + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeFloat32), + PMEExcludedForceGpuKernel, float, int) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_kernel.h new file mode 100644 index 0000000000..4eca3b7ffd --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_kernel.h @@ -0,0 +1,95 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
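Both PME kernels size their grids the same way: PME_Nall = fftx*ffty*fftz real values for the charge grid pme_q, and PME_Nfft = fftx*ffty*(fftz/2 + 1) complex values for its spectrum pme_fq. The (fftz/2 + 1) factor is the standard storage for a real-to-complex FFT, which drops the Hermitian-redundant half along the last axis; it is also why the CMake change further below links libcufft. The byte accounting in miniature, assuming cuFFT's cufftComplex layout of two floats:

#include <cstdio>

int main() {
  const int fftx = 32, ffty = 32, fftz = 32;          // hypothetical PME grid
  const int PME_Nall = fftx * ffty * fftz;            // real-space charge grid (pme_q)
  const int PME_Nfft = fftx * ffty * (fftz / 2 + 1);  // R2C spectrum, redundant half dropped (pme_fq)
  std::printf("pme_q:  %d floats (%zu bytes)\n", PME_Nall, PME_Nall * sizeof(float));
  std::printf("pme_fq: %d complex (%zu bytes)\n", PME_Nfft, PME_Nfft * 2 * sizeof(float));
  return 0;
}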
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_EXCLUDED_FORCE_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_EXCLUDED_FORCE_KERNEL_H_ +#include +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_excluded_force_impl.cuh" +namespace mindspore { +namespace kernel { +template +class PMEExcludedForceGpuKernel : public GpuKernel { + public: + PMEExcludedForceGpuKernel() : ele_uint_crd(1) {} + ~PMEExcludedForceGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + beta = static_cast(GetAttr(kernel_node, "beta")); + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) override { + auto uint_crd = GetDeviceAddress(inputs, 0); + auto scaler = GetDeviceAddress(inputs, 1); + auto charge = GetDeviceAddress(inputs, 2); + auto excluded_list_start = GetDeviceAddress(inputs, 3); + auto excluded_list = GetDeviceAddress(inputs, 4); + auto excluded_atom_numbers = GetDeviceAddress(inputs, 5); + + auto force = GetDeviceAddress(outputs, 0); + PMEExcludedForce(atom_numbers, beta, uint_crd, scaler, charge, excluded_list_start, excluded_list, + excluded_atom_numbers, force, reinterpret_cast(stream_ptr)); + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR)); + input_size_list_.push_back(atom_numbers * sizeof(VECTOR)); + input_size_list_.push_back(atom_numbers * sizeof(T)); + input_size_list_.push_back(atom_numbers * sizeof(T1)); + input_size_list_.push_back(atom_numbers * sizeof(T1)); + input_size_list_.push_back(atom_numbers * sizeof(T1)); + + output_size_list_.push_back(atom_numbers * 3 * sizeof(T)); + } + + private: + size_t ele_uint_crd = 1; + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int atom_numbers; + float beta; + struct VECTOR { + float x; + float y; + float z; + }; + + struct UNSIGNED_INT_VECTOR { + unsigned int uint_x; + unsigned int uint_y; + unsigned int uint_z; + }; +}; +} // namespace kernel +} // namespace mindspore +#endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_kernel.cc new file mode 100644 index 0000000000..94c2e7130f --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_kernel.cc @@ -0,0 +1,29 @@ +/** + *
Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(PMEReciprocalForce, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + PMEReciprocalForceGpuKernel, float, int) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_kernel.h new file mode 100644 index 0000000000..161fb6ccc5 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_kernel.h @@ -0,0 +1,119 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
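The three integer inputs of PMEExcludedForce above encode a compressed per-atom exclusion table: excluded_list_start[i] is atom i's offset into the flat excluded_list, and the per-atom count says how many entries belong to it. The names are the kernel's; the walk below and its data are purely illustrative.

#include <cstdio>
#include <vector>

int main() {
  // Hypothetical CSR-style exclusion data for 3 atoms.
  std::vector<int> excluded_list_start = {0, 2, 3};  // offset of each atom's slice
  std::vector<int> excluded_numbers = {2, 1, 0};     // entries per atom
  std::vector<int> excluded_list = {1, 2, 2};        // flat partner indices

  for (size_t i = 0; i < excluded_list_start.size(); ++i) {
    std::printf("atom %zu excludes:", i);
    for (int k = 0; k < excluded_numbers[i]; ++k)
      std::printf(" %d", excluded_list[excluded_list_start[i] + k]);
    std::printf("\n");
  }
  return 0;
}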
+ */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_RECIPROCAL_FORCE_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_RECIPROCAL_FORCE_KERNEL_H_ +#include +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cuh" +namespace mindspore { +namespace kernel { +template +class PMEReciprocalForceGpuKernel : public GpuKernel { + public: + PMEReciprocalForceGpuKernel() : ele_uint_crd(1) {} + ~PMEReciprocalForceGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + beta = static_cast(GetAttr(kernel_node, "beta")); + fftx = static_cast(GetAttr(kernel_node, "fftx")); + ffty = static_cast(GetAttr(kernel_node, "ffty")); + fftz = static_cast(GetAttr(kernel_node, "fftz")); + PME_Nall = fftx * ffty * fftz; + PME_Nfft = fftx * ffty * (fftz / 2 + 1); + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) override { + auto boxlength = GetDeviceAddress(inputs, 0); + auto uint_crd = GetDeviceAddress(inputs, 1); + auto charge = GetDeviceAddress(inputs, 2); + + auto pme_uxyz = GetDeviceAddress(workspace, 0); // workspace + auto pme_frxyz = GetDeviceAddress(workspace, 1); // workspace + auto pme_q = GetDeviceAddress(workspace, 2); // workspace + auto pme_fq = GetDeviceAddress(workspace, 3); // workspace + auto pme_atom_near = GetDeviceAddress(workspace, 4); // workspace + auto pme_bc = GetDeviceAddress(workspace, 5); // workspace + auto pme_kxyz = GetDeviceAddress(workspace, 6); // workspace + + auto force = GetDeviceAddress(outputs, 0); + + PMEReciprocalForce(fftx, ffty, fftz, atom_numbers, beta, pme_bc, pme_uxyz, pme_frxyz, pme_q, pme_fq, pme_atom_near, + pme_kxyz, boxlength, uint_crd, charge, force, reinterpret_cast(stream_ptr)); + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(sizeof(VECTOR)); + input_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR)); + input_size_list_.push_back(atom_numbers * sizeof(VECTOR)); + + workspace_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR)); + workspace_size_list_.push_back(atom_numbers * sizeof(VECTOR)); + workspace_size_list_.push_back(PME_Nall * sizeof(T)); + workspace_size_list_.push_back(PME_Nfft * sizeof(cufftComplex)); + workspace_size_list_.push_back(atom_numbers * 64 * sizeof(int)); + workspace_size_list_.push_back(PME_Nfft * sizeof(float)); + workspace_size_list_.push_back(64 * sizeof(UNSIGNED_INT_VECTOR)); + + output_size_list_.push_back(atom_numbers * sizeof(VECTOR)); + } + + private: + size_t ele_uint_crd = 1; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int atom_numbers; + int fftx; + int ffty; + int fftz; + float beta; + int PME_Nall; + int PME_Nfft; + + struct VECTOR { + float x; + float y; + float z; + }; + + struct UNSIGNED_INT_VECTOR { + unsigned int 
uint_x; + unsigned int uint_y; + unsigned int uint_z; + }; +}; +} // namespace kernel +} // namespace mindspore +#endif diff --git a/mindspore/ccsrc/cxx_api/CMakeLists.txt b/mindspore/ccsrc/cxx_api/CMakeLists.txt index 5041dea655..59c967ba16 100644 --- a/mindspore/ccsrc/cxx_api/CMakeLists.txt +++ b/mindspore/ccsrc/cxx_api/CMakeLists.txt @@ -114,7 +114,8 @@ if(ENABLE_GPU) ${CUDNN_LIBRARY_PATH} ${CUDA_PATH}/lib64/libcudart.so ${CUDA_PATH}/lib64/stubs/libcuda.so - ${CUDA_PATH}/lib64/libcusolver.so) + ${CUDA_PATH}/lib64/libcusolver.so + ${CUDA_PATH}/lib64/libcufft.so) endif() if(CMAKE_SYSTEM_NAME MATCHES "Linux") diff --git a/mindspore/ops/operations/__init__.py b/mindspore/ops/operations/__init__.py index 762adcfd03..9282449e4e 100644 --- a/mindspore/ops/operations/__init__.py +++ b/mindspore/ops/operations/__init__.py @@ -100,8 +100,13 @@ from ._embedding_cache_ops import (CacheSwapHashmap, SearchCacheIdx, CacheSwapTa MapUniform, DynamicAssign, PadAndShift) from .quantum_ops import PQC, Evolution from .sponge_ops import (BondForce, BondEnergy, BondAtomEnergy, BondForceWithAtomEnergy, BondForceWithAtomVirial, - DihedralForce, DihedralEnergy, DihedralAtomEnergy, DihedralForceWithAtomEnergy, - AngleForce, AngleEnergy, AngleAtomEnergy, AngleForceWithAtomEnergy) + DihedralForce, DihedralEnergy, DihedralAtomEnergy, DihedralForceWithAtomEnergy, AngleForce, + AngleEnergy, AngleAtomEnergy, AngleForceWithAtomEnergy, PMEReciprocalForce, + LJForce, LJEnergy, LJForceWithPMEDirectForce, PMEExcludedForce, PMEEnergy, Dihedral14LJForce, + Dihedral14LJForceWithDirectCF, Dihedral14LJEnergy, Dihedral14LJCFForceWithAtomEnergy, + Dihedral14LJAtomEnergy, Dihedral14CFEnergy, Dihedral14CFAtomEnergy, MDIterationLeapFrog, + GetCenterOfGeometry, MDTemperature, NeighborListUpdate) + __all__ = [ 'Unique', @@ -438,6 +443,24 @@ __all__ = [ "AngleEnergy", "AngleAtomEnergy", "AngleForceWithAtomEnergy", + 'PMEReciprocalForce', + 'LJForce', + 'LJForceWithPMEDirectForce', + 'LJEnergy', + 'PMEExcludedForce', + 'PMEEnergy', + "Dihedral14LJForce", + "Dihedral14LJEnergy", + "Dihedral14LJForceWithDirectCF", + "Dihedral14LJCFForceWithAtomEnergy", + "Dihedral14LJAtomEnergy", + "Dihedral14CFEnergy", + "MDIterationLeapFrog", + "Dihedral14CFAtomEnergy", + "GetCenterOfGeometry", + "MDTemperature", + "NeighborListUpdate", + ] __all__.sort() diff --git a/mindspore/ops/operations/sponge_ops.py b/mindspore/ops/operations/sponge_ops.py index 3737fce043..d25e74553b 100644 --- a/mindspore/ops/operations/sponge_ops.py +++ b/mindspore/ops/operations/sponge_ops.py @@ -1,902 +1,1988 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Operators for sponge.""" - -from ..primitive import PrimitiveWithInfer, prim_attr_register -from ..._checkparam import Validator as validator -from ...common import dtype as mstype -from ..._checkparam import Rel - - -class BondForce(PrimitiveWithInfer): - """ - BondForce: - - Calculate the force exerted by the simple harmonic bond on the - corresponding atoms. Assume the number of harmonic bonds is M and - the number of atoms is N. - - .. math:: - - dr = (x_1-x_2, y_1-y_2, z_1-z_2) - F = (F_x, F_y, F_z) = 2*k*(1 - r_0/|dr|)*dr - - Inputs: - - **uint_crd_f** (Tensor, uint32 ) - [N, 3], the unsigned int coordinate - value of each atom. - - **scaler_f** (Tensor, float32) - [3, 1], the 3-D scale factor (x, y, z), - between the real space float coordinates and the unsigned int coordinates. - - **atom_a** (Tensor, int32) - [M, 1], the first atom index of each bond. - - **atom_b** (Tensor, int32) - [M, 1], the second atom index of each bond. - - **bond_k** (Tensor, float32) - [M, 1], the force constant of each bond. - - **bond_r0** (Tensor, float32) - [M, 1], the equlibrium length of each bond. - - Outputs: - - **frc_f** (float32 Tensor) - [N, 3], the force felt by each atom. - - Supported Platforms: - ``GPU`` - Examples: - """ - - @prim_attr_register - def __init__(self, bond_numbers): - self.bond_numbers = bond_numbers - self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'bond_k', 'bond_r0'], - outputs=['frc_f']) - self.add_prim_attr('bond_numbers', self.bond_numbers) - - def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, bond_k_shape, bond_r0_shape): - cls_name = self.name - # N = uint_crd_f_shape[0] - M = atom_a_shape[0] - validator.check_int( - uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) - validator.check_int( - scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) - validator.check_int( - atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) - validator.check_int( - bond_k_shape[0], M, Rel.EQ, "bond_k_shape", cls_name) - validator.check_int( - bond_r0_shape[0], M, Rel.EQ, "bond_r0_shape", cls_name) - return uint_crd_f_shape - - def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, bond_k_type, bond_r0_type): - validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) - validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('bond_k_type', bond_k_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('bond_r0_type', bond_r0_type, [mstype.float32], self.name) - return bond_r0_type - - -class BondEnergy(PrimitiveWithInfer): - """ - BondEnergyCuda: - - Calculate the harmonic potential energy between each bonded atom pair. - Assume our system has N atoms and M harmonic bonds. - - .. math:: - - dr = (x_1-x_2, y_1-y_2, z_1-z_2) - E = k*(|dr| - r_0)^2 - - Inputs: - Same as operator BondForce(). - - .. math:: - - dr = (x_1-x_2, y_1-y_2, z_1-z_2) - E = k*(|dr| - r_0)^2 - - Outputs: - - **bond_ene** (Tensor, float32) - [M, 1], the harmonic potential energy - for each bond. 
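The BondForce docstring pins the force down as F = 2*k*(1 - r0/|dr|)*dr, and BondEnergy pairs it with E = k*(|dr| - r0)^2; F is exactly the gradient of E with respect to dr. A direct numeric check of both formulas for a single stretched bond, with made-up k and r0:

#include <cmath>
#include <cstdio>

int main() {
  // One harmonic bond; coordinates in the same real-space frame as the docstring.
  double x1 = 0.0, y1 = 0.0, z1 = 0.0;
  double x2 = 1.2, y2 = 0.0, z2 = 0.0;
  double k = 300.0, r0 = 1.0;  // hypothetical force constant and equilibrium length

  double dx = x1 - x2, dy = y1 - y2, dz = z1 - z2;   // dr = (x1-x2, y1-y2, z1-z2)
  double r = std::sqrt(dx * dx + dy * dy + dz * dz);
  double c = 2.0 * k * (1.0 - r0 / r);               // F = 2k(1 - r0/|dr|) * dr
  std::printf("F = (%f, %f, %f)\n", c * dx, c * dy, c * dz);
  std::printf("E = %f\n", k * (r - r0) * (r - r0));  // E = k(|dr| - r0)^2
  return 0;
}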
- - Supported Platforms: - ``GPU`` - Examples: - """ - - @prim_attr_register - def __init__(self, bond_numbers): - self.bond_numbers = bond_numbers - self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'bond_k', 'bond_r0'], - outputs=['bond_ene']) - self.add_prim_attr('bond_numbers', self.bond_numbers) - - def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, bond_k_shape, bond_r0_shape): - cls_name = self.name - # N = uint_crd_f_shape[0] - M = atom_a_shape[0] - validator.check_int( - uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) - validator.check_int( - scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) - validator.check_int( - atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) - validator.check_int( - bond_k_shape[0], M, Rel.EQ, "bond_k_shape", cls_name) - validator.check_int( - bond_r0_shape[0], M, Rel.EQ, "bond_r0_shape", cls_name) - - return bond_k_shape - - def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, bond_k_type, bond_r0_type): - validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) - validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('bond_k_type', bond_k_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('bond_r0_type', bond_r0_type, [mstype.float32], self.name) - return bond_r0_type - - -class BondAtomEnergy(PrimitiveWithInfer): - """ - BondAtomEnergyCuda: - - Add the potential energy caused by simple harmonic bonds to the total - potential energy of each atom. - - The calculation formula is the same as operator BondEnergy(). - - Inputs: - Same as operator BondForce(). - - Outputs: - - **atom_ene** (Tensor, float32) - [N, 1], the accumulated potential - energy for each atom. 
- - Supported Platforms: - ``GPU`` - Examples: - """ - - @prim_attr_register - def __init__(self, bond_numbers): - self.bond_numbers = bond_numbers - self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'bond_k', 'bond_r0'], - outputs=['atom_ene']) - self.add_prim_attr('bond_numbers', self.bond_numbers) - - def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, bond_k_shape, bond_r0_shape): - cls_name = self.name - N = uint_crd_f_shape[0] - M = atom_a_shape[0] - validator.check_int( - uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) - validator.check_int( - scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) - validator.check_int( - atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) - validator.check_int( - bond_k_shape[0], M, Rel.EQ, "bond_k_shape", cls_name) - validator.check_int( - bond_r0_shape[0], M, Rel.EQ, "bond_r0_shape", cls_name) - return [N,] - - def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, bond_k_type, bond_r0_type): - validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) - validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('bond_k_type', bond_k_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('bond_r0_type', bond_r0_type, [mstype.float32], self.name) - return bond_r0_type - - -class BondForceWithAtomEnergy(PrimitiveWithInfer): - """ - BondForceWithAtomEnergy: - - Calculate bond force and harmonic potential energy together. - - The calculation formula is the same as operator BondForce() and BondEnergy(). - - Inputs: - Same as operator BondForce(). - - Outputs: - - **frc_f** (Tensor, float32) - [N, 3], same as operator BondForce(). - - **atom_e** (Tensor, float32) - [N, 1], same as atom_ene in operator BondAtomEnergy(). 
- - Supported Platforms: - ``GPU`` - Examples: - """ - - @prim_attr_register - def __init__(self, bond_numbers): - self.bond_numbers = bond_numbers - self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'bond_k', 'bond_r0'], - outputs=['frc_f', 'atom_e']) - self.add_prim_attr('bond_numbers', self.bond_numbers) - - def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, bond_k_shape, bond_r0_shape): - cls_name = self.name - N = uint_crd_f_shape[0] - M = atom_a_shape[0] - validator.check_int( - uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) - validator.check_int( - scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) - validator.check_int( - atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) - validator.check_int( - bond_k_shape[0], M, Rel.EQ, "bond_k_shape", cls_name) - validator.check_int( - bond_r0_shape[0], M, Rel.EQ, "bond_r0_shape", cls_name) - return uint_crd_f_shape, [N,] - - def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, bond_k_type, bond_r0_type): - validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) - validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('bond_k_type', bond_k_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('bond_r0_type', bond_r0_type, [mstype.float32], self.name) - return bond_r0_type, bond_r0_type - - -class BondForceWithAtomVirial(PrimitiveWithInfer): - """ - BondForceWithAtomVirial: - - Calculate bond force and the virial coefficient caused by simple harmonic - bond for each atom together. - - The calculation formula of the force part is the same as operator BondForce(). - The Virial part is as follows: - - .. math:: - - dr = (x_1-x_2, y_1-y_2, z_1-z_2) - virial = |dr|*(|dr| - r_0)*k - - Inputs: - Same as operator BondForce() - - Outputs: - - **frc_f** (Tensor, float32) - [N, 3], same as operator BondForce(). - - **atom_v** (Tensor, float32) - [N, 1], the accumulated virial coefficient - for each atom. 
- - Supported Platforms: - ``GPU`` - Examples: - """ - - @prim_attr_register - def __init__(self, bond_numbers): - self.bond_numbers = bond_numbers - self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'bond_k', 'bond_r0'], - outputs=['frc_f', 'atom_v']) - self.add_prim_attr('bond_numbers', self.bond_numbers) - - def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, bond_k_shape, bond_r0_shape): - cls_name = self.name - N = uint_crd_f_shape[0] - M = atom_a_shape[0] - validator.check_int( - uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) - validator.check_int( - scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) - validator.check_int( - atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) - validator.check_int( - bond_k_shape[0], M, Rel.EQ, "bond_k_shape", cls_name) - validator.check_int( - bond_r0_shape[0], M, Rel.EQ, "bond_r0_shape", cls_name) - return uint_crd_f_shape, [N,] - - def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, bond_k_type, bond_r0_type): - validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) - validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('bond_k_type', bond_k_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('bond_r0_type', bond_r0_type, [mstype.float32], self.name) - return bond_r0_type, bond_r0_type - - -class DihedralForce(PrimitiveWithInfer): - """ - DihedralForce: - - Calculate the force exerted by the dihedral term which made of 4-atoms - on the corresponding atoms. Assume the number of dihedral terms is M and - the number of atoms is N. - - .. math:: - - dr_{ab} = (x_b-x_a, y_b-y_a, z_b-z_a) - dr_{cb} = (x_b-x_c, y_b-y_c, z_b-z_c) - dr_{cd} = (x_d-x_c, y_d-y_c, z_d-z_c) - - r1 = dr_{ab}*dr_{cb} - r2 = dr_{cd}*dr_{cb} - - phi = pi - sign(inner_product(r1*r2), dr_{cb}) - * arccos(inner_product(r1, r2)/|r1|/|r2|) - dEdphi = n*phi*(k*cos(phi_0)*sin(n*phi) - k*sin(phi_0)*cos(n*phi))/sin(phi) - dphidr1 = r2/|r1|/|r2| + cos(phi)/|r1|^2*r1 - dphidr2 = r1/|r1|/|r2| + cos(phi)/|r2|^2*r2 - - dEdra = dEdphi * dr_{cb} * dphidr1 - dEdrd = dEdphi * dphi_dr2 * dr_{cb} - dEdrjpart = dEdphi * ((dr_{ab} * dphidr1) + (dr_{cd} * dphidr2)) - - F_a = dEdri - F_b = dEdrjpart - dEdri - F_c = - dEdrl - dEdrjpart - F_d = dEdrl - - Inputs: - - **uint_crd_f** (Tensor, uint32) - [N, 3], the unsigned int coordinates - value of each atom. - - **scalar_f** (Tensor, float32) - [3, ], the 3-D scale factor between - the real space float coordinates and the unsigned int coordinates. - - **atom_a** (Tensor, int32) - [M, ], the 1st atom index of each dihedral. - - **atom_b** (Tensor, int32) - [M, ], the 2nd atom index of each dihedral. - - **atom_c** (Tensor, int32) - [M, ], the 3rd atom index of each dihedral. - - **atom_d** (Tensor, int32) - [M, ], the 4th atom index of each dihedral. - 4 atoms are connected in the form a-b-c-d. - - **ipn** (Tensor, int32) - [M, ], the period of dihedral angle of each dihedral. - - **pk** (Tensor, float32) - [M, ], the force constant of each dihedral. - - **gamc** (Tensor, float32) - [M, ], k*cos(phi_0) of each dihedral. - - **gams** (Tensor, float32) - [M, ], k*sin(phi_0) of each dihedral. 
- - **pn** (Tensor, float32) - [M, ], the floating point form of ipn. - - Outputs: - - **frc_f** (Tensor, float32) - [N, 3], the force felt by each atom. - - Supported Platforms: - ``GPU`` - - Examples: - """ - - @prim_attr_register - def __init__(self, dihedral_numbers): - self.dihedral_numbers = dihedral_numbers - self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'atom_d', 'ipn', 'pk', - 'gamc', 'gams', 'pn'], - outputs=['frc_f']) - self.add_prim_attr('dihedral_numbers', self.dihedral_numbers) - - def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, atom_d_shape, - ipn_shape, pk_shape, gamc_shape, gams_shape, pn_shape): - cls_name = self.name - M = atom_a_shape[0] - validator.check_int( - uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) - validator.check_int( - scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) - validator.check_int( - atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name) - validator.check_int( - atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) - validator.check_int( - atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name) - validator.check_int( - atom_d_shape[0], M, Rel.EQ, "atom_d_shape", cls_name) - validator.check_int(ipn_shape[0], M, Rel.EQ, "ipn_shape", cls_name) - validator.check_int(pk_shape[0], M, Rel.EQ, "pk_shape", cls_name) - validator.check_int(gamc_shape[0], M, Rel.EQ, "gamc_shape", cls_name) - validator.check_int(gams_shape[0], M, Rel.EQ, "gams_shape", cls_name) - validator.check_int(pn_shape[0], M, Rel.EQ, "pn_shape", cls_name) - return uint_crd_f_shape - - def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, atom_d_type, - ipn_type, pk_type, gamc_type, gams_type, pn_type): - validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) - validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_d_type', atom_d_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('ipn_type', ipn_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('pk_type', pk_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('gamc_type', gamc_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('gams_type', gams_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('pn_type', pn_type, [mstype.float32], self.name) - return pn_type - - -class DihedralEnergy(PrimitiveWithInfer): - """ - DihedralEnergy: - - Calculate the potential energy caused by dihedral terms for each 4-atom pair. - Assume our system has N atoms and M dihedral terms. - - .. math:: - - E = k(1 + cos(n*phi - phi_0)) - - Inputs: - Same as operator DihedralForce(). - - Outputs: - - **ene** (Tensor, float32) - [M, ], the potential energy for each - dihedral term. 
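DihedralEnergy evaluates E = k*(1 + cos(n*phi - phi_0)). Because the operator takes gamc = k*cos(phi_0) and gams = k*sin(phi_0) instead of phi_0 itself, the angle-addition identity lets the kernel avoid reconstructing phi_0 entirely: E = k + gamc*cos(n*phi) + gams*sin(n*phi). A numeric check of that equivalence:

#include <cmath>
#include <cstdio>

int main() {
  const double k = 2.5;                       // pk: force constant (hypothetical)
  const double phi0 = std::acos(-1.0) / 3.0;  // 60 degrees
  const double n = 2.0, phi = 0.8;            // pn and a sample dihedral angle

  const double gamc = k * std::cos(phi0);     // k*cos(phi_0), as the inputs define it
  const double gams = k * std::sin(phi0);     // k*sin(phi_0)

  double direct = k * (1.0 + std::cos(n * phi - phi0));
  double folded = k + gamc * std::cos(n * phi) + gams * std::sin(n * phi);
  std::printf("direct = %.12f  via gamc/gams = %.12f\n", direct, folded);
  return 0;
}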
- - Supported Platforms: - ``GPU`` - - Examples: - """ - - @prim_attr_register - def __init__(self, dihedral_numbers): - self.dihedral_numbers = dihedral_numbers - self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'atom_d', 'ipn', 'pk', - 'gamc', 'gams', 'pn'], - outputs=['ene']) - self.add_prim_attr('dihedral_numbers', self.dihedral_numbers) - - def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, atom_d_shape, - ipn_shape, pk_shape, gamc_shape, gams_shape, pn_shape): - cls_name = self.name - M = atom_a_shape[0] - validator.check_int( - uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) - validator.check_int( - scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) - validator.check_int( - atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name) - validator.check_int( - atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) - validator.check_int( - atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name) - validator.check_int( - atom_d_shape[0], M, Rel.EQ, "atom_d_shape", cls_name) - validator.check_int(ipn_shape[0], M, Rel.EQ, "ipn_shape", cls_name) - validator.check_int(pk_shape[0], M, Rel.EQ, "pk_shape", cls_name) - validator.check_int(gamc_shape[0], M, Rel.EQ, "gamc_shape", cls_name) - validator.check_int(gams_shape[0], M, Rel.EQ, "gams_shape", cls_name) - validator.check_int(pn_shape[0], M, Rel.EQ, "pn_shape", cls_name) - return [M,] - - def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, atom_d_type, - ipn_type, pk_type, gamc_type, gams_type, pn_type): - validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) - validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_d_type', atom_d_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('ipn_type', ipn_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('pk_type', pk_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('gamc_type', gamc_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('gams_type', gams_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('pn_type', pn_type, [mstype.float32], self.name) - return pn_type - - -class DihedralAtomEnergy(PrimitiveWithInfer): - """ - DihedralAtomEnergy: - - Add the potential energy caused by dihedral terms to the total potential - energy of each atom. - - The calculation formula is the same as operator DihedralEnergy(). - - Inputs: - Same as operator DihedralEnergy(). - - Outputs: - - **ene** (Tensor, float32) - [N, ], the accumulated potential - energy for each atom. 
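DihedralAtomEnergy differs from DihedralEnergy only in where the M per-term energies land: they are scattered onto the N atoms instead of being returned per term. On the host that is a plain scatter-add; the GPU kernels would typically use atomicAdd for the same effect, though that detail is an assumption here. A sketch that, for simplicity, credits each term to its first atom only:

#include <cstdio>
#include <vector>

int main() {
  // Hypothetical per-term energies and their first-atom indices (M = 4 terms, N = 4 atoms).
  std::vector<int> atom_a = {0, 1, 1, 3};
  std::vector<double> term_ene = {0.4, 0.1, 0.3, 0.2};
  std::vector<double> atom_ene(4, 0.0);

  for (size_t m = 0; m < term_ene.size(); ++m)
    atom_ene[atom_a[m]] += term_ene[m];  // scatter-add; atomicAdd on the GPU

  for (size_t i = 0; i < atom_ene.size(); ++i)
    std::printf("atom %zu: %.2f\n", i, atom_ene[i]);
  return 0;
}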
- - Supported Platforms: - ``GPU`` - - Examples: - """ - - @prim_attr_register - def __init__(self, dihedral_numbers): - self.dihedral_numbers = dihedral_numbers - self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'atom_d', 'ipn', 'pk', - 'gamc', 'gams', 'pn'], - outputs=['ene']) - self.add_prim_attr('dihedral_numbers', self.dihedral_numbers) - - def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, atom_d_shape, - ipn_shape, pk_shape, gamc_shape, gams_shape, pn_shape): - cls_name = self.name - N = uint_crd_f_shape[0] - M = atom_a_shape[0] - validator.check_int( - uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) - validator.check_int( - scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) - validator.check_int( - atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name) - validator.check_int( - atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) - validator.check_int( - atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name) - validator.check_int( - atom_d_shape[0], M, Rel.EQ, "atom_d_shape", cls_name) - validator.check_int(ipn_shape[0], M, Rel.EQ, "ipn_shape", cls_name) - validator.check_int(pk_shape[0], M, Rel.EQ, "pk_shape", cls_name) - validator.check_int(gamc_shape[0], M, Rel.EQ, "gamc_shape", cls_name) - validator.check_int(gams_shape[0], M, Rel.EQ, "gams_shape", cls_name) - validator.check_int(pn_shape[0], M, Rel.EQ, "pn_shape", cls_name) - return [N,] - - def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, atom_d_type, - ipn_type, pk_type, gamc_type, gams_type, pn_type): - validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) - validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_d_type', atom_d_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('ipn_type', ipn_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('pk_type', pk_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('gamc_type', gamc_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('gams_type', gams_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('pn_type', pn_type, [mstype.float32], self.name) - return pn_type - - -class DihedralForceWithAtomEnergy(PrimitiveWithInfer): - """ - DihedralForceWithAtomEnergy: - - Calculate dihedral force and potential energy together. - - The calculation formula is the same as operator DihedralForce() and DihedralEnergy(). - - Inputs: - Same as operator DihedralForce(). - - Outputs: - - **frc_f** (Tensor, float32) - [N, 3], same as operator DihedralForce(). - - **ene** (Tensor, float32) - [N, ], same as operator DihedralAtomEnergy(). 
- - Supported Platforms: - ``GPU`` - - Examples: - """ - - @prim_attr_register - def __init__(self, dihedral_numbers): - self.dihedral_numbers = dihedral_numbers - self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'atom_d', 'ipn', 'pk', - 'gamc', 'gams', 'pn'], - outputs=['frc_f', 'ene']) - self.add_prim_attr('dihedral_numbers', self.dihedral_numbers) - - def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, atom_d_shape, - ipn_shape, pk_shape, gamc_shape, gams_shape, pn_shape): - cls_name = self.name - N = uint_crd_f_shape[0] - M = atom_a_shape[0] - validator.check_int( - uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) - validator.check_int( - scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) - validator.check_int( - atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name) - validator.check_int( - atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) - validator.check_int( - atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name) - validator.check_int( - atom_d_shape[0], M, Rel.EQ, "atom_d_shape", cls_name) - validator.check_int(ipn_shape[0], M, Rel.EQ, "ipn_shape", cls_name) - validator.check_int(pk_shape[0], M, Rel.EQ, "pk_shape", cls_name) - validator.check_int(gamc_shape[0], M, Rel.EQ, "gamc_shape", cls_name) - validator.check_int(gams_shape[0], M, Rel.EQ, "gams_shape", cls_name) - validator.check_int(pn_shape[0], M, Rel.EQ, "pn_shape", cls_name) - return uint_crd_f_shape, [N,] - - def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, atom_d_type, - ipn_type, pk_type, gamc_type, gams_type, pn_type): - validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) - validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_d_type', atom_d_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('ipn_type', ipn_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('pk_type', pk_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('gamc_type', gamc_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('gams_type', gams_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('pn_type', pn_type, [mstype.float32], self.name) - return pn_type, pn_type - - -class AngleForce(PrimitiveWithInfer): - """ - AngleForce: - - Calculate the force exerted by angles made of 3 atoms on the - corresponding atoms. Assume the number of angles is M and the - number of atoms is N. - - .. math:: - - dr_{ab} = (x_b-x_a, y_b-y_a, z_b-z_a) - dr_{cb} = (x_b-x_c, y_b-y_c, z_b-z_c) - theta = arccos(inner_product(dr_{ab}, dr_{cb})/|dr_{ab}|/|dr_{cb}|) - F_a = -2*k*(theta-theta_0)/sin(theta)*[cos(theta)/|dr_{ab}|^2*dr_{ab} - - 1/|dr_{ab}|/|dr_{cb}|*dr_{cb}] - F_c = -2*k*(theta-theta_0)/sin(theta)*[cos(theta)/|dr_{cb}|^2*dr_{cb} - - 1/|dr_{cb}|/|dr_{ab}|*dr_{ab}] - F_b = -F_a - F_c - - Inputs: - - **uint_crd_f** (Tensor, uint32) - [N, 3], the unsigned int coordinate - value of each atom. 
- - **scaler_f** (Tensor, float32) - [3, ], the 3-D scale factor between - the real space float coordinates and the unsigned int coordinates. - - **atom_a** (Tensor, int32) - [M, ], the 1st atom index of each angle. - - **atom_b** (Tensor, int32) - [M, ], the 2nd and the central atom index - of each angle. - - **atom_c** (Tensor, int32) - [M, ], the 3rd atom index of each angle. - - **angle_k** (Tensor, float32) - [M, ], the force constant for each angle. - - **angle_theta0** (Tensor, float32) - [M, ], the equilibrium position value - for each angle. - - Outputs: - - **frc_f** (Tensor, float32) - [N, 3], the force felt by each atom. - - Supported Platforms: - ``GPU`` - - Examples: - """ - - @prim_attr_register - def __init__(self, angle_numbers): - self.angle_numbers = angle_numbers - self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'angle_k', - 'angle_theta0'], - outputs=['frc_f']) - self.add_prim_attr('angle_numbers', self.angle_numbers) - - def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, angle_k_shape, - angle_theta0_shape): - cls_name = self.name - M = atom_a_shape[0] - validator.check_int( - uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) - validator.check_int( - scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) - validator.check_int( - atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name) - validator.check_int( - atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) - validator.check_int( - atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name) - validator.check_int( - angle_k_shape[0], M, Rel.EQ, "angle_k_shape", cls_name) - validator.check_int( - angle_theta0_shape[0], M, Rel.EQ, "angle_theta0_shape", cls_name) - return uint_crd_f_shape - - def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, angle_k_type, - angle_theta0_type): - validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) - validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('angle_k_type', angle_k_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('angle_theta0_type', angle_theta0_type, [mstype.float32], self.name) - return angle_k_type - - -class AngleEnergy(PrimitiveWithInfer): - """ - AngleEnergy: - - Calculate the energy caused by 3-atoms angle term. - - .. math:: - - dr_{ab} = (x_b-x_a, y_b-y_a, z_b-z_a) - dr_{cb} = (x_b-x_c, y_b-y_c, z_b-z_c) - theta = arccos(inner_product(dr_{ab}, dr_{cb})/|dr_{ab}|/|dr_{cb}|) - E = k*(theta - theta_0)^2 - - Inputs: - Same as operator AngleForce(). - - Outputs: - - **ene** (Tensor, float32) - [M, ], the potential energy for - each angle term. 
- - Supported Platforms: - ``GPU`` - - Examples: - """ - - @prim_attr_register - def __init__(self, angle_numbers): - self.angle_numbers = angle_numbers - self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'angle_k', - 'angle_theta0'], - outputs=['ene']) - self.add_prim_attr('angle_numbers', self.angle_numbers) - - def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, angle_k_shape, - angle_theta0_shape): - cls_name = self.name - M = atom_a_shape[0] - validator.check_int( - uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) - validator.check_int( - scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) - validator.check_int( - atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name) - validator.check_int( - atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) - validator.check_int( - atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name) - validator.check_int( - angle_k_shape[0], M, Rel.EQ, "angle_k_shape", cls_name) - validator.check_int( - angle_theta0_shape[0], M, Rel.EQ, "angle_theta0_shape", cls_name) - return [M,] - - def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, angle_k_type, - angle_theta0_type): - validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) - validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('angle_k_type', angle_k_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('angle_theta0_type', angle_theta0_type, [mstype.float32], self.name) - return angle_k_type - - -class AngleAtomEnergy(PrimitiveWithInfer): - """ - AngleAtomEnergy: - - Add the potential energy caused by angle terms to the total potential - energy of each atom. - - The calculation formula is the same as operator AngleEnergy(). - - Inputs: - Same as operator AngleForce(). - - Outputs: - - **ene** (Tensor, float32) - [N, ], the accumulated potential energy - for each atom. 
- - Supported Platforms: - ``GPU`` - - Examples: - """ - - @prim_attr_register - def __init__(self, angle_numbers): - self.angle_numbers = angle_numbers - self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'angle_k', - 'angle_theta0'], - outputs=['ene']) - self.add_prim_attr('angle_numbers', self.angle_numbers) - - def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, angle_k_shape, - angle_theta0_shape): - cls_name = self.name - N = uint_crd_f_shape[0] - M = atom_a_shape[0] - validator.check_int( - uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) - validator.check_int( - scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) - validator.check_int( - atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name) - validator.check_int( - atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) - validator.check_int( - atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name) - validator.check_int( - angle_k_shape[0], M, Rel.EQ, "angle_k_shape", cls_name) - validator.check_int( - angle_theta0_shape[0], M, Rel.EQ, "angle_theta0_shape", cls_name) - return [N,] - - def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, angle_k_type, - angle_theta0_type): - validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) - validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('angle_k_type', angle_k_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('angle_theta0_type', angle_theta0_type, [mstype.float32], self.name) - return angle_k_type - - -class AngleForceWithAtomEnergy(PrimitiveWithInfer): - """ - AngleForceWithAtomEnergy: - - Calculate angle force and potential energy together. - - The calculation formula is the same as operator AngleForce() and AngleEnergy(). - - Inputs: - Same as operator AngleForce(). - - Outputs: - - **frc_f** (Tensor, float32) - [N, 3], same as operator AngleForce(). - - **ene** (Tensor, float) - [N, ], same as operator AngleAtomEnergy(). 
-
-    Supported Platforms:
-        ``GPU``
-
-    Examples:
-    """
-
-    @prim_attr_register
-    def __init__(self, angle_numbers):
-        self.angle_numbers = angle_numbers
-        self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'angle_k',
-                                        'angle_theta0'],
-                                outputs=['frc_f', 'ene'])
-        self.add_prim_attr('angle_numbers', self.angle_numbers)
-
-    def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, angle_k_shape,
-                    angle_theta0_shape):
-        cls_name = self.name
-        N = uint_crd_f_shape[0]
-        M = atom_a_shape[0]
-        validator.check_int(
-            uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name)
-        validator.check_int(
-            scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name)
-        validator.check_int(
-            atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name)
-        validator.check_int(
-            atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name)
-        validator.check_int(
-            atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name)
-        validator.check_int(
-            angle_k_shape[0], M, Rel.EQ, "angle_k_shape", cls_name)
-        validator.check_int(
-            angle_theta0_shape[0], M, Rel.EQ, "angle_theta0_shape", cls_name)
-        return uint_crd_f_shape, [N,]
-
-    def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, angle_k_type,
-                    angle_theta0_type):
-        validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name)
-        validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name)
-        validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name)
-        validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name)
-        validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name)
-        validator.check_tensor_dtype_valid('angle_k_type', angle_k_type, [mstype.float32], self.name)
-        validator.check_tensor_dtype_valid('angle_theta0_type', angle_theta0_type, [mstype.float32], self.name)
-        return angle_k_type, angle_k_type
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Operators for sponge."""
+
+from ..primitive import PrimitiveWithInfer, prim_attr_register
+from ..._checkparam import Rel
+from ..._checkparam import Validator as validator
+from ...common import dtype as mstype
+
+
+class BondForce(PrimitiveWithInfer):
+    """
+    BondForce:
+
+    Calculate the force exerted by the simple harmonic bond on the
+    corresponding atoms. Assume the number of harmonic bonds is M and
+    the number of atoms is N.
+
+    .. math::
+
+        dr = (x_1-x_2, y_1-y_2, z_1-z_2)
+        F = (F_x, F_y, F_z) = 2*k*(1 - r_0/|dr|)*dr
+
+    Inputs:
+        - **atom_numbers** (int32) - the number of atoms N.
+        - **bond_numbers** (int32) - the number of harmonic bonds M.
+        - **uint_crd_f** (Tensor, uint32) - [N, 3], the unsigned int coordinate
+          value of each atom.
+        - **scaler_f** (Tensor, float32) - [3,], the 3-D scale factor (x, y, z),
+          between the real space float coordinates and the unsigned int coordinates.
+        - **atom_a** (Tensor, int32) - [M,], the first atom index of each bond.
+        - **atom_b** (Tensor, int32) - [M,], the second atom index of each bond.
+        - **bond_k** (Tensor, float32) - [M,], the force constant of each bond.
+        - **bond_r0** (Tensor, float32) - [M,], the equilibrium length of each bond.
+
+    Outputs:
+        - **frc_f** (Tensor, float32) - [N, 3], the force felt by each atom.
+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+    """
+
+    @prim_attr_register
+    def __init__(self, bond_numbers, atom_numbers):
+        self.bond_numbers = bond_numbers
+        self.atom_numbers = atom_numbers
+        self.add_prim_attr('bond_numbers', self.bond_numbers)
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+        self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'bond_k', 'bond_r0'],
+                                outputs=['frc_f'])
+
+    def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, bond_k_shape, bond_r0_shape):
+        cls_name = self.name
+        N = self.atom_numbers
+        M = self.bond_numbers
+        validator.check_int(uint_crd_f_shape[0], N, Rel.EQ, "uint_crd_f_shape", cls_name)
+        validator.check_int(uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name)
+        validator.check_int(scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name)
+        validator.check_int(atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name)
+        validator.check_int(atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name)
+        validator.check_int(bond_k_shape[0], M, Rel.EQ, "bond_k_shape", cls_name)
+        validator.check_int(bond_r0_shape[0], M, Rel.EQ, "bond_r0_shape", cls_name)
+        return uint_crd_f_shape
+
+    def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, bond_k_type, bond_r0_type):
+        validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('bond_k_type', bond_k_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('bond_r0_type', bond_r0_type, [mstype.float32], self.name)
+        return bond_r0_type
+
+
+class BondEnergy(PrimitiveWithInfer):
+    """
+    BondEnergy:
+
+    Calculate the harmonic potential energy between each bonded atom pair.
+    Assume our system has N atoms and M harmonic bonds.
+
+    .. math::
+
+        dr = (x_1-x_2, y_1-y_2, z_1-z_2)
+        E = k*(|dr| - r_0)^2
+
+    Inputs:
+        Same as operator BondForce().
+
+    Outputs:
+        - **bond_ene** (Tensor, float32) - [M,], the harmonic potential energy
+          for each bond.
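+
+    A minimal invocation sketch (illustrative values only; assumes the operator
+    is exported through mindspore.ops.operations as this patch intends, and a
+    GPU device is available):
+
+    >>> import numpy as np
+    >>> from mindspore import Tensor
+    >>> from mindspore.ops import operations as P
+    >>> bond_energy = P.BondEnergy(bond_numbers=1, atom_numbers=2)
+    >>> uint_crd_f = Tensor(np.array([[0, 0, 0], [200, 0, 0]], np.uint32))
+    >>> scaler_f = Tensor(np.array([0.01, 0.01, 0.01], np.float32))  # uint -> float scale
+    >>> atom_a = Tensor(np.array([0], np.int32))
+    >>> atom_b = Tensor(np.array([1], np.int32))
+    >>> bond_k = Tensor(np.array([10.0], np.float32))
+    >>> bond_r0 = Tensor(np.array([1.0], np.float32))
+    >>> bond_ene = bond_energy(uint_crd_f, scaler_f, atom_a, atom_b, bond_k, bond_r0)
+    >>> bond_ene.shape  # [M,]
+    (1,)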
+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+    """
+
+    @prim_attr_register
+    def __init__(self, bond_numbers, atom_numbers):
+        self.bond_numbers = bond_numbers
+        self.atom_numbers = atom_numbers
+        self.add_prim_attr('bond_numbers', self.bond_numbers)
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+        self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'bond_k', 'bond_r0'],
+                                outputs=['bond_ene'])
+
+    def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, bond_k_shape, bond_r0_shape):
+        cls_name = self.name
+        N = self.atom_numbers
+        M = self.bond_numbers
+        validator.check_int(uint_crd_f_shape[0], N, Rel.EQ, "uint_crd_f_shape", cls_name)
+        validator.check_int(uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name)
+        validator.check_int(scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name)
+        validator.check_int(atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name)
+        validator.check_int(atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name)
+        validator.check_int(bond_k_shape[0], M, Rel.EQ, "bond_k_shape", cls_name)
+        validator.check_int(bond_r0_shape[0], M, Rel.EQ, "bond_r0_shape", cls_name)
+
+        return bond_k_shape
+
+    def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, bond_k_type, bond_r0_type):
+        validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('bond_k_type', bond_k_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('bond_r0_type', bond_r0_type, [mstype.float32], self.name)
+        return bond_r0_type
+
+
+class BondAtomEnergy(PrimitiveWithInfer):
+    """
+    BondAtomEnergy:
+
+    Add the potential energy caused by simple harmonic bonds to the total
+    potential energy of each atom.
+
+    The calculation formula is the same as operator BondEnergy().
+
+    Inputs:
+        Same as operator BondForce().
+
+    Outputs:
+        - **atom_ene** (Tensor, float32) - [N,], the accumulated potential
+          energy for each atom.
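+
+    Reusing the tensors from the BondEnergy sketch above (same assumptions),
+    the bond energies would instead be accumulated onto the atoms:
+
+    >>> bond_atom_energy = P.BondAtomEnergy(bond_numbers=1, atom_numbers=2)
+    >>> atom_ene = bond_atom_energy(uint_crd_f, scaler_f, atom_a, atom_b, bond_k, bond_r0)
+    >>> atom_ene.shape  # [N,]
+    (2,)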
+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+    """
+
+    @prim_attr_register
+    def __init__(self, bond_numbers, atom_numbers):
+        self.bond_numbers = bond_numbers
+        self.atom_numbers = atom_numbers
+        self.add_prim_attr('bond_numbers', self.bond_numbers)
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+        self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'bond_k', 'bond_r0'],
+                                outputs=['atom_ene'])
+
+    def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, bond_k_shape, bond_r0_shape):
+        cls_name = self.name
+        N = self.atom_numbers
+        M = self.bond_numbers
+        validator.check_int(uint_crd_f_shape[0], N, Rel.EQ, "uint_crd_f_shape", cls_name)
+        validator.check_int(uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name)
+        validator.check_int(scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name)
+        validator.check_int(atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name)
+        validator.check_int(atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name)
+        validator.check_int(bond_k_shape[0], M, Rel.EQ, "bond_k_shape", cls_name)
+        validator.check_int(bond_r0_shape[0], M, Rel.EQ, "bond_r0_shape", cls_name)
+
+        return [N,]
+
+    def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, bond_k_type, bond_r0_type):
+        validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('bond_k_type', bond_k_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('bond_r0_type', bond_r0_type, [mstype.float32], self.name)
+        return bond_r0_type
+
+
+class BondForceWithAtomEnergy(PrimitiveWithInfer):
+    """
+    BondForceWithAtomEnergy:
+
+    Calculate bond force and harmonic potential energy together.
+
+    The calculation formula is the same as operator BondForce() and BondEnergy().
+
+    Inputs:
+        Same as operator BondForce().
+
+    Outputs:
+        - **frc_f** (Tensor, float32) - [N, 3], same as operator BondForce().
+        - **atom_e** (Tensor, float32) - [N,], same as atom_ene in operator BondAtomEnergy().
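+
+    A hedged sketch with the same illustrative tensors as the BondEnergy
+    example above, showing the fused two-output form:
+
+    >>> bond_force_energy = P.BondForceWithAtomEnergy(bond_numbers=1, atom_numbers=2)
+    >>> frc_f, atom_e = bond_force_energy(uint_crd_f, scaler_f, atom_a, atom_b, bond_k, bond_r0)
+    >>> frc_f.shape, atom_e.shape  # [N, 3], [N,]
+    ((2, 3), (2,))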
+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+    """
+
+    @prim_attr_register
+    def __init__(self, bond_numbers, atom_numbers):
+        self.bond_numbers = bond_numbers
+        self.atom_numbers = atom_numbers
+        self.add_prim_attr('bond_numbers', self.bond_numbers)
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+        self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'bond_k', 'bond_r0'],
+                                outputs=['frc_f', 'atom_e'])
+
+    def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, bond_k_shape, bond_r0_shape):
+        cls_name = self.name
+        N = self.atom_numbers
+        M = self.bond_numbers
+        validator.check_int(uint_crd_f_shape[0], N, Rel.EQ, "uint_crd_f_shape", cls_name)
+        validator.check_int(uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name)
+        validator.check_int(scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name)
+        validator.check_int(atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name)
+        validator.check_int(atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name)
+        validator.check_int(bond_k_shape[0], M, Rel.EQ, "bond_k_shape", cls_name)
+        validator.check_int(bond_r0_shape[0], M, Rel.EQ, "bond_r0_shape", cls_name)
+
+        return uint_crd_f_shape, [N,]
+
+    def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, bond_k_type, bond_r0_type):
+        validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name)
+
+        validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name)
+
+        validator.check_tensor_dtype_valid('bond_k_type', bond_k_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('bond_r0_type', bond_r0_type, [mstype.float32], self.name)
+        return bond_r0_type, bond_r0_type
+
+
+class BondForceWithAtomVirial(PrimitiveWithInfer):
+    """
+    BondForceWithAtomVirial:
+
+    Calculate bond force and the virial coefficient caused by simple harmonic
+    bond for each atom together.
+
+    The calculation formula of the force part is the same as operator BondForce().
+    The virial part is as follows:
+
+    .. math::
+
+        dr = (x_1-x_2, y_1-y_2, z_1-z_2)
+        virial = |dr|*(|dr| - r_0)*k
+
+    Inputs:
+        Same as operator BondForce().
+
+    Outputs:
+        - **frc_f** (Tensor, float32) - [N, 3], same as operator BondForce().
+        - **atom_v** (Tensor, float32) - [N,], the accumulated virial coefficient
+          for each atom.
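+
+    With the same illustrative tensors as the BondEnergy sketch above (same
+    assumptions), the fused force/virial form would be:
+
+    >>> bond_force_virial = P.BondForceWithAtomVirial(bond_numbers=1, atom_numbers=2)
+    >>> frc_f, atom_v = bond_force_virial(uint_crd_f, scaler_f, atom_a, atom_b, bond_k, bond_r0)
+    >>> frc_f.shape, atom_v.shape  # [N, 3], [N,]
+    ((2, 3), (2,))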
+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+    """
+
+    @prim_attr_register
+    def __init__(self, bond_numbers, atom_numbers):
+        self.bond_numbers = bond_numbers
+        self.atom_numbers = atom_numbers
+        self.add_prim_attr('bond_numbers', self.bond_numbers)
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+        self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'bond_k', 'bond_r0'],
+                                outputs=['frc_f', 'atom_v'])
+
+    def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, bond_k_shape, bond_r0_shape):
+        cls_name = self.name
+        N = self.atom_numbers
+        M = self.bond_numbers
+        validator.check_int(uint_crd_f_shape[0], N, Rel.EQ, "uint_crd_f_shape", cls_name)
+        validator.check_int(uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name)
+        validator.check_int(scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name)
+        validator.check_int(atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name)
+        validator.check_int(atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name)
+        validator.check_int(bond_k_shape[0], M, Rel.EQ, "bond_k_shape", cls_name)
+        validator.check_int(bond_r0_shape[0], M, Rel.EQ, "bond_r0_shape", cls_name)
+
+        return uint_crd_f_shape, [N,]
+
+    def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, bond_k_type, bond_r0_type):
+        validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name)
+
+        validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name)
+
+        validator.check_tensor_dtype_valid('bond_k_type', bond_k_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('bond_r0_type', bond_r0_type, [mstype.float32], self.name)
+        return bond_r0_type, bond_r0_type
+
+
+class DihedralForce(PrimitiveWithInfer):
+    """
+    DihedralForce:
+
+    Calculate the force exerted by the dihedral terms, each made of 4 atoms,
+    on the corresponding atoms. Assume the number of dihedral terms is M and
+    the number of atoms is N.
+
+    .. math::
+
+        dr_{ab} = (x_b-x_a, y_b-y_a, z_b-z_a)
+        dr_{cb} = (x_b-x_c, y_b-y_c, z_b-z_c)
+        dr_{cd} = (x_d-x_c, y_d-y_c, z_d-z_c)
+
+        r1 = dr_{ab}*dr_{cb}
+        r2 = dr_{cd}*dr_{cb}
+
+        phi = pi - sign(inner_product(r1*r2), dr_{cb})
+            * arccos(inner_product(r1, r2)/|r1|/|r2|)
+        dEdphi = n*phi*(k*cos(phi_0)*sin(n*phi) - k*sin(phi_0)*cos(n*phi))/sin(phi)
+        dphidr1 = r2/|r1|/|r2| + cos(phi)/|r1|^2*r1
+        dphidr2 = r1/|r1|/|r2| + cos(phi)/|r2|^2*r2
+
+        dEdri = dEdphi * dr_{cb} * dphidr1
+        dEdrl = dEdphi * dphidr2 * dr_{cb}
+        dEdrjpart = dEdphi * ((dr_{ab} * dphidr1) + (dr_{cd} * dphidr2))
+
+        F_a = dEdri
+        F_b = dEdrjpart - dEdri
+        F_c = - dEdrl - dEdrjpart
+        F_d = dEdrl
+
+    Inputs:
+        - **dihedral_numbers** (int32) - the number of dihedral terms M.
+        - **uint_crd_f** (Tensor, uint32) - [N, 3], the unsigned int coordinate
+          value of each atom.
+        - **scaler_f** (Tensor, float32) - [3, ], the 3-D scale factor between
+          the real space float coordinates and the unsigned int coordinates.
+        - **atom_a** (Tensor, int32) - [M, ], the 1st atom index of each dihedral.
+        - **atom_b** (Tensor, int32) - [M, ], the 2nd atom index of each dihedral.
+        - **atom_c** (Tensor, int32) - [M, ], the 3rd atom index of each dihedral.
+        - **atom_d** (Tensor, int32) - [M, ], the 4th atom index of each dihedral.
+          4 atoms are connected in the form a-b-c-d.
+ - **ipn** (Tensor, int32) - [M, ], the period of dihedral angle of each dihedral. + - **pk** (Tensor, float32) - [M, ], the force constant of each dihedral. + - **gamc** (Tensor, float32) - [M, ], k*cos(phi_0) of each dihedral. + - **gams** (Tensor, float32) - [M, ], k*sin(phi_0) of each dihedral. + - **pn** (Tensor, float32) - [M, ], the floating point form of ipn. + + Outputs: + - **frc_f** (Tensor, float32) - [N, 3], the force felt by each atom. + + Supported Platforms: + ``GPU`` + + Examples: + """ + + @prim_attr_register + def __init__(self, dihedral_numbers): + self.dihedral_numbers = dihedral_numbers + self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'atom_d', 'ipn', 'pk', + 'gamc', 'gams', 'pn'], + outputs=['frc_f']) + self.add_prim_attr('dihedral_numbers', self.dihedral_numbers) + + def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, atom_d_shape, + ipn_shape, pk_shape, gamc_shape, gams_shape, pn_shape): + cls_name = self.name + M = atom_a_shape[0] + validator.check_int(uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) + validator.check_int(scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) + validator.check_int(atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name) + validator.check_int(atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) + validator.check_int(atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name) + validator.check_int(atom_d_shape[0], M, Rel.EQ, "atom_d_shape", cls_name) + validator.check_int(ipn_shape[0], M, Rel.EQ, "ipn_shape", cls_name) + validator.check_int(pk_shape[0], M, Rel.EQ, "pk_shape", cls_name) + validator.check_int(gamc_shape[0], M, Rel.EQ, "gamc_shape", cls_name) + validator.check_int(gams_shape[0], M, Rel.EQ, "gams_shape", cls_name) + validator.check_int(pn_shape[0], M, Rel.EQ, "pn_shape", cls_name) + return uint_crd_f_shape + + def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, atom_d_type, + ipn_type, pk_type, gamc_type, gams_type, pn_type): + validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) + validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_d_type', atom_d_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('ipn_type', ipn_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('pk_type', pk_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('gamc_type', gamc_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('gams_type', gams_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('pn_type', pn_type, [mstype.float32], self.name) + + return pn_type + + +class DihedralEnergy(PrimitiveWithInfer): + """ + DihedralEnergy: + + Calculate the potential energy caused by dihedral terms for each 4-atom pair. + Assume our system has N atoms and M dihedral terms. + + .. math:: + + E = k(1 + cos(n*phi - phi_0)) + + Inputs: + Same as operator DihedralForce(). + + Outputs: + - **ene** (Tensor, float32) - [M, ], the potential energy for each + dihedral term. 
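+
+    A minimal invocation sketch for a single dihedral over four atoms
+    (illustrative values only; assumes export through mindspore.ops.operations
+    as this patch intends, and a GPU device):
+
+    >>> import numpy as np
+    >>> from mindspore import Tensor
+    >>> from mindspore.ops import operations as P
+    >>> dihedral_energy = P.DihedralEnergy(dihedral_numbers=1)
+    >>> uint_crd_f = Tensor(np.array(
+    ...     [[0, 0, 0], [100, 0, 0], [100, 100, 0], [100, 100, 100]], np.uint32))
+    >>> scaler_f = Tensor(np.array([0.01, 0.01, 0.01], np.float32))
+    >>> atom_a = Tensor(np.array([0], np.int32))
+    >>> atom_b = Tensor(np.array([1], np.int32))
+    >>> atom_c = Tensor(np.array([2], np.int32))
+    >>> atom_d = Tensor(np.array([3], np.int32))
+    >>> ipn = Tensor(np.array([2], np.int32))
+    >>> pk = Tensor(np.array([1.0], np.float32))
+    >>> gamc = Tensor(np.array([1.0], np.float32))  # k*cos(phi_0) with phi_0 = 0
+    >>> gams = Tensor(np.array([0.0], np.float32))  # k*sin(phi_0) with phi_0 = 0
+    >>> pn = Tensor(np.array([2.0], np.float32))    # float form of ipn
+    >>> ene = dihedral_energy(uint_crd_f, scaler_f, atom_a, atom_b, atom_c, atom_d,
+    ...                       ipn, pk, gamc, gams, pn)
+    >>> ene.shape  # [M,]
+    (1,)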
+ + Supported Platforms: + ``GPU`` + + Examples: + """ + + @prim_attr_register + def __init__(self, dihedral_numbers): + self.dihedral_numbers = dihedral_numbers + self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'atom_d', 'ipn', 'pk', + 'gamc', 'gams', 'pn'], + outputs=['ene']) + self.add_prim_attr('dihedral_numbers', self.dihedral_numbers) + + def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, atom_d_shape, + ipn_shape, pk_shape, gamc_shape, gams_shape, pn_shape): + cls_name = self.name + M = atom_a_shape[0] + validator.check_int(uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) + validator.check_int(scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) + validator.check_int(atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name) + validator.check_int(atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) + validator.check_int(atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name) + validator.check_int(atom_d_shape[0], M, Rel.EQ, "atom_d_shape", cls_name) + validator.check_int(ipn_shape[0], M, Rel.EQ, "ipn_shape", cls_name) + validator.check_int(pk_shape[0], M, Rel.EQ, "pk_shape", cls_name) + validator.check_int(gamc_shape[0], M, Rel.EQ, "gamc_shape", cls_name) + validator.check_int(gams_shape[0], M, Rel.EQ, "gams_shape", cls_name) + validator.check_int(pn_shape[0], M, Rel.EQ, "pn_shape", cls_name) + return [M,] + + def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, atom_d_type, + ipn_type, pk_type, gamc_type, gams_type, pn_type): + validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) + validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_d_type', atom_d_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('ipn_type', ipn_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('pk_type', pk_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('gamc_type', gamc_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('gams_type', gams_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('pn_type', pn_type, [mstype.float32], self.name) + + return pn_type + + +class DihedralAtomEnergy(PrimitiveWithInfer): + """ + DihedralAtomEnergy: + + Add the potential energy caused by dihedral terms to the total potential + energy of each atom. + + The calculation formula is the same as operator DihedralEnergy(). + + Inputs: + Same as operator DihedralEnergy(). + + Outputs: + - **ene** (Tensor, float32) - [N, ], the accumulated potential + energy for each atom. 
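+
+    Reusing the tensors from the DihedralEnergy sketch above (same assumptions),
+    the same term energy would be accumulated per atom:
+
+    >>> dihedral_atom_energy = P.DihedralAtomEnergy(dihedral_numbers=1)
+    >>> ene = dihedral_atom_energy(uint_crd_f, scaler_f, atom_a, atom_b, atom_c, atom_d,
+    ...                            ipn, pk, gamc, gams, pn)
+    >>> ene.shape  # [N,]
+    (4,)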
+ + Supported Platforms: + ``GPU`` + + Examples: + """ + + @prim_attr_register + def __init__(self, dihedral_numbers): + self.dihedral_numbers = dihedral_numbers + self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'atom_d', 'ipn', 'pk', + 'gamc', 'gams', 'pn'], + outputs=['ene']) + self.add_prim_attr('dihedral_numbers', self.dihedral_numbers) + + def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, atom_d_shape, + ipn_shape, pk_shape, gamc_shape, gams_shape, pn_shape): + cls_name = self.name + N = uint_crd_f_shape[0] + M = atom_a_shape[0] + validator.check_int(uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) + validator.check_int(scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) + validator.check_int(atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name) + validator.check_int(atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) + validator.check_int(atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name) + validator.check_int(atom_d_shape[0], M, Rel.EQ, "atom_d_shape", cls_name) + validator.check_int(ipn_shape[0], M, Rel.EQ, "ipn_shape", cls_name) + validator.check_int(pk_shape[0], M, Rel.EQ, "pk_shape", cls_name) + validator.check_int(gamc_shape[0], M, Rel.EQ, "gamc_shape", cls_name) + validator.check_int(gams_shape[0], M, Rel.EQ, "gams_shape", cls_name) + validator.check_int(pn_shape[0], M, Rel.EQ, "pn_shape", cls_name) + return [N,] + + def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, atom_d_type, + ipn_type, pk_type, gamc_type, gams_type, pn_type): + validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) + validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_d_type', atom_d_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('ipn_type', ipn_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('pk_type', pk_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('gamc_type', gamc_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('gams_type', gams_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('pn_type', pn_type, [mstype.float32], self.name) + + return pn_type + + +class DihedralForceWithAtomEnergy(PrimitiveWithInfer): + """ + DihedralForceWithAtomEnergy: + + Calculate dihedral force and potential energy together. + + The calculation formula is the same as operator DihedralForce() and DihedralEnergy(). + + Inputs: + Same as operator DihedralForce(). + + Outputs: + - **frc_f** (Tensor, float32) - [N, 3], same as operator DihedralForce(). + - **ene** (Tensor, float32) - [N, ], same as operator DihedralAtomEnergy(). 
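+
+    With the same illustrative tensors as the DihedralEnergy sketch above,
+    the fused force/energy form would be:
+
+    >>> dihedral_force_energy = P.DihedralForceWithAtomEnergy(dihedral_numbers=1)
+    >>> frc_f, ene = dihedral_force_energy(uint_crd_f, scaler_f, atom_a, atom_b, atom_c,
+    ...                                    atom_d, ipn, pk, gamc, gams, pn)
+    >>> frc_f.shape, ene.shape  # [N, 3], [N,]
+    ((4, 3), (4,))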
+ + Supported Platforms: + ``GPU`` + + Examples: + """ + + @prim_attr_register + def __init__(self, dihedral_numbers): + self.dihedral_numbers = dihedral_numbers + self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'atom_d', 'ipn', 'pk', + 'gamc', 'gams', 'pn'], + outputs=['frc_f', 'ene']) + self.add_prim_attr('dihedral_numbers', self.dihedral_numbers) + + def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, atom_d_shape, + ipn_shape, pk_shape, gamc_shape, gams_shape, pn_shape): + cls_name = self.name + N = uint_crd_f_shape[0] + M = atom_a_shape[0] + validator.check_int(uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) + validator.check_int(scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) + validator.check_int(atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name) + validator.check_int(atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) + validator.check_int(atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name) + validator.check_int(atom_d_shape[0], M, Rel.EQ, "atom_d_shape", cls_name) + validator.check_int(ipn_shape[0], M, Rel.EQ, "ipn_shape", cls_name) + validator.check_int(pk_shape[0], M, Rel.EQ, "pk_shape", cls_name) + validator.check_int(gamc_shape[0], M, Rel.EQ, "gamc_shape", cls_name) + validator.check_int(gams_shape[0], M, Rel.EQ, "gams_shape", cls_name) + validator.check_int(pn_shape[0], M, Rel.EQ, "pn_shape", cls_name) + return uint_crd_f_shape, [N,] + + def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, atom_d_type, + ipn_type, pk_type, gamc_type, gams_type, pn_type): + validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) + validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_d_type', atom_d_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('ipn_type', ipn_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('pk_type', pk_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('gamc_type', gamc_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('gams_type', gams_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('pn_type', pn_type, [mstype.float32], self.name) + + return pn_type, pn_type + + +class AngleForce(PrimitiveWithInfer): + """ + AngleForce: + + Calculate the force exerted by angles made of 3 atoms on the + corresponding atoms. Assume the number of angles is M and the + number of atoms is N. + + .. math:: + + dr_{ab} = (x_b-x_a, y_b-y_a, z_b-z_a) + dr_{cb} = (x_b-x_c, y_b-y_c, z_b-z_c) + theta = arccos(inner_product(dr_{ab}, dr_{cb})/|dr_{ab}|/|dr_{cb}|) + F_a = -2*k*(theta-theta_0)/sin(theta)*[cos(theta)/|dr_{ab}|^2*dr_{ab} + - 1/|dr_{ab}|/|dr_{cb}|*dr_{cb}] + F_c = -2*k*(theta-theta_0)/sin(theta)*[cos(theta)/|dr_{cb}|^2*dr_{cb} + - 1/|dr_{cb}|/|dr_{ab}|*dr_{ab}] + F_b = -F_a - F_c + + Inputs: + - **angle_numbers** (int32) - the number of angles M. + - **uint_crd_f** (Tensor, uint32) - [N, 3], the unsigned int coordinate + value of each atom. 
+ - **scaler_f** (Tensor, float32) - [3, ], the 3-D scale factor between + the real space float coordinates and the unsigned int coordinates. + - **atom_a** (Tensor, int32) - [M, ], the 1st atom index of each angle. + - **atom_b** (Tensor, int32) - [M, ], the 2nd and the central atom index + of each angle. + - **atom_c** (Tensor, int32) - [M, ], the 3rd atom index of each angle. + - **angle_k** (Tensor, float32) - [M, ], the force constant for each angle. + - **angle_theta0** (Tensor, float32) - [M, ], the equilibrium position value + for each angle. + + Outputs: + - **frc_f** (Tensor, float32) - [N, 3], the force felt by each atom. + + Supported Platforms: + ``GPU`` + + Examples: + """ + + @prim_attr_register + def __init__(self, angle_numbers): + self.angle_numbers = angle_numbers + self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'angle_k', + 'angle_theta0'], + outputs=['frc_f']) + self.add_prim_attr('angle_numbers', self.angle_numbers) + + def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, angle_k_shape, + angle_theta0_shape): + cls_name = self.name + M = atom_a_shape[0] + validator.check_int(uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) + validator.check_int(scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) + validator.check_int(atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name) + validator.check_int(atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) + validator.check_int(atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name) + validator.check_int(angle_k_shape[0], M, Rel.EQ, "angle_k_shape", cls_name) + validator.check_int(angle_theta0_shape[0], M, Rel.EQ, "angle_theta0_shape", cls_name) + return uint_crd_f_shape + + def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, angle_k_type, + angle_theta0_type): + validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) + validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('angle_k_type', angle_k_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('angle_theta0_type', angle_theta0_type, [mstype.float32], self.name) + return angle_k_type + + +class AngleEnergy(PrimitiveWithInfer): + """ + AngleEnergy: + + Calculate the energy caused by 3-atoms angle term. + + .. math:: + + dr_{ab} = (x_b-x_a, y_b-y_a, z_b-z_a) + dr_{cb} = (x_b-x_c, y_b-y_c, z_b-z_c) + theta = arccos(inner_product(dr_{ab}, dr_{cb})/|dr_{ab}|/|dr_{cb}|) + E = k*(theta - theta_0)^2 + + Inputs: + Same as operator AngleForce(). + + Outputs: + - **ene** (Tensor, float32) - [M, ], the potential energy for + each angle term. 
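+
+    A minimal invocation sketch for one right angle over three atoms
+    (illustrative values only; assumes export through mindspore.ops.operations
+    as this patch intends, and a GPU device):
+
+    >>> import numpy as np
+    >>> from mindspore import Tensor
+    >>> from mindspore.ops import operations as P
+    >>> angle_energy = P.AngleEnergy(angle_numbers=1)
+    >>> uint_crd_f = Tensor(np.array([[100, 0, 0], [0, 0, 0], [0, 100, 0]], np.uint32))
+    >>> scaler_f = Tensor(np.array([0.01, 0.01, 0.01], np.float32))
+    >>> atom_a = Tensor(np.array([0], np.int32))
+    >>> atom_b = Tensor(np.array([1], np.int32))  # the central atom
+    >>> atom_c = Tensor(np.array([2], np.int32))
+    >>> angle_k = Tensor(np.array([1.0], np.float32))
+    >>> angle_theta0 = Tensor(np.array([np.pi / 2], np.float32))
+    >>> ene = angle_energy(uint_crd_f, scaler_f, atom_a, atom_b, atom_c, angle_k, angle_theta0)
+    >>> ene.shape  # [M,]
+    (1,)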
+ + Supported Platforms: + ``GPU`` + + Examples: + """ + + @prim_attr_register + def __init__(self, angle_numbers): + self.angle_numbers = angle_numbers + self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'angle_k', + 'angle_theta0'], + outputs=['ene']) + self.add_prim_attr('angle_numbers', self.angle_numbers) + + def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, angle_k_shape, + angle_theta0_shape): + cls_name = self.name + M = atom_a_shape[0] + validator.check_int(uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) + validator.check_int(scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) + validator.check_int(atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name) + validator.check_int(atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) + validator.check_int(atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name) + validator.check_int(angle_k_shape[0], M, Rel.EQ, "angle_k_shape", cls_name) + validator.check_int(angle_theta0_shape[0], M, Rel.EQ, "angle_theta0_shape", cls_name) + return [M,] + + def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, angle_k_type, + angle_theta0_type): + validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) + validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('angle_k_type', angle_k_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('angle_theta0_type', angle_theta0_type, [mstype.float32], self.name) + return angle_k_type + + +class AngleAtomEnergy(PrimitiveWithInfer): + """ + AngleAtomEnergy: + + Add the potential energy caused by angle terms to the total potential + energy of each atom. + + The calculation formula is the same as operator AngleEnergy(). + + Inputs: + Same as operator AngleForce(). + + Outputs: + - **ene** (Tensor, float32) - [N, ], the accumulated potential energy + for each atom. 
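+
+    Reusing the tensors from the AngleEnergy sketch above (same assumptions):
+
+    >>> angle_atom_energy = P.AngleAtomEnergy(angle_numbers=1)
+    >>> ene = angle_atom_energy(uint_crd_f, scaler_f, atom_a, atom_b, atom_c, angle_k, angle_theta0)
+    >>> ene.shape  # [N,]
+    (3,)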
+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+    """
+
+    @prim_attr_register
+    def __init__(self, angle_numbers):
+        self.angle_numbers = angle_numbers
+        self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'angle_k',
+                                        'angle_theta0'],
+                                outputs=['ene'])
+        self.add_prim_attr('angle_numbers', self.angle_numbers)
+
+    def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, angle_k_shape,
+                    angle_theta0_shape):
+        cls_name = self.name
+        N = uint_crd_f_shape[0]
+        M = atom_a_shape[0]
+        validator.check_int(uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name)
+        validator.check_int(scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name)
+        validator.check_int(atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name)
+        validator.check_int(atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name)
+        validator.check_int(atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name)
+        validator.check_int(angle_k_shape[0], M, Rel.EQ, "angle_k_shape", cls_name)
+        validator.check_int(angle_theta0_shape[0], M, Rel.EQ, "angle_theta0_shape", cls_name)
+        return [N,]
+
+    def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, angle_k_type,
+                    angle_theta0_type):
+        validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('angle_k_type', angle_k_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('angle_theta0_type', angle_theta0_type, [mstype.float32], self.name)
+        return angle_k_type
+
+
+class AngleForceWithAtomEnergy(PrimitiveWithInfer):
+    """
+    AngleForceWithAtomEnergy:
+
+    Calculate angle force and potential energy together.
+
+    The calculation formula is the same as operator AngleForce() and AngleEnergy().
+
+    Inputs:
+        Same as operator AngleForce().
+
+    Outputs:
+        - **frc_f** (Tensor, float32) - [N, 3], same as operator AngleForce().
+        - **ene** (Tensor, float32) - [N, ], same as operator AngleAtomEnergy().
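+
+    With the same illustrative tensors as the AngleEnergy sketch above, the
+    fused force/energy form would be:
+
+    >>> angle_force_energy = P.AngleForceWithAtomEnergy(angle_numbers=1)
+    >>> frc_f, ene = angle_force_energy(uint_crd_f, scaler_f, atom_a, atom_b, atom_c,
+    ...                                 angle_k, angle_theta0)
+    >>> frc_f.shape, ene.shape  # [N, 3], [N,]
+    ((3, 3), (3,))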
+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+    """
+
+    @prim_attr_register
+    def __init__(self, angle_numbers):
+        self.angle_numbers = angle_numbers
+        self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'angle_k',
+                                        'angle_theta0'],
+                                outputs=['frc_f', 'ene'])
+        self.add_prim_attr('angle_numbers', self.angle_numbers)
+
+    def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, angle_k_shape,
+                    angle_theta0_shape):
+        cls_name = self.name
+        N = uint_crd_f_shape[0]
+        M = atom_a_shape[0]
+        validator.check_int(uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name)
+        validator.check_int(scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name)
+        validator.check_int(atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name)
+        validator.check_int(atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name)
+        validator.check_int(atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name)
+        validator.check_int(angle_k_shape[0], M, Rel.EQ, "angle_k_shape", cls_name)
+        validator.check_int(angle_theta0_shape[0], M, Rel.EQ, "angle_theta0_shape", cls_name)
+        return uint_crd_f_shape, [N,]
+
+    def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, angle_k_type,
+                    angle_theta0_type):
+        validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('angle_k_type', angle_k_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('angle_theta0_type', angle_theta0_type, [mstype.float32], self.name)
+        return angle_k_type, angle_k_type
+
+
+class Dihedral14LJForce(PrimitiveWithInfer):
+    """
+    Dihedral14LJForce:
+
+    Calculate the Lennard-Jones part of 1,4 dihedral force correction for
+    each necessary dihedral term on the corresponding atoms. Assume the
+    number of necessary dihedral 1,4 terms is M, the number of atoms is N,
+    and the number of Lennard-Jones types for all atoms is P, which means
+    there will be Q = P*(P+1)/2 types of possible Lennard-Jones interactions
+    for all kinds of atom pairs.
+
+    .. math::
+
+        dr = (x_a-x_b, y_a-y_b, z_a-z_b)
+        F = k*(-12*A/|dr|^{14} + 6*B/|dr|^{8})*dr
+
+    Inputs:
+        - **dihedral_14_numbers** (int32) - the number of necessary dihedral
+          1,4 terms M.
+        - **atom_numbers** (int32) - the number of atoms N.
+        - **uint_crd_f** (Tensor, uint32) - [N, 3], the unsigned int coordinate
+          value of each atom.
+        - **LJ_type** (Tensor, int32) - [N,], the Lennard-Jones type of each
+          atom.
+        - **charge** (Tensor, float32) - [N,], the charge of each atom.
+        - **boxlength_f** (Tensor, float32) - [3,], the length of the molecular
+          simulation box in 3 dimensions.
+        - **a_14** (Tensor, int32) - [M,], the first atom index of each dihedral
+          1,4 term.
+        - **b_14** (Tensor, int32) - [M,], the second atom index of each dihedral
+          1,4 term.
+        - **lj_scale_factor** (Tensor, float32) - [M,], the scale factor for the
+          Lennard-Jones part of force correction of each dihedral 1,4 term.
+        - **LJ_type_A** (Tensor, float32) - [Q,], the A parameter in Lennard-Jones
+          scheme of each atom pair type.
+        - **LJ_type_B** (Tensor, float32) - [Q,], the B parameter in Lennard-Jones
+          scheme of each atom pair type.
+
+    Outputs:
+        - **frc_f** (Tensor, float32) - [N, 3], the force felt by each atom.
+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+    """
+
+    @prim_attr_register
+    def __init__(self, nb14_numbers, atom_numbers):
+        self.dihedral_14_numbers = nb14_numbers
+        self.atom_numbers = atom_numbers
+        self.init_prim_io_names(
+            inputs=['uint_crd_f', 'LJtype', 'charge', 'boxlength_f', 'a_14', 'b_14', 'lj_scale_factor',
+                    'LJ_type_A', 'LJ_type_B'],
+            outputs=['frc_f'])
+        self.add_prim_attr('dihedral_14_numbers', self.dihedral_14_numbers)
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+
+    def infer_shape(self, uint_crd_f_shape, LJtype_shape, charge_shape, boxlength_f_shape, a_14_shape, b_14_shape,
+                    lj_scale_factor_shape, LJ_type_A_shape, LJ_type_B_shape):
+        return uint_crd_f_shape
+
+    def infer_dtype(self, uint_crd_f_dtype, LJtype_dtype, charge_dtype, boxlength_f_type, a_14_type, b_14_type,
+                    lj_scale_factor_type, LJ_type_A_type, LJ_type_B_type):
+        validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('LJtype_dtype', LJtype_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('charge_dtype', charge_dtype, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('boxlength_f_type', boxlength_f_type, [mstype.float32], self.name)
+
+        validator.check_tensor_dtype_valid('a_14_type', a_14_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('b_14_type', b_14_type, [mstype.int32], self.name)
+
+        validator.check_tensor_dtype_valid('lj_scale_factor_type', lj_scale_factor_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('LJ_type_A_type', LJ_type_A_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('LJ_type_B_type', LJ_type_B_type, [mstype.float32], self.name)
+        return LJ_type_B_type
+
+
+class Dihedral14LJEnergy(PrimitiveWithInfer):
+    """
+    Dihedral14LJEnergy:
+
+    Calculate the Lennard-Jones part of 1,4 dihedral energy correction for
+    each necessary dihedral term on the corresponding atoms.
+
+    .. math::
+
+        dr = (x_a-x_b, y_a-y_b, z_a-z_b)
+        E = k*(A/|dr|^{12} - B/|dr|^{6})
+
+    Inputs:
+        Same as operator Dihedral14LJForce().
+
+    Outputs:
+        - **ene** (Tensor, float32) - [M,], the Lennard-Jones potential
+          energy correction for each necessary dihedral 1,4 term.
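+
+    A minimal invocation sketch for one 1,4 pair of two atoms sharing a single
+    Lennard-Jones type, so that Q = P*(P+1)/2 = 1 (illustrative values only;
+    assumes export through mindspore.ops.operations as this patch intends, and
+    a GPU device):
+
+    >>> import numpy as np
+    >>> from mindspore import Tensor
+    >>> from mindspore.ops import operations as P
+    >>> nb14_lj_energy = P.Dihedral14LJEnergy(nb14_numbers=1, atom_numbers=2)
+    >>> uint_crd_f = Tensor(np.array([[0, 0, 0], [400, 0, 0]], np.uint32))
+    >>> LJtype = Tensor(np.array([0, 0], np.int32))
+    >>> charge = Tensor(np.array([1.0, -1.0], np.float32))
+    >>> boxlength_f = Tensor(np.array([10.0, 10.0, 10.0], np.float32))
+    >>> a_14 = Tensor(np.array([0], np.int32))
+    >>> b_14 = Tensor(np.array([1], np.int32))
+    >>> lj_scale_factor = Tensor(np.array([0.5], np.float32))
+    >>> LJ_type_A = Tensor(np.array([1.0], np.float32))
+    >>> LJ_type_B = Tensor(np.array([1.0], np.float32))
+    >>> ene = nb14_lj_energy(uint_crd_f, LJtype, charge, boxlength_f, a_14, b_14,
+    ...                      lj_scale_factor, LJ_type_A, LJ_type_B)
+    >>> ene.shape  # [M,]
+    (1,)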
+    """
+
+    @prim_attr_register
+    def __init__(self, nb14_numbers, atom_numbers):
+        self.dihedral_14_numbers = nb14_numbers
+        self.atom_numbers = atom_numbers
+
+        self.init_prim_io_names(
+            inputs=['uint_crd_f', 'LJtype', 'charge', 'boxlength_f', 'a_14', 'b_14', 'lj_scale_factor',
+                    'LJ_type_A', 'LJ_type_B'],
+            outputs=['ene'])
+        self.add_prim_attr('dihedral_14_numbers', self.dihedral_14_numbers)
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+
+    def infer_shape(self, uint_crd_f_shape, LJtype_shape, charge_shape, boxlength_f_shape, a_14_shape, b_14_shape,
+                    lj_scale_factor_shape, LJ_type_A_shape, LJ_type_B_shape):
+        return [self.dihedral_14_numbers,]
+
+    def infer_dtype(self, uint_crd_f_dtype, LJtype_dtype, charge_dtype, boxlength_f_type, a_14_type, b_14_type,
+                    lj_scale_factor_type, LJ_type_A_type, LJ_type_B_type):
+        validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('LJtype_dtype', LJtype_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('charge_dtype', charge_dtype, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('boxlength_f_type', boxlength_f_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('a_14_type', a_14_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('b_14_type', b_14_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('lj_scale_factor_type', lj_scale_factor_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('LJ_type_A_type', LJ_type_A_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('LJ_type_B_type', LJ_type_B_type, [mstype.float32], self.name)
+
+        return LJ_type_A_type
+
+
+class Dihedral14LJForceWithDirectCF(PrimitiveWithInfer):
+    """
+    Dihedral14LJForceWithDirectCF:
+
+    Calculate the Lennard-Jones part and the Coulomb part of the force
+    correction for each necessary dihedral 1,4 term.
+
+    The calculation formula of the Lennard-Jones part is the same as operator
+    Dihedral14LJForce(), and the Coulomb part is as follows:
+
+    .. math::
+
+        dr = (x_a-x_b, y_a-y_b, z_a-z_b)
+        F = -k*q_a*q_b/|dr|^3*dr
+
+    Inputs:
+        - **cf_scale_factor** (Tensor, float32) - [M,], the scale factor for the
+          Coulomb part of the force correction of each dihedral 1,4 term.
+
+        The rest of the inputs are the same as those of operator Dihedral14LJForce().
+
+    Outputs:
+        - **frc_f** (Tensor, float32) - [N, 3], the force felt by each atom.
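+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+        A minimal sketch with random placeholder inputs (shape illustration
+        only; assumes the operator is exported through `mindspore.ops.operations`):
+
+        >>> import numpy as np
+        >>> import mindspore.common.dtype as mstype
+        >>> from mindspore import Tensor
+        >>> from mindspore.ops import operations as P
+        >>> # placeholder system: N = 4 atoms, M = 2 terms, Q = 1 pair type
+        >>> net = P.Dihedral14LJForceWithDirectCF(nb14_numbers=2, atom_numbers=4)
+        >>> uint_crd_f = Tensor(np.random.randint(0, 2 ** 10, (4, 3)), mstype.uint32)
+        >>> lj_type, charge = Tensor(np.zeros(4), mstype.int32), Tensor(np.ones(4), mstype.float32)
+        >>> boxlength_f = Tensor(np.full(3, 100.0), mstype.float32)
+        >>> a_14, b_14 = Tensor(np.array([0, 1]), mstype.int32), Tensor(np.array([2, 3]), mstype.int32)
+        >>> lj_scale, cf_scale = Tensor(np.full(2, 0.5), mstype.float32), Tensor(np.full(2, 0.8333), mstype.float32)
+        >>> lj_a, lj_b = Tensor(np.ones(1), mstype.float32), Tensor(np.ones(1), mstype.float32)
+        >>> frc_f = net(uint_crd_f, lj_type, charge, boxlength_f, a_14, b_14, lj_scale, cf_scale, lj_a, lj_b)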
+    """
+
+    @prim_attr_register
+    def __init__(self, nb14_numbers, atom_numbers):
+        self.dihedral_14_numbers = nb14_numbers
+        self.atom_numbers = atom_numbers
+
+        self.init_prim_io_names(
+            inputs=['uint_crd_f', 'LJtype', 'charge', 'boxlength_f', 'a_14', 'b_14', 'lj_scale_factor',
+                    'cf_scale_factor', 'LJ_type_A', 'LJ_type_B'],
+            outputs=['frc_f'])
+        self.add_prim_attr('dihedral_14_numbers', self.dihedral_14_numbers)
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+
+    def infer_shape(self, uint_crd_f_shape, LJtype_shape, charge_shape, boxlength_f_shape, a_14_shape, b_14_shape,
+                    lj_scale_factor_shape, cf_scale_factor_shape, LJ_type_A_shape, LJ_type_B_shape):
+        return [self.atom_numbers, 3]
+
+    def infer_dtype(self, uint_crd_f_dtype, LJtype_dtype, charge_dtype, boxlength_f_type, a_14_type, b_14_type,
+                    lj_scale_factor_type, cf_scale_factor_type, LJ_type_A_type, LJ_type_B_type):
+        validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('LJtype_dtype', LJtype_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('charge_dtype', charge_dtype, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('boxlength_f_type', boxlength_f_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('a_14_type', a_14_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('b_14_type', b_14_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('lj_scale_factor_type', lj_scale_factor_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('cf_scale_factor_type', cf_scale_factor_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('LJ_type_A_type', LJ_type_A_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('LJ_type_B_type', LJ_type_B_type, [mstype.float32], self.name)
+
+        return LJ_type_A_type
+
+
+class Dihedral14LJCFForceWithAtomEnergy(PrimitiveWithInfer):
+    """
+    Dihedral14LJCFForceWithAtomEnergy:
+
+    Calculate the Lennard-Jones and Coulomb energy correction and force
+    correction for each necessary dihedral 1,4 term together and add them
+    to the total force and potential energy for each atom.
+
+    The calculation formula of the force correction is the same as operator
+    Dihedral14LJForceWithDirectCF(), and the energy correction part is the same
+    as operators Dihedral14LJEnergy() and Dihedral14CFEnergy().
+
+    Inputs:
+        Same as operator Dihedral14LJForceWithDirectCF().
+
+    Outputs:
+        - **frc_f** (Tensor, float32) - [N, 3], the force felt by each atom.
+        - **atom_energy** (Tensor, float32) - [N,], the accumulated potential
+          energy for each atom.
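+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+        A minimal placeholder sketch (illustrative only; the inputs are built
+        exactly as in the Dihedral14LJForceWithDirectCF example above):
+
+        >>> import numpy as np
+        >>> import mindspore.common.dtype as mstype
+        >>> from mindspore import Tensor
+        >>> from mindspore.ops import operations as P
+        >>> # placeholder system: N = 4 atoms, M = 2 terms, Q = 1 pair type
+        >>> net = P.Dihedral14LJCFForceWithAtomEnergy(nb14_numbers=2, atom_numbers=4)
+        >>> uint_crd_f = Tensor(np.random.randint(0, 2 ** 10, (4, 3)), mstype.uint32)
+        >>> lj_type, charge = Tensor(np.zeros(4), mstype.int32), Tensor(np.ones(4), mstype.float32)
+        >>> boxlength_f = Tensor(np.full(3, 100.0), mstype.float32)
+        >>> a_14, b_14 = Tensor(np.array([0, 1]), mstype.int32), Tensor(np.array([2, 3]), mstype.int32)
+        >>> lj_scale, cf_scale = Tensor(np.full(2, 0.5), mstype.float32), Tensor(np.full(2, 0.8333), mstype.float32)
+        >>> lj_a, lj_b = Tensor(np.ones(1), mstype.float32), Tensor(np.ones(1), mstype.float32)
+        >>> frc_f, atom_energy = net(uint_crd_f, lj_type, charge, boxlength_f, a_14, b_14, lj_scale, cf_scale, lj_a, lj_b)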
+    """
+
+    @prim_attr_register
+    def __init__(self, nb14_numbers, atom_numbers):
+        self.dihedral_14_numbers = nb14_numbers
+        self.atom_numbers = atom_numbers
+
+        self.init_prim_io_names(
+            inputs=['uint_crd_f', 'LJtype', 'charge', 'boxlength_f', 'a_14', 'b_14', 'lj_scale_factor',
+                    'cf_scale_factor', 'LJ_type_A', 'LJ_type_B'],
+            outputs=['frc_f', 'atom_energy'])
+        self.add_prim_attr('dihedral_14_numbers', self.dihedral_14_numbers)
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+
+    def infer_shape(self, uint_crd_f_shape, LJtype_shape, charge_shape, boxlength_f_shape, a_14_shape, b_14_shape,
+                    lj_scale_factor_shape, cf_scale_factor_shape, LJ_type_A_shape, LJ_type_B_shape):
+        return uint_crd_f_shape, charge_shape
+
+    def infer_dtype(self, uint_crd_f_dtype, LJtype_dtype, charge_dtype, boxlength_f_type, a_14_type, b_14_type,
+                    lj_scale_factor_type, cf_scale_factor_type, LJ_type_A_type, LJ_type_B_type):
+        validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('LJtype_dtype', LJtype_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('charge_dtype', charge_dtype, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('boxlength_f_type', boxlength_f_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('a_14_type', a_14_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('b_14_type', b_14_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('lj_scale_factor_type', lj_scale_factor_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('cf_scale_factor_type', cf_scale_factor_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('LJ_type_A_type', LJ_type_A_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('LJ_type_B_type', LJ_type_B_type, [mstype.float32], self.name)
+
+        return charge_dtype, charge_dtype
+
+
+class Dihedral14LJAtomEnergy(PrimitiveWithInfer):
+    """
+    Dihedral14LJAtomEnergy:
+
+    Add the potential energy caused by the Lennard-Jones energy correction for
+    each necessary dihedral 1,4 term to the total potential energy of each atom.
+
+    The calculation formula is the same as operator Dihedral14LJEnergy().
+
+    Inputs:
+        Same as operator Dihedral14LJForce().
+
+    Outputs:
+        - **ene** (Tensor, float32) - [N,], the accumulated potential energy of
+          each atom.
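+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+        A minimal placeholder sketch (shapes only; assumes export through
+        `mindspore.ops.operations`):
+
+        >>> import numpy as np
+        >>> import mindspore.common.dtype as mstype
+        >>> from mindspore import Tensor
+        >>> from mindspore.ops import operations as P
+        >>> # placeholder system: N = 4 atoms, M = 2 terms, Q = 1 pair type
+        >>> net = P.Dihedral14LJAtomEnergy(nb14_numbers=2, atom_numbers=4)
+        >>> uint_crd_f = Tensor(np.random.randint(0, 2 ** 10, (4, 3)), mstype.uint32)
+        >>> lj_type, charge = Tensor(np.zeros(4), mstype.int32), Tensor(np.ones(4), mstype.float32)
+        >>> boxlength_f = Tensor(np.full(3, 100.0), mstype.float32)
+        >>> a_14, b_14 = Tensor(np.array([0, 1]), mstype.int32), Tensor(np.array([2, 3]), mstype.int32)
+        >>> lj_scale = Tensor(np.full(2, 0.5), mstype.float32)
+        >>> lj_a, lj_b = Tensor(np.ones(1), mstype.float32), Tensor(np.ones(1), mstype.float32)
+        >>> ene = net(uint_crd_f, lj_type, charge, boxlength_f, a_14, b_14, lj_scale, lj_a, lj_b)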
+    """
+
+    @prim_attr_register
+    def __init__(self, nb14_numbers, atom_numbers):
+        self.dihedral_14_numbers = nb14_numbers
+        self.atom_numbers = atom_numbers
+
+        self.init_prim_io_names(
+            inputs=['uint_crd_f', 'LJtype', 'charge', 'boxlength_f', 'a_14', 'b_14', 'lj_scale_factor',
+                    'LJ_type_A', 'LJ_type_B'],
+            outputs=['ene'])
+        self.add_prim_attr('dihedral_14_numbers', self.dihedral_14_numbers)
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+
+    def infer_shape(self, uint_crd_f_shape, LJtype_shape, charge_shape, boxlength_f_shape, a_14_shape, b_14_shape,
+                    lj_scale_factor_shape, LJ_type_A_shape, LJ_type_B_shape):
+        return LJtype_shape
+
+    def infer_dtype(self, uint_crd_f_dtype, LJtype_dtype, charge_dtype, boxlength_f_type, a_14_type, b_14_type,
+                    lj_scale_factor_type, LJ_type_A_type, LJ_type_B_type):
+        validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('LJtype_dtype', LJtype_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('charge_dtype', charge_dtype, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('boxlength_f_type', boxlength_f_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('a_14_type', a_14_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('b_14_type', b_14_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('lj_scale_factor_type', lj_scale_factor_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('LJ_type_A_type', LJ_type_A_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('LJ_type_B_type', LJ_type_B_type, [mstype.float32], self.name)
+
+        return LJ_type_A_type
+
+
+class Dihedral14CFEnergy(PrimitiveWithInfer):
+    """
+    Dihedral14CFEnergy:
+
+    Calculate the Coulomb part of the 1,4 dihedral energy correction for
+    each necessary dihedral term on the corresponding atoms.
+
+    .. math::
+
+        dr = (x_a-x_b, y_a-y_b, z_a-z_b)
+        E = k*q_a*q_b/|dr|
+
+    Inputs:
+        The meaning and type of each input is the same as that of operator
+        Dihedral14LJForceWithDirectCF().
+
+    Outputs:
+        - **ene** (Tensor, float32) - [M,], the Coulomb potential energy
+          correction for each necessary dihedral 1,4 term.
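+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+        A minimal placeholder sketch (illustrative only):
+
+        >>> import numpy as np
+        >>> import mindspore.common.dtype as mstype
+        >>> from mindspore import Tensor
+        >>> from mindspore.ops import operations as P
+        >>> # placeholder system: N = 4 atoms, M = 2 dihedral 1,4 terms
+        >>> net = P.Dihedral14CFEnergy(nb14_numbers=2, atom_numbers=4)
+        >>> uint_crd_f = Tensor(np.random.randint(0, 2 ** 10, (4, 3)), mstype.uint32)
+        >>> lj_type, charge = Tensor(np.zeros(4), mstype.int32), Tensor(np.ones(4), mstype.float32)
+        >>> boxlength_f = Tensor(np.full(3, 100.0), mstype.float32)
+        >>> a_14, b_14 = Tensor(np.array([0, 1]), mstype.int32), Tensor(np.array([2, 3]), mstype.int32)
+        >>> cf_scale = Tensor(np.full(2, 0.8333), mstype.float32)
+        >>> ene = net(uint_crd_f, lj_type, charge, boxlength_f, a_14, b_14, cf_scale)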
+    """
+
+    @prim_attr_register
+    def __init__(self, nb14_numbers, atom_numbers):
+        self.dihedral_14_numbers = nb14_numbers
+        self.atom_numbers = atom_numbers
+
+        self.init_prim_io_names(
+            inputs=['uint_crd_f', 'LJtype', 'charge', 'boxlength_f', 'a_14', 'b_14', 'cf_scale_factor'],
+            outputs=['ene'])
+        self.add_prim_attr('dihedral_14_numbers', self.dihedral_14_numbers)
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+
+    def infer_shape(self, uint_crd_f_shape, LJtype_shape, charge_shape, boxlength_f_shape, a_14_shape, b_14_shape,
+                    cf_scale_factor_shape):
+        return [self.dihedral_14_numbers,]
+
+    def infer_dtype(self, uint_crd_f_dtype, LJtype_dtype, charge_dtype, boxlength_f_type, a_14_type, b_14_type,
+                    cf_scale_factor_type):
+        validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('LJtype_dtype', LJtype_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('charge_dtype', charge_dtype, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('boxlength_f_type', boxlength_f_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('a_14_type', a_14_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('b_14_type', b_14_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('cf_scale_factor_type', cf_scale_factor_type, [mstype.float32], self.name)
+
+        return charge_dtype
+
+
+class Dihedral14CFAtomEnergy(PrimitiveWithInfer):
+    """
+    Dihedral14CFAtomEnergy:
+
+    Add the potential energy caused by the Coulomb energy correction for each
+    necessary dihedral 1,4 term to the total potential energy of each atom.
+
+    The calculation formula is the same as operator Dihedral14CFEnergy().
+
+    Inputs:
+        The meaning and type of each input is the same as that of operator
+        Dihedral14LJForceWithDirectCF().
+
+    Outputs:
+        - **ene** (Tensor, float32) - [N,], the accumulated potential energy
+          of each atom.
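+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+        A minimal placeholder sketch (inputs built as in the Dihedral14CFEnergy
+        example above; illustrative only):
+
+        >>> import numpy as np
+        >>> import mindspore.common.dtype as mstype
+        >>> from mindspore import Tensor
+        >>> from mindspore.ops import operations as P
+        >>> # placeholder system: N = 4 atoms, M = 2 dihedral 1,4 terms
+        >>> net = P.Dihedral14CFAtomEnergy(nb14_numbers=2, atom_numbers=4)
+        >>> uint_crd_f = Tensor(np.random.randint(0, 2 ** 10, (4, 3)), mstype.uint32)
+        >>> lj_type, charge = Tensor(np.zeros(4), mstype.int32), Tensor(np.ones(4), mstype.float32)
+        >>> boxlength_f = Tensor(np.full(3, 100.0), mstype.float32)
+        >>> a_14, b_14 = Tensor(np.array([0, 1]), mstype.int32), Tensor(np.array([2, 3]), mstype.int32)
+        >>> cf_scale = Tensor(np.full(2, 0.8333), mstype.float32)
+        >>> ene = net(uint_crd_f, lj_type, charge, boxlength_f, a_14, b_14, cf_scale)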
+    """
+
+    @prim_attr_register
+    def __init__(self, nb14_numbers, atom_numbers):
+        self.dihedral_14_numbers = nb14_numbers
+        self.atom_numbers = atom_numbers
+
+        self.init_prim_io_names(
+            inputs=['uint_crd_f', 'LJtype', 'charge', 'boxlength_f', 'a_14', 'b_14', 'cf_scale_factor'],
+            outputs=['ene'])
+        self.add_prim_attr('dihedral_14_numbers', self.dihedral_14_numbers)
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+
+    def infer_shape(self, uint_crd_f_shape, LJtype_shape, charge_shape, boxlength_f_shape, a_14_shape, b_14_shape,
+                    cf_scale_factor_shape):
+        return LJtype_shape
+
+    def infer_dtype(self, uint_crd_f_dtype, LJtype_dtype, charge_dtype, boxlength_f_type, a_14_type, b_14_type,
+                    cf_scale_factor_type):
+        validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('LJtype_dtype', LJtype_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('charge_dtype', charge_dtype, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('boxlength_f_type', boxlength_f_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('a_14_type', a_14_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('b_14_type', b_14_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('cf_scale_factor_type', cf_scale_factor_type, [mstype.float32], self.name)
+
+        return charge_dtype
+
+
+class MDIterationLeapFrog(PrimitiveWithInfer):
+    """
+    MDIterationLeapFrog:
+
+    One step of the classical leap-frog algorithm to solve the finite-difference
+    Hamiltonian equations of motion for a certain system, using Langevin dynamics
+    with Liu's thermostat scheme. Assume the number of atoms is N and the target
+    control temperature is T.
+
+    The detailed iteration formula can be found in this paper: A unified thermostat
+    scheme for efficient configurational sampling for classical/quantum canonical
+    ensembles via molecular dynamics. DOI: 10.1063/1.4991621.
+
+    Inputs:
+        - **float4_numbers** (int32) - the total length used to store random numbers.
+        - **atom_numbers** (int32) - the number of atoms N.
+        - **dt** (float32) - the time step for the finite difference.
+        - **half_dt** (float32) - half of the time step for the finite difference.
+        - **exp_gamma** (float32) - parameter in Liu's dynamics, equal to
+          exp(-gamma_ln * dt), where gamma_ln is the friction factor in Langevin
+          dynamics.
+        - **max_velocity** (float32) - the upper limit of velocity; when the
+          velocity overflows, scale it back to the upper limit.
+        - **is_max_velocity** (int32) - whether the max velocity control is
+          enabled or not.
+
+        - **mass_inverse** (Tensor, float32) - [N,], the inverse value of the
+          mass of each atom.
+        - **sqrt_mass** (Tensor, float32) - [N,], the inverse square root value
+          of the effective mass in Liu's dynamics of each atom.
+
+    Outputs:
+        - **vel** (Tensor, float32) - [N, 3], the velocity of each atom.
+        - **crd** (Tensor, float32) - [N, 3], the coordinate of each atom.
+        - **frc** (Tensor, float32) - [N, 3], the force felt by each atom.
+        - **acc** (Tensor, float32) - [N, 3], the acceleration of each atom.
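+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+        A minimal placeholder sketch (the step-size and thermostat parameters
+        are arbitrary illustrative values, not a tuned simulation setup):
+
+        >>> import numpy as np
+        >>> import mindspore.common.dtype as mstype
+        >>> from mindspore import Tensor
+        >>> from mindspore.ops import operations as P
+        >>> # N = 4 atoms; dt, half_dt and exp_gamma are placeholder values
+        >>> net = P.MDIterationLeapFrog(float4_numbers=3, atom_numbers=4, half_dt=0.0005, dt=0.001,
+        ...                             exp_gamma=0.99, is_max_velocity=0, max_velocity=10.0)
+        >>> mass_inverse = Tensor(np.ones(4), mstype.float32)
+        >>> sqrt_mass = Tensor(np.ones(4), mstype.float32)
+        >>> vel, crd, frc, acc = net(mass_inverse, sqrt_mass)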
+    """
+
+    @prim_attr_register
+    def __init__(self, float4_numbers, atom_numbers, half_dt, dt, exp_gamma, is_max_velocity, max_velocity):
+        self.float4_numbers = float4_numbers
+        self.atom_numbers = atom_numbers
+        self.half_dt = half_dt
+        self.dt = dt
+        self.exp_gamma = exp_gamma
+        self.is_max_velocity = is_max_velocity
+        self.max_velocity = max_velocity
+
+        self.init_prim_io_names(
+            inputs=['mass_inverse', 'sqrt_mass'],
+            outputs=['vel', 'crd', 'frc', 'acc'])
+        self.add_prim_attr('float4_numbers', self.float4_numbers)
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+        self.add_prim_attr('half_dt', self.half_dt)
+        self.add_prim_attr('dt', self.dt)
+        self.add_prim_attr('exp_gamma', self.exp_gamma)
+        self.add_prim_attr('is_max_velocity', self.is_max_velocity)
+        self.add_prim_attr('max_velocity', self.max_velocity)
+
+    def infer_shape(self, mass_inverse_shape, sqrt_mass_shape):
+        return [self.atom_numbers, 3], [self.atom_numbers, 3], [self.atom_numbers, 3], [self.atom_numbers, 3]
+
+    def infer_dtype(self, mass_inverse_dtype, sqrt_mass_dtype):
+        validator.check_tensor_dtype_valid('mass_inverse_dtype', mass_inverse_dtype, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('sqrt_mass_dtype', sqrt_mass_dtype, [mstype.float32], self.name)
+
+        return mass_inverse_dtype, mass_inverse_dtype, mass_inverse_dtype, mass_inverse_dtype
+
+
+class PMEReciprocalForce(PrimitiveWithInfer):
+    """
+    PMEReciprocalForce:
+
+    Calculate the reciprocal part of the long-range Coulomb force using the
+    PME (Particle Mesh Ewald) method. Assume the number of atoms is N.
+
+    The detailed calculation formula of the PME (Particle Mesh Ewald) method
+    can be found in this paper: A Smooth Particle Mesh Ewald Method. DOI:
+    10.1063/1.470117.
+
+    Inputs:
+        - **atom_numbers** (int32) - the number of atoms, N.
+        - **beta** (float32) - the PME beta parameter, determined by the
+          non-bond cutoff value and simulation precision tolerance.
+        - **fftx** (int32) - the number of points for the Fourier transform
+          in dimension X.
+        - **ffty** (int32) - the number of points for the Fourier transform
+          in dimension Y.
+        - **fftz** (int32) - the number of points for the Fourier transform
+          in dimension Z.
+
+        - **boxlength** (Tensor, float32) - [3,], the length of the simulation
+          box in 3 dimensions.
+        - **uint_crd** (Tensor, uint32) - [N, 3], the unsigned int coordinate
+          value of each atom.
+        - **charge** (Tensor, float32) - [N,], the charge carried by each
+          atom.
+
+    Outputs:
+        - **force** (Tensor, float32) - [N, 3], the force felt by each atom.
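+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+        A minimal placeholder sketch (beta and the FFT grid sizes are arbitrary
+        illustrative values, not tuned for accuracy):
+
+        >>> import numpy as np
+        >>> import mindspore.common.dtype as mstype
+        >>> from mindspore import Tensor
+        >>> from mindspore.ops import operations as P
+        >>> # N = 4 atoms on a 4 x 4 x 4 FFT grid; placeholder values only
+        >>> net = P.PMEReciprocalForce(atom_numbers=4, beta=0.3, fftx=4, ffty=4, fftz=4)
+        >>> boxlength = Tensor(np.full(3, 100.0), mstype.float32)
+        >>> uint_crd = Tensor(np.random.randint(0, 2 ** 10, (4, 3)), mstype.uint32)
+        >>> charge = Tensor(np.ones(4), mstype.float32)
+        >>> force = net(boxlength, uint_crd, charge)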
+    """
+
+    @prim_attr_register
+    def __init__(self, atom_numbers, beta, fftx, ffty, fftz):
+        self.atom_numbers = atom_numbers
+        self.beta = beta
+        self.fftx = fftx
+        self.ffty = ffty
+        self.fftz = fftz
+        self.init_prim_io_names(inputs=['boxlength', 'uint_crd', 'charge'],
+                                outputs=['force'])
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+        self.add_prim_attr('beta', self.beta)
+        self.add_prim_attr('fftx', self.fftx)
+        self.add_prim_attr('ffty', self.ffty)
+        self.add_prim_attr('fftz', self.fftz)
+
+    def infer_shape(self, boxlength_shape, uint_crd_shape, charge_shape):
+        return uint_crd_shape
+
+    def infer_dtype(self, boxlength_type, uint_crd_type, charge_type):
+        validator.check_tensor_dtype_valid('boxlength_type', boxlength_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('uint_crd_type', uint_crd_type, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('charge_type', charge_type, [mstype.float32], self.name)
+        return charge_type
+
+
+class PMEExcludedForce(PrimitiveWithInfer):
+    """
+    PMEExcludedForce:
+
+    Calculate the excluded part of the long-range Coulomb force using the
+    PME (Particle Mesh Ewald) method. Assume the number of atoms is N, and
+    the length of the excluded list is E.
+
+    Inputs:
+        - **atom_numbers** (int32) - the number of atoms, N.
+        - **beta** (float32) - the PME beta parameter, determined by the
+          non-bond cutoff value and simulation precision tolerance.
+        - **scaler** (Tensor, float32) - [3,], the scale factor between real space
+          coordinates and its unsigned int value.
+        - **excluded_list_start** (Tensor, int32) - [N,], the start excluded index
+          in the excluded list for each atom.
+        - **excluded_numbers** (Tensor, int32) - [N,], the number of atoms excluded
+          in the excluded list for each atom.
+        - **excluded_list** (Tensor, int32) - [E,], the contiguous join of the
+          excluded lists of all atoms.
+
+        The rest of the inputs are the same as those of operator PMEReciprocalForce().
+
+    Outputs:
+        - **force** (Tensor, float32) - [N, 3], the force felt by each atom.
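+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+        A minimal placeholder sketch (the excluded list below is arbitrary and
+        only illustrates the expected shapes):
+
+        >>> import numpy as np
+        >>> import mindspore.common.dtype as mstype
+        >>> from mindspore import Tensor
+        >>> from mindspore.ops import operations as P
+        >>> # N = 4 atoms, E = 2 excluded entries; placeholder values only
+        >>> net = P.PMEExcludedForce(atom_numbers=4, beta=0.3)
+        >>> uint_crd = Tensor(np.random.randint(0, 2 ** 10, (4, 3)), mstype.uint32)
+        >>> scaler = Tensor(np.ones(3), mstype.float32)
+        >>> charge = Tensor(np.ones(4), mstype.float32)
+        >>> excluded_list_start = Tensor(np.array([0, 1, 2, 2]), mstype.int32)
+        >>> excluded_list = Tensor(np.array([1, 0]), mstype.int32)
+        >>> excluded_numbers = Tensor(np.array([1, 1, 0, 0]), mstype.int32)
+        >>> force = net(uint_crd, scaler, charge, excluded_list_start, excluded_list, excluded_numbers)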
+    """
+
+    @prim_attr_register
+    def __init__(self, atom_numbers, beta):
+        self.atom_numbers = atom_numbers
+        self.beta = beta
+        self.init_prim_io_names(
+            inputs=['uint_crd', 'scaler', 'charge', 'excluded_list_start', 'excluded_list', 'excluded_atom_numbers'],
+            outputs=['force'])
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+        self.add_prim_attr('beta', self.beta)
+
+    def infer_shape(self, uint_crd_shape, scaler_shape, charge_shape, excluded_list_start_shape, excluded_list_shape,
+                    excluded_atom_numbers_shape):
+        return uint_crd_shape
+
+    def infer_dtype(self, uint_crd_type, scaler_type, charge_type, excluded_list_start_type, excluded_list_type,
+                    excluded_atom_numbers_type):
+        validator.check_tensor_dtype_valid('scaler_type', scaler_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('uint_crd_type', uint_crd_type, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('charge_type', charge_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('excluded_list_start_type', excluded_list_start_type, [mstype.int32],
+                                           self.name)
+        validator.check_tensor_dtype_valid('excluded_list_type', excluded_list_type, [mstype.int32],
+                                           self.name)
+        validator.check_tensor_dtype_valid('excluded_atom_numbers_type', excluded_atom_numbers_type, [mstype.int32],
+                                           self.name)
+        return charge_type
+
+
+class PMEEnergy(PrimitiveWithInfer):
+    """
+    PMEEnergy:
+
+    Calculate the Coulomb energy of the system using the PME method.
+
+    .. math::
+
+        E = sum_{ij} q_i*q_j/r_{ij}
+
+    Inputs:
+        Same as those of operators PMEReciprocalForce(), PMEExcludedForce()
+        and PMEDirectAtomEnergy().
+
+    Outputs:
+        - **reciprocal_ene** (float32) - the reciprocal term of the PME energy.
+        - **self_ene** (float32) - the self term of the PME energy.
+        - **direct_ene** (float32) - the direct term of the PME energy.
+        - **correction_ene** (float32) - the correction term of the PME energy.
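+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+        A minimal placeholder sketch (the neighbor list is left empty and all
+        values are arbitrary; shapes follow the Inputs of the referenced operators):
+
+        >>> import numpy as np
+        >>> import mindspore.common.dtype as mstype
+        >>> from mindspore import Tensor
+        >>> from mindspore.ops import operations as P
+        >>> # N = 4 atoms, E = 2 excluded entries, at most 800 neighbors per atom
+        >>> net = P.PMEEnergy(atom_numbers=4, beta=0.3, fftx=4, ffty=4, fftz=4)
+        >>> box_length = Tensor(np.full(3, 100.0), mstype.float32)
+        >>> uint_crd = Tensor(np.random.randint(0, 2 ** 10, (4, 3)), mstype.uint32)
+        >>> charge = Tensor(np.ones(4), mstype.float32)
+        >>> nl_numbers = Tensor(np.zeros(4), mstype.int32)
+        >>> nl_serial = Tensor(np.zeros((4, 800)), mstype.int32)
+        >>> scaler = Tensor(np.ones(3), mstype.float32)
+        >>> excluded_list_start = Tensor(np.array([0, 1, 2, 2]), mstype.int32)
+        >>> excluded_list = Tensor(np.array([1, 0]), mstype.int32)
+        >>> excluded_numbers = Tensor(np.array([1, 1, 0, 0]), mstype.int32)
+        >>> reciprocal_ene, self_ene, direct_ene, correction_ene = net(
+        ...     box_length, uint_crd, charge, nl_numbers, nl_serial, scaler,
+        ...     excluded_list_start, excluded_list, excluded_numbers)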
+    """
+
+    @prim_attr_register
+    def __init__(self, atom_numbers, beta, fftx, ffty, fftz):
+        self.atom_numbers = atom_numbers
+        self.beta = beta
+        self.fftx = fftx
+        self.ffty = ffty
+        self.fftz = fftz
+        self.init_prim_io_names(
+            inputs=['box_length', 'uint_crd', 'charge', 'nl_numbers', 'nl_serial', 'scaler', 'excluded_list_start',
+                    'excluded_list', 'excluded_atom_numbers'],
+            outputs=['reciprocal_ene', 'self_ene', 'direct_ene', 'correction_ene'])
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+        self.add_prim_attr('beta', self.beta)
+        self.add_prim_attr('fftx', self.fftx)
+        self.add_prim_attr('ffty', self.ffty)
+        self.add_prim_attr('fftz', self.fftz)
+
+    def infer_shape(self, box_length, uint_crd, charge, nl_numbers, nl_serial, scaler, excluded_list_start,
+                    excluded_list, excluded_atom_numbers):
+        return (1,), (1,), (1,), (1,)
+
+    def infer_dtype(self, box_length, uint_crd, charge, nl_numbers, nl_serial, scaler, excluded_list_start,
+                    excluded_list, excluded_atom_numbers):
+        validator.check_tensor_dtype_valid('box_length', box_length, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('uint_crd', uint_crd, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('charge', charge, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('nl_numbers', nl_numbers, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('nl_serial', nl_serial, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('scaler', scaler, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('excluded_list_start', excluded_list_start, [mstype.int32],
+                                           self.name)
+        validator.check_tensor_dtype_valid('excluded_list', excluded_list, [mstype.int32],
+                                           self.name)
+        validator.check_tensor_dtype_valid('excluded_atom_numbers', excluded_atom_numbers, [mstype.int32],
+                                           self.name)
+        return charge, charge, charge, charge
+
+
+class LJEnergy(PrimitiveWithInfer):
+    """
+    LJEnergy:
+
+    Calculate the Van der Waals interaction energy described by the Lennard-Jones
+    potential for each atom. Assume the number of atoms is N, and the number
+    of Lennard-Jones types for all atoms is P, which means there will be
+    Q = P*(P+1)/2 types of possible Lennard-Jones interactions for all kinds
+    of atom pairs.
+
+    .. math::
+
+        dr = (x_a-x_b, y_a-y_b, z_a-z_b)
+        E = A/|dr|^{12} - B/|dr|^{6}
+
+    Inputs:
+        - **atom_numbers** (int32) - the number of atoms, N.
+        - **cutoff_square** (float32) - the square value of the cutoff.
+        - **uint_crd** (Tensor, uint32) - [N, 3], the unsigned int coordinate
+          value of each atom.
+        - **LJtype** (Tensor, int32) - [N,], the Lennard-Jones type of each
+          atom.
+        - **charge** (Tensor, float32) - [N,], the charge carried by each
+          atom.
+        - **scaler** (Tensor, float32) - [3,], the scale factor between real
+          space coordinates and its unsigned int value.
+        - **nl_numbers** (Tensor, int32) - [N,], the number of atoms in the
+          neighbor list of each atom.
+        - **nl_serial** (Tensor, int32) - [N, 800], the neighbor list of each atom,
+          the max number is 800.
+        - **d_LJ_A** (Tensor, float32) - [Q,], the Lennard-Jones A coefficient
+          of each kind of atom pair.
+        - **d_LJ_B** (Tensor, float32) - [Q,], the Lennard-Jones B coefficient
+          of each kind of atom pair.
+
+    Outputs:
+        - **d_LJ_energy_atom** (Tensor, float32) - [N,], the Lennard-Jones
+          potential energy of each atom.
+        - **d_LJ_energy_sum** (float32), the sum of the Lennard-Jones potential
+          energy of all atoms.
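+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+        A minimal placeholder sketch (an empty neighbor list with arbitrary
+        coefficients; shapes only):
+
+        >>> import numpy as np
+        >>> import mindspore.common.dtype as mstype
+        >>> from mindspore import Tensor
+        >>> from mindspore.ops import operations as P
+        >>> # N = 4 atoms, Q = 1 pair type; placeholder values only
+        >>> net = P.LJEnergy(atom_numbers=4, cutoff_square=100.0)
+        >>> uint_crd = Tensor(np.random.randint(0, 2 ** 10, (4, 3)), mstype.uint32)
+        >>> lj_type, charge = Tensor(np.zeros(4), mstype.int32), Tensor(np.ones(4), mstype.float32)
+        >>> scaler = Tensor(np.ones(3), mstype.float32)
+        >>> nl_numbers = Tensor(np.zeros(4), mstype.int32)
+        >>> nl_serial = Tensor(np.zeros((4, 800)), mstype.int32)
+        >>> d_lj_a, d_lj_b = Tensor(np.ones(1), mstype.float32), Tensor(np.ones(1), mstype.float32)
+        >>> d_lj_energy_atom = net(uint_crd, lj_type, charge, scaler, nl_numbers, nl_serial, d_lj_a, d_lj_b)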
+    """
+
+    @prim_attr_register
+    def __init__(self, atom_numbers, cutoff_square):
+        self.atom_numbers = atom_numbers
+        self.cutoff_square = cutoff_square
+        self.init_prim_io_names(
+            inputs=['uint_crd', 'LJtype', 'charge', 'scaler', 'nl_numbers', 'nl_serial', 'd_LJ_A', 'd_LJ_B'],
+            outputs=['d_LJ_energy_atom'])
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+        self.add_prim_attr('cutoff_square', self.cutoff_square)
+
+    def infer_shape(self, uint_crd, LJtype, charge, scaler, nl_numbers, nl_serial, d_LJ_A, d_LJ_B):
+        return charge
+
+    def infer_dtype(self, uint_crd, LJtype, charge, scaler, nl_numbers, nl_serial, d_LJ_A, d_LJ_B):
+        validator.check_tensor_dtype_valid('uint_crd', uint_crd, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('LJtype', LJtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('charge', charge, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('scaler', scaler, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('nl_numbers', nl_numbers, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('nl_serial', nl_serial, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('d_LJ_A', d_LJ_A, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('d_LJ_B', d_LJ_B, [mstype.float32], self.name)
+        return charge
+
+
+class LJForce(PrimitiveWithInfer):
+    """
+    LJForce:
+
+    Calculate the Van der Waals interaction force described by the Lennard-Jones
+    potential energy for each atom.
+
+    .. math::
+
+        dr = (x_a-x_b, y_a-y_b, z_a-z_b)
+        F = (-12*A/|dr|^{14} + 6*B/|dr|^{8}) * dr
+
+    Inputs:
+        Same as operator LJEnergy().
+
+    Outputs:
+        - **frc** (Tensor, float32) - [N, 3], the force felt by each atom.
+
+    Supported Platforms:
+        ``GPU``
+    Examples:
+    """
+
+    @prim_attr_register
+    def __init__(self, atom_numbers, cutoff_square):
+        self.atom_numbers = atom_numbers
+        self.cutoff_square = cutoff_square
+        self.init_prim_io_names(
+            inputs=['uint_crd', 'LJtype', 'charge', 'scaler', 'nl_numbers', 'nl_serial', 'd_LJ_A', 'd_LJ_B'],
+            outputs=['frc'])
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+        self.add_prim_attr('cutoff_square', self.cutoff_square)
+
+    def infer_shape(self, uint_crd, LJtype, charge, scaler, nl_numbers, nl_serial, d_LJ_A, d_LJ_B):
+        return uint_crd
+
+    def infer_dtype(self, uint_crd, LJtype, charge, scaler, nl_numbers, nl_serial, d_LJ_A, d_LJ_B):
+        validator.check_tensor_dtype_valid('uint_crd', uint_crd, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('LJtype', LJtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('charge', charge, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('scaler', scaler, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('nl_numbers', nl_numbers, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('nl_serial', nl_serial, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('d_LJ_A', d_LJ_A, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('d_LJ_B', d_LJ_B, [mstype.float32], self.name)
+        return charge
+
+
+class LJForceWithPMEDirectForce(PrimitiveWithInfer):
+    """
+    LJForceWithPMEDirectForce:
+
+    Calculate the Lennard-Jones force and the PME direct force together.
+
+    The calculation formula of the Lennard-Jones part is the same as operator
+    LJForce(), and the PME direct part follows the direct term of the
+    PME method.
+
+    Inputs:
+        - **pme_beta** (float32), the PME beta parameter, same as operator
+          PMEReciprocalForce().
+
+        The rest of the inputs are the same as those of operator LJForce().
+
+    Outputs:
+        - **frc** (Tensor, float32) - [N, 3], the force felt by each atom.
+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+    """
+
+    @prim_attr_register
+    def __init__(self, atom_numbers, cutoff, pme_beta):
+        self.atom_numbers = atom_numbers
+        self.cutoff = cutoff
+        self.pme_beta = pme_beta
+        self.init_prim_io_names(
+            inputs=['uint_crd', 'LJtype', 'charge', 'scaler', 'nl_numbers', 'nl_serial', 'd_LJ_A', 'd_LJ_B'],
+            outputs=['frc'])
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+        self.add_prim_attr('cutoff', self.cutoff)
+        self.add_prim_attr('pme_beta', self.pme_beta)
+
+    def infer_shape(self, uint_crd, LJtype, charge, scaler, nl_numbers, nl_serial, d_LJ_A, d_LJ_B):
+        return uint_crd
+
+    def infer_dtype(self, uint_crd, LJtype, charge, scaler, nl_numbers, nl_serial, d_LJ_A, d_LJ_B):
+        validator.check_tensor_dtype_valid('uint_crd', uint_crd, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('LJtype', LJtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('charge', charge, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('scaler', scaler, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('nl_numbers', nl_numbers, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('nl_serial', nl_serial, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('d_LJ_A', d_LJ_A, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('d_LJ_B', d_LJ_B, [mstype.float32], self.name)
+        return charge
+
+
+class GetCenterOfGeometry(PrimitiveWithInfer):
+    """
+    GetCenterOfGeometry:
+
+    Calculate the geometric center of the given set of atoms.
+
+    Supported Platforms:
+        ``GPU``
+    Examples:
+    """
+
+    @prim_attr_register
+    def __init__(self, center_numbers, center_numbers_inverse):
+        self.center_numbers = center_numbers
+        self.center_numbers_inverse = center_numbers_inverse
+        self.add_prim_attr('center_numbers', self.center_numbers)
+        self.add_prim_attr('center_numbers_inverse', self.center_numbers_inverse)
+        self.init_prim_io_names(
+            inputs=['center_atoms', 'crd_f'],
+            outputs=['center_of_geometry_f'])
+
+    def infer_shape(self, center_atoms_shape, crd_f_shape):
+        cls_name = self.name
+        N = self.center_numbers
+        validator.check_int(center_atoms_shape[0], N, Rel.EQ, "center_atoms_shape", cls_name)
+        validator.check_int(crd_f_shape[0], N, Rel.EQ, "crd_f_shape", cls_name)
+        validator.check_int(crd_f_shape[1], 3, Rel.EQ, "crd_f_shape", cls_name)
+        return [3,]
+
+    def infer_dtype(self, center_atoms_dtype, crd_f_dtype):
+        validator.check_tensor_dtype_valid('center_atoms_dtype', center_atoms_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('crd_f_dtype', crd_f_dtype, [mstype.float32], self.name)
+
+        return crd_f_dtype
+
+
+class MDTemperature(PrimitiveWithInfer):
+    """
+    MDTemperature:
+
+    Calculate the temperature of each residue according to the velocities
+    and masses of the atoms it contains.
+
+    Supported Platforms:
+        ``GPU``
+    Examples:
+    """
+
+    @prim_attr_register
+    def __init__(self, residue_numbers):
+        self.residue_numbers = residue_numbers
+        self.add_prim_attr('residue_numbers', self.residue_numbers)
+        self.init_prim_io_names(
+            inputs=['start', 'end', 'atom_vel_f', 'atom_mass'],
+            outputs=['ek'])
+
+    def infer_shape(self, start_shape, end_shape, atom_vel_f_shape, atom_mass_shape):
+        cls_name = self.name
+        N = self.residue_numbers
+        validator.check_int(start_shape[0], N, Rel.EQ, "start_shape", cls_name)
+        validator.check_int(end_shape[0], N, Rel.EQ, "end_shape", cls_name)
+        validator.check_int(atom_vel_f_shape[0], N, Rel.EQ, "atom_vel_f_shape", cls_name)
+        validator.check_int(atom_vel_f_shape[1], 3, Rel.EQ, "atom_vel_f_shape", cls_name)
+        validator.check_int(atom_mass_shape[0], N, Rel.EQ, "atom_mass_shape", cls_name)
+        return [N,]
+
+    def infer_dtype(self, start_dtype, end_dtype, atom_vel_f_dtype, atom_mass_dtype):
+        validator.check_tensor_dtype_valid('start_dtype', start_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('end_dtype', end_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('atom_vel_f_dtype', atom_vel_f_dtype, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('atom_mass_dtype', atom_mass_dtype, [mstype.float32], self.name)
+        return atom_mass_dtype
+
+
+class NeighborListUpdate(PrimitiveWithInfer):
+    """
+    NeighborListUpdate:
+
+    Update (or construct, if for the first time) the Verlet neighbor list for
+    the calculation of the short-ranged force. Assume the number of atoms is N,
+    the number of grids divided is G, the maximum number of atoms in one
+    grid is M, the maximum number of atoms in a single atom's neighbor list
+    is L, and the number of total atoms in the excluded list is E.
+
+    Inputs:
+        - **grid_numbers** (int32) - the total number of grids divided.
+        - **refresh_count** (int32) - the counter which counts how many
+          iteration steps have passed since the last update.
+        - **not_first_time** (int32) - whether to construct the neighbor
+          list for the first time or not.
+        - **Nxy** (int32) - the total number of grids divided in the xy plane.
+        - **excluded_atom_numbers** (int32) - the total atom numbers in
+          the excluded list.
+        - **cutoff** (float32) - the cutoff distance for the short-range force
+          calculation.
+        - **skin** (float32) - the overflow value over the cutoff to maintain
+          the neighbor list.
+        - **cutoff_square** (float32) - the square value of the cutoff.
+        - **half_skin_square** (float32) - skin*skin/4, indicates the maximum
+          square value of the distance an atom is allowed to move between two
+          updates.
+        - **cutoff_with_skin** (float32) - cutoff + skin, indicates the
+          radius of the neighbor list for each atom.
+        - **half_cutoff_with_skin** (float32) - cutoff_with_skin/2.
+        - **cutoff_with_skin_square** (float32) - the square value of
+          cutoff_with_skin.
+        - **refresh_interval** (int32) - the number of iteration steps
+          between two updates of the neighbor list.
+        - **max_atom_in_grid_numbers** (int32) - the maximum number of atoms
+          in one grid.
+
+        - **atom_numbers_in_grid_bucket** (Tensor, int32) - [G,], the number
+          of atoms in each grid bucket.
+        - **bucket** (Tensor, int32) - [G, M], the atom indices in each grid
+          bucket.
+        - **crd** (Tensor, float32) - [N, 3], the coordinates of each atom.
+        - **box_length** (Tensor, float32) - [3,], the length of the 3 dimensions
+          of the simulation box.
+        - **grid_N** (Tensor, int32) - [3,], the number of grids divided in the
+          3 dimensions of the simulation box.
+        - **grid_length_inverse** (Tensor, float32) - [3,], the inverse value of
+          the grid length.
+        - **atom_in_grid_serial** (Tensor, int32) - [N,], the grid index for
+          each atom.
+        - **old_crd** (Tensor, float32) - [N, 3], the coordinates of each atom
+          before the update.
+        - **crd_to_uint_crd_cof** (Tensor, float32) - [3,], the scale factor
+          between the unsigned int value and the real space coordinates.
+        - **uint_crd** (Tensor, uint32) - [N, 3], the unsigned int coordinate
+          value of each atom.
+        - **gpointer** (Tensor, int32) - [G, 125], the 125 nearest neighbor grids
+          (including self) of each grid.
+        - **nl_atom_numbers** (Tensor, int32) - [N,], the number of atoms in the
+          neighbor list of each atom.
+        - **nl_atom_serial** (Tensor, int32) - [N, L], the indices of the atoms in
+          the neighbor list of each atom.
+        - **uint_dr_to_dr_cof** (Tensor, float32) - [3,], the scale factor between
+          the real space coordinates and the unsigned int value.
+        - **excluded_list_start** (Tensor, int32) - [N,], the start excluded
+          index in the excluded list for each atom.
+        - **excluded_numbers** (Tensor, int32) - [N,], the number of atoms excluded
+          in the excluded list for each atom.
+        - **excluded_list** (Tensor, int32) - [E,], the contiguous join of the
+          excluded lists of all atoms.
+        - **need_refresh_flag** (Tensor, int32) - [N,], whether the neighbor list
+          of each atom needs to be updated or not.
+
+    Outputs:
+        - **res** (float32)
+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+    """
+
+    @prim_attr_register
+    def __init__(self, grid_numbers, atom_numbers, refresh_count, not_first_time, Nxy, excluded_atom_numbers,
+                 cutoff_square, half_skin_square, cutoff_with_skin, half_cutoff_with_skin, cutoff_with_skin_square,
+                 refresh_interval=20, cutoff=10.0, skin=2.0, max_atom_in_grid_numbers=64, max_neighbor_numbers=800):
+        self.grid_numbers = grid_numbers
+        self.atom_numbers = atom_numbers
+        self.refresh_count = refresh_count
+        self.refresh_interval = refresh_interval
+        self.not_first_time = not_first_time
+        self.cutoff = cutoff
+        self.skin = skin
+        self.max_atom_in_grid_numbers = max_atom_in_grid_numbers
+        self.Nxy = Nxy
+        self.excluded_atom_numbers = excluded_atom_numbers
+        self.cutoff_square = cutoff_square
+        self.half_skin_square = half_skin_square
+        self.cutoff_with_skin = cutoff_with_skin
+        self.half_cutoff_with_skin = half_cutoff_with_skin
+        self.cutoff_with_skin_square = cutoff_with_skin_square
+        self.max_neighbor_numbers = max_neighbor_numbers
+        self.init_prim_io_names(
+            inputs=['atom_numbers_in_grid_bucket', 'bucket', 'crd', 'box_length', 'grid_N', 'grid_length_inverse',
+                    'atom_in_grid_serial', 'old_crd', 'crd_to_uint_crd_cof', 'uint_crd', 'gpointer', 'nl_atom_numbers',
+                    'nl_atom_serial', 'uint_dr_to_dr_cof', 'excluded_list_start', 'excluded_list', 'excluded_numbers',
+                    'need_refresh_flag'], outputs=['res'])
+
+        self.add_prim_attr('grid_numbers', self.grid_numbers)
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+        self.add_prim_attr('refresh_count', self.refresh_count)
+        self.add_prim_attr('refresh_interval', self.refresh_interval)
+        self.add_prim_attr('not_first_time', self.not_first_time)
+        self.add_prim_attr('cutoff', self.cutoff)
+        self.add_prim_attr('skin', self.skin)
+        self.add_prim_attr('max_atom_in_grid_numbers', self.max_atom_in_grid_numbers)
+        self.add_prim_attr('Nxy', self.Nxy)
+        self.add_prim_attr('excluded_atom_numbers', self.excluded_atom_numbers)
+        self.add_prim_attr('cutoff_square', self.cutoff_square)
+        self.add_prim_attr('half_skin_square', self.half_skin_square)
+        self.add_prim_attr('cutoff_with_skin', self.cutoff_with_skin)
+        self.add_prim_attr('half_cutoff_with_skin', self.half_cutoff_with_skin)
+        self.add_prim_attr('cutoff_with_skin_square', self.cutoff_with_skin_square)
+
+    def infer_shape(self, atom_numbers_in_grid_bucket_shape, bucket_shape, crd_shape, box_length_shape, grid_N_shape,
+                    grid_length_inverse_shape, atom_in_grid_serial_shape, old_crd_shape, crd_to_uint_crd_cof_shape,
+                    uint_crd_shape, gpointer_shape, nl_atom_numbers_shape, nl_atom_serial_shape,
+                    uint_dr_to_dr_cof_shape, excluded_list_start_shape, excluded_list_shape, excluded_numbers_shape,
+                    need_refresh_flag_shape):
+        assert len(atom_numbers_in_grid_bucket_shape) == 1
+        assert len(bucket_shape) == 2
+        assert len(crd_shape) == 2
+        assert len(box_length_shape) == 1
+        assert len(grid_N_shape) == 1
+        assert len(grid_length_inverse_shape) == 1
+        assert len(atom_in_grid_serial_shape) == 1
+        assert len(old_crd_shape) == 2
+        assert len(crd_to_uint_crd_cof_shape) == 1
+        assert len(uint_crd_shape) == 2
+        assert len(gpointer_shape) == 2
+        assert len(nl_atom_numbers_shape) == 1
+        assert len(nl_atom_serial_shape) == 2
+        assert len(uint_dr_to_dr_cof_shape) == 1
+        assert len(excluded_list_start_shape) == 1
+        assert len(excluded_list_shape) == 1
+        assert len(excluded_numbers_shape) == 1
+        assert len(need_refresh_flag_shape) == 1
+
+        validator.check_int(atom_numbers_in_grid_bucket_shape[0], self.grid_numbers, Rel.EQ,
+                            "atom_numbers_in_grid_bucket_shape", self.name)
+        validator.check_int(bucket_shape[0], self.grid_numbers, Rel.EQ, "bucket_shape", self.name)
+        validator.check_int(bucket_shape[1], self.max_atom_in_grid_numbers, Rel.EQ, "bucket_shape", self.name)
+        validator.check_int(crd_shape[0], self.atom_numbers, Rel.EQ, "crd_shape", self.name)
+        validator.check_int(crd_shape[1], 3, Rel.EQ, "crd_shape", self.name)
+        validator.check_int(box_length_shape[0], 3, Rel.EQ, "box_length_shape", self.name)
+        validator.check_int(grid_N_shape[0], 3, Rel.EQ, "grid_N_shape", self.name)
+        validator.check_int(grid_length_inverse_shape[0], 3, Rel.EQ, "grid_length_inverse_shape", self.name)
+        validator.check_int(atom_in_grid_serial_shape[0], self.atom_numbers, Rel.EQ, "atom_in_grid_serial_shape",
+                            self.name)
+        validator.check_int(old_crd_shape[0], self.atom_numbers, Rel.EQ, "old_crd_shape", self.name)
+        validator.check_int(old_crd_shape[1], 3, Rel.EQ, "old_crd_shape", self.name)
+        validator.check_int(crd_to_uint_crd_cof_shape[0], 3, Rel.EQ, "crd_to_uint_crd_cof_shape", self.name)
+        validator.check_int(uint_crd_shape[0], self.atom_numbers, Rel.EQ, "uint_crd_shape", self.name)
+        validator.check_int(uint_crd_shape[1], 3, Rel.EQ, "uint_crd_shape", self.name)
+        validator.check_int(gpointer_shape[0], self.grid_numbers, Rel.EQ, "gpointer_shape", self.name)
+        validator.check_int(gpointer_shape[1], 125, Rel.EQ, "gpointer_shape", self.name)
+        validator.check_int(nl_atom_numbers_shape[0], self.atom_numbers, Rel.EQ, "nl_atom_numbers_shape", self.name)
+        validator.check_int(nl_atom_serial_shape[0], self.atom_numbers, Rel.EQ, "nl_atom_serial_shape", self.name)
+        validator.check_int(nl_atom_serial_shape[1], self.max_neighbor_numbers, Rel.EQ, "nl_atom_serial_shape",
+                            self.name)
+        validator.check_int(uint_dr_to_dr_cof_shape[0], 3, Rel.EQ, "uint_dr_to_dr_cof_shape", self.name)
+        validator.check_int(excluded_list_start_shape[0], self.atom_numbers, Rel.EQ, "excluded_list_start_shape",
+                            self.name)
+        validator.check_int(excluded_list_shape[0], self.excluded_atom_numbers, Rel.EQ, "excluded_list_shape",
+                            self.name)
+        validator.check_int(excluded_numbers_shape[0], self.atom_numbers, Rel.EQ, "excluded_numbers_shape", self.name)
+        validator.check_int(need_refresh_flag_shape[0], 1, Rel.EQ, "need_refresh_flag_shape", self.name)
+
+        return [1,]
+
+    def infer_dtype(self, atom_numbers_in_grid_bucket_dtype, bucket_dtype, crd_dtype, box_length_dtype, grid_N_dtype,
+                    grid_length_inverse_dtype, atom_in_grid_serial_dtype, old_crd_dtype, crd_to_uint_crd_cof_dtype,
+                    uint_crd_dtype, gpointer_dtype, nl_atom_numbers_dtype, nl_atom_serial_dtype,
+                    uint_dr_to_dr_cof_dtype, excluded_list_start_dtype, excluded_list_dtype, excluded_numbers_dtype,
+                    need_refresh_flag_dtype):
+        validator.check_tensor_dtype_valid('atom_numbers_in_grid_bucket_dtype', atom_numbers_in_grid_bucket_dtype,
+                                           [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('bucket_dtype', bucket_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('crd_dtype', crd_dtype, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('box_length_dtype', box_length_dtype, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('grid_N_dtype', grid_N_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('grid_length_inverse_dtype', grid_length_inverse_dtype, [mstype.float32],
+                                           self.name)
+        validator.check_tensor_dtype_valid('atom_in_grid_serial_dtype', atom_in_grid_serial_dtype, [mstype.int32],
+                                           self.name)
+        validator.check_tensor_dtype_valid('old_crd_dtype', old_crd_dtype, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('crd_to_uint_crd_cof_dtype', crd_to_uint_crd_cof_dtype, [mstype.float32],
+                                           self.name)
+        validator.check_tensor_dtype_valid('uint_crd_dtype', uint_crd_dtype, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('gpointer_dtype', gpointer_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('nl_atom_numbers_dtype', nl_atom_numbers_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('nl_atom_serial_dtype', nl_atom_serial_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('uint_dr_to_dr_cof_dtype', uint_dr_to_dr_cof_dtype, [mstype.float32],
+                                           self.name)
+        validator.check_tensor_dtype_valid('excluded_list_start_dtype', excluded_list_start_dtype, [mstype.int32],
+                                           self.name)
+        validator.check_tensor_dtype_valid('excluded_list_dtype', excluded_list_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('excluded_numbers_dtype', excluded_numbers_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('need_refresh_flag_dtype', need_refresh_flag_dtype, [mstype.int32],
+                                           self.name)
+
+        return mstype.float32