commit aa7fdf53a7 (master, pull/14121/head)
parent a48785cdcc
author q00596439

    03252 sponge ops zoo cu

@@ -50,7 +50,7 @@ __global__ void AngleAtomEnergyKernel(int angle_numbers, const UNSIGNED_INT_VECT
void AngleAtomEnergy(int angle_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
const int *atom_a, const int *atom_b, const int *atom_c, const float *angle_k,
const float *angle_theta0, float *ene, cudaStream_t stream) {
- Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128>>>(atom_numbers, ene, 0.);
+ Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, ene, 0.);
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(angle_numbers) / 128);
UNSIGNED_INT_VECTOR *uint_crd =

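Nearly every hunk in this diff makes the same change: Reset_List used to be launched with a bare <<<grid, 128>>> configuration, which places it on the default CUDA stream, while the force/energy kernel that follows runs on the stream handed in by the caller. Adding ", 0, stream" (zero bytes of dynamic shared memory, then the explicit stream) puts the zero-fill on that same stream, so it is ordered before the kernel that accumulates into the freshly cleared buffer instead of relying on implicit default-stream synchronization. A minimal sketch of the launch pattern, assuming a simple fill kernel in place of Reset_List (whose body is not part of this diff):

#include <cmath>
#include <cuda_runtime.h>

// Hypothetical stand-in for Reset_List: each thread writes `value` into one slot.
__global__ void FillKernel(int element_numbers, float *list, float value) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i < element_numbers) {
    list[i] = value;
  }
}

void ResetOnStream(int element_numbers, float *list, float value, cudaStream_t stream) {
  int block = 128;
  int grid = static_cast<int>(ceilf(static_cast<float>(element_numbers) / block));
  // <<<grid, block>>> alone would use the default stream; the extra ", 0, stream"
  // (dynamic shared memory size, stream) keeps the reset ordered with the kernels
  // enqueued on the same stream immediately afterwards.
  FillKernel<<<grid, block, 0, stream>>>(element_numbers, list, value);
}
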
@@ -69,7 +69,7 @@ __global__ void AngleForceKernel(int angle_numbers, const UNSIGNED_INT_VECTOR *u
void AngleForce(int angle_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f, const int *atom_a,
const int *atom_b, const int *atom_c, const float *angle_k, const float *angle_theta0, float *frc_f,
cudaStream_t stream) {
- Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, frc_f, 0.);
+ Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(angle_numbers) / 128);
UNSIGNED_INT_VECTOR *uint_crd =

@@ -73,7 +73,7 @@ __global__ void AngleForceWithAtomEnergyKernel(int angle_numbers, const UNSIGNED
void AngleForceWithAtomEnergy(int angle_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
const int *atom_a, const int *atom_b, const int *atom_c, const float *angle_k,
const float *angle_theta0, float *frc_f, float *ene, cudaStream_t stream) {
- Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, frc_f, 0.);
+ Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(angle_numbers) / 128);
UNSIGNED_INT_VECTOR *uint_crd =

@@ -41,7 +41,7 @@ __global__ void BondAtomEnergyCudaKernel(const int bond_numbers, const UNSIGNED_
void BondAtomEnergy(int bond_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f, const int *atom_a,
const int *atom_b, const float *bond_k, const float *bond_r0, float *atom_ene,
cudaStream_t stream) {
- Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128>>>(atom_numbers, atom_ene, 0.);
+ Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_ene, 0.);
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(bond_numbers) / 128);
UNSIGNED_INT_VECTOR *uint_crd =

@@ -45,7 +45,7 @@ __global__ void BondForceCudaKernel(int bond_numbers, const UNSIGNED_INT_VECTOR
void BondForce(int bond_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f, const int *atom_a,
const int *atom_b, const float *bond_k, const float *bond_r0, float *frc_f, cudaStream_t stream) {
- Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, frc_f, 0.);
+ Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(bond_numbers) / 128);
UNSIGNED_INT_VECTOR *uint_crd =

@@ -52,8 +52,8 @@ __global__ void BondForceWithAtomEnergyKernel(int bond_numbers, const UNSIGNED_I
void BondForceWithAtomEnergy(int bond_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0,
float *frc_f, float *atom_e, cudaStream_t stream) {
- Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, frc_f, 0.);
- Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128>>>(atom_numbers, atom_e, 0.);
+ Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
+ Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_e, 0.);
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(bond_numbers) / 128);
UNSIGNED_INT_VECTOR *uint_crd =

@@ -52,8 +52,8 @@ __global__ void BondForceWithAtomVirialKernel(int bond_numbers, const UNSIGNED_I
void BondForceWithAtomVirial(int bond_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0,
float *frc_f, float *atom_v, cudaStream_t stream) {
- Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, frc_f, 0.);
- Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128>>>(atom_numbers, atom_v, 0.);
+ Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
+ Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_v, 0.);
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(bond_numbers) / 128);
UNSIGNED_INT_VECTOR *uint_crd =

@@ -67,7 +67,7 @@ void DihedralAtomEnergy(int dihedral_numbers, int atom_numbers, const int *uint_
const int *atom_a, const int *atom_b, const int *atom_c, const int *atom_d, const int *ipn,
const float *pk, const float *gamc, const float *gams, const float *pn, float *ene,
cudaStream_t stream) {
- Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128>>>(atom_numbers, ene, 0.);
+ Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, ene, 0.);
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(dihedral_numbers) / 128);
UNSIGNED_INT_VECTOR *uint_crd =

@@ -103,7 +103,7 @@ void DihedralForce(int dihedral_numbers, int atom_numbers, const int *uint_crd_f
const int *atom_a, const int *atom_b, const int *atom_c, const int *atom_d, const int *ipn,
const float *pk, const float *gamc, const float *gams, const float *pn, float *frc_f,
cudaStream_t stream) {
- Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, frc_f, 0.);
+ Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(dihedral_numbers) / 128);
UNSIGNED_INT_VECTOR *uint_crd =

@@ -107,7 +107,7 @@ void DihedralForceWithAtomEnergy(int dihedral_numbers, int atom_numbers, const i
const int *atom_a, const int *atom_b, const int *atom_c, const int *atom_d,
const int *ipn, const float *pk, const float *gamc, const float *gams, const float *pn,
float *frc_f, float *ene, cudaStream_t stream) {
- Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, frc_f, 0.);
+ Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(dihedral_numbers) / 128);
UNSIGNED_INT_VECTOR *uint_crd =

@@ -92,7 +92,7 @@ void LJForce(const int atom_numbers, const float cutoff_square, const int *uint_
const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers,
int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B, float *frc_f,
cudaStream_t stream) {
- Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, frc_f, 0.);
+ Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
VECTOR *frc = reinterpret_cast<VECTOR *>(frc_f);
VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
int max_neighbor_numbers = 800;

@@ -106,7 +106,7 @@ void LJForceWithPMEDirectForce(const int atom_numbers, const float cutoff, const
const int *LJtype, const float *charge, const float *scaler_f, float *uint_crd_with_LJ,
int *nl_atom_numbers, int *nl_atom_serial, int *nl, const float *d_LJ_A,
const float *d_LJ_B, float *frc_f, cudaStream_t stream) {
- Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, frc_f, 0.);
+ Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
VECTOR *frc = reinterpret_cast<VECTOR *>(frc_f);
VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
int max_neighbor_numbers = 800;

@@ -66,7 +66,7 @@ void Dihedral14CFAtomEnergy(const int dihedral_14_numbers, const int atom_number
atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
- Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(atom_numbers, ene, 0.);
+ Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, ene, 0.);
Dihedral14CFAtomEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, cf_scale_factor, ene);

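Each of the Dihedral14* wrappers first packs per-atom data into a UINT_VECTOR_LJ_TYPE array (unsigned coordinates plus LJ type and charge; the struct appears near the end of this diff) via Copy_Crd_To_New_Crd_Start before the 1-4 kernels run. That kernel's body is not shown here; the sketch below only illustrates the packing step implied by the call sites and the added struct. The UINT_VECTOR_LJ_TYPE fields come from this diff, while the UNSIGNED_INT_VECTOR layout and the kernel name are assumptions:

// Hypothetical packing kernel: gathers coordinates, LJ type and charge into one
// record per atom so the 1-4 kernels read a single contiguous structure.
struct UNSIGNED_INT_VECTOR {
  unsigned int uint_x, uint_y, uint_z;
};

struct UINT_VECTOR_LJ_TYPE {
  unsigned int uint_x, uint_y, uint_z;
  int LJ_type;
  float charge;
};

__global__ void PackCrdWithLJ(int atom_numbers, const UNSIGNED_INT_VECTOR *crd,
                              UINT_VECTOR_LJ_TYPE *crd_with_lj, const int *LJtype,
                              const float *charge) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i < atom_numbers) {
    crd_with_lj[i].uint_x = crd[i].uint_x;
    crd_with_lj[i].uint_y = crd[i].uint_y;
    crd_with_lj[i].uint_z = crd[i].uint_z;
    crd_with_lj[i].LJ_type = LJtype[i];
    crd_with_lj[i].charge = charge[i];
  }
}
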
@@ -52,21 +52,20 @@ __global__ void Dihedral14CFEnergyKernel(const int dihedral_14_numbers, const UI
}
void Dihedral14CFEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype,
- const float *charge, const float *boxlength_f, const int *a_14, const int *b_14,
- const float *cf_scale_factor, float *ene, cudaStream_t stream) {
+ const float *charge, float *uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14,
+ const int *b_14, const float *cf_scale_factor, float *ene, cudaStream_t stream) {
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
- UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL;
- Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers);
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
+ UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ_f);
Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
- Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(atom_numbers, ene, 0.);
+ Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, ene, 0.);
Dihedral14CFEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, cf_scale_factor, ene);
@@ -76,5 +75,5 @@ void Dihedral14CFEnergy(const int dihedral_14_numbers, const int atom_numbers, c
}
void Dihedral14CFEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype,
- const float *charge, const float *boxlength_f, const int *a_14, const int *b_14,
- const float *cf_scale_factor, float *ene, cudaStream_t stream);
+ const float *charge, float *uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14,
+ const int *b_14, const float *cf_scale_factor, float *ene, cudaStream_t stream);

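Besides threading the stream through, the Dihedral14CFEnergy / Dihedral14LJEnergy / Dihedral14LJCFForceWithAtomEnergy wrappers stop allocating the UINT_VECTOR_LJ_TYPE scratch array with Cuda_Malloc_Safely on every call; they now take a caller-provided uint_crd_with_LJ_f buffer and simply reinterpret it as the struct type. A condensed before/after sketch with most parameters omitted; plain cudaMalloc stands in for Cuda_Malloc_Safely so the sketch is self-contained, and the function names here are illustrative:

#include <cuda_runtime.h>

struct UINT_VECTOR_LJ_TYPE {  // same layout as the struct added at the end of this diff
  unsigned int uint_x, uint_y, uint_z;
  int LJ_type;
  float charge;
};

// Before: device scratch memory allocated inside the wrapper on every launch.
void Dihedral14EnergyOld(int atom_numbers, cudaStream_t stream) {
  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = nullptr;
  cudaMalloc(reinterpret_cast<void **>(&uint_crd_with_LJ),
             sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers);
  // ... Copy_Crd_To_New_Crd_Start + energy kernel on `stream` ...
}

// After: the caller owns the buffer (a float* workspace reinterpreted to the struct
// type, exactly as in the hunks above), so no per-launch allocation is needed.
void Dihedral14EnergyNew(int atom_numbers, float *uint_crd_with_LJ_f, cudaStream_t stream) {
  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ =
      reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ_f);
  // ... Copy_Crd_To_New_Crd_Start + energy kernel on `stream`, using uint_crd_with_LJ ...
}
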
@@ -20,6 +20,6 @@
#include "runtime/device/gpu/cuda_common.h"
void Dihedral14CFEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype,
- const float *charge, const float *boxlength, const int *a_14, const int *b_14,
- const float *cf_scale_factor, float *ene, cudaStream_t stream);
+ const float *charge, float *uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14,
+ const int *b_14, const float *cf_scale_factor, float *ene, cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ENERGY_IMPL_H

@@ -87,7 +87,7 @@ void Dihedral14LJAtomEnergy(const int dihedral_14_numbers, const int atom_number
atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
- Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(atom_numbers, ene, 0.);
+ Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, ene, 0.);
Dihedral14LJAtomEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, LJ_type_A, LJ_type_B, ene);

@@ -105,22 +105,23 @@ __global__ void Dihedral14LJCFForceWithAtomEnergyKernel(const int dihedral_14_nu
}
void Dihedral14LJCFForceWithAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
- const int *LJtype, const float *charge, const float *boxlength_f,
- const int *a_14, const int *b_14, const float *lj_scale_factor,
- const float *cf_scale_factor, const float *LJ_type_A, const float *LJ_type_B,
- float *frc_f, float *atom_energy, cudaStream_t stream) {
+ const int *LJtype, const float *charge, float *uint_crd_with_LJ_f,
+ const float *boxlength_f, const int *a_14, const int *b_14,
+ const float *lj_scale_factor, const float *cf_scale_factor,
+ const float *LJ_type_A, const float *LJ_type_B, float *frc_f, float *atom_energy,
+ cudaStream_t stream) {
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
- UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL;
- Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers);
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
+ UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ_f);
Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
- Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, frc_f, 0.);
- Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(atom_numbers, atom_energy, 0.);
+ Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
+ Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_energy, 0.);
VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
@@ -133,8 +134,9 @@ void Dihedral14LJCFForceWithAtomEnergy(const int dihedral_14_numbers, const int
return;
}
- void Dihedral14LJForceWithDirectCF(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
- const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
- const int *b_14, const float *lj_scale_factor, const float *cf_scale_factor,
- const float *LJ_type_A, const float *LJ_type_B, float *frc, float *atom_energy,
- cudaStream_t stream);
+ void Dihedral14LJCFForceWithAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
+ const int *LJtype, const float *charge, float *uint_crd_with_LJ_f,
+ const float *boxlength_f, const int *a_14, const int *b_14,
+ const float *lj_scale_factor, const float *cf_scale_factor,
+ const float *LJ_type_A, const float *LJ_type_B, float *frc_f, float *atom_energy,
+ cudaStream_t stream);

@@ -20,8 +20,9 @@
#include "runtime/device/gpu/cuda_common.h"
void Dihedral14LJCFForceWithAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
- const int *LJtype, const float *charge, const float *boxlength_f,
- const int *a_14, const int *b_14, const float *lj_scale_factor,
- const float *cf_scale_factor, const float *LJ_type_A, const float *LJ_type_B,
- float *frc, float *atom_energy, cudaStream_t stream);
+ const int *LJtype, const float *charge, float *uint_crd_with_LJ_f,
+ const float *boxlength_f, const int *a_14, const int *b_14,
+ const float *lj_scale_factor, const float *cf_scale_factor,
+ const float *LJ_type_A, const float *LJ_type_B, float *frc_f, float *atom_energy,
+ cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_IMPL_H

@@ -72,20 +72,19 @@ __global__ void Dihedral14LJEnergyKernel(const int dihedral_14_numbers, const UI
}
void Dihedral14LJEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype,
- const float *charge, const float *boxlength_f, const int *a_14, const int *b_14,
- const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B, float *ene,
- cudaStream_t stream) {
+ const float *charge, float *uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14,
+ const int *b_14, const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B,
+ float *ene, cudaStream_t stream) {
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
- UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL;
- Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers);
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
+ UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ_f);
Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
- Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(dihedral_14_numbers, ene, 0.);
+ Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(dihedral_14_numbers, ene, 0.);
VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
Dihedral14LJEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
@@ -97,6 +96,6 @@ void Dihedral14LJEnergy(const int dihedral_14_numbers, const int atom_numbers, c
}
void Dihedral14LJEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype,
- const float *charge, const float *boxlength_f, const int *a_14, const int *b_14,
- const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B, float *ene,
- cudaStream_t stream);
+ const float *charge, float *uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14,
+ const int *b_14, const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B,
+ float *ene, cudaStream_t stream);

@@ -20,8 +20,8 @@
#include "runtime/device/gpu/cuda_common.h"
void Dihedral14LJEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype,
- const float *charge, const float *boxlength_f, const int *a_14, const int *b_14,
- const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B, float *ene,
- cudaStream_t stream);
+ const float *charge, float *uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14,
+ const int *b_14, const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B,
+ float *ene, cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ENERGY_IMPL_H

@@ -108,7 +108,7 @@ void Dihedral14LJForceWithDirectCF(const int dihedral_14_numbers, const int atom
atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
cudaStreamSynchronize(stream);
VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
- Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, frc_f, 0.);
+ Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
Dihedral14LJForceWithDirectCFKernel<<<block_per_grid, thread_per_block, 0, stream>>>(

@@ -97,9 +97,6 @@ void MDIterationLeapFrog(const int float4_numbers, const int atom_numbers, const
const float exp_gamma, const int is_max_velocity, const float max_velocity,
const float *d_mass_inverse, const float *d_sqrt_mass, float *vel_f, float *crd_f,
float *frc_f, float *acc_f, cudaStream_t stream) {
- Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, vel_f, 0.);
- Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, crd_f, 0.);
- Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, frc_f, 0.);
- Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, acc_f, 0.);
VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));

@@ -86,7 +86,7 @@ __global__ void PME_Excluded_Force_Correction(const int atom_numbers, const UNSI
void PMEExcludedForce(const int atom_numbers, const float pme_beta, const int *uint_crd_f, const float *sacler_f,
const float *charge, const int *excluded_list_start, const int *excluded_list,
const int *excluded_atom_numbers, float *frc_f, cudaStream_t stream) {
- Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, frc_f, 0.);
+ Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
VECTOR *frc = reinterpret_cast<VECTOR *>(frc_f);

@@ -75,7 +75,7 @@ void PMEReciprocalForce(int fftx, int ffty, int fftz, int atom_numbers, float be
float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz,
const float *box_length_f, const int *uint_crd_f, const float *charge, float *force,
cudaStream_t stream) {
- Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, force, 0.);
+ Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, force, 0.);
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
UNSIGNED_INT_VECTOR *PME_uxyz = reinterpret_cast<UNSIGNED_INT_VECTOR *>(pme_uxyz);

@@ -64,7 +64,7 @@ class Dihedral14CFEnergyGpuKernel : public GpuKernel {
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
- bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
+ bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
auto uint_crd_f = GetDeviceAddress<const T1>(inputs, 0);
auto LJtype = GetDeviceAddress<const T1>(inputs, 1);
@@ -74,9 +74,10 @@ class Dihedral14CFEnergyGpuKernel : public GpuKernel {
auto b_14 = GetDeviceAddress<const T1>(inputs, 5);
auto cf_scale_factor = GetDeviceAddress<T>(inputs, 6);
auto ene = GetDeviceAddress<T>(outputs, 0);
+ auto uint_crd_with_LJ = GetDeviceAddress<T>(workspace, 0);
- Dihedral14CFEnergy(dihedral_14_numbers, atom_numbers, uint_crd_f, LJtype, charge, boxlength_f, a_14, b_14,
- cf_scale_factor, ene, reinterpret_cast<cudaStream_t>(stream_ptr));
+ Dihedral14CFEnergy(dihedral_14_numbers, atom_numbers, uint_crd_f, LJtype, charge, uint_crd_with_LJ, boxlength_f,
+ a_14, b_14, cf_scale_factor, ene, reinterpret_cast<cudaStream_t>(stream_ptr));
return true;
}
@@ -90,7 +91,7 @@ class Dihedral14CFEnergyGpuKernel : public GpuKernel {
input_size_list_.push_back(ele_a_14 * sizeof(T1));
input_size_list_.push_back(ele_b_14 * sizeof(T1));
input_size_list_.push_back(ele_cf_scale_factor * sizeof(T));
+ workspace_size_list_.push_back(atom_numbers * sizeof(UINT_VECTOR_LJ_TYPE));
output_size_list_.push_back(atom_numbers * sizeof(T));
}
@@ -108,6 +109,13 @@ class Dihedral14CFEnergyGpuKernel : public GpuKernel {
std::vector<size_t> workspace_size_list_;
int dihedral_14_numbers;
int atom_numbers;
+ struct UINT_VECTOR_LJ_TYPE {
+ unsigned int uint_x;
+ unsigned int uint_y;
+ unsigned int uint_z;
+ int LJ_type;
+ float charge;
+ };
};
} // namespace kernel
} // namespace mindspore

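On the kernel-class side (Dihedral14CFEnergyGpuKernel above), the two halves of the change meet: the init path registers one UINT_VECTOR_LJ_TYPE per atom in workspace_size_list_ (20 bytes each: three unsigned ints, an int and a float), and Launch now takes the workspace vector, fetches the pre-allocated buffer with GetDeviceAddress<T>(workspace, 0), and forwards it to Dihedral14CFEnergy. A framework-agnostic sketch of that flow; apart from UINT_VECTOR_LJ_TYPE and workspace_size_list_, the names below are illustrative rather than MindSpore API:

#include <cstddef>
#include <vector>

struct UINT_VECTOR_LJ_TYPE {
  unsigned int uint_x, uint_y, uint_z;
  int LJ_type;
  float charge;
};

class Dihedral14CFEnergySketch {
 public:
  // Called once at kernel init: one scratch record per atom, allocated by the
  // framework rather than by the op itself.
  void InitSizeLists(int atom_numbers) {
    workspace_size_list_.push_back(atom_numbers * sizeof(UINT_VECTOR_LJ_TYPE));
  }

  // Called per step: look up the pre-allocated buffer (the role GetDeviceAddress
  // plays in the real kernel) and hand it to the CUDA wrapper.
  bool Launch(const std::vector<void *> &workspace /*, inputs, outputs, stream */) {
    float *uint_crd_with_LJ_f = static_cast<float *>(workspace[0]);
    // Dihedral14CFEnergy(..., uint_crd_with_LJ_f, ..., stream);
    return uint_crd_with_LJ_f != nullptr;
  }

 private:
  std::vector<size_t> workspace_size_list_;
};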