From 3dae6103ecaceea49a7ea265f3945a2fa9c3cb74 Mon Sep 17 00:00:00 2001
From: mamba_ni <niningxi@huawei.com>
Date: Fri, 2 Apr 2021 14:13:28 +0800
Subject: [PATCH] fix nb14 ops: size launch grids and energy outputs by dihedral_14_numbers

---
 .../cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cu    | 7 ++-----
 .../nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cu  | 4 +---
 .../cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cu    | 7 ++-----
 .../gpu/sponge/nb14/dihedral_14_cf_energy_kernel.h         | 2 +-
 .../gpu/sponge/nb14/dihedral_14_lj_energy_kernel.h         | 2 +-
 5 files changed, 7 insertions(+), 15 deletions(-)
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cu
index 9ed64ea711..d12acde026 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cu
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cu
@@ -54,8 +54,8 @@ __global__ void Dihedral14CFEnergyKernel(const int dihedral_14_numbers, const UI
 void Dihedral14CFEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype,
                         const float *charge, float *uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14,
                         const int *b_14, const float *cf_scale_factor, float *ene, cudaStream_t stream) {
-  size_t thread_per_block = 128;
-  size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
+  size_t thread_per_block = 32;
+  size_t block_per_grid = ceilf(static_cast<float>(dihedral_14_numbers) / 32);
   UNSIGNED_INT_VECTOR *uint_crd =
     const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
 
@@ -65,12 +65,9 @@ void Dihedral14CFEnergy(const int dihedral_14_numbers, const int atom_numbers, c
     atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
 
   VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
-  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, ene, 0.);
   Dihedral14CFEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
     dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, cf_scale_factor, ene);
 
-  cudaStreamSynchronize(stream);
-
   return;
 }
 
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cu
index 730310c89c..88134c65ff 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cu
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cu
@@ -111,7 +111,7 @@ void Dihedral14LJCFForceWithAtomEnergy(const int dihedral_14_numbers, const int
                                        const float *LJ_type_A, const float *LJ_type_B, float *frc_f, float *atom_energy,
                                        cudaStream_t stream) {
   size_t thread_per_block = 128;
-  size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
+  size_t block_per_grid = ceilf(static_cast<float>(dihedral_14_numbers) / 128);
   UNSIGNED_INT_VECTOR *uint_crd =
     const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
 
@@ -129,8 +129,6 @@ void Dihedral14LJCFForceWithAtomEnergy(const int dihedral_14_numbers, const int
     dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, cf_scale_factor, LJ_type_A,
     LJ_type_B, frc, atom_energy);
 
-  cudaStreamSynchronize(stream);
-
   return;
 }
 
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cu
index 54a24a565a..ef70d4ccfe 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cu
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cu
@@ -75,8 +75,8 @@ void Dihedral14LJEnergy(const int dihedral_14_numbers, const int atom_numbers, c
                         const float *charge, float *uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14,
                         const int *b_14, const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B,
                         float *ene, cudaStream_t stream) {
-  size_t thread_per_block = 128;
-  size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
+  size_t thread_per_block = 32;
+  size_t block_per_grid = ceilf(static_cast<float>(dihedral_14_numbers) / 32);
   UNSIGNED_INT_VECTOR *uint_crd =
     const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
 
@@ -84,14 +84,11 @@ void Dihedral14LJEnergy(const int dihedral_14_numbers, const int atom_numbers, c
 
   Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
     atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
-  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(dihedral_14_numbers, ene, 0.);
   VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
 
   Dihedral14LJEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
     dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, LJ_type_A, LJ_type_B, ene);
 
-  cudaStreamSynchronize(stream);
-
   return;
 }
 
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_energy_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_energy_kernel.h
index c30eff3d1e..2d94c8ed95 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_energy_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_energy_kernel.h
@@ -92,7 +92,7 @@ class Dihedral14CFEnergyGpuKernel : public GpuKernel {
     input_size_list_.push_back(ele_b_14 * sizeof(T1));
     input_size_list_.push_back(ele_cf_scale_factor * sizeof(T));
     workspace_size_list_.push_back(atom_numbers * sizeof(UINT_VECTOR_LJ_TYPE));
-    output_size_list_.push_back(atom_numbers * sizeof(T));
+    output_size_list_.push_back(dihedral_14_numbers * sizeof(T));
   }
 
  private:
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_energy_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_energy_kernel.h
index 82f81355e5..bc7bc84435 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_energy_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_energy_kernel.h
@@ -102,7 +102,7 @@ class Dihedral14LJEnergyGpuKernel : public GpuKernel {
     input_size_list_.push_back(ele_LJ_type_B * sizeof(T));
     workspace_size_list_.push_back(atom_numbers * sizeof(UINT_VECTOR_LJ_TYPE));
 
-    output_size_list_.push_back(atom_numbers * sizeof(T));
+    output_size_list_.push_back(dihedral_14_numbers * sizeof(T));
   }
 
  private: