From 769243673a8dc9a16289f18c43bc59ad1cb60b1d Mon Sep 17 00:00:00 2001
From: zhangxinfeng3
Date: Mon, 8 Mar 2021 17:24:39 +0800
Subject: [PATCH] Add some modules of Sponge

---
 mindspore/ccsrc/CMakeLists.txt                     |    3 +-
 .../cuda_impl/sponge/common/getcenter_impl.cu      |   45 +
 .../sponge/common/getcenter_impl.cuh               |   26 +
 .../sponge/common/mdtemperature_impl.cu            |   51 +
 .../sponge/common/mdtemperature_impl.cuh           |   25 +
 .../gpu/cuda_impl/sponge/common_sponge.cuh         |  163 +-
 .../dihedral/dihedral_atom_energy_impl.cu          |    2 -
 .../sponge/dihedral/dihedral_energy_impl.cu        |    2 -
 .../sponge/dihedral/dihedral_force_impl.cu         |    1 -
 .../gpu/cuda_impl/sponge/lj/lj_energy_impl.cu      |  102 +
 .../cuda_impl/sponge/lj/lj_energy_impl.cuh         |   27 +
 .../gpu/cuda_impl/sponge/lj/lj_force_impl.cu       |  116 +
 .../gpu/cuda_impl/sponge/lj/lj_force_impl.cuh      |   27 +
 .../lj/lj_force_with_pme_direct_force_impl.cu      |  132 +
 .../lj_force_with_pme_direct_force_impl.cuh        |   28 +
 .../nb14/dihedral_14_cf_atom_energy_impl.cu        |   80 +
 .../nb14/dihedral_14_cf_atom_energy_impl.cuh       |   25 +
 .../sponge/nb14/dihedral_14_cf_energy_impl.cu      |   80 +
 .../nb14/dihedral_14_cf_energy_impl.cuh            |   25 +
 .../nb14/dihedral_14_lj_atom_energy_impl.cu        |  102 +
 .../nb14/dihedral_14_lj_atom_energy_impl.cuh       |   26 +
 ...al_14_lj_cf_force_with_atom_energy_impl.cu      |  140 +
 ...l_14_lj_cf_force_with_atom_energy_impl.cuh      |   27 +
 .../sponge/nb14/dihedral_14_lj_energy_impl.cu      |  102 +
 .../nb14/dihedral_14_lj_energy_impl.cuh            |   27 +
 .../sponge/nb14/dihedral_14_lj_force_impl.cu       |  111 +
 .../sponge/nb14/dihedral_14_lj_force_impl.cuh      |   26 +
 ...ihedral_14_lj_force_with_direct_cf_impl.cu      |  124 +
 ...hedral_14_lj_force_with_direct_cf_impl.cuh      |   26 +
 .../neighbor_list/neighbor_list_impl.cu            |  419 +++
 .../neighbor_list/neighbor_list_impl.cuh           |   58 +
 .../nvtit/md_iteration_leap_frog_impl.cu           |  139 +
 .../nvtit/md_iteration_leap_frog_impl.cuh          |   27 +
 .../gpu/cuda_impl/sponge/pme/pme_common.cuh        |  230 ++
 .../cuda_impl/sponge/pme/pme_energy_impl.cu        |  234 ++
 .../cuda_impl/sponge/pme/pme_energy_impl.cuh       |   30 +
 .../sponge/pme/pme_excluded_force_impl.cu          |  102 +
 .../sponge/pme/pme_excluded_force_impl.cuh         |   26 +
 .../sponge/pme/pme_reciprocal_force_impl.cu        |  204 ++
 .../sponge/pme/pme_reciprocal_force_impl.cuh       |   28 +
 .../gpu/sponge/common/getcenter_kernel.cc          |   27 +
 .../gpu/sponge/common/getcenter_kernel.h           |   89 +
 .../gpu/sponge/common/mdtemperature_kernel.cc      |   31 +
 .../gpu/sponge/common/mdtemperature_kernel.h       |   96 +
 .../gpu/sponge/lj/lj_energy_kernel.cc              |   34 +
 .../gpu/sponge/lj/lj_energy_kernel.h               |  130 +
 .../gpu/sponge/lj/lj_force_kernel.cc               |   34 +
 .../gpu/sponge/lj/lj_force_kernel.h                |  129 +
 .../lj_force_with_pme_direct_force_kernel.cc       |   34 +
 .../lj_force_with_pme_direct_force_kernel.h        |  133 +
 .../nb14/dihedral_14_cf_atom_energy_kernel.cc      |   34 +
 .../nb14/dihedral_14_cf_atom_energy_kernel.h       |  114 +
 .../nb14/dihedral_14_cf_energy_kernel.cc           |   34 +
 .../nb14/dihedral_14_cf_energy_kernel.h            |  114 +
 .../nb14/dihedral_14_lj_atom_energy_kernel.cc      |   36 +
 .../nb14/dihedral_14_lj_atom_energy_kernel.h       |  123 +
 ..._14_lj_cf_force_with_atom_energy_kernel.cc      |   38 +
 ...l_14_lj_cf_force_with_atom_energy_kernel.h      |  132 +
 .../nb14/dihedral_14_lj_energy_kernel.cc           |   36 +
 .../nb14/dihedral_14_lj_energy_kernel.h            |  124 +
 .../nb14/dihedral_14_lj_force_gpu_kernel.cc        |   36 +
 .../nb14/dihedral_14_lj_force_gpu_kernel.h         |  122 +
 ...edral_14_lj_force_with_direct_cf_kernel.cc      |   37 +
 ...hedral_14_lj_force_with_direct_cf_kernel.h      |  130 +
 .../neighbor_list_update_kernel.cc                 |   45 +
 .../neighbor_list_update_kernel.h                  |  170 +
 .../nvtit/md_iteration_leap_frog_kernel.cc         |   32 +
 .../nvtit/md_iteration_leap_frog_kernel.h          |  115 +
 .../gpu/sponge/pme/pme_energy_kernel.cc            |   38 +
 .../gpu/sponge/pme/pme_energy_kernel.h             |  147 +
 .../sponge/pme/pme_excluded_force_kernel.cc        |   32 +
 .../sponge/pme/pme_excluded_force_kernel.h         |   95 +
 .../sponge/pme/pme_reciprocal_force_kernel.cc      |   29 +
 .../sponge/pme/pme_reciprocal_force_kernel.h       |  119 +
 mindspore/ccsrc/cxx_api/CMakeLists.txt             |    3 +-
 mindspore/ops/operations/__init__.py               |   27 +-
 mindspore/ops/operations/sponge_ops.py             | 2890 ++++++++++++-----
 77 files changed, 7843 insertions(+), 914 deletions(-)
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/getcenter_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/getcenter_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/mdtemperature_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/mdtemperature_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_energy_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_energy_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_with_pme_direct_force_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_with_pme_direct_force_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_atom_energy_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_atom_energy_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_with_direct_cf_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_with_direct_cf_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_common.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_excluded_force_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_excluded_force_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cuh
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/getcenter_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/getcenter_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/mdtemperature_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/mdtemperature_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_energy_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_energy_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_with_pme_direct_force_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_with_pme_direct_force_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_atom_energy_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_atom_energy_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_energy_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_energy_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_atom_energy_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_atom_energy_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_energy_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_energy_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_gpu_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_gpu_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_with_direct_cf_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_with_direct_cf_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nvtit/md_iteration_leap_frog_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nvtit/md_iteration_leap_frog_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_energy_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_energy_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_kernel.h
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_kernel.h

diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt
index 95ccf58cb8..9fd1c55c94 100644
--- a/mindspore/ccsrc/CMakeLists.txt
+++ b/mindspore/ccsrc/CMakeLists.txt
@@ -383,7 +383,8 @@ if(ENABLE_GPU)
             ${CUDNN_LIBRARY_PATH}
             ${CUDA_PATH}/lib64/libcudart.so
             ${CUDA_PATH}/lib64/stubs/libcuda.so
-            ${CUDA_PATH}/lib64/libcusolver.so)
+            ${CUDA_PATH}/lib64/libcusolver.so
+            ${CUDA_PATH}/lib64/libcufft.so)
     if(ENABLE_MPI)
         set_target_properties(_ms_mpi PROPERTIES INSTALL_RPATH ${MINDSPORE_RPATH})
     endif()
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/getcenter_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/getcenter_impl.cu
new file mode 100644
index 0000000000..2e283d297e
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/getcenter_impl.cu
@@ -0,0 +1,45 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common/getcenter_impl.cuh"
+
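+// One thread per listed atom: each thread adds its atom's coordinate,
+// pre-scaled by 1/center_numbers, into center_of_geometry[0] with atomicAdd,
+// so the geometric center is accumulated in a single pass.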
+__global__ void GetCenterOfGeometryKernel(const int center_numbers, float center_numbers_inverse,
+                                          const int *center_atoms, const VECTOR *crd, VECTOR *center_of_geometry) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < center_numbers) {
+    int atom_i = center_atoms[i];
+    VECTOR temp = center_numbers_inverse * crd[atom_i];
+    atomicAdd(&center_of_geometry[0].x, temp.x);
+    atomicAdd(&center_of_geometry[0].y, temp.y);
+    atomicAdd(&center_of_geometry[0].z, temp.z);
+  }
+}
+
+void GetCenterOfGeometry(const int center_numbers, float center_numbers_inverse, const int *center_atoms,
+                         const float *crd_f, float *center_of_geometry_f, cudaStream_t stream) {
+  VECTOR *crd = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(crd_f));
+  VECTOR *center_of_geometry = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(center_of_geometry_f));
+  GetCenterOfGeometryKernel<<<ceilf(static_cast<float>(center_numbers) / 32), 32, 0, stream>>>(
+    center_numbers, center_numbers_inverse, center_atoms, crd, center_of_geometry);
+
+  cudaStreamSynchronize(stream);
+
+  return;
+}
+
+void GetCenterOfGeometry(const int center_numbers, float center_numbers_inverse, const int *center_atoms, float *crd_f,
+                         float *center_of_geometry_f, cudaStream_t stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/getcenter_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/getcenter_impl.cuh
new file mode 100644
index 0000000000..57a9dce898
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/getcenter_impl.cuh
@@ -0,0 +1,26 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_GETCENTER_IMPL_H_
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_GETCENTER_IMPL_H_
+
+#include <curand_kernel.h>
+#include "runtime/device/gpu/cuda_common.h"
+
+void GetCenterOfGeometry(const int center_numbers, float center_numbers_inverse, const int *center_atoms,
+                         const float *crd_f, float *center_of_geometry_f, cudaStream_t stream);
+
+#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_GETCENTER_IMPL_H_
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/mdtemperature_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/mdtemperature_impl.cu
new file mode 100644
index 0000000000..e2f9b921e4
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/mdtemperature_impl.cu
@@ -0,0 +1,51 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common/mdtemperature_impl.cuh"
+
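+// One thread per residue: sum the residue's atomic momenta, then convert the
+// center-of-mass kinetic energy p^2/(2M) into a temperature contribution via
+// T = 2*KE/(3*kB), averaged over residue_numbers.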
+__global__ void MDTemperatureKernel(const int residue_numbers, const int *start, const int *end,
+                                    const VECTOR *atom_vel, const float *atom_mass, float *ek) {
+  int residue_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (residue_i < residue_numbers) {
+    VECTOR momentum = {0., 0., 0.};
+    float res_mass = 0.;
+    int s = start[residue_i];
+    int e = end[residue_i];
+    float mass_lin;
+    for (int atom_i = s; atom_i < e; atom_i = atom_i + 1) {
+      mass_lin = atom_mass[atom_i];
+
+      momentum.x = momentum.x + mass_lin * atom_vel[atom_i].x;
+      momentum.y = momentum.y + mass_lin * atom_vel[atom_i].y;
+      momentum.z = momentum.z + mass_lin * atom_vel[atom_i].z;
+      res_mass = res_mass + mass_lin;
+    }
+    ek[residue_i] = 0.5 * (momentum.x * momentum.x + momentum.y * momentum.y + momentum.z * momentum.z) / res_mass *
+                    2. / 3. / CONSTANT_kB / residue_numbers;
+  }
+}
+
+void MDTemperature(const int residue_numbers, const int *start, const int *end, const float *atom_vel_f,
+                   const float *atom_mass, float *ek, cudaStream_t stream) {
+  VECTOR *atom_vel = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(atom_vel_f));
+  MDTemperatureKernel<<<ceilf(static_cast<float>(residue_numbers) / 32), 32, 0, stream>>>(residue_numbers, start, end,
+                                                                                          atom_vel, atom_mass, ek);
+  cudaStreamSynchronize(stream);
+
+  return;
+}
+void MDTemperature(const int residue_numbers, const int *start, const int *end, const float *atom_vel_f,
+                   const float *atom_mass, float *ek, cudaStream_t stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/mdtemperature_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/mdtemperature_impl.cuh
new file mode 100644
index 0000000000..93eb79d57f
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/mdtemperature_impl.cuh
@@ -0,0 +1,25 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_MDTEMPERATURE_IMPL_H_
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_MDTEMPERATURE_IMPL_H_
+
+#include <curand_kernel.h>
+#include "runtime/device/gpu/cuda_common.h"
+
+void MDTemperature(const int residue_numbers, const int *start, const int *end, const float *atom_vel_f,
+                   const float *atom_mass, float *ek, cudaStream_t stream);
+#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_MDTEMPERATURE_IMPL_H_
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh
index d63f930862..20fedff136 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh
@@ -14,31 +14,59 @@
  * limitations under the License.
  */

-#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_SPONGE_COMMONHW_H_
-#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_SPONGE_COMMONHW_H_
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_SPONGE_H_
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_SPONGE_H_

 #include
 #include
 #include
 #include
+
 #include
 #include
 #include
+#include <cufft.h>
 #include "runtime/device/gpu/cuda_common.h"

 #define CONSTANT_Pi 3.1415926535897932
+#define TWO_DIVIDED_BY_SQRT_PI 1.1283791670218446
+#define CONSTANT_kB 0.00198716
+static dim3 thread_LJ(8, 32);
 struct VECTOR {
   float x;
   float y;
   float z;
 };
+struct INT_VECTOR {
+  int int_x;
+  int int_y;
+  int int_z;
+};
 struct UNSIGNED_INT_VECTOR {
   unsigned int uint_x;
   unsigned int uint_y;
   unsigned int uint_z;
 };
+struct NEIGHBOR_LIST {
+  int atom_numbers;
+  int *atom_serial;
+};
+struct UINT_VECTOR_LJ_TYPE {
+  unsigned int uint_x;
+  unsigned int uint_y;
+  unsigned int uint_z;
+  int LJ_type;
+  float charge;
+};
+
+struct GRID_BUCKET {
+  int *atom_serial;
+};
+struct GRID_POINTER {
+  int *grid_serial;
+};
 __device__ __host__ static inline VECTOR Get_Periodic_Displacement(const UNSIGNED_INT_VECTOR uvec_a,
                                                                    const UNSIGNED_INT_VECTOR uvec_b,
                                                                    const VECTOR scaler) {
@@ -48,6 +76,15 @@ __device__ __host__ static inline VECTOR Get_Periodic_Displacement(const UNSIGNE
   dr.x = (static_cast<int>(uvec_a.uint_x - uvec_b.uint_x)) * scaler.x;
   dr.y = (static_cast<int>(uvec_a.uint_y - uvec_b.uint_y)) * scaler.y;
   dr.z = (static_cast<int>(uvec_a.uint_z - uvec_b.uint_z)) * scaler.z;
   return dr;
 }
+__device__ __host__ static inline VECTOR Get_Periodic_Displacement(const UINT_VECTOR_LJ_TYPE uvec_a,
+                                                                   const UINT_VECTOR_LJ_TYPE uvec_b,
+                                                                   const VECTOR scaler) {
+  VECTOR dr;
+  dr.x = (static_cast<int>(uvec_a.uint_x - uvec_b.uint_x)) * scaler.x;
+  dr.y = (static_cast<int>(uvec_a.uint_y - uvec_b.uint_y)) * scaler.y;
+  dr.z = (static_cast<int>(uvec_a.uint_z - uvec_b.uint_z)) * scaler.z;
+  return dr;
+}

 __device__ __host__ static inline VECTOR operator+(const VECTOR &veca, const VECTOR &vecb) {
   VECTOR vec;
@@ -91,4 +128,124 @@ __device__ __host__ static inline VECTOR operator^(const VECTOR &veca, const VEC
   return vec;
 }

-#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_SPONGE_COMMON_H_
+__global__ static void construct_neighbor_list_kernel(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers,
+                                                      int *nl_atom_serial, NEIGHBOR_LIST *nl) {
+  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) {
+    nl[i].atom_numbers = nl_atom_numbers[i];
+    nl[i].atom_serial = nl_atom_serial + i * max_neighbor_numbers;
+  }
+}
+
+static inline bool Malloc_Safely(void **address, size_t size) {
+  address[0] = NULL;
+  address[0] = reinterpret_cast<void *>(malloc(size));
+  if (address[0] != NULL) {
+    return true;
+  } else {
+    printf("malloc failed!\n");
+    getchar();
+    return false;
+  }
+}
+static inline bool Cuda_Malloc_Safely(void **address, size_t size) {
+  cudaError_t cuda_error = cudaMalloc(&address[0], size);
+  if (cuda_error == 0) {
+    return true;
+  } else {
+    printf("cudaMalloc failed! error %d\n", cuda_error);
+    getchar();
+    return false;
+  }
+}
+
+__global__ static void Copy_Crd_To_New_Crd_Start(const int atom_numbers, const UNSIGNED_INT_VECTOR *crd,
+                                                 UINT_VECTOR_LJ_TYPE *new_crd, const int *LJ_type,
+                                                 const float *charge) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    new_crd[atom_i].uint_x = crd[atom_i].uint_x;
+    new_crd[atom_i].uint_y = crd[atom_i].uint_y;
+    new_crd[atom_i].uint_z = crd[atom_i].uint_z;
+    new_crd[atom_i].LJ_type = LJ_type[atom_i];
+    new_crd[atom_i].charge = charge[atom_i];
+  }
+}
+
+__global__ static void Rand_Normal(const int float4_numbers, curandStatePhilox4_32_10_t *rand_state,
+                                   float4 *rand_float4) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < float4_numbers) {
+    rand_float4[i] = curand_normal4(&rand_state[i]);
+  }
+}
+
+__global__ static void Setup_Rand_Normal_Kernel(const int float4_numbers, curandStatePhilox4_32_10_t *rand_state,
+                                                const int seed) {
+  int id = threadIdx.x + blockIdx.x * blockDim.x;
+  /* Each thread gets same seed, a different sequence
+     number, no offset */
+  if (id < float4_numbers) {
+    curand_init(seed, id, 0, &rand_state[id]);
+  }
+}
+
+__global__ static void Reset_List(const int element_numbers, int *list, const int replace_element) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < element_numbers) {
+    list[i] = replace_element;
+  }
+}
+
+__global__ static void Reset_List(const int element_numbers, float *list, const float replace_element) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < element_numbers) {
+    list[i] = replace_element;
+  }
+}
+
+__global__ static void Sum_Of_List(const int element_numbers, const float *list, float *sum) {
+  if (threadIdx.x == 0) {
+    sum[0] = 0.;
+  }
+  __syncthreads();
+  float lin = 0.;
+  for (int i = threadIdx.x; i < element_numbers; i = i + blockDim.x) {
+    lin = lin + list[i];
+  }
+  atomicAdd(sum, lin);
+}
+
+__global__ static void Scale_List(const int element_numbers, float *list, float scaler) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < element_numbers) {
+    list[i] = list[i] * scaler;
+  }
+}
+
+__global__ static void Copy_List(const int element_numbers, const int *origin_list, int *list) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < element_numbers) {
+    list[i] = origin_list[i];
+  }
+}
+__global__ static void Copy_List(const int element_numbers, const float *origin_list, float *list) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < element_numbers) {
+    list[i] = origin_list[i];
+  }
+}
+
+__global__ static void Print(const size_t size, const float *input_x) {
+  for (size_t i = 0; i < size; i++) {
+    printf("%f\n", input_x[i]);
+  }
+  return;
+}
+__global__ static void Print(const size_t size, const int *input_x) {
+  for (size_t i = 0; i < size; i++) {
+    printf("%d\n", input_x[i]);
+  }
+  return;
+}
+
+#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_SPONGE_H_
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/dihedral/dihedral_atom_energy_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/dihedral/dihedral_atom_energy_impl.cu
index c4e6051d5f..a3a5a2e4d2 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/dihedral/dihedral_atom_energy_impl.cu
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/dihedral/dihedral_atom_energy_impl.cu
@@ -29,8 +29,6 @@ __global__ void DihedralAtomEnergyKernel(int dihedral_numbers, const UNSIGNED_IN
     int atom_k = atom_c[dihedral_i];
     int atom_l = atom_d[dihedral_i];

-    int temp_ipn = ipn[dihedral_i];
-
     float temp_pk = pk[dihedral_i];
     float temp_pn = pn[dihedral_i];
     float temp_gamc = gamc[dihedral_i];
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/dihedral/dihedral_energy_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/dihedral/dihedral_energy_impl.cu
index 7e24136002..4bf4127248 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/dihedral/dihedral_energy_impl.cu
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/dihedral/dihedral_energy_impl.cu
@@ -29,8 +29,6 @@ __global__ void DihedralEnergyKernel(int dihedral_numbers, const UNSIGNED_INT_VE
     int atom_k = atom_c[dihedral_i];
     int atom_l = atom_d[dihedral_i];

-    int temp_ipn = ipn[dihedral_i];
-
     float temp_pk = pk[dihedral_i];
     float temp_pn = pn[dihedral_i];
     float temp_gamc = gamc[dihedral_i];
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/dihedral/dihedral_force_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/dihedral/dihedral_force_impl.cu
index 959bca09d2..f7463c717a 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/dihedral/dihedral_force_impl.cu
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/dihedral/dihedral_force_impl.cu
@@ -31,7 +31,6 @@ __global__ void DihedralForceKernel(int dihedral_numbers, const UNSIGNED_INT_VEC
     int temp_ipn = ipn[dihedral_i];

-    float temp_pk = pk[dihedral_i];
     float temp_pn = pn[dihedral_i];
     float temp_gamc = gamc[dihedral_i];
     float temp_gams = gams[dihedral_i];
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_energy_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_energy_impl.cu
new file mode 100644
index 0000000000..5538410eb5
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_energy_impl.cu
@@ -0,0 +1,102 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_energy_impl.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
+
+__global__ void LJ_Energy_CUDA(const int atom_numbers, const NEIGHBOR_LIST *nl, const UINT_VECTOR_LJ_TYPE *uint_crd,
+                               const VECTOR *boxlength, const float *LJ_type_A, const float *LJ_type_B,
+                               const float cutoff_square, float *lj_ene) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    NEIGHBOR_LIST nl_i = nl[atom_i];
+    int N = nl_i.atom_numbers;
+    int atom_j;
+    int int_x;
+    int int_y;
+    int int_z;
+    UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i], r2;
+    VECTOR dr;
+    float dr2;
+    float dr_2;
+    float dr_4;
+    float dr_6;
+    float ene_lin = 0.;
+
+    int x, y;
+    int atom_pair_LJ_type;
+    for (int j = threadIdx.y; j < N; j = j + blockDim.y) {
+      atom_j = nl_i.atom_serial[j];
+      r2 = uint_crd[atom_j];
+
+      int_x = r2.uint_x - r1.uint_x;
+      int_y = r2.uint_y - r1.uint_y;
+      int_z = r2.uint_z - r1.uint_z;
+      dr.x = boxlength[0].x * int_x;
+      dr.y = boxlength[0].y * int_y;
+      dr.z = boxlength[0].z * int_z;
+
+      dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
+      if (dr2 < cutoff_square) {
+        dr_2 = 1. / dr2;
+        dr_4 = dr_2 * dr_2;
+        dr_6 = dr_4 * dr_2;
+
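+        // Branch-free lookup of the (LJ_type_i, LJ_type_j) pair index in the
+        // upper-triangular coefficient table: y becomes |ti - tj| via the sign
+        // bit, then index = max * (max + 1) / 2 + min.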
+        y = (r2.LJ_type - r1.LJ_type);
+        x = y >> 31;
+        y = (y ^ x) - x;
+        x = r2.LJ_type + r1.LJ_type;
+        r2.LJ_type = (x + y) >> 1;
+        x = (x - y) >> 1;
+        atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;
+
+        dr_2 = (0.083333333 * LJ_type_A[atom_pair_LJ_type] * dr_6 - 0.166666666 * LJ_type_B[atom_pair_LJ_type]) * dr_6;
+        ene_lin = ene_lin + dr_2;
+      }
+    }
+    atomicAdd(&lj_ene[atom_i], ene_lin);
+  }
+}
+
+void LJEnergy(const int atom_numbers, const float cutoff_square, const int *uint_crd_f, const int *LJtype,
+              const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers,
+              int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B, float *d_LJ_energy_atom,
+              cudaStream_t stream) {
+  VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
+  int max_neighbor_numbers = 800;
+  NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl);
+  construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
+    atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a);
+
+  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ_a = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ);
+
+  UNSIGNED_INT_VECTOR *uint_crd =
+    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
+
+  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, uint_crd, uint_crd_with_LJ_a, LJtype, charge);
+
+  Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, d_LJ_energy_atom, 0.);
+
+  LJ_Energy_CUDA<<<ceilf(static_cast<float>(atom_numbers) / 8), thread_LJ, 0, stream>>>(
+    atom_numbers, nl_a, uint_crd_with_LJ_a, scaler, d_LJ_A, d_LJ_B, cutoff_square, d_LJ_energy_atom);
+
+  return;
+}
+void LJEnergy(const int atom_numbers, const float cutoff_square, const int *uint_crd_f, const int *LJtype,
+              const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers,
+              int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B, float *d_LJ_energy_atom,
+              cudaStream_t stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_energy_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_energy_impl.cuh
new file mode 100644
index 0000000000..4c0b6f1805
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_energy_impl.cuh
@@ -0,0 +1,27 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_ENERGY_IMPL_H_
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_ENERGY_IMPL_H_
+
+#include <curand_kernel.h>
+#include "runtime/device/gpu/cuda_common.h"
+
+void LJEnergy(const int atom_numbers, const float cutoff_square, const int *uint_crd_f, const int *LJtype,
+              const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers,
+              int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B, float *d_LJ_energy_atom,
+              cudaStream_t stream);
+#endif
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_impl.cu
new file mode 100644
index 0000000000..dd3011526c
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_impl.cu
@@ -0,0 +1,116 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_impl.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
+
+__global__ void LJ_Force_CUDA(const int atom_numbers, const NEIGHBOR_LIST *nl, const UINT_VECTOR_LJ_TYPE *uint_crd,
+                              const VECTOR *boxlength, const float *LJ_type_A, const float *LJ_type_B,
+                              const float cutoff_square, VECTOR *frc) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    NEIGHBOR_LIST nl_i = nl[atom_i];
+    int N = nl_i.atom_numbers;
+    int B = ceilf(static_cast<float>(N) / blockDim.y);
+    int atom_j;
+    int int_x;
+    int int_y;
+    int int_z;
+    UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i], r2;
+    VECTOR dr;
+    float dr2;
+    float dr_2;
+    float dr_4;
+    float dr_8;
+    float dr_14;
+    float frc_abs = 0.;
+    VECTOR frc_lin;
+    VECTOR frc_record = {0., 0., 0.};
+
+    int x, y;
+    int atom_pair_LJ_type;
+    for (int j = threadIdx.y * B; j < (threadIdx.y + 1) * B; j = j + 1) {
+      if (j < N) {
+        atom_j = nl_i.atom_serial[j];
+        r2 = uint_crd[atom_j];
+        int_x = r2.uint_x - r1.uint_x;
+        int_y = r2.uint_y - r1.uint_y;
+        int_z = r2.uint_z - r1.uint_z;
+        dr.x = boxlength[0].x * int_x;
+        dr.y = boxlength[0].y * int_y;
+        dr.z = boxlength[0].z * int_z;
+        dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
+        if (dr2 < cutoff_square) {
+          dr_2 = 1. / dr2;
+          dr_4 = dr_2 * dr_2;
+          dr_8 = dr_4 * dr_4;
+          dr_14 = dr_8 * dr_4 * dr_2;
+
+          y = (r2.LJ_type - r1.LJ_type);
+          x = y >> 31;
+          y = (y ^ x) - x;
+          x = r2.LJ_type + r1.LJ_type;
+          r2.LJ_type = (x + y) >> 1;
+          x = (x - y) >> 1;
+          atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;
+
+          frc_abs = -LJ_type_A[atom_pair_LJ_type] * dr_14 + LJ_type_B[atom_pair_LJ_type] * dr_8;
+          frc_lin.x = frc_abs * dr.x;
+          frc_lin.y = frc_abs * dr.y;
+          frc_lin.z = frc_abs * dr.z;
+
+          frc_record.x = frc_record.x + frc_lin.x;
+          frc_record.y = frc_record.y + frc_lin.y;
+          frc_record.z = frc_record.z + frc_lin.z;
+
+          atomicAdd(&frc[atom_j].x, -frc_lin.x);
+          atomicAdd(&frc[atom_j].y, -frc_lin.y);
+          atomicAdd(&frc[atom_j].z, -frc_lin.z);
+        }
+      }
+    }
+    atomicAdd(&frc[atom_i].x, frc_record.x);
+    atomicAdd(&frc[atom_i].y, frc_record.y);
+    atomicAdd(&frc[atom_i].z, frc_record.z);
+  }
+}
+
+void LJForce(const int atom_numbers, const float cutoff_square, const int *uint_crd_f, const int *LJtype,
+             const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers,
+             int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B, float *frc_f,
+             cudaStream_t stream) {
+  VECTOR *frc = reinterpret_cast<VECTOR *>(frc_f);
+  VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
+  int max_neighbor_numbers = 800;
+  NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl);
+  construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
+    atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a);
+
+  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ_a = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ);
+
+  UNSIGNED_INT_VECTOR *uint_crd =
+    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
+
+  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, uint_crd, uint_crd_with_LJ_a, LJtype, charge);
+
+  LJ_Force_CUDA<<<ceilf(static_cast<float>(atom_numbers) / 8), thread_LJ, 0, stream>>>(
+    atom_numbers, nl_a, uint_crd_with_LJ_a, scaler, d_LJ_A, d_LJ_B, cutoff_square, frc);
+  return;
+}
+void LJForce(const int atom_numbers, const float cutoff_square, const int *uint_crd_f, const int *LJtype,
+             const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers,
+             int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B, float *frc_f, cudaStream_t stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_impl.cuh
new file mode 100644
index 0000000000..401b5d3362
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_impl.cuh
@@ -0,0 +1,27 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_FORCE_IMPL_H_
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_FORCE_IMPL_H_
+
+#include <curand_kernel.h>
+#include "runtime/device/gpu/cuda_common.h"
+
+void LJForce(const int atom_numbers, const float cutoff_square, const int *uint_crd_f, const int *LJtype,
+             const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers,
+             int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B, float *frc_f, cudaStream_t stream);
+
+#endif
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_with_pme_direct_force_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_with_pme_direct_force_impl.cu
new file mode 100644
index 0000000000..22315b2dc7
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_with_pme_direct_force_impl.cu
@@ -0,0 +1,132 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_with_pme_direct_force_impl.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
+
+__global__ void LJ_Force_With_Direct_CF_CUDA(const int atom_numbers, const NEIGHBOR_LIST *nl,
+                                             const UINT_VECTOR_LJ_TYPE *uint_crd, const VECTOR *boxlength,
+                                             const float *LJ_type_A, const float *LJ_type_B, const float cutoff,
+                                             VECTOR *frc, const float pme_beta, const float sqrt_pi) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    NEIGHBOR_LIST nl_i = nl[atom_i];
+    int N = nl_i.atom_numbers;
+    int atom_j;
+    int int_x;
+    int int_y;
+    int int_z;
+    UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i], r2;
+    VECTOR dr;
+    float dr_2;
+    float dr_4;
+    float dr_8;
+    float dr_6;
+    float frc_abs = 0.;
+    VECTOR frc_lin;
+    VECTOR frc_record = {0., 0., 0.};
+
+    float charge_i = r1.charge;
+    float charge_j;
+    float dr_abs;
+    float dr_1;
+    float beta_dr;
+    float frc_cf_abs;
+
+    int x, y;
+    int atom_pair_LJ_type;
+    for (int j = threadIdx.y; j < N; j = j + blockDim.y) {
+      atom_j = nl_i.atom_serial[j];
+      r2 = uint_crd[atom_j];
+      charge_j = r2.charge;
+
+      int_x = r2.uint_x - r1.uint_x;
+      int_y = r2.uint_y - r1.uint_y;
+      int_z = r2.uint_z - r1.uint_z;
+      dr.x = boxlength[0].x * int_x;
+      dr.y = boxlength[0].y * int_y;
+      dr.z = boxlength[0].z * int_z;
+      dr_abs = norm3df(dr.x, dr.y, dr.z);
+      if (dr_abs < cutoff) {
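+        // Direct-space PME pair force. The screened-Coulomb magnitude is
+        // -d/dr[qi*qj*erfc(beta*r)/r] / r
+        //   = qi*qj*(2*beta*r/sqrt(pi)*exp(-(beta*r)^2) + erfc(beta*r)) / r^3;
+        // sqrt_pi actually carries 2/sqrt(pi) (TWO_DIVIDED_BY_SQRT_PI).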
+        dr_1 = 1. / dr_abs;
+        dr_2 = dr_1 * dr_1;
+        dr_4 = dr_2 * dr_2;
+        dr_8 = dr_4 * dr_4;
+        dr_6 = dr_4 * dr_2;
+
+        y = (r2.LJ_type - r1.LJ_type);
+        x = y >> 31;
+        y = (y ^ x) - x;
+        x = r2.LJ_type + r1.LJ_type;
+        r2.LJ_type = (x + y) >> 1;
+        x = (x - y) >> 1;
+        atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;
+
+        frc_abs = (-LJ_type_A[atom_pair_LJ_type] * dr_6 + LJ_type_B[atom_pair_LJ_type]) * dr_8;
+        beta_dr = pme_beta * dr_abs;
+        frc_cf_abs = beta_dr * sqrt_pi * expf(-beta_dr * beta_dr) + erfcf(beta_dr);
+        frc_cf_abs = frc_cf_abs * dr_2 * dr_1;
+        frc_cf_abs = charge_i * charge_j * frc_cf_abs;
+
+        frc_abs = frc_abs - frc_cf_abs;
+
+        frc_lin.x = frc_abs * dr.x;
+        frc_lin.y = frc_abs * dr.y;
+        frc_lin.z = frc_abs * dr.z;
+
+        frc_record.x = frc_record.x + frc_lin.x;
+        frc_record.y = frc_record.y + frc_lin.y;
+        frc_record.z = frc_record.z + frc_lin.z;
+
+        atomicAdd(&frc[atom_j].x, -frc_lin.x);
+        atomicAdd(&frc[atom_j].y, -frc_lin.y);
+        atomicAdd(&frc[atom_j].z, -frc_lin.z);
+      }
+    }
+    atomicAdd(&frc[atom_i].x, frc_record.x);
+    atomicAdd(&frc[atom_i].y, frc_record.y);
+    atomicAdd(&frc[atom_i].z, frc_record.z);
+  }
+}
+
+void LJForceWithPMEDirectForce(const int atom_numbers, const float cutoff, const float pme_beta, const int *uint_crd_f,
+                               const int *LJtype, const float *charge, const float *scaler_f, float *uint_crd_with_LJ,
+                               int *nl_atom_numbers, int *nl_atom_serial, int *nl, const float *d_LJ_A,
+                               const float *d_LJ_B, float *frc_f, cudaStream_t stream) {
+  VECTOR *frc = reinterpret_cast<VECTOR *>(frc_f);
+  VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
+  int max_neighbor_numbers = 800;
+  NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl);
+  construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
+    atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a);
+
+  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ_a = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ);
+
+  UNSIGNED_INT_VECTOR *uint_crd =
+    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
+
+  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, uint_crd, uint_crd_with_LJ_a, LJtype, charge);
+
+  LJ_Force_With_Direct_CF_CUDA<<<ceilf(static_cast<float>(atom_numbers) / 8), thread_LJ, 0, stream>>>(
+    atom_numbers, nl_a, uint_crd_with_LJ_a, scaler, d_LJ_A, d_LJ_B, cutoff, frc, pme_beta, TWO_DIVIDED_BY_SQRT_PI);
+  return;
+}
+
+void LJForceWithPMEDirectForce(const int atom_numbers, const float cutoff, const float pme_beta, const int *uint_crd_f,
+                               const int *LJtype, const float *charge, const float *scaler_f, float *uint_crd_with_LJ,
+                               int *nl_atom_numbers, int *nl_atom_serial, int *nl, const float *d_LJ_A,
+                               const float *d_LJ_B, float *frc_f, cudaStream_t stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_with_pme_direct_force_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_with_pme_direct_force_impl.cuh
new file mode 100644
index 0000000000..826d4223bb
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_with_pme_direct_force_impl.cuh
@@ -0,0 +1,28 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_FORCE_WITH_PME_DIRECT_FORCE_IMPL_H_
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_FORCE_WITH_PME_DIRECT_FORCE_IMPL_H_
+
+#include <curand_kernel.h>
+#include "runtime/device/gpu/cuda_common.h"
+
+void LJForceWithPMEDirectForce(const int atom_numbers, const float cutoff, const float pme_beta, const int *uint_crd_f,
+                               const int *LJtype, const float *charge, const float *scaler_f, float *uint_crd_with_LJ,
+                               int *nl_atom_numbers, int *nl_atom_serial, int *nl, const float *d_LJ_A,
+                               const float *d_LJ_B, float *frc_f, cudaStream_t stream);
+
+#endif
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_atom_energy_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_atom_energy_impl.cu
new file mode 100644
index 0000000000..44219d215e
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_atom_energy_impl.cu
@@ -0,0 +1,80 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_atom_energy_impl.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
+
+__global__ void Dihedral14CFAtomEnergyKernel(const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd,
+                                             const VECTOR *boxlength, const int *a_14, const int *b_14,
+                                             const float *cf_scale_factor, float *ene) {
+  int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (dihedral_14_i < dihedral_14_numbers) {
+    int atom_i = a_14[dihedral_14_i];
+    int atom_j = b_14[dihedral_14_i];
+
+    UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i];
+    UINT_VECTOR_LJ_TYPE r2 = uint_crd[atom_j];
+
+    int int_x;
+    int int_y;
+    int int_z;
+    VECTOR dr;
+    float r_1;
+    float ene_lin = 0.;
+
+    int_x = r2.uint_x - r1.uint_x;
+    int_y = r2.uint_y - r1.uint_y;
+    int_z = r2.uint_z - r1.uint_z;
+    dr.x = boxlength[0].x * int_x;
+    dr.y = boxlength[0].y * int_y;
+    dr.z = boxlength[0].z * int_z;
+    r_1 = rnorm3df(dr.x, dr.y, dr.z);
+
+    ene_lin = r1.charge * r2.charge * r_1;
+
+    ene_lin *= cf_scale_factor[dihedral_14_i];
+
+    atomicAdd(&ene[atom_i], ene_lin);
+  }
+}
+
+void Dihedral14CFAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
+                            const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
+                            const int *b_14, const float *cf_scale_factor, float *ene, cudaStream_t stream) {
+  size_t thread_per_block = 128;
+  size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
+  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL;
+  Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers);
+
+  UNSIGNED_INT_VECTOR *uint_crd =
+    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
+
+  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
+
+  VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
+  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(atom_numbers, ene, 0.);
+  Dihedral14CFAtomEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
+    dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, cf_scale_factor, ene);
+
+  cudaStreamSynchronize(stream);
+
+  return;
+}
+
+void Dihedral14CFAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
+                            const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
+                            const int *b_14, const float *cf_scale_factor, float *ene, cudaStream_t stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_atom_energy_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_atom_energy_impl.cuh
new file mode 100644
index 0000000000..7d11c10793
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_atom_energy_impl.cuh
@@ -0,0 +1,25 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ATOM_ENERGY_IMPL_H
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ATOM_ENERGY_IMPL_H
+
+#include <curand_kernel.h>
+#include "runtime/device/gpu/cuda_common.h"
+
+void Dihedral14CFAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
+                            const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
+                            const int *b_14, const float *cf_scale_factor, float *ene, cudaStream_t stream);
+#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ENERGY_IMPL_H
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cu
new file mode 100644
index 0000000000..1bbde8bfa8
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cu
@@ -0,0 +1,80 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
+
+__global__ void Dihedral14CFEnergyKernel(const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd,
+                                         const VECTOR *boxlength, const int *a_14, const int *b_14,
+                                         const float *cf_scale_factor, float *ene) {
+  int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (dihedral_14_i < dihedral_14_numbers) {
+    int atom_i = a_14[dihedral_14_i];
+    int atom_j = b_14[dihedral_14_i];
+
+    UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i];
+    UINT_VECTOR_LJ_TYPE r2 = uint_crd[atom_j];
+
+    int int_x;
+    int int_y;
+    int int_z;
+    VECTOR dr;
+    float r_1;
+    float ene_lin = 0.;
+
+    int_x = r2.uint_x - r1.uint_x;
+    int_y = r2.uint_y - r1.uint_y;
+    int_z = r2.uint_z - r1.uint_z;
+    dr.x = boxlength[0].x * int_x;
+    dr.y = boxlength[0].y * int_y;
+    dr.z = boxlength[0].z * int_z;
+    r_1 = rnorm3df(dr.x, dr.y, dr.z);
+
+    ene_lin = r1.charge * r2.charge * r_1;
+
+    ene_lin *= cf_scale_factor[dihedral_14_i];
+
+    ene[dihedral_14_i] = ene_lin;
+  }
+}
+
+void Dihedral14CFEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype,
+                        const float *charge, const float *boxlength_f, const int *a_14, const int *b_14,
+                        const float *cf_scale_factor, float *ene, cudaStream_t stream) {
+  size_t thread_per_block = 128;
+  size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
+  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL;
+  Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers);
+
+  UNSIGNED_INT_VECTOR *uint_crd =
+    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
+
+  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
+
+  VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
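+  // Zero the output buffer, then compute one scaled 1-4 Coulomb energy per
+  // dihedral pair (the kernel writes ene[pair], indexed by the 1-4 pair).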
Reset_List<<(3. * atom_numbers) / 128), 128>>>(atom_numbers, ene, 0.); + Dihedral14CFEnergyKernel<<>>( + dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, cf_scale_factor, ene); + + cudaStreamSynchronize(stream); + + return; +} + +void Dihedral14CFEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype, + const float *charge, const float *boxlength_f, const int *a_14, const int *b_14, + const float *cf_scale_factor, float *ene, cudaStream_t stream); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cuh new file mode 100644 index 0000000000..0e14bd7534 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cuh @@ -0,0 +1,25 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ENERGY_IMPL_H +#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ENERGY_IMPL_H + +#include +#include "runtime/device/gpu/cuda_common.h" + +void Dihedral14CFEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype, + const float *charge, const float *boxlength, const int *a_14, const int *b_14, + const float *cf_scale_factor, float *ene, cudaStream_t stream); +#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ENERGY_IMPL_H diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cu new file mode 100644 index 0000000000..ccd4ba0595 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cu @@ -0,0 +1,102 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cu
new file mode 100644
index 0000000000..ccd4ba0595
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cu
@@ -0,0 +1,102 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
+
+__global__ void Dihedral14LJAtomEnergyKernel(const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd,
+                                             const VECTOR *boxlength, const int *a_14, const int *b_14,
+                                             const float *lj_scale_factor, const float *LJ_type_A,
+                                             const float *LJ_type_B, float *ene) {
+  int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (dihedral_14_i < dihedral_14_numbers) {
+    int atom_i = a_14[dihedral_14_i];
+    int atom_j = b_14[dihedral_14_i];
+
+    UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i];
+    UINT_VECTOR_LJ_TYPE r2 = uint_crd[atom_j];
+
+    int int_x;
+    int int_y;
+    int int_z;
+    VECTOR dr;
+    float dr2;
+    float dr_2;
+    float dr_4;
+    float dr_6;
+    float dr_12;
+    float ene_lin = 0.;
+    int x, y;
+    int atom_pair_LJ_type;
+
+    int_x = r2.uint_x - r1.uint_x;
+    int_y = r2.uint_y - r1.uint_y;
+    int_z = r2.uint_z - r1.uint_z;
+    dr.x = boxlength[0].x * int_x;
+    dr.y = boxlength[0].y * int_y;
+    dr.z = boxlength[0].z * int_z;
+    dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
+
+    dr_2 = 1. / dr2;
+    dr_4 = dr_2 * dr_2;
+    dr_6 = dr_4 * dr_2;
+    dr_12 = dr_6 * dr_6;
+
+    y = (r2.LJ_type - r1.LJ_type);
+    x = y >> 31;
+    y = (y ^ x) - x;
+    x = r2.LJ_type + r1.LJ_type;
+    r2.LJ_type = (x + y) >> 1;
+    x = (x - y) >> 1;
+    atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;
+
+    ene_lin = 0.08333333 * LJ_type_A[atom_pair_LJ_type] * dr_12 -
+              0.1666666 * LJ_type_B[atom_pair_LJ_type] * dr_6;  // LJ A and B are pre-multiplied by 12 and 6,
+                                                                // so divide the factors back out here
+    ene_lin *= lj_scale_factor[dihedral_14_i];
+
+    atomicAdd(&ene[atom_i], ene_lin);
+  }
+}
+
+void Dihedral14LJAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
+                            const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
+                            const int *b_14, const float *lj_scale_factor, const float *LJ_type_A,
+                            const float *LJ_type_B, float *ene, cudaStream_t stream) {
+  size_t thread_per_block = 128;
+  size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
+  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL;
+  Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers);
+
+  UNSIGNED_INT_VECTOR *uint_crd =
+    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
+
+  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
+
+  VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
+  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(atom_numbers, ene, 0.);
+  Dihedral14LJAtomEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
+    dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, LJ_type_A, LJ_type_B, ene);
+
+  cudaStreamSynchronize(stream);
+
+  return;
+}
+
+void Dihedral14LJAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
+                            const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
+                            const int *b_14, const float *lj_scale_factor, const float *LJ_type_A,
+                            const float *LJ_type_B, float *ene, cudaStream_t stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cuh
new file mode 100644
index 0000000000..d27035a67a
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cuh
@@ -0,0 +1,26 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ATOM_ENERGY_IMPL_H
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ATOM_ENERGY_IMPL_H
+
+#include <cuda_runtime_api.h>
+#include "runtime/device/gpu/cuda_common.h"
+
+void Dihedral14LJAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
+                            const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
+                            const int *b_14, const float *lj_scale_factor, const float *LJ_type_A,
+                            const float *LJ_type_B, float *ene, cudaStream_t stream);
+#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ATOM_ENERGY_IMPL_H
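The bit manipulation on `LJ_type` in the kernel above computes max and min of the two atom types branchlessly, then indexes a packed lower-triangular table of pair coefficients. An equivalent, readable formulation with a self-check (illustrative only, not part of the patch; it assumes arithmetic right shift on signed ints, as the CUDA kernels do):

#include <algorithm>
#include <cassert>

int PairIndex(int type_i, int type_j) {
  int hi = std::max(type_i, type_j);
  int lo = std::min(type_i, type_j);
  // Row hi of a packed lower-triangular matrix starts at hi*(hi+1)/2.
  return (hi * (hi + 1) >> 1) + lo;
}

int PairIndexBranchless(int ti, int tj) {
  int y = tj - ti;
  int x = y >> 31;        // all ones if y < 0, else zero (arithmetic shift)
  y = (y ^ x) - x;        // |tj - ti|
  x = tj + ti;
  int hi = (x + y) >> 1;  // max(ti, tj)
  int lo = (x - y) >> 1;  // min(ti, tj)
  return (hi * (hi + 1) >> 1) + lo;
}

int main() {
  for (int i = 0; i < 8; ++i)
    for (int j = 0; j < 8; ++j) assert(PairIndex(i, j) == PairIndexBranchless(i, j));
  return 0;
}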
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cu
new file mode 100644
index 0000000000..dd757caab7
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cu
@@ -0,0 +1,140 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
+
+__global__ void Dihedral14LJCFForceWithAtomEnergyKernel(const int dihedral_14_numbers,
+                                                        const UINT_VECTOR_LJ_TYPE *uint_crd, const VECTOR *boxlength,
+                                                        const int *a_14, const int *b_14, const float *lj_scale_factor,
+                                                        const float *cf_scale_factor, const float *LJ_type_A,
+                                                        const float *LJ_type_B, VECTOR *frc, float *atom_energy) {
+  int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (dihedral_14_i < dihedral_14_numbers) {
+    int int_x;
+    int int_y;
+    int int_z;
+    UINT_VECTOR_LJ_TYPE r1, r2;
+    VECTOR dr;
+    float dr_abs;
+    float dr2;
+    float dr_1;
+    float dr_2;
+    float dr_4;
+    float dr_8;
+    float dr_14;
+    float frc_abs = 0.;
+    VECTOR temp_frc;
+
+    float ene_lin;
+    float ene_lin2;
+
+    int x, y;
+    int atom_pair_LJ_type;
+
+    int atom_i = a_14[dihedral_14_i];
+    int atom_j = b_14[dihedral_14_i];
+
+    r1 = uint_crd[atom_i];
+    r2 = uint_crd[atom_j];
+    int_x = r2.uint_x - r1.uint_x;
+    int_y = r2.uint_y - r1.uint_y;
+    int_z = r2.uint_z - r1.uint_z;
+    dr.x = boxlength[0].x * int_x;
+    dr.y = boxlength[0].y * int_y;
+    dr.z = boxlength[0].z * int_z;
+    dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
+
+    dr_2 = 1.0 / dr2;
+    dr_4 = dr_2 * dr_2;
+    dr_8 = dr_4 * dr_4;
+    dr_14 = dr_8 * dr_4 * dr_2;
+    dr_abs = norm3df(dr.x, dr.y, dr.z);
+    dr_1 = 1. / dr_abs;
+
+    float charge_i = r1.charge;
+    float charge_j = r2.charge;
+    float frc_cf_abs;
+    frc_cf_abs = cf_scale_factor[dihedral_14_i] * dr_2 * dr_1;
+    frc_cf_abs = -charge_i * charge_j * frc_cf_abs;
+
+    y = (r2.LJ_type - r1.LJ_type);
+    x = y >> 31;
+    y = (y ^ x) - x;
+    x = r2.LJ_type + r1.LJ_type;
+    r2.LJ_type = (x + y) >> 1;
+    x = (x - y) >> 1;
+    atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;
+
+    frc_abs = -LJ_type_A[atom_pair_LJ_type] * dr_14 + LJ_type_B[atom_pair_LJ_type] * dr_8;
+    frc_abs *= lj_scale_factor[dihedral_14_i];
+
+    frc_abs += frc_cf_abs;
+    temp_frc.x = frc_abs * dr.x;
+    temp_frc.y = frc_abs * dr.y;
+    temp_frc.z = frc_abs * dr.z;
+
+    atomicAdd(&frc[atom_j].x, -temp_frc.x);
+    atomicAdd(&frc[atom_j].y, -temp_frc.y);
+    atomicAdd(&frc[atom_j].z, -temp_frc.z);
+    atomicAdd(&frc[atom_i].x, temp_frc.x);
+    atomicAdd(&frc[atom_i].y, temp_frc.y);
+    atomicAdd(&frc[atom_i].z, temp_frc.z);
+
+    ene_lin = r1.charge * r2.charge * dr_1;
+    ene_lin *= cf_scale_factor[dihedral_14_i];
+    ene_lin2 = 0.08333333 * LJ_type_A[atom_pair_LJ_type] * dr_4 * dr_8 -
+               0.1666666 * LJ_type_B[atom_pair_LJ_type] * dr_4 * dr_2;  // LJ A and B are pre-multiplied by 12 and 6,
+                                                                        // so divide the factors back out here
+    ene_lin2 *= lj_scale_factor[dihedral_14_i];
+
+    atomicAdd(&atom_energy[atom_i], ene_lin + ene_lin2);
+  }
+}
+
+void Dihedral14LJCFForceWithAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
+                                       const int *LJtype, const float *charge, const float *boxlength_f,
+                                       const int *a_14, const int *b_14, const float *lj_scale_factor,
+                                       const float *cf_scale_factor, const float *LJ_type_A, const float *LJ_type_B,
+                                       float *frc_f, float *atom_energy, cudaStream_t stream) {
+  size_t thread_per_block = 128;
+  size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
+  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL;
+  Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers);
+
+  UNSIGNED_INT_VECTOR *uint_crd =
+    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
+
+  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
+  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, frc_f, 0.);
+  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(atom_numbers, atom_energy, 0.);
+  VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
+  VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
+
+  Dihedral14LJCFForceWithAtomEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
+    dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, cf_scale_factor, LJ_type_A,
+    LJ_type_B, frc, atom_energy);
+
+  cudaStreamSynchronize(stream);
+
+  return;
+}
+
+void Dihedral14LJCFForceWithAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
+                                       const int *LJtype, const float *charge, const float *boxlength_f,
+                                       const int *a_14, const int *b_14, const float *lj_scale_factor,
+                                       const float *cf_scale_factor, const float *LJ_type_A, const float *LJ_type_B,
+                                       float *frc_f, float *atom_energy, cudaStream_t stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cuh
new file mode 100644
index 0000000000..4ea8262b3c
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cuh
@@ -0,0 +1,27 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_IMPL_H
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_IMPL_H
+
+#include <cuda_runtime_api.h>
+#include "runtime/device/gpu/cuda_common.h"
+
+void Dihedral14LJCFForceWithAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
+                                       const int *LJtype, const float *charge, const float *boxlength_f,
+                                       const int *a_14, const int *b_14, const float *lj_scale_factor,
+                                       const float *cf_scale_factor, const float *LJ_type_A, const float *LJ_type_B,
+                                       float *frc, float *atom_energy, cudaStream_t stream);
+#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_IMPL_H
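Why the force kernels can use `-A*r^-14 + B*r^-8` while the energy kernels multiply by 1/12 and 1/6: with the convention above, `LJ_type_A = 12a` and `LJ_type_B = 6b` for E(r) = a·r^-12 − b·r^-6, so `frc_abs` equals (dE/dr)/r and `frc_abs * dr` is the correct force on atom i (dr pointing from i to j). A small numeric consistency check of that identity (coefficients here are arbitrary examples, not from the patch):

#include <cassert>
#include <cmath>

int main() {
  const double a = 2.5, b = 1.3;           // plain LJ coefficients
  const double A = 12.0 * a, B = 6.0 * b;  // pre-scaled convention stored in LJ_type_A/B
  const double r = 1.7;

  double energy = A / 12.0 * std::pow(r, -12) - B / 6.0 * std::pow(r, -6);  // kernel's 0.0833*A, 0.1667*B
  double frc_over_r = -A * std::pow(r, -14) + B * std::pow(r, -8);          // kernel's frc_abs

  // Compare against a central-difference derivative of the energy.
  double h = 1e-6;
  double dEdr = ((A / 12.0 * std::pow(r + h, -12) - B / 6.0 * std::pow(r + h, -6)) -
                 (A / 12.0 * std::pow(r - h, -12) - B / 6.0 * std::pow(r - h, -6))) /
                (2 * h);
  assert(std::fabs(frc_over_r - dEdr / r) < 1e-5);
  (void)energy;
  return 0;
}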
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cu
new file mode 100644
index 0000000000..3436c35613
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cu
@@ -0,0 +1,102 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
+
+__global__ void Dihedral14LJEnergyKernel(const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd,
+                                         const VECTOR *boxlength, const int *a_14, const int *b_14,
+                                         const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B,
+                                         float *ene) {
+  int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (dihedral_14_i < dihedral_14_numbers) {
+    int atom_i = a_14[dihedral_14_i];
+    int atom_j = b_14[dihedral_14_i];
+
+    UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i];
+    UINT_VECTOR_LJ_TYPE r2 = uint_crd[atom_j];
+
+    int int_x;
+    int int_y;
+    int int_z;
+    VECTOR dr;
+    float dr2;
+    float dr_2;
+    float dr_4;
+    float dr_6;
+    float dr_12;
+    float ene_lin = 0.;
+    int x, y;
+    int atom_pair_LJ_type;
+
+    int_x = r2.uint_x - r1.uint_x;
+    int_y = r2.uint_y - r1.uint_y;
+    int_z = r2.uint_z - r1.uint_z;
+    dr.x = boxlength[0].x * int_x;
+    dr.y = boxlength[0].y * int_y;
+    dr.z = boxlength[0].z * int_z;
+    dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
+
+    dr_2 = 1. / dr2;
+    dr_4 = dr_2 * dr_2;
+    dr_6 = dr_4 * dr_2;
+    dr_12 = dr_6 * dr_6;
+
+    y = (r2.LJ_type - r1.LJ_type);
+    x = y >> 31;
+    y = (y ^ x) - x;
+    x = r2.LJ_type + r1.LJ_type;
+    r2.LJ_type = (x + y) >> 1;
+    x = (x - y) >> 1;
+    atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;
+
+    ene_lin = 0.08333333 * LJ_type_A[atom_pair_LJ_type] * dr_12 -
+              0.1666666 * LJ_type_B[atom_pair_LJ_type] * dr_6;  // LJ A and B are pre-multiplied by 12 and 6,
+                                                                // so divide the factors back out here
+    ene_lin *= lj_scale_factor[dihedral_14_i];
+
+    ene[dihedral_14_i] = ene_lin;
+  }
+}
+
+void Dihedral14LJEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
+                        const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
+                        const int *b_14, const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B,
+                        float *ene, cudaStream_t stream) {
+  size_t thread_per_block = 128;
+  size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
+  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL;
+  Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers);
+
+  UNSIGNED_INT_VECTOR *uint_crd =
+    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
+
+  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
+  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(dihedral_14_numbers, ene, 0.);
+  VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
+
+  Dihedral14LJEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
+    dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, LJ_type_A, LJ_type_B, ene);
+
+  cudaStreamSynchronize(stream);
+
+  return;
+}
+
+void Dihedral14LJEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
+                        const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
+                        const int *b_14, const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B,
+                        float *ene, cudaStream_t stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cuh
new file mode 100644
index 0000000000..4a132438b9
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cuh
@@ -0,0 +1,27 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ENERGY_IMPL_H
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ENERGY_IMPL_H
+
+#include <cuda_runtime_api.h>
+#include "runtime/device/gpu/cuda_common.h"
+
+void Dihedral14LJEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
+                        const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
+                        const int *b_14, const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B,
+                        float *ene, cudaStream_t stream);
+
+#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ENERGY_IMPL_H
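A minimal host-side sketch of how a caller might drive one of these wrappers, using Dihedral14LJEnergy as the example. The buffer sizes follow the parameter names above; the harness itself (one pair, one LJ type, left unfilled) is purely illustrative and not part of this patch:

#include <cuda_runtime.h>

extern void Dihedral14LJEnergy(const int, const int, const int *, const int *, const float *, const float *,
                               const int *, const int *, const float *, const float *, const float *, float *,
                               cudaStream_t);

int main() {
  const int atom_numbers = 2, dihedral_14_numbers = 1;
  int *uint_crd, *lj_type, *a_14, *b_14;
  float *charge, *boxlength, *lj_scale, *A, *B, *ene;
  cudaMalloc(&uint_crd, sizeof(int) * 3 * atom_numbers);  // packed UNSIGNED_INT_VECTOR coordinates
  cudaMalloc(&lj_type, sizeof(int) * atom_numbers);
  cudaMalloc(&charge, sizeof(float) * atom_numbers);
  cudaMalloc(&boxlength, sizeof(float) * 3);  // per-axis uint->dr conversion factors
  cudaMalloc(&a_14, sizeof(int) * dihedral_14_numbers);
  cudaMalloc(&b_14, sizeof(int) * dihedral_14_numbers);
  cudaMalloc(&lj_scale, sizeof(float) * dihedral_14_numbers);
  cudaMalloc(&A, sizeof(float) * 1);  // triangular pair table: one type -> one entry
  cudaMalloc(&B, sizeof(float) * 1);
  cudaMalloc(&ene, sizeof(float) * dihedral_14_numbers);
  // ... fill the device buffers with cudaMemcpy before launching ...
  cudaStream_t stream;
  cudaStreamCreate(&stream);
  Dihedral14LJEnergy(dihedral_14_numbers, atom_numbers, uint_crd, lj_type, charge, boxlength, a_14, b_14, lj_scale, A,
                     B, ene, stream);
  cudaStreamSynchronize(stream);
  return 0;
}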
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_impl.cu
new file mode 100644
index 0000000000..74f7a06758
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_impl.cu
@@ -0,0 +1,111 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_impl.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
+
+__global__ void Dihedral14LJForceKernel(const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd,
+                                        const VECTOR *boxlength, const int *a_14, const int *b_14,
+                                        const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B,
+                                        VECTOR *frc) {
+  int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (dihedral_14_i < dihedral_14_numbers) {
+    int int_x;
+    int int_y;
+    int int_z;
+    UINT_VECTOR_LJ_TYPE r1, r2;
+    VECTOR dr;
+    float dr2;
+    float dr_2;
+    float dr_4;
+    float dr_8;
+    float dr_14;
+    float frc_abs = 0.;
+    VECTOR temp_frc;
+    int x, y;
+    int atom_pair_LJ_type;
+
+    int atom_i = a_14[dihedral_14_i];
+    int atom_j = b_14[dihedral_14_i];
+
+    r1 = uint_crd[atom_i];
+    r2 = uint_crd[atom_j];
+
+    int_x = r2.uint_x - r1.uint_x;
+    int_y = r2.uint_y - r1.uint_y;
+    int_z = r2.uint_z - r1.uint_z;
+    dr.x = boxlength[0].x * int_x;
+    dr.y = boxlength[0].y * int_y;
+    dr.z = boxlength[0].z * int_z;
+    dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
+
+    dr_2 = 1.0 / dr2;
+    dr_4 = dr_2 * dr_2;
+    dr_8 = dr_4 * dr_4;
+    dr_14 = dr_8 * dr_4 * dr_2;
+
+    y = (r2.LJ_type - r1.LJ_type);
+    x = y >> 31;
+    y = (y ^ x) - x;
+    x = r2.LJ_type + r1.LJ_type;
+    r2.LJ_type = (x + y) >> 1;
+    x = (x - y) >> 1;
+    atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;
+
+    frc_abs = -LJ_type_A[atom_pair_LJ_type] * dr_14 + LJ_type_B[atom_pair_LJ_type] * dr_8;
+    frc_abs *= lj_scale_factor[dihedral_14_i];
+    temp_frc.x = frc_abs * dr.x;
+    temp_frc.y = frc_abs * dr.y;
+    temp_frc.z = frc_abs * dr.z;
+
+    atomicAdd(&frc[atom_j].x, -temp_frc.x);
+    atomicAdd(&frc[atom_j].y, -temp_frc.y);
+    atomicAdd(&frc[atom_j].z, -temp_frc.z);
+    atomicAdd(&frc[atom_i].x, temp_frc.x);
+    atomicAdd(&frc[atom_i].y, temp_frc.y);
+    atomicAdd(&frc[atom_i].z, temp_frc.z);
+  }
+}
+
+void Dihedral14LJForce(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype,
+                       const float *charge, const float *boxlength_f, const int *a_14, const int *b_14,
+                       const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B, float *frc_f,
+                       cudaStream_t stream) {
+  size_t thread_per_block = 128;
+  size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
+  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL;
+  Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers);
+
+  UNSIGNED_INT_VECTOR *uint_crd =
+    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
+
+  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
+  cudaStreamSynchronize(stream);
+  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
+  VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
+  VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
+
+  Dihedral14LJForceKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
+    dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, LJ_type_A, LJ_type_B, frc);
+  cudaStreamSynchronize(stream);
+  return;
+}
+
+void Dihedral14LJForce(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype,
+                       const float *charge, const float *boxlength_f, const int *a_14, const int *b_14,
+                       const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B, float *frc_f,
+                       cudaStream_t stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_impl.cuh
new file mode 100644
index 0000000000..7ea476ee2c
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_impl.cuh
@@ -0,0 +1,26 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_IMPL_H
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_IMPL_H
+
+#include <cuda_runtime_api.h>
+#include "runtime/device/gpu/cuda_common.h"
+
+void Dihedral14LJForce(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype,
+                       const float *charge, const float *boxlength_f, const int *a_14, const int *b_14,
+                       const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B, float *frc_f,
+                       cudaStream_t stream);
+#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_IMPL_H
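The force kernels above scatter into `frc` with atomicAdd because two 1-4 pairs can share an atom, so plain `+=` from different threads would race; each pair also deposits equal and opposite contributions (Newton's third law). A distilled, runnable CUDA sketch of that pattern (names and values are illustrative):

#include <cstdio>

__global__ void ScatterPairForce(const int pair_numbers, const int *a, const int *b, const float *f_pair, float *frc) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i < pair_numbers) {
    atomicAdd(&frc[a[i]], f_pair[i]);   // action on atom a
    atomicAdd(&frc[b[i]], -f_pair[i]);  // equal and opposite reaction on atom b
  }
}

int main() {
  int h_a[2] = {0, 0}, h_b[2] = {1, 2};  // both pairs touch atom 0 -> contention
  float h_f[2] = {1.0f, 2.0f}, h_frc[3] = {0.f, 0.f, 0.f};
  int *a, *b;
  float *f, *frc;
  cudaMalloc(&a, sizeof(h_a));
  cudaMalloc(&b, sizeof(h_b));
  cudaMalloc(&f, sizeof(h_f));
  cudaMalloc(&frc, sizeof(h_frc));
  cudaMemcpy(a, h_a, sizeof(h_a), cudaMemcpyHostToDevice);
  cudaMemcpy(b, h_b, sizeof(h_b), cudaMemcpyHostToDevice);
  cudaMemcpy(f, h_f, sizeof(h_f), cudaMemcpyHostToDevice);
  cudaMemcpy(frc, h_frc, sizeof(h_frc), cudaMemcpyHostToDevice);
  ScatterPairForce<<<1, 2>>>(2, a, b, f, frc);
  cudaMemcpy(h_frc, frc, sizeof(h_frc), cudaMemcpyDeviceToHost);
  printf("frc = {%f, %f, %f}\n", h_frc[0], h_frc[1], h_frc[2]);  // expected {3, -1, -2}
  return 0;
}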
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_with_direct_cf_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_with_direct_cf_impl.cu
new file mode 100644
index 0000000000..592d3a5206
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_with_direct_cf_impl.cu
@@ -0,0 +1,124 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_with_direct_cf_impl.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
+
+__global__ void Dihedral14LJForceWithDirectCFKernel(const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd,
+                                                    const VECTOR *boxlength, const int *a_14, const int *b_14,
+                                                    const float *lj_scale_factor, const float *cf_scale_factor,
+                                                    const float *LJ_type_A, const float *LJ_type_B, VECTOR *frc) {
+  int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (dihedral_14_i < dihedral_14_numbers) {
+    int int_x;
+    int int_y;
+    int int_z;
+    UINT_VECTOR_LJ_TYPE r1, r2;
+    VECTOR dr;
+    float dr_abs;
+    float dr2;
+    float dr_1;
+    float dr_2;
+    float dr_4;
+    float dr_8;
+    float dr_14;
+    float frc_abs = 0.;
+    VECTOR temp_frc;
+
+    int x, y;
+    int atom_pair_LJ_type;
+
+    int atom_i = a_14[dihedral_14_i];
+    int atom_j = b_14[dihedral_14_i];
+
+    r1 = uint_crd[atom_i];
+    r2 = uint_crd[atom_j];
+    int_x = r2.uint_x - r1.uint_x;
+    int_y = r2.uint_y - r1.uint_y;
+    int_z = r2.uint_z - r1.uint_z;
+    dr.x = boxlength[0].x * int_x;
+    dr.y = boxlength[0].y * int_y;
+    dr.z = boxlength[0].z * int_z;
+    dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
+
+    dr_2 = 1.0 / dr2;
+    dr_4 = dr_2 * dr_2;
+    dr_8 = dr_4 * dr_4;
+    dr_14 = dr_8 * dr_4 * dr_2;
+    dr_abs = norm3df(dr.x, dr.y, dr.z);
+    dr_1 = 1. / dr_abs;
+
+    float charge_i = r1.charge;
+    float charge_j = r2.charge;
+    float frc_cf_abs;
+    frc_cf_abs = cf_scale_factor[dihedral_14_i] * dr_2 * dr_1;
+    frc_cf_abs = -charge_i * charge_j * frc_cf_abs;
+    // LJ
+    y = (r2.LJ_type - r1.LJ_type);
+    x = y >> 31;
+    y = (y ^ x) - x;
+    x = r2.LJ_type + r1.LJ_type;
+    r2.LJ_type = (x + y) >> 1;
+    x = (x - y) >> 1;
+    atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;
+
+    frc_abs = -LJ_type_A[atom_pair_LJ_type] * dr_14 + LJ_type_B[atom_pair_LJ_type] * dr_8;
+    frc_abs *= lj_scale_factor[dihedral_14_i];
+
+    frc_abs += frc_cf_abs;
+    temp_frc.x = frc_abs * dr.x;
+    temp_frc.y = frc_abs * dr.y;
+    temp_frc.z = frc_abs * dr.z;
+
+    atomicAdd(&frc[atom_j].x, -temp_frc.x);
+    atomicAdd(&frc[atom_j].y, -temp_frc.y);
+    atomicAdd(&frc[atom_j].z, -temp_frc.z);
+    atomicAdd(&frc[atom_i].x, temp_frc.x);
+    atomicAdd(&frc[atom_i].y, temp_frc.y);
+    atomicAdd(&frc[atom_i].z, temp_frc.z);
+  }
+}
+
+void Dihedral14LJForceWithDirectCF(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
+                                   const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
+                                   const int *b_14, const float *lj_scale_factor, const float *cf_scale_factor,
+                                   const float *LJ_type_A, const float *LJ_type_B, float *frc_f, cudaStream_t stream) {
+  size_t thread_per_block = 128;
+  size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
+  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL;
+  Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers);
+
+  UNSIGNED_INT_VECTOR *uint_crd =
+    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
+
+  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
+  cudaStreamSynchronize(stream);
+  VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
+  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, frc_f, 0.);
+  VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
+
+  Dihedral14LJForceWithDirectCFKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
+    dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, cf_scale_factor, LJ_type_A,
+    LJ_type_B, frc);
+
+  return;
+}
+
+void Dihedral14LJForceWithDirectCF(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
+                                   const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
+                                   const int *b_14, const float *lj_scale_factor, const float *cf_scale_factor,
+                                   const float *LJ_type_A, const float *LJ_type_B, float *frc_f, cudaStream_t stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_with_direct_cf_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_with_direct_cf_impl.cuh
new file mode 100644
index 0000000000..8e4d95b856
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_with_direct_cf_impl.cuh
@@ -0,0 +1,26 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_WITH_DIRECT_CF_IMPL_H
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_WITH_DIRECT_CF_IMPL_H
+
+#include <cuda_runtime_api.h>
+#include "runtime/device/gpu/cuda_common.h"
+
+void Dihedral14LJForceWithDirectCF(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
+                                   const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
+                                   const int *b_14, const float *lj_scale_factor, const float *cf_scale_factor,
+                                   const float *LJ_type_A, const float *LJ_type_B, float *frc_f, cudaStream_t stream);
+#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_WITH_DIRECT_CF_IMPL_H
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cu
new file mode 100644
index 0000000000..6c514879bd
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cu
@@ -0,0 +1,419 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh"
+
+__global__ void Copy_List(const int element_numbers, const int *origin_list, int *list) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < element_numbers) {
+    list[i] = origin_list[i];
+  }
+}
+__global__ void Copy_List(const int element_numbers, const float *origin_list, float *list) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < element_numbers) {
+    list[i] = origin_list[i];
+  }
+}
+
+// Scale float coordinates into the unsigned 32-bit range; the final left-shift
+// doubles them, matching the halved conversion factors produced by Mul_half.
+__global__ void Crd_To_Uint_Crd(const int atom_numbers, float *scale_factor, const VECTOR *crd,
+                                UNSIGNED_INT_VECTOR *uint_crd) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    uint_crd[atom_i].uint_x = crd[atom_i].x * scale_factor[0];
+    uint_crd[atom_i].uint_y = crd[atom_i].y * scale_factor[1];
+    uint_crd[atom_i].uint_z = crd[atom_i].z * scale_factor[2];
+    /*uint_crd[atom_i].uint_x = 2 * uint_crd[atom_i].uint_x;
+    uint_crd[atom_i].uint_y = 2 * uint_crd[atom_i].uint_y;
+    uint_crd[atom_i].uint_z = 2 * uint_crd[atom_i].uint_z;*/
+    uint_crd[atom_i].uint_x = uint_crd[atom_i].uint_x << 1;
+    uint_crd[atom_i].uint_y = uint_crd[atom_i].uint_y << 1;
+    uint_crd[atom_i].uint_z = uint_crd[atom_i].uint_z << 1;
+  }
+}
+
+__global__ void Vector_Translation(const int vector_numbers, VECTOR *vec_list, const VECTOR translation_vec) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < vector_numbers) {
+    vec_list[i].x = vec_list[i].x + translation_vec.x;
+    vec_list[i].y = vec_list[i].y + translation_vec.y;
+    vec_list[i].z = vec_list[i].z + translation_vec.z;
+  }
+}
+__global__ void Vector_Translation(const int vector_numbers, VECTOR *vec_list, const VECTOR *translation_vec) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < vector_numbers) {
+    vec_list[i].x = vec_list[i].x + translation_vec[0].x;
+    vec_list[i].y = vec_list[i].y + translation_vec[0].y;
+    vec_list[i].z = vec_list[i].z + translation_vec[0].z;
+  }
+}
+
+// Wrap each coordinate back into [0, box_length); atoms drift by at most one
+// box length between refreshes, so a single correction suffices.
+__global__ void Crd_Periodic_Map(const int atom_numbers, VECTOR *crd, const float *box_length) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    if (crd[atom_i].x >= box_length[0]) {
+      crd[atom_i].x = crd[atom_i].x - box_length[0];
+    } else if (crd[atom_i].x < 0) {
+      crd[atom_i].x = crd[atom_i].x + box_length[0];
+    }
+
+    if (crd[atom_i].y >= box_length[1]) {
+      crd[atom_i].y = crd[atom_i].y - box_length[1];
+    } else if (crd[atom_i].y < 0) {
+      crd[atom_i].y = crd[atom_i].y + box_length[1];
+    }
+
+    if (crd[atom_i].z >= box_length[2]) {
+      crd[atom_i].z = crd[atom_i].z - box_length[2];
+    } else if (crd[atom_i].z < 0) {
+      crd[atom_i].z = crd[atom_i].z + box_length[2];
+    }
+  }
+}
+
+__global__ void Clear_Grid_Bucket(const int grid_numbers, int *atom_numbers_in_grid_bucket, GRID_BUCKET *bucket) {
+  int grid_serial = blockDim.x * blockIdx.x + threadIdx.x;
+  if (grid_serial < grid_numbers) {
+    GRID_BUCKET bucket_i = bucket[grid_serial];
+    for (int i = 0; i < atom_numbers_in_grid_bucket[grid_serial]; i = i + 1) {
+      bucket_i.atom_serial[i] = -1;  // -1 marks an empty slot
+    }
+    atom_numbers_in_grid_bucket[grid_serial] = 0;
+  }
+}
+
+__global__ void Find_Atom_In_Grid_Serial(const int atom_numbers, const float *grid_length_inverse, const VECTOR *crd,
+                                         const int *grid_N, const int gridxy, int *atom_in_grid_serial) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    int Nx = static_cast<int>(crd[atom_i].x * grid_length_inverse[0]);  // crd.x must be < boxlength.x
+    int Ny = static_cast<int>(crd[atom_i].y * grid_length_inverse[1]);
+    int Nz = static_cast<int>(crd[atom_i].z * grid_length_inverse[2]);
+    Nx = Nx & ((Nx - grid_N[0]) >> 31);  // clamp out-of-range indices to 0
+    Ny = Ny & ((Ny - grid_N[1]) >> 31);
+    Nz = Nz & ((Nz - grid_N[2]) >> 31);
+    atom_in_grid_serial[atom_i] = Nz * gridxy + Ny * grid_N[0] + Nx;
+  }
+}
+
+__global__ void Put_Atom_In_Grid_Bucket(const int atom_numbers, const int *atom_in_grid_serial, GRID_BUCKET *bucket,
+                                        int *atom_numbers_in_grid_bucket) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    int grid_serial = atom_in_grid_serial[atom_i];
+    GRID_BUCKET bucket_i = bucket[grid_serial];
+    int a = atom_numbers_in_grid_bucket[grid_serial];
+    atomicCAS(&bucket_i.atom_serial[a], -1, atom_i);
+    if (bucket_i.atom_serial[a] != atom_i) {
+      while (true) {
+        a = a + 1;
+        atomicCAS(&bucket_i.atom_serial[a], -1, atom_i);
+        if (bucket_i.atom_serial[a] == atom_i) {
+          atomicAdd(&atom_numbers_in_grid_bucket[grid_serial], 1);
+          break;
+        }
+      }
+    } else {
+      atomicAdd(&atom_numbers_in_grid_bucket[grid_serial], 1);
+    }
+  }
+}
+__global__ void Find_atom_neighbors(const int atom_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
+                                    const float *uint_dr_to_dr_cof, const int *atom_in_grid_serial,
+                                    const GRID_POINTER *gpointer, const GRID_BUCKET *bucket,
+                                    const int *atom_numbers_in_grid_bucket, NEIGHBOR_LIST *nl,
+                                    const float cutoff_skin_square) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    int grid_serial = atom_in_grid_serial[atom_i];
+    int grid_serial2;
+    int atom_numbers_in_nl_lin = 0;
+    int atom_j;
+    int int_x;
+    int int_y;
+    int int_z;
+    UNSIGNED_INT_VECTOR uint_crd_i = uint_crd[atom_i];
+    NEIGHBOR_LIST nl_i = nl[atom_i];
+    GRID_POINTER gpointer_i = gpointer[grid_serial];
+    VECTOR dr;
+    float dr2;
+    for (int grid_cycle = 0; grid_cycle < 125; grid_cycle = grid_cycle + 1) {
+      grid_serial2 = gpointer_i.grid_serial[grid_cycle];
+      GRID_BUCKET bucket_i = bucket[grid_serial2];
+      for (int i = 0; i < atom_numbers_in_grid_bucket[grid_serial2]; i = i + 1) {
+        atom_j = bucket_i.atom_serial[i];
+        if (atom_j > atom_i) {
+          int_x = uint_crd[atom_j].uint_x - uint_crd_i.uint_x;
+          int_y = uint_crd[atom_j].uint_y - uint_crd_i.uint_y;
+          int_z = uint_crd[atom_j].uint_z - uint_crd_i.uint_z;
+          dr.x = uint_dr_to_dr_cof[0] * int_x;
+          dr.y = uint_dr_to_dr_cof[1] * int_y;
+          dr.z = uint_dr_to_dr_cof[2] * int_z;
+          dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
+          if (dr2 < cutoff_skin_square) {
+            nl_i.atom_serial[atom_numbers_in_nl_lin] = atom_j;
+            atom_numbers_in_nl_lin = atom_numbers_in_nl_lin + 1;
+          }
+        }
+      }
+    }  // 125 grid cycles
+    nl[atom_i].atom_numbers = atom_numbers_in_nl_lin;
+  }
+}
+
+__global__ void Is_need_refresh_neighbor_list_cuda(const int atom_numbers, const VECTOR *crd, const VECTOR *old_crd,
+                                                   const float half_skin_square, int *need_refresh_flag) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < atom_numbers) {
+    VECTOR r1 = crd[i];
+    VECTOR r2 = old_crd[i];
+    r1.x = r1.x - r2.x;
+    r1.y = r1.y - r2.y;
+    r1.z = r1.z - r2.z;
+    float r1_2 = r1.x * r1.x + r1.y * r1.y + r1.z * r1.z;
+    if (r1_2 > half_skin_square) {
+      atomicExch(&need_refresh_flag[0], 1);
+    }
+  }
+}
+
+__global__ void Delete_Excluded_Atoms_Serial_In_Neighbor_List(const int atom_numbers, NEIGHBOR_LIST *nl,
+                                                              const int *excluded_list_start, const int *excluded_list,
+                                                              const int *excluded_atom_numbers) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    int excluded_number = excluded_atom_numbers[atom_i];
+    if (excluded_number > 0) {
+      int list_start = excluded_list_start[atom_i];
+      int atom_min = excluded_list[list_start];
+      int list_end = list_start + excluded_number;
+      int atom_max = excluded_list[list_end - 1];
+      NEIGHBOR_LIST nl_i = nl[atom_i];
+      int atomnumbers_in_nl_lin = nl_i.atom_numbers;
+      int atom_j;
+      int excluded_atom_numbers_lin = list_end - list_start;
+      int excluded_atom_numbers_count = 0;
+      for (int i = 0; i < atomnumbers_in_nl_lin; i = i + 1) {
+        atom_j = nl_i.atom_serial[i];
+        if (atom_j < atom_min || atom_j > atom_max) {
+          continue;  // atom_j cannot be in the (sorted) excluded range
+        }
+        for (int j = list_start; j < list_end; j = j + 1) {
+          if (atom_j == excluded_list[j]) {
+            // remove atom_j by swapping in the last neighbor
+            atomnumbers_in_nl_lin = atomnumbers_in_nl_lin - 1;
+            nl_i.atom_serial[i] = nl_i.atom_serial[atomnumbers_in_nl_lin];
+            excluded_atom_numbers_count = excluded_atom_numbers_count + 1;
+            i = i - 1;
+          }
+        }
+        if (excluded_atom_numbers_count >= excluded_atom_numbers_lin) {
+          break;  // every excluded atom has already been removed
+        }
+      }  // cycle over neighbors
+      nl[atom_i].atom_numbers = atomnumbers_in_nl_lin;
+    }  // if exclusions are needed
+  }
+}
+
+void Refresh_Neighbor_List(int *refresh_sign, const int thread, const int atom_numbers, VECTOR *crd, VECTOR *old_crd,
+                           UNSIGNED_INT_VECTOR *uint_crd, float *crd_to_uint_crd_cof, float *uint_dr_to_dr_cof,
+                           int *atom_in_grid_serial, const float skin, float *box_length, const GRID_POINTER *gpointer,
+                           GRID_BUCKET *bucket, int *atom_numbers_in_grid_bucket, NEIGHBOR_LIST *d_nl,
+                           int *excluded_list_start, int *excluded_list, int *excluded_numbers,
+                           float cutoff_skin_square, int grid_numbers, float *grid_length_inverse, int *grid_N, int Nxy,
+                           cudaStream_t stream) {
+  if (refresh_sign[0] == 1) {
+    VECTOR trans_vec = {-skin, -skin, -skin};
+    Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / thread), thread, 0, stream>>>(
+      grid_numbers, atom_numbers_in_grid_bucket, bucket);
+
+    Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(atom_numbers, crd,
+                                                                                                trans_vec);
+
+    Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(atom_numbers, crd,
+                                                                                              box_length);
+
+    Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
+      atom_numbers, grid_length_inverse, crd, grid_N, Nxy, atom_in_grid_serial);
+
+    trans_vec.x = -trans_vec.x;
+    trans_vec.y = -trans_vec.y;
+    trans_vec.z = -trans_vec.z;
+
+    Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(atom_numbers, crd,
+                                                                                                trans_vec);
+
+    Copy_List<<<ceilf(static_cast<float>(3. * atom_numbers) / thread), thread, 0, stream>>>(
+      3 * atom_numbers, reinterpret_cast<float *>(crd), reinterpret_cast<float *>(old_crd));
+
+    Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
+      atom_numbers, atom_in_grid_serial, bucket, atom_numbers_in_grid_bucket);
+
+    Crd_To_Uint_Crd<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
+      atom_numbers, crd_to_uint_crd_cof, crd, uint_crd);
+
+    Find_atom_neighbors<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
+      atom_numbers, uint_crd, uint_dr_to_dr_cof, atom_in_grid_serial, gpointer, bucket, atom_numbers_in_grid_bucket,
+      d_nl, cutoff_skin_square);
+
+    Delete_Excluded_Atoms_Serial_In_Neighbor_List<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0,
+                                                    stream>>>(atom_numbers, d_nl, excluded_list_start, excluded_list,
+                                                              excluded_numbers);
+    refresh_sign[0] = 0;
+  }
+}
+
+void Refresh_Neighbor_List_First_Time(int *refresh_sign, const int thread, const int atom_numbers, VECTOR *crd,
+                                      VECTOR *old_crd, UNSIGNED_INT_VECTOR *uint_crd, float *crd_to_uint_crd_cof,
+                                      float *uint_dr_to_dr_cof, int *atom_in_grid_serial, const float skin,
+                                      float *box_length, const GRID_POINTER *gpointer, GRID_BUCKET *bucket,
+                                      int *atom_numbers_in_grid_bucket, NEIGHBOR_LIST *d_nl, int *excluded_list_start,
+                                      int *excluded_list, int *excluded_numbers, float cutoff_skin_square,
+                                      int grid_numbers, float *grid_length_inverse, int *grid_N, int Nxy,
+                                      cudaStream_t stream) {
+  VECTOR trans_vec = {skin, skin, skin};
+  Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / 32), 32, 0, stream>>>(
+    grid_numbers, atom_numbers_in_grid_bucket, bucket);
+  Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, box_length);
+  Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, grid_length_inverse, crd, grid_N, Nxy, atom_in_grid_serial);
+  Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, trans_vec);
+  Copy_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 32), 32, 0, stream>>>(
+    3 * atom_numbers, reinterpret_cast<float *>(crd), reinterpret_cast<float *>(old_crd));
+  Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, atom_in_grid_serial, bucket, atom_numbers_in_grid_bucket);
+  Crd_To_Uint_Crd<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd_to_uint_crd_cof,
+                                                                                   crd, uint_crd);
+
+  Find_atom_neighbors<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
+    atom_numbers, uint_crd, uint_dr_to_dr_cof, atom_in_grid_serial, gpointer, bucket, atom_numbers_in_grid_bucket, d_nl,
+    cutoff_skin_square);
+  Delete_Excluded_Atoms_Serial_In_Neighbor_List<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0,
+                                                  stream>>>(atom_numbers, d_nl, excluded_list_start, excluded_list,
+                                                            excluded_numbers);
+}
+
+__global__ void construct_neighbor_list_kernel(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers,
+                                               int *nl_atom_serial, NEIGHBOR_LIST *nl) {
+  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) {
+    nl[i].atom_numbers = nl_atom_numbers[i];
+    nl[i].atom_serial = nl_atom_serial + i * max_neighbor_numbers;
+  }
+}
+
+void Construct_Neighbor_List(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers, int *nl_atom_serial,
+                             NEIGHBOR_LIST *nl, cudaStream_t stream) {
+  construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
+    atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl);
+}
+
+void Refresh_Neighbor_List_No_Check(int grid_numbers, int atom_numbers, float skin, int Nxy, float cutoff_skin_square,
+                                    int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket,
+                                    float *grid_length_inverse, int *atom_in_grid_serial, GRID_BUCKET *bucket,
+                                    VECTOR *crd, VECTOR *old_crd, float *crd_to_uint_crd_cof,
+                                    UNSIGNED_INT_VECTOR *uint_crd, float *uint_dr_to_dr_cof, GRID_POINTER *gpointer,
+                                    NEIGHBOR_LIST *d_nl, int *excluded_list_start, int *excluded_list,
+                                    int *excluded_numbers, cudaStream_t stream) {
+  VECTOR trans_vec = {-skin, -skin, -skin};
+
+  Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / 32), 32, 0, stream>>>(
+    grid_numbers, atom_numbers_in_grid_bucket, bucket);
+
+  Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, trans_vec);
+
+  Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, box_length);
+
+  Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, grid_length_inverse, crd, grid_N, Nxy, atom_in_grid_serial);
+  trans_vec.x = -trans_vec.x;
+  trans_vec.y = -trans_vec.y;
+  trans_vec.z = -trans_vec.z;
+  Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, trans_vec);
+
+  cudaMemcpyAsync(old_crd, crd, sizeof(VECTOR) * atom_numbers, cudaMemcpyDeviceToDevice, stream);
+
+  Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, atom_in_grid_serial, bucket, atom_numbers_in_grid_bucket);
+
+  Crd_To_Uint_Crd<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd_to_uint_crd_cof,
+                                                                                   crd, uint_crd);
+
+  Find_atom_neighbors<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, uint_crd, uint_dr_to_dr_cof, atom_in_grid_serial, gpointer, bucket, atom_numbers_in_grid_bucket, d_nl,
+    cutoff_skin_square);
+
+  Delete_Excluded_Atoms_Serial_In_Neighbor_List<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, d_nl, excluded_list_start, excluded_list, excluded_numbers);
+}
+
+// Halve the crd->uint conversion factors (uint coordinates are stored pre-doubled).
+__global__ void Mul_half(float *src, float *dst) {
+  int index = threadIdx.x;
+  if (index < 3) {
+    dst[index] = src[index] * 0.5;
+  }
+}
+
+void Neighbor_List_Update(int grid_numbers, int atom_numbers, int refresh_count, int refresh_interval,
+                          int not_first_time, float skin, int Nxy, float cutoff_square, float cutoff_with_skin_square,
+                          int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket, float *grid_length_inverse,
+                          int *atom_in_grid_serial, GRID_BUCKET *bucket, float *crd, float *old_crd,
+                          float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd,
+                          float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl,
+                          int *excluded_list_start, int *excluded_list, int *excluded_numbers, float half_skin_square,
+                          int *is_need_refresh_neighbor_list, cudaStream_t stream) {
+  if (not_first_time) {
+    if (refresh_interval > 0) {
+      if (refresh_count % refresh_interval == 0) {
+        Mul_half<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
+        Refresh_Neighbor_List_No_Check(
+          grid_numbers, atom_numbers, skin, Nxy, cutoff_square, grid_N, box_length, atom_numbers_in_grid_bucket,
+          grid_length_inverse, atom_in_grid_serial, bucket, reinterpret_cast<VECTOR *>(crd),
+          reinterpret_cast<VECTOR *>(old_crd), crd_to_uint_crd_cof, reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
+          uint_dr_to_dr_cof, gpointer, d_nl, excluded_list_start, excluded_list, excluded_numbers, stream);
+      }
+      refresh_count += 1;
+    } else {
+      Is_need_refresh_neighbor_list_cuda<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
+        atom_numbers, reinterpret_cast<VECTOR *>(crd), reinterpret_cast<VECTOR *>(old_crd), half_skin_square,
+        is_need_refresh_neighbor_list);
+      Mul_half<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
+      Refresh_Neighbor_List(is_need_refresh_neighbor_list, 32, atom_numbers, reinterpret_cast<VECTOR *>(crd),
+                            reinterpret_cast<VECTOR *>(old_crd), reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
+                            half_crd_to_uint_crd_cof, uint_dr_to_dr_cof, atom_in_grid_serial, skin, box_length,
+                            gpointer, bucket, atom_numbers_in_grid_bucket, d_nl, excluded_list_start, excluded_list,
+                            excluded_numbers, cutoff_with_skin_square, grid_numbers, grid_length_inverse, grid_N, Nxy,
+                            stream);
+    }
+  } else {
+    Mul_half<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
+    Refresh_Neighbor_List_First_Time(
+      is_need_refresh_neighbor_list, 32, atom_numbers, reinterpret_cast<VECTOR *>(crd),
+      reinterpret_cast<VECTOR *>(old_crd), reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd), half_crd_to_uint_crd_cof,
+      uint_dr_to_dr_cof, atom_in_grid_serial, skin, box_length, gpointer, bucket, atom_numbers_in_grid_bucket, d_nl,
+      excluded_list_start, excluded_list, excluded_numbers, cutoff_with_skin_square, grid_numbers, grid_length_inverse,
+      grid_N, Nxy, stream);
+  }
+}
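Put_Atom_In_Grid_Bucket above claims a bucket slot with atomicCAS on the sentinel value -1, probing forward from the current occupancy count as a hint. A distilled, compilable CUDA sketch of the same pattern; unlike the original (which re-reads the slot after the CAS), this uses the CAS return value directly, which is the equivalent but tighter form (all names here are illustrative):

// Empty slots hold -1; a thread probes linearly from a hint until its CAS wins.
__global__ void ClaimSlots(const int n, const int *hint, int *slots, int *counter) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i < n) {
    int a = hint[i];  // likely-free position; may already be taken
    while (atomicCAS(&slots[a], -1, i) != -1) {
      a = a + 1;  // occupied by another thread: probe the next slot
    }
    atomicAdd(counter, 1);  // slot claimed; bump the occupancy count
  }
}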
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh
new file mode 100644
index 0000000000..c6c6db415d
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh
@@ -0,0 +1,58 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NEIGHBOR_LIST_IMPL_H_
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NEIGHBOR_LIST_IMPL_H_
+
+struct VECTOR {
+  float x;
+  float y;
+  float z;
+};
+struct INT_VECTOR {
+  int int_x;
+  int int_y;
+  int int_z;
+};
+struct UNSIGNED_INT_VECTOR {
+  unsigned int uint_x;
+  unsigned int uint_y;
+  unsigned int uint_z;
+};
+struct NEIGHBOR_LIST {
+  int atom_numbers;
+  int *atom_serial;
+};
+struct GRID_BUCKET {
+  int *atom_serial;
+};
+struct GRID_POINTER {
+  int *grid_serial;
+};
+
+void Construct_Neighbor_List(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers, int *nl_atom_serial,
+                             NEIGHBOR_LIST *nl, cudaStream_t stream);
+
+void Neighbor_List_Update(int grid_numbers, int atom_numbers, int refresh_count, int refresh_interval,
+                          int not_first_time, float skin, int Nxy, float cutoff_square, float cutoff_with_skin_square,
+                          int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket, float *grid_length_inverse,
+                          int *atom_in_grid_serial, GRID_BUCKET *bucket, float *crd, float *old_crd,
+                          float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd,
+                          float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl,
+                          int *excluded_list_start, int *excluded_list, int *excluded_numbers, float half_skin_square,
+                          int *is_need_refresh_neighbor_list, cudaStream_t stream);
+
+#endif
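NEIGHBOR_LIST is an array-of-structs whose `atom_serial` members all alias rows of one flat [atom_numbers x max_neighbor_numbers] buffer, which is what construct_neighbor_list_kernel wires up. A host-side analogue of that stitching (illustrative only; the real buffers live in device memory):

#include <vector>

struct HOST_NEIGHBOR_LIST {  // hypothetical host mirror of NEIGHBOR_LIST
  int atom_numbers;
  int *atom_serial;
};

int main() {
  const int atom_numbers = 4, max_neighbor_numbers = 800;
  std::vector<int> nl_atom_numbers(atom_numbers, 0);
  std::vector<int> nl_atom_serial(static_cast<size_t>(atom_numbers) * max_neighbor_numbers, -1);
  std::vector<HOST_NEIGHBOR_LIST> nl(atom_numbers);
  for (int i = 0; i < atom_numbers; ++i) {
    nl[i].atom_numbers = nl_atom_numbers[i];
    // One allocation backs every per-atom list; entry i points at row i.
    nl[i].atom_serial = nl_atom_serial.data() + static_cast<size_t>(i) * max_neighbor_numbers;
  }
  return 0;
}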
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cu
new file mode 100644
index 0000000000..47e3e454f2
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cu
@@ -0,0 +1,139 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
+
+__global__ void MD_Iteration_Leap_Frog_With_LiuJian(const int atom_numbers, const float half_dt, const float dt,
+                                                    const float exp_gamma, const float *inverse_mass,
+                                                    const float *sqrt_mass_inverse, VECTOR *vel, VECTOR *crd,
+                                                    VECTOR *frc, VECTOR *acc, VECTOR *random_frc) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < atom_numbers) {
+    acc[i].x = inverse_mass[i] * frc[i].x;
+    acc[i].y = inverse_mass[i] * frc[i].y;
+    acc[i].z = inverse_mass[i] * frc[i].z;
+
+    vel[i].x = vel[i].x + dt * acc[i].x;
+    vel[i].y = vel[i].y + dt * acc[i].y;
+    vel[i].z = vel[i].z + dt * acc[i].z;
+
+    crd[i].x = crd[i].x + half_dt * vel[i].x;
+    crd[i].y = crd[i].y + half_dt * vel[i].y;
+    crd[i].z = crd[i].z + half_dt * vel[i].z;
+
+    vel[i].x = exp_gamma * vel[i].x + sqrt_mass_inverse[i] * random_frc[i].x;
+    vel[i].y = exp_gamma * vel[i].y + sqrt_mass_inverse[i] * random_frc[i].y;
+    vel[i].z = exp_gamma * vel[i].z + sqrt_mass_inverse[i] * random_frc[i].z;
+
+    crd[i].x = crd[i].x + half_dt * vel[i].x;
+    crd[i].y = crd[i].y + half_dt * vel[i].y;
+    crd[i].z = crd[i].z + half_dt * vel[i].z;
+
+    frc[i].x = 0.;
+    frc[i].y = 0.;
+    frc[i].z = 0.;
+  }
+}
+
+__global__ void MD_Iteration_Leap_Frog_With_LiuJian_With_Max_Velocity(const int atom_numbers, const float half_dt,
+                                                                      const float dt, const float exp_gamma,
+                                                                      const float *inverse_mass,
+                                                                      const float *sqrt_mass_inverse, VECTOR *vel,
+                                                                      VECTOR *crd, VECTOR *frc, VECTOR *acc,
+                                                                      VECTOR *random_frc, const float max_vel) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  float abs_vel;
+  if (i < atom_numbers) {
+    acc[i].x = inverse_mass[i] * frc[i].x;
+    acc[i].y = inverse_mass[i] * frc[i].y;
+    acc[i].z = inverse_mass[i] * frc[i].z;
+
+    vel[i].x = vel[i].x + dt * acc[i].x;
+    vel[i].y = vel[i].y + dt * acc[i].y;
+    vel[i].z = vel[i].z + dt * acc[i].z;
+
+    abs_vel = norm3df(vel[i].x, vel[i].y, vel[i].z);
+    if (abs_vel >= max_vel) {
+      // rescale so the speed never exceeds max_vel
+      abs_vel = max_vel / abs_vel;
+      vel[i].x = abs_vel * vel[i].x;
+      vel[i].y = abs_vel * vel[i].y;
+      vel[i].z = abs_vel * vel[i].z;
+    }
+
+    crd[i].x = crd[i].x + half_dt * vel[i].x;
+    crd[i].y = crd[i].y + half_dt * vel[i].y;
+    crd[i].z = crd[i].z + half_dt * vel[i].z;
+
+    vel[i].x = exp_gamma * vel[i].x + sqrt_mass_inverse[i] * random_frc[i].x;
+    vel[i].y = exp_gamma * vel[i].y + sqrt_mass_inverse[i] * random_frc[i].y;
+    vel[i].z = exp_gamma * vel[i].z + sqrt_mass_inverse[i] * random_frc[i].z;
+
+    crd[i].x = crd[i].x + half_dt * vel[i].x;
+    crd[i].y = crd[i].y + half_dt * vel[i].y;
+    crd[i].z = crd[i].z + half_dt * vel[i].z;
+
+    frc[i].x = 0.;
+    frc[i].y = 0.;
+    frc[i].z = 0.;
+  }
+}
+
+void MDIterationLeapFrog(const int float4_numbers, const int atom_numbers, const float half_dt, const float dt,
+                         const float exp_gamma, const int is_max_velocity, const float max_velocity,
+                         const float *d_mass_inverse, const float *d_sqrt_mass, float *vel_f, float *crd_f,
+                         float *frc_f, float *acc_f, cudaStream_t stream) {
+  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, vel_f, 0.);
+  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, crd_f, 0.);
+  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, frc_f, 0.);
+  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, acc_f, 0.);
+
+  VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
+  VECTOR *vel = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(vel_f));
+  VECTOR *acc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(acc_f));
+  VECTOR *crd = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(crd_f));
+
+  curandStatePhilox4_32_10_t *rand_state;
+  VECTOR *random_force;
+
+  Cuda_Malloc_Safely(reinterpret_cast<void **>(&random_force), sizeof(float4) * float4_numbers);
+  Cuda_Malloc_Safely(reinterpret_cast<void **>(&rand_state), sizeof(curandStatePhilox4_32_10_t) * float4_numbers);
+  Setup_Rand_Normal_Kernel<<<ceilf(static_cast<float>(float4_numbers) / 32.), 32>>>(float4_numbers, rand_state, 1);
+  Rand_Normal<<<ceilf(static_cast<float>(float4_numbers) / 32.), 32, 0, stream>>>(
+    float4_numbers, rand_state, reinterpret_cast<float4 *>(random_force));
+
+  if (!is_max_velocity) {
+    MD_Iteration_Leap_Frog_With_LiuJian<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+      atom_numbers, half_dt, dt, exp_gamma, d_mass_inverse, d_sqrt_mass, vel, crd, frc, acc, random_force);
+  } else {
+    MD_Iteration_Leap_Frog_With_LiuJian_With_Max_Velocity<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0,
+                                                            stream>>>(atom_numbers, half_dt, dt, exp_gamma,
+                                                                      d_mass_inverse, d_sqrt_mass, vel, crd, frc, acc,
+                                                                      random_force, max_velocity);
+  }
+
+  cudaStreamSynchronize(stream);
+  cudaFree(random_force);
+  cudaFree(rand_state);
+
+  return;
+}
+
+void MDIterationLeapFrog(const int float4_numbers, const int atom_numbers, const float half_dt, const float dt,
+                         const float exp_gamma, const int is_max_velocity, const float max_velocity,
+                         const float *d_mass_inverse, const float *d_sqrt_mass, float *vel_f, float *crd_f,
+                         float *frc_f, float *acc_f, cudaStream_t stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cuh
new file mode 100644
index 0000000000..1db936bda9
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cuh
@@ -0,0 +1,27 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NVTIT_MD_ITERATION_LEAP_FROG_IMPL_H
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NVTIT_MD_ITERATION_LEAP_FROG_IMPL_H
+
+#include <cuda_runtime_api.h>
+#include "runtime/device/gpu/cuda_common.h"
+
+void MDIterationLeapFrog(const int float4_numbers, const int atom_numbers, const float half_dt, const float dt,
+                         const float exp_gamma, const int is_max_velocity, const float max_velocity,
+                         const float *d_mass_inverse, const float *d_sqrt_mass, float *vel_f, float *crd_f,
+                         float *frc_f, float *acc_f, cudaStream_t stream);
+
+#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NVTIT_MD_ITERATION_LEAP_FROG_IMPL_H
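A scalar, single-atom reference of one integration step as implemented by MD_Iteration_Leap_Frog_With_LiuJian above: full velocity update from the force, half-step drift, Langevin-style velocity mixing (friction factor exp_gamma plus a mass-scaled random kick), then a second half-step drift. All numbers below are arbitrary examples, not values from the patch:

#include <cstdio>

int main() {
  float dt = 0.001f, half_dt = 0.0005f, exp_gamma = 0.9f;
  float inverse_mass = 1.0f, sqrt_mass_inverse = 1.0f;
  float vel = 0.1f, crd = 0.0f, frc = 2.0f, random_frc = 0.05f;

  float acc = inverse_mass * frc;                          // a = F/m
  vel += dt * acc;                                         // full velocity step
  crd += half_dt * vel;                                    // first half drift
  vel = exp_gamma * vel + sqrt_mass_inverse * random_frc;  // friction + thermal noise
  crd += half_dt * vel;                                    // second half drift
  frc = 0.f;                                               // forces are re-accumulated next step

  printf("crd=%f vel=%f\n", crd, vel);
  return 0;
}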
+ */ +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NVTIT_MD_ITERATION_LEAP_FROG_IMPL_H +#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NVTIT_MD_ITERATION_LEAP_FROG_IMPL_H + +#include +#include "runtime/device/gpu/cuda_common.h" + +void MDIterationLeapFrog(const int float4_numbers, const int atom_numbers, const float half_dt, const float dt, + const float exp_gamma, const int is_max_velocity, const float max_velocity, + const float *d_mass_inverse, const float *d_sqrt_mass, float *vel_f, float *crd_f, + float *frc_f, float *acc_f, cudaStream_t stream); + +#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NVTIT_MD_ITERATION_LEAP_FROG_IMPL_H diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_common.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_common.cuh new file mode 100644 index 0000000000..427e63e73e --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_common.cuh @@ -0,0 +1,230 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_COMMON_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_COMMON_H_ +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh" +__constant__ float PME_Ma[4] = {1.0 / 6.0, -0.5, 0.5, -1.0 / 6.0}; +__constant__ float PME_Mb[4] = {0, 0.5, -1, 0.5}; +__constant__ float PME_Mc[4] = {0, 0.5, 0, -0.5}; +__constant__ float PME_Md[4] = {0, 1.0 / 6.0, 4.0 / 6.0, 1.0 / 6.0}; +__constant__ float PME_dMa[4] = {0.5, -1.5, 1.5, -0.5}; +__constant__ float PME_dMb[4] = {0, 1, -2, 1}; +__constant__ float PME_dMc[4] = {0, 0.5, 0, -0.5}; +#define PI 3.1415926 +const float periodic_factor_inverse = 2.32830643e-10; +static dim3 thread_PME; + +const float cutoff = 10.0; +const float tolerance = 0.00001; + +static float M_(float u, int n) { + if (n == 2) { + if (u > 2 || u < 0) return 0; + return 1 - abs(u - 1); + } else { + return u / (n - 1) * M_(u, n - 1) + (n - u) / (n - 1) * M_(u - 1, n - 1); + } +} + +static float Get_Beta(float cutoff, float tolerance) { + float beta, low, high, tempf; + int ilow, ihigh; + + high = 1.0; + ihigh = 1; + + while (1) { + tempf = erfc(high * cutoff) / cutoff; + if (tempf <= tolerance) break; + high *= 2; + ihigh++; + } + + ihigh += 50; + low = 0.0; + for (ilow = 1; ilow < ihigh; ilow++) { + beta = (low + high) / 2; + tempf = erfc(beta * cutoff) / cutoff; + if (tempf >= tolerance) + low = beta; + else + high = beta; + } + return beta; +} + +static cufftComplex expc(cufftComplex z) { + cufftComplex res; + float t = expf(z.x); + sincosf(z.y, &res.y, &res.x); + res.x *= t; + res.y *= t; + return res; +} + +static float getb(int k, int NFFT, int B_order) { + cufftComplex tempc, tempc2, res; + float tempf; + tempc2.x = 0; + tempc2.y = 0; + + tempc.x = 0; + tempc.y = 2 * (B_order - 1) * PI * k / NFFT; + res = expc(tempc); + + for (int kk = 0; kk < (B_order - 1); kk++) { + tempc.x = 0; + tempc.y = 
2 * PI * k / NFFT * kk; + tempc = expc(tempc); + tempf = M_(kk + 1, B_order); + tempc2.x += tempf * tempc.x; + tempc2.y += tempf * tempc.y; + } + res = cuCdivf(res, tempc2); + return res.x * res.x + res.y * res.y; +} + +__global__ static void PME_Atom_Near(const UNSIGNED_INT_VECTOR *uint_crd, int *PME_atom_near, const int PME_Nin, + const float periodic_factor_inverse_x, const float periodic_factor_inverse_y, + const float periodic_factor_inverse_z, const int atom_numbers, const int fftx, + const int ffty, const int fftz, const UNSIGNED_INT_VECTOR *PME_kxyz, + UNSIGNED_INT_VECTOR *PME_uxyz, VECTOR *PME_frxyz) { + int atom = blockDim.x * blockIdx.x + threadIdx.x; + if (atom < atom_numbers) { + UNSIGNED_INT_VECTOR *temp_uxyz = &PME_uxyz[atom]; + int k, tempux, tempuy, tempuz; + float tempf; + tempf = static_cast (uint_crd[atom].uint_x) * periodic_factor_inverse_x; + tempux = static_cast (tempf); + PME_frxyz[atom].x = tempf - tempux; + + tempf = static_cast (uint_crd[atom].uint_y) * periodic_factor_inverse_y; + tempuy = static_cast (tempf); + PME_frxyz[atom].y = tempf - tempuy; + + tempf = static_cast (uint_crd[atom].uint_z) * periodic_factor_inverse_z; + tempuz = static_cast (tempf); + PME_frxyz[atom].z = tempf - tempuz; + + if (tempux != (*temp_uxyz).uint_x || tempuy != (*temp_uxyz).uint_y || tempuz != (*temp_uxyz).uint_z) { + (*temp_uxyz).uint_x = tempux; + (*temp_uxyz).uint_y = tempuy; + (*temp_uxyz).uint_z = tempuz; + int *temp_near = PME_atom_near + atom * 64; + int kx, ky, kz; + for (k = 0; k < 64; k++) { + UNSIGNED_INT_VECTOR temp_kxyz = PME_kxyz[k]; + kx = tempux - temp_kxyz.uint_x; + if (kx < 0) kx += fftx; + ky = tempuy - temp_kxyz.uint_y; + if (ky < 0) ky += ffty; + kz = tempuz - temp_kxyz.uint_z; + if (kz < 0) kz += fftz; + temp_near[k] = kx * PME_Nin + ky * fftz + kz; + } + } + } +} + +__global__ static void PME_Q_Spread(int *PME_atom_near, const float *charge, const VECTOR *PME_frxyz, float *PME_Q, + const UNSIGNED_INT_VECTOR *PME_kxyz, const int atom_numbers) { + int atom = blockDim.x * blockIdx.x + threadIdx.x; + + if (atom < atom_numbers) { + int k; + float tempf, tempQ, tempf2; + + int *temp_near = PME_atom_near + atom * 64; + VECTOR temp_frxyz = PME_frxyz[atom]; + float tempcharge = charge[atom]; + + UNSIGNED_INT_VECTOR temp_kxyz; + unsigned int kx; + + for (k = threadIdx.y; k < 64; k = k + blockDim.y) { + temp_kxyz = PME_kxyz[k]; + kx = temp_kxyz.uint_x; + tempf = (temp_frxyz.x); + tempf2 = tempf * tempf; + tempf = PME_Ma[kx] * tempf * tempf2 + PME_Mb[kx] * tempf2 + PME_Mc[kx] * tempf + PME_Md[kx]; + + tempQ = tempcharge * tempf; + + kx = temp_kxyz.uint_y; + tempf = (temp_frxyz.y); + tempf2 = tempf * tempf; + tempf = PME_Ma[kx] * tempf * tempf2 + PME_Mb[kx] * tempf2 + PME_Mc[kx] * tempf + PME_Md[kx]; + + tempQ = tempQ * tempf; + + kx = temp_kxyz.uint_z; + tempf = (temp_frxyz.z); + tempf2 = tempf * tempf; + tempf = PME_Ma[kx] * tempf * tempf2 + PME_Mb[kx] * tempf2 + PME_Mc[kx] * tempf + PME_Md[kx]; + tempQ = tempQ * tempf; + + atomicAdd(&PME_Q[temp_near[k]], tempQ); + } + } +} + +__global__ static void PME_Direct_Energy(const int atom_numbers, const NEIGHBOR_LIST *nl, + const UNSIGNED_INT_VECTOR *uint_crd, const VECTOR *boxlength, + const float *charge, const float beta, const float cutoff_square, + float *direct_ene) { + int atom_i = blockDim.x * blockIdx.x + threadIdx.x; + if (atom_i < atom_numbers) { + NEIGHBOR_LIST nl_i = nl[atom_i]; + int N = nl_i.atom_numbers; + int atom_j; + int int_x; + int int_y; + int int_z; + UNSIGNED_INT_VECTOR r1 = uint_crd[atom_i], r2; + 
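+    // Direct-space part of the PME sum: every neighbour inside the cutoff
+    // contributes q_i * q_j * erfc(beta * r_ij) / r_ij; the smooth erf()
+    // remainder of the Coulomb interaction is recovered in reciprocal space.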
VECTOR dr; + float dr2; + float dr_abs; + // float dr_inverse; + float ene_temp; + float charge_i = charge[atom_i]; + float ene_lin = 0.; + + // int x, y; + // int atom_pair_LJ_type; + for (int j = threadIdx.y; j < N; j = j + blockDim.y) { + atom_j = nl_i.atom_serial[j]; + r2 = uint_crd[atom_j]; + + int_x = r2.uint_x - r1.uint_x; + int_y = r2.uint_y - r1.uint_y; + int_z = r2.uint_z - r1.uint_z; + dr.x = boxlength[0].x * int_x; + dr.y = boxlength[0].y * int_y; + dr.z = boxlength[0].z * int_z; + + dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z; + if (dr2 < cutoff_square) { + dr_abs = norm3df(dr.x, dr.y, dr.z); + ene_temp = charge_i * charge[atom_j] * erfcf(beta * dr_abs) / dr_abs; + ene_lin = ene_lin + ene_temp; + } + } + atomicAdd(direct_ene, ene_lin); + } +} + +#endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_impl.cu new file mode 100644 index 0000000000..7ef1132eff --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_impl.cu @@ -0,0 +1,234 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_impl.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_common.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh" + +__global__ void PME_Energy_Product(const int element_number, const float *list1, const float *list2, float *sum) { + if (threadIdx.x == 0) { + sum[0] = 0.; + } + __syncthreads(); + float lin = 0.0; + for (int i = threadIdx.x; i < element_number; i = i + blockDim.x) { + lin = lin + list1[i] * list2[i]; + } + atomicAdd(sum, lin); +} + +__global__ void PME_Energy_Reciprocal(const int element_number, const cufftComplex *FQ, const float *BC, float *sum) { + if (threadIdx.x == 0) { + sum[0] = 0.; + } + __syncthreads(); + float lin = 0.0; + cufftComplex FQ_i; + for (int i = threadIdx.x; i < element_number; i = i + blockDim.x) { + FQ_i = FQ[i]; + lin = lin + (FQ_i.x * FQ_i.x + FQ_i.y * FQ_i.y) * BC[i]; + } + atomicAdd(sum, lin); +} + +__global__ void PME_Excluded_Energy_Correction(const int atom_numbers, const UNSIGNED_INT_VECTOR *uint_crd, + const VECTOR *sacler, const float *charge, const float pme_beta, + const float sqrt_pi, const int *excluded_list_start, + const int *excluded_list, const int *excluded_atom_numbers, float *ene) { + int atom_i = blockDim.x * blockIdx.x + threadIdx.x; + if (atom_i < atom_numbers) { + int excluded_number = excluded_atom_numbers[atom_i]; + if (excluded_number > 0) { + int list_start = excluded_list_start[atom_i]; + // int atom_min = excluded_list[list_start]; + int list_end = list_start + excluded_number; + int atom_j; + int int_x; + int int_y; + int int_z; + + float charge_i = charge[atom_i]; + float charge_j; + float dr_abs; + float beta_dr; + + UNSIGNED_INT_VECTOR r1 = uint_crd[atom_i], r2; + VECTOR dr; + 
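+      // Bonded (excluded) pairs still appear in the reciprocal-space sum, so
+      // the loop below subtracts q_i * q_j * erf(beta * r) / r for each such
+      // pair, leaving only genuinely non-bonded electrostatics in the total.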
float dr2; + + float ene_lin = 0.; + + for (int i = list_start; i < list_end; i = i + 1) { + atom_j = excluded_list[i]; + r2 = uint_crd[atom_j]; + charge_j = charge[atom_j]; + + int_x = r2.uint_x - r1.uint_x; + int_y = r2.uint_y - r1.uint_y; + int_z = r2.uint_z - r1.uint_z; + dr.x = sacler[0].x * int_x; + dr.y = sacler[0].y * int_y; + dr.z = sacler[0].z * int_z; + dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z; + + dr_abs = sqrtf(dr2); + beta_dr = pme_beta * dr_abs; + + ene_lin -= charge_i * charge_j * erff(beta_dr) / dr_abs; + } + atomicAdd(ene, ene_lin); + } + } +} + +void PMEEnergy(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *box_length_f, float *PME_BC, + int *pme_uxyz, float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz, + const int *uint_crd_f, const float *charge, int *nl_atom_numbers, int *nl_atom_serial, int *nl, + const float *scaler_f, const int *excluded_list_start, const int *excluded_list, + const int *excluded_atom_numbers, float *d_reciprocal_ene, float *d_self_ene, float *d_direct_ene, + float *d_correction_ene, cudaStream_t stream) { + UNSIGNED_INT_VECTOR *uint_crd = + const_cast(reinterpret_cast(uint_crd_f)); + VECTOR *scaler = const_cast(reinterpret_cast(scaler_f)); + int max_neighbor_numbers = 800; + NEIGHBOR_LIST *nl_a = reinterpret_cast(nl); + construct_neighbor_list_kernel<<(atom_numbers) / 128), 128, 0, stream>>>( + atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a); + std::vector h_box_length(3); + cudaMemcpyAsync(h_box_length.data(), box_length_f, sizeof(float) * h_box_length.size(), cudaMemcpyDeviceToHost, + stream); + cudaStreamSynchronize(stream); + VECTOR *box_length = reinterpret_cast(h_box_length.data()); + + UNSIGNED_INT_VECTOR *PME_uxyz = reinterpret_cast(pme_uxyz); + UNSIGNED_INT_VECTOR *PME_kxyz = reinterpret_cast(pme_kxyz); + VECTOR *PME_frxyz = reinterpret_cast(pme_frxyz); + cufftComplex *PME_FQ = reinterpret_cast(pme_fq); + cufftHandle PME_plan_r2c; + cufftHandle PME_plan_c2r; + cufftPlan3d(&PME_plan_r2c, fftx, ffty, fftz, CUFFT_R2C); + cufftPlan3d(&PME_plan_c2r, fftx, ffty, fftz, CUFFT_C2R); + cufftSetStream(PME_plan_r2c, stream); + cufftSetStream(PME_plan_c2r, stream); + thread_PME.x = 8; + thread_PME.y = 8; + int PME_Nin = ffty * fftz; + int PME_Nfft = fftx * ffty * (fftz / 2 + 1); + int PME_Nall = fftx * ffty * fftz; + float volume = box_length[0].x * box_length[0].y * box_length[0].z; + + UNSIGNED_INT_VECTOR *PME_kxyz_cpu; + Malloc_Safely(reinterpret_cast(&PME_kxyz_cpu), sizeof(UNSIGNED_INT_VECTOR) * 64); + + int kx, ky, kz, kxrp, kyrp, kzrp, index; + for (kx = 0; kx < 4; kx++) { + for (ky = 0; ky < 4; ky++) { + for (kz = 0; kz < 4; kz++) { + index = kx * 16 + ky * 4 + kz; + PME_kxyz_cpu[index].uint_x = kx; + PME_kxyz_cpu[index].uint_y = ky; + PME_kxyz_cpu[index].uint_z = kz; + } + } + } + cudaMemcpyAsync(PME_kxyz, PME_kxyz_cpu, sizeof(UNSIGNED_INT_VECTOR) * 64, cudaMemcpyHostToDevice, stream); + cudaStreamSynchronize(stream); + free(PME_kxyz_cpu); + + // initial start + float *B1, *B2, *B3, *PME_BC0; + B1 = reinterpret_cast(malloc(sizeof(float) * fftx)); + B2 = reinterpret_cast(malloc(sizeof(float) * ffty)); + B3 = reinterpret_cast(malloc(sizeof(float) * fftz)); + PME_BC0 = reinterpret_cast(malloc(sizeof(float) * PME_Nfft)); + + for (kx = 0; kx < fftx; kx++) { + B1[kx] = getb(kx, fftx, 4); + } + + for (ky = 0; ky < ffty; ky++) { + B2[ky] = getb(ky, ffty, 4); + } + + for (kz = 0; kz < fftz; kz++) { + B3[kz] = getb(kz, fftz, 4); + } + float mprefactor = PI * PI / -beta / 
beta; + + float msq; + for (kx = 0; kx < fftx; kx++) { + kxrp = kx; + if (kx > fftx / 2) kxrp = fftx - kx; + for (ky = 0; ky < ffty; ky++) { + kyrp = ky; + if (ky > ffty / 2) kyrp = ffty - ky; + for (kz = 0; kz <= fftz / 2; kz++) { + kzrp = kz; + + msq = kxrp * kxrp / box_length[0].x / box_length[0].x + kyrp * kyrp / box_length[0].y / box_length[0].y + + kzrp * kzrp / box_length[0].z / box_length[0].z; + index = kx * ffty * (fftz / 2 + 1) + ky * (fftz / 2 + 1) + kz; + if ((kx + ky + kz) == 0) { + PME_BC0[index] = 0; + } else { + PME_BC0[index] = 1.0 / PI / msq * exp(mprefactor * msq) / volume; + } + + PME_BC0[index] *= B1[kx] * B2[ky] * B3[kz]; + } + } + } + + cudaMemcpyAsync(PME_BC, PME_BC0, sizeof(float) * PME_Nfft, cudaMemcpyHostToDevice, stream); + cudaStreamSynchronize(stream); + free(B1); + free(B2); + free(B3); + free(PME_BC0); + + Reset_List<<<3 * atom_numbers / 32 + 1, 32, 0, stream>>>(3 * atom_numbers, reinterpret_cast(PME_uxyz), + 1 << 30); + PME_Atom_Near<<>>( + uint_crd, PME_atom_near, PME_Nin, periodic_factor_inverse * fftx, periodic_factor_inverse * ffty, + periodic_factor_inverse * fftz, atom_numbers, fftx, ffty, fftz, PME_kxyz, PME_uxyz, PME_frxyz); + + Reset_List<<>>(PME_Nall, PME_Q, 0); + + PME_Q_Spread<<>>(PME_atom_near, charge, PME_frxyz, PME_Q, + PME_kxyz, atom_numbers); + + cufftExecR2C(PME_plan_r2c, reinterpret_cast(PME_Q), reinterpret_cast(PME_FQ)); + + PME_Energy_Reciprocal<<<1, 1024, 0, stream>>>(PME_Nfft, PME_FQ, PME_BC, d_reciprocal_ene); + + PME_Energy_Product<<<1, 1024, 0, stream>>>(atom_numbers, charge, charge, d_self_ene); + Scale_List<<<1, 1, 0, stream>>>(1, d_self_ene, -beta / sqrtf(PI)); + + Reset_List<<<1, 1, 0, stream>>>(1, d_direct_ene, 0.0); + PME_Direct_Energy<<>>( + atom_numbers, nl_a, uint_crd, scaler, charge, beta, cutoff * cutoff, d_direct_ene); + + Reset_List<<<1, 1, 0, stream>>>(1, d_correction_ene, 0.0); + PME_Excluded_Energy_Correction<<>>( + atom_numbers, uint_crd, scaler, charge, beta, sqrtf(PI), excluded_list_start, excluded_list, excluded_atom_numbers, + d_correction_ene); + return; +} +void PMEEnergy(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *box_length_f, float *PME_BC, + int *pme_uxyz, float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz, + const int *uint_crd_f, const float *charge, int *nl_atom_numbers, int *nl_atom_serial, int *nl, + const float *scaler_f, const int *excluded_list_start, const int *excluded_list, + const int *excluded_atom_numbers, float *d_reciprocal_ene, float *d_self_ene, float *d_direct_ene, + float *d_correction_ene, cudaStream_t stream); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_impl.cuh new file mode 100644 index 0000000000..90457f1c90 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_impl.cuh @@ -0,0 +1,30 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_ENERGY_IMPL_H_
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_ENERGY_IMPL_H_
+
+#include <curand_kernel.h>
+#include <cufft.h>
+#include "runtime/device/gpu/cuda_common.h"
+
+void PMEEnergy(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *box_length_f, float *PME_BC,
+               int *pme_uxyz, float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz,
+               const int *uint_crd_f, const float *charge, int *nl_atom_numbers, int *nl_atom_serial, int *nl,
+               const float *scaler_f, const int *excluded_list_start, const int *excluded_list,
+               const int *excluded_atom_numbers, float *d_reciprocal_ene, float *d_self_ene, float *d_direct_ene,
+               float *d_correction_ene, cudaStream_t stream);
+
+#endif
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_excluded_force_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_excluded_force_impl.cu
new file mode 100644
index 0000000000..30657d5b53
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_excluded_force_impl.cu
@@ -0,0 +1,102 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_excluded_force_impl.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_common.cuh"
+
+__global__ void PME_Excluded_Force_Correction(const int atom_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
+                                              const VECTOR *sacler, const float *charge, const float pme_beta,
+                                              const float sqrt_pi, const int *excluded_list_start,
+                                              const int *excluded_list, const int *excluded_atom_numbers,
+                                              VECTOR *frc) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    int excluded_numbers = excluded_atom_numbers[atom_i];
+    if (excluded_numbers > 0) {
+      int list_start = excluded_list_start[atom_i];
+      // int atom_min = excluded_list[list_start];
+      int list_end = list_start + excluded_numbers;
+      int atom_j;
+      int int_x;
+      int int_y;
+      int int_z;
+
+      float charge_i = charge[atom_i];
+      float charge_j;
+      float dr_abs;
+      float beta_dr;
+
+      UNSIGNED_INT_VECTOR r1 = uint_crd[atom_i], r2;
+      VECTOR dr;
+      float dr2;
+
+      float frc_abs = 0.;
+      VECTOR frc_lin;
+      VECTOR frc_record = {0., 0., 0.};
+
+      for (int i = list_start; i < list_end; i = i + 1) {
+        atom_j = excluded_list[i];
+        r2 = uint_crd[atom_j];
+        charge_j = charge[atom_j];
+
+        int_x = r2.uint_x - r1.uint_x;
+        int_y = r2.uint_y - r1.uint_y;
+        int_z = r2.uint_z - r1.uint_z;
+        dr.x = sacler[0].x * int_x;
+        dr.y = sacler[0].y * int_y;
+        dr.z = sacler[0].z * int_z;
+        dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
+
+        dr_abs = sqrtf(dr2);
+        beta_dr = pme_beta * dr_abs;
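+        // With erfc(x) - 1 = -erf(x) and sqrt_pi = 2/sqrt(pi), the statements
+        // below evaluate
+        //   frc_abs = -q_i * q_j * (1/r) * d/dr[ erf(beta * r) / r ],
+        // the force that cancels the reciprocal-space contribution of this
+        // excluded (bonded) pair.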
+        // sqrt_pi = 2/sqrt(3.141592654);
+        frc_abs = beta_dr * sqrt_pi * expf(-beta_dr * beta_dr) + erfcf(beta_dr);
+        frc_abs = (frc_abs - 1.) / dr2 / dr_abs;
+        frc_abs = -charge_i * charge_j * frc_abs;
+        frc_lin.x = frc_abs * dr.x;
+        frc_lin.y = frc_abs * dr.y;
+        frc_lin.z = frc_abs * dr.z;
+
+        frc_record.x = frc_record.x + frc_lin.x;
+        frc_record.y = frc_record.y + frc_lin.y;
+        frc_record.z = frc_record.z + frc_lin.z;
+
+        atomicAdd(&frc[atom_j].x, -frc_lin.x);
+        atomicAdd(&frc[atom_j].y, -frc_lin.y);
+        atomicAdd(&frc[atom_j].z, -frc_lin.z);
+      }  // atom_j cycle
+      atomicAdd(&frc[atom_i].x, frc_record.x);
+      atomicAdd(&frc[atom_i].y, frc_record.y);
+      atomicAdd(&frc[atom_i].z, frc_record.z);
+    }  // if need excluded
+  }
+}
+
+void PMEExcludedForce(const int atom_numbers, const float pme_beta, const int *uint_crd_f, const float *sacler_f,
+                      const float *charge, const int *excluded_list_start, const int *excluded_list,
+                      const int *excluded_atom_numbers, float *frc_f, cudaStream_t stream) {
+  UNSIGNED_INT_VECTOR *uint_crd =
+    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
+  VECTOR *frc = reinterpret_cast<VECTOR *>(frc_f);
+  VECTOR *sacler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(sacler_f));
+
+  PME_Excluded_Force_Correction<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
+    atom_numbers, uint_crd, sacler, charge, pme_beta, TWO_DIVIDED_BY_SQRT_PI, excluded_list_start, excluded_list,
+    excluded_atom_numbers, frc);
+  return;
+}
+
+void PMEExcludedForce(const int atom_numbers, const float pme_beta, const int *uint_crd_f, const float *sacler_f,
+                      const float *charge, const int *excluded_list_start, const int *excluded_list,
+                      const int *excluded_atom_numbers, float *frc_f, cudaStream_t stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_excluded_force_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_excluded_force_impl.cuh
new file mode 100644
index 0000000000..b14888962a
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_excluded_force_impl.cuh
@@ -0,0 +1,26 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_EXCLUDED_FORCE_IMPL_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_EXCLUDED_FORCE_IMPL_H_ + +#include +#include "runtime/device/gpu/cuda_common.h" + +void PMEExcludedForce(const int atom_numbers, const float pme_beta, const int *uint_crd_f, const float *sacler_f, + const float *charge, const int *excluded_list_start, const int *excluded_list, + const int *excluded_atom_numbers, float *frc_f, cudaStream_t stream); + +#endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cu new file mode 100644 index 0000000000..b064a7df1f --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cu @@ -0,0 +1,204 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_common.cuh" + +__global__ void PME_BCFQ(cufftComplex *PME_FQ, float *PME_BC, int PME_Nfft) { + int index = blockDim.x * blockIdx.x + threadIdx.x; + if (index < PME_Nfft) { + float tempf = PME_BC[index]; + cufftComplex tempc = PME_FQ[index]; + PME_FQ[index].x = tempc.x * tempf; + PME_FQ[index].y = tempc.y * tempf; + } +} + +__global__ void PME_Final(int *PME_atom_near, const float *charge, const float *PME_Q, VECTOR *force, + const VECTOR *PME_frxyz, const UNSIGNED_INT_VECTOR *PME_kxyz, + const VECTOR PME_inverse_box_vector, const int atom_numbers) { + int atom = blockDim.x * blockIdx.x + threadIdx.x; + if (atom < atom_numbers) { + int k, kx; + float tempdQx, tempdQy, tempdQz, tempdx, tempdy, tempdz, tempx, tempy, tempz, tempdQf; + float tempf, tempf2; + float temp_charge = charge[atom]; + int *temp_near = PME_atom_near + atom * 64; + UNSIGNED_INT_VECTOR temp_kxyz; + VECTOR temp_frxyz = PME_frxyz[atom]; + for (k = threadIdx.y; k < 64; k = k + blockDim.y) { + temp_kxyz = PME_kxyz[k]; + tempdQf = -PME_Q[temp_near[k]] * temp_charge; + + kx = temp_kxyz.uint_x; + tempf = (temp_frxyz.x); + tempf2 = tempf * tempf; + tempx = PME_Ma[kx] * tempf * tempf2 + PME_Mb[kx] * tempf2 + PME_Mc[kx] * tempf + PME_Md[kx]; + tempdx = PME_dMa[kx] * tempf2 + PME_dMb[kx] * tempf + PME_dMc[kx]; + + kx = temp_kxyz.uint_y; + tempf = (temp_frxyz.y); + tempf2 = tempf * tempf; + tempy = PME_Ma[kx] * tempf * tempf2 + PME_Mb[kx] * tempf2 + PME_Mc[kx] * tempf + PME_Md[kx]; + tempdy = PME_dMa[kx] * tempf2 + PME_dMb[kx] * tempf + PME_dMc[kx]; + + kx = temp_kxyz.uint_z; + tempf = (temp_frxyz.z); + tempf2 = tempf * tempf; + tempz = PME_Ma[kx] * tempf * tempf2 + PME_Mb[kx] * tempf2 + PME_Mc[kx] * tempf + PME_Md[kx]; + tempdz = PME_dMa[kx] * tempf2 + PME_dMb[kx] * tempf + PME_dMc[kx]; + + tempdQx = tempdx * tempy * tempz * PME_inverse_box_vector.x; + tempdQy = tempdy * tempx * tempz * 
PME_inverse_box_vector.y; + tempdQz = tempdz * tempx * tempy * PME_inverse_box_vector.z; + + atomicAdd(&force[atom].x, tempdQf * tempdQx); + atomicAdd(&force[atom].y, tempdQf * tempdQy); + atomicAdd(&force[atom].z, tempdQf * tempdQz); + } + } +} + +void PMEReciprocalForce(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC, int *pme_uxyz, + float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz, + const float *box_length_f, const int *uint_crd_f, const float *charge, float *force, + cudaStream_t stream) { + UNSIGNED_INT_VECTOR *uint_crd = + const_cast(reinterpret_cast(uint_crd_f)); + UNSIGNED_INT_VECTOR *PME_uxyz = reinterpret_cast(pme_uxyz); + UNSIGNED_INT_VECTOR *PME_kxyz = reinterpret_cast(pme_kxyz); + Reset_List<<<3 * atom_numbers / 32 + 1, 32, 0, stream>>>(3 * atom_numbers, reinterpret_cast(PME_uxyz), + 1 << 30); + + VECTOR *PME_frxyz = reinterpret_cast(pme_frxyz); + VECTOR *frc = reinterpret_cast(force); + + std::vector h_box_length(3); + cudaMemcpyAsync(h_box_length.data(), box_length_f, sizeof(float) * h_box_length.size(), cudaMemcpyDeviceToHost, + stream); + cudaStreamSynchronize(stream); + VECTOR *box_length = const_cast(reinterpret_cast(h_box_length.data())); + cufftComplex *PME_FQ = reinterpret_cast(pme_fq); + + VECTOR PME_inverse_box_vector; + PME_inverse_box_vector.x = static_cast(fftx) / box_length[0].x; + PME_inverse_box_vector.y = static_cast(ffty) / box_length[0].y; + PME_inverse_box_vector.z = static_cast(fftz) / box_length[0].z; + cufftHandle PME_plan_r2c; + cufftHandle PME_plan_c2r; + cufftPlan3d(&PME_plan_r2c, fftx, ffty, fftz, CUFFT_R2C); + cufftPlan3d(&PME_plan_c2r, fftx, ffty, fftz, CUFFT_C2R); + cufftSetStream(PME_plan_r2c, stream); + cufftSetStream(PME_plan_c2r, stream); + thread_PME.x = 8; + thread_PME.y = 8; + int PME_Nin = ffty * fftz; + int PME_Nfft = fftx * ffty * (fftz / 2 + 1); + int PME_Nall = fftx * ffty * fftz; + float volume = box_length[0].x * box_length[0].y * box_length[0].z; + + UNSIGNED_INT_VECTOR *PME_kxyz_cpu; + Malloc_Safely(reinterpret_cast(&PME_kxyz_cpu), sizeof(UNSIGNED_INT_VECTOR) * 64); + + int kx, ky, kz, kxrp, kyrp, kzrp, index; + for (kx = 0; kx < 4; kx++) { + for (ky = 0; ky < 4; ky++) { + for (kz = 0; kz < 4; kz++) { + index = kx * 16 + ky * 4 + kz; + PME_kxyz_cpu[index].uint_x = kx; + PME_kxyz_cpu[index].uint_y = ky; + PME_kxyz_cpu[index].uint_z = kz; + } + } + } + cudaMemcpyAsync(PME_kxyz, PME_kxyz_cpu, sizeof(UNSIGNED_INT_VECTOR) * 64, cudaMemcpyHostToDevice, stream); + cudaStreamSynchronize(stream); + free(PME_kxyz_cpu); + + // initial start + float *B1, *B2, *B3, *PME_BC0; + B1 = reinterpret_cast(malloc(sizeof(float) * fftx)); + B2 = reinterpret_cast(malloc(sizeof(float) * ffty)); + B3 = reinterpret_cast(malloc(sizeof(float) * fftz)); + PME_BC0 = reinterpret_cast(malloc(sizeof(float) * PME_Nfft)); + + for (kx = 0; kx < fftx; kx++) { + B1[kx] = getb(kx, fftx, 4); + } + + for (ky = 0; ky < ffty; ky++) { + B2[ky] = getb(ky, ffty, 4); + } + + for (kz = 0; kz < fftz; kz++) { + B3[kz] = getb(kz, fftz, 4); + } + float mprefactor = PI * PI / -beta / beta; + float msq; + for (kx = 0; kx < fftx; kx++) { + kxrp = kx; + if (kx > fftx / 2) kxrp = fftx - kx; + for (ky = 0; ky < ffty; ky++) { + kyrp = ky; + if (ky > ffty / 2) kyrp = ffty - ky; + for (kz = 0; kz <= fftz / 2; kz++) { + kzrp = kz; + + msq = kxrp * kxrp / box_length[0].x / box_length[0].x + kyrp * kyrp / box_length[0].y / box_length[0].y + + kzrp * kzrp / box_length[0].z / box_length[0].z; + index = kx * ffty * (fftz / 2 + 1) + ky * 
(fftz / 2 + 1) + kz; + if ((kx + ky + kz) == 0) { + PME_BC0[index] = 0; + } else { + PME_BC0[index] = 1.0 / PI / msq * exp(mprefactor * msq) / volume; + } + + PME_BC0[index] *= B1[kx] * B2[ky] * B3[kz]; + } + } + } + + cudaMemcpyAsync(PME_BC, PME_BC0, sizeof(float) * PME_Nfft, cudaMemcpyHostToDevice, stream); + cudaStreamSynchronize(stream); + free(B1); + free(B2); + free(B3); + free(PME_BC0); + + // initial end + Reset_List<<(3. * atom_numbers) / 128), 128, 0, stream>>>( + 3 * atom_numbers, reinterpret_cast(frc), 0.); + PME_Atom_Near<<>>( + uint_crd, PME_atom_near, PME_Nin, periodic_factor_inverse * fftx, periodic_factor_inverse * ffty, + periodic_factor_inverse * fftz, atom_numbers, fftx, ffty, fftz, PME_kxyz, PME_uxyz, PME_frxyz); + Reset_List<<>>(PME_Nall, PME_Q, 0); + + PME_Q_Spread<<>>(PME_atom_near, charge, PME_frxyz, PME_Q, + PME_kxyz, atom_numbers); + + cufftExecR2C(PME_plan_r2c, reinterpret_cast(PME_Q), reinterpret_cast(PME_FQ)); + PME_BCFQ<<>>(PME_FQ, PME_BC, PME_Nfft); + + cufftExecC2R(PME_plan_c2r, reinterpret_cast(PME_FQ), reinterpret_cast(PME_Q)); + + PME_Final<<>>(PME_atom_near, charge, PME_Q, frc, PME_frxyz, + PME_kxyz, PME_inverse_box_vector, atom_numbers); + return; +} + +void PMEReciprocalForce(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC, int *pme_uxyz, + float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz, + const float *box_length_f, const int *uint_crd_f, const float *charge, float *force, + cudaStream_t stream); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cuh new file mode 100644 index 0000000000..360ae6711f --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cuh @@ -0,0 +1,28 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_RECIPROCAL_FORCE_IMPL_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_RECIPROCAL_FORCE_IMPL_H_ + +#include +#include +#include "runtime/device/gpu/cuda_common.h" + +void PMEReciprocalForce(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC, int *pme_uxyz, + float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz, + const float *box_length_f, const int *uint_crd_f, const float *charge, float *force, + cudaStream_t stream); + +#endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/getcenter_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/getcenter_kernel.cc new file mode 100644 index 0000000000..76275ef9ba --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/getcenter_kernel.cc @@ -0,0 +1,27 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/kernel_compiler/gpu/sponge/common/getcenter_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO( + GetCenterOfGeometry, + KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + GetCenterOfGeometryGpuKernel, float, int) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/getcenter_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/getcenter_kernel.h new file mode 100644 index 0000000000..493c6d23c2 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/getcenter_kernel.h @@ -0,0 +1,89 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_COMMON_GETCENTER_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_COMMON_GETCENTER_KERNEL_H_ + +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common/getcenter_impl.cuh" + +namespace mindspore { +namespace kernel { + +template +class GetCenterOfGeometryGpuKernel : public GpuKernel { + public: + GetCenterOfGeometryGpuKernel() : ele_center_atoms(1) {} + ~GetCenterOfGeometryGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + center_numbers = static_cast(GetAttr(kernel_node, "center_numbers")); + center_numbers_inverse = static_cast(GetAttr(kernel_node, "center_numbers_inverse")); + + auto shape_center_atoms = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto shape_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + + for (size_t i = 0; i < shape_center_atoms.size(); i++) ele_center_atoms *= shape_center_atoms[i]; + for (size_t i = 0; i < shape_crd.size(); i++) ele_crd *= shape_crd[i]; + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) override { + auto center_atoms = GetDeviceAddress(inputs, 0); + auto crd = GetDeviceAddress(inputs, 1); + + auto center_of_geometry = GetDeviceAddress(outputs, 0); + + GetCenterOfGeometry(center_numbers, center_numbers_inverse, center_atoms, crd, center_of_geometry, + reinterpret_cast(stream_ptr)); + + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(ele_center_atoms * sizeof(T1)); + input_size_list_.push_back(ele_crd * sizeof(T)); + + output_size_list_.push_back(3 * sizeof(T)); + } + + private: + size_t ele_center_atoms = 1; + size_t ele_crd = 1; + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int center_numbers; + float center_numbers_inverse; +}; +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_COMMON_GETCENTER_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/mdtemperature_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/mdtemperature_kernel.cc new file mode 100644 index 0000000000..ada4cbe675 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/mdtemperature_kernel.cc @@ -0,0 +1,31 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "backend/kernel_compiler/gpu/sponge/common/mdtemperature_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(MDTemperature, + KernelAttr() + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + MDTemperatureGpuKernel, float, int) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/mdtemperature_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/mdtemperature_kernel.h new file mode 100644 index 0000000000..7fae9a2245 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/mdtemperature_kernel.h @@ -0,0 +1,96 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_COMMON_MDTEMPERATURE_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_COMMON_MDTEMPERATURE_KERNEL_H_ + +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common/mdtemperature_impl.cuh" + +namespace mindspore { +namespace kernel { + +template +class MDTemperatureGpuKernel : public GpuKernel { + public: + MDTemperatureGpuKernel() : ele_start(1) {} + ~MDTemperatureGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + residue_numbers = static_cast(GetAttr(kernel_node, "residue_numbers")); + + auto shape_start = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto shape_end = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + auto shape_atom_vel = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + auto shape_atom_mass = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); + + for (size_t i = 0; i < shape_start.size(); i++) ele_start *= shape_start[i]; + for (size_t i = 0; i < shape_end.size(); i++) ele_end *= shape_end[i]; + for (size_t i = 0; i < shape_atom_vel.size(); i++) ele_atom_vel *= shape_atom_vel[i]; + for (size_t i = 0; i < shape_atom_mass.size(); i++) ele_atom_mass *= shape_atom_mass[i]; + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) override { + auto start = GetDeviceAddress(inputs, 0); + auto end = GetDeviceAddress(inputs, 1); + auto atom_vel_f = GetDeviceAddress(inputs, 2); + auto atom_mass = GetDeviceAddress(inputs, 3); + + auto ek = GetDeviceAddress(outputs, 0); + + 
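+    // The kernel below reduces the velocities and masses of the atoms in
+    // [start[i], end[i]) to an effective temperature, writing one value per
+    // residue into ek.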
MDTemperature(residue_numbers, start, end, atom_vel_f, atom_mass, ek, reinterpret_cast(stream_ptr)); + + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(ele_start * sizeof(T1)); + input_size_list_.push_back(ele_end * sizeof(T1)); + input_size_list_.push_back(ele_atom_vel * sizeof(T)); + input_size_list_.push_back(ele_atom_mass * sizeof(T)); + + output_size_list_.push_back(residue_numbers * sizeof(T)); + } + + private: + size_t ele_start = 1; + size_t ele_end = 1; + size_t ele_atom_vel = 1; + size_t ele_atom_mass = 1; + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int residue_numbers; +}; +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_COMMON_MDTEMPERATURE_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_energy_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_energy_kernel.cc new file mode 100644 index 0000000000..84455f13d3 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_energy_kernel.cc @@ -0,0 +1,34 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "backend/kernel_compiler/gpu/sponge/lj/lj_energy_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(LJEnergy, + KernelAttr() + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + LJEnergyGpuKernel, float, int) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_energy_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_energy_kernel.h new file mode 100644 index 0000000000..8ef49930b0 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_energy_kernel.h @@ -0,0 +1,130 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_LJ_LJ_ENERGY_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_LJ_LJ_ENERGY_KERNEL_H_ +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_energy_impl.cuh" +namespace mindspore { +namespace kernel { +template +class LJEnergyGpuKernel : public GpuKernel { + public: + LJEnergyGpuKernel() : ele_uint_crd(1) {} + ~LJEnergyGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + cutoff_square = static_cast(GetAttr(kernel_node, "cutoff_square")); + + auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + auto shape_scaler = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); + auto shape_nl_numbers = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); + auto shape_nl_serial = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); + auto shape_d_LJ_a = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); + auto shape_d_LJ_b = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); + + for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; + for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; + for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; + for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; + // for (size_t i = 0; i < shape_nl.size(); i++) ele_nl *= shape_nl[i]; + for (size_t i = 0; i < shape_d_LJ_a.size(); i++) ele_d_LJ_a *= shape_d_LJ_a[i]; + for (size_t i = 0; i < shape_d_LJ_b.size(); i++) ele_d_LJ_b *= shape_d_LJ_b[i]; + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) override { + auto uint_crd = GetDeviceAddress(inputs, 0); + auto LJtype = GetDeviceAddress(inputs, 1); + auto charge = GetDeviceAddress(inputs, 2); + auto scaler = GetDeviceAddress(inputs, 3); + auto nl_numbers = GetDeviceAddress(inputs, 4); + auto nl_serial = GetDeviceAddress(inputs, 5); + auto d_LJ_a = GetDeviceAddress(inputs, 6); + auto d_LJ_b = GetDeviceAddress(inputs, 7); + + auto uint_crd_with_LJ = GetDeviceAddress(workspace, 0); + auto nl = GetDeviceAddress(workspace, 1); + + auto d_LJ_energy_atom = GetDeviceAddress(outputs, 0); + LJEnergy(atom_numbers, cutoff_square, uint_crd, LJtype, charge, scaler, uint_crd_with_LJ, nl_numbers, nl_serial, nl, + d_LJ_a, d_LJ_b, d_LJ_energy_atom, reinterpret_cast(stream_ptr)); + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(ele_uint_crd * sizeof(T1)); + input_size_list_.push_back(ele_LJtype * sizeof(T1)); + input_size_list_.push_back(ele_charge * sizeof(T)); + input_size_list_.push_back(ele_scaler * sizeof(T)); + input_size_list_.push_back(atom_numbers * 
sizeof(T1)); + input_size_list_.push_back(max_nl_numbers * sizeof(T1)); + input_size_list_.push_back(ele_d_LJ_a * sizeof(T)); + input_size_list_.push_back(ele_d_LJ_b * sizeof(T)); + + workspace_size_list_.push_back(atom_numbers * max_nl_numbers * sizeof(T1)); + workspace_size_list_.push_back(atom_numbers * sizeof(UINT_VECTOR_LJ_TYPE)); + + output_size_list_.push_back(atom_numbers * sizeof(T)); + } + + private: + size_t ele_uint_crd = 1; + size_t ele_LJtype = 1; + size_t ele_charge = 1; + size_t ele_scaler = 1; + size_t ele_nl = 1; + size_t ele_d_LJ_a = 1; + size_t ele_d_LJ_b = 1; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int atom_numbers; + float cutoff_square; + int max_nl_numbers = 800; + struct UINT_VECTOR_LJ_TYPE { + unsigned int uint_x; + unsigned int uint_y; + unsigned int uint_z; + int LJ_type; + float charge; + }; + struct NEIGHBOR_LIST { + int atom_numbers; + int *atom_serial; + }; +}; +} // namespace kernel +} // namespace mindspore +#endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_kernel.cc new file mode 100644 index 0000000000..9c7c83c639 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_kernel.cc @@ -0,0 +1,34 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "backend/kernel_compiler/gpu/sponge/lj/lj_force_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(LJForce, + KernelAttr() + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + LJForceGpuKernel, float, int) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_kernel.h new file mode 100644 index 0000000000..da0a8f06db --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_kernel.h @@ -0,0 +1,129 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_LJ_LJ_FORCE_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_LJ_LJ_FORCE_KERNEL_H_ +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_impl.cuh" +namespace mindspore { +namespace kernel { +template +class LJForceGpuKernel : public GpuKernel { + public: + LJForceGpuKernel() : ele_uint_crd(1) {} + ~LJForceGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + cutoff_square = static_cast(GetAttr(kernel_node, "cutoff_square")); + + auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + auto shape_scaler = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); + auto shape_nl_numbers = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); + auto shape_nl_serial = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); + auto shape_d_LJ_a = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); + auto shape_d_LJ_b = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); + + for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; + for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; + for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; + for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; + // for (size_t i = 0; i < shape_nl.size(); i++) ele_nl *= shape_nl[i]; + for (size_t i = 0; i < shape_d_LJ_a.size(); i++) ele_d_LJ_a *= shape_d_LJ_a[i]; + for (size_t i = 0; i < shape_d_LJ_b.size(); i++) ele_d_LJ_b *= shape_d_LJ_b[i]; + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) override { + auto uint_crd = GetDeviceAddress(inputs, 0); + auto LJtype = GetDeviceAddress(inputs, 1); + auto charge = GetDeviceAddress(inputs, 2); + auto scaler = GetDeviceAddress(inputs, 3); + auto nl_numbers = GetDeviceAddress(inputs, 4); + auto nl_serial = GetDeviceAddress(inputs, 5); + auto d_LJ_a = GetDeviceAddress(inputs, 6); + auto d_LJ_b = GetDeviceAddress(inputs, 7); + + auto uint_crd_with_LJ = GetDeviceAddress(workspace, 0); + auto nl = GetDeviceAddress(workspace, 1); + + auto frc = GetDeviceAddress(outputs, 0); + LJForce(atom_numbers, cutoff_square, uint_crd, LJtype, charge, scaler, uint_crd_with_LJ, nl_numbers, nl_serial, nl, + d_LJ_a, d_LJ_b, frc, reinterpret_cast(stream_ptr)); + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(ele_uint_crd * sizeof(T1)); + input_size_list_.push_back(ele_LJtype * sizeof(T1)); + input_size_list_.push_back(ele_charge * sizeof(T)); + input_size_list_.push_back(ele_scaler * sizeof(T)); + input_size_list_.push_back(atom_numbers * sizeof(T1)); + 
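+    // One neighbour count per atom; the serial buffer that follows is
+    // bounded by max_nl_numbers (800) entries, the same neighbour-list
+    // layout the LJEnergy kernel above declares.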
input_size_list_.push_back(max_nl_numbers * sizeof(T1)); + input_size_list_.push_back(ele_d_LJ_a * sizeof(T)); + input_size_list_.push_back(ele_d_LJ_b * sizeof(T)); + + workspace_size_list_.push_back(atom_numbers * max_nl_numbers * sizeof(T1)); + workspace_size_list_.push_back(atom_numbers * sizeof(UINT_VECTOR_LJ_TYPE)); + + output_size_list_.push_back(atom_numbers * 3 * sizeof(T)); + } + + private: + size_t ele_uint_crd = 1; + size_t ele_LJtype = 1; + size_t ele_charge = 1; + size_t ele_scaler = 1; + size_t ele_d_LJ_a = 1; + size_t ele_d_LJ_b = 1; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int atom_numbers; + float cutoff_square; + int max_nl_numbers = 800; + struct UINT_VECTOR_LJ_TYPE { + unsigned int uint_x; + unsigned int uint_y; + unsigned int uint_z; + int LJ_type; + float charge; + }; + struct NEIGHBOR_LIST { + int atom_numbers; + int *atom_serial; + }; +}; +} // namespace kernel +} // namespace mindspore +#endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_with_pme_direct_force_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_with_pme_direct_force_kernel.cc new file mode 100644 index 0000000000..e4930691b2 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_with_pme_direct_force_kernel.cc @@ -0,0 +1,34 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "backend/kernel_compiler/gpu/sponge/lj/lj_force_with_pme_direct_force_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(LJForceWithPMEDirectForce, + KernelAttr() + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + LJForceWithPMEDirectForceGpuKernel, float, int) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_with_pme_direct_force_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_with_pme_direct_force_kernel.h new file mode 100644 index 0000000000..f7850a05d8 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_with_pme_direct_force_kernel.h @@ -0,0 +1,133 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
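// NOTE: the LJForce wrapper above only stages device buffers; the pair math lives in
// lj_force_impl.cu. For orientation, a self-contained CUDA sketch of a Lennard-Jones
// force pass over a per-atom neighbor list, using plain float coordinates instead of
// SPONGE's unsigned fixed-point crd, and a flat per-atom LJ table instead of the real
// pairwise A/B lookup (illustrative only, not the shipped kernel):
#include <cuda_runtime.h>

__global__ void LJForceSketch(int atom_numbers, float cutoff_square, const float3 *crd,
                              const int *lj_type, const float *d_lj_a, const float *d_lj_b,
                              const int *nl_numbers, const int *nl_serial, int max_neighbors,
                              float3 *frc) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i >= atom_numbers) return;
  float3 fi = make_float3(0.0f, 0.0f, 0.0f);
  for (int n = 0; n < nl_numbers[i]; ++n) {
    int j = nl_serial[i * max_neighbors + n];
    float3 dr = make_float3(crd[j].x - crd[i].x, crd[j].y - crd[i].y, crd[j].z - crd[i].z);
    float r2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
    if (r2 >= cutoff_square) continue;  // the same cutoff_square test the attr above configures
    float inv_r2 = 1.0f / r2;
    float inv_r6 = inv_r2 * inv_r2 * inv_r2;
    int t = lj_type[i];  // simplification: real kernels index a table by (type_i, type_j)
    // For E = A/r^12 - B/r^6, the force on atom i is -(12A/r^14 - 6B/r^8) * dr.
    float coef = (12.0f * d_lj_a[t] * inv_r6 - 6.0f * d_lj_b[t]) * inv_r6 * inv_r2;
    fi.x -= coef * dr.x;
    fi.y -= coef * dr.y;
    fi.z -= coef * dr.z;
  }
  frc[i] = fi;  // half-list schemes would instead atomicAdd the opposite force onto atom j
}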
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_LJ_LJ_FORCE_WITH_PME_DIRECT_FORCE_KERNEL_H_
+#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_LJ_LJ_FORCE_WITH_PME_DIRECT_FORCE_KERNEL_H_
+#include <cuda_runtime_api.h>
+#include <map>
+#include <string>
+#include <vector>
+#include "backend/kernel_compiler/gpu/gpu_kernel.h"
+#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
+#include "runtime/device/gpu/cuda_common.h"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_with_pme_direct_force_impl.cuh"
+namespace mindspore {
+namespace kernel {
+template <typename T, typename T1>
+class LJForceWithPMEDirectForceGpuKernel : public GpuKernel {
+ public:
+  LJForceWithPMEDirectForceGpuKernel() : ele_uint_crd(1) {}
+  ~LJForceWithPMEDirectForceGpuKernel() override = default;
+
+  bool Init(const CNodePtr &kernel_node) override {
+    kernel_node_ = kernel_node;
+    atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));
+    cutoff = static_cast<float>(GetAttr<float>(kernel_node, "cutoff"));
+    pme_beta = static_cast<float>(GetAttr<float>(kernel_node, "pme_beta"));
+
+    auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+    auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
+    auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
+    auto shape_scaler = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
+    auto shape_nl_numbers = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4);
+    auto shape_nl_serial = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5);
+    auto shape_d_LJ_a = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6);
+    auto shape_d_LJ_b = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7);
+
+    for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i];
+    for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i];
+    for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i];
+    for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i];
+    // for (size_t i = 0; i < shape_nl.size(); i++) ele_nl *= shape_nl[i];
+    for (size_t i = 0; i < shape_d_LJ_a.size(); i++) ele_d_LJ_a *= shape_d_LJ_a[i];
+    for (size_t i = 0; i < shape_d_LJ_b.size(); i++) ele_d_LJ_b *= shape_d_LJ_b[i];
+
+    InitSizeLists();
+    return true;
+  }
+
+  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
+  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
+  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
+
+  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
+              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    auto uint_crd = GetDeviceAddress<T1>(inputs, 0);
+    auto LJtype = GetDeviceAddress<T1>(inputs, 1);
+    auto charge = GetDeviceAddress<T>(inputs, 2);
+    auto scaler = GetDeviceAddress<T>(inputs, 3);
+    auto nl_numbers = GetDeviceAddress<T1>(inputs, 4);
+    auto nl_serial = GetDeviceAddress<T1>(inputs, 5);
+    auto d_LJ_a = GetDeviceAddress<T>(inputs, 6);
+    auto d_LJ_b = GetDeviceAddress<T>(inputs, 7);
+
+    auto uint_crd_with_LJ = GetDeviceAddress<UINT_VECTOR_LJ_TYPE>(workspace, 0);
+    auto nl = GetDeviceAddress<NEIGHBOR_LIST>(workspace, 1);
+
+    auto frc = GetDeviceAddress<T>(outputs, 0);
+    LJForceWithPMEDirectForce(atom_numbers, cutoff, pme_beta, uint_crd, LJtype, charge, scaler, uint_crd_with_LJ,
+                              nl_numbers, nl_serial, nl, d_LJ_a, d_LJ_b, frc,
+                              reinterpret_cast<cudaStream_t>(stream_ptr));
+    return true;
+  }
+
+ protected:
+  void InitSizeLists() override {
+    input_size_list_.push_back(ele_uint_crd * sizeof(T1));
+    input_size_list_.push_back(ele_LJtype * sizeof(T1));
+    input_size_list_.push_back(ele_charge * sizeof(T));
+    input_size_list_.push_back(ele_scaler * sizeof(T));
+    input_size_list_.push_back(atom_numbers * sizeof(T1));
+    input_size_list_.push_back(max_nl_numbers * sizeof(T1));
+    input_size_list_.push_back(ele_d_LJ_a * sizeof(T));
+    input_size_list_.push_back(ele_d_LJ_b * sizeof(T));
+
+    workspace_size_list_.push_back(atom_numbers * max_nl_numbers * sizeof(T1));
+    workspace_size_list_.push_back(atom_numbers * sizeof(UINT_VECTOR_LJ_TYPE));
+
+    output_size_list_.push_back(atom_numbers * 3 * sizeof(T));
+  }
+
+ private:
+  size_t ele_uint_crd = 1;
+  size_t ele_LJtype = 1;
+  size_t ele_charge = 1;
+  size_t ele_scaler = 1;
+  size_t ele_nl = 1;
+  size_t ele_d_LJ_a = 1;
+  size_t ele_d_LJ_b = 1;
+
+  std::vector<size_t> input_size_list_;
+  std::vector<size_t> output_size_list_;
+  std::vector<size_t> workspace_size_list_;
+  int atom_numbers;
+  float pme_beta;
+  float cutoff;
+  int max_nl_numbers = 800;
+  struct UINT_VECTOR_LJ_TYPE {
+    unsigned int uint_x;
+    unsigned int uint_y;
+    unsigned int uint_z;
+    int LJ_type;
+    float charge;
+  };
+  struct NEIGHBOR_LIST {
+    int atom_numbers;
+    int *atom_serial;
+  };
+};
+}  // namespace kernel
+}  // namespace mindspore
+#endif
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_atom_energy_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_atom_energy_kernel.cc
new file mode 100644
index 0000000000..131babc923
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_atom_energy_kernel.cc
@@ -0,0 +1,34 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_atom_energy_kernel.h"
+
+namespace mindspore {
+namespace kernel {
+MS_REG_GPU_KERNEL_TWO(Dihedral14CFAtomEnergy,
+                      KernelAttr()
+                        .AddInputAttr(kNumberTypeUInt32)
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddInputAttr(kNumberTypeFloat32)
+                        .AddInputAttr(kNumberTypeFloat32)
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddInputAttr(kNumberTypeFloat32)
+                        .AddOutputAttr(kNumberTypeFloat32),
+                      Dihedral14CFAtomEnergyGpuKernel, float, int)
+
+}  // namespace kernel
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_atom_energy_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_atom_energy_kernel.h
new file mode 100644
index 0000000000..ef0341331a
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_atom_energy_kernel.h
@@ -0,0 +1,114 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
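// NOTE: Dihedral14CFAtomEnergy accumulates the scaled 1-4 Coulomb energy onto the atoms
// of each 1-4 pair. A compact CUDA sketch of that accumulation, one thread per pair,
// with plain float coordinates standing in for the uint_crd/boxlength inputs above:
#include <cuda_runtime.h>

__global__ void Dihedral14CFAtomEnergySketch(int dihedral_14_numbers, const float3 *crd,
                                             const float *charge, const int *a_14,
                                             const int *b_14, const float *cf_scale_factor,
                                             float *atom_ene) {
  int pair = blockIdx.x * blockDim.x + threadIdx.x;
  if (pair >= dihedral_14_numbers) return;
  int i = a_14[pair];
  int j = b_14[pair];
  float dx = crd[j].x - crd[i].x;
  float dy = crd[j].y - crd[i].y;
  float dz = crd[j].z - crd[i].z;
  float r = sqrtf(dx * dx + dy * dy + dz * dz);
  // 1-4 Coulomb term q_i*q_j/r, damped by the per-pair scale factor (1/1.2 in Amber-style
  // force fields); charges are assumed pre-multiplied by the Coulomb constant.
  float ene = cf_scale_factor[pair] * charge[i] * charge[j] / r;
  // An atom participates in many 1-4 pairs, so per-atom accumulation must be atomic.
  atomicAdd(&atom_ene[i], ene);
}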
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_CF_ATOM_ENERGY_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_CF_ATOM_ENERGY_KERNEL_H_ + +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_atom_energy_impl.cuh" + +namespace mindspore { +namespace kernel { + +template +class Dihedral14CFAtomEnergyGpuKernel : public GpuKernel { + public: + Dihedral14CFAtomEnergyGpuKernel() : ele_uint_crd(1) {} + ~Dihedral14CFAtomEnergyGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + dihedral_14_numbers = static_cast(GetAttr(kernel_node, "dihedral_14_numbers")); + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + + auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + auto shape_boxlength_f = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); + auto shape_a_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); + auto shape_b_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); + auto shape_cf_scale_factor = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); + + for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; + for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; + for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; + for (size_t i = 0; i < shape_boxlength_f.size(); i++) ele_boxlength_f *= shape_boxlength_f[i]; + for (size_t i = 0; i < shape_a_14.size(); i++) ele_a_14 *= shape_a_14[i]; + for (size_t i = 0; i < shape_b_14.size(); i++) ele_b_14 *= shape_b_14[i]; + for (size_t i = 0; i < shape_cf_scale_factor.size(); i++) ele_cf_scale_factor *= shape_cf_scale_factor[i]; + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) override { + auto uint_crd_f = GetDeviceAddress(inputs, 0); + auto LJtype = GetDeviceAddress(inputs, 1); + auto charge = GetDeviceAddress(inputs, 2); + auto boxlength_f = GetDeviceAddress(inputs, 3); + auto a_14 = GetDeviceAddress(inputs, 4); + auto b_14 = GetDeviceAddress(inputs, 5); + auto cf_scale_factor = GetDeviceAddress(inputs, 6); + auto ene = GetDeviceAddress(outputs, 0); + + Dihedral14CFAtomEnergy(dihedral_14_numbers, atom_numbers, uint_crd_f, LJtype, charge, boxlength_f, a_14, b_14, + cf_scale_factor, ene, reinterpret_cast(stream_ptr)); + + return true; + 
} + + protected: + void InitSizeLists() override { + input_size_list_.push_back(ele_uint_crd * sizeof(T1)); + input_size_list_.push_back(ele_LJtype * sizeof(T1)); + input_size_list_.push_back(ele_charge * sizeof(T)); + input_size_list_.push_back(ele_boxlength_f * sizeof(T)); + input_size_list_.push_back(ele_a_14 * sizeof(T1)); + input_size_list_.push_back(ele_b_14 * sizeof(T1)); + input_size_list_.push_back(ele_cf_scale_factor * sizeof(T)); + + output_size_list_.push_back(atom_numbers * sizeof(T)); + } + + private: + size_t ele_uint_crd = 1; + size_t ele_LJtype = 1; + size_t ele_charge = 1; + size_t ele_boxlength_f = 1; + size_t ele_a_14 = 1; + size_t ele_b_14 = 1; + size_t ele_cf_scale_factor = 1; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int dihedral_14_numbers; + int atom_numbers; +}; +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_CF_ATOM_ENERGY_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_energy_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_energy_kernel.cc new file mode 100644 index 0000000000..5685e1dab7 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_energy_kernel.cc @@ -0,0 +1,34 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_energy_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(Dihedral14CFEnergy, + KernelAttr() + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + Dihedral14CFEnergyGpuKernel, float, int) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_energy_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_energy_kernel.h new file mode 100644 index 0000000000..3e38cf13ab --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_energy_kernel.h @@ -0,0 +1,114 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
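// NOTE: every wrapper in this patch follows the same three-step GpuKernel contract:
// Init() reads attributes and flattens input shapes into element counts, InitSizeLists()
// turns those counts into byte sizes the runtime allocates, and Launch() receives the
// device buffers in that same order. A stripped-down sketch of the flow with
// hypothetical types (the real interface lives in gpu_kernel.h):
#include <cstddef>
#include <vector>

struct Address {
  void *addr;
  size_t size;
};

class SketchKernel {
 public:
  bool Init(const std::vector<std::vector<size_t>> &input_shapes) {
    for (const auto &shape : input_shapes) {
      size_t elems = 1;
      for (size_t dim : shape) elems *= dim;  // flatten, exactly like the ele_* loops above
      input_size_list_.push_back(elems * sizeof(float));
    }
    return true;
  }
  bool Launch(const std::vector<Address> &inputs) {
    // The runtime hands back one buffer per entry of input_size_list_, in order; the
    // wrappers then reinterpret each pointer to the dtype promised at registration.
    return inputs.size() == input_size_list_.size();
  }

 private:
  std::vector<size_t> input_size_list_;
};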
+ */ + +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_CF_ENERGY_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_CF_ENERGY_KERNEL_H_ + +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cuh" + +namespace mindspore { +namespace kernel { + +template +class Dihedral14CFEnergyGpuKernel : public GpuKernel { + public: + Dihedral14CFEnergyGpuKernel() : ele_uint_crd(1) {} + ~Dihedral14CFEnergyGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + dihedral_14_numbers = static_cast(GetAttr(kernel_node, "dihedral_14_numbers")); + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + + auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + auto shape_boxlength_f = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); + auto shape_a_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); + auto shape_b_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); + auto shape_cf_scale_factor = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); + + for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; + for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; + for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; + for (size_t i = 0; i < shape_boxlength_f.size(); i++) ele_boxlength_f *= shape_boxlength_f[i]; + for (size_t i = 0; i < shape_a_14.size(); i++) ele_a_14 *= shape_a_14[i]; + for (size_t i = 0; i < shape_b_14.size(); i++) ele_b_14 *= shape_b_14[i]; + for (size_t i = 0; i < shape_cf_scale_factor.size(); i++) ele_cf_scale_factor *= shape_cf_scale_factor[i]; + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) override { + auto uint_crd_f = GetDeviceAddress(inputs, 0); + auto LJtype = GetDeviceAddress(inputs, 1); + auto charge = GetDeviceAddress(inputs, 2); + auto boxlength_f = GetDeviceAddress(inputs, 3); + auto a_14 = GetDeviceAddress(inputs, 4); + auto b_14 = GetDeviceAddress(inputs, 5); + auto cf_scale_factor = GetDeviceAddress(inputs, 6); + auto ene = GetDeviceAddress(outputs, 0); + + Dihedral14CFEnergy(dihedral_14_numbers, atom_numbers, uint_crd_f, LJtype, charge, boxlength_f, a_14, b_14, + cf_scale_factor, ene, reinterpret_cast(stream_ptr)); + + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(ele_uint_crd * sizeof(T1)); + input_size_list_.push_back(ele_LJtype * sizeof(T1)); + input_size_list_.push_back(ele_charge * sizeof(T)); + input_size_list_.push_back(ele_boxlength_f * sizeof(T)); + input_size_list_.push_back(ele_a_14 * sizeof(T1)); + input_size_list_.push_back(ele_b_14 * sizeof(T1)); + input_size_list_.push_back(ele_cf_scale_factor * sizeof(T)); + + 
+    output_size_list_.push_back(atom_numbers * sizeof(T));
+  }
+
+ private:
+  size_t ele_uint_crd = 1;
+  size_t ele_LJtype = 1;
+  size_t ele_charge = 1;
+  size_t ele_boxlength_f = 1;
+  size_t ele_a_14 = 1;
+  size_t ele_b_14 = 1;
+  size_t ele_cf_scale_factor = 1;
+
+  std::vector<size_t> input_size_list_;
+  std::vector<size_t> output_size_list_;
+  std::vector<size_t> workspace_size_list_;
+  int dihedral_14_numbers;
+  int atom_numbers;
+};
+}  // namespace kernel
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_CF_ENERGY_KERNEL_H_
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_atom_energy_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_atom_energy_kernel.cc
new file mode 100644
index 0000000000..44631e4ae4
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_atom_energy_kernel.cc
@@ -0,0 +1,36 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_atom_energy_kernel.h"
+
+namespace mindspore {
+namespace kernel {
+MS_REG_GPU_KERNEL_TWO(Dihedral14LJAtomEnergy,
+                      KernelAttr()
+                        .AddInputAttr(kNumberTypeUInt32)
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddInputAttr(kNumberTypeFloat32)
+                        .AddInputAttr(kNumberTypeFloat32)
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddInputAttr(kNumberTypeFloat32)
+                        .AddInputAttr(kNumberTypeFloat32)
+                        .AddInputAttr(kNumberTypeFloat32)
+                        .AddOutputAttr(kNumberTypeFloat32),
+                      Dihedral14LJAtomEnergyGpuKernel, float, int)
+
+}  // namespace kernel
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_atom_energy_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_atom_energy_kernel.h
new file mode 100644
index 0000000000..8cd37d512f
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_atom_energy_kernel.h
@@ -0,0 +1,123 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
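// NOTE: the 1-4 Lennard-Jones terms reuse the A/r^12 - B/r^6 form, with A and B taken
// from the pair's LJ types and damped by lj_scale_factor. A sketch of the per-pair
// energy under the textbook convention (SPONGE's own scaling of the A/B tables and its
// triangular type indexing may differ; pair_type below is a hypothetical precomputed
// index):
#include <cuda_runtime.h>

__global__ void Dihedral14LJEnergySketch(int dihedral_14_numbers, const float3 *crd,
                                         const int *a_14, const int *b_14, const int *pair_type,
                                         const float *lj_type_A, const float *lj_type_B,
                                         const float *lj_scale_factor, float *ene) {
  int pair = blockIdx.x * blockDim.x + threadIdx.x;
  if (pair >= dihedral_14_numbers) return;
  int i = a_14[pair];
  int j = b_14[pair];
  float dx = crd[j].x - crd[i].x;
  float dy = crd[j].y - crd[i].y;
  float dz = crd[j].z - crd[i].z;
  float inv_r2 = 1.0f / (dx * dx + dy * dy + dz * dz);
  float inv_r6 = inv_r2 * inv_r2 * inv_r2;
  int t = pair_type[pair];
  // E = scale * (A/r^12 - B/r^6), one energy slot per 1-4 pair.
  ene[pair] = lj_scale_factor[pair] * (lj_type_A[t] * inv_r6 * inv_r6 - lj_type_B[t] * inv_r6);
}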
+ */ + +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_ATOM_ENERGY_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_ATOM_ENERGY_KERNEL_H_ + +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cuh" + +namespace mindspore { +namespace kernel { + +template +class Dihedral14LJAtomEnergyGpuKernel : public GpuKernel { + public: + Dihedral14LJAtomEnergyGpuKernel() : ele_uint_crd(1) {} + ~Dihedral14LJAtomEnergyGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + dihedral_14_numbers = static_cast(GetAttr(kernel_node, "dihedral_14_numbers")); + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + + auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + auto shape_boxlength_f = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); + auto shape_a_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); + auto shape_b_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); + auto shape_lj_scale_factor = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); + auto shape_LJ_type_A = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); + auto shape_LJ_type_B = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8); + + for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; + for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; + for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; + for (size_t i = 0; i < shape_boxlength_f.size(); i++) ele_boxlength_f *= shape_boxlength_f[i]; + for (size_t i = 0; i < shape_a_14.size(); i++) ele_a_14 *= shape_a_14[i]; + for (size_t i = 0; i < shape_b_14.size(); i++) ele_b_14 *= shape_b_14[i]; + for (size_t i = 0; i < shape_lj_scale_factor.size(); i++) ele_lj_scale_factor *= shape_lj_scale_factor[i]; + for (size_t i = 0; i < shape_LJ_type_A.size(); i++) ele_LJ_type_A *= shape_LJ_type_A[i]; + for (size_t i = 0; i < shape_LJ_type_B.size(); i++) ele_LJ_type_B *= shape_LJ_type_B[i]; + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) override { + auto uint_crd_f = GetDeviceAddress(inputs, 0); + auto LJtype = GetDeviceAddress(inputs, 1); + auto charge = GetDeviceAddress(inputs, 2); + auto boxlength_f = GetDeviceAddress(inputs, 3); + auto a_14 = GetDeviceAddress(inputs, 4); + auto b_14 = GetDeviceAddress(inputs, 5); + auto lj_scale_factor = GetDeviceAddress(inputs, 6); + auto LJ_type_A = GetDeviceAddress(inputs, 7); + auto LJ_type_B = GetDeviceAddress(inputs, 8); + auto ene = GetDeviceAddress(outputs, 0); + + Dihedral14LJAtomEnergy(dihedral_14_numbers, atom_numbers, uint_crd_f, LJtype, charge, boxlength_f, a_14, b_14, + lj_scale_factor, LJ_type_A, LJ_type_B, ene, 
reinterpret_cast(stream_ptr)); + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(ele_uint_crd * sizeof(T1)); + input_size_list_.push_back(ele_LJtype * sizeof(T1)); + input_size_list_.push_back(ele_charge * sizeof(T)); + input_size_list_.push_back(ele_boxlength_f * sizeof(T)); + input_size_list_.push_back(ele_a_14 * sizeof(T1)); + input_size_list_.push_back(ele_b_14 * sizeof(T1)); + input_size_list_.push_back(ele_lj_scale_factor * sizeof(T)); + input_size_list_.push_back(ele_LJ_type_A * sizeof(T)); + input_size_list_.push_back(ele_LJ_type_B * sizeof(T)); + + output_size_list_.push_back(atom_numbers * sizeof(T)); + } + + private: + size_t ele_uint_crd = 1; + size_t ele_LJtype = 1; + size_t ele_charge = 1; + size_t ele_boxlength_f = 1; + size_t ele_a_14 = 1; + size_t ele_b_14 = 1; + size_t ele_lj_scale_factor = 1; + size_t ele_LJ_type_A = 1; + size_t ele_LJ_type_B = 1; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int dihedral_14_numbers; + int atom_numbers; +}; +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_ATOM_ENERGY_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_kernel.cc new file mode 100644 index 0000000000..aa47797a95 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_kernel.cc @@ -0,0 +1,38 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(Dihedral14LJCFForceWithAtomEnergy, + KernelAttr() + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + Dihedral14LJCFForceWithAtomEnergyGpuKernel, float, int) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_kernel.h new file mode 100644 index 0000000000..cf3e3b8313 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_kernel.h @@ -0,0 +1,132 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
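// NOTE: Dihedral14LJCFForceWithAtomEnergy is a fusion -- one pass over the 1-4 pairs
// fills both outputs registered above: the force array (3*N floats) and the per-atom
// energy (N floats), so coordinates and parameters are read once instead of twice.
// A sketch of the fused per-pair update, Coulomb part only for brevity:
#include <cuda_runtime.h>

__global__ void FusedForceEnergySketch(int pair_numbers, const int *a_14, const int *b_14,
                                       const float3 *crd, const float *charge,
                                       const float *cf_scale, float *frc, float *atom_energy) {
  int p = blockIdx.x * blockDim.x + threadIdx.x;
  if (p >= pair_numbers) return;
  int i = a_14[p];
  int j = b_14[p];
  float dx = crd[j].x - crd[i].x;
  float dy = crd[j].y - crd[i].y;
  float dz = crd[j].z - crd[i].z;
  float inv_r = rsqrtf(dx * dx + dy * dy + dz * dz);
  float ene = cf_scale[p] * charge[i] * charge[j] * inv_r;  // E = scaled q_i*q_j/r
  float coef = ene * inv_r * inv_r;                         // |dE/dr|/r for this E
  // The same distance work feeds both outputs; frc is laid out [x0 y0 z0 x1 y1 z1 ...].
  atomicAdd(&frc[3 * i + 0], -coef * dx);
  atomicAdd(&frc[3 * i + 1], -coef * dy);
  atomicAdd(&frc[3 * i + 2], -coef * dz);
  atomicAdd(&frc[3 * j + 0], coef * dx);
  atomicAdd(&frc[3 * j + 1], coef * dy);
  atomicAdd(&frc[3 * j + 2], coef * dz);
  atomicAdd(&atom_energy[i], ene);
}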
+ */ + +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_KERNEL_H_ + +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cuh" + +namespace mindspore { +namespace kernel { + +template +class Dihedral14LJCFForceWithAtomEnergyGpuKernel : public GpuKernel { + public: + Dihedral14LJCFForceWithAtomEnergyGpuKernel() : ele_uint_crd(1) {} + ~Dihedral14LJCFForceWithAtomEnergyGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + dihedral_14_numbers = static_cast(GetAttr(kernel_node, "dihedral_14_numbers")); + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + + auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + auto shape_boxlength_f = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); + auto shape_a_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); + auto shape_b_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); + auto shape_lj_scale_factor = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); + auto shape_cf_scale_factor = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); + auto shape_LJ_type_A = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8); + auto shape_LJ_type_B = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 9); + + for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; + for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; + for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; + for (size_t i = 0; i < shape_boxlength_f.size(); i++) ele_boxlength_f *= shape_boxlength_f[i]; + for (size_t i = 0; i < shape_a_14.size(); i++) ele_a_14 *= shape_a_14[i]; + for (size_t i = 0; i < shape_b_14.size(); i++) ele_b_14 *= shape_b_14[i]; + for (size_t i = 0; i < shape_lj_scale_factor.size(); i++) ele_lj_scale_factor *= shape_lj_scale_factor[i]; + for (size_t i = 0; i < shape_cf_scale_factor.size(); i++) ele_cf_scale_factor *= shape_cf_scale_factor[i]; + for (size_t i = 0; i < shape_LJ_type_A.size(); i++) ele_LJ_type_A *= shape_LJ_type_A[i]; + for (size_t i = 0; i < shape_LJ_type_B.size(); i++) ele_LJ_type_B *= shape_LJ_type_B[i]; + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) override { + auto uint_crd_f = GetDeviceAddress(inputs, 0); + auto LJtype = GetDeviceAddress(inputs, 1); + auto charge = GetDeviceAddress(inputs, 2); + auto boxlength_f = GetDeviceAddress(inputs, 3); + auto a_14 = GetDeviceAddress(inputs, 4); + auto b_14 = GetDeviceAddress(inputs, 5); + auto lj_scale_factor = GetDeviceAddress(inputs, 6); + auto cf_scale_factor = 
GetDeviceAddress(inputs, 7); + auto LJ_type_A = GetDeviceAddress(inputs, 8); + auto LJ_type_B = GetDeviceAddress(inputs, 9); + auto frc_f = GetDeviceAddress(outputs, 0); + auto atom_energy = GetDeviceAddress(outputs, 1); + + Dihedral14LJCFForceWithAtomEnergy(dihedral_14_numbers, atom_numbers, uint_crd_f, LJtype, charge, boxlength_f, a_14, + b_14, lj_scale_factor, cf_scale_factor, LJ_type_A, LJ_type_B, frc_f, atom_energy, + reinterpret_cast(stream_ptr)); + + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(ele_uint_crd * sizeof(T1)); + input_size_list_.push_back(ele_LJtype * sizeof(T1)); + input_size_list_.push_back(ele_charge * sizeof(T)); + input_size_list_.push_back(ele_boxlength_f * sizeof(T)); + input_size_list_.push_back(ele_a_14 * sizeof(T1)); + input_size_list_.push_back(ele_b_14 * sizeof(T1)); + input_size_list_.push_back(ele_lj_scale_factor * sizeof(T)); + input_size_list_.push_back(ele_cf_scale_factor * sizeof(T)); + input_size_list_.push_back(ele_LJ_type_A * sizeof(T)); + input_size_list_.push_back(ele_LJ_type_B * sizeof(T)); + + output_size_list_.push_back(3 * atom_numbers * sizeof(T)); + output_size_list_.push_back(atom_numbers * sizeof(T)); + } + + private: + size_t ele_uint_crd = 1; + size_t ele_LJtype = 1; + size_t ele_charge = 1; + size_t ele_boxlength_f = 1; + size_t ele_a_14 = 1; + size_t ele_b_14 = 1; + size_t ele_lj_scale_factor = 1; + size_t ele_cf_scale_factor = 1; + size_t ele_LJ_type_A = 1; + size_t ele_LJ_type_B = 1; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int dihedral_14_numbers; + int atom_numbers; +}; +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_energy_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_energy_kernel.cc new file mode 100644 index 0000000000..443784541b --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_energy_kernel.cc @@ -0,0 +1,36 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_energy_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(Dihedral14LJEnergy, + KernelAttr() + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + Dihedral14LJEnergyGpuKernel, float, int) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_energy_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_energy_kernel.h new file mode 100644 index 0000000000..95f1ca5b98 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_energy_kernel.h @@ -0,0 +1,124 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_ENERGY_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_ENERGY_KERNEL_H_ + +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cuh" + +namespace mindspore { +namespace kernel { + +template +class Dihedral14LJEnergyGpuKernel : public GpuKernel { + public: + Dihedral14LJEnergyGpuKernel() : ele_uint_crd(1) {} + ~Dihedral14LJEnergyGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + dihedral_14_numbers = static_cast(GetAttr(kernel_node, "dihedral_14_numbers")); + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + + auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + auto shape_boxlength_f = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); + auto shape_a_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); + auto shape_b_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); + auto shape_lj_scale_factor = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); + auto shape_LJ_type_A = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); + auto shape_LJ_type_B = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8); + + for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; + for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; + for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; + for (size_t i 
= 0; i < shape_boxlength_f.size(); i++) ele_boxlength_f *= shape_boxlength_f[i]; + for (size_t i = 0; i < shape_a_14.size(); i++) ele_a_14 *= shape_a_14[i]; + for (size_t i = 0; i < shape_b_14.size(); i++) ele_b_14 *= shape_b_14[i]; + for (size_t i = 0; i < shape_lj_scale_factor.size(); i++) ele_lj_scale_factor *= shape_lj_scale_factor[i]; + for (size_t i = 0; i < shape_LJ_type_A.size(); i++) ele_LJ_type_A *= shape_LJ_type_A[i]; + for (size_t i = 0; i < shape_LJ_type_B.size(); i++) ele_LJ_type_B *= shape_LJ_type_B[i]; + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) override { + auto uint_crd_f = GetDeviceAddress(inputs, 0); + auto LJtype = GetDeviceAddress(inputs, 1); + auto charge = GetDeviceAddress(inputs, 2); + auto boxlength_f = GetDeviceAddress(inputs, 3); + auto a_14 = GetDeviceAddress(inputs, 4); + auto b_14 = GetDeviceAddress(inputs, 5); + auto lj_scale_factor = GetDeviceAddress(inputs, 6); + auto LJ_type_A = GetDeviceAddress(inputs, 7); + auto LJ_type_B = GetDeviceAddress(inputs, 8); + auto ene = GetDeviceAddress(outputs, 0); + + Dihedral14LJEnergy(dihedral_14_numbers, atom_numbers, uint_crd_f, LJtype, charge, boxlength_f, a_14, b_14, + lj_scale_factor, LJ_type_A, LJ_type_B, ene, reinterpret_cast(stream_ptr)); + + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(ele_uint_crd * sizeof(T1)); + input_size_list_.push_back(ele_LJtype * sizeof(T1)); + input_size_list_.push_back(ele_charge * sizeof(T)); + input_size_list_.push_back(ele_boxlength_f * sizeof(T)); + input_size_list_.push_back(ele_a_14 * sizeof(T1)); + input_size_list_.push_back(ele_b_14 * sizeof(T1)); + input_size_list_.push_back(ele_lj_scale_factor * sizeof(T)); + input_size_list_.push_back(ele_LJ_type_A * sizeof(T)); + input_size_list_.push_back(ele_LJ_type_B * sizeof(T)); + + output_size_list_.push_back(atom_numbers * sizeof(T)); + } + + private: + size_t ele_uint_crd = 1; + size_t ele_LJtype = 1; + size_t ele_charge = 1; + size_t ele_boxlength_f = 1; + size_t ele_a_14 = 1; + size_t ele_b_14 = 1; + size_t ele_lj_scale_factor = 1; + size_t ele_LJ_type_A = 1; + size_t ele_LJ_type_B = 1; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int dihedral_14_numbers; + int atom_numbers; +}; +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_ENERGY_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_gpu_kernel.cc new file mode 100644 index 0000000000..e1b6f059a6 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_gpu_kernel.cc @@ -0,0 +1,36 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
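// NOTE: the uint_crd/scaler pair consumed by all of these kernels encodes fractional
// coordinates across the full 32-bit unsigned range, so the minimum-image displacement
// falls out of unsigned wrap-around followed by a signed reinterpretation. A sketch of
// the idea as inferred from the input types here (not a quote of common_sponge.cuh):
#include <cuda_runtime.h>

__device__ float3 PeriodicDisplacementSketch(uint3 crd_i, uint3 crd_j, float3 scaler) {
  // Unsigned subtraction wraps modulo 2^32; casting to int picks the short way around
  // the box, because any separation beyond half the range turns negative.
  int dx = static_cast<int>(crd_j.x - crd_i.x);
  int dy = static_cast<int>(crd_j.y - crd_i.y);
  int dz = static_cast<int>(crd_j.z - crd_i.z);
  // scaler converts integer lattice units back to physical length: box_length / 2^32.
  return make_float3(scaler.x * dx, scaler.y * dy, scaler.z * dz);
}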
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_gpu_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(Dihedral14LJForce, + KernelAttr() + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + Dihedral14LJForceGpuKernel, float, int) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_gpu_kernel.h new file mode 100644 index 0000000000..a1fdb3d3a4 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_gpu_kernel.h @@ -0,0 +1,122 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
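// NOTE: each wrapper forwards a cudaStream_t plus a work count to its .cu counterpart,
// which typically launches one thread per pair or per atom on a ceil-divided grid.
// A sketch of that host-side launch pattern (the block size is an illustrative default;
// the real choices live in the *_impl.cu files):
#include <cuda_runtime.h>

__global__ void PerItemSketch(int n, float *out) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) out[i] = 0.0f;  // placeholder per-item work
}

inline void LaunchPerItem(int n, float *out, cudaStream_t stream) {
  constexpr int kThreadsPerBlock = 128;
  int blocks = (n + kThreadsPerBlock - 1) / kThreadsPerBlock;  // ceil(n / kThreadsPerBlock)
  PerItemSketch<<<blocks, kThreadsPerBlock, 0, stream>>>(n, out);
}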
+ */ + +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_FORCE_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_FORCE_KERNEL_H_ + +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_impl.cuh" + +namespace mindspore { +namespace kernel { + +template +class Dihedral14LJForceGpuKernel : public GpuKernel { + public: + Dihedral14LJForceGpuKernel() : ele_uint_crd(1) {} + ~Dihedral14LJForceGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + dihedral_14_numbers = static_cast(GetAttr(kernel_node, "dihedral_14_numbers")); + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + + auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + auto shape_boxlength_f = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); + auto shape_a_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); + auto shape_b_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); + auto shape_lj_scale_factor = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); + auto shape_LJ_type_A = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); + auto shape_LJ_type_B = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8); + + for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; + for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; + for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; + for (size_t i = 0; i < shape_boxlength_f.size(); i++) ele_boxlength_f *= shape_boxlength_f[i]; + for (size_t i = 0; i < shape_a_14.size(); i++) ele_a_14 *= shape_a_14[i]; + for (size_t i = 0; i < shape_b_14.size(); i++) ele_b_14 *= shape_b_14[i]; + for (size_t i = 0; i < shape_lj_scale_factor.size(); i++) ele_lj_scale_factor *= shape_lj_scale_factor[i]; + for (size_t i = 0; i < shape_LJ_type_A.size(); i++) ele_LJ_type_A *= shape_LJ_type_A[i]; + for (size_t i = 0; i < shape_LJ_type_B.size(); i++) ele_LJ_type_B *= shape_LJ_type_B[i]; + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) override { + auto uint_crd_f = GetDeviceAddress(inputs, 0); + auto LJtype = GetDeviceAddress(inputs, 1); + auto charge = GetDeviceAddress(inputs, 2); + auto boxlength_f = GetDeviceAddress(inputs, 3); + auto a_14 = GetDeviceAddress(inputs, 4); + auto b_14 = GetDeviceAddress(inputs, 5); + auto lj_scale_factor = GetDeviceAddress(inputs, 6); + auto LJ_type_A = GetDeviceAddress(inputs, 7); + auto LJ_type_B = GetDeviceAddress(inputs, 8); + auto frc_f = GetDeviceAddress(outputs, 0); + Dihedral14LJForce(dihedral_14_numbers, atom_numbers, uint_crd_f, LJtype, charge, boxlength_f, a_14, b_14, + lj_scale_factor, LJ_type_A, LJ_type_B, frc_f, reinterpret_cast(stream_ptr)); + return true; + } + + 
protected: + void InitSizeLists() override { + input_size_list_.push_back(ele_uint_crd * sizeof(T1)); + input_size_list_.push_back(ele_LJtype * sizeof(T1)); + input_size_list_.push_back(ele_charge * sizeof(T)); + input_size_list_.push_back(ele_boxlength_f * sizeof(T)); + input_size_list_.push_back(ele_a_14 * sizeof(T1)); + input_size_list_.push_back(ele_b_14 * sizeof(T1)); + input_size_list_.push_back(ele_lj_scale_factor * sizeof(T)); + input_size_list_.push_back(ele_LJ_type_A * sizeof(T)); + input_size_list_.push_back(ele_LJ_type_B * sizeof(T)); + + output_size_list_.push_back(atom_numbers * 3 * sizeof(T)); + } + + private: + size_t ele_uint_crd = 1; + size_t ele_LJtype = 1; + size_t ele_charge = 1; + size_t ele_boxlength_f = 1; + size_t ele_a_14 = 1; + size_t ele_b_14 = 1; + size_t ele_lj_scale_factor = 1; + size_t ele_LJ_type_A = 1; + size_t ele_LJ_type_B = 1; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int dihedral_14_numbers; + int atom_numbers; +}; +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_FORCE_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_with_direct_cf_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_with_direct_cf_kernel.cc new file mode 100644 index 0000000000..36ef602f48 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_with_direct_cf_kernel.cc @@ -0,0 +1,37 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_with_direct_cf_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(Dihedral14LJForceWithDirectCF, + KernelAttr() + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + Dihedral14LJForceWithDirectCFGpuKernel, float, int) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_with_direct_cf_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_with_direct_cf_kernel.h new file mode 100644 index 0000000000..d0911d0889 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_with_direct_cf_kernel.h @@ -0,0 +1,130 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
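// NOTE: both LJForceWithPMEDirectForce earlier in this patch and the "DirectCF" fusion
// here pair short-range LJ work with the direct-space part of an Ewald/PME Coulomb sum,
// which screens 1/r with erfc(pme_beta * r). A sketch of the per-pair direct-space
// force, derived from E = q_i*q_j*erfc(beta*r)/r (illustrative, not the shipped kernel):
#include <cuda_runtime.h>
#include <math.h>

__device__ float3 PmeDirectForceSketch(float3 dr, float qi, float qj, float beta) {
  const float kTwoOverSqrtPi = 1.1283791670955126f;  // 2/sqrt(pi)
  float r2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
  float r = sqrtf(r2);
  // F_i = -qi*qj*(erfc(beta*r)/r^3 + (2*beta/sqrt(pi))*exp(-beta^2*r^2)/r^2) * dr
  float coef = -qi * qj * (erfcf(beta * r) / (r2 * r) +
                           kTwoOverSqrtPi * beta * expf(-beta * beta * r2) / r2);
  return make_float3(coef * dr.x, coef * dr.y, coef * dr.z);
}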
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_FORCE_WITH_DIRECT_CF_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_FORCE_WITH_DIRECT_CF_KERNEL_H_ + +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_with_direct_cf_impl.cuh" + +namespace mindspore { +namespace kernel { + +template +class Dihedral14LJForceWithDirectCFGpuKernel : public GpuKernel { + public: + Dihedral14LJForceWithDirectCFGpuKernel() : ele_uint_crd(1) {} + ~Dihedral14LJForceWithDirectCFGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + dihedral_14_numbers = static_cast(GetAttr(kernel_node, "dihedral_14_numbers")); + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + + auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + auto shape_boxlength_f = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); + auto shape_a_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); + auto shape_b_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); + auto shape_lj_scale_factor = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); + auto shape_cf_scale_factor = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); + auto shape_LJ_type_A = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8); + auto shape_LJ_type_B = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 9); + + for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; + for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; + for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; + for (size_t i = 0; i < shape_boxlength_f.size(); i++) ele_boxlength_f *= shape_boxlength_f[i]; + for (size_t i = 0; i < shape_a_14.size(); i++) ele_a_14 *= shape_a_14[i]; + for (size_t i = 0; i < shape_b_14.size(); i++) ele_b_14 *= shape_b_14[i]; + for (size_t i = 0; i < shape_lj_scale_factor.size(); i++) ele_lj_scale_factor *= shape_lj_scale_factor[i]; + for (size_t i = 0; i < shape_cf_scale_factor.size(); i++) ele_cf_scale_factor *= shape_cf_scale_factor[i]; + for (size_t i = 0; i < shape_LJ_type_A.size(); i++) ele_LJ_type_A *= shape_LJ_type_A[i]; + for (size_t i = 0; i < shape_LJ_type_B.size(); i++) ele_LJ_type_B *= shape_LJ_type_B[i]; + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void 
*stream_ptr) override { + auto uint_crd_f = GetDeviceAddress(inputs, 0); + auto LJtype = GetDeviceAddress(inputs, 1); + auto charge = GetDeviceAddress(inputs, 2); + auto boxlength_f = GetDeviceAddress(inputs, 3); + auto a_14 = GetDeviceAddress(inputs, 4); + auto b_14 = GetDeviceAddress(inputs, 5); + auto lj_scale_factor = GetDeviceAddress(inputs, 6); + auto cf_scale_factor = GetDeviceAddress(inputs, 7); + auto LJ_type_A = GetDeviceAddress(inputs, 8); + auto LJ_type_B = GetDeviceAddress(inputs, 9); + auto frc_f = GetDeviceAddress(outputs, 0); + + Dihedral14LJForceWithDirectCF(dihedral_14_numbers, atom_numbers, uint_crd_f, LJtype, charge, boxlength_f, a_14, + b_14, lj_scale_factor, cf_scale_factor, LJ_type_A, LJ_type_B, frc_f, + reinterpret_cast(stream_ptr)); + + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(ele_uint_crd * sizeof(T1)); + input_size_list_.push_back(ele_LJtype * sizeof(T1)); + input_size_list_.push_back(ele_charge * sizeof(T)); + input_size_list_.push_back(ele_boxlength_f * sizeof(T)); + input_size_list_.push_back(ele_a_14 * sizeof(T1)); + input_size_list_.push_back(ele_b_14 * sizeof(T1)); + input_size_list_.push_back(ele_lj_scale_factor * sizeof(T)); + input_size_list_.push_back(ele_cf_scale_factor * sizeof(T)); + input_size_list_.push_back(ele_LJ_type_A * sizeof(T)); + input_size_list_.push_back(ele_LJ_type_B * sizeof(T)); + + output_size_list_.push_back(atom_numbers * 3 * sizeof(T)); + } + + private: + size_t ele_uint_crd = 1; + size_t ele_LJtype = 1; + size_t ele_charge = 1; + size_t ele_boxlength_f = 1; + size_t ele_a_14 = 1; + size_t ele_b_14 = 1; + size_t ele_lj_scale_factor = 1; + size_t ele_cf_scale_factor = 1; + size_t ele_LJ_type_A = 1; + size_t ele_LJ_type_B = 1; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int dihedral_14_numbers; + int atom_numbers; +}; +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_FORCE_WITH_DIRECT_CF_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.cc new file mode 100644 index 0000000000..f0e097c7a4 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.cc @@ -0,0 +1,45 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
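Every kernel in this patch follows the same size bookkeeping seen above: Init() multiplies out each input's inferred shape into an ele_* element count, and InitSizeLists() converts those counts into byte sizes for the framework's allocator. A standalone sketch of that pattern:

#include <cstddef>
#include <cstdio>
#include <functional>
#include <numeric>
#include <vector>

// Element count of a tensor shape, exactly what the ele_* loops in Init() compute.
static size_t ElementCount(const std::vector<size_t> &shape) {
  return std::accumulate(shape.begin(), shape.end(), static_cast<size_t>(1),
                         std::multiplies<size_t>());
}

int main() {
  std::vector<size_t> shape_uint_crd = {128, 3};  // hypothetical [N, 3] coordinate tensor
  std::vector<size_t> shape_a_14 = {300};         // hypothetical [M] pair-index tensor
  std::printf("uint_crd_f bytes: %zu\n", ElementCount(shape_uint_crd) * sizeof(unsigned int));
  std::printf("a_14 bytes:       %zu\n", ElementCount(shape_a_14) * sizeof(int));
  return 0;
}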
+ */ + +#include "backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(NeighborListUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeFloat32), + NeighborListUpdateGpuKernel, int, float) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.h new file mode 100644 index 0000000000..198a04dd7e --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.h @@ -0,0 +1,170 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
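NeighborListUpdate is driven almost entirely by attributes rather than tensor inputs. The squared thresholds it reads (cutoff_square, half_skin_square, cutoff_with_skin_square) look like precomputed derivatives of the skin and cutoff defaults set in the constructor below; the relations in this sketch are inferred from the attribute names and standard Verlet-list practice, not stated anywhere in the patch.

#include <cstdio>

// Hypothetical helper deriving the squared thresholds NeighborListUpdate receives
// as attributes. Defaults match the kernel's constructor (skin = 2.0, cutoff = 10.0);
// the formulas are assumptions based on the names.
struct NeighborListConfig {
  float skin = 2.0f;
  float cutoff = 10.0f;
  float CutoffSquare() const { return cutoff * cutoff; }
  float HalfSkinSquare() const { return 0.25f * skin * skin; }  // (skin/2)^2 refresh test
  float CutoffWithSkin() const { return cutoff + skin; }
  float CutoffWithSkinSquare() const { float c = CutoffWithSkin(); return c * c; }
};

int main() {
  NeighborListConfig cfg;
  std::printf("cutoff_square=%.1f half_skin_square=%.1f cutoff_with_skin_square=%.1f\n",
              cfg.CutoffSquare(), cfg.HalfSkinSquare(), cfg.CutoffWithSkinSquare());
  return 0;
}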
+ */ + +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_NEIGHBOR_LIST_UPDATE_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_NEIGHBOR_LIST_UPDATE_KERNEL_H_ + +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh" + +namespace mindspore { +namespace kernel { +template +class NeighborListUpdateGpuKernel : public GpuKernel { + public: + NeighborListUpdateGpuKernel() : skin(2.0), cutoff(10.0), max_atom_in_grid_numbers(64), max_neighbor_numbers(800) {} + ~NeighborListUpdateGpuKernel() override = default; + bool Init(const CNodePtr &kernel_node) override { + grid_numbers = static_cast(GetAttr(kernel_node, "grid_numbers")); + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + refresh_count = static_cast(GetAttr(kernel_node, "refresh_count")); + refresh_interval = static_cast(GetAttr(kernel_node, "refresh_interval")); + not_first_time = static_cast(GetAttr(kernel_node, "not_first_time")); + Nxy = static_cast(GetAttr(kernel_node, "Nxy")); + excluded_atom_numbers = static_cast(GetAttr(kernel_node, "excluded_atom_numbers")); + + cutoff_square = static_cast(GetAttr(kernel_node, "cutoff_square")); + half_skin_square = static_cast(GetAttr(kernel_node, "half_skin_square")); + cutoff_with_skin = static_cast(GetAttr(kernel_node, "cutoff_with_skin")); + half_cutoff_with_skin = static_cast(GetAttr(kernel_node, "half_cutoff_with_skin")); + cutoff_with_skin_square = static_cast(GetAttr(kernel_node, "cutoff_with_skin_square")); + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &workspaces, + const std::vector &outputs, void *stream_ptr) override { + auto atom_numbers_in_grid_bucket = GetDeviceAddress(inputs, 0); + auto bucket = GetDeviceAddress(inputs, 1); + auto crd = GetDeviceAddress(inputs, 2); + auto box_length = GetDeviceAddress(inputs, 3); + auto grid_N = GetDeviceAddress(inputs, 4); + auto grid_length_inverse = GetDeviceAddress(inputs, 5); + auto atom_in_grid_serial = GetDeviceAddress(inputs, 6); + auto old_crd = GetDeviceAddress(inputs, 7); + auto crd_to_uint_crd_cof = GetDeviceAddress(inputs, 8); + auto uint_crd = GetDeviceAddress(inputs, 9); + auto gpointer = GetDeviceAddress(inputs, 10); + auto nl_atom_numbers = GetDeviceAddress(inputs, 11); + auto nl_atom_serial = GetDeviceAddress(inputs, 12); + auto uint_dr_to_dr_cof = GetDeviceAddress(inputs, 13); + auto excluded_list_start = GetDeviceAddress(inputs, 14); + auto excluded_list = GetDeviceAddress(inputs, 15); + auto excluded_numbers = GetDeviceAddress(inputs, 16); + auto need_refresh_flag = GetDeviceAddress(inputs, 17); + + GRID_BUCKET *d_bucket = reinterpret_cast(GetDeviceAddress(workspaces, 0)); + GRID_POINTER *d_gpointer = reinterpret_cast(GetDeviceAddress(workspaces, 1)); + NEIGHBOR_LIST *nl = GetDeviceAddress(workspaces, 2); + float *half_crd_to_uint_crd_cof = GetDeviceAddress(workspaces, 3); + + std::vector h_bucket(grid_numbers); + for (size_t i = 0; i < h_bucket.size(); i += 1) { + h_bucket[i].atom_serial = bucket + i * max_atom_in_grid_numbers; + } 
+ std::vector h_gpointer(grid_numbers); + for (size_t i = 0; i < h_gpointer.size(); i += 1) { + h_gpointer[i].grid_serial = gpointer + i * 125; + } + + cudaMemcpyAsync(d_bucket, h_bucket.data(), sizeof(GRID_BUCKET) * grid_numbers, cudaMemcpyHostToDevice, + reinterpret_cast(stream_ptr)); + cudaMemcpyAsync(d_gpointer, h_gpointer.data(), sizeof(GRID_POINTER) * grid_numbers, cudaMemcpyHostToDevice, + reinterpret_cast(stream_ptr)); + Construct_Neighbor_List(atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl, + reinterpret_cast(stream_ptr)); + + Neighbor_List_Update(grid_numbers, atom_numbers, refresh_count, refresh_interval, not_first_time, skin, Nxy, + cutoff_square, cutoff_with_skin_square, grid_N, box_length, atom_numbers_in_grid_bucket, + grid_length_inverse, atom_in_grid_serial, d_bucket, crd, old_crd, crd_to_uint_crd_cof, + half_crd_to_uint_crd_cof, uint_crd, uint_dr_to_dr_cof, d_gpointer, nl, excluded_list_start, + excluded_list, excluded_numbers, half_skin_square, need_refresh_flag, + reinterpret_cast(stream_ptr)); + + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(sizeof(int) * grid_numbers); + input_size_list_.push_back(sizeof(int) * max_atom_in_grid_numbers * grid_numbers); + input_size_list_.push_back(sizeof(VECTOR) * atom_numbers); + input_size_list_.push_back(sizeof(VECTOR)); + + input_size_list_.push_back(sizeof(INT_VECTOR)); + input_size_list_.push_back(sizeof(VECTOR)); + input_size_list_.push_back(sizeof(int) * atom_numbers); + + input_size_list_.push_back(sizeof(VECTOR) * atom_numbers); + input_size_list_.push_back(sizeof(VECTOR)); + input_size_list_.push_back(sizeof(UNSIGNED_INT_VECTOR) * atom_numbers); + + input_size_list_.push_back(sizeof(int) * grid_numbers * 125); + input_size_list_.push_back(sizeof(int) * atom_numbers); + input_size_list_.push_back(sizeof(int) * atom_numbers * max_neighbor_numbers); + input_size_list_.push_back(sizeof(VECTOR)); + + input_size_list_.push_back(sizeof(int) * atom_numbers); + input_size_list_.push_back(sizeof(int) * excluded_atom_numbers); + input_size_list_.push_back(sizeof(int) * atom_numbers); + + input_size_list_.push_back(sizeof(int)); + + workspace_size_list_.push_back(sizeof(GRID_BUCKET) * grid_numbers); + workspace_size_list_.push_back(sizeof(GRID_POINTER) * grid_numbers); + workspace_size_list_.push_back(sizeof(NEIGHBOR_LIST) * atom_numbers); + workspace_size_list_.push_back(sizeof(float) * 3); + + output_size_list_.push_back(sizeof(float)); + } + + private: + float skin; + float cutoff; + int not_first_time; + int atom_numbers; + int grid_numbers; + int refresh_count; + int refresh_interval; + int Nxy; + int max_atom_in_grid_numbers; + int max_neighbor_numbers; + int excluded_atom_numbers; + float half_skin_square; + float cutoff_square; + float cutoff_with_skin; + float half_cutoff_with_skin; + float cutoff_with_skin_square; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; +}; +} // namespace kernel +} // namespace mindspore + +#endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nvtit/md_iteration_leap_frog_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nvtit/md_iteration_leap_frog_kernel.cc new file mode 100644 index 0000000000..f55a5c2690 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nvtit/md_iteration_leap_frog_kernel.cc @@ -0,0 +1,32 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the 
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/kernel_compiler/gpu/sponge/nvtit/md_iteration_leap_frog_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_ONE(MDIterationLeapFrog, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + MDIterationLeapFrogGpuKernel, float) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nvtit/md_iteration_leap_frog_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nvtit/md_iteration_leap_frog_kernel.h new file mode 100644 index 0000000000..c7a121ce1d --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nvtit/md_iteration_leap_frog_kernel.h @@ -0,0 +1,115 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NVTIT_MD_ITERATION_LEAP_FROG_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NVTIT_MD_ITERATION_LEAP_FROG_KERNEL_H_ + +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cuh" + +namespace mindspore { +namespace kernel { + +template +class MDIterationLeapFrogGpuKernel : public GpuKernel { + public: + MDIterationLeapFrogGpuKernel() : ele_mass_inverse(1) {} + ~MDIterationLeapFrogGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + float4_numbers = static_cast(GetAttr(kernel_node, "float4_numbers")); + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + half_dt = static_cast(GetAttr(kernel_node, "half_dt")); + dt = static_cast(GetAttr(kernel_node, "dt")); + exp_gamma = static_cast(GetAttr(kernel_node, "exp_gamma")); + is_max_velocity = static_cast(GetAttr(kernel_node, "is_max_velocity")); + max_velocity = static_cast(GetAttr(kernel_node, "max_velocity")); + + auto shape_mass_inverse = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto shape_sqrt_mass = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + + for (size_t i = 0; i < shape_mass_inverse.size(); i++) ele_mass_inverse *= shape_mass_inverse[i]; + for (size_t i = 0; i < shape_sqrt_mass.size(); i++) ele_sqrt_mass *= shape_sqrt_mass[i]; + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) override { + auto d_mass_inverse = GetDeviceAddress(inputs, 0); + auto d_sqrt_mass = GetDeviceAddress(inputs, 1); + + auto vel_f = GetDeviceAddress(outputs, 0); + auto crd_f = GetDeviceAddress(outputs, 1); + auto frc_f = GetDeviceAddress(outputs, 2); + auto acc_f = GetDeviceAddress(outputs, 3); + + MDIterationLeapFrog(float4_numbers, atom_numbers, half_dt, dt, exp_gamma, is_max_velocity, max_velocity, + d_mass_inverse, d_sqrt_mass, vel_f, crd_f, frc_f, acc_f, + reinterpret_cast(stream_ptr)); + + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(ele_mass_inverse * sizeof(T)); + input_size_list_.push_back(ele_sqrt_mass * sizeof(T)); + + output_size_list_.push_back(3 * atom_numbers * sizeof(T)); + output_size_list_.push_back(3 * atom_numbers * sizeof(T)); + output_size_list_.push_back(3 * atom_numbers * sizeof(T)); + output_size_list_.push_back(3 * atom_numbers * sizeof(T)); + } + + private: + size_t ele_mass_inverse = 1; + size_t ele_sqrt_mass = 1; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int float4_numbers; + int atom_numbers; + float half_dt; + float dt; + float exp_gamma; + int
is_max_velocity; + float max_velocity; +}; +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NVTIT_MD_ITERATION_LEAP_FROG_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_energy_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_energy_kernel.cc new file mode 100644 index 0000000000..94e01ff08d --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_energy_kernel.cc @@ -0,0 +1,38 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "backend/kernel_compiler/gpu/sponge/pme/pme_energy_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(PMEEnergy, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + PMEEnergyGpuKernel, float, int) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_energy_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_energy_kernel.h new file mode 100644 index 0000000000..37834b078c --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_energy_kernel.h @@ -0,0 +1,147 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
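The attribute set read in Init() above (half_dt, dt, exp_gamma, is_max_velocity, max_velocity) points to a velocity-damped leap-frog step with an optional speed clamp. One plausible reading of the per-atom update, offered purely as a sketch since the .cu implementation is not reproduced here:

#include <cmath>
#include <cstdio>

int main() {
  // Hypothetical single-atom state; the real kernel runs this per atom (or per float4 packet).
  float vel[3] = {0.10f, -0.20f, 0.05f};
  float crd[3] = {1.0f, 2.0f, 3.0f};
  float frc[3] = {0.30f, 0.00f, -0.10f};
  float inverse_mass = 1.0f / 12.0f;
  float dt = 0.001f;
  float exp_gamma = 0.98f;  // assumed exp(-gamma*dt)-style damping factor
  bool is_max_velocity = true;
  float max_velocity = 20.0f;

  for (int d = 0; d < 3; ++d) {
    float acc = frc[d] * inverse_mass;       // a = F / m
    vel[d] = exp_gamma * vel[d] + dt * acc;  // damped velocity update
    crd[d] += dt * vel[d];                   // leap-frog position update
  }
  if (is_max_velocity) {
    float v = std::sqrt(vel[0] * vel[0] + vel[1] * vel[1] + vel[2] * vel[2]);
    if (v > max_velocity)
      for (int d = 0; d < 3; ++d) vel[d] *= max_velocity / v;  // clamp speed
  }
  std::printf("crd = (%f, %f, %f)\n", crd[0], crd[1], crd[2]);
  return 0;
}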
+ */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_ENERGY_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_ENERGY_KERNEL_H_ +#include +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_impl.cuh" +namespace mindspore { +namespace kernel { +template +class PMEEnergyGpuKernel : public GpuKernel { + public: + PMEEnergyGpuKernel() : ele_uint_crd(1) {} + ~PMEEnergyGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + beta = static_cast(GetAttr(kernel_node, "beta")); + fftx = static_cast(GetAttr(kernel_node, "fftx")); + ffty = static_cast(GetAttr(kernel_node, "ffty")); + fftz = static_cast(GetAttr(kernel_node, "fftz")); + PME_Nall = fftx * ffty * fftz; + PME_Nfft = fftx * ffty * (fftz / 2 + 1); + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) override { + auto boxlength = GetDeviceAddress(inputs, 0); + auto uint_crd = GetDeviceAddress(inputs, 1); + auto charge = GetDeviceAddress(inputs, 2); + auto nl_numbers = GetDeviceAddress(inputs, 3); + auto nl_serial = GetDeviceAddress(inputs, 4); + auto scaler = GetDeviceAddress(inputs, 5); + auto excluded_list_start = GetDeviceAddress(inputs, 6); + auto excluded_list = GetDeviceAddress(inputs, 7); + auto excluded_atom_numbers = GetDeviceAddress(inputs, 8); + + auto pme_uxyz = GetDeviceAddress(workspace, 0); // workspace + auto pme_frxyz = GetDeviceAddress(workspace, 1); // workspace + auto pme_q = GetDeviceAddress(workspace, 2); // workspace + auto pme_fq = GetDeviceAddress(workspace, 3); // workspace + auto pme_atom_near = GetDeviceAddress(workspace, 4); // workspace + auto pme_bc = GetDeviceAddress(workspace, 5); // workspace + auto pme_kxyz = GetDeviceAddress(workspace, 6); // workspace + auto nl = GetDeviceAddress(workspace, 7); + + auto reciprocal_ene = GetDeviceAddress(outputs, 0); + auto self_ene = GetDeviceAddress(outputs, 1); + auto direct_ene = GetDeviceAddress(outputs, 2); + auto correction_ene = GetDeviceAddress(outputs, 3); + + PMEEnergy(fftx, ffty, fftz, atom_numbers, beta, boxlength, pme_bc, pme_uxyz, pme_frxyz, pme_q, pme_fq, + pme_atom_near, pme_kxyz, uint_crd, charge, nl_numbers, nl_serial, nl, scaler, excluded_list_start, + excluded_list, excluded_atom_numbers, reciprocal_ene, self_ene, direct_ene, correction_ene, + reinterpret_cast(stream_ptr)); + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(sizeof(VECTOR)); + input_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR)); + input_size_list_.push_back(atom_numbers * sizeof(VECTOR)); + input_size_list_.push_back(atom_numbers * sizeof(T1)); + input_size_list_.push_back(max_nl_numbers * sizeof(T1)); + input_size_list_.push_back(atom_numbers * sizeof(VECTOR)); + + input_size_list_.push_back(atom_numbers * sizeof(T1)); + input_size_list_.push_back(atom_numbers * 
sizeof(T1)); + input_size_list_.push_back(atom_numbers * sizeof(T1)); + + workspace_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR)); + workspace_size_list_.push_back(atom_numbers * sizeof(VECTOR)); + workspace_size_list_.push_back(PME_Nall * sizeof(T)); + workspace_size_list_.push_back(PME_Nfft * sizeof(cufftComplex)); + workspace_size_list_.push_back(atom_numbers * 64 * sizeof(int)); + workspace_size_list_.push_back(PME_Nfft * sizeof(float)); + workspace_size_list_.push_back(64 * sizeof(UNSIGNED_INT_VECTOR)); + workspace_size_list_.push_back(atom_numbers * max_nl_numbers * sizeof(T1)); + + output_size_list_.push_back(sizeof(T)); + output_size_list_.push_back(sizeof(T)); + output_size_list_.push_back(sizeof(T)); + output_size_list_.push_back(sizeof(T)); + } + + private: + size_t ele_uint_crd = 1; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int atom_numbers; + int max_nl_numbers = 800; + int fftx; + int ffty; + int fftz; + float beta; + int PME_Nall; + int PME_Nfft; + struct VECTOR { + float x; + float y; + float z; + }; + + struct UNSIGNED_INT_VECTOR { + unsigned int uint_x; + unsigned int uint_y; + unsigned int uint_z; + }; + + struct NEIGHBOR_LIST { + int atom_numbers; + int *atom_serial; + }; +}; +} // namespace kernel +} // namespace mindspore +#endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_kernel.cc new file mode 100644 index 0000000000..822ccb881c --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_kernel.cc @@ -0,0 +1,32 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(PMEExcludedForce, + KernelAttr() + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeFloat32), + PMEExcludedForceGpuKernel, float, int) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_kernel.h new file mode 100644 index 0000000000..4eca3b7ffd --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_kernel.h @@ -0,0 +1,95 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
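Both PME kernels size their grids the same way: PME_Nall = fftx*ffty*fftz real values for the charge grid pme_q, and PME_Nfft = fftx*ffty*(fftz/2 + 1) complex values for its spectrum pme_fq. The (fftz/2 + 1) factor is the standard storage for a real-to-complex FFT, which drops the Hermitian-redundant half along the last axis; it is also why the CMake change further below links libcufft. The byte accounting in miniature, assuming cuFFT's cufftComplex layout of two floats:

#include <cstdio>

int main() {
  const int fftx = 32, ffty = 32, fftz = 32;          // hypothetical PME grid
  const int PME_Nall = fftx * ffty * fftz;            // real-space charge grid (pme_q)
  const int PME_Nfft = fftx * ffty * (fftz / 2 + 1);  // R2C spectrum, redundant half dropped (pme_fq)
  std::printf("pme_q:  %d floats (%zu bytes)\n", PME_Nall, PME_Nall * sizeof(float));
  std::printf("pme_fq: %d complex (%zu bytes)\n", PME_Nfft, PME_Nfft * 2 * sizeof(float));
  return 0;
}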
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_EXCLUDED_FORCE_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_EXCLUDED_FORCE_KERNEL_H_ +#include +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_excluded_force_impl.cuh" +namespace mindspore { +namespace kernel { +template +class PMEExcludedForceGpuKernel : public GpuKernel { + public: + PMEExcludedForceGpuKernel() : ele_uint_crd(1) {} + ~PMEExcludedForceGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + beta = static_cast(GetAttr(kernel_node, "beta")); + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) override { + auto uint_crd = GetDeviceAddress(inputs, 0); + auto scaler = GetDeviceAddress(inputs, 1); + auto charge = GetDeviceAddress(inputs, 2); + auto excluded_list_start = GetDeviceAddress(inputs, 3); + auto excluded_list = GetDeviceAddress(inputs, 4); + auto excluded_atom_numbers = GetDeviceAddress(inputs, 5); + + auto force = GetDeviceAddress(outputs, 0); + PMEExcludedForce(atom_numbers, beta, uint_crd, scaler, charge, excluded_list_start, excluded_list, + excluded_atom_numbers, force, reinterpret_cast(stream_ptr)); + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR)); + input_size_list_.push_back(atom_numbers * sizeof(VECTOR)); + input_size_list_.push_back(atom_numbers * sizeof(T)); + input_size_list_.push_back(atom_numbers * sizeof(T1)); + input_size_list_.push_back(atom_numbers * sizeof(T1)); + input_size_list_.push_back(atom_numbers * sizeof(T1)); + + output_size_list_.push_back(atom_numbers * 3 * sizeof(T)); + } + + private: + size_t ele_uint_crd = 1; + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int atom_numbers; + float beta; + struct VECTOR { + float x; + float y; + float z; + }; + + struct UNSIGNED_INT_VECTOR { + unsigned int uint_x; + unsigned int uint_y; + unsigned int uint_z; + }; +}; +} // namespace kernel +} // namespace mindspore +#endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_kernel.cc new file mode 100644 index 0000000000..94c2e7130f --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_kernel.cc @@ -0,0 +1,29 @@ +/** + *
Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(PMEReciprocalForce, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + PMEReciprocalForceGpuKernel, float, int) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_kernel.h new file mode 100644 index 0000000000..161fb6ccc5 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_kernel.h @@ -0,0 +1,119 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
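The three integer inputs of PMEExcludedForce above encode a compressed per-atom exclusion table: excluded_list_start[i] is atom i's offset into the flat excluded_list, and the per-atom count says how many entries belong to it. The names are the kernel's; the walk below and its data are purely illustrative.

#include <cstdio>
#include <vector>

int main() {
  // Hypothetical CSR-style exclusion data for 3 atoms.
  std::vector<int> excluded_list_start = {0, 2, 3};  // offset of each atom's slice
  std::vector<int> excluded_numbers = {2, 1, 0};     // entries per atom
  std::vector<int> excluded_list = {1, 2, 2};        // flat partner indices

  for (size_t i = 0; i < excluded_list_start.size(); ++i) {
    std::printf("atom %zu excludes:", i);
    for (int k = 0; k < excluded_numbers[i]; ++k)
      std::printf(" %d", excluded_list[excluded_list_start[i] + k]);
    std::printf("\n");
  }
  return 0;
}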
+ */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_RECIPROCAL_FORCE_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_RECIPROCAL_FORCE_KERNEL_H_ +#include +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cuh" +namespace mindspore { +namespace kernel { +template +class PMEReciprocalForceGpuKernel : public GpuKernel { + public: + PMEReciprocalForceGpuKernel() : ele_uint_crd(1) {} + ~PMEReciprocalForceGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + beta = static_cast(GetAttr(kernel_node, "beta")); + fftx = static_cast(GetAttr(kernel_node, "fftx")); + ffty = static_cast(GetAttr(kernel_node, "ffty")); + fftz = static_cast(GetAttr(kernel_node, "fftz")); + PME_Nall = fftx * ffty * fftz; + PME_Nfft = fftx * ffty * (fftz / 2 + 1); + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) override { + auto boxlength = GetDeviceAddress(inputs, 0); + auto uint_crd = GetDeviceAddress(inputs, 1); + auto charge = GetDeviceAddress(inputs, 2); + + auto pme_uxyz = GetDeviceAddress(workspace, 0); // workspace + auto pme_frxyz = GetDeviceAddress(workspace, 1); // workspace + auto pme_q = GetDeviceAddress(workspace, 2); // workspace + auto pme_fq = GetDeviceAddress(workspace, 3); // workspace + auto pme_atom_near = GetDeviceAddress(workspace, 4); // workspace + auto pme_bc = GetDeviceAddress(workspace, 5); // workspace + auto pme_kxyz = GetDeviceAddress(workspace, 6); // workspace + + auto force = GetDeviceAddress(outputs, 0); + + PMEReciprocalForce(fftx, ffty, fftz, atom_numbers, beta, pme_bc, pme_uxyz, pme_frxyz, pme_q, pme_fq, pme_atom_near, + pme_kxyz, boxlength, uint_crd, charge, force, reinterpret_cast(stream_ptr)); + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(sizeof(VECTOR)); + input_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR)); + input_size_list_.push_back(atom_numbers * sizeof(VECTOR)); + + workspace_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR)); + workspace_size_list_.push_back(atom_numbers * sizeof(VECTOR)); + workspace_size_list_.push_back(PME_Nall * sizeof(T)); + workspace_size_list_.push_back(PME_Nfft * sizeof(cufftComplex)); + workspace_size_list_.push_back(atom_numbers * 64 * sizeof(int)); + workspace_size_list_.push_back(PME_Nfft * sizeof(float)); + workspace_size_list_.push_back(64 * sizeof(UNSIGNED_INT_VECTOR)); + + output_size_list_.push_back(atom_numbers * sizeof(VECTOR)); + } + + private: + size_t ele_uint_crd = 1; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int atom_numbers; + int fftx; + int ffty; + int fftz; + float beta; + int PME_Nall; + int PME_Nfft; + + struct VECTOR { + float x; + float y; + float z; + }; + + struct UNSIGNED_INT_VECTOR { + unsigned int 
uint_x; + unsigned int uint_y; + unsigned int uint_z; + }; +}; +} // namespace kernel +} // namespace mindspore +#endif diff --git a/mindspore/ccsrc/cxx_api/CMakeLists.txt b/mindspore/ccsrc/cxx_api/CMakeLists.txt index 5041dea655..59c967ba16 100644 --- a/mindspore/ccsrc/cxx_api/CMakeLists.txt +++ b/mindspore/ccsrc/cxx_api/CMakeLists.txt @@ -114,7 +114,8 @@ if(ENABLE_GPU) ${CUDNN_LIBRARY_PATH} ${CUDA_PATH}/lib64/libcudart.so ${CUDA_PATH}/lib64/stubs/libcuda.so - ${CUDA_PATH}/lib64/libcusolver.so) + ${CUDA_PATH}/lib64/libcusolver.so + ${CUDA_PATH}/lib64/libcufft.so) endif() if(CMAKE_SYSTEM_NAME MATCHES "Linux") diff --git a/mindspore/ops/operations/__init__.py b/mindspore/ops/operations/__init__.py index 762adcfd03..9282449e4e 100644 --- a/mindspore/ops/operations/__init__.py +++ b/mindspore/ops/operations/__init__.py @@ -100,8 +100,13 @@ from ._embedding_cache_ops import (CacheSwapHashmap, SearchCacheIdx, CacheSwapTa MapUniform, DynamicAssign, PadAndShift) from .quantum_ops import PQC, Evolution from .sponge_ops import (BondForce, BondEnergy, BondAtomEnergy, BondForceWithAtomEnergy, BondForceWithAtomVirial, - DihedralForce, DihedralEnergy, DihedralAtomEnergy, DihedralForceWithAtomEnergy, - AngleForce, AngleEnergy, AngleAtomEnergy, AngleForceWithAtomEnergy) + DihedralForce, DihedralEnergy, DihedralAtomEnergy, DihedralForceWithAtomEnergy, AngleForce, + AngleEnergy, AngleAtomEnergy, AngleForceWithAtomEnergy, PMEReciprocalForce, + LJForce, LJEnergy, LJForceWithPMEDirectForce, PMEExcludedForce, PMEEnergy, Dihedral14LJForce, + Dihedral14LJForceWithDirectCF, Dihedral14LJEnergy, Dihedral14LJCFForceWithAtomEnergy, + Dihedral14LJAtomEnergy, Dihedral14CFEnergy, Dihedral14CFAtomEnergy, MDIterationLeapFrog, + GetCenterOfGeometry, MDTemperature, NeighborListUpdate) + __all__ = [ 'Unique', @@ -438,6 +443,24 @@ __all__ = [ "AngleEnergy", "AngleAtomEnergy", "AngleForceWithAtomEnergy", + 'PMEReciprocalForce', + 'LJForce', + 'LJForceWithPMEDirectForce', + 'LJEnergy', + 'PMEExcludedForce', + 'PMEEnergy', + "Dihedral14LJForce", + "Dihedral14LJEnergy", + "Dihedral14LJForceWithDirectCF", + "Dihedral14LJCFForceWithAtomEnergy", + "Dihedral14LJAtomEnergy", + "Dihedral14CFEnergy", + "MDIterationLeapFrog", + "Dihedral14CFAtomEnergy", + "GetCenterOfGeometry", + "MDTemperature", + "NeighborListUpdate", + ] __all__.sort() diff --git a/mindspore/ops/operations/sponge_ops.py b/mindspore/ops/operations/sponge_ops.py index 3737fce043..d25e74553b 100644 --- a/mindspore/ops/operations/sponge_ops.py +++ b/mindspore/ops/operations/sponge_ops.py @@ -1,902 +1,1988 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Operators for sponge.""" - -from ..primitive import PrimitiveWithInfer, prim_attr_register -from ..._checkparam import Validator as validator -from ...common import dtype as mstype -from ..._checkparam import Rel - - -class BondForce(PrimitiveWithInfer): - """ - BondForce: - - Calculate the force exerted by the simple harmonic bond on the - corresponding atoms. Assume the number of harmonic bonds is M and - the number of atoms is N. - - .. math:: - - dr = (x_1-x_2, y_1-y_2, z_1-z_2) - F = (F_x, F_y, F_z) = 2*k*(1 - r_0/|dr|)*dr - - Inputs: - - **uint_crd_f** (Tensor, uint32 ) - [N, 3], the unsigned int coordinate - value of each atom. - - **scaler_f** (Tensor, float32) - [3, 1], the 3-D scale factor (x, y, z), - between the real space float coordinates and the unsigned int coordinates. - - **atom_a** (Tensor, int32) - [M, 1], the first atom index of each bond. - - **atom_b** (Tensor, int32) - [M, 1], the second atom index of each bond. - - **bond_k** (Tensor, float32) - [M, 1], the force constant of each bond. - - **bond_r0** (Tensor, float32) - [M, 1], the equlibrium length of each bond. - - Outputs: - - **frc_f** (float32 Tensor) - [N, 3], the force felt by each atom. - - Supported Platforms: - ``GPU`` - Examples: - """ - - @prim_attr_register - def __init__(self, bond_numbers): - self.bond_numbers = bond_numbers - self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'bond_k', 'bond_r0'], - outputs=['frc_f']) - self.add_prim_attr('bond_numbers', self.bond_numbers) - - def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, bond_k_shape, bond_r0_shape): - cls_name = self.name - # N = uint_crd_f_shape[0] - M = atom_a_shape[0] - validator.check_int( - uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) - validator.check_int( - scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) - validator.check_int( - atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) - validator.check_int( - bond_k_shape[0], M, Rel.EQ, "bond_k_shape", cls_name) - validator.check_int( - bond_r0_shape[0], M, Rel.EQ, "bond_r0_shape", cls_name) - return uint_crd_f_shape - - def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, bond_k_type, bond_r0_type): - validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) - validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('bond_k_type', bond_k_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('bond_r0_type', bond_r0_type, [mstype.float32], self.name) - return bond_r0_type - - -class BondEnergy(PrimitiveWithInfer): - """ - BondEnergyCuda: - - Calculate the harmonic potential energy between each bonded atom pair. - Assume our system has N atoms and M harmonic bonds. - - .. math:: - - dr = (x_1-x_2, y_1-y_2, z_1-z_2) - E = k*(|dr| - r_0)^2 - - Inputs: - Same as operator BondForce(). - - .. math:: - - dr = (x_1-x_2, y_1-y_2, z_1-z_2) - E = k*(|dr| - r_0)^2 - - Outputs: - - **bond_ene** (Tensor, float32) - [M, 1], the harmonic potential energy - for each bond. 
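The BondForce docstring pins the force down as F = 2*k*(1 - r0/|dr|)*dr, and BondEnergy pairs it with E = k*(|dr| - r0)^2; F is exactly the gradient of E with respect to dr. A direct numeric check of both formulas for a single stretched bond, with made-up k and r0:

#include <cmath>
#include <cstdio>

int main() {
  // One harmonic bond; coordinates in the same real-space frame as the docstring.
  double x1 = 0.0, y1 = 0.0, z1 = 0.0;
  double x2 = 1.2, y2 = 0.0, z2 = 0.0;
  double k = 300.0, r0 = 1.0;  // hypothetical force constant and equilibrium length

  double dx = x1 - x2, dy = y1 - y2, dz = z1 - z2;   // dr = (x1-x2, y1-y2, z1-z2)
  double r = std::sqrt(dx * dx + dy * dy + dz * dz);
  double c = 2.0 * k * (1.0 - r0 / r);               // F = 2k(1 - r0/|dr|) * dr
  std::printf("F = (%f, %f, %f)\n", c * dx, c * dy, c * dz);
  std::printf("E = %f\n", k * (r - r0) * (r - r0));  // E = k(|dr| - r0)^2
  return 0;
}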
- - Supported Platforms: - ``GPU`` - Examples: - """ - - @prim_attr_register - def __init__(self, bond_numbers): - self.bond_numbers = bond_numbers - self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'bond_k', 'bond_r0'], - outputs=['bond_ene']) - self.add_prim_attr('bond_numbers', self.bond_numbers) - - def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, bond_k_shape, bond_r0_shape): - cls_name = self.name - # N = uint_crd_f_shape[0] - M = atom_a_shape[0] - validator.check_int( - uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) - validator.check_int( - scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) - validator.check_int( - atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) - validator.check_int( - bond_k_shape[0], M, Rel.EQ, "bond_k_shape", cls_name) - validator.check_int( - bond_r0_shape[0], M, Rel.EQ, "bond_r0_shape", cls_name) - - return bond_k_shape - - def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, bond_k_type, bond_r0_type): - validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) - validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('bond_k_type', bond_k_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('bond_r0_type', bond_r0_type, [mstype.float32], self.name) - return bond_r0_type - - -class BondAtomEnergy(PrimitiveWithInfer): - """ - BondAtomEnergyCuda: - - Add the potential energy caused by simple harmonic bonds to the total - potential energy of each atom. - - The calculation formula is the same as operator BondEnergy(). - - Inputs: - Same as operator BondForce(). - - Outputs: - - **atom_ene** (Tensor, float32) - [N, 1], the accumulated potential - energy for each atom. 
- - Supported Platforms: - ``GPU`` - Examples: - """ - - @prim_attr_register - def __init__(self, bond_numbers): - self.bond_numbers = bond_numbers - self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'bond_k', 'bond_r0'], - outputs=['atom_ene']) - self.add_prim_attr('bond_numbers', self.bond_numbers) - - def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, bond_k_shape, bond_r0_shape): - cls_name = self.name - N = uint_crd_f_shape[0] - M = atom_a_shape[0] - validator.check_int( - uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) - validator.check_int( - scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) - validator.check_int( - atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) - validator.check_int( - bond_k_shape[0], M, Rel.EQ, "bond_k_shape", cls_name) - validator.check_int( - bond_r0_shape[0], M, Rel.EQ, "bond_r0_shape", cls_name) - return [N,] - - def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, bond_k_type, bond_r0_type): - validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) - validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('bond_k_type', bond_k_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('bond_r0_type', bond_r0_type, [mstype.float32], self.name) - return bond_r0_type - - -class BondForceWithAtomEnergy(PrimitiveWithInfer): - """ - BondForceWithAtomEnergy: - - Calculate bond force and harmonic potential energy together. - - The calculation formula is the same as operator BondForce() and BondEnergy(). - - Inputs: - Same as operator BondForce(). - - Outputs: - - **frc_f** (Tensor, float32) - [N, 3], same as operator BondForce(). - - **atom_e** (Tensor, float32) - [N, 1], same as atom_ene in operator BondAtomEnergy(). 
- - Supported Platforms: - ``GPU`` - Examples: - """ - - @prim_attr_register - def __init__(self, bond_numbers): - self.bond_numbers = bond_numbers - self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'bond_k', 'bond_r0'], - outputs=['frc_f', 'atom_e']) - self.add_prim_attr('bond_numbers', self.bond_numbers) - - def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, bond_k_shape, bond_r0_shape): - cls_name = self.name - N = uint_crd_f_shape[0] - M = atom_a_shape[0] - validator.check_int( - uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) - validator.check_int( - scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) - validator.check_int( - atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) - validator.check_int( - bond_k_shape[0], M, Rel.EQ, "bond_k_shape", cls_name) - validator.check_int( - bond_r0_shape[0], M, Rel.EQ, "bond_r0_shape", cls_name) - return uint_crd_f_shape, [N,] - - def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, bond_k_type, bond_r0_type): - validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) - validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('bond_k_type', bond_k_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('bond_r0_type', bond_r0_type, [mstype.float32], self.name) - return bond_r0_type, bond_r0_type - - -class BondForceWithAtomVirial(PrimitiveWithInfer): - """ - BondForceWithAtomVirial: - - Calculate bond force and the virial coefficient caused by simple harmonic - bond for each atom together. - - The calculation formula of the force part is the same as operator BondForce(). - The Virial part is as follows: - - .. math:: - - dr = (x_1-x_2, y_1-y_2, z_1-z_2) - virial = |dr|*(|dr| - r_0)*k - - Inputs: - Same as operator BondForce() - - Outputs: - - **frc_f** (Tensor, float32) - [N, 3], same as operator BondForce(). - - **atom_v** (Tensor, float32) - [N, 1], the accumulated virial coefficient - for each atom. 
- - Supported Platforms: - ``GPU`` - Examples: - """ - - @prim_attr_register - def __init__(self, bond_numbers): - self.bond_numbers = bond_numbers - self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'bond_k', 'bond_r0'], - outputs=['frc_f', 'atom_v']) - self.add_prim_attr('bond_numbers', self.bond_numbers) - - def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, bond_k_shape, bond_r0_shape): - cls_name = self.name - N = uint_crd_f_shape[0] - M = atom_a_shape[0] - validator.check_int( - uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) - validator.check_int( - scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) - validator.check_int( - atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) - validator.check_int( - bond_k_shape[0], M, Rel.EQ, "bond_k_shape", cls_name) - validator.check_int( - bond_r0_shape[0], M, Rel.EQ, "bond_r0_shape", cls_name) - return uint_crd_f_shape, [N,] - - def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, bond_k_type, bond_r0_type): - validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) - validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('bond_k_type', bond_k_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('bond_r0_type', bond_r0_type, [mstype.float32], self.name) - return bond_r0_type, bond_r0_type - - -class DihedralForce(PrimitiveWithInfer): - """ - DihedralForce: - - Calculate the force exerted by the dihedral term which made of 4-atoms - on the corresponding atoms. Assume the number of dihedral terms is M and - the number of atoms is N. - - .. math:: - - dr_{ab} = (x_b-x_a, y_b-y_a, z_b-z_a) - dr_{cb} = (x_b-x_c, y_b-y_c, z_b-z_c) - dr_{cd} = (x_d-x_c, y_d-y_c, z_d-z_c) - - r1 = dr_{ab}*dr_{cb} - r2 = dr_{cd}*dr_{cb} - - phi = pi - sign(inner_product(r1*r2), dr_{cb}) - * arccos(inner_product(r1, r2)/|r1|/|r2|) - dEdphi = n*phi*(k*cos(phi_0)*sin(n*phi) - k*sin(phi_0)*cos(n*phi))/sin(phi) - dphidr1 = r2/|r1|/|r2| + cos(phi)/|r1|^2*r1 - dphidr2 = r1/|r1|/|r2| + cos(phi)/|r2|^2*r2 - - dEdra = dEdphi * dr_{cb} * dphidr1 - dEdrd = dEdphi * dphi_dr2 * dr_{cb} - dEdrjpart = dEdphi * ((dr_{ab} * dphidr1) + (dr_{cd} * dphidr2)) - - F_a = dEdri - F_b = dEdrjpart - dEdri - F_c = - dEdrl - dEdrjpart - F_d = dEdrl - - Inputs: - - **uint_crd_f** (Tensor, uint32) - [N, 3], the unsigned int coordinates - value of each atom. - - **scalar_f** (Tensor, float32) - [3, ], the 3-D scale factor between - the real space float coordinates and the unsigned int coordinates. - - **atom_a** (Tensor, int32) - [M, ], the 1st atom index of each dihedral. - - **atom_b** (Tensor, int32) - [M, ], the 2nd atom index of each dihedral. - - **atom_c** (Tensor, int32) - [M, ], the 3rd atom index of each dihedral. - - **atom_d** (Tensor, int32) - [M, ], the 4th atom index of each dihedral. - 4 atoms are connected in the form a-b-c-d. - - **ipn** (Tensor, int32) - [M, ], the period of dihedral angle of each dihedral. - - **pk** (Tensor, float32) - [M, ], the force constant of each dihedral. - - **gamc** (Tensor, float32) - [M, ], k*cos(phi_0) of each dihedral. - - **gams** (Tensor, float32) - [M, ], k*sin(phi_0) of each dihedral. 
- - **pn** (Tensor, float32) - [M, ], the floating point form of ipn. - - Outputs: - - **frc_f** (Tensor, float32) - [N, 3], the force felt by each atom. - - Supported Platforms: - ``GPU`` - - Examples: - """ - - @prim_attr_register - def __init__(self, dihedral_numbers): - self.dihedral_numbers = dihedral_numbers - self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'atom_d', 'ipn', 'pk', - 'gamc', 'gams', 'pn'], - outputs=['frc_f']) - self.add_prim_attr('dihedral_numbers', self.dihedral_numbers) - - def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, atom_d_shape, - ipn_shape, pk_shape, gamc_shape, gams_shape, pn_shape): - cls_name = self.name - M = atom_a_shape[0] - validator.check_int( - uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) - validator.check_int( - scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) - validator.check_int( - atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name) - validator.check_int( - atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) - validator.check_int( - atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name) - validator.check_int( - atom_d_shape[0], M, Rel.EQ, "atom_d_shape", cls_name) - validator.check_int(ipn_shape[0], M, Rel.EQ, "ipn_shape", cls_name) - validator.check_int(pk_shape[0], M, Rel.EQ, "pk_shape", cls_name) - validator.check_int(gamc_shape[0], M, Rel.EQ, "gamc_shape", cls_name) - validator.check_int(gams_shape[0], M, Rel.EQ, "gams_shape", cls_name) - validator.check_int(pn_shape[0], M, Rel.EQ, "pn_shape", cls_name) - return uint_crd_f_shape - - def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, atom_d_type, - ipn_type, pk_type, gamc_type, gams_type, pn_type): - validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) - validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_d_type', atom_d_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('ipn_type', ipn_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('pk_type', pk_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('gamc_type', gamc_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('gams_type', gams_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('pn_type', pn_type, [mstype.float32], self.name) - return pn_type - - -class DihedralEnergy(PrimitiveWithInfer): - """ - DihedralEnergy: - - Calculate the potential energy caused by dihedral terms for each 4-atom pair. - Assume our system has N atoms and M dihedral terms. - - .. math:: - - E = k(1 + cos(n*phi - phi_0)) - - Inputs: - Same as operator DihedralForce(). - - Outputs: - - **ene** (Tensor, float32) - [M, ], the potential energy for each - dihedral term. 
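DihedralEnergy evaluates E = k*(1 + cos(n*phi - phi_0)). Because the operator takes gamc = k*cos(phi_0) and gams = k*sin(phi_0) instead of phi_0 itself, the angle-addition identity lets the kernel avoid reconstructing phi_0 entirely: E = k + gamc*cos(n*phi) + gams*sin(n*phi). A numeric check of that equivalence:

#include <cmath>
#include <cstdio>

int main() {
  const double k = 2.5;                       // pk: force constant (hypothetical)
  const double phi0 = std::acos(-1.0) / 3.0;  // 60 degrees
  const double n = 2.0, phi = 0.8;            // pn and a sample dihedral angle

  const double gamc = k * std::cos(phi0);     // k*cos(phi_0), as the inputs define it
  const double gams = k * std::sin(phi0);     // k*sin(phi_0)

  double direct = k * (1.0 + std::cos(n * phi - phi0));
  double folded = k + gamc * std::cos(n * phi) + gams * std::sin(n * phi);
  std::printf("direct = %.12f  via gamc/gams = %.12f\n", direct, folded);
  return 0;
}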
- - Supported Platforms: - ``GPU`` - - Examples: - """ - - @prim_attr_register - def __init__(self, dihedral_numbers): - self.dihedral_numbers = dihedral_numbers - self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'atom_d', 'ipn', 'pk', - 'gamc', 'gams', 'pn'], - outputs=['ene']) - self.add_prim_attr('dihedral_numbers', self.dihedral_numbers) - - def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, atom_d_shape, - ipn_shape, pk_shape, gamc_shape, gams_shape, pn_shape): - cls_name = self.name - M = atom_a_shape[0] - validator.check_int( - uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) - validator.check_int( - scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) - validator.check_int( - atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name) - validator.check_int( - atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) - validator.check_int( - atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name) - validator.check_int( - atom_d_shape[0], M, Rel.EQ, "atom_d_shape", cls_name) - validator.check_int(ipn_shape[0], M, Rel.EQ, "ipn_shape", cls_name) - validator.check_int(pk_shape[0], M, Rel.EQ, "pk_shape", cls_name) - validator.check_int(gamc_shape[0], M, Rel.EQ, "gamc_shape", cls_name) - validator.check_int(gams_shape[0], M, Rel.EQ, "gams_shape", cls_name) - validator.check_int(pn_shape[0], M, Rel.EQ, "pn_shape", cls_name) - return [M,] - - def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, atom_d_type, - ipn_type, pk_type, gamc_type, gams_type, pn_type): - validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) - validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_d_type', atom_d_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('ipn_type', ipn_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('pk_type', pk_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('gamc_type', gamc_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('gams_type', gams_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('pn_type', pn_type, [mstype.float32], self.name) - return pn_type - - -class DihedralAtomEnergy(PrimitiveWithInfer): - """ - DihedralAtomEnergy: - - Add the potential energy caused by dihedral terms to the total potential - energy of each atom. - - The calculation formula is the same as operator DihedralEnergy(). - - Inputs: - Same as operator DihedralEnergy(). - - Outputs: - - **ene** (Tensor, float32) - [N, ], the accumulated potential - energy for each atom. 
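DihedralAtomEnergy differs from DihedralEnergy only in where the M per-term energies land: they are scattered onto the N atoms instead of being returned per term. On the host that is a plain scatter-add; the GPU kernels would typically use atomicAdd for the same effect, though that detail is an assumption here. A sketch that, for simplicity, credits each term to its first atom only:

#include <cstdio>
#include <vector>

int main() {
  // Hypothetical per-term energies and their first-atom indices (M = 4 terms, N = 4 atoms).
  std::vector<int> atom_a = {0, 1, 1, 3};
  std::vector<double> term_ene = {0.4, 0.1, 0.3, 0.2};
  std::vector<double> atom_ene(4, 0.0);

  for (size_t m = 0; m < term_ene.size(); ++m)
    atom_ene[atom_a[m]] += term_ene[m];  // scatter-add; atomicAdd on the GPU

  for (size_t i = 0; i < atom_ene.size(); ++i)
    std::printf("atom %zu: %.2f\n", i, atom_ene[i]);
  return 0;
}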
- - Supported Platforms: - ``GPU`` - - Examples: - """ - - @prim_attr_register - def __init__(self, dihedral_numbers): - self.dihedral_numbers = dihedral_numbers - self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'atom_d', 'ipn', 'pk', - 'gamc', 'gams', 'pn'], - outputs=['ene']) - self.add_prim_attr('dihedral_numbers', self.dihedral_numbers) - - def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, atom_d_shape, - ipn_shape, pk_shape, gamc_shape, gams_shape, pn_shape): - cls_name = self.name - N = uint_crd_f_shape[0] - M = atom_a_shape[0] - validator.check_int( - uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) - validator.check_int( - scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) - validator.check_int( - atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name) - validator.check_int( - atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) - validator.check_int( - atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name) - validator.check_int( - atom_d_shape[0], M, Rel.EQ, "atom_d_shape", cls_name) - validator.check_int(ipn_shape[0], M, Rel.EQ, "ipn_shape", cls_name) - validator.check_int(pk_shape[0], M, Rel.EQ, "pk_shape", cls_name) - validator.check_int(gamc_shape[0], M, Rel.EQ, "gamc_shape", cls_name) - validator.check_int(gams_shape[0], M, Rel.EQ, "gams_shape", cls_name) - validator.check_int(pn_shape[0], M, Rel.EQ, "pn_shape", cls_name) - return [N,] - - def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, atom_d_type, - ipn_type, pk_type, gamc_type, gams_type, pn_type): - validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) - validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_d_type', atom_d_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('ipn_type', ipn_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('pk_type', pk_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('gamc_type', gamc_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('gams_type', gams_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('pn_type', pn_type, [mstype.float32], self.name) - return pn_type - - -class DihedralForceWithAtomEnergy(PrimitiveWithInfer): - """ - DihedralForceWithAtomEnergy: - - Calculate dihedral force and potential energy together. - - The calculation formula is the same as operator DihedralForce() and DihedralEnergy(). - - Inputs: - Same as operator DihedralForce(). - - Outputs: - - **frc_f** (Tensor, float32) - [N, 3], same as operator DihedralForce(). - - **ene** (Tensor, float32) - [N, ], same as operator DihedralAtomEnergy(). 
- - Supported Platforms: - ``GPU`` - - Examples: - """ - - @prim_attr_register - def __init__(self, dihedral_numbers): - self.dihedral_numbers = dihedral_numbers - self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'atom_d', 'ipn', 'pk', - 'gamc', 'gams', 'pn'], - outputs=['frc_f', 'ene']) - self.add_prim_attr('dihedral_numbers', self.dihedral_numbers) - - def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, atom_d_shape, - ipn_shape, pk_shape, gamc_shape, gams_shape, pn_shape): - cls_name = self.name - N = uint_crd_f_shape[0] - M = atom_a_shape[0] - validator.check_int( - uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) - validator.check_int( - scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) - validator.check_int( - atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name) - validator.check_int( - atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) - validator.check_int( - atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name) - validator.check_int( - atom_d_shape[0], M, Rel.EQ, "atom_d_shape", cls_name) - validator.check_int(ipn_shape[0], M, Rel.EQ, "ipn_shape", cls_name) - validator.check_int(pk_shape[0], M, Rel.EQ, "pk_shape", cls_name) - validator.check_int(gamc_shape[0], M, Rel.EQ, "gamc_shape", cls_name) - validator.check_int(gams_shape[0], M, Rel.EQ, "gams_shape", cls_name) - validator.check_int(pn_shape[0], M, Rel.EQ, "pn_shape", cls_name) - return uint_crd_f_shape, [N,] - - def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, atom_d_type, - ipn_type, pk_type, gamc_type, gams_type, pn_type): - validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) - validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_d_type', atom_d_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('ipn_type', ipn_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('pk_type', pk_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('gamc_type', gamc_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('gams_type', gams_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('pn_type', pn_type, [mstype.float32], self.name) - return pn_type, pn_type - - -class AngleForce(PrimitiveWithInfer): - """ - AngleForce: - - Calculate the force exerted by angles made of 3 atoms on the - corresponding atoms. Assume the number of angles is M and the - number of atoms is N. - - .. math:: - - dr_{ab} = (x_b-x_a, y_b-y_a, z_b-z_a) - dr_{cb} = (x_b-x_c, y_b-y_c, z_b-z_c) - theta = arccos(inner_product(dr_{ab}, dr_{cb})/|dr_{ab}|/|dr_{cb}|) - F_a = -2*k*(theta-theta_0)/sin(theta)*[cos(theta)/|dr_{ab}|^2*dr_{ab} - - 1/|dr_{ab}|/|dr_{cb}|*dr_{cb}] - F_c = -2*k*(theta-theta_0)/sin(theta)*[cos(theta)/|dr_{cb}|^2*dr_{cb} - - 1/|dr_{cb}|/|dr_{ab}|*dr_{ab}] - F_b = -F_a - F_c - - Inputs: - - **uint_crd_f** (Tensor, uint32) - [N, 3], the unsigned int coordinate - value of each atom. 
- - **scaler_f** (Tensor, float32) - [3, ], the 3-D scale factor between - the real space float coordinates and the unsigned int coordinates. - - **atom_a** (Tensor, int32) - [M, ], the 1st atom index of each angle. - - **atom_b** (Tensor, int32) - [M, ], the 2nd and the central atom index - of each angle. - - **atom_c** (Tensor, int32) - [M, ], the 3rd atom index of each angle. - - **angle_k** (Tensor, float32) - [M, ], the force constant for each angle. - - **angle_theta0** (Tensor, float32) - [M, ], the equilibrium position value - for each angle. - - Outputs: - - **frc_f** (Tensor, float32) - [N, 3], the force felt by each atom. - - Supported Platforms: - ``GPU`` - - Examples: - """ - - @prim_attr_register - def __init__(self, angle_numbers): - self.angle_numbers = angle_numbers - self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'angle_k', - 'angle_theta0'], - outputs=['frc_f']) - self.add_prim_attr('angle_numbers', self.angle_numbers) - - def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, angle_k_shape, - angle_theta0_shape): - cls_name = self.name - M = atom_a_shape[0] - validator.check_int( - uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) - validator.check_int( - scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) - validator.check_int( - atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name) - validator.check_int( - atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) - validator.check_int( - atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name) - validator.check_int( - angle_k_shape[0], M, Rel.EQ, "angle_k_shape", cls_name) - validator.check_int( - angle_theta0_shape[0], M, Rel.EQ, "angle_theta0_shape", cls_name) - return uint_crd_f_shape - - def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, angle_k_type, - angle_theta0_type): - validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) - validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('angle_k_type', angle_k_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('angle_theta0_type', angle_theta0_type, [mstype.float32], self.name) - return angle_k_type - - -class AngleEnergy(PrimitiveWithInfer): - """ - AngleEnergy: - - Calculate the energy caused by 3-atoms angle term. - - .. math:: - - dr_{ab} = (x_b-x_a, y_b-y_a, z_b-z_a) - dr_{cb} = (x_b-x_c, y_b-y_c, z_b-z_c) - theta = arccos(inner_product(dr_{ab}, dr_{cb})/|dr_{ab}|/|dr_{cb}|) - E = k*(theta - theta_0)^2 - - Inputs: - Same as operator AngleForce(). - - Outputs: - - **ene** (Tensor, float32) - [M, ], the potential energy for - each angle term. 
- - Supported Platforms: - ``GPU`` - - Examples: - """ - - @prim_attr_register - def __init__(self, angle_numbers): - self.angle_numbers = angle_numbers - self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'angle_k', - 'angle_theta0'], - outputs=['ene']) - self.add_prim_attr('angle_numbers', self.angle_numbers) - - def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, angle_k_shape, - angle_theta0_shape): - cls_name = self.name - M = atom_a_shape[0] - validator.check_int( - uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) - validator.check_int( - scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) - validator.check_int( - atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name) - validator.check_int( - atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) - validator.check_int( - atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name) - validator.check_int( - angle_k_shape[0], M, Rel.EQ, "angle_k_shape", cls_name) - validator.check_int( - angle_theta0_shape[0], M, Rel.EQ, "angle_theta0_shape", cls_name) - return [M,] - - def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, angle_k_type, - angle_theta0_type): - validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) - validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('angle_k_type', angle_k_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('angle_theta0_type', angle_theta0_type, [mstype.float32], self.name) - return angle_k_type - - -class AngleAtomEnergy(PrimitiveWithInfer): - """ - AngleAtomEnergy: - - Add the potential energy caused by angle terms to the total potential - energy of each atom. - - The calculation formula is the same as operator AngleEnergy(). - - Inputs: - Same as operator AngleForce(). - - Outputs: - - **ene** (Tensor, float32) - [N, ], the accumulated potential energy - for each atom. 
- - Supported Platforms: - ``GPU`` - - Examples: - """ - - @prim_attr_register - def __init__(self, angle_numbers): - self.angle_numbers = angle_numbers - self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'angle_k', - 'angle_theta0'], - outputs=['ene']) - self.add_prim_attr('angle_numbers', self.angle_numbers) - - def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, angle_k_shape, - angle_theta0_shape): - cls_name = self.name - N = uint_crd_f_shape[0] - M = atom_a_shape[0] - validator.check_int( - uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) - validator.check_int( - scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) - validator.check_int( - atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name) - validator.check_int( - atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) - validator.check_int( - atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name) - validator.check_int( - angle_k_shape[0], M, Rel.EQ, "angle_k_shape", cls_name) - validator.check_int( - angle_theta0_shape[0], M, Rel.EQ, "angle_theta0_shape", cls_name) - return [N,] - - def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, angle_k_type, - angle_theta0_type): - validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) - validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('angle_k_type', angle_k_type, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('angle_theta0_type', angle_theta0_type, [mstype.float32], self.name) - return angle_k_type - - -class AngleForceWithAtomEnergy(PrimitiveWithInfer): - """ - AngleForceWithAtomEnergy: - - Calculate angle force and potential energy together. - - The calculation formula is the same as operator AngleForce() and AngleEnergy(). - - Inputs: - Same as operator AngleForce(). - - Outputs: - - **frc_f** (Tensor, float32) - [N, 3], same as operator AngleForce(). - - **ene** (Tensor, float) - [N, ], same as operator AngleAtomEnergy(). 
-
-    Supported Platforms:
-        ``GPU``
-
-    Examples:
-    """
-
-    @prim_attr_register
-    def __init__(self, angle_numbers):
-        self.angle_numbers = angle_numbers
-        self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'angle_k',
-                                        'angle_theta0'],
-                                outputs=['frc_f', 'ene'])
-        self.add_prim_attr('angle_numbers', self.angle_numbers)
-
-    def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, angle_k_shape,
-                    angle_theta0_shape):
-        cls_name = self.name
-        N = uint_crd_f_shape[0]
-        M = atom_a_shape[0]
-        validator.check_int(
-            uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name)
-        validator.check_int(
-            scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name)
-        validator.check_int(
-            atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name)
-        validator.check_int(
-            atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name)
-        validator.check_int(
-            atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name)
-        validator.check_int(
-            angle_k_shape[0], M, Rel.EQ, "angle_k_shape", cls_name)
-        validator.check_int(
-            angle_theta0_shape[0], M, Rel.EQ, "angle_theta0_shape", cls_name)
-        return uint_crd_f_shape, [N,]
-
-    def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, angle_k_type,
-                    angle_theta0_type):
-        validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name)
-        validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name)
-        validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name)
-        validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name)
-        validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name)
-        validator.check_tensor_dtype_valid('angle_k_type', angle_k_type, [mstype.float32], self.name)
-        validator.check_tensor_dtype_valid('angle_theta0_type', angle_theta0_type, [mstype.float32], self.name)
-        return angle_k_type, angle_k_type
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Operators for sponge."""
+
+from ..primitive import PrimitiveWithInfer, prim_attr_register
+from ..._checkparam import Rel
+from ..._checkparam import Validator as validator
+from ...common import dtype as mstype
+
+
+class BondForce(PrimitiveWithInfer):
+    """
+    BondForce:
+
+    Calculate the force exerted by the simple harmonic bond on the
+    corresponding atoms. Assume the number of harmonic bonds is M and
+    the number of atoms is N.
+
+    .. math::
+
+        dr = (x_1-x_2, y_1-y_2, z_1-z_2)
+        F = (F_x, F_y, F_z) = 2*k*(1 - r_0/|dr|)*dr
+
+    Inputs:
+        - **atom_numbers** (int32) - the number of atoms N.
+        - **bond_numbers** (int32) - the number of harmonic bonds M.
+        - **uint_crd_f** (Tensor, uint32) - [N, 3], the unsigned int coordinate
+          value of each atom.
+        - **scaler_f** (Tensor, float32) - [3,], the 3-D scale factor (x, y, z),
+          between the real space float coordinates and the unsigned int coordinates.
+        - **atom_a** (Tensor, int32) - [M,], the first atom index of each bond.
+        - **atom_b** (Tensor, int32) - [M,], the second atom index of each bond.
+        - **bond_k** (Tensor, float32) - [M,], the force constant of each bond.
+        - **bond_r0** (Tensor, float32) - [M,], the equilibrium length of each bond.
+
+    Outputs:
+        - **frc_f** (Tensor, float32) - [N, 3], the force felt by each atom.
+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+    """
+
+    @prim_attr_register
+    def __init__(self, bond_numbers, atom_numbers):
+        self.bond_numbers = bond_numbers
+        self.atom_numbers = atom_numbers
+        self.add_prim_attr('bond_numbers', self.bond_numbers)
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+        self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'bond_k', 'bond_r0'],
+                                outputs=['frc_f'])
+
+    def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, bond_k_shape, bond_r0_shape):
+        cls_name = self.name
+        N = self.atom_numbers
+        M = self.bond_numbers
+        validator.check_int(uint_crd_f_shape[0], N, Rel.EQ, "uint_crd_f_shape", cls_name)
+        validator.check_int(uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name)
+        validator.check_int(scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name)
+        validator.check_int(atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name)
+        validator.check_int(atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name)
+        validator.check_int(bond_k_shape[0], M, Rel.EQ, "bond_k_shape", cls_name)
+        validator.check_int(bond_r0_shape[0], M, Rel.EQ, "bond_r0_shape", cls_name)
+        return uint_crd_f_shape
+
+    def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, bond_k_type, bond_r0_type):
+        validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('bond_k_type', bond_k_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('bond_r0_type', bond_r0_type, [mstype.float32], self.name)
+        return bond_r0_type
+
+
+class BondEnergy(PrimitiveWithInfer):
+    """
+    BondEnergy:
+
+    Calculate the harmonic potential energy between each bonded atom pair.
+    Assume our system has N atoms and M harmonic bonds.
+
+    .. math::
+
+        dr = (x_1-x_2, y_1-y_2, z_1-z_2)
+        E = k*(|dr| - r_0)^2
+
+    Inputs:
+        Same as operator BondForce().
+
+    Outputs:
+        - **bond_ene** (Tensor, float32) - [M,], the harmonic potential energy
+          for each bond.
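+
+    A minimal invocation sketch (illustrative values only; assumes the operator
+    is exported through mindspore.ops.operations as this patch intends, and a
+    GPU device is available):
+
+    >>> import numpy as np
+    >>> from mindspore import Tensor
+    >>> from mindspore.ops import operations as P
+    >>> bond_energy = P.BondEnergy(bond_numbers=1, atom_numbers=2)
+    >>> uint_crd_f = Tensor(np.array([[0, 0, 0], [200, 0, 0]], np.uint32))
+    >>> scaler_f = Tensor(np.array([0.01, 0.01, 0.01], np.float32))  # uint -> float scale
+    >>> atom_a = Tensor(np.array([0], np.int32))
+    >>> atom_b = Tensor(np.array([1], np.int32))
+    >>> bond_k = Tensor(np.array([10.0], np.float32))
+    >>> bond_r0 = Tensor(np.array([1.0], np.float32))
+    >>> bond_ene = bond_energy(uint_crd_f, scaler_f, atom_a, atom_b, bond_k, bond_r0)
+    >>> bond_ene.shape  # [M,]
+    (1,)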
+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+    """
+
+    @prim_attr_register
+    def __init__(self, bond_numbers, atom_numbers):
+        self.bond_numbers = bond_numbers
+        self.atom_numbers = atom_numbers
+        self.add_prim_attr('bond_numbers', self.bond_numbers)
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+        self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'bond_k', 'bond_r0'],
+                                outputs=['bond_ene'])
+
+    def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, bond_k_shape, bond_r0_shape):
+        cls_name = self.name
+        N = self.atom_numbers
+        M = self.bond_numbers
+        validator.check_int(uint_crd_f_shape[0], N, Rel.EQ, "uint_crd_f_shape", cls_name)
+        validator.check_int(uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name)
+        validator.check_int(scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name)
+        validator.check_int(atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name)
+        validator.check_int(atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name)
+        validator.check_int(bond_k_shape[0], M, Rel.EQ, "bond_k_shape", cls_name)
+        validator.check_int(bond_r0_shape[0], M, Rel.EQ, "bond_r0_shape", cls_name)
+
+        return bond_k_shape
+
+    def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, bond_k_type, bond_r0_type):
+        validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('bond_k_type', bond_k_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('bond_r0_type', bond_r0_type, [mstype.float32], self.name)
+        return bond_r0_type
+
+
+class BondAtomEnergy(PrimitiveWithInfer):
+    """
+    BondAtomEnergy:
+
+    Add the potential energy caused by simple harmonic bonds to the total
+    potential energy of each atom.
+
+    The calculation formula is the same as operator BondEnergy().
+
+    Inputs:
+        Same as operator BondForce().
+
+    Outputs:
+        - **atom_ene** (Tensor, float32) - [N,], the accumulated potential
+          energy for each atom.
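+
+    Reusing the tensors from the BondEnergy sketch above (same assumptions),
+    the bond energies would instead be accumulated onto the atoms:
+
+    >>> bond_atom_energy = P.BondAtomEnergy(bond_numbers=1, atom_numbers=2)
+    >>> atom_ene = bond_atom_energy(uint_crd_f, scaler_f, atom_a, atom_b, bond_k, bond_r0)
+    >>> atom_ene.shape  # [N,]
+    (2,)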
+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+    """
+
+    @prim_attr_register
+    def __init__(self, bond_numbers, atom_numbers):
+        self.bond_numbers = bond_numbers
+        self.atom_numbers = atom_numbers
+        self.add_prim_attr('bond_numbers', self.bond_numbers)
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+        self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'bond_k', 'bond_r0'],
+                                outputs=['atom_ene'])
+
+    def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, bond_k_shape, bond_r0_shape):
+        cls_name = self.name
+        N = self.atom_numbers
+        M = self.bond_numbers
+        validator.check_int(uint_crd_f_shape[0], N, Rel.EQ, "uint_crd_f_shape", cls_name)
+        validator.check_int(uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name)
+        validator.check_int(scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name)
+        validator.check_int(atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name)
+        validator.check_int(atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name)
+        validator.check_int(bond_k_shape[0], M, Rel.EQ, "bond_k_shape", cls_name)
+        validator.check_int(bond_r0_shape[0], M, Rel.EQ, "bond_r0_shape", cls_name)
+
+        return [N,]
+
+    def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, bond_k_type, bond_r0_type):
+        validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('bond_k_type', bond_k_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('bond_r0_type', bond_r0_type, [mstype.float32], self.name)
+        return bond_r0_type
+
+
+class BondForceWithAtomEnergy(PrimitiveWithInfer):
+    """
+    BondForceWithAtomEnergy:
+
+    Calculate bond force and harmonic potential energy together.
+
+    The calculation formula is the same as operator BondForce() and BondEnergy().
+
+    Inputs:
+        Same as operator BondForce().
+
+    Outputs:
+        - **frc_f** (Tensor, float32) - [N, 3], same as operator BondForce().
+        - **atom_e** (Tensor, float32) - [N,], same as atom_ene in operator BondAtomEnergy().
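+
+    A hedged sketch with the same illustrative tensors as the BondEnergy
+    example above, showing the fused two-output form:
+
+    >>> bond_force_energy = P.BondForceWithAtomEnergy(bond_numbers=1, atom_numbers=2)
+    >>> frc_f, atom_e = bond_force_energy(uint_crd_f, scaler_f, atom_a, atom_b, bond_k, bond_r0)
+    >>> frc_f.shape, atom_e.shape  # [N, 3], [N,]
+    ((2, 3), (2,))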
+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+    """
+
+    @prim_attr_register
+    def __init__(self, bond_numbers, atom_numbers):
+        self.bond_numbers = bond_numbers
+        self.atom_numbers = atom_numbers
+        self.add_prim_attr('bond_numbers', self.bond_numbers)
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+        self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'bond_k', 'bond_r0'],
+                                outputs=['frc_f', 'atom_e'])
+
+    def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, bond_k_shape, bond_r0_shape):
+        cls_name = self.name
+        N = self.atom_numbers
+        M = self.bond_numbers
+        validator.check_int(uint_crd_f_shape[0], N, Rel.EQ, "uint_crd_f_shape", cls_name)
+        validator.check_int(uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name)
+        validator.check_int(scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name)
+        validator.check_int(atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name)
+        validator.check_int(atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name)
+        validator.check_int(bond_k_shape[0], M, Rel.EQ, "bond_k_shape", cls_name)
+        validator.check_int(bond_r0_shape[0], M, Rel.EQ, "bond_r0_shape", cls_name)
+
+        return uint_crd_f_shape, [N,]
+
+    def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, bond_k_type, bond_r0_type):
+        validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name)
+
+        validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name)
+
+        validator.check_tensor_dtype_valid('bond_k_type', bond_k_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('bond_r0_type', bond_r0_type, [mstype.float32], self.name)
+        return bond_r0_type, bond_r0_type
+
+
+class BondForceWithAtomVirial(PrimitiveWithInfer):
+    """
+    BondForceWithAtomVirial:
+
+    Calculate bond force and the virial coefficient caused by simple harmonic
+    bond for each atom together.
+
+    The calculation formula of the force part is the same as operator BondForce().
+    The virial part is as follows:
+
+    .. math::
+
+        dr = (x_1-x_2, y_1-y_2, z_1-z_2)
+        virial = |dr|*(|dr| - r_0)*k
+
+    Inputs:
+        Same as operator BondForce().
+
+    Outputs:
+        - **frc_f** (Tensor, float32) - [N, 3], same as operator BondForce().
+        - **atom_v** (Tensor, float32) - [N,], the accumulated virial coefficient
+          for each atom.
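+
+    With the same illustrative tensors as the BondEnergy sketch above (same
+    assumptions), the fused force/virial form would be:
+
+    >>> bond_force_virial = P.BondForceWithAtomVirial(bond_numbers=1, atom_numbers=2)
+    >>> frc_f, atom_v = bond_force_virial(uint_crd_f, scaler_f, atom_a, atom_b, bond_k, bond_r0)
+    >>> frc_f.shape, atom_v.shape  # [N, 3], [N,]
+    ((2, 3), (2,))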
+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+    """
+
+    @prim_attr_register
+    def __init__(self, bond_numbers, atom_numbers):
+        self.bond_numbers = bond_numbers
+        self.atom_numbers = atom_numbers
+        self.add_prim_attr('bond_numbers', self.bond_numbers)
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+        self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'bond_k', 'bond_r0'],
+                                outputs=['frc_f', 'atom_v'])
+
+    def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, bond_k_shape, bond_r0_shape):
+        cls_name = self.name
+        N = self.atom_numbers
+        M = self.bond_numbers
+        validator.check_int(uint_crd_f_shape[0], N, Rel.EQ, "uint_crd_f_shape", cls_name)
+        validator.check_int(uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name)
+        validator.check_int(scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name)
+        validator.check_int(atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name)
+        validator.check_int(atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name)
+        validator.check_int(bond_k_shape[0], M, Rel.EQ, "bond_k_shape", cls_name)
+        validator.check_int(bond_r0_shape[0], M, Rel.EQ, "bond_r0_shape", cls_name)
+
+        return uint_crd_f_shape, [N,]
+
+    def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, bond_k_type, bond_r0_type):
+        validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name)
+
+        validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name)
+
+        validator.check_tensor_dtype_valid('bond_k_type', bond_k_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('bond_r0_type', bond_r0_type, [mstype.float32], self.name)
+        return bond_r0_type, bond_r0_type
+
+
+class DihedralForce(PrimitiveWithInfer):
+    """
+    DihedralForce:
+
+    Calculate the force exerted by the dihedral terms, each made of 4 atoms,
+    on the corresponding atoms. Assume the number of dihedral terms is M and
+    the number of atoms is N.
+
+    .. math::
+
+        dr_{ab} = (x_b-x_a, y_b-y_a, z_b-z_a)
+        dr_{cb} = (x_b-x_c, y_b-y_c, z_b-z_c)
+        dr_{cd} = (x_d-x_c, y_d-y_c, z_d-z_c)
+
+        r1 = dr_{ab}*dr_{cb}
+        r2 = dr_{cd}*dr_{cb}
+
+        phi = pi - sign(inner_product(r1*r2), dr_{cb})
+            * arccos(inner_product(r1, r2)/|r1|/|r2|)
+        dEdphi = n*phi*(k*cos(phi_0)*sin(n*phi) - k*sin(phi_0)*cos(n*phi))/sin(phi)
+        dphidr1 = r2/|r1|/|r2| + cos(phi)/|r1|^2*r1
+        dphidr2 = r1/|r1|/|r2| + cos(phi)/|r2|^2*r2
+
+        dEdri = dEdphi * dr_{cb} * dphidr1
+        dEdrl = dEdphi * dphidr2 * dr_{cb}
+        dEdrjpart = dEdphi * ((dr_{ab} * dphidr1) + (dr_{cd} * dphidr2))
+
+        F_a = dEdri
+        F_b = dEdrjpart - dEdri
+        F_c = - dEdrl - dEdrjpart
+        F_d = dEdrl
+
+    Inputs:
+        - **dihedral_numbers** (int32) - the number of dihedral terms M.
+        - **uint_crd_f** (Tensor, uint32) - [N, 3], the unsigned int coordinate
+          value of each atom.
+        - **scaler_f** (Tensor, float32) - [3, ], the 3-D scale factor between
+          the real space float coordinates and the unsigned int coordinates.
+        - **atom_a** (Tensor, int32) - [M, ], the 1st atom index of each dihedral.
+        - **atom_b** (Tensor, int32) - [M, ], the 2nd atom index of each dihedral.
+        - **atom_c** (Tensor, int32) - [M, ], the 3rd atom index of each dihedral.
+        - **atom_d** (Tensor, int32) - [M, ], the 4th atom index of each dihedral.
+          4 atoms are connected in the form a-b-c-d.
+ - **ipn** (Tensor, int32) - [M, ], the period of dihedral angle of each dihedral. + - **pk** (Tensor, float32) - [M, ], the force constant of each dihedral. + - **gamc** (Tensor, float32) - [M, ], k*cos(phi_0) of each dihedral. + - **gams** (Tensor, float32) - [M, ], k*sin(phi_0) of each dihedral. + - **pn** (Tensor, float32) - [M, ], the floating point form of ipn. + + Outputs: + - **frc_f** (Tensor, float32) - [N, 3], the force felt by each atom. + + Supported Platforms: + ``GPU`` + + Examples: + """ + + @prim_attr_register + def __init__(self, dihedral_numbers): + self.dihedral_numbers = dihedral_numbers + self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'atom_d', 'ipn', 'pk', + 'gamc', 'gams', 'pn'], + outputs=['frc_f']) + self.add_prim_attr('dihedral_numbers', self.dihedral_numbers) + + def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, atom_d_shape, + ipn_shape, pk_shape, gamc_shape, gams_shape, pn_shape): + cls_name = self.name + M = atom_a_shape[0] + validator.check_int(uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) + validator.check_int(scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) + validator.check_int(atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name) + validator.check_int(atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) + validator.check_int(atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name) + validator.check_int(atom_d_shape[0], M, Rel.EQ, "atom_d_shape", cls_name) + validator.check_int(ipn_shape[0], M, Rel.EQ, "ipn_shape", cls_name) + validator.check_int(pk_shape[0], M, Rel.EQ, "pk_shape", cls_name) + validator.check_int(gamc_shape[0], M, Rel.EQ, "gamc_shape", cls_name) + validator.check_int(gams_shape[0], M, Rel.EQ, "gams_shape", cls_name) + validator.check_int(pn_shape[0], M, Rel.EQ, "pn_shape", cls_name) + return uint_crd_f_shape + + def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, atom_d_type, + ipn_type, pk_type, gamc_type, gams_type, pn_type): + validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) + validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_d_type', atom_d_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('ipn_type', ipn_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('pk_type', pk_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('gamc_type', gamc_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('gams_type', gams_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('pn_type', pn_type, [mstype.float32], self.name) + + return pn_type + + +class DihedralEnergy(PrimitiveWithInfer): + """ + DihedralEnergy: + + Calculate the potential energy caused by dihedral terms for each 4-atom pair. + Assume our system has N atoms and M dihedral terms. + + .. math:: + + E = k(1 + cos(n*phi - phi_0)) + + Inputs: + Same as operator DihedralForce(). + + Outputs: + - **ene** (Tensor, float32) - [M, ], the potential energy for each + dihedral term. 
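+
+    A minimal invocation sketch for a single dihedral over four atoms
+    (illustrative values only; assumes export through mindspore.ops.operations
+    as this patch intends, and a GPU device):
+
+    >>> import numpy as np
+    >>> from mindspore import Tensor
+    >>> from mindspore.ops import operations as P
+    >>> dihedral_energy = P.DihedralEnergy(dihedral_numbers=1)
+    >>> uint_crd_f = Tensor(np.array(
+    ...     [[0, 0, 0], [100, 0, 0], [100, 100, 0], [100, 100, 100]], np.uint32))
+    >>> scaler_f = Tensor(np.array([0.01, 0.01, 0.01], np.float32))
+    >>> atom_a = Tensor(np.array([0], np.int32))
+    >>> atom_b = Tensor(np.array([1], np.int32))
+    >>> atom_c = Tensor(np.array([2], np.int32))
+    >>> atom_d = Tensor(np.array([3], np.int32))
+    >>> ipn = Tensor(np.array([2], np.int32))
+    >>> pk = Tensor(np.array([1.0], np.float32))
+    >>> gamc = Tensor(np.array([1.0], np.float32))  # k*cos(phi_0) with phi_0 = 0
+    >>> gams = Tensor(np.array([0.0], np.float32))  # k*sin(phi_0) with phi_0 = 0
+    >>> pn = Tensor(np.array([2.0], np.float32))    # float form of ipn
+    >>> ene = dihedral_energy(uint_crd_f, scaler_f, atom_a, atom_b, atom_c, atom_d,
+    ...                       ipn, pk, gamc, gams, pn)
+    >>> ene.shape  # [M,]
+    (1,)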
+ + Supported Platforms: + ``GPU`` + + Examples: + """ + + @prim_attr_register + def __init__(self, dihedral_numbers): + self.dihedral_numbers = dihedral_numbers + self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'atom_d', 'ipn', 'pk', + 'gamc', 'gams', 'pn'], + outputs=['ene']) + self.add_prim_attr('dihedral_numbers', self.dihedral_numbers) + + def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, atom_d_shape, + ipn_shape, pk_shape, gamc_shape, gams_shape, pn_shape): + cls_name = self.name + M = atom_a_shape[0] + validator.check_int(uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) + validator.check_int(scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) + validator.check_int(atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name) + validator.check_int(atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) + validator.check_int(atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name) + validator.check_int(atom_d_shape[0], M, Rel.EQ, "atom_d_shape", cls_name) + validator.check_int(ipn_shape[0], M, Rel.EQ, "ipn_shape", cls_name) + validator.check_int(pk_shape[0], M, Rel.EQ, "pk_shape", cls_name) + validator.check_int(gamc_shape[0], M, Rel.EQ, "gamc_shape", cls_name) + validator.check_int(gams_shape[0], M, Rel.EQ, "gams_shape", cls_name) + validator.check_int(pn_shape[0], M, Rel.EQ, "pn_shape", cls_name) + return [M,] + + def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, atom_d_type, + ipn_type, pk_type, gamc_type, gams_type, pn_type): + validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) + validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_d_type', atom_d_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('ipn_type', ipn_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('pk_type', pk_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('gamc_type', gamc_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('gams_type', gams_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('pn_type', pn_type, [mstype.float32], self.name) + + return pn_type + + +class DihedralAtomEnergy(PrimitiveWithInfer): + """ + DihedralAtomEnergy: + + Add the potential energy caused by dihedral terms to the total potential + energy of each atom. + + The calculation formula is the same as operator DihedralEnergy(). + + Inputs: + Same as operator DihedralEnergy(). + + Outputs: + - **ene** (Tensor, float32) - [N, ], the accumulated potential + energy for each atom. 
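+
+    Reusing the tensors from the DihedralEnergy sketch above (same assumptions),
+    the same term energy would be accumulated per atom:
+
+    >>> dihedral_atom_energy = P.DihedralAtomEnergy(dihedral_numbers=1)
+    >>> ene = dihedral_atom_energy(uint_crd_f, scaler_f, atom_a, atom_b, atom_c, atom_d,
+    ...                            ipn, pk, gamc, gams, pn)
+    >>> ene.shape  # [N,]
+    (4,)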
+ + Supported Platforms: + ``GPU`` + + Examples: + """ + + @prim_attr_register + def __init__(self, dihedral_numbers): + self.dihedral_numbers = dihedral_numbers + self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'atom_d', 'ipn', 'pk', + 'gamc', 'gams', 'pn'], + outputs=['ene']) + self.add_prim_attr('dihedral_numbers', self.dihedral_numbers) + + def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, atom_d_shape, + ipn_shape, pk_shape, gamc_shape, gams_shape, pn_shape): + cls_name = self.name + N = uint_crd_f_shape[0] + M = atom_a_shape[0] + validator.check_int(uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) + validator.check_int(scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) + validator.check_int(atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name) + validator.check_int(atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) + validator.check_int(atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name) + validator.check_int(atom_d_shape[0], M, Rel.EQ, "atom_d_shape", cls_name) + validator.check_int(ipn_shape[0], M, Rel.EQ, "ipn_shape", cls_name) + validator.check_int(pk_shape[0], M, Rel.EQ, "pk_shape", cls_name) + validator.check_int(gamc_shape[0], M, Rel.EQ, "gamc_shape", cls_name) + validator.check_int(gams_shape[0], M, Rel.EQ, "gams_shape", cls_name) + validator.check_int(pn_shape[0], M, Rel.EQ, "pn_shape", cls_name) + return [N,] + + def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, atom_d_type, + ipn_type, pk_type, gamc_type, gams_type, pn_type): + validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) + validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_d_type', atom_d_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('ipn_type', ipn_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('pk_type', pk_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('gamc_type', gamc_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('gams_type', gams_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('pn_type', pn_type, [mstype.float32], self.name) + + return pn_type + + +class DihedralForceWithAtomEnergy(PrimitiveWithInfer): + """ + DihedralForceWithAtomEnergy: + + Calculate dihedral force and potential energy together. + + The calculation formula is the same as operator DihedralForce() and DihedralEnergy(). + + Inputs: + Same as operator DihedralForce(). + + Outputs: + - **frc_f** (Tensor, float32) - [N, 3], same as operator DihedralForce(). + - **ene** (Tensor, float32) - [N, ], same as operator DihedralAtomEnergy(). 
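+
+    With the same illustrative tensors as the DihedralEnergy sketch above,
+    the fused force/energy form would be:
+
+    >>> dihedral_force_energy = P.DihedralForceWithAtomEnergy(dihedral_numbers=1)
+    >>> frc_f, ene = dihedral_force_energy(uint_crd_f, scaler_f, atom_a, atom_b, atom_c,
+    ...                                    atom_d, ipn, pk, gamc, gams, pn)
+    >>> frc_f.shape, ene.shape  # [N, 3], [N,]
+    ((4, 3), (4,))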
+ + Supported Platforms: + ``GPU`` + + Examples: + """ + + @prim_attr_register + def __init__(self, dihedral_numbers): + self.dihedral_numbers = dihedral_numbers + self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'atom_d', 'ipn', 'pk', + 'gamc', 'gams', 'pn'], + outputs=['frc_f', 'ene']) + self.add_prim_attr('dihedral_numbers', self.dihedral_numbers) + + def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, atom_d_shape, + ipn_shape, pk_shape, gamc_shape, gams_shape, pn_shape): + cls_name = self.name + N = uint_crd_f_shape[0] + M = atom_a_shape[0] + validator.check_int(uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) + validator.check_int(scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) + validator.check_int(atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name) + validator.check_int(atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) + validator.check_int(atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name) + validator.check_int(atom_d_shape[0], M, Rel.EQ, "atom_d_shape", cls_name) + validator.check_int(ipn_shape[0], M, Rel.EQ, "ipn_shape", cls_name) + validator.check_int(pk_shape[0], M, Rel.EQ, "pk_shape", cls_name) + validator.check_int(gamc_shape[0], M, Rel.EQ, "gamc_shape", cls_name) + validator.check_int(gams_shape[0], M, Rel.EQ, "gams_shape", cls_name) + validator.check_int(pn_shape[0], M, Rel.EQ, "pn_shape", cls_name) + return uint_crd_f_shape, [N,] + + def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, atom_d_type, + ipn_type, pk_type, gamc_type, gams_type, pn_type): + validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) + validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_d_type', atom_d_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('ipn_type', ipn_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('pk_type', pk_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('gamc_type', gamc_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('gams_type', gams_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('pn_type', pn_type, [mstype.float32], self.name) + + return pn_type, pn_type + + +class AngleForce(PrimitiveWithInfer): + """ + AngleForce: + + Calculate the force exerted by angles made of 3 atoms on the + corresponding atoms. Assume the number of angles is M and the + number of atoms is N. + + .. math:: + + dr_{ab} = (x_b-x_a, y_b-y_a, z_b-z_a) + dr_{cb} = (x_b-x_c, y_b-y_c, z_b-z_c) + theta = arccos(inner_product(dr_{ab}, dr_{cb})/|dr_{ab}|/|dr_{cb}|) + F_a = -2*k*(theta-theta_0)/sin(theta)*[cos(theta)/|dr_{ab}|^2*dr_{ab} + - 1/|dr_{ab}|/|dr_{cb}|*dr_{cb}] + F_c = -2*k*(theta-theta_0)/sin(theta)*[cos(theta)/|dr_{cb}|^2*dr_{cb} + - 1/|dr_{cb}|/|dr_{ab}|*dr_{ab}] + F_b = -F_a - F_c + + Inputs: + - **angle_numbers** (int32) - the number of angles M. + - **uint_crd_f** (Tensor, uint32) - [N, 3], the unsigned int coordinate + value of each atom. 
+ - **scaler_f** (Tensor, float32) - [3, ], the 3-D scale factor between + the real space float coordinates and the unsigned int coordinates. + - **atom_a** (Tensor, int32) - [M, ], the 1st atom index of each angle. + - **atom_b** (Tensor, int32) - [M, ], the 2nd and the central atom index + of each angle. + - **atom_c** (Tensor, int32) - [M, ], the 3rd atom index of each angle. + - **angle_k** (Tensor, float32) - [M, ], the force constant for each angle. + - **angle_theta0** (Tensor, float32) - [M, ], the equilibrium position value + for each angle. + + Outputs: + - **frc_f** (Tensor, float32) - [N, 3], the force felt by each atom. + + Supported Platforms: + ``GPU`` + + Examples: + """ + + @prim_attr_register + def __init__(self, angle_numbers): + self.angle_numbers = angle_numbers + self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'angle_k', + 'angle_theta0'], + outputs=['frc_f']) + self.add_prim_attr('angle_numbers', self.angle_numbers) + + def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, angle_k_shape, + angle_theta0_shape): + cls_name = self.name + M = atom_a_shape[0] + validator.check_int(uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) + validator.check_int(scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) + validator.check_int(atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name) + validator.check_int(atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) + validator.check_int(atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name) + validator.check_int(angle_k_shape[0], M, Rel.EQ, "angle_k_shape", cls_name) + validator.check_int(angle_theta0_shape[0], M, Rel.EQ, "angle_theta0_shape", cls_name) + return uint_crd_f_shape + + def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, angle_k_type, + angle_theta0_type): + validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) + validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('angle_k_type', angle_k_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('angle_theta0_type', angle_theta0_type, [mstype.float32], self.name) + return angle_k_type + + +class AngleEnergy(PrimitiveWithInfer): + """ + AngleEnergy: + + Calculate the energy caused by 3-atoms angle term. + + .. math:: + + dr_{ab} = (x_b-x_a, y_b-y_a, z_b-z_a) + dr_{cb} = (x_b-x_c, y_b-y_c, z_b-z_c) + theta = arccos(inner_product(dr_{ab}, dr_{cb})/|dr_{ab}|/|dr_{cb}|) + E = k*(theta - theta_0)^2 + + Inputs: + Same as operator AngleForce(). + + Outputs: + - **ene** (Tensor, float32) - [M, ], the potential energy for + each angle term. 
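+
+    A minimal invocation sketch for one right angle over three atoms
+    (illustrative values only; assumes export through mindspore.ops.operations
+    as this patch intends, and a GPU device):
+
+    >>> import numpy as np
+    >>> from mindspore import Tensor
+    >>> from mindspore.ops import operations as P
+    >>> angle_energy = P.AngleEnergy(angle_numbers=1)
+    >>> uint_crd_f = Tensor(np.array([[100, 0, 0], [0, 0, 0], [0, 100, 0]], np.uint32))
+    >>> scaler_f = Tensor(np.array([0.01, 0.01, 0.01], np.float32))
+    >>> atom_a = Tensor(np.array([0], np.int32))
+    >>> atom_b = Tensor(np.array([1], np.int32))  # the central atom
+    >>> atom_c = Tensor(np.array([2], np.int32))
+    >>> angle_k = Tensor(np.array([1.0], np.float32))
+    >>> angle_theta0 = Tensor(np.array([np.pi / 2], np.float32))
+    >>> ene = angle_energy(uint_crd_f, scaler_f, atom_a, atom_b, atom_c, angle_k, angle_theta0)
+    >>> ene.shape  # [M,]
+    (1,)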
+ + Supported Platforms: + ``GPU`` + + Examples: + """ + + @prim_attr_register + def __init__(self, angle_numbers): + self.angle_numbers = angle_numbers + self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'angle_k', + 'angle_theta0'], + outputs=['ene']) + self.add_prim_attr('angle_numbers', self.angle_numbers) + + def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, angle_k_shape, + angle_theta0_shape): + cls_name = self.name + M = atom_a_shape[0] + validator.check_int(uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name) + validator.check_int(scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name) + validator.check_int(atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name) + validator.check_int(atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name) + validator.check_int(atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name) + validator.check_int(angle_k_shape[0], M, Rel.EQ, "angle_k_shape", cls_name) + validator.check_int(angle_theta0_shape[0], M, Rel.EQ, "angle_theta0_shape", cls_name) + return [M,] + + def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, angle_k_type, + angle_theta0_type): + validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name) + validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('angle_k_type', angle_k_type, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('angle_theta0_type', angle_theta0_type, [mstype.float32], self.name) + return angle_k_type + + +class AngleAtomEnergy(PrimitiveWithInfer): + """ + AngleAtomEnergy: + + Add the potential energy caused by angle terms to the total potential + energy of each atom. + + The calculation formula is the same as operator AngleEnergy(). + + Inputs: + Same as operator AngleForce(). + + Outputs: + - **ene** (Tensor, float32) - [N, ], the accumulated potential energy + for each atom. 
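+
+    Reusing the tensors from the AngleEnergy sketch above (same assumptions):
+
+    >>> angle_atom_energy = P.AngleAtomEnergy(angle_numbers=1)
+    >>> ene = angle_atom_energy(uint_crd_f, scaler_f, atom_a, atom_b, atom_c, angle_k, angle_theta0)
+    >>> ene.shape  # [N,]
+    (3,)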
+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+    """
+
+    @prim_attr_register
+    def __init__(self, angle_numbers):
+        self.angle_numbers = angle_numbers
+        self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'angle_k',
+                                        'angle_theta0'],
+                                outputs=['ene'])
+        self.add_prim_attr('angle_numbers', self.angle_numbers)
+
+    def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, angle_k_shape,
+                    angle_theta0_shape):
+        cls_name = self.name
+        N = uint_crd_f_shape[0]
+        M = atom_a_shape[0]
+        validator.check_int(uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name)
+        validator.check_int(scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name)
+        validator.check_int(atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name)
+        validator.check_int(atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name)
+        validator.check_int(atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name)
+        validator.check_int(angle_k_shape[0], M, Rel.EQ, "angle_k_shape", cls_name)
+        validator.check_int(angle_theta0_shape[0], M, Rel.EQ, "angle_theta0_shape", cls_name)
+        return [N,]
+
+    def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, angle_k_type,
+                    angle_theta0_type):
+        validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('angle_k_type', angle_k_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('angle_theta0_type', angle_theta0_type, [mstype.float32], self.name)
+        return angle_k_type
+
+
+class AngleForceWithAtomEnergy(PrimitiveWithInfer):
+    """
+    AngleForceWithAtomEnergy:
+
+    Calculate angle force and potential energy together.
+
+    The calculation formula is the same as operator AngleForce() and AngleEnergy().
+
+    Inputs:
+        Same as operator AngleForce().
+
+    Outputs:
+        - **frc_f** (Tensor, float32) - [N, 3], same as operator AngleForce().
+        - **ene** (Tensor, float32) - [N, ], same as operator AngleAtomEnergy().
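+
+    With the same illustrative tensors as the AngleEnergy sketch above, the
+    fused force/energy form would be:
+
+    >>> angle_force_energy = P.AngleForceWithAtomEnergy(angle_numbers=1)
+    >>> frc_f, ene = angle_force_energy(uint_crd_f, scaler_f, atom_a, atom_b, atom_c,
+    ...                                 angle_k, angle_theta0)
+    >>> frc_f.shape, ene.shape  # [N, 3], [N,]
+    ((3, 3), (3,))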
+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+    """
+
+    @prim_attr_register
+    def __init__(self, angle_numbers):
+        self.angle_numbers = angle_numbers
+        self.init_prim_io_names(inputs=['uint_crd_f', 'scaler_f', 'atom_a', 'atom_b', 'atom_c', 'angle_k',
+                                        'angle_theta0'],
+                                outputs=['frc_f', 'ene'])
+        self.add_prim_attr('angle_numbers', self.angle_numbers)
+
+    def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, angle_k_shape,
+                    angle_theta0_shape):
+        cls_name = self.name
+        N = uint_crd_f_shape[0]
+        M = atom_a_shape[0]
+        validator.check_int(uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape", cls_name)
+        validator.check_int(scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name)
+        validator.check_int(atom_a_shape[0], M, Rel.EQ, "atom_a_shape", cls_name)
+        validator.check_int(atom_b_shape[0], M, Rel.EQ, "atom_b_shape", cls_name)
+        validator.check_int(atom_c_shape[0], M, Rel.EQ, "atom_c_shape", cls_name)
+        validator.check_int(angle_k_shape[0], M, Rel.EQ, "angle_k_shape", cls_name)
+        validator.check_int(angle_theta0_shape[0], M, Rel.EQ, "angle_theta0_shape", cls_name)
+        return uint_crd_f_shape, [N,]
+
+    def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, angle_k_type,
+                    angle_theta0_type):
+        validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('scaler_f_type', scaler_f_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('atom_a_type', atom_a_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('atom_b_type', atom_b_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('atom_c_type', atom_c_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('angle_k_type', angle_k_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('angle_theta0_type', angle_theta0_type, [mstype.float32], self.name)
+        return angle_k_type, angle_k_type
+
+
+class Dihedral14LJForce(PrimitiveWithInfer):
+    """
+    Dihedral14LJForce:
+
+    Calculate the Lennard-Jones part of 1,4 dihedral force correction for
+    each necessary dihedral term on the corresponding atoms. Assume the
+    number of necessary dihedral 1,4 terms is M, the number of atoms is N,
+    and the number of Lennard-Jones types for all atoms is P, which means
+    there will be Q = P*(P+1)/2 types of possible Lennard-Jones interactions
+    for all kinds of atom pairs.
+
+    .. math::
+
+        dr = (x_a-x_b, y_a-y_b, z_a-z_b)
+        F = k*(-12*A/|dr|^{14} + 6*B/|dr|^{8})*dr
+
+    Inputs:
+        - **dihedral_14_numbers** (int32) - the number of necessary dihedral
+          1,4 terms M.
+        - **atom_numbers** (int32) - the number of atoms N.
+        - **uint_crd_f** (Tensor, uint32) - [N, 3], the unsigned int coordinate
+          value of each atom.
+        - **LJ_type** (Tensor, int32) - [N,], the Lennard-Jones type of each
+          atom.
+        - **charge** (Tensor, float32) - [N,], the charge of each atom.
+        - **boxlength_f** (Tensor, float32) - [3,], the length of the molecular
+          simulation box in 3 dimensions.
+        - **a_14** (Tensor, int32) - [M,], the first atom index of each dihedral
+          1,4 term.
+        - **b_14** (Tensor, int32) - [M,], the second atom index of each dihedral
+          1,4 term.
+        - **lj_scale_factor** (Tensor, float32) - [M,], the scale factor for the
+          Lennard-Jones part of force correction of each dihedral 1,4 term.
+        - **LJ_type_A** (Tensor, float32) - [Q,], the A parameter in Lennard-Jones
+          scheme of each atom pair type.
+        - **LJ_type_B** (Tensor, float32) - [Q,], the B parameter in Lennard-Jones
+          scheme of each atom pair type.
+
+    Outputs:
+        - **frc_f** (Tensor, float32) - [N, 3], the force felt by each atom.
+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+    """
+
+    @prim_attr_register
+    def __init__(self, nb14_numbers, atom_numbers):
+        self.dihedral_14_numbers = nb14_numbers
+        self.atom_numbers = atom_numbers
+        self.init_prim_io_names(
+            inputs=['uint_crd_f', 'LJtype', 'charge', 'boxlength_f', 'a_14', 'b_14', 'lj_scale_factor',
+                    'LJ_type_A', 'LJ_type_B'],
+            outputs=['frc_f'])
+        self.add_prim_attr('dihedral_14_numbers', self.dihedral_14_numbers)
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+
+    def infer_shape(self, uint_crd_f_shape, LJtype_shape, charge_shape, boxlength_f_shape, a_14_shape, b_14_shape,
+                    lj_scale_factor_shape, LJ_type_A_shape, LJ_type_B_shape):
+        return uint_crd_f_shape
+
+    def infer_dtype(self, uint_crd_f_dtype, LJtype_dtype, charge_dtype, boxlength_f_type, a_14_type, b_14_type,
+                    lj_scale_factor_type, LJ_type_A_type, LJ_type_B_type):
+        validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('LJtype_dtype', LJtype_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('charge_dtype', charge_dtype, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('boxlength_f_type', boxlength_f_type, [mstype.float32], self.name)
+
+        validator.check_tensor_dtype_valid('a_14_type', a_14_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('b_14_type', b_14_type, [mstype.int32], self.name)
+
+        validator.check_tensor_dtype_valid('lj_scale_factor_type', lj_scale_factor_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('LJ_type_A_type', LJ_type_A_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('LJ_type_B_type', LJ_type_B_type, [mstype.float32], self.name)
+        return LJ_type_B_type
+
+
+class Dihedral14LJEnergy(PrimitiveWithInfer):
+    """
+    Dihedral14LJEnergy:
+
+    Calculate the Lennard-Jones part of 1,4 dihedral energy correction for
+    each necessary dihedral term on the corresponding atoms.
+
+    .. math::
+
+        dr = (x_a-x_b, y_a-y_b, z_a-z_b)
+        E = k*(A/|dr|^{12} - B/|dr|^{6})
+
+    Inputs:
+        Same as operator Dihedral14LJForce().
+
+    Outputs:
+        - **ene** (Tensor, float32) - [M,], the Lennard-Jones potential
+          energy correction for each necessary dihedral 1,4 term.
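+
+    A minimal invocation sketch for one 1,4 pair of two atoms sharing a single
+    Lennard-Jones type, so that Q = P*(P+1)/2 = 1 (illustrative values only;
+    assumes export through mindspore.ops.operations as this patch intends, and
+    a GPU device):
+
+    >>> import numpy as np
+    >>> from mindspore import Tensor
+    >>> from mindspore.ops import operations as P
+    >>> nb14_lj_energy = P.Dihedral14LJEnergy(nb14_numbers=1, atom_numbers=2)
+    >>> uint_crd_f = Tensor(np.array([[0, 0, 0], [400, 0, 0]], np.uint32))
+    >>> LJtype = Tensor(np.array([0, 0], np.int32))
+    >>> charge = Tensor(np.array([1.0, -1.0], np.float32))
+    >>> boxlength_f = Tensor(np.array([10.0, 10.0, 10.0], np.float32))
+    >>> a_14 = Tensor(np.array([0], np.int32))
+    >>> b_14 = Tensor(np.array([1], np.int32))
+    >>> lj_scale_factor = Tensor(np.array([0.5], np.float32))
+    >>> LJ_type_A = Tensor(np.array([1.0], np.float32))
+    >>> LJ_type_B = Tensor(np.array([1.0], np.float32))
+    >>> ene = nb14_lj_energy(uint_crd_f, LJtype, charge, boxlength_f, a_14, b_14,
+    ...                      lj_scale_factor, LJ_type_A, LJ_type_B)
+    >>> ene.shape  # [M,]
+    (1,)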
+    """
+
+    @prim_attr_register
+    def __init__(self, nb14_numbers, atom_numbers):
+        self.dihedral_14_numbers = nb14_numbers
+        self.atom_numbers = atom_numbers
+
+        self.init_prim_io_names(
+            inputs=['uint_crd_f', 'LJtype', 'charge', 'boxlength_f', 'a_14', 'b_14', 'lj_scale_factor',
+                    'LJ_type_A', 'LJ_type_B'],
+            outputs=['ene'])
+        self.add_prim_attr('dihedral_14_numbers', self.dihedral_14_numbers)
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+
+    def infer_shape(self, uint_crd_f_shape, LJtype_shape, charge_shape, boxlength_f_shape, a_14_shape, b_14_shape,
+                    lj_scale_factor_shape, LJ_type_A_shape, LJ_type_B_shape):
+        return [self.dihedral_14_numbers,]
+
+    def infer_dtype(self, uint_crd_f_dtype, LJtype_dtype, charge_dtype, boxlength_f_type, a_14_type, b_14_type,
+                    lj_scale_factor_type, LJ_type_A_type, LJ_type_B_type):
+        validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('LJtype_dtype', LJtype_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('charge_dtype', charge_dtype, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('boxlength_f_type', boxlength_f_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('a_14_type', a_14_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('b_14_type', b_14_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('lj_scale_factor_type', lj_scale_factor_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('LJ_type_A_type', LJ_type_A_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('LJ_type_B_type', LJ_type_B_type, [mstype.float32], self.name)
+
+        return LJ_type_A_type
+
+
+class Dihedral14LJForceWithDirectCF(PrimitiveWithInfer):
+    """
+    Dihedral14LJForceWithDirectCF:
+
+    Calculate the Lennard-Jones part and the Coulomb part of the force
+    correction for each necessary dihedral 1,4 term.
+
+    The calculation formula of the Lennard-Jones part is the same as operator
+    Dihedral14LJForce(), and the Coulomb part is as follows:
+
+    .. math::
+
+        dr = (x_a-x_b, y_a-y_b, z_a-z_b)
+        F = -k*q_a*q_b/|dr|^3*dr
+
+    Inputs:
+        - **cf_scale_factor** (Tensor, float32) - [M,], the scale factor for the
+          Coulomb part of the force correction of each dihedral 1,4 term.
+
+        The rest of the inputs are the same as those of operator Dihedral14LJForce().
+
+    Outputs:
+        - **frc_f** (Tensor, float32) - [N, 3], the force felt by each atom.
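+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+        A minimal sketch with random placeholder inputs (shape illustration
+        only; assumes the operator is exported through `mindspore.ops.operations`):
+
+        >>> import numpy as np
+        >>> import mindspore.common.dtype as mstype
+        >>> from mindspore import Tensor
+        >>> from mindspore.ops import operations as P
+        >>> # placeholder system: N = 4 atoms, M = 2 terms, Q = 1 pair type
+        >>> net = P.Dihedral14LJForceWithDirectCF(nb14_numbers=2, atom_numbers=4)
+        >>> uint_crd_f = Tensor(np.random.randint(0, 2 ** 10, (4, 3)), mstype.uint32)
+        >>> lj_type, charge = Tensor(np.zeros(4), mstype.int32), Tensor(np.ones(4), mstype.float32)
+        >>> boxlength_f = Tensor(np.full(3, 100.0), mstype.float32)
+        >>> a_14, b_14 = Tensor(np.array([0, 1]), mstype.int32), Tensor(np.array([2, 3]), mstype.int32)
+        >>> lj_scale, cf_scale = Tensor(np.full(2, 0.5), mstype.float32), Tensor(np.full(2, 0.8333), mstype.float32)
+        >>> lj_a, lj_b = Tensor(np.ones(1), mstype.float32), Tensor(np.ones(1), mstype.float32)
+        >>> frc_f = net(uint_crd_f, lj_type, charge, boxlength_f, a_14, b_14, lj_scale, cf_scale, lj_a, lj_b)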
+    """
+
+    @prim_attr_register
+    def __init__(self, nb14_numbers, atom_numbers):
+        self.dihedral_14_numbers = nb14_numbers
+        self.atom_numbers = atom_numbers
+
+        self.init_prim_io_names(
+            inputs=['uint_crd_f', 'LJtype', 'charge', 'boxlength_f', 'a_14', 'b_14', 'lj_scale_factor',
+                    'cf_scale_factor', 'LJ_type_A', 'LJ_type_B'],
+            outputs=['frc_f'])
+        self.add_prim_attr('dihedral_14_numbers', self.dihedral_14_numbers)
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+
+    def infer_shape(self, uint_crd_f_shape, LJtype_shape, charge_shape, boxlength_f_shape, a_14_shape, b_14_shape,
+                    lj_scale_factor_shape, cf_scale_factor_shape, LJ_type_A_shape, LJ_type_B_shape):
+        return [self.atom_numbers, 3]
+
+    def infer_dtype(self, uint_crd_f_dtype, LJtype_dtype, charge_dtype, boxlength_f_type, a_14_type, b_14_type,
+                    lj_scale_factor_type, cf_scale_factor_type, LJ_type_A_type, LJ_type_B_type):
+        validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('LJtype_dtype', LJtype_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('charge_dtype', charge_dtype, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('boxlength_f_type', boxlength_f_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('a_14_type', a_14_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('b_14_type', b_14_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('lj_scale_factor_type', lj_scale_factor_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('cf_scale_factor_type', cf_scale_factor_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('LJ_type_A_type', LJ_type_A_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('LJ_type_B_type', LJ_type_B_type, [mstype.float32], self.name)
+
+        return LJ_type_A_type
+
+
+class Dihedral14LJCFForceWithAtomEnergy(PrimitiveWithInfer):
+    """
+    Dihedral14LJCFForceWithAtomEnergy:
+
+    Calculate the Lennard-Jones and Coulomb energy correction and force
+    correction for each necessary dihedral 1,4 term together and add them
+    to the total force and potential energy for each atom.
+
+    The calculation formula of the force correction is the same as operator
+    Dihedral14LJForceWithDirectCF(), and the energy correction part is the same
+    as operators Dihedral14LJEnergy() and Dihedral14CFEnergy().
+
+    Inputs:
+        Same as operator Dihedral14LJForceWithDirectCF().
+
+    Outputs:
+        - **frc_f** (Tensor, float32) - [N, 3], the force felt by each atom.
+        - **atom_energy** (Tensor, float32) - [N,], the accumulated potential
+          energy for each atom.
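+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+        A minimal placeholder sketch (illustrative only; the inputs are built
+        exactly as in the Dihedral14LJForceWithDirectCF example above):
+
+        >>> import numpy as np
+        >>> import mindspore.common.dtype as mstype
+        >>> from mindspore import Tensor
+        >>> from mindspore.ops import operations as P
+        >>> # placeholder system: N = 4 atoms, M = 2 terms, Q = 1 pair type
+        >>> net = P.Dihedral14LJCFForceWithAtomEnergy(nb14_numbers=2, atom_numbers=4)
+        >>> uint_crd_f = Tensor(np.random.randint(0, 2 ** 10, (4, 3)), mstype.uint32)
+        >>> lj_type, charge = Tensor(np.zeros(4), mstype.int32), Tensor(np.ones(4), mstype.float32)
+        >>> boxlength_f = Tensor(np.full(3, 100.0), mstype.float32)
+        >>> a_14, b_14 = Tensor(np.array([0, 1]), mstype.int32), Tensor(np.array([2, 3]), mstype.int32)
+        >>> lj_scale, cf_scale = Tensor(np.full(2, 0.5), mstype.float32), Tensor(np.full(2, 0.8333), mstype.float32)
+        >>> lj_a, lj_b = Tensor(np.ones(1), mstype.float32), Tensor(np.ones(1), mstype.float32)
+        >>> frc_f, atom_energy = net(uint_crd_f, lj_type, charge, boxlength_f, a_14, b_14, lj_scale, cf_scale, lj_a, lj_b)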
+    """
+
+    @prim_attr_register
+    def __init__(self, nb14_numbers, atom_numbers):
+        self.dihedral_14_numbers = nb14_numbers
+        self.atom_numbers = atom_numbers
+
+        self.init_prim_io_names(
+            inputs=['uint_crd_f', 'LJtype', 'charge', 'boxlength_f', 'a_14', 'b_14', 'lj_scale_factor',
+                    'cf_scale_factor', 'LJ_type_A', 'LJ_type_B'],
+            outputs=['frc_f', 'atom_energy'])
+        self.add_prim_attr('dihedral_14_numbers', self.dihedral_14_numbers)
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+
+    def infer_shape(self, uint_crd_f_shape, LJtype_shape, charge_shape, boxlength_f_shape, a_14_shape, b_14_shape,
+                    lj_scale_factor_shape, cf_scale_factor_shape, LJ_type_A_shape, LJ_type_B_shape):
+        return uint_crd_f_shape, charge_shape
+
+    def infer_dtype(self, uint_crd_f_dtype, LJtype_dtype, charge_dtype, boxlength_f_type, a_14_type, b_14_type,
+                    lj_scale_factor_type, cf_scale_factor_type, LJ_type_A_type, LJ_type_B_type):
+        validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('LJtype_dtype', LJtype_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('charge_dtype', charge_dtype, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('boxlength_f_type', boxlength_f_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('a_14_type', a_14_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('b_14_type', b_14_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('lj_scale_factor_type', lj_scale_factor_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('cf_scale_factor_type', cf_scale_factor_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('LJ_type_A_type', LJ_type_A_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('LJ_type_B_type', LJ_type_B_type, [mstype.float32], self.name)
+
+        return charge_dtype, charge_dtype
+
+
+class Dihedral14LJAtomEnergy(PrimitiveWithInfer):
+    """
+    Dihedral14LJAtomEnergy:
+
+    Add the potential energy caused by the Lennard-Jones energy correction for
+    each necessary dihedral 1,4 term to the total potential energy of each atom.
+
+    The calculation formula is the same as operator Dihedral14LJEnergy().
+
+    Inputs:
+        Same as operator Dihedral14LJForce().
+
+    Outputs:
+        - **ene** (Tensor, float32) - [N,], the accumulated potential energy of
+          each atom.
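+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+        A minimal placeholder sketch (shapes only; assumes export through
+        `mindspore.ops.operations`):
+
+        >>> import numpy as np
+        >>> import mindspore.common.dtype as mstype
+        >>> from mindspore import Tensor
+        >>> from mindspore.ops import operations as P
+        >>> # placeholder system: N = 4 atoms, M = 2 terms, Q = 1 pair type
+        >>> net = P.Dihedral14LJAtomEnergy(nb14_numbers=2, atom_numbers=4)
+        >>> uint_crd_f = Tensor(np.random.randint(0, 2 ** 10, (4, 3)), mstype.uint32)
+        >>> lj_type, charge = Tensor(np.zeros(4), mstype.int32), Tensor(np.ones(4), mstype.float32)
+        >>> boxlength_f = Tensor(np.full(3, 100.0), mstype.float32)
+        >>> a_14, b_14 = Tensor(np.array([0, 1]), mstype.int32), Tensor(np.array([2, 3]), mstype.int32)
+        >>> lj_scale = Tensor(np.full(2, 0.5), mstype.float32)
+        >>> lj_a, lj_b = Tensor(np.ones(1), mstype.float32), Tensor(np.ones(1), mstype.float32)
+        >>> ene = net(uint_crd_f, lj_type, charge, boxlength_f, a_14, b_14, lj_scale, lj_a, lj_b)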
+    """
+
+    @prim_attr_register
+    def __init__(self, nb14_numbers, atom_numbers):
+        self.dihedral_14_numbers = nb14_numbers
+        self.atom_numbers = atom_numbers
+
+        self.init_prim_io_names(
+            inputs=['uint_crd_f', 'LJtype', 'charge', 'boxlength_f', 'a_14', 'b_14', 'lj_scale_factor',
+                    'LJ_type_A', 'LJ_type_B'],
+            outputs=['ene'])
+        self.add_prim_attr('dihedral_14_numbers', self.dihedral_14_numbers)
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+
+    def infer_shape(self, uint_crd_f_shape, LJtype_shape, charge_shape, boxlength_f_shape, a_14_shape, b_14_shape,
+                    lj_scale_factor_shape, LJ_type_A_shape, LJ_type_B_shape):
+        return LJtype_shape
+
+    def infer_dtype(self, uint_crd_f_dtype, LJtype_dtype, charge_dtype, boxlength_f_type, a_14_type, b_14_type,
+                    lj_scale_factor_type, LJ_type_A_type, LJ_type_B_type):
+        validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('LJtype_dtype', LJtype_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('charge_dtype', charge_dtype, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('boxlength_f_type', boxlength_f_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('a_14_type', a_14_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('b_14_type', b_14_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('lj_scale_factor_type', lj_scale_factor_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('LJ_type_A_type', LJ_type_A_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('LJ_type_B_type', LJ_type_B_type, [mstype.float32], self.name)
+
+        return LJ_type_A_type
+
+
+class Dihedral14CFEnergy(PrimitiveWithInfer):
+    """
+    Dihedral14CFEnergy:
+
+    Calculate the Coulomb part of the 1,4 dihedral energy correction for
+    each necessary dihedral term on the corresponding atoms.
+
+    .. math::
+
+        dr = (x_a-x_b, y_a-y_b, z_a-z_b)
+        E = k*q_a*q_b/|dr|
+
+    Inputs:
+        The meaning and type of each input is the same as that of operator
+        Dihedral14LJForceWithDirectCF().
+
+    Outputs:
+        - **ene** (Tensor, float32) - [M,], the Coulomb potential energy
+          correction for each necessary dihedral 1,4 term.
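+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+        A minimal placeholder sketch (illustrative only):
+
+        >>> import numpy as np
+        >>> import mindspore.common.dtype as mstype
+        >>> from mindspore import Tensor
+        >>> from mindspore.ops import operations as P
+        >>> # placeholder system: N = 4 atoms, M = 2 dihedral 1,4 terms
+        >>> net = P.Dihedral14CFEnergy(nb14_numbers=2, atom_numbers=4)
+        >>> uint_crd_f = Tensor(np.random.randint(0, 2 ** 10, (4, 3)), mstype.uint32)
+        >>> lj_type, charge = Tensor(np.zeros(4), mstype.int32), Tensor(np.ones(4), mstype.float32)
+        >>> boxlength_f = Tensor(np.full(3, 100.0), mstype.float32)
+        >>> a_14, b_14 = Tensor(np.array([0, 1]), mstype.int32), Tensor(np.array([2, 3]), mstype.int32)
+        >>> cf_scale = Tensor(np.full(2, 0.8333), mstype.float32)
+        >>> ene = net(uint_crd_f, lj_type, charge, boxlength_f, a_14, b_14, cf_scale)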
+    """
+
+    @prim_attr_register
+    def __init__(self, nb14_numbers, atom_numbers):
+        self.dihedral_14_numbers = nb14_numbers
+        self.atom_numbers = atom_numbers
+
+        self.init_prim_io_names(
+            inputs=['uint_crd_f', 'LJtype', 'charge', 'boxlength_f', 'a_14', 'b_14', 'cf_scale_factor'],
+            outputs=['ene'])
+        self.add_prim_attr('dihedral_14_numbers', self.dihedral_14_numbers)
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+
+    def infer_shape(self, uint_crd_f_shape, LJtype_shape, charge_shape, boxlength_f_shape, a_14_shape, b_14_shape,
+                    cf_scale_factor_shape):
+        return [self.dihedral_14_numbers,]
+
+    def infer_dtype(self, uint_crd_f_dtype, LJtype_dtype, charge_dtype, boxlength_f_type, a_14_type, b_14_type,
+                    cf_scale_factor_type):
+        validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('LJtype_dtype', LJtype_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('charge_dtype', charge_dtype, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('boxlength_f_type', boxlength_f_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('a_14_type', a_14_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('b_14_type', b_14_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('cf_scale_factor_type', cf_scale_factor_type, [mstype.float32], self.name)
+
+        return charge_dtype
+
+
+class Dihedral14CFAtomEnergy(PrimitiveWithInfer):
+    """
+    Dihedral14CFAtomEnergy:
+
+    Add the potential energy caused by the Coulomb energy correction for each
+    necessary dihedral 1,4 term to the total potential energy of each atom.
+
+    The calculation formula is the same as operator Dihedral14CFEnergy().
+
+    Inputs:
+        The meaning and type of each input is the same as that of operator
+        Dihedral14LJForceWithDirectCF().
+
+    Outputs:
+        - **ene** (Tensor, float32) - [N,], the accumulated potential energy
+          of each atom.
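+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+        A minimal placeholder sketch (inputs built as in the Dihedral14CFEnergy
+        example above; illustrative only):
+
+        >>> import numpy as np
+        >>> import mindspore.common.dtype as mstype
+        >>> from mindspore import Tensor
+        >>> from mindspore.ops import operations as P
+        >>> # placeholder system: N = 4 atoms, M = 2 dihedral 1,4 terms
+        >>> net = P.Dihedral14CFAtomEnergy(nb14_numbers=2, atom_numbers=4)
+        >>> uint_crd_f = Tensor(np.random.randint(0, 2 ** 10, (4, 3)), mstype.uint32)
+        >>> lj_type, charge = Tensor(np.zeros(4), mstype.int32), Tensor(np.ones(4), mstype.float32)
+        >>> boxlength_f = Tensor(np.full(3, 100.0), mstype.float32)
+        >>> a_14, b_14 = Tensor(np.array([0, 1]), mstype.int32), Tensor(np.array([2, 3]), mstype.int32)
+        >>> cf_scale = Tensor(np.full(2, 0.8333), mstype.float32)
+        >>> ene = net(uint_crd_f, lj_type, charge, boxlength_f, a_14, b_14, cf_scale)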
+    """
+
+    @prim_attr_register
+    def __init__(self, nb14_numbers, atom_numbers):
+        self.dihedral_14_numbers = nb14_numbers
+        self.atom_numbers = atom_numbers
+
+        self.init_prim_io_names(
+            inputs=['uint_crd_f', 'LJtype', 'charge', 'boxlength_f', 'a_14', 'b_14', 'cf_scale_factor'],
+            outputs=['ene'])
+        self.add_prim_attr('dihedral_14_numbers', self.dihedral_14_numbers)
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+
+    def infer_shape(self, uint_crd_f_shape, LJtype_shape, charge_shape, boxlength_f_shape, a_14_shape, b_14_shape,
+                    cf_scale_factor_shape):
+        return LJtype_shape
+
+    def infer_dtype(self, uint_crd_f_dtype, LJtype_dtype, charge_dtype, boxlength_f_type, a_14_type, b_14_type,
+                    cf_scale_factor_type):
+        validator.check_tensor_dtype_valid('uint_crd_f_dtype', uint_crd_f_dtype, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('LJtype_dtype', LJtype_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('charge_dtype', charge_dtype, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('boxlength_f_type', boxlength_f_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('a_14_type', a_14_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('b_14_type', b_14_type, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('cf_scale_factor_type', cf_scale_factor_type, [mstype.float32], self.name)
+
+        return charge_dtype
+
+
+class MDIterationLeapFrog(PrimitiveWithInfer):
+    """
+    MDIterationLeapFrog:
+
+    One step of the classical leap-frog algorithm to solve the finite-difference
+    Hamiltonian equations of motion for a certain system, using Langevin dynamics
+    with Liu's thermostat scheme. Assume the number of atoms is N and the target
+    control temperature is T.
+
+    The detailed iteration formula can be found in this paper: A unified thermostat
+    scheme for efficient configurational sampling for classical/quantum canonical
+    ensembles via molecular dynamics. DOI: 10.1063/1.4991621.
+
+    Inputs:
+        - **float4_numbers** (int32) - the total length used to store random numbers.
+        - **atom_numbers** (int32) - the number of atoms N.
+        - **dt** (float32) - the time step for the finite difference.
+        - **half_dt** (float32) - half of the time step for the finite difference.
+        - **exp_gamma** (float32) - parameter in Liu's dynamics, equal to
+          exp(-gamma_ln * dt), where gamma_ln is the friction factor in Langevin
+          dynamics.
+        - **max_velocity** (float32) - the upper limit of velocity; when the
+          velocity overflows, scale it back to the upper limit.
+        - **is_max_velocity** (int32) - whether the max velocity control is
+          enabled or not.
+
+        - **mass_inverse** (Tensor, float32) - [N,], the inverse value of the
+          mass of each atom.
+        - **sqrt_mass** (Tensor, float32) - [N,], the inverse square root value
+          of the effective mass in Liu's dynamics of each atom.
+
+    Outputs:
+        - **vel** (Tensor, float32) - [N, 3], the velocity of each atom.
+        - **crd** (Tensor, float32) - [N, 3], the coordinate of each atom.
+        - **frc** (Tensor, float32) - [N, 3], the force felt by each atom.
+        - **acc** (Tensor, float32) - [N, 3], the acceleration of each atom.
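+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+        A minimal placeholder sketch (the step-size and thermostat parameters
+        are arbitrary illustrative values, not a tuned simulation setup):
+
+        >>> import numpy as np
+        >>> import mindspore.common.dtype as mstype
+        >>> from mindspore import Tensor
+        >>> from mindspore.ops import operations as P
+        >>> # N = 4 atoms; dt, half_dt and exp_gamma are placeholder values
+        >>> net = P.MDIterationLeapFrog(float4_numbers=3, atom_numbers=4, half_dt=0.0005, dt=0.001,
+        ...                             exp_gamma=0.99, is_max_velocity=0, max_velocity=10.0)
+        >>> mass_inverse = Tensor(np.ones(4), mstype.float32)
+        >>> sqrt_mass = Tensor(np.ones(4), mstype.float32)
+        >>> vel, crd, frc, acc = net(mass_inverse, sqrt_mass)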
+    """
+
+    @prim_attr_register
+    def __init__(self, float4_numbers, atom_numbers, half_dt, dt, exp_gamma, is_max_velocity, max_velocity):
+        self.float4_numbers = float4_numbers
+        self.atom_numbers = atom_numbers
+        self.half_dt = half_dt
+        self.dt = dt
+        self.exp_gamma = exp_gamma
+        self.is_max_velocity = is_max_velocity
+        self.max_velocity = max_velocity
+
+        self.init_prim_io_names(
+            inputs=['mass_inverse', 'sqrt_mass'],
+            outputs=['vel', 'crd', 'frc', 'acc'])
+        self.add_prim_attr('float4_numbers', self.float4_numbers)
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+        self.add_prim_attr('half_dt', self.half_dt)
+        self.add_prim_attr('dt', self.dt)
+        self.add_prim_attr('exp_gamma', self.exp_gamma)
+        self.add_prim_attr('is_max_velocity', self.is_max_velocity)
+        self.add_prim_attr('max_velocity', self.max_velocity)
+
+    def infer_shape(self, mass_inverse_shape, sqrt_mass_shape):
+        return [self.atom_numbers, 3], [self.atom_numbers, 3], [self.atom_numbers, 3], [self.atom_numbers, 3]
+
+    def infer_dtype(self, mass_inverse_dtype, sqrt_mass_dtype):
+        validator.check_tensor_dtype_valid('mass_inverse_dtype', mass_inverse_dtype, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('sqrt_mass_dtype', sqrt_mass_dtype, [mstype.float32], self.name)
+
+        return mass_inverse_dtype, mass_inverse_dtype, mass_inverse_dtype, mass_inverse_dtype
+
+
+class PMEReciprocalForce(PrimitiveWithInfer):
+    """
+    PMEReciprocalForce:
+
+    Calculate the reciprocal part of the long-range Coulomb force using the
+    PME (Particle Mesh Ewald) method. Assume the number of atoms is N.
+
+    The detailed calculation formula of the PME (Particle Mesh Ewald) method
+    can be found in this paper: A Smooth Particle Mesh Ewald Method. DOI:
+    10.1063/1.470117.
+
+    Inputs:
+        - **atom_numbers** (int32) - the number of atoms, N.
+        - **beta** (float32) - the PME beta parameter, determined by the
+          non-bond cutoff value and simulation precision tolerance.
+        - **fftx** (int32) - the number of points for the Fourier transform
+          in dimension X.
+        - **ffty** (int32) - the number of points for the Fourier transform
+          in dimension Y.
+        - **fftz** (int32) - the number of points for the Fourier transform
+          in dimension Z.
+
+        - **boxlength** (Tensor, float32) - [3,], the length of the simulation
+          box in 3 dimensions.
+        - **uint_crd** (Tensor, uint32) - [N, 3], the unsigned int coordinate
+          value of each atom.
+        - **charge** (Tensor, float32) - [N,], the charge carried by each
+          atom.
+
+    Outputs:
+        - **force** (Tensor, float32) - [N, 3], the force felt by each atom.
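+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+        A minimal placeholder sketch (beta and the FFT grid sizes are arbitrary
+        illustrative values, not tuned for accuracy):
+
+        >>> import numpy as np
+        >>> import mindspore.common.dtype as mstype
+        >>> from mindspore import Tensor
+        >>> from mindspore.ops import operations as P
+        >>> # N = 4 atoms on a 4 x 4 x 4 FFT grid; placeholder values only
+        >>> net = P.PMEReciprocalForce(atom_numbers=4, beta=0.3, fftx=4, ffty=4, fftz=4)
+        >>> boxlength = Tensor(np.full(3, 100.0), mstype.float32)
+        >>> uint_crd = Tensor(np.random.randint(0, 2 ** 10, (4, 3)), mstype.uint32)
+        >>> charge = Tensor(np.ones(4), mstype.float32)
+        >>> force = net(boxlength, uint_crd, charge)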
+    """
+
+    @prim_attr_register
+    def __init__(self, atom_numbers, beta, fftx, ffty, fftz):
+        self.atom_numbers = atom_numbers
+        self.beta = beta
+        self.fftx = fftx
+        self.ffty = ffty
+        self.fftz = fftz
+        self.init_prim_io_names(inputs=['boxlength', 'uint_crd', 'charge'],
+                                outputs=['force'])
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+        self.add_prim_attr('beta', self.beta)
+        self.add_prim_attr('fftx', self.fftx)
+        self.add_prim_attr('ffty', self.ffty)
+        self.add_prim_attr('fftz', self.fftz)
+
+    def infer_shape(self, boxlength_shape, uint_crd_shape, charge_shape):
+        return uint_crd_shape
+
+    def infer_dtype(self, boxlength_type, uint_crd_type, charge_type):
+        validator.check_tensor_dtype_valid('boxlength_type', boxlength_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('uint_crd_type', uint_crd_type, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('charge_type', charge_type, [mstype.float32], self.name)
+        return charge_type
+
+
+class PMEExcludedForce(PrimitiveWithInfer):
+    """
+    PMEExcludedForce:
+
+    Calculate the excluded part of the long-range Coulomb force using the
+    PME (Particle Mesh Ewald) method. Assume the number of atoms is N, and
+    the length of the excluded list is E.
+
+    Inputs:
+        - **atom_numbers** (int32) - the number of atoms, N.
+        - **beta** (float32) - the PME beta parameter, determined by the
+          non-bond cutoff value and simulation precision tolerance.
+        - **scaler** (Tensor, float32) - [3,], the scale factor between real space
+          coordinates and its unsigned int value.
+        - **excluded_list_start** (Tensor, int32) - [N,], the start excluded index
+          in the excluded list for each atom.
+        - **excluded_numbers** (Tensor, int32) - [N,], the number of atoms excluded
+          in the excluded list for each atom.
+        - **excluded_list** (Tensor, int32) - [E,], the contiguous join of the
+          excluded lists of all atoms.
+
+        The rest of the inputs are the same as those of operator PMEReciprocalForce().
+
+    Outputs:
+        - **force** (Tensor, float32) - [N, 3], the force felt by each atom.
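+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+        A minimal placeholder sketch (the excluded list below is arbitrary and
+        only illustrates the expected shapes):
+
+        >>> import numpy as np
+        >>> import mindspore.common.dtype as mstype
+        >>> from mindspore import Tensor
+        >>> from mindspore.ops import operations as P
+        >>> # N = 4 atoms, E = 2 excluded entries; placeholder values only
+        >>> net = P.PMEExcludedForce(atom_numbers=4, beta=0.3)
+        >>> uint_crd = Tensor(np.random.randint(0, 2 ** 10, (4, 3)), mstype.uint32)
+        >>> scaler = Tensor(np.ones(3), mstype.float32)
+        >>> charge = Tensor(np.ones(4), mstype.float32)
+        >>> excluded_list_start = Tensor(np.array([0, 1, 2, 2]), mstype.int32)
+        >>> excluded_list = Tensor(np.array([1, 0]), mstype.int32)
+        >>> excluded_numbers = Tensor(np.array([1, 1, 0, 0]), mstype.int32)
+        >>> force = net(uint_crd, scaler, charge, excluded_list_start, excluded_list, excluded_numbers)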
+    """
+
+    @prim_attr_register
+    def __init__(self, atom_numbers, beta):
+        self.atom_numbers = atom_numbers
+        self.beta = beta
+        self.init_prim_io_names(
+            inputs=['uint_crd', 'scaler', 'charge', 'excluded_list_start', 'excluded_list', 'excluded_atom_numbers'],
+            outputs=['force'])
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+        self.add_prim_attr('beta', self.beta)
+
+    def infer_shape(self, uint_crd_shape, scaler_shape, charge_shape, excluded_list_start_shape, excluded_list_shape,
+                    excluded_atom_numbers_shape):
+        return uint_crd_shape
+
+    def infer_dtype(self, uint_crd_type, scaler_type, charge_type, excluded_list_start_type, excluded_list_type,
+                    excluded_atom_numbers_type):
+        validator.check_tensor_dtype_valid('scaler_type', scaler_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('uint_crd_type', uint_crd_type, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('charge_type', charge_type, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('excluded_list_start_type', excluded_list_start_type, [mstype.int32],
+                                           self.name)
+        validator.check_tensor_dtype_valid('excluded_list_type', excluded_list_type, [mstype.int32],
+                                           self.name)
+        validator.check_tensor_dtype_valid('excluded_atom_numbers_type', excluded_atom_numbers_type, [mstype.int32],
+                                           self.name)
+        return charge_type
+
+
+class PMEEnergy(PrimitiveWithInfer):
+    """
+    PMEEnergy:
+
+    Calculate the Coulomb energy of the system using the PME method.
+
+    .. math::
+
+        E = sum_{ij} q_i*q_j/r_{ij}
+
+    Inputs:
+        Same as those of operators PMEReciprocalForce(), PMEExcludedForce()
+        and PMEDirectAtomEnergy().
+
+    Outputs:
+        - **reciprocal_ene** (float32) - the reciprocal term of the PME energy.
+        - **self_ene** (float32) - the self term of the PME energy.
+        - **direct_ene** (float32) - the direct term of the PME energy.
+        - **correction_ene** (float32) - the correction term of the PME energy.
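+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+        A minimal placeholder sketch (the neighbor list is left empty and all
+        values are arbitrary; shapes follow the Inputs of the referenced operators):
+
+        >>> import numpy as np
+        >>> import mindspore.common.dtype as mstype
+        >>> from mindspore import Tensor
+        >>> from mindspore.ops import operations as P
+        >>> # N = 4 atoms, E = 2 excluded entries, at most 800 neighbors per atom
+        >>> net = P.PMEEnergy(atom_numbers=4, beta=0.3, fftx=4, ffty=4, fftz=4)
+        >>> box_length = Tensor(np.full(3, 100.0), mstype.float32)
+        >>> uint_crd = Tensor(np.random.randint(0, 2 ** 10, (4, 3)), mstype.uint32)
+        >>> charge = Tensor(np.ones(4), mstype.float32)
+        >>> nl_numbers = Tensor(np.zeros(4), mstype.int32)
+        >>> nl_serial = Tensor(np.zeros((4, 800)), mstype.int32)
+        >>> scaler = Tensor(np.ones(3), mstype.float32)
+        >>> excluded_list_start = Tensor(np.array([0, 1, 2, 2]), mstype.int32)
+        >>> excluded_list = Tensor(np.array([1, 0]), mstype.int32)
+        >>> excluded_numbers = Tensor(np.array([1, 1, 0, 0]), mstype.int32)
+        >>> reciprocal_ene, self_ene, direct_ene, correction_ene = net(
+        ...     box_length, uint_crd, charge, nl_numbers, nl_serial, scaler,
+        ...     excluded_list_start, excluded_list, excluded_numbers)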
+    """
+
+    @prim_attr_register
+    def __init__(self, atom_numbers, beta, fftx, ffty, fftz):
+        self.atom_numbers = atom_numbers
+        self.beta = beta
+        self.fftx = fftx
+        self.ffty = ffty
+        self.fftz = fftz
+        self.init_prim_io_names(
+            inputs=['box_length', 'uint_crd', 'charge', 'nl_numbers', 'nl_serial', 'scaler', 'excluded_list_start',
+                    'excluded_list', 'excluded_atom_numbers'],
+            outputs=['reciprocal_ene', 'self_ene', 'direct_ene', 'correction_ene'])
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+        self.add_prim_attr('beta', self.beta)
+        self.add_prim_attr('fftx', self.fftx)
+        self.add_prim_attr('ffty', self.ffty)
+        self.add_prim_attr('fftz', self.fftz)
+
+    def infer_shape(self, box_length, uint_crd, charge, nl_numbers, nl_serial, scaler, excluded_list_start,
+                    excluded_list, excluded_atom_numbers):
+        return (1,), (1,), (1,), (1,)
+
+    def infer_dtype(self, box_length, uint_crd, charge, nl_numbers, nl_serial, scaler, excluded_list_start,
+                    excluded_list, excluded_atom_numbers):
+        validator.check_tensor_dtype_valid('box_length', box_length, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('uint_crd', uint_crd, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('charge', charge, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('nl_numbers', nl_numbers, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('nl_serial', nl_serial, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('scaler', scaler, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('excluded_list_start', excluded_list_start, [mstype.int32],
+                                           self.name)
+        validator.check_tensor_dtype_valid('excluded_list', excluded_list, [mstype.int32],
+                                           self.name)
+        validator.check_tensor_dtype_valid('excluded_atom_numbers', excluded_atom_numbers, [mstype.int32],
+                                           self.name)
+        return charge, charge, charge, charge
+
+
+class LJEnergy(PrimitiveWithInfer):
+    """
+    LJEnergy:
+
+    Calculate the Van der Waals interaction energy described by the Lennard-Jones
+    potential for each atom. Assume the number of atoms is N, and the number
+    of Lennard-Jones types for all atoms is P, which means there will be
+    Q = P*(P+1)/2 types of possible Lennard-Jones interactions for all kinds
+    of atom pairs.
+
+    .. math::
+
+        dr = (x_a-x_b, y_a-y_b, z_a-z_b)
+        E = A/|dr|^{12} - B/|dr|^{6}
+
+    Inputs:
+        - **atom_numbers** (int32) - the number of atoms, N.
+        - **cutoff_square** (float32) - the square value of the cutoff.
+        - **uint_crd** (Tensor, uint32) - [N, 3], the unsigned int coordinate
+          value of each atom.
+        - **LJtype** (Tensor, int32) - [N,], the Lennard-Jones type of each
+          atom.
+        - **charge** (Tensor, float32) - [N,], the charge carried by each
+          atom.
+        - **scaler** (Tensor, float32) - [3,], the scale factor between real
+          space coordinates and its unsigned int value.
+        - **nl_numbers** (Tensor, int32) - [N,], the number of atoms in the
+          neighbor list of each atom.
+        - **nl_serial** (Tensor, int32) - [N, 800], the neighbor list of each atom,
+          the max number is 800.
+        - **d_LJ_A** (Tensor, float32) - [Q,], the Lennard-Jones A coefficient
+          of each kind of atom pair.
+        - **d_LJ_B** (Tensor, float32) - [Q,], the Lennard-Jones B coefficient
+          of each kind of atom pair.
+
+    Outputs:
+        - **d_LJ_energy_atom** (Tensor, float32) - [N,], the Lennard-Jones
+          potential energy of each atom.
+        - **d_LJ_energy_sum** (float32), the sum of the Lennard-Jones potential
+          energy of all atoms.
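+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+        A minimal placeholder sketch (an empty neighbor list with arbitrary
+        coefficients; shapes only):
+
+        >>> import numpy as np
+        >>> import mindspore.common.dtype as mstype
+        >>> from mindspore import Tensor
+        >>> from mindspore.ops import operations as P
+        >>> # N = 4 atoms, Q = 1 pair type; placeholder values only
+        >>> net = P.LJEnergy(atom_numbers=4, cutoff_square=100.0)
+        >>> uint_crd = Tensor(np.random.randint(0, 2 ** 10, (4, 3)), mstype.uint32)
+        >>> lj_type, charge = Tensor(np.zeros(4), mstype.int32), Tensor(np.ones(4), mstype.float32)
+        >>> scaler = Tensor(np.ones(3), mstype.float32)
+        >>> nl_numbers = Tensor(np.zeros(4), mstype.int32)
+        >>> nl_serial = Tensor(np.zeros((4, 800)), mstype.int32)
+        >>> d_lj_a, d_lj_b = Tensor(np.ones(1), mstype.float32), Tensor(np.ones(1), mstype.float32)
+        >>> d_lj_energy_atom = net(uint_crd, lj_type, charge, scaler, nl_numbers, nl_serial, d_lj_a, d_lj_b)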
+    """
+
+    @prim_attr_register
+    def __init__(self, atom_numbers, cutoff_square):
+        self.atom_numbers = atom_numbers
+        self.cutoff_square = cutoff_square
+        self.init_prim_io_names(
+            inputs=['uint_crd', 'LJtype', 'charge', 'scaler', 'nl_numbers', 'nl_serial', 'd_LJ_A', 'd_LJ_B'],
+            outputs=['d_LJ_energy_atom'])
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+        self.add_prim_attr('cutoff_square', self.cutoff_square)
+
+    def infer_shape(self, uint_crd, LJtype, charge, scaler, nl_numbers, nl_serial, d_LJ_A, d_LJ_B):
+        return charge
+
+    def infer_dtype(self, uint_crd, LJtype, charge, scaler, nl_numbers, nl_serial, d_LJ_A, d_LJ_B):
+        validator.check_tensor_dtype_valid('uint_crd', uint_crd, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('LJtype', LJtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('charge', charge, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('scaler', scaler, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('nl_numbers', nl_numbers, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('nl_serial', nl_serial, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('d_LJ_A', d_LJ_A, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('d_LJ_B', d_LJ_B, [mstype.float32], self.name)
+        return charge
+
+
+class LJForce(PrimitiveWithInfer):
+    """
+    LJForce:
+
+    Calculate the Van der Waals interaction force described by the Lennard-Jones
+    potential energy for each atom.
+
+    .. math::
+
+        dr = (x_a-x_b, y_a-y_b, z_a-z_b)
+        F = (-12*A/|dr|^{14} + 6*B/|dr|^{8}) * dr
+
+    Inputs:
+        Same as operator LJEnergy().
+
+    Outputs:
+        - **frc** (Tensor, float32) - [N, 3], the force felt by each atom.
+
+    Supported Platforms:
+        ``GPU``
+    Examples:
+    """
+
+    @prim_attr_register
+    def __init__(self, atom_numbers, cutoff_square):
+        self.atom_numbers = atom_numbers
+        self.cutoff_square = cutoff_square
+        self.init_prim_io_names(
+            inputs=['uint_crd', 'LJtype', 'charge', 'scaler', 'nl_numbers', 'nl_serial', 'd_LJ_A', 'd_LJ_B'],
+            outputs=['frc'])
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+        self.add_prim_attr('cutoff_square', self.cutoff_square)
+
+    def infer_shape(self, uint_crd, LJtype, charge, scaler, nl_numbers, nl_serial, d_LJ_A, d_LJ_B):
+        return uint_crd
+
+    def infer_dtype(self, uint_crd, LJtype, charge, scaler, nl_numbers, nl_serial, d_LJ_A, d_LJ_B):
+        validator.check_tensor_dtype_valid('uint_crd', uint_crd, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('LJtype', LJtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('charge', charge, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('scaler', scaler, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('nl_numbers', nl_numbers, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('nl_serial', nl_serial, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('d_LJ_A', d_LJ_A, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('d_LJ_B', d_LJ_B, [mstype.float32], self.name)
+        return charge
+
+
+class LJForceWithPMEDirectForce(PrimitiveWithInfer):
+    """
+    LJForceWithPMEDirectForce:
+
+    Calculate the Lennard-Jones force and the PME direct force together.
+
+    The calculation formula of the Lennard-Jones part is the same as operator
+    LJForce(), and the PME direct part follows the direct term of the
+    PME method.
+
+    Inputs:
+        - **pme_beta** (float32), the PME beta parameter, same as operator
+          PMEReciprocalForce().
+
+        The rest of the inputs are the same as those of operator LJForce().
+
+    Outputs:
+        - **frc** (Tensor, float32) - [N, 3], the force felt by each atom.
+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+    """
+
+    @prim_attr_register
+    def __init__(self, atom_numbers, cutoff, pme_beta):
+        self.atom_numbers = atom_numbers
+        self.cutoff = cutoff
+        self.pme_beta = pme_beta
+        self.init_prim_io_names(
+            inputs=['uint_crd', 'LJtype', 'charge', 'scaler', 'nl_numbers', 'nl_serial', 'd_LJ_A', 'd_LJ_B'],
+            outputs=['frc'])
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+        self.add_prim_attr('cutoff', self.cutoff)
+        self.add_prim_attr('pme_beta', self.pme_beta)
+
+    def infer_shape(self, uint_crd, LJtype, charge, scaler, nl_numbers, nl_serial, d_LJ_A, d_LJ_B):
+        return uint_crd
+
+    def infer_dtype(self, uint_crd, LJtype, charge, scaler, nl_numbers, nl_serial, d_LJ_A, d_LJ_B):
+        validator.check_tensor_dtype_valid('uint_crd', uint_crd, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('LJtype', LJtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('charge', charge, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('scaler', scaler, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('nl_numbers', nl_numbers, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('nl_serial', nl_serial, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('d_LJ_A', d_LJ_A, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('d_LJ_B', d_LJ_B, [mstype.float32], self.name)
+        return charge
+
+
+class GetCenterOfGeometry(PrimitiveWithInfer):
+    """
+    GetCenterOfGeometry:
+
+    Calculate the geometric center of the given set of atoms.
+
+    Supported Platforms:
+        ``GPU``
+    Examples:
+    """
+
+    @prim_attr_register
+    def __init__(self, center_numbers, center_numbers_inverse):
+        self.center_numbers = center_numbers
+        self.center_numbers_inverse = center_numbers_inverse
+        self.add_prim_attr('center_numbers', self.center_numbers)
+        self.add_prim_attr('center_numbers_inverse', self.center_numbers_inverse)
+        self.init_prim_io_names(
+            inputs=['center_atoms', 'crd_f'],
+            outputs=['center_of_geometry_f'])
+
+    def infer_shape(self, center_atoms_shape, crd_f_shape):
+        cls_name = self.name
+        N = self.center_numbers
+        validator.check_int(center_atoms_shape[0], N, Rel.EQ, "center_atoms_shape", cls_name)
+        validator.check_int(crd_f_shape[0], N, Rel.EQ, "crd_f_shape", cls_name)
+        validator.check_int(crd_f_shape[1], 3, Rel.EQ, "crd_f_shape", cls_name)
+        return [3,]
+
+    def infer_dtype(self, center_atoms_dtype, crd_f_dtype):
+        validator.check_tensor_dtype_valid('center_atoms_dtype', center_atoms_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('crd_f_dtype', crd_f_dtype, [mstype.float32], self.name)
+
+        return crd_f_dtype
+
+
+class MDTemperature(PrimitiveWithInfer):
+    """
+    MDTemperature:
+
+    Calculate the temperature of each residue according to the velocities
+    and masses of the atoms it contains.
+
+    Supported Platforms:
+        ``GPU``
+    Examples:
+    """
+
+    @prim_attr_register
+    def __init__(self, residue_numbers):
+        self.residue_numbers = residue_numbers
+        self.add_prim_attr('residue_numbers', self.residue_numbers)
+        self.init_prim_io_names(
+            inputs=['start', 'end', 'atom_vel_f', 'atom_mass'],
+            outputs=['ek'])
+
+    def infer_shape(self, start_shape, end_shape, atom_vel_f_shape, atom_mass_shape):
+        cls_name = self.name
+        N = self.residue_numbers
+        validator.check_int(start_shape[0], N, Rel.EQ, "start_shape", cls_name)
+        validator.check_int(end_shape[0], N, Rel.EQ, "end_shape", cls_name)
+        validator.check_int(atom_vel_f_shape[0], N, Rel.EQ, "atom_vel_f_shape", cls_name)
+        validator.check_int(atom_vel_f_shape[1], 3, Rel.EQ, "atom_vel_f_shape", cls_name)
+        validator.check_int(atom_mass_shape[0], N, Rel.EQ, "atom_mass_shape", cls_name)
+        return [N,]
+
+    def infer_dtype(self, start_dtype, end_dtype, atom_vel_f_dtype, atom_mass_dtype):
+        validator.check_tensor_dtype_valid('start_dtype', start_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('end_dtype', end_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('atom_vel_f_dtype', atom_vel_f_dtype, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('atom_mass_dtype', atom_mass_dtype, [mstype.float32], self.name)
+        return atom_mass_dtype
+
+
+class NeighborListUpdate(PrimitiveWithInfer):
+    """
+    NeighborListUpdate:
+
+    Update (or construct, if for the first time) the Verlet neighbor list for
+    the calculation of the short-ranged force. Assume the number of atoms is N,
+    the number of grids divided is G, the maximum number of atoms in one
+    grid is M, the maximum number of atoms in a single atom's neighbor list
+    is L, and the number of total atoms in the excluded list is E.
+
+    Inputs:
+        - **grid_numbers** (int32) - the total number of grids divided.
+        - **refresh_count** (int32) - the counter which counts how many
+          iteration steps have passed since the last update.
+        - **not_first_time** (int32) - whether to construct the neighbor
+          list for the first time or not.
+        - **Nxy** (int32) - the total number of grids divided in the xy plane.
+        - **excluded_atom_numbers** (int32) - the total atom numbers in
+          the excluded list.
+        - **cutoff** (float32) - the cutoff distance for the short-range force
+          calculation.
+        - **skin** (float32) - the overflow value over the cutoff to maintain
+          the neighbor list.
+        - **cutoff_square** (float32) - the square value of the cutoff.
+        - **half_skin_square** (float32) - skin*skin/4, indicates the maximum
+          square value of the distance an atom is allowed to move between two
+          updates.
+        - **cutoff_with_skin** (float32) - cutoff + skin, indicates the
+          radius of the neighbor list for each atom.
+        - **half_cutoff_with_skin** (float32) - cutoff_with_skin/2.
+        - **cutoff_with_skin_square** (float32) - the square value of
+          cutoff_with_skin.
+        - **refresh_interval** (int32) - the number of iteration steps
+          between two updates of the neighbor list.
+        - **max_atom_in_grid_numbers** (int32) - the maximum number of atoms
+          in one grid.
+
+        - **atom_numbers_in_grid_bucket** (Tensor, int32) - [G,], the number
+          of atoms in each grid bucket.
+        - **bucket** (Tensor, int32) - [G, M], the atom indices in each grid
+          bucket.
+        - **crd** (Tensor, float32) - [N, 3], the coordinates of each atom.
+        - **box_length** (Tensor, float32) - [3,], the length of the 3 dimensions
+          of the simulation box.
+        - **grid_N** (Tensor, int32) - [3,], the number of grids divided in the
+          3 dimensions of the simulation box.
+        - **grid_length_inverse** (Tensor, float32) - [3,], the inverse value of
+          the grid length.
+        - **atom_in_grid_serial** (Tensor, int32) - [N,], the grid index for
+          each atom.
+        - **old_crd** (Tensor, float32) - [N, 3], the coordinates of each atom
+          before the update.
+        - **crd_to_uint_crd_cof** (Tensor, float32) - [3,], the scale factor
+          between the unsigned int value and the real space coordinates.
+        - **uint_crd** (Tensor, uint32) - [N, 3], the unsigned int coordinate
+          value of each atom.
+        - **gpointer** (Tensor, int32) - [G, 125], the 125 nearest neighbor grids
+          (including self) of each grid.
+        - **nl_atom_numbers** (Tensor, int32) - [N,], the number of atoms in the
+          neighbor list of each atom.
+        - **nl_atom_serial** (Tensor, int32) - [N, L], the indices of the atoms in
+          the neighbor list of each atom.
+        - **uint_dr_to_dr_cof** (Tensor, float32) - [3,], the scale factor between
+          the real space coordinates and the unsigned int value.
+        - **excluded_list_start** (Tensor, int32) - [N,], the start excluded
+          index in the excluded list for each atom.
+        - **excluded_numbers** (Tensor, int32) - [N,], the number of atoms excluded
+          in the excluded list for each atom.
+        - **excluded_list** (Tensor, int32) - [E,], the contiguous join of the
+          excluded lists of all atoms.
+        - **need_refresh_flag** (Tensor, int32) - [N,], whether the neighbor list
+          of each atom needs to be updated or not.
+
+    Outputs:
+        - **res** (float32)
+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+    """
+
+    @prim_attr_register
+    def __init__(self, grid_numbers, atom_numbers, refresh_count, not_first_time, Nxy, excluded_atom_numbers,
+                 cutoff_square, half_skin_square, cutoff_with_skin, half_cutoff_with_skin, cutoff_with_skin_square,
+                 refresh_interval=20, cutoff=10.0, skin=2.0, max_atom_in_grid_numbers=64, max_neighbor_numbers=800):
+        self.grid_numbers = grid_numbers
+        self.atom_numbers = atom_numbers
+        self.refresh_count = refresh_count
+        self.refresh_interval = refresh_interval
+        self.not_first_time = not_first_time
+        self.cutoff = cutoff
+        self.skin = skin
+        self.max_atom_in_grid_numbers = max_atom_in_grid_numbers
+        self.Nxy = Nxy
+        self.excluded_atom_numbers = excluded_atom_numbers
+        self.cutoff_square = cutoff_square
+        self.half_skin_square = half_skin_square
+        self.cutoff_with_skin = cutoff_with_skin
+        self.half_cutoff_with_skin = half_cutoff_with_skin
+        self.cutoff_with_skin_square = cutoff_with_skin_square
+        self.max_neighbor_numbers = max_neighbor_numbers
+        self.init_prim_io_names(
+            inputs=['atom_numbers_in_grid_bucket', 'bucket', 'crd', 'box_length', 'grid_N', 'grid_length_inverse',
+                    'atom_in_grid_serial', 'old_crd', 'crd_to_uint_crd_cof', 'uint_crd', 'gpointer', 'nl_atom_numbers',
+                    'nl_atom_serial', 'uint_dr_to_dr_cof', 'excluded_list_start', 'excluded_list', 'excluded_numbers',
+                    'need_refresh_flag'], outputs=['res'])
+
+        self.add_prim_attr('grid_numbers', self.grid_numbers)
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+        self.add_prim_attr('refresh_count', self.refresh_count)
+        self.add_prim_attr('refresh_interval', self.refresh_interval)
+        self.add_prim_attr('not_first_time', self.not_first_time)
+        self.add_prim_attr('cutoff', self.cutoff)
+        self.add_prim_attr('skin', self.skin)
+        self.add_prim_attr('max_atom_in_grid_numbers', self.max_atom_in_grid_numbers)
+        self.add_prim_attr('Nxy', self.Nxy)
+        self.add_prim_attr('excluded_atom_numbers', self.excluded_atom_numbers)
+        self.add_prim_attr('cutoff_square', self.cutoff_square)
+        self.add_prim_attr('half_skin_square', self.half_skin_square)
+        self.add_prim_attr('cutoff_with_skin', self.cutoff_with_skin)
+        self.add_prim_attr('half_cutoff_with_skin', self.half_cutoff_with_skin)
+        self.add_prim_attr('cutoff_with_skin_square', self.cutoff_with_skin_square)
+
+    def infer_shape(self, atom_numbers_in_grid_bucket_shape, bucket_shape, crd_shape, box_length_shape, grid_N_shape,
+                    grid_length_inverse_shape, atom_in_grid_serial_shape, old_crd_shape, crd_to_uint_crd_cof_shape,
+                    uint_crd_shape, gpointer_shape, nl_atom_numbers_shape, nl_atom_serial_shape,
+                    uint_dr_to_dr_cof_shape, excluded_list_start_shape, excluded_list_shape, excluded_numbers_shape,
+                    need_refresh_flag_shape):
+        assert len(atom_numbers_in_grid_bucket_shape) == 1
+        assert len(bucket_shape) == 2
+        assert len(crd_shape) == 2
+        assert len(box_length_shape) == 1
+        assert len(grid_N_shape) == 1
+        assert len(grid_length_inverse_shape) == 1
+        assert len(atom_in_grid_serial_shape) == 1
+        assert len(old_crd_shape) == 2
+        assert len(crd_to_uint_crd_cof_shape) == 1
+        assert len(uint_crd_shape) == 2
+        assert len(gpointer_shape) == 2
+        assert len(nl_atom_numbers_shape) == 1
+        assert len(nl_atom_serial_shape) == 2
+        assert len(uint_dr_to_dr_cof_shape) == 1
+        assert len(excluded_list_start_shape) == 1
+        assert len(excluded_list_shape) == 1
+        assert len(excluded_numbers_shape) == 1
+        assert len(need_refresh_flag_shape) == 1
+
+        validator.check_int(atom_numbers_in_grid_bucket_shape[0], self.grid_numbers, Rel.EQ,
+                            "atom_numbers_in_grid_bucket_shape", self.name)
+        validator.check_int(bucket_shape[0], self.grid_numbers, Rel.EQ, "bucket_shape", self.name)
+        validator.check_int(bucket_shape[1], self.max_atom_in_grid_numbers, Rel.EQ, "bucket_shape", self.name)
+        validator.check_int(crd_shape[0], self.atom_numbers, Rel.EQ, "crd_shape", self.name)
+        validator.check_int(crd_shape[1], 3, Rel.EQ, "crd_shape", self.name)
+        validator.check_int(box_length_shape[0], 3, Rel.EQ, "box_length_shape", self.name)
+        validator.check_int(grid_N_shape[0], 3, Rel.EQ, "grid_N_shape", self.name)
+        validator.check_int(grid_length_inverse_shape[0], 3, Rel.EQ, "grid_length_inverse_shape", self.name)
+        validator.check_int(atom_in_grid_serial_shape[0], self.atom_numbers, Rel.EQ, "atom_in_grid_serial_shape",
+                            self.name)
+        validator.check_int(old_crd_shape[0], self.atom_numbers, Rel.EQ, "old_crd_shape", self.name)
+        validator.check_int(old_crd_shape[1], 3, Rel.EQ, "old_crd_shape", self.name)
+        validator.check_int(crd_to_uint_crd_cof_shape[0], 3, Rel.EQ, "crd_to_uint_crd_cof_shape", self.name)
+        validator.check_int(uint_crd_shape[0], self.atom_numbers, Rel.EQ, "uint_crd_shape", self.name)
+        validator.check_int(uint_crd_shape[1], 3, Rel.EQ, "uint_crd_shape", self.name)
+        validator.check_int(gpointer_shape[0], self.grid_numbers, Rel.EQ, "gpointer_shape", self.name)
+        validator.check_int(gpointer_shape[1], 125, Rel.EQ, "gpointer_shape", self.name)
+        validator.check_int(nl_atom_numbers_shape[0], self.atom_numbers, Rel.EQ, "nl_atom_numbers_shape", self.name)
+        validator.check_int(nl_atom_serial_shape[0], self.atom_numbers, Rel.EQ, "nl_atom_serial_shape", self.name)
+        validator.check_int(nl_atom_serial_shape[1], self.max_neighbor_numbers, Rel.EQ, "nl_atom_serial_shape",
+                            self.name)
+        validator.check_int(uint_dr_to_dr_cof_shape[0], 3, Rel.EQ, "uint_dr_to_dr_cof_shape", self.name)
+        validator.check_int(excluded_list_start_shape[0], self.atom_numbers, Rel.EQ, "excluded_list_start_shape",
+                            self.name)
+        validator.check_int(excluded_list_shape[0], self.excluded_atom_numbers, Rel.EQ, "excluded_list_shape",
+                            self.name)
+        validator.check_int(excluded_numbers_shape[0], self.atom_numbers, Rel.EQ, "excluded_numbers_shape", self.name)
+        validator.check_int(need_refresh_flag_shape[0], 1, Rel.EQ, "need_refresh_flag_shape", self.name)
+
+        return [1,]
+
+    def infer_dtype(self, atom_numbers_in_grid_bucket_dtype, bucket_dtype, crd_dtype, box_length_dtype, grid_N_dtype,
+                    grid_length_inverse_dtype, atom_in_grid_serial_dtype, old_crd_dtype, crd_to_uint_crd_cof_dtype,
+                    uint_crd_dtype, gpointer_dtype, nl_atom_numbers_dtype, nl_atom_serial_dtype,
+                    uint_dr_to_dr_cof_dtype, excluded_list_start_dtype, excluded_list_dtype, excluded_numbers_dtype,
+                    need_refresh_flag_dtype):
+        validator.check_tensor_dtype_valid('atom_numbers_in_grid_bucket_dtype', atom_numbers_in_grid_bucket_dtype,
+                                           [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('bucket_dtype', bucket_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('crd_dtype', crd_dtype, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('box_length_dtype', box_length_dtype, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('grid_N_dtype', grid_N_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('grid_length_inverse_dtype', grid_length_inverse_dtype, [mstype.float32],
+                                           self.name)
+        validator.check_tensor_dtype_valid('atom_in_grid_serial_dtype', atom_in_grid_serial_dtype, [mstype.int32],
+                                           self.name)
+        validator.check_tensor_dtype_valid('old_crd_dtype', old_crd_dtype, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('crd_to_uint_crd_cof_dtype', crd_to_uint_crd_cof_dtype, [mstype.float32],
+                                           self.name)
+        validator.check_tensor_dtype_valid('uint_crd_dtype', uint_crd_dtype, [mstype.uint32], self.name)
+        validator.check_tensor_dtype_valid('gpointer_dtype', gpointer_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('nl_atom_numbers_dtype', nl_atom_numbers_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('nl_atom_serial_dtype', nl_atom_serial_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('uint_dr_to_dr_cof_dtype', uint_dr_to_dr_cof_dtype, [mstype.float32],
+                                           self.name)
+        validator.check_tensor_dtype_valid('excluded_list_start_dtype', excluded_list_start_dtype, [mstype.int32],
+                                           self.name)
+        validator.check_tensor_dtype_valid('excluded_list_dtype', excluded_list_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('excluded_numbers_dtype', excluded_numbers_dtype, [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid('need_refresh_flag_dtype', need_refresh_flag_dtype, [mstype.int32],
+                                           self.name)
+
+        return mstype.float32