parent 0e27a04da1
commit 6719169a7f

This commit registers int16 and uint8 variants of the ScatterNd GPU kernel, adds the matching explicit template instantiations in the CUDA implementation, and renames the atomic helper ms_atomic_add to MsAtomicAdd.
@@ -1,33 +1,39 @@
 /**
  * Copyright 2020 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #include "backend/kernel_compiler/gpu/arrays/scatter_nd_gpu_kernel.h"

 namespace mindspore {
 namespace kernel {
 MS_REG_GPU_KERNEL_TWO(
   ScatterNd,
   KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
   ScatterNdGpuFwdKernel, float, int)
 MS_REG_GPU_KERNEL_TWO(
   ScatterNd,
   KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
   ScatterNdGpuFwdKernel, half, int)
 MS_REG_GPU_KERNEL_TWO(
   ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
   ScatterNdGpuFwdKernel, int, int)
+MS_REG_GPU_KERNEL_TWO(
+  ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16),
+  ScatterNdGpuFwdKernel, short, int)  // NOLINT
+MS_REG_GPU_KERNEL_TWO(
+  ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8),
+  ScatterNdGpuFwdKernel, uchar, int)
 }  // namespace kernel
 }  // namespace mindspore
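For readers tracing the indexing math in the CUDA kernel in the second hunk below, here is a minimal single-threaded C++ sketch of the same scatter-nd accumulation. It is illustrative only: CpuScatterNd is a hypothetical name, not MindSpore API, and it drops indices_dim_0, which the GPU kernel receives but never reads. Each of the index rows selects a destination offset via indices_stride (row-major strides of the output), and a contiguous run of block_size elements from update is accumulated there. The caller is assumed to zero-initialize output, matching the GPU kernel's contract.

#include <cstddef>

// CPU reference for the ScatterNd kernel semantics (a sketch, not MindSpore code).
template <typename T, typename S>
void CpuScatterNd(const S *indices, const T *update, T *output, const size_t block_size, const size_t input_size,
                  const size_t output_size, const size_t indices_dim_1, const S *indices_stride,
                  const S *work_shape) {
  for (size_t read_index = 0; read_index < input_size; ++read_index) {
    const size_t i = read_index / block_size;  // which indices row drives this element
    const size_t j = read_index % block_size;  // offset inside the scattered block
    S write_index = 0;
    bool out_bound = false;
    for (size_t k = 0; k < indices_dim_1; ++k) {
      const S idx = indices[i * indices_dim_1 + k];
      out_bound |= idx >= work_shape[k];       // same bound check as the kernel
      write_index += idx * indices_stride[k];  // row-major offset into output
    }
    write_index += static_cast<S>(j);
    out_bound |= static_cast<size_t>(write_index) >= output_size;
    if (!out_bound) {
      output[write_index] += update[read_index];  // serial stand-in for MsAtomicAdd
    }
  }
}

Duplicate index rows accumulate into the same write_index, which is why the parallel GPU version below needs an atomic add rather than a plain store.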
@@ -1,70 +1,80 @@
 /**
  * Copyright 2020 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #include "backend/kernel_compiler/gpu/cuda_impl/scatter_nd.cuh"
 #include "backend/kernel_compiler/gpu/cuda_impl/util.cuh"
 #include "runtime/device/gpu/cuda_common.h"

 template <typename T, typename S>
 __global__ void ScatterNdKernel(S *indices, T *update, T *output, const size_t block_size, const size_t input_size,
                                 const size_t output_size, const size_t indices_dim_0, const size_t indices_dim_1,
                                 S *indices_stride, S *work_shape) {
   int i, j;
   for (int read_index = blockIdx.x * blockDim.x + threadIdx.x; read_index < input_size;
        read_index += blockDim.x * gridDim.x) {
     int write_index = 0;
     bool out_bound = false;

     i = read_index / block_size;
     j = read_index % block_size;

     for (size_t k = 0; k < indices_dim_1; k++) {
       S indices_i = indices[i * indices_dim_1 + k];
       out_bound |= indices_i >= work_shape[k];
       write_index += indices_i * indices_stride[k];
     }

     write_index += j;
     out_bound |= write_index >= output_size;

     if (!out_bound) {
-      ms_atomic_add(&output[write_index], update[read_index]);
+      MsAtomicAdd(&output[write_index], update[read_index]);
     }
   }
 }

 template <typename T, typename S>
 void ScatterNd(S *indices, T *update, T *output, const size_t &block_size, const size_t &input_size,
                const size_t &output_size, const size_t &indices_dim_0, const size_t &indices_dim_1, S *indices_stride,
                S *work_shape, cudaStream_t stream) {
   ScatterNdKernel<<<GET_BLOCKS(output_size), GET_THREADS, 0, stream>>>(indices, update, output, block_size, input_size,
                                                                        output_size, indices_dim_0, indices_dim_1,
                                                                        indices_stride, work_shape);
   return;
 }

 template void ScatterNd<float, int>(int *indices, float *update, float *output, const size_t &block_size,
                                     const size_t &input_size, const size_t &output_size, const size_t &indices_dim_0,
                                     const size_t &indices_dim_1, int *indices_stride, int *work_shape,
                                     cudaStream_t stream);
 template void ScatterNd<half, int>(int *indices, half *update, half *output, const size_t &block_size,
                                    const size_t &input_size, const size_t &output_size, const size_t &indices_dim_0,
                                    const size_t &indices_dim_1, int *indices_stride, int *work_shape,
                                    cudaStream_t stream);
 template void ScatterNd<int, int>(int *indices, int *update, int *output, const size_t &block_size,
                                   const size_t &input_size, const size_t &output_size, const size_t &indices_dim_0,
                                   const size_t &indices_dim_1, int *indices_stride, int *work_shape,
                                   cudaStream_t stream);
+// NOLINTNEXTLINE
+template void ScatterNd<short, int>(int *indices, short *update, short *output, const size_t &block_size,
+                                    const size_t &input_size, const size_t &output_size, const size_t &indices_dim_0,
+                                    const size_t &indices_dim_1, int *indices_stride, int *work_shape,
+                                    cudaStream_t stream);
+template void ScatterNd<unsigned char, int>(int *indices, unsigned char *update, unsigned char *output,
+                                            const size_t &block_size, const size_t &input_size,
+                                            const size_t &output_size, const size_t &indices_dim_0,
+                                            const size_t &indices_dim_1, int *indices_stride, int *work_shape,
+                                            cudaStream_t stream);
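The new short and unsigned char instantiations depend on MsAtomicAdd from util.cuh, because CUDA's built-in atomicAdd has no 16-bit integer or 8-bit overloads. A common way to provide one is a compare-and-swap loop on the aligned 32-bit word containing the element. The sketch below shows the general technique under a hypothetical name (AtomicAddInt16), assuming a 2-byte-aligned address and the little-endian layout of NVIDIA GPUs; the actual util.cuh implementation may differ.

#include <cstdint>

// Emulated 16-bit atomic add via atomicCAS on the enclosing 32-bit word
// (an illustrative sketch, not MindSpore's MsAtomicAdd).
__device__ inline short AtomicAddInt16(short *address, short val) {
  // Locate the 4-byte-aligned word holding this 2-byte element.
  uintptr_t addr = reinterpret_cast<uintptr_t>(address);
  unsigned int *base = reinterpret_cast<unsigned int *>(addr & ~static_cast<uintptr_t>(3));
  const bool high_half = (addr & 2) != 0;  // element sits in the upper 16 bits
  unsigned int old_word = *base;
  unsigned int assumed;
  do {
    assumed = old_word;
    // Extract the current 16-bit value, add, and splice the sum back into the word.
    const unsigned short cur =
        high_half ? static_cast<unsigned short>(assumed >> 16) : static_cast<unsigned short>(assumed & 0xffffu);
    const unsigned short sum = static_cast<unsigned short>(static_cast<short>(cur) + val);
    const unsigned int new_word = high_half ? ((assumed & 0x0000ffffu) | (static_cast<unsigned int>(sum) << 16))
                                            : ((assumed & 0xffff0000u) | sum);
    old_word = atomicCAS(base, assumed, new_word);
  } while (assumed != old_word);  // retry if another thread changed the word meanwhile
  return high_half ? static_cast<short>(old_word >> 16) : static_cast<short>(old_word & 0xffffu);
}

An 8-bit version follows the same pattern, with (addr & 3) selecting one of four byte lanes. Note also that the launcher sizes the grid with GET_BLOCKS(output_size) while the grid-stride loop is bounded by input_size; the grid-stride pattern keeps the kernel correct for any grid size, so over-provisioned threads simply exit.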