primitive for im2col; fix bug; clang code format; fix pylint; fix license; delete useless code
parent 657b547116
commit 4fce4c7c34
@@ -0,0 +1,40 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "identity_impl.cuh"
#include <iostream>
// Each thread writes one element of a batch of dim x dim matrices: 1 on the
// diagonal, 0 everywhere else.
template <typename T>
__global__ void IdentityKernel(const size_t size, const size_t dim, T *output_addr) {
  for (size_t pointIdx = blockIdx.x * blockDim.x + threadIdx.x; pointIdx < (size); pointIdx += blockDim.x * gridDim.x) {
    size_t batchIdx = pointIdx / (dim * dim);
    size_t dst_x = (pointIdx - batchIdx * dim * dim) / dim;
    size_t dst_y = (pointIdx - batchIdx * dim * dim) % dim;
    if (dst_x == dst_y) {
      output_addr[pointIdx] = 1;
    } else {
      output_addr[pointIdx] = 0;
    }
  }
}

template <typename T>
void Identity(const size_t size, const size_t dim, T *output_addr, cudaStream_t cuda_stream) {
  IdentityKernel<<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(size, dim, output_addr);
  return;
}

template void Identity<float>(const size_t size, const size_t dim, float *output_addr, cudaStream_t cuda_stream);
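For context, a minimal host-side sketch of how Identity might be driven. It is not part of the patch: the values of batch and dim are illustrative, error checking is omitted, and it assumes identity_impl.cuh is on the include path.

// Hypothetical driver for the Identity kernel above (illustration only).
#include <cstddef>
#include <cuda_runtime.h>
#include "identity_impl.cuh"

int main() {
  const size_t batch = 4, dim = 8;        // example sizes, not from the patch
  const size_t size = batch * dim * dim;  // one dim x dim matrix per batch entry
  float *d_out = nullptr;
  cudaMalloc(&d_out, size * sizeof(float));
  cudaStream_t stream;
  cudaStreamCreate(&stream);
  Identity(size, dim, d_out, stream);  // fills `batch` identity matrices
  cudaStreamSynchronize(stream);
  cudaFree(d_out);
  cudaStreamDestroy(stream);
  return 0;
}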
@@ -0,0 +1,24 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_IDENTITY_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_IDENTITY_H_

#include "runtime/device/gpu/cuda_common.h"
template <typename T>
void Identity(const size_t size, const size_t dim, T *output_addr, cudaStream_t cuda_stream);

#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_IDENTITY_H_
@@ -0,0 +1,72 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "matrix_combine_impl.cuh"
#include <iostream>
// Copies every src_height x src_width input block onto the diagonal of a
// dst_width-wide output, assembling a block-diagonal matrix.
template <typename T>
__global__ void MatrixCombineKernel(const size_t size, const size_t src_height, const size_t src_width,
                                    const size_t dst_width, T *input_addr, T *output_addr) {
  for (size_t pointIdx = blockIdx.x * blockDim.x + threadIdx.x; pointIdx < (size); pointIdx += blockDim.x * gridDim.x) {
    size_t batchIdx = pointIdx / (src_height * src_width);
    size_t src_h = (pointIdx - batchIdx * src_height * src_width) / src_width;
    size_t src_w = (pointIdx - batchIdx * src_height * src_width) % src_width;
    size_t dst_h = src_height * batchIdx + src_h;
    size_t dst_w = src_width * batchIdx + src_w;
    output_addr[dst_h * dst_width + dst_w] = input_addr[pointIdx];
  }
}

// Variant for a residual last block: the final block is only res_width columns
// wide, so its elements are read from a narrower source region.
template <typename T>
__global__ void MatrixCombineKernel(const size_t size, const size_t src_height, const size_t src_width,
                                    const size_t dst_width, const size_t res_width, const size_t batch, T *input_addr,
                                    T *output_addr) {
  for (size_t pointIdx = blockIdx.x * blockDim.x + threadIdx.x; pointIdx < (size); pointIdx += blockDim.x * gridDim.x) {
    size_t batchIdx = pointIdx / (src_height * src_width);
    if (batchIdx != (batch - 1)) {
      size_t src_h = (pointIdx - batchIdx * src_height * src_width) / src_width;
      size_t src_w = (pointIdx - batchIdx * src_height * src_width) % src_width;
      size_t dst_h = src_height * batchIdx + src_h;
      size_t dst_w = src_width * batchIdx + src_w;
      output_addr[dst_h * dst_width + dst_w] = input_addr[pointIdx];
    } else {
      size_t src_h = (pointIdx - (batch - 1) * src_height * src_width) / res_width;
      size_t src_w = (pointIdx - (batch - 1) * src_height * src_width) % res_width;
      size_t src_coordinate = (batch - 1) * src_height * src_width + src_h * src_width + src_w;
      size_t dst_h = src_height * (batch - 1) + src_h;
      size_t dst_w = src_width * (batch - 1) + src_w;
      output_addr[dst_h * dst_width + dst_w] = input_addr[src_coordinate];
    }
  }
}

template <typename T>
void MatrixCombine(const size_t size, const size_t src_height, const size_t src_width, const size_t dst_width,
                   const size_t residual, const size_t res_width, const size_t batch, T *input_addr, T *output_addr,
                   cudaStream_t cuda_stream) {
  if (residual == 0) {
    MatrixCombineKernel<<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(size, src_height, src_width, dst_width,
                                                                           input_addr, output_addr);
  } else {
    MatrixCombineKernel<<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(size, src_height, src_width, dst_width,
                                                                           res_width, batch, input_addr, output_addr);
  }
  return;
}

template void MatrixCombine<float>(const size_t size, const size_t src_height, const size_t src_width,
                                   const size_t dst_width, const size_t residual, const size_t res_width,
                                   const size_t batch, float *input_addr, float *output_addr, cudaStream_t cuda_stream);
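To make the indexing concrete, here is a CPU reference for the no-residual path: block b of the input lands on the diagonal of a (batch * src_height) x dst_width output. The name MatrixCombineRef and the zero-initialized output buffer are assumptions for illustration, not part of the patch.

#include <cstddef>
#include <cstring>

void MatrixCombineRef(size_t batch, size_t src_height, size_t src_width, size_t dst_width, const float *in,
                      float *out) {
  memset(out, 0, batch * src_height * dst_width * sizeof(float));  // off-diagonal entries stay zero
  for (size_t b = 0; b < batch; ++b)
    for (size_t h = 0; h < src_height; ++h)
      for (size_t w = 0; w < src_width; ++w)
        // block b starts at row b*src_height, column b*src_width of the output
        out[(b * src_height + h) * dst_width + (b * src_width + w)] = in[(b * src_height + h) * src_width + w];
}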
@@ -0,0 +1,27 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_MATRIXCOMBINE_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_MATRIXCOMBINE_H_

#include "runtime/device/gpu/cuda_common.h"
template <typename T>
void MatrixCombine(const size_t size, const size_t src_height, const size_t src_width, const size_t dst_width,
                   const size_t residual, const size_t res_width, const size_t batch, T *input_addr, T *output_addr,
                   cudaStream_t cuda_stream);

#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_MATRIXCOMBINE_H_
@@ -0,0 +1,70 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "matrix_split_impl.cuh"
#include <iostream>
// Extracts the split_dim x split_dim diagonal blocks of a dim x dim input into
// a contiguous batch of blocks (used when dim is divisible by split_dim).
template <typename T>
__global__ void MatrixSplitKernel(const size_t size, const size_t split_dim, const size_t dim, T *input_addr,
                                  T *output_addr) {
  for (size_t pointIdx = blockIdx.x * blockDim.x + threadIdx.x; pointIdx < (size); pointIdx += blockDim.x * gridDim.x) {
    size_t batchIdx = pointIdx / (split_dim * split_dim);
    size_t dst_x = (pointIdx - batchIdx * split_dim * split_dim) / split_dim;
    size_t dst_y = (pointIdx - batchIdx * split_dim * split_dim) % split_dim;
    size_t src_coordinate = (batchIdx * split_dim + dst_x) * dim + batchIdx * split_dim + dst_y;
    output_addr[pointIdx] = input_addr[src_coordinate];
  }
}

// Residual variant: the trailing block only overlaps the input in its
// res_dim x res_dim corner; outside that corner the block is padded out to an
// identity pattern.
template <typename T>
__global__ void MatrixSplitKernel(const size_t size, const size_t split_dim, const size_t dim, const size_t res_dim,
                                  T *input_addr, T *output_addr) {
  for (size_t pointIdx = blockIdx.x * blockDim.x + threadIdx.x; pointIdx < (size); pointIdx += blockDim.x * gridDim.x) {
    size_t batchIdx = pointIdx / (split_dim * split_dim);
    size_t dst_x = (pointIdx - batchIdx * split_dim * split_dim) / split_dim;
    size_t dst_y = (pointIdx - batchIdx * split_dim * split_dim) % split_dim;
    size_t src_coordinate = (batchIdx * split_dim + dst_x) * dim + batchIdx * split_dim + dst_y;
    size_t batch_lower = dim / split_dim;
    if (batchIdx < batch_lower) {
      output_addr[pointIdx] = input_addr[src_coordinate];
    } else {
      if (dst_x < res_dim && dst_y < res_dim) {
        output_addr[pointIdx] = input_addr[src_coordinate];
      } else if (dst_x == dst_y) {
        output_addr[pointIdx] = 1;
      } else {
        output_addr[pointIdx] = 0;
      }
    }
  }
}

template <typename T>
void MatrixSplit(const size_t size, const size_t split_dim, const size_t dim, T *input_addr, T *output_addr,
                 cudaStream_t cuda_stream) {
  size_t batch = dim / split_dim;
  size_t res_dim = dim - batch * split_dim;  // leftover edge when dim % split_dim != 0
  if (res_dim == 0) {
    MatrixSplitKernel<<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(size, split_dim, dim, input_addr, output_addr);
  } else {
    MatrixSplitKernel<<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(size, split_dim, dim, res_dim, input_addr,
                                                                         output_addr);
  }
  return;
}

template void MatrixSplit<float>(const size_t size, const size_t split_dim, const size_t dim, float *input_addr,
                                 float *output_addr, cudaStream_t cuda_stream);
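A matching CPU reference for MatrixSplit may help pin down the residual-block semantics: each output block is a diagonal block of the input, and the trailing partial block is padded to an identity outside its valid corner. MatrixSplitRef is an illustrative name, not part of the patch.

#include <cstddef>

void MatrixSplitRef(size_t split_dim, size_t dim, const float *in, float *out) {
  size_t full = dim / split_dim;            // blocks that lie fully inside the input
  size_t res_dim = dim - full * split_dim;  // edge length of the partial corner block
  size_t blocks = full + (res_dim ? 1 : 0);
  for (size_t b = 0; b < blocks; ++b)
    for (size_t x = 0; x < split_dim; ++x)
      for (size_t y = 0; y < split_dim; ++y) {
        float *dst = &out[(b * split_dim + x) * split_dim + y];
        if (b < full || (x < res_dim && y < res_dim))
          *dst = in[(b * split_dim + x) * dim + b * split_dim + y];  // copy the diagonal block
        else
          *dst = (x == y) ? 1.0f : 0.0f;  // pad the partial block out to an identity
      }
}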
@@ -0,0 +1,25 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_MATRIXSPLIT_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_MATRIXSPLIT_H_

#include "runtime/device/gpu/cuda_common.h"
template <typename T>
void MatrixSplit(const size_t size, const size_t split_dim, const size_t dim, T *input_addr, T *output_addr,
                 cudaStream_t cuda_stream);

#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_MATRIXSPLIT_H_
@@ -0,0 +1,26 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "backend/kernel_compiler/gpu/nn/im2col_gpu_kernel.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_ONE(Im2Col, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                      Im2ColGpuFwdKernel, float)
MS_REG_GPU_KERNEL_ONE(Im2Col, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
                      Im2ColGpuFwdKernel, half)
}  // namespace kernel
}  // namespace mindspore
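The two MS_REG_GPU_KERNEL_ONE invocations bind the Im2Col primitive to the templated Im2ColGpuFwdKernel for float32 and float16 tensors respectively; no other dtypes are registered in this patch.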
File diff suppressed because it is too large