refine the compiler error for half2 operation (#29816)

revert-31562-mean
wangchaochaohu 5 years ago committed by GitHub
parent 82630408b4
commit 01c37c8e02
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -183,6 +183,7 @@ template <int SIZE>
__global__ void VecFP16MatrixColReduce(const __half2 *__restrict__ in, __global__ void VecFP16MatrixColReduce(const __half2 *__restrict__ in,
__half2 *__restrict__ out, size_t width, __half2 *__restrict__ out, size_t width,
size_t height) { size_t height) {
#if CUDA_ARCH_FP16_SUPPORTED(__CUDA_ARCH__)
int idx = threadIdx.x + blockIdx.x * blockDim.x; int idx = threadIdx.x + blockIdx.x * blockDim.x;
int by = blockIdx.y; int by = blockIdx.y;
__half2 zero = __half2half2(static_cast<__half>(0)); __half2 zero = __half2half2(static_cast<__half>(0));
@ -196,6 +197,7 @@ __global__ void VecFP16MatrixColReduce(const __half2 *__restrict__ in,
atomicAdd(&(out[idx]), sum); atomicAdd(&(out[idx]), sum);
} }
#endif
} }
template <typename T> template <typename T>
@ -363,7 +365,6 @@ class ElementwiseAddGradKernel : public ElemwiseGradKernel<T> {
int max_blocks = std::max(max_physical_threads / (block_x * block_y), 1); int max_blocks = std::max(max_physical_threads / (block_x * block_y), 1);
int theory_block = (width + blocks.x - 1) / blocks.x; int theory_block = (width + blocks.x - 1) / blocks.x;
dim3 grids(std::min(theory_block, max_blocks)); dim3 grids(std::min(theory_block, max_blocks));
#if CUDA_ARCH_FP16_SUPPORTED(__CUDA_ARCH__)
if (std::is_same<T, paddle::platform::float16>::value && width < 2048 && if (std::is_same<T, paddle::platform::float16>::value && width < 2048 &&
width % 2 == 0 && height % 64 == 0) { width % 2 == 0 && height % 64 == 0) {
auto &dev_ctx = auto &dev_ctx =
@ -381,7 +382,6 @@ class ElementwiseAddGradKernel : public ElemwiseGradKernel<T> {
width, height); width, height);
return; return;
} }
#endif
if (width / height < 32) { if (width / height < 32) {
MatrixColReduce<T, block_x, block_y><<<grids, blocks, 0, stream>>>( MatrixColReduce<T, block_x, block_y><<<grids, blocks, 0, stream>>>(

Loading…
Cancel
Save