Fix the shape selection for vectorization on CUDA

revert-31562-mean
wangchaochaohu 4 years ago committed by GitHub
parent a0b60716f1
commit 068d905e1e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -348,7 +348,7 @@ class ElementwiseAddGradKernel : public ElemwiseGradKernel<T> {
} else {
size_t thread_nums = 1024;
size_t block_nums = (width + thread_nums - 1) / thread_nums;
- int vec_size = VectorizedSize<T>(dx_data);
+ int vec_size = VectorizedSize<T>(dout_data);
if (vec_size == 4 && width % 4 == 0) {
block_nums = (width / vec_size + thread_nums - 1) / thread_nums;
VecMatrixReduceLongWidth<T,

Loading…
Cancel
Save