Add todo for reduceSum

tonyyang-svail-patch-1
chengduoZH 7 years ago
parent b8938b448c
commit 90dc33b5ff

@@ -77,6 +77,12 @@ __forceinline__ __device__ T __shfl_down_sync(unsigned, T val, int delta) {
template <typename T>
__device__ T reduceSum(T val, int tid, int len) {
// TODO(zcd): The warp size should be obtained from the
// GPU's device parameters rather than hard-coded as 32.
// To make reduceSum more efficient, warp-level parallelism
// is used here under the assumption that the warp size is 32.
// The warp size may differ across GPUs, but most cards use 32.
__shared__ T shm[32];
const int warpSize = 32;
unsigned mask = 0u;
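
For context, below is a minimal, hypothetical sketch of how a warp-shuffle block reduction along these lines is typically completed. The name reduceSumSketch, the full-warp mask, and the handling of partial warps are assumptions for illustration only; the actual function in this commit may compute the active-lane mask and edge cases differently.

// Hypothetical sketch of a warp-shuffle block reduction,
// assuming warpSize == 32 and at most 32 warps per block.
template <typename T>
__device__ T reduceSumSketch(T val, int tid, int len) {
  __shared__ T shm[32];          // one partial sum per warp
  const int warpSize = 32;       // assumed warp size, see TODO above
  unsigned mask = 0xFFFFFFFFu;   // assume all 32 lanes are active

  // Stage 1: each warp reduces its own values with shuffle-down.
  for (int offset = warpSize / 2; offset > 0; offset >>= 1) {
    val += __shfl_down_sync(mask, val, offset);
  }

  // Lane 0 of each warp publishes its partial sum to shared memory.
  if (tid % warpSize == 0) {
    shm[tid / warpSize] = val;
  }
  __syncthreads();

  // Stage 2: the first warp reduces the per-warp partial sums.
  T sum = T(0);
  if (tid < warpSize) {
    int numWarps = (len + warpSize - 1) / warpSize;
    sum = (tid < numWarps) ? shm[tid] : T(0);
    for (int offset = warpSize / 2; offset > 0; offset >>= 1) {
      sum += __shfl_down_sync(mask, sum, offset);
    }
  }
  return sum;  // thread 0 holds the block-wide sum
}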
