[ROCM] fix gather_op, sigmoid_cross_entropy_with_logits_op, test=develop (#31467)

4 years ago · e03e46730c
parent b85c8e03be
commit e03e46730c
2 changed files with 11 additions and 0 deletions
--- a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cu
+++ b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cu
@@ -29,7 +29,11 @@ namespace operators {
 using Tensor = framework::Tensor;
 // Launch-sizing constants for this file. kNumCUDAThreads is presumably the
 // per-block thread count (TODO confirm against the kernel launches); the
 // HIP (ROCm) compiler path selects a smaller value than the default path.
 #if !defined(__HIPCC__)
 static constexpr int kNumCUDAThreads = 512;
 #else
 static constexpr int kNumCUDAThreads = 256;
 #endif
 // Block-count constant. The identifier's spelling is intentionally left
 // unchanged because code outside this block may refer to it by name.
 static constexpr int kNumMaxinumNumBlocks = 4096;
 static inline int NumBlocks(const int N) {
--- a/paddle/fluid/platform/cuda_helper.h
+++ b/paddle/fluid/platform/cuda_helper.h
@@ -76,10 +76,17 @@ namespace platform {
 *
 */
 // CUDA_KERNEL_LOOP_TYPE(i, num, index_type) expands to a declaration of a
 // 64-bit running index `__index__` followed by a grid-stride `for` header:
 // the loop starts at (block index * block size + thread index) and advances
 // by (block size * grid size) until `__index__` reaches `num`. The loop
 // variable `i` has type `index_type` and mirrors `__index__` on every
 // iteration. The caller supplies the loop body after the macro.
 //
 // The block/grid built-ins are 32-bit unsigned values, so one operand of each
 // product is widened to int64_t *before* multiplying; otherwise the start
 // index or the stride would wrap modulo 2^32 on very large launches even
 // though `__index__` itself is 64-bit.
 //
 // Because the macro declares `__index__` in the enclosing scope, it can be
 // used at most once per scope.
 //
 // CUDA_KERNEL_LOOP(i, num) is the same loop with an `int` loop variable.
 #ifdef __HIPCC__
 #define CUDA_KERNEL_LOOP_TYPE(i, num, index_type)                          \
   int64_t __index__ =                                                      \
       static_cast<int64_t>(hipBlockIdx_x) * hipBlockDim_x + hipThreadIdx_x; \
   for (index_type i = __index__; __index__ < (num);                        \
        __index__ += static_cast<int64_t>(hipBlockDim_x) * hipGridDim_x,    \
       i = __index__)
 #else
 #define CUDA_KERNEL_LOOP_TYPE(i, num, index_type)                    \
   int64_t __index__ =                                                \
       static_cast<int64_t>(blockIdx.x) * blockDim.x + threadIdx.x;   \
   for (index_type i = __index__; __index__ < (num);                  \
        __index__ += static_cast<int64_t>(blockDim.x) * gridDim.x,    \
       i = __index__)
 #endif
 #define CUDA_KERNEL_LOOP(i, num) CUDA_KERNEL_LOOP_TYPE(i, num, int)