From 416e47edef0aee8651d2d2cbf7f537f4e5aeae04 Mon Sep 17 00:00:00 2001
From: Qi Li
Date: Wed, 10 Mar 2021 11:05:40 +0800
Subject: [PATCH] [ROCM] fix softmax with loss nan in HIP platform, test=develop (#31491)

---
 paddle/fluid/operators/softmax_with_cross_entropy_op.cu | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/paddle/fluid/operators/softmax_with_cross_entropy_op.cu b/paddle/fluid/operators/softmax_with_cross_entropy_op.cu
index b36a5bf6dc..85c1b2feb5 100644
--- a/paddle/fluid/operators/softmax_with_cross_entropy_op.cu
+++ b/paddle/fluid/operators/softmax_with_cross_entropy_op.cu
@@ -398,7 +398,12 @@ static void HardLabelSoftmaxWithCrossEntropy(
     const platform::CUDADeviceContext& ctx, const T* logits_data,
     const int64_t* labels_data, T* loss_data, T* softmax_data, int64_t n,
     int64_t d, int axis_dim, int ignore_idx) {
+#ifdef __HIPCC__
+  // HIP platform will have loss nan if dim size > 256
+  constexpr int kMaxBlockDim = 256;
+#else
   constexpr int kMaxBlockDim = 512;
+#endif
   int64_t block_dim = axis_dim >= kMaxBlockDim
                           ? kMaxBlockDim
                           : (1 << static_cast<int>(std::log2(axis_dim)));