fix bug in SelectedRowsAddTensorKernel

8 years ago · 0d7b1fd4fe
parent ab5dc9fe18
commit 0d7b1fd4fe
1 changed files with 2 additions and 2 deletions
--- a/paddle/operators/math/selected_rows_functor.cu
+++ b/paddle/operators/math/selected_rows_functor.cu
@@ -89,7 +89,7 @@ __global__ void SelectedRowsAddTensorKernel(const T* selected_rows,
    // Since index in rows of SelectedRows can be duplicate, we can not use
    // tensor_out[index] += selected_rows[index]; Instead, we have to use
    // AtomicAdd to avoid concurrent write error.
-    paddle::platform::CudaAtomicAdd(&tensor_out[index], selected_rows[index]);
+    paddle::platform::CudaAtomicAdd(tensor_out + index, selected_rows[index]);
  }
 }
 }  // namespace
@@ -121,7 +121,7 @@ struct SelectedRowsAddTensor<platform::GPUPlace, T> {

    int block_size = 256;
    dim3 threads(block_size, 1);
-    dim3 grid(1, in1_height);
+    dim3 grid(1, in1_rows.size());
    SelectedRowsAddTensorKernel<
        T><<<grid, threads, 0,
             reinterpret_cast<const platform::CUDADeviceContext&>(context)