@@ -129,6 +129,7 @@ class LookupTableGradKernel : public framework::OpKernel<T> {
                        "must be either LoDTensor or SelectedRows");
     }
 
+    int64_t padding_idx = context.Attr<int64_t>("padding_idx");
     bool is_sparse = context.Attr<bool>("is_sparse");
     // Since paddings are not trainable and fixed in forward, the gradient of
     // paddings makes no sense and we don't deal with it in backward.
@@ -187,10 +188,15 @@ class LookupTableGradKernel : public framework::OpKernel<T> {
       memset(d_table_data, 0, d_table->numel() * sizeof(T));
 
       for (int64_t i = 0; i < ids->numel(); ++i) {
-        PADDLE_ENFORCE_LT(ids_data[i], N);
-        PADDLE_ENFORCE_GE(ids_data[i], 0);
-        for (int j = 0; j < D; ++j) {
-          d_table_data[ids_data[i] * D + j] += d_output_data[i * D + j];
+        if (padding_idx != kNoPadding && ids_data[i] == padding_idx) {
+          // the gradient of padding_idx should be 0, already done by memset, so
+          // do nothing.
+        } else {
+          PADDLE_ENFORCE_LT(ids_data[i], N);
+          PADDLE_ENFORCE_GE(ids_data[i], 0);
+          for (int j = 0; j < D; ++j) {
+            d_table_data[ids_data[i] * D + j] += d_output_data[i * D + j];
+          }
         }
       }
     }
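The skip in the added branch is sound because the gradient table is zero-filled by the preceding memset, so rows hit only by padding_idx keep a gradient of exactly zero without any extra write. Below is a minimal standalone sketch of the same pattern, outside of Paddle; the names and shapes (N, D, ids, LookupTableGrad) are illustrative only, not Paddle's actual API, though kNoPadding = -1 matches the sentinel used in the operator.

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

constexpr int64_t kNoPadding = -1;  // sentinel: no padding index configured

// Accumulate d_output (ids.size() x D) into d_table (N x D), leaving the
// padding row untouched so its gradient stays exactly zero.
void LookupTableGrad(const std::vector<int64_t>& ids,
                     const std::vector<float>& d_output,
                     std::vector<float>& d_table,
                     int64_t N, int64_t D, int64_t padding_idx) {
  std::memset(d_table.data(), 0, d_table.size() * sizeof(float));
  for (size_t i = 0; i < ids.size(); ++i) {
    if (padding_idx != kNoPadding && ids[i] == padding_idx) {
      continue;  // gradient of the padding row is 0; memset already did it
    }
    assert(ids[i] >= 0 && ids[i] < N);
    for (int64_t j = 0; j < D; ++j) {
      d_table[ids[i] * D + j] += d_output[i * D + j];
    }
  }
}

int main() {
  const int64_t N = 4, D = 2, padding_idx = 0;
  std::vector<int64_t> ids = {0, 2, 2, 0};  // two padding lookups (id 0)
  std::vector<float> d_output(ids.size() * D, 1.0f);
  std::vector<float> d_table(N * D);
  LookupTableGrad(ids, d_output, d_table, N, D, padding_idx);
  // Row 0 (padding) stays zero; row 2 accumulates gradient from two lookups.
  for (int64_t r = 0; r < N; ++r) {
    std::printf("row %lld: %.1f %.1f\n", static_cast<long long>(r),
                d_table[r * D], d_table[r * D + 1]);
  }
  return 0;
}

Note the design choice this mirrors in the diff: the bounds checks (PADDLE_ENFORCE_LT/GE) move inside the else branch, so a padding id is exempt from range validation as well as from accumulation.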