@ -144,6 +144,7 @@ class ElementwiseMulMKLDNNKernel : public framework::OpKernel<T> {
mul_func_t mul_func = (mul_func_t) mul.getCode();
#pragma omp parallel for collapse(2)
for (int ni = 0; ni < n; ni++) {
for (int ci = 0; ci < C; ci++) {
auto ptr_x =