|
|
|
@ -46,7 +46,7 @@ void CRFDecoding(const int seq_len, const float* x, const float* w,
|
|
|
|
|
x_content = _mm512_loadu_ps(x + i_offset);
|
|
|
|
|
alpha_content = _mm512_add_ps(w_content, x_content);
|
|
|
|
|
// Save the alpha value.
|
|
|
|
|
_mm512_storeu_ps(alpha_value + i_offset, alpha_content);
|
|
|
|
|
_mm512_storeu_ps(alpha + i_offset, alpha_content);
|
|
|
|
|
#else
|
|
|
|
|
// AVX or AVX2
|
|
|
|
|
// weights, input and alpha values.
|
|
|
|
@ -131,13 +131,12 @@ void CRFDecoding(const int seq_len, const float* x, const float* w,
|
|
|
|
|
}
|
|
|
|
|
/* Update the alpha and track values. */
|
|
|
|
|
#ifdef __AVX512F__
|
|
|
|
|
__m512 x_content =
|
|
|
|
|
_mm512_loadu_ps(x + seq_offset + this->num_ + j_offset);
|
|
|
|
|
__m512 x_content = _mm512_loadu_ps(x + seq_offset + tag_num + j_offset);
|
|
|
|
|
max_score = _mm512_add_ps(max_score, x_content);
|
|
|
|
|
_mm512_storeu_ps(alpha + seq_offset + this->num_ + j_offset, max_score);
|
|
|
|
|
_mm512_storeu_si512(reinterpret_cast<__m512i*>(track + seq_offset +
|
|
|
|
|
this->num_ + j_offset),
|
|
|
|
|
max_j);
|
|
|
|
|
_mm512_storeu_ps(alpha + seq_offset + tag_num + j_offset, max_score);
|
|
|
|
|
_mm512_storeu_si512(
|
|
|
|
|
reinterpret_cast<__m512i*>(track + seq_offset + tag_num + j_offset),
|
|
|
|
|
max_j);
|
|
|
|
|
#else
|
|
|
|
|
__m256 x_content = _mm256_loadu_ps(x + seq_offset + tag_num + j_offset);
|
|
|
|
|
max_score = _mm256_add_ps(max_score, x_content);
|
|
|
|
|