Merge pull request #6622 from tensor-tang/omp

use Intel OpenMP to speedup seq2batch when WITH_MKL
del_some_in_makelist
Tao Luo 7 years ago committed by GitHub
commit 5ac72d9501
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -171,12 +171,31 @@ void SequenceToBatch::sequence2BatchCopy(Matrix &batch,
hl_sequence2batch_copy(
batchData, seqData, idxData, seqWidth, batchCount, seq2batch);
} else {
for (int i = 0; i < batchCount; ++i) {
if (seq2batch) {
if (seq2batch) {
#ifdef PADDLE_USE_MKLML
const int blockMemSize = 8 * 1024;
const int blockSize = blockMemSize / sizeof(real);
#pragma omp parallel for collapse(2)
for (int i = 0; i < batchCount; ++i) {
for (int j = 0; j < seqWidth; j += blockSize) {
memcpy(batch.rowBuf(i) + j,
sequence.rowBuf(idxData[i]) + j,
(j + blockSize > seqWidth) ? (seqWidth - j) * sizeof(real)
: blockMemSize);
}
}
#else
for (int i = 0; i < batchCount; ++i) {
memcpy(batch.rowBuf(i),
sequence.rowBuf(idxData[i]),
seqWidth * sizeof(real));
} else {
}
#endif
} else {
#ifdef PADDLE_USE_MKLML
#pragma omp parallel for
#endif
for (int i = 0; i < batchCount; ++i) {
memcpy(sequence.rowBuf(idxData[i]),
batch.rowBuf(i),
seqWidth * sizeof(real));

Loading…
Cancel
Save