|
|
|
|
@ -164,7 +164,7 @@ class CPUPyramidHashOPKernel : public framework::OpKernel<T> {
|
|
|
|
|
unsigned int pos1 = XXH32(hash_id, len * sizeof(T), 0) % _space_len;
|
|
|
|
|
unsigned int pos2 = XXH32(hash_id, len * sizeof(T), _rand_len) % _space_len;
|
|
|
|
|
|
|
|
|
|
for (unsigned int j = 0; j != _num_emb; j += _rand_len) {
|
|
|
|
|
for (int j = 0; j != _num_emb; j += _rand_len) {
|
|
|
|
|
if (j + _rand_len < _num_emb) {
|
|
|
|
|
__builtin_prefetch(weights + pos2);
|
|
|
|
|
__builtin_prefetch(top_pos + j + _rand_len);
|
|
|
|
|
@ -204,7 +204,7 @@ class CPUPyramidHashOPKernel : public framework::OpKernel<T> {
|
|
|
|
|
auto* buff = ctx.Output<LoDTensor>("X_Temp_Out");
|
|
|
|
|
buff->Resize(framework::make_ddim({bottom->dims()[0], bottom->dims()[1]}));
|
|
|
|
|
T* bottom_data = buff->mutable_data<T>(ctx.GetPlace());
|
|
|
|
|
for (size_t i = 0; i < bottom->dims()[0]; i++) {
|
|
|
|
|
for (int i = 0; i < bottom->dims()[0]; i++) {
|
|
|
|
|
bottom_data[i] = bottom_data_ori[i];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@ -237,7 +237,7 @@ class CPUPyramidHashOPKernel : public framework::OpKernel<T> {
|
|
|
|
|
int* iter = drop_pos->mutable_data<int>(ctx.GetPlace());
|
|
|
|
|
int* iter_end = iter;
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < top_offset.size() - 1; ++i) {
|
|
|
|
|
for (size_t i = 0; i < top_offset.size() - 1; ++i) {
|
|
|
|
|
int w = offset[i + 1] - offset[i];
|
|
|
|
|
int nsentense_with_pyramid = 0;
|
|
|
|
|
if (w < 2) {
|
|
|
|
|
@ -283,7 +283,7 @@ class CPUPyramidHashOPKernel : public framework::OpKernel<T> {
|
|
|
|
|
|
|
|
|
|
iter = drop_pos->mutable_data<int>(ctx.GetPlace());
|
|
|
|
|
int top_counter = 0;
|
|
|
|
|
for (int i = 0; i < offset.size() - 1; ++i) {
|
|
|
|
|
for (size_t i = 0; i < offset.size() - 1; ++i) {
|
|
|
|
|
int w_drop = drop_pos_offset[i + 1] - drop_pos_offset[i];
|
|
|
|
|
int w = offset[i + 1] - offset[i];
|
|
|
|
|
if (w_drop == 0) {
|
|
|
|
|
@ -376,7 +376,7 @@ class CPUPyramidHashOPGradKernel : public framework::OpKernel<T> {
|
|
|
|
|
void hash_embedding_bp(const T* hash_id, int len, const T* top_pos,
|
|
|
|
|
T* weights, T mlr, int _num_emb, int _rand_len,
|
|
|
|
|
int _space_len) const {
|
|
|
|
|
for (unsigned int j = 0; j != _num_emb; j += _rand_len) {
|
|
|
|
|
for (int j = 0; j != _num_emb; j += _rand_len) {
|
|
|
|
|
unsigned int pos = XXH32(hash_id, len * sizeof(T), j) % _space_len;
|
|
|
|
|
avx_axpy(top_pos + j, weights + pos, _rand_len, mlr);
|
|
|
|
|
}
|
|
|
|
|
@ -398,7 +398,7 @@ class CPUPyramidHashOPGradKernel : public framework::OpKernel<T> {
|
|
|
|
|
auto* bottom_data = buff->data<T>();
|
|
|
|
|
|
|
|
|
|
int _slot_len = bottom->dims()[0];
|
|
|
|
|
if (_slot_len == bottom->lod()[0].size() - 1 &&
|
|
|
|
|
if (static_cast<size_t>(_slot_len) == bottom->lod()[0].size() - 1 &&
|
|
|
|
|
std::count(bottom_data, bottom_data + _slot_len, -1) == _slot_len) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
@ -412,7 +412,7 @@ class CPUPyramidHashOPGradKernel : public framework::OpKernel<T> {
|
|
|
|
|
|
|
|
|
|
const int* iter = drop_pos->data<int>();
|
|
|
|
|
int top_counter = 0;
|
|
|
|
|
for (int i = 0; i < offset.size() - 1; ++i) {
|
|
|
|
|
for (size_t i = 0; i < offset.size() - 1; ++i) {
|
|
|
|
|
int w = offset[i + 1] - offset[i];
|
|
|
|
|
int w_drop = drop_pos_offset[i + 1] - drop_pos_offset[i];
|
|
|
|
|
if (w_drop == 0) {
|
|
|
|
|
|