@@ -29,7 +29,7 @@ void ComputeFtrl(MultiThreadComputeParams *input_params, size_t start, size_t en
   auto linear = input_params->linear_;
   auto lr = input_params->lr_;
   auto l1 = input_params->l1_;
-  auto l2 = input_params->l2_;
+  auto l2_plus = 2 * input_params->l2_;
   auto lr_power = input_params->lr_power_;
   auto unique_sparse_grad = input_params->sparse_grad_;
   auto var_first_dim_size = input_params->var_first_dim_size_;
@@ -44,21 +44,18 @@ void ComputeFtrl(MultiThreadComputeParams *input_params, size_t start, size_t en
     for (size_t j = start_index, k = var_outer_dim_size * i; j < end_index; ++j, ++k) {
       auto summed_grad = unique_sparse_grad.value_[k];
       auto accum_new = accum[j] + summed_grad * summed_grad;
-      if (lr_power == -0.5) {
-        linear[j] += summed_grad - (std::sqrt(accum_new) - std::sqrt(accum[j])) / lr * var[j];
-      } else {
-        linear[j] += summed_grad - (std::pow(accum_new, -lr_power) - std::pow(accum[j], -lr_power)) / lr * var[j];
-      }
-      auto x = Sign(linear[j]) * l1 - linear[j];
       float y;
       if (lr_power == -0.5) {
-        y = std::sqrt(accum_new) / lr + 2 * l2;
+        y = std::sqrt(accum_new);
+        linear[j] += summed_grad - (y - std::sqrt(accum[j])) / lr * var[j];
       } else {
-        y = std::pow(accum_new, -lr_power) / lr + 2 * l2;
+        y = std::pow(accum_new, -lr_power);
+        linear[j] += summed_grad - (y - std::pow(accum[j], -lr_power)) / lr * var[j];
       }
-      auto pre_shrink = x / y;
-      var[j] = std::fabs(linear[j]) > l1 ? pre_shrink : 0;
       accum[j] = accum_new;
+      auto x = Sign(linear[j]) * l1 - linear[j];
+      y = y / lr + l2_plus;
+      var[j] = std::fabs(linear[j]) > l1 ? x / y : 0;
     }
   }
 }
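
Taken together, the two hunks are a common-subexpression refactor: 2 * l2 is hoisted out of the inner loop as l2_plus, and the std::sqrt/std::pow of accum_new is computed once per element into y and reused for both the linear and the var updates instead of being evaluated twice. Below is a minimal standalone sketch of the resulting per-element update; the free function, its signature, and the Sign helper are assumptions for illustration, not the kernel's actual interface.

#include <cmath>

// Assumed stand-in for the Sign() helper referenced in the diff.
inline float Sign(float v) { return static_cast<float>((v > 0.0f) - (v < 0.0f)); }

// One FTRL update for a single element, mirroring the refactored loop body.
void FtrlUpdateElement(float &var, float &accum, float &linear, float summed_grad,
                       float lr, float l1, float l2_plus, float lr_power) {
  float accum_new = accum + summed_grad * summed_grad;
  // Evaluate the power of accum_new once and keep it in y; the old code
  // computed std::sqrt/std::pow of accum_new twice per element.
  float y = (lr_power == -0.5f) ? std::sqrt(accum_new) : std::pow(accum_new, -lr_power);
  float y_prev = (lr_power == -0.5f) ? std::sqrt(accum) : std::pow(accum, -lr_power);
  linear += summed_grad - (y - y_prev) / lr * var;
  accum = accum_new;
  float x = Sign(linear) * l1 - linear;
  y = y / lr + l2_plus;  // l2_plus = 2 * l2, computed once outside the loop
  var = std::fabs(linear) > l1 ? x / y : 0.0f;
}

The arithmetic is unchanged: the same expressions are reused rather than recomputed, saving one sqrt/pow call and one multiplication per element.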