/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <ThreadPool.h>

#include <unistd.h>

#include <cmath>    // std::abs, std::sqrt
#include <cstring>  // memset
#include <string>
#include <thread>  // NOLINT

#include "google/protobuf/text_format.h"
#include "gtest/gtest.h"
#include "paddle/fluid/distributed/ps.pb.h"
#include "paddle/fluid/distributed/table/common_dense_table.h"
#include "paddle/fluid/distributed/table/common_sparse_table.h"
#include "paddle/fluid/distributed/table/sparse_geo_table.h"
#include "paddle/fluid/distributed/table/table.h"
namespace paddle {
namespace distributed {
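// Each test below configures a CommonSparseTable, pulls the freshly
// initialized parameters, pushes per-trainer gradients, and checks the
// pulled result against a host-side replay of the optimizer update.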
// CommonSparseTable + SGD
TEST(CommonSparseTable, SGD) {
  int emb_dim = 10;
  int trainers = 2;

  TableParameter table_config;
  table_config.set_table_class("CommonSparseTable");
  FsClientParameter fs_config;
  Table *table = new CommonSparseTable();
  TableAccessorParameter *accessor_config = table_config.mutable_accessor();
  accessor_config->set_accessor_class("CommMergeAccessor");
  CommonAccessorParameter *common_config = table_config.mutable_common();
  common_config->set_name("sgd");
  common_config->set_table_name("sgd_test_table");
  common_config->set_trainer_num(trainers);
  common_config->add_params("Param");
  common_config->add_dims(emb_dim);
  common_config->add_initializers("uniform_random&0&-1.0&1.0");  // param
  common_config->add_params("LearningRate");
  common_config->add_dims(1);
  common_config->add_initializers("fill_constant&1.0");  // learning_rate
  auto ret = table->initialize(table_config, fs_config);
  ASSERT_EQ(ret, 0);
  // pull parameters for create and check
  std::vector<uint64_t> init_keys = {0, 1, 2, 3, 4};
  std::vector<float> init_values;
  init_values.resize(init_keys.size() * emb_dim);
  table->pull_sparse(init_values.data(), init_keys.data(), init_keys.size());

  // for check
  std::vector<float> total_gradients;
  total_gradients.resize(init_keys.size() * emb_dim);
  memset(total_gradients.data(), 0, sizeof(float) * total_gradients.size());
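  // Every trainer pushes a gradient for each of the five keys; the values
  // form one increasing 0.1-step sequence across both trainers, and
  // total_gradients records the per-slot sum that SGD should subtract.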
  // push gradient
  std::vector<std::vector<uint64_t>> trainer_keys;
  std::vector<std::vector<float>> trainer_gradient_values;
  trainer_keys.resize(trainers);
  trainer_gradient_values.resize(trainers);
  float start = 0.0;
  for (int i = 0; i < trainers; i++) {
    trainer_keys[i] = init_keys;
    for (size_t j = 0; j < trainer_keys[i].size(); j++) {
      auto id = trainer_keys[i][j];
      for (int k = 0; k < emb_dim; k++) {
        trainer_gradient_values[i].push_back(start);
        total_gradients[id * emb_dim + k] += start;
        start += 0.1;
      }
    }
  }
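  // Enqueue one push per trainer on a thread pool so the two pushes run
  // concurrently, then block on the futures before reading the table back.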
  std::shared_ptr<::ThreadPool> pool_ =
      std::make_shared<::ThreadPool>(trainers);
  std::vector<std::future<void>> task_status;
  for (int i = 0; i < trainers; i++) {
    auto &push_keys = trainer_keys[i];
    auto &push_values = trainer_gradient_values[i];
    auto task = [table, &push_keys, &push_values] {
      table->push_sparse(push_keys.data(), push_values.data(),
                         push_keys.size());
    };
    task_status.push_back(pool_->enqueue(std::move(task)));
  }
  for (auto &status : task_status) {
    status.wait();
  }
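  // With the learning rate fixed at 1.0, SGD should leave each slot at
  // init_value - sum_of_pushed_gradients; verify within float tolerance.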
  std::vector<float> pull_values;
  pull_values.resize(init_keys.size() * emb_dim);
  table->pull_sparse(pull_values.data(), init_keys.data(), init_keys.size());
  for (size_t i = 0; i < init_values.size(); ++i) {
    auto update_val = init_values[i] - 1.0 * total_gradients[i];
    ASSERT_TRUE(std::abs(update_val - pull_values[i]) < 1e-5);
  }
}
// CommonSparseTable + Adam
TEST(CommonSparseTable, Adam) {
  int emb_dim = 10;
  int trainers = 2;
  float beta1 = 0.9;
  float beta2 = 0.999;
  float epsilon = 1.0e-8;

  TableParameter table_config;
  table_config.set_table_class("CommonSparseTable");
  FsClientParameter fs_config;
  Table *table = new CommonSparseTable();
  TableAccessorParameter *accessor_config = table_config.mutable_accessor();
  accessor_config->set_accessor_class("CommMergeAccessor");
  CommonAccessorParameter *common_config = table_config.mutable_common();
  common_config->set_name("adam");
  common_config->set_table_name("adam_test_table");
  common_config->set_trainer_num(trainers);
  common_config->add_params("Param");
  common_config->add_dims(emb_dim);
  common_config->add_initializers("uniform_random&0&-1.0&1.0");
  common_config->add_params("LearningRate");
  common_config->add_dims(1);
  common_config->add_initializers("fill_constant&1.0");
  common_config->add_params("Moment1");
  common_config->add_dims(emb_dim);
  common_config->add_initializers("fill_constant&0.0");
  common_config->add_params("Moment2");
  common_config->add_dims(emb_dim);
  common_config->add_initializers("fill_constant&0.0");
  common_config->add_params("Beta1Pow");
  common_config->add_dims(1);
  common_config->add_initializers("fill_constant&1.0");
  common_config->add_params("Beta2Pow");
  common_config->add_dims(1);
  common_config->add_initializers("fill_constant&1.0");
  auto ret = table->initialize(table_config, fs_config);
  ASSERT_EQ(ret, 0);
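  // Same flow as the SGD test: pull the initial parameters, push gradients
  // from each trainer, then replay Adam on the host to get expected values.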
  // pull parameters for create and check
  std::vector<uint64_t> init_keys = {0, 1, 2, 3, 4};
  std::vector<float> init_values;
  init_values.resize(init_keys.size() * emb_dim);
  table->pull_sparse(init_values.data(), init_keys.data(), init_keys.size());
  // push gradient
  std::vector<std::vector<uint64_t>> trainer_keys;
  std::vector<std::vector<float>> trainer_gradient_values;
  trainer_keys.resize(trainers);
  trainer_gradient_values.resize(trainers);
  float start = 0.0;
  for (int i = 0; i < trainers; i++) {
    trainer_keys[i] = init_keys;
    for (size_t j = 0; j < trainer_keys[i].size(); j++) {
      for (int k = 0; k < emb_dim; k++) {
        trainer_gradient_values[i].push_back(start);
        start += 0.1;
      }
    }
  }
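  // Unlike the SGD test, the pushes below run sequentially, so trainer i's
  // push corresponds to Adam step i + 1 in the reference computation below.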
  for (int i = 0; i < trainers; i++) {
    auto &push_keys = trainer_keys[i];
    auto &push_values = trainer_gradient_values[i];
    table->push_sparse(push_keys.data(), push_values.data(), push_keys.size());
  }
  std::vector<float> pull_values;
  pull_values.resize(init_keys.size() * emb_dim);
  table->pull_sparse(pull_values.data(), init_keys.data(), init_keys.size());
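  // Reference Adam update, matching the slots configured above:
  //   m   = beta1 * m + (1 - beta1) * g
  //   v   = beta2 * v + (1 - beta2) * g * g
  //   lr' = lr * sqrt(1 - beta2_pow) / (1 - beta1_pow)
  //   p  -= lr' * m / (sqrt(v) + epsilon * sqrt(1 - beta2_pow))
  // beta1_pow and beta2_pow accumulate beta1^t and beta2^t across steps.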
  // Walk the flattened pull buffer one embedding row (one key) at a time.
  for (size_t idx = 0; idx < init_keys.size() * emb_dim; idx += emb_dim) {
    std::vector<float> beta1_pow, beta2_pow, lr, mom1, mom2, param;
    beta1_pow.push_back(beta1);
    beta2_pow.push_back(beta2);
    lr.push_back(1.0);
    for (int i = 0; i < emb_dim; i++) {
      mom1.push_back(0.0);
      mom2.push_back(0.0);
      param.push_back(init_values[idx + i]);
    }
    for (int i = 0; i < trainers; i++) {
      auto lr_ = lr[0] * std::sqrt(1 - beta2_pow[0]) / (1 - beta1_pow[0]);
      for (int j = 0; j < emb_dim; j++) {
        mom1[j] = beta1 * mom1[j] +
                  (1 - beta1) * trainer_gradient_values[i][idx + j];
        mom2[j] = beta2 * mom2[j] +
                  (1 - beta2) * trainer_gradient_values[i][idx + j] *
                      trainer_gradient_values[i][idx + j];
        param[j] = param[j] -
                   lr_ * (mom1[j] / (std::sqrt(mom2[j]) +
                                     epsilon * std::sqrt(1 - beta2_pow[0])));
      }
      beta1_pow[0] *= beta1;
      beta2_pow[0] *= beta2;
    }
    for (int i = 0; i < emb_dim; i++) {
      ASSERT_TRUE(std::abs(param[i] - pull_values[idx + i]) < 1e-5);
    }
  }
}
}  // namespace distributed
}  // namespace paddle