|
|
|
@ -114,18 +114,18 @@ void ProcessALine(const std::vector<std::string>& columns, const Meta& meta,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int64_t SaveToText(std::ostream* os, std::shared_ptr<ValueBlock> block,
|
|
|
|
|
const std::vector<std::string>& saved_names,
|
|
|
|
|
const int mode) {
|
|
|
|
|
for (auto value : block->values_) {
|
|
|
|
|
std::vector<std::vector<float>*> vss = value.second->get(saved_names);
|
|
|
|
|
auto* vs = value.second->data_.data();
|
|
|
|
|
std::stringstream ss;
|
|
|
|
|
auto id = value.first;
|
|
|
|
|
ss << id << "\t";
|
|
|
|
|
for (int i = 0; i < static_cast<int>(vss.size()); i++) {
|
|
|
|
|
auto& vs = vss[i];
|
|
|
|
|
ss << paddle::string::join_strings((*vs), ',');
|
|
|
|
|
ss << "\t";
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < block->value_length_; i++) {
|
|
|
|
|
ss << vs[i];
|
|
|
|
|
ss << ",";
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ss << "\n";
|
|
|
|
|
|
|
|
|
|
os->write(ss.str().c_str(), sizeof(char) * ss.str().size());
|
|
|
|
@ -159,62 +159,13 @@ int64_t LoadFromText(const std::string& valuepath, const std::string& metapath,
|
|
|
|
|
|
|
|
|
|
std::vector<std::vector<float>> kvalues;
|
|
|
|
|
ProcessALine(values, meta, &kvalues);
|
|
|
|
|
block->Init(id, &kvalues, 1);
|
|
|
|
|
// warning: need fix
|
|
|
|
|
block->Init(id);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void SaveShard(std::shared_ptr<ValueBlock> block, const std::string& dirname,
|
|
|
|
|
const CommonAccessorParameter& common, const int mode,
|
|
|
|
|
const int pserver_id, const int shard_id) {
|
|
|
|
|
auto varname = common.table_name();
|
|
|
|
|
std::string var_store = string::Sprintf("%s/%s", dirname, varname);
|
|
|
|
|
VLOG(3) << "save " << varname << " in dir: " << var_store << " begin";
|
|
|
|
|
MkDirRecursively(var_store.c_str());
|
|
|
|
|
|
|
|
|
|
std::string shard_var_pre =
|
|
|
|
|
string::Sprintf("%s.block%d.%d", varname, pserver_id, shard_id);
|
|
|
|
|
std::string meta_ = string::Sprintf("%s/%s.meta", var_store, shard_var_pre);
|
|
|
|
|
std::string value_ = string::Sprintf("%s/%s.txt", var_store, shard_var_pre);
|
|
|
|
|
|
|
|
|
|
// save values
|
|
|
|
|
std::vector<std::string> params(common.params().begin(),
|
|
|
|
|
common.params().end());
|
|
|
|
|
std::unique_ptr<std::ofstream> value_out(new std::ofstream(value_));
|
|
|
|
|
SaveToText(value_out.get(), block, params, mode);
|
|
|
|
|
// save meta
|
|
|
|
|
std::stringstream stream;
|
|
|
|
|
stream << "param=" << common.table_name() << "\n";
|
|
|
|
|
stream << "server_id=" << pserver_id << "\n";
|
|
|
|
|
stream << "shard_id=" << shard_id << "\n";
|
|
|
|
|
stream << "row_names=" << paddle::string::join_strings(common.params(), ',')
|
|
|
|
|
<< "\n";
|
|
|
|
|
stream << "row_dims=" << paddle::string::join_strings(common.dims(), ',')
|
|
|
|
|
<< "\n";
|
|
|
|
|
stream << "count=" << block->values_.size() << "\n";
|
|
|
|
|
std::unique_ptr<std::ofstream> meta_out(new std::ofstream(meta_));
|
|
|
|
|
meta_out->write(stream.str().c_str(), sizeof(char) * stream.str().size());
|
|
|
|
|
meta_out->close();
|
|
|
|
|
VLOG(3) << "save " << varname << " in dir: " << var_store << " done";
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void CommonSparseTable::create_initializer(const std::string& attr,
|
|
|
|
|
const std::string& name) {
|
|
|
|
|
auto slices = string::split_string<std::string>(attr, "&");
|
|
|
|
|
|
|
|
|
|
if (slices[0] == "gaussian_random") {
|
|
|
|
|
initializers_[name] = new GaussianInitializer(slices);
|
|
|
|
|
} else if (slices[0] == "fill_constant") {
|
|
|
|
|
initializers_[name] = new FillConstantInitializer(slices);
|
|
|
|
|
} else if (slices[0] == "uniform_random") {
|
|
|
|
|
initializers_[name] = new UniformInitializer(slices);
|
|
|
|
|
} else {
|
|
|
|
|
PADDLE_THROW(
|
|
|
|
|
platform::errors::InvalidArgument("%s can not be supported", name));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int32_t CommonSparseTable::initialize() {
|
|
|
|
|
_shards_task_pool.resize(task_pool_size_);
|
|
|
|
|
for (int i = 0; i < _shards_task_pool.size(); ++i) {
|
|
|
|
@ -224,31 +175,44 @@ int32_t CommonSparseTable::initialize() {
|
|
|
|
|
sync = _config.common().sync();
|
|
|
|
|
VLOG(1) << "table " << _config.common().table_name() << " is sync: " << sync;
|
|
|
|
|
|
|
|
|
|
initialize_value();
|
|
|
|
|
initialize_optimizer();
|
|
|
|
|
initialize_recorder();
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int32_t CommonSparseTable::initialize_recorder() { return 0; }
|
|
|
|
|
|
|
|
|
|
int32_t CommonSparseTable::initialize_value() {
|
|
|
|
|
auto common = _config.common();
|
|
|
|
|
int size = static_cast<int>(common.params().size());
|
|
|
|
|
|
|
|
|
|
size_t offset = 0;
|
|
|
|
|
for (int x = 0; x < size; ++x) {
|
|
|
|
|
auto& varname = common.params()[x];
|
|
|
|
|
auto& dim = common.dims()[x];
|
|
|
|
|
|
|
|
|
|
value_idx_[varname] = x;
|
|
|
|
|
value_names_.push_back(varname);
|
|
|
|
|
value_dims_.push_back(dim);
|
|
|
|
|
value_offsets_.push_back(offset);
|
|
|
|
|
initializer_attrs_.push_back(common.initializers()[x]);
|
|
|
|
|
|
|
|
|
|
if (varname == "Param") {
|
|
|
|
|
param_dim_ = dim;
|
|
|
|
|
param_offset_ = offset;
|
|
|
|
|
}
|
|
|
|
|
auto& initializer = common.initializers()[x];
|
|
|
|
|
create_initializer(initializer, varname);
|
|
|
|
|
|
|
|
|
|
offset += dim;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
initialize_value();
|
|
|
|
|
initialize_optimizer();
|
|
|
|
|
initialize_recorder();
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int32_t CommonSparseTable::initialize_recorder() { return 0; }
|
|
|
|
|
|
|
|
|
|
int32_t CommonSparseTable::initialize_value() {
|
|
|
|
|
shard_values_.reserve(task_pool_size_);
|
|
|
|
|
|
|
|
|
|
for (int x = 0; x < task_pool_size_; ++x) {
|
|
|
|
|
auto shard = std::make_shared<ValueBlock>(common, &initializers_);
|
|
|
|
|
auto shard =
|
|
|
|
|
std::make_shared<ValueBlock>(value_names_, value_dims_, value_offsets_,
|
|
|
|
|
value_idx_, initializer_attrs_, "none");
|
|
|
|
|
|
|
|
|
|
shard_values_.emplace_back(shard);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -281,14 +245,16 @@ int32_t CommonSparseTable::initialize_value() {
|
|
|
|
|
int32_t CommonSparseTable::initialize_optimizer() {
|
|
|
|
|
auto common = _config.common();
|
|
|
|
|
auto name = common.name();
|
|
|
|
|
auto attrs = common.attributes();
|
|
|
|
|
|
|
|
|
|
if (name == "sgd") {
|
|
|
|
|
optimizer_ = std::make_shared<SSGD>(common);
|
|
|
|
|
optimizer_ = std::make_shared<SSGD>(value_names_, value_dims_,
|
|
|
|
|
value_offsets_, value_idx_);
|
|
|
|
|
} else if (name == "adam") {
|
|
|
|
|
optimizer_ = std::make_shared<SAdam>(common);
|
|
|
|
|
optimizer_ = std::make_shared<SAdam>(value_names_, value_dims_,
|
|
|
|
|
value_offsets_, value_idx_);
|
|
|
|
|
} else if (name == "sum") {
|
|
|
|
|
optimizer_ = std::make_shared<SSUM>(common);
|
|
|
|
|
optimizer_ = std::make_shared<SSUM>(value_names_, value_dims_,
|
|
|
|
|
value_offsets_, value_idx_);
|
|
|
|
|
} else {
|
|
|
|
|
VLOG(0) << "init optimizer failed";
|
|
|
|
|
}
|
|
|
|
@ -330,8 +296,7 @@ int32_t CommonSparseTable::save(const std::string& dirname,
|
|
|
|
|
int64_t total_ins = 0;
|
|
|
|
|
for (int shard_id = 0; shard_id < task_pool_size_; ++shard_id) {
|
|
|
|
|
// save values
|
|
|
|
|
total_ins +=
|
|
|
|
|
SaveToText(value_out.get(), shard_values_[shard_id], params, mode);
|
|
|
|
|
total_ins += SaveToText(value_out.get(), shard_values_[shard_id], mode);
|
|
|
|
|
}
|
|
|
|
|
value_out->close();
|
|
|
|
|
|
|
|
|
@ -391,10 +356,6 @@ int32_t CommonSparseTable::pour() {
|
|
|
|
|
int32_t CommonSparseTable::pull_sparse(float* pull_values, const uint64_t* keys,
|
|
|
|
|
size_t num) {
|
|
|
|
|
rwlock_->RDLock();
|
|
|
|
|
std::vector<std::string> value_names;
|
|
|
|
|
for (auto name : _config.common().params()) {
|
|
|
|
|
value_names.push_back(name);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::vector<std::vector<uint64_t>> offset_bucket;
|
|
|
|
|
offset_bucket.resize(task_pool_size_);
|
|
|
|
@ -408,20 +369,18 @@ int32_t CommonSparseTable::pull_sparse(float* pull_values, const uint64_t* keys,
|
|
|
|
|
|
|
|
|
|
for (int shard_id = 0; shard_id < task_pool_size_; ++shard_id) {
|
|
|
|
|
tasks[shard_id] = _shards_task_pool[shard_id]->enqueue(
|
|
|
|
|
[this, shard_id, &keys, &offset_bucket, &value_names,
|
|
|
|
|
&pull_values]() -> int {
|
|
|
|
|
[this, shard_id, &keys, &offset_bucket, &pull_values]() -> int {
|
|
|
|
|
auto& block = shard_values_[shard_id];
|
|
|
|
|
auto& offsets = offset_bucket[shard_id];
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < offsets.size(); ++i) {
|
|
|
|
|
auto offset = offsets[i];
|
|
|
|
|
auto id = keys[offset];
|
|
|
|
|
block->InitFromInitializer(id, value_names);
|
|
|
|
|
auto values = block->Get(id, {"Param"});
|
|
|
|
|
auto dim = values[0]->size();
|
|
|
|
|
std::copy(values[0]->begin(), values[0]->end(),
|
|
|
|
|
pull_values + dim * offset);
|
|
|
|
|
auto* value = block->InitFromInitializer(id);
|
|
|
|
|
std::copy_n(value + param_offset_, param_dim_,
|
|
|
|
|
pull_values + param_dim_ * offset);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
@ -492,10 +451,6 @@ int32_t CommonSparseTable::push_sparse(const uint64_t* keys,
|
|
|
|
|
int32_t CommonSparseTable::push_sparse_param(const uint64_t* keys,
|
|
|
|
|
const float* values, size_t num) {
|
|
|
|
|
rwlock_->RDLock();
|
|
|
|
|
std::vector<std::string> value_names;
|
|
|
|
|
for (auto name : _config.common().params()) {
|
|
|
|
|
value_names.push_back(name);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::vector<std::vector<uint64_t>> offset_bucket;
|
|
|
|
|
offset_bucket.resize(task_pool_size_);
|
|
|
|
@ -509,18 +464,16 @@ int32_t CommonSparseTable::push_sparse_param(const uint64_t* keys,
|
|
|
|
|
|
|
|
|
|
for (int shard_id = 0; shard_id < task_pool_size_; ++shard_id) {
|
|
|
|
|
tasks[shard_id] = _shards_task_pool[shard_id]->enqueue(
|
|
|
|
|
[this, shard_id, &keys, &offset_bucket, &value_names,
|
|
|
|
|
&values]() -> int {
|
|
|
|
|
[this, shard_id, &keys, &offset_bucket, &values]() -> int {
|
|
|
|
|
auto& block = shard_values_[shard_id];
|
|
|
|
|
auto& offsets = offset_bucket[shard_id];
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < offsets.size(); ++i) {
|
|
|
|
|
auto offset = offsets[i];
|
|
|
|
|
auto id = keys[offset];
|
|
|
|
|
block->InitFromInitializer(id, value_names);
|
|
|
|
|
auto values_ = block->Get(id, {"Param"});
|
|
|
|
|
auto dim = values_[0]->size();
|
|
|
|
|
std::copy_n(values + dim * offset, dim, values_[0]->data());
|
|
|
|
|
auto* value = block->InitFromInitializer(id);
|
|
|
|
|
std::copy_n(values + param_dim_ * offset, param_dim_,
|
|
|
|
|
value + param_offset_);
|
|
|
|
|
}
|
|
|
|
|
return 0;
|
|
|
|
|
});
|
|
|
|
|