|
|
|
@ -55,7 +55,9 @@ MSRStatus ShardHeader::InitializeHeader(const std::vector<json> &headers, bool l
|
|
|
|
|
header_size_ = header["header_size"].get<uint64_t>();
|
|
|
|
|
page_size_ = header["page_size"].get<uint64_t>();
|
|
|
|
|
}
|
|
|
|
|
ParsePage(header["page"], shard_index, load_dataset);
|
|
|
|
|
if (SUCCESS != ParsePage(header["page"], shard_index, load_dataset)) {
|
|
|
|
|
return FAILED;
|
|
|
|
|
}
|
|
|
|
|
shard_index++;
|
|
|
|
|
}
|
|
|
|
|
return SUCCESS;
|
|
|
|
@ -248,11 +250,16 @@ MSRStatus ShardHeader::ParseIndexFields(const json &index_fields) {
|
|
|
|
|
return SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void ShardHeader::ParsePage(const json &pages, int shard_index, bool load_dataset) {
|
|
|
|
|
MSRStatus ShardHeader::ParsePage(const json &pages, int shard_index, bool load_dataset) {
|
|
|
|
|
// set shard_index when load_dataset is false
|
|
|
|
|
if (pages_.empty() && shard_count_ <= kMaxShardCount) {
|
|
|
|
|
if (shard_count_ > kMaxFileCount) {
|
|
|
|
|
MS_LOG(ERROR) << "The number of mindrecord files is greater than max value: " << kMaxFileCount;
|
|
|
|
|
return FAILED;
|
|
|
|
|
}
|
|
|
|
|
if (pages_.empty() && shard_count_ <= kMaxFileCount) {
|
|
|
|
|
pages_.resize(shard_count_);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (auto &page : pages) {
|
|
|
|
|
int page_id = page["page_id"];
|
|
|
|
|
int shard_id = page["shard_id"];
|
|
|
|
@ -275,6 +282,7 @@ void ShardHeader::ParsePage(const json &pages, int shard_index, bool load_datase
|
|
|
|
|
pages_[shard_index].push_back(std::move(parsed_page));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
MSRStatus ShardHeader::ParseStatistics(const json &statistics) {
|
|
|
|
@ -715,7 +723,9 @@ MSRStatus ShardHeader::FileToPages(const std::string dump_file_name) {
|
|
|
|
|
|
|
|
|
|
std::string line;
|
|
|
|
|
while (std::getline(page_in_handle, line)) {
|
|
|
|
|
ParsePage(json::parse(line), -1, true);
|
|
|
|
|
if (SUCCESS != ParsePage(json::parse(line), -1, true)) {
|
|
|
|
|
return FAILED;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
page_in_handle.close();
|
|
|
|
|