|
|
|
@ -101,7 +101,7 @@ MSRStatus ShardReader::Init(const std::vector<std::string> &file_paths, bool loa
|
|
|
|
|
// sqlite3_open create a database if not found, use sqlite3_open_v2 instead of it
|
|
|
|
|
int rc = sqlite3_open_v2(common::SafeCStr(file + ".db"), &db, SQLITE_OPEN_READONLY, nullptr);
|
|
|
|
|
if (rc != SQLITE_OK) {
|
|
|
|
|
MS_LOG(ERROR) << "Can't open database, error: " << sqlite3_errmsg(db);
|
|
|
|
|
MS_LOG(ERROR) << "Invalid file, failed to open database: " << file + ".db, error: " << sqlite3_errmsg(db);
|
|
|
|
|
return FAILED;
|
|
|
|
|
}
|
|
|
|
|
MS_LOG(DEBUG) << "Opened database successfully";
|
|
|
|
@ -120,7 +120,7 @@ MSRStatus ShardReader::Init(const std::vector<std::string> &file_paths, bool loa
|
|
|
|
|
MS_LOG(DEBUG) << "Get " << static_cast<int>(name.size()) << " records from index.";
|
|
|
|
|
string shardName = GetFileName(file).second;
|
|
|
|
|
if (name.empty() || name[0][0] != shardName) {
|
|
|
|
|
MS_LOG(ERROR) << "DB file can not match file " << file;
|
|
|
|
|
MS_LOG(ERROR) << "Invalid file, DB file can not match file: " << file;
|
|
|
|
|
sqlite3_free(errmsg);
|
|
|
|
|
sqlite3_close(db);
|
|
|
|
|
db = nullptr;
|
|
|
|
@ -182,7 +182,7 @@ MSRStatus ShardReader::Open() {
|
|
|
|
|
std::shared_ptr<std::fstream> fs = std::make_shared<std::fstream>();
|
|
|
|
|
fs->open(common::SafeCStr(file), std::ios::in | std::ios::binary);
|
|
|
|
|
if (!fs->good()) {
|
|
|
|
|
MS_LOG(ERROR) << "File could not opened";
|
|
|
|
|
MS_LOG(ERROR) << "Invalid file, failed to open file: " << file;
|
|
|
|
|
return FAILED;
|
|
|
|
|
}
|
|
|
|
|
MS_LOG(INFO) << "Open shard file successfully.";
|
|
|
|
@ -200,7 +200,7 @@ MSRStatus ShardReader::Open(int n_consumer) {
|
|
|
|
|
std::shared_ptr<std::fstream> fs = std::make_shared<std::fstream>();
|
|
|
|
|
fs->open(common::SafeCStr(file), std::ios::in | std::ios::binary);
|
|
|
|
|
if (!fs->good()) {
|
|
|
|
|
MS_LOG(ERROR) << "File could not opened";
|
|
|
|
|
MS_LOG(ERROR) << "Invalid file, failed to open file: " << file;
|
|
|
|
|
return FAILED;
|
|
|
|
|
}
|
|
|
|
|
file_streams_random_[j].push_back(fs);
|
|
|
|
@ -385,7 +385,7 @@ MSRStatus ShardReader::ReadAllRowsInShard(int shard_id, const std::string &sql,
|
|
|
|
|
if (!all_in_index_) {
|
|
|
|
|
fs->open(common::SafeCStr(file_name), std::ios::in | std::ios::binary);
|
|
|
|
|
if (!fs->good()) {
|
|
|
|
|
MS_LOG(ERROR) << "File could not opened";
|
|
|
|
|
MS_LOG(ERROR) << "Invalid file, failed to open file: " << file_name;
|
|
|
|
|
return FAILED;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
@ -602,7 +602,7 @@ MSRStatus ShardReader::QueryWithCriteria(sqlite3 *db, string &sql, string criter
|
|
|
|
|
std::vector<std::vector<std::string>> &labels) {
|
|
|
|
|
sqlite3_stmt *stmt = nullptr;
|
|
|
|
|
if (sqlite3_prepare_v2(db, common::SafeCStr(sql), -1, &stmt, 0) != SQLITE_OK) {
|
|
|
|
|
MS_LOG(ERROR) << "SQL error: could not prepare statement";
|
|
|
|
|
MS_LOG(ERROR) << "SQL error: could not prepare statement, sql: " << sql;
|
|
|
|
|
return FAILED;
|
|
|
|
|
}
|
|
|
|
|
int index = sqlite3_bind_parameter_index(stmt, ":criteria");
|
|
|
|
@ -631,7 +631,7 @@ std::pair<MSRStatus, std::vector<json>> ShardReader::GetLabelsFromBinaryFile(
|
|
|
|
|
std::shared_ptr<std::fstream> fs = std::make_shared<std::fstream>();
|
|
|
|
|
fs->open(common::SafeCStr(file_name), std::ios::in | std::ios::binary);
|
|
|
|
|
if (!fs->good()) {
|
|
|
|
|
MS_LOG(ERROR) << "File could not opened";
|
|
|
|
|
MS_LOG(ERROR) << "Invalid file, failed to open file: " << file_name;
|
|
|
|
|
return {FAILED, {}};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -795,7 +795,8 @@ int64_t ShardReader::GetNumClasses(const std::string &category_field) {
|
|
|
|
|
sqlite3 *db = nullptr;
|
|
|
|
|
int rc = sqlite3_open_v2(common::SafeCStr(file_paths_[x] + ".db"), &db, SQLITE_OPEN_READONLY, nullptr);
|
|
|
|
|
if (SQLITE_OK != rc) {
|
|
|
|
|
MS_LOG(ERROR) << "Can't open database, error: " << sqlite3_errmsg(db);
|
|
|
|
|
MS_LOG(ERROR) << "Invalid file, failed to open database: " << file_paths_[x] + ".db, error: "
|
|
|
|
|
<< sqlite3_errmsg(db);
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
threads[x] = std::thread(&ShardReader::GetClassesInShard, this, db, x, sql, std::ref(categories));
|
|
|
|
@ -970,19 +971,19 @@ MSRStatus ShardReader::CreateTasksByCategory(const std::vector<std::tuple<int, i
|
|
|
|
|
if (std::dynamic_pointer_cast<ShardPkSample>(op)) {
|
|
|
|
|
num_samples = std::dynamic_pointer_cast<ShardPkSample>(op)->GetNumSamples();
|
|
|
|
|
if (num_samples < 0) {
|
|
|
|
|
MS_LOG(ERROR) << "Parameter num_samples is not positive or zero";
|
|
|
|
|
MS_LOG(ERROR) << "Invalid parameter, num_samples must be greater than or equal to 0, but got " << num_samples;
|
|
|
|
|
return FAILED;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (num_elements <= 0) {
|
|
|
|
|
MS_LOG(ERROR) << "Parameter num_element is not positive";
|
|
|
|
|
MS_LOG(ERROR) << "Invalid parameter, num_elements must be greater than 0, but got " << num_elements;
|
|
|
|
|
return FAILED;
|
|
|
|
|
}
|
|
|
|
|
if (categories.empty() == true) {
|
|
|
|
|
std::string category_field = category_op->GetCategoryField();
|
|
|
|
|
int64_t num_categories = category_op->GetNumCategories();
|
|
|
|
|
if (num_categories <= 0) {
|
|
|
|
|
MS_LOG(ERROR) << "Parameter num_categories is not positive";
|
|
|
|
|
MS_LOG(ERROR) << "Invalid parameter, num_categories must be greater than 0, but got " << num_elements;
|
|
|
|
|
return FAILED;
|
|
|
|
|
}
|
|
|
|
|
std::set<std::string> categories_set;
|
|
|
|
|