|
|
|
@ -309,6 +309,19 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
|
|
|
|
|
/// \param[in] num_workers The number of threads in this operator
|
|
|
|
|
/// \return Shared pointer to the original object, or nullptr if validation of num_workers fails
|
|
|
|
|
std::shared_ptr<Dataset> SetNumWorkers(int32_t num_workers) {
|
|
|
|
|
#if !defined(_WIN32) && !defined(_WIN64)
|
|
|
|
|
#ifndef ENABLE_ANDROID
|
|
|
|
|
int32_t cpu_count = sysconf(_SC_NPROCESSORS_CONF);
|
|
|
|
|
if (cpu_count < 0 || cpu_count > INT32_MAX) {
|
|
|
|
|
MS_LOG(ERROR) << "Error determining current CPU: " << cpu_count;
|
|
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
if (num_workers < 1 || num_workers > cpu_count) {
|
|
|
|
|
MS_LOG(ERROR) << "num_workers exceeds the boundary between 1 and " << cpu_count;
|
|
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
#endif
|
|
|
|
|
num_workers_ = num_workers;
|
|
|
|
|
return shared_from_this();
|
|
|
|
|
}
|
|
|
|
@ -336,7 +349,7 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
|
|
|
|
|
/// range would be kept. 0 <= min_frequency <= max_frequency <= total_words. min_frequency/max_frequency
|
|
|
|
|
/// can be set to default, which corresponds to 0/total_words respectively
|
|
|
|
|
/// \param[in] top_k Number of words to be built into vocab. top_k most frequent words are
|
|
|
|
|
/// taken. The top_k is taken after freq_range. If not enough top_k, all words will be taken
|
|
|
|
|
/// \param[in] special_tokens A list of strings, each one is a special token
|
|
|
|
|
/// \param[in] special_first Whether special_tokens will be prepended/appended to vocab. If special_tokens
|
|
|
|
|
/// is specified and special_first is set to default, special_tokens will be prepended
|
|
|
|
|