add fs_local_open example

revert-16555-model_data_cryption_link_all_lib
dongdaxiang 6 years ago
parent cf1360643f
commit afaf937010

@ -23,7 +23,11 @@ endfunction()
add_subdirectory(ir) add_subdirectory(ir)
add_subdirectory(details) add_subdirectory(details)
<<<<<<< HEAD
add_subdirectory(fleet) add_subdirectory(fleet)
=======
add_subdirectory(common)
>>>>>>> add fs_local_open example
#ddim lib #ddim lib
proto_library(framework_proto SRCS framework.proto) proto_library(framework_proto SRCS framework.proto)
proto_library(async_executor_proto SRCS data_feed.proto) proto_library(async_executor_proto SRCS data_feed.proto)

@ -18,6 +18,8 @@ limitations under the License. */
#include "google/protobuf/text_format.h" #include "google/protobuf/text_format.h"
#include "gflags/gflags.h" #include "gflags/gflags.h"
#include "paddle/fluid/framework/common/fs.h"
#include "paddle/fluid/framework/common/shell.h"
#include "paddle/fluid/framework/data_feed_factory.h" #include "paddle/fluid/framework/data_feed_factory.h"
#include "paddle/fluid/framework/executor_thread_worker.h" #include "paddle/fluid/framework/executor_thread_worker.h"
#include "paddle/fluid/framework/feed_fetch_method.h" #include "paddle/fluid/framework/feed_fetch_method.h"

@ -0,0 +1,2 @@
# Build targets for the common helpers: `fs` is compiled from fs.cc and `shell`
# from shell.cc.
# NOTE(review): fs.h includes common/shell.h; if fs.cc calls any shell_*
# function, `shell` probably needs to be listed in the DEPS of `fs` -- confirm.
cc_library(fs SRCS fs.cc DEPS glog boost)
cc_library(shell SRCS shell.cc DEPS glog)

File diff suppressed because it is too large Load Diff

@ -0,0 +1,100 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <stdio.h>
#include <string>
#include <vector>
#include "glog/logging.h"
#include "paddle/fluid/framework/common/ps_string.h"
#include "paddle/fluid/framework/common/shell.h"
namespace paddle {
namespace framework {
// Declarations of the file-system helpers. Only the signatures are visible in
// this header; the behavior notes below are therefore hedged.
// NOTE(review): std::shared_ptr and int64_t are used below, but this header
// does not include <memory> or <cstdint> itself; it appears to rely on shell.h
// pulling them in -- consider including them directly.
// Presumably selects which backend (local or hdfs) serves `path` -- TODO
// confirm the meaning of the returned int against the implementation.
int fs_select_internal(const std::string& path);
// localfs: operations whose names indicate the local file system.
// Getter/setter pair for a buffer size used by the localfs functions
// (unit not visible here -- presumably bytes).
extern size_t localfs_buffer_size();
extern void localfs_set_buffer_size(size_t x);
// Open `path` for reading / writing and return the stream as a shared FILE
// handle. `converter` is presumably a shell command the data is piped
// through -- TODO confirm.
extern std::shared_ptr<FILE> localfs_open_read(std::string path,
const std::string& converter);
extern std::shared_ptr<FILE> localfs_open_write(std::string path,
const std::string& converter);
extern int64_t localfs_file_size(const std::string& path);
extern void localfs_remove(const std::string& path);
extern std::vector<std::string> localfs_list(const std::string& path);
extern std::string localfs_tail(const std::string& path);
extern bool localfs_exists(const std::string& path);
extern void localfs_mkdir(const std::string& path);
// hdfs: same set of operations as localfs, plus a configurable command string.
// Unlike the localfs variants, the open functions report status through
// `err_no`.
extern size_t hdfs_buffer_size();
extern void hdfs_set_buffer_size(size_t x);
// Getter/setter for the command string used by the hdfs functions
// (presumably the hadoop client command line -- TODO confirm).
extern const std::string& hdfs_command();
extern void hdfs_set_command(const std::string& x);
extern std::shared_ptr<FILE> hdfs_open_read(std::string path, int* err_no,
const std::string& converter);
extern std::shared_ptr<FILE> hdfs_open_write(std::string path, int* err_no,
const std::string& converter);
extern void hdfs_remove(const std::string& path);
extern std::vector<std::string> hdfs_list(const std::string& path);
extern std::string hdfs_tail(const std::string& path);
extern bool hdfs_exists(const std::string& path);
extern void hdfs_mkdir(const std::string& path);
// auto-detect fs: backend-agnostic entry points. Presumably each one
// dispatches to the localfs_* or hdfs_* function based on `path` (see
// fs_select_internal) -- TODO confirm against the implementation.
extern std::shared_ptr<FILE> fs_open_read(const std::string& path, int* err_no,
const std::string& converter);
extern std::shared_ptr<FILE> fs_open_write(const std::string& path, int* err_no,
const std::string& converter);
// Generic open taking an explicit `mode` string; `converter` defaults to the
// empty string.
extern std::shared_ptr<FILE> fs_open(const std::string& path,
const std::string& mode, int* err_no,
const std::string& converter = "");
extern int64_t fs_file_size(const std::string& path);
extern void fs_remove(const std::string& path);
extern std::vector<std::string> fs_list(const std::string& path);
extern std::string fs_tail(const std::string& path);
extern bool fs_exists(const std::string& path);
extern void fs_mkdir(const std::string& path);
} // namespace framework
} // namespace paddle

@ -0,0 +1,238 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <ctype.h>
#include <stdio.h>
#include <cstring>
#include <string>
#include <vector>
#include "boost/lexical_cast.hpp"
#include "glog/logging.h"
namespace paddle {
namespace framework {
// Returns the number of consecutive whitespace characters (as classified by
// isspace) at the beginning of the NUL-terminated string `s`.
inline size_t count_spaces(const char* s) {
  size_t count = 0;
  // isspace() is only defined for values representable as unsigned char (or
  // EOF), so a plain char must be converted before the call.
  while (s[count] != 0 && isspace(static_cast<unsigned char>(s[count]))) {
    ++count;
  }
  return count;
}
// Returns the number of consecutive non-whitespace characters (as classified
// by isspace) at the beginning of the NUL-terminated string `s`.
inline size_t count_nonspaces(const char* s) {
  size_t count = 0;
  // isspace() is only defined for values representable as unsigned char (or
  // EOF), so a plain char must be converted before the call.
  while (s[count] != 0 && !isspace(static_cast<unsigned char>(s[count]))) {
    ++count;
  }
  return count;
}
// Appends printf-style formatted text to the end of `str`.
//   str  - string that receives the formatted text (modified in place).
//   fmt  - printf format string; it and `args` are handed to snprintf as-is,
//          so the arguments must match the conversion specifiers.
// CHECK-fails if snprintf reports a negative length or if the second pass
// produces a different length than the first.
template <class... ARGS>
void format_string_append(std::string& str, const char* fmt, // NOLINT
ARGS&&... args) { // use VA_ARGS may be better ?
// First pass with a null buffer only measures the formatted length.
int len = snprintf(NULL, 0, fmt, args...);
CHECK_GE(len, 0);
size_t oldlen = str.length();
// Grow by one extra byte so snprintf has room for its NUL terminator.
str.resize(oldlen + len + 1);
// Second pass writes the text directly into the newly added tail of `str`.
CHECK(snprintf(&str[oldlen], (size_t)len + 1, fmt, args...) == len);
// Drop the terminator byte again; `str` now ends with the formatted text.
str.resize(oldlen + len);
}
// Convenience overload that accepts the format as a std::string and hands its
// C string on to the const char* based format_string_append.
template <class... ARGS>
void format_string_append(std::string& str, const std::string& fmt,  // NOLINT
                          ARGS&&... args) {
  const char* raw_fmt = fmt.c_str();
  format_string_append(str, raw_fmt, args...);
}
// Builds a new string from a printf-style format and its arguments.
//   fmt  - printf format string.
//   args - values consumed by the conversion specifiers in `fmt`.
// Returns the formatted text; the formatting itself is done by
// format_string_append on an initially empty string.
template <class... ARGS>
std::string format_string(const char* fmt, ARGS&&... args) {
  std::string str;
  format_string_append(str, fmt, args...);
  // Return the local by name: wrapping it in std::move would prevent copy
  // elision and gains nothing, since a returned local is moved implicitly.
  return str;
}
// Convenience overload that accepts the format as a std::string and passes its
// C string to the const char* based format_string.
template <class... ARGS>
std::string format_string(const std::string& fmt, ARGS&&... args) {
  const char* raw_fmt = fmt.c_str();
  return format_string(raw_fmt, args...);
}
// Returns a copy of `str` with leading and trailing whitespace (as classified
// by isspace) removed. The string is scanned through c_str(), so only the
// part before the first embedded NUL character is considered.
inline std::string trim_spaces(const std::string& str) {
  const char* p = str.c_str();
  // isspace() is only defined for values representable as unsigned char (or
  // EOF), hence the casts below.
  while (*p != 0 && isspace(static_cast<unsigned char>(*p))) {
    ++p;
  }
  size_t len = strlen(p);
  while (len > 0 && isspace(static_cast<unsigned char>(p[len - 1]))) {
    --len;
  }
  return std::string(p, len);
}
// Parses whitespace-separated floating point numbers from the NUL-terminated
// string `str` and stores them consecutively in `v`.
//   str - text to parse.
//   v   - output array; no bound is checked here, so the caller must provide
//         room for every number contained in `str`.
// Returns the number of values written. Parsing stops at the end of the string
// or at the first token strtof cannot convert; such a token is neither stored
// nor counted.
inline int str_to_float(const char* str, float* v) {
  const char* head = str;
  char* cursor = nullptr;
  int index = 0;
  for (;;) {
    // strtof skips leading whitespace itself and leaves cursor == head when
    // nothing could be converted (end of input or an invalid token).
    const float value = std::strtof(head, &cursor);
    if (cursor == head) {
      break;
    }
    // Store only after the conversion is known to have succeeded.
    v[index++] = value;
    head = cursor;
  }
  return index;
}
// Splits `str` at every occurrence of the delimiter string `delim`.
//   str   - text to split; an empty string yields an empty result.
//   delim - separator, matched as a whole (it may be longer than one
//           character). If it is empty, `str` is returned as a single token.
// Returns the tokens in order. Empty tokens at the start or between two
// adjacent delimiters are kept; an empty token after the final delimiter is
// dropped. T must be implicitly constructible from std::string.
template <class T = std::string>
std::vector<T> split_string(const std::string& str, const std::string& delim) {
  std::vector<T> res_list;
  if (str.empty()) {
    return res_list;
  }
  if (delim.empty()) {
    // An empty delimiter matches at every position, which would otherwise
    // drive the scan past the end of `str`.
    res_list.push_back(str);
    return res_list;
  }
  size_t pre_pos = 0;
  size_t pos = 0;
  std::string tmp_str;
  while ((pos = str.find(delim, pre_pos)) != std::string::npos) {
    tmp_str.assign(str, pre_pos, pos - pre_pos);
    res_list.push_back(tmp_str);
    // Skip the whole delimiter, not just its first character.
    pre_pos = pos + delim.length();
  }
  tmp_str.assign(str, pre_pos, str.length() - pre_pos);
  if (!tmp_str.empty()) {
    res_list.push_back(tmp_str);
  }
  return res_list;
}
// Splits `str` at whitespace (as classified by isspace). Leading and trailing
// whitespace is ignored and consecutive whitespace characters act as a single
// separator, so the result never contains empty tokens. The string is scanned
// through c_str(), so scanning stops at the first embedded NUL character.
// T must be implicitly constructible from std::string.
template <class T = std::string>
std::vector<T> split_string(const std::string& str) {
  std::vector<T> list;
  std::string tmp_str;
  const char* p = str.c_str();
  while (*p != 0) {
    // isspace() is only defined for values representable as unsigned char (or
    // EOF), hence the casts.
    if (isspace(static_cast<unsigned char>(*p))) {
      ++p;
      continue;
    }
    // `p` is at the first character of a token; advance to one past its end.
    // Pointer arithmetic avoids tracking offsets in an int, which cannot
    // represent every position of a large string.
    const char* token_begin = p;
    while (*p != 0 && !isspace(static_cast<unsigned char>(*p))) {
      ++p;
    }
    tmp_str.assign(token_begin, static_cast<size_t>(p - token_begin));
    list.push_back(tmp_str);
  }
  return list;
}
// Concatenates the elements of `strs` into one string, converting each element
// with boost::lexical_cast<std::string> and placing `delim` between
// neighbouring elements. An empty vector yields an empty string.
template <class T>
std::string join_strings(const std::vector<T>& strs, char delim) {
  std::string joined;
  bool is_first = true;
  for (const T& item : strs) {
    if (!is_first) {
      joined += delim;
    }
    is_first = false;
    joined += boost::lexical_cast<std::string>(item);
  }
  return joined;
}
// Reads delimiter-terminated records from a FILE*, one per call. The record
// lives in an internal buffer that ::getdelim grows as required, so callers do
// not have to know the maximum record length in advance. Instances can be
// neither copied nor moved.
class LineFileReader {
 public:
  LineFileReader() {}
  LineFileReader(LineFileReader&&) = delete;
  LineFileReader(const LineFileReader&) = delete;
  // The buffer is allocated by ::getdelim and therefore released with free.
  ~LineFileReader() { ::free(_buffer); }

  // Reads the next '\n'-terminated line; see getdelim for the contract.
  char* getline(FILE* f) { return this->getdelim(f, '\n'); }

  // Reads the next record terminated by `delim` from `f`.
  // On success returns the internal buffer with a trailing `delim` stripped
  // and records its length for length(). When nothing could be read, the
  // length is reset to 0, the stream is CHECKed to be at end-of-file, and
  // NULL is returned. The returned pointer is reused by the next call.
  char* getdelim(FILE* f, char delim) {
    ssize_t ret = ::getdelim(&_buffer, &_buf_size, delim, f);
    if (ret < 0) {
      _length = 0;
      CHECK(feof(f));
      return NULL;
    }
    const bool ends_with_delim = ret >= 1 && _buffer[ret - 1] == delim;
    if (ends_with_delim) {
      // Overwrite the delimiter with the terminator and shorten the record.
      _buffer[--ret] = 0;
    }
    _length = (size_t)ret;
    return _buffer;
  }

  // Buffer used by the most recent read (NULL before the first read).
  char* get() { return _buffer; }
  // Length of the most recent record, excluding a stripped delimiter.
  size_t length() { return _length; }

 private:
  char* _buffer = NULL;
  size_t _buf_size = 0;
  size_t _length = 0;
};
} // end namespace framework
} // end namespace paddle

File diff suppressed because it is too large Load Diff

@ -0,0 +1,60 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <memory>
#include <string>
#include <utility>
#include "glog/logging.h"
#include "paddle/fluid/framework/common/ps_string.h"
namespace paddle {
namespace framework {
// Gives access to the verbosity flag shared by the shell helpers. The flag is
// a function-local static that starts out false.
inline bool& shell_verbose_internal() {
  static bool verbose_flag = false;
  return verbose_flag;
}
// Returns the current value of the verbosity flag.
inline bool shell_verbose() {
  const bool enabled = shell_verbose_internal();
  return enabled;
}
// Sets the verbosity flag to `x`.
inline void shell_set_verbose(bool x) {
  bool& flag = shell_verbose_internal();
  flag = x;
}
// Stream-opening helpers; only their declarations are visible in this header.
// Opens `path` with the given `mode` and returns the stream as a shared FILE
// handle (`mode` presumably follows fopen conventions -- TODO confirm).
extern std::shared_ptr<FILE> shell_fopen(const std::string& path,
const std::string& mode);
// Starts `cmd` and returns a stream connected to it. Status is reported
// through `*err_no`; shell_execute treats a value of -1 as "try again".
extern std::shared_ptr<FILE> shell_popen(const std::string& cmd,
const std::string& mode, int* err_no);
// Starts `cmd` and returns a pair of streams (presumably one for each
// direction of a two-way pipe; which is which is not visible here -- TODO
// confirm against the implementation).
extern std::pair<std::shared_ptr<FILE>, std::shared_ptr<FILE>> shell_p2open(
const std::string& cmd);
// Runs `cmd` through shell_popen in "w" mode and discards the returned stream
// right away. The call is repeated for as long as shell_popen reports
// err_no == -1; there is no upper bound on the number of attempts.
inline void shell_execute(const std::string& cmd) {
  for (;;) {
    int err_no = 0;
    shell_popen(cmd, "w", &err_no);
    if (err_no != -1) {
      break;
    }
  }
}
// Runs `cmd` and returns a string (presumably everything the command wrote to
// its standard output -- TODO confirm against the implementation).
extern std::string shell_get_command_output(const std::string& cmd);
} // namespace framework
} // namespace paddle

@ -12,10 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <stdio_ext.h>
#include "google/protobuf/io/zero_copy_stream_impl.h" #include "google/protobuf/io/zero_copy_stream_impl.h"
#include "google/protobuf/message.h" #include "google/protobuf/message.h"
#include "google/protobuf/text_format.h" #include "google/protobuf/text_format.h"
#include "common/fs.h"
#include "common/shell.h"
#include "gflags/gflags.h" #include "gflags/gflags.h"
#include "paddle/fluid/framework/data_feed.h" #include "paddle/fluid/framework/data_feed.h"
#include "paddle/fluid/framework/feed_fetch_method.h" #include "paddle/fluid/framework/feed_fetch_method.h"
@ -64,7 +67,7 @@ bool DataFeed::PickOneFile(std::string* filename) {
return false; return false;
} }
*filename = filelist_[file_idx_++]; *filename = filelist_[file_idx_++];
LOG(ERROR) << "pick file:" << *filename; // LOG(ERROR) << "pick file:" << *filename;
return true; return true;
} }
@ -91,8 +94,24 @@ void PrivateQueueDataFeed<T>::SetQueueSize(int queue_size) {
template <typename T> template <typename T>
bool PrivateQueueDataFeed<T>::Start() { bool PrivateQueueDataFeed<T>::Start() {
CheckSetFileList(); CheckSetFileList();
read_thread_ = std::thread(&PrivateQueueDataFeed::ReadThread, this); std::string filename;
read_thread_.detach(); while (PickOneFile(&filename)) {
int err_no = 0;
std::string pipeline_cmd = "cat";
std::string path =
"/home/users/dongdaxiang/pslib_ctr/local/data_mod/part-00012";
fp_ = fs_open_read(path, &err_no, pipeline_cmd);
__fsetlocking(&*fp_, FSETLOCKING_BYCALLER);
thread_local LineFileReader reader;
while (reader.getline(&*(fp_.get()))) {
LOG(ERROR) << "read a line";
}
read_thread_ = std::thread(&PrivateQueueDataFeed::ReadThread, this);
read_thread_.detach();
}
queue_->Close();
finish_start_ = true; finish_start_ = true;
return true; return true;
@ -100,17 +119,10 @@ bool PrivateQueueDataFeed<T>::Start() {
template <typename T> template <typename T>
void PrivateQueueDataFeed<T>::ReadThread() { void PrivateQueueDataFeed<T>::ReadThread() {
std::string filename; T instance;
while (PickOneFile(&filename)) { while (ParseOneInstanceFromPipe(&instance)) {
file_.open(filename.c_str()); // is_text_feed queue_->Send(instance);
PADDLE_ENFORCE(file_.good(), "Open file<%s> fail.", filename.c_str());
T instance;
while (ParseOneInstance(&instance)) {
queue_->Send(instance);
}
file_.close();
} }
queue_->Close();
} }
template <typename T> template <typename T>
@ -168,6 +180,14 @@ void MultiSlotDataFeed::Init(
finish_init_ = true; finish_init_ = true;
} }
// Reader loop: keeps asking ParseOneInstanceFromPipe for instances and sends
// each one to queue_ until the parser returns false.
void MultiSlotDataFeed::ReadThread() {
  LOG(ERROR) << "Haha";
  for (std::vector<MultiSlotType> instance;
       ParseOneInstanceFromPipe(&instance);) {
    queue_->Send(instance);
  }
}
bool MultiSlotDataFeed::CheckFile(const char* filename) { bool MultiSlotDataFeed::CheckFile(const char* filename) {
CheckInit(); // get info of slots CheckInit(); // get info of slots
std::ifstream fin(filename); std::ifstream fin(filename);
@ -279,6 +299,65 @@ bool MultiSlotDataFeed::CheckFile(const char* filename) {
return true; return true;
} }
// Work-in-progress pipe reader. As written it reads every remaining line from
// fp_, logs "read a line" for each, and then returns true; `*instance` is
// never modified.
// NOTE(review): the return value is always true, so a caller loop such as
// `while (ParseOneInstanceFromPipe(&instance))` in MultiSlotDataFeed::ReadThread
// has no exit condition here -- confirm intent before relying on it.
// NOTE(review): fp_ is dereferenced without a null check.
// NOTE(review): the actual per-line parsing is disabled in the second comment
// block below; it indexes (*instance)[idx] without any visible resize of
// `*instance` -- verify the sizing before re-enabling it.
bool MultiSlotDataFeed::ParseOneInstanceFromPipe(
std::vector<MultiSlotType>* instance) {
LOG(ERROR) << "hehe";
// thread_local: every calling thread keeps its own line buffer.
thread_local LineFileReader reader;
// Drain the stream; getline returns NULL once nothing more can be read.
while (reader.getline(&*(fp_.get()))) {
/*
const char* str = reader.get();
std::string line = std::string(str);
LOG(ERROR) << line;
*/
LOG(ERROR) << "read a line";
}
return true;
/*
if (!reader.getline(fp_.get())) {
return false;
} else {
// std::string& line = reader_.get();
// const char* str = line.c_str();
const char* str = reader.get();
std::string line = std::string(str);
LOG(ERROR) << line;
char* endptr = const_cast<char*>(str);
int pos = 0;
for (size_t i = 0; i < use_slots_index_.size(); ++i) {
int idx = use_slots_index_[i];
int num = strtol(&str[pos], &endptr, 10);
PADDLE_ENFORCE(
num,
"The number of ids can not be zero, you need padding "
"it in data generator; or if there is something wrong with "
"the data, please check if the data contains unresolvable "
"characters.\nplease check this error line: %s",
str);
if (idx != -1) {
(*instance)[idx].Init(all_slots_type_[i]);
if ((*instance)[idx].GetType()[0] == 'f') { // float
for (int j = 0; j < num; ++j) {
float feasign = strtof(endptr, &endptr);
(*instance)[idx].AddValue(feasign);
}
} else if ((*instance)[idx].GetType()[0] == 'u') { // uint64
for (int j = 0; j < num; ++j) {
uint64_t feasign = (uint64_t)strtoull(endptr, &endptr, 10);
(*instance)[idx].AddValue(feasign);
}
}
pos = endptr - str;
} else {
for (int j = 0; j <= num; ++j) {
pos = line.find_first_of(' ', pos + 1);
}
}
}
return true;
}
*/
}
bool MultiSlotDataFeed::ParseOneInstance(std::vector<MultiSlotType>* instance) { bool MultiSlotDataFeed::ParseOneInstance(std::vector<MultiSlotType>* instance) {
std::string line; std::string line;
if (getline(file_, line)) { if (getline(file_, line)) {

@ -21,6 +21,7 @@ limitations under the License. */
#include <thread> // NOLINT #include <thread> // NOLINT
#include <vector> #include <vector>
#include "paddle/fluid/framework/common/ps_string.h"
#include "paddle/fluid/framework/data_feed.pb.h" #include "paddle/fluid/framework/data_feed.pb.h"
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/reader.h" #include "paddle/fluid/framework/reader.h"
@ -136,6 +137,7 @@ class PrivateQueueDataFeed : public DataFeed {
virtual void SetQueueSize(int queue_size); virtual void SetQueueSize(int queue_size);
// The reading and parsing method called in the ReadThread. // The reading and parsing method called in the ReadThread.
virtual bool ParseOneInstance(T* instance) = 0; virtual bool ParseOneInstance(T* instance) = 0;
virtual bool ParseOneInstanceFromPipe(T* instance) = 0;
// This function is used to put instance to vec_ins // This function is used to put instance to vec_ins
virtual void AddInstanceToInsVec(T* vec_ins, const T& instance, virtual void AddInstanceToInsVec(T* vec_ins, const T& instance,
int index) = 0; int index) = 0;
@ -150,7 +152,9 @@ class PrivateQueueDataFeed : public DataFeed {
// ifstream one line and one line parse: 6034 ms // ifstream one line and one line parse: 6034 ms
// fread one buffer and one buffer parse: 7097 ms // fread one buffer and one buffer parse: 7097 ms
std::ifstream file_; std::ifstream file_;
std::shared_ptr<FILE> fp_;
size_t queue_size_; size_t queue_size_;
LineFileReader reader_;
// The queue for store parsed data // The queue for store parsed data
std::unique_ptr<paddle::operators::reader::BlockingQueue<T>> queue_; std::unique_ptr<paddle::operators::reader::BlockingQueue<T>> queue_;
}; };
@ -228,12 +232,15 @@ class MultiSlotDataFeed
virtual ~MultiSlotDataFeed() {} virtual ~MultiSlotDataFeed() {}
virtual void Init(const paddle::framework::DataFeedDesc& data_feed_desc); virtual void Init(const paddle::framework::DataFeedDesc& data_feed_desc);
virtual bool CheckFile(const char* filename); virtual bool CheckFile(const char* filename);
// virtual void ReadThread();
protected: protected:
virtual void ReadThread();
virtual void AddInstanceToInsVec(std::vector<MultiSlotType>* vec_ins, virtual void AddInstanceToInsVec(std::vector<MultiSlotType>* vec_ins,
const std::vector<MultiSlotType>& instance, const std::vector<MultiSlotType>& instance,
int index); int index);
virtual bool ParseOneInstance(std::vector<MultiSlotType>* instance); virtual bool ParseOneInstance(std::vector<MultiSlotType>* instance);
virtual bool ParseOneInstanceFromPipe(std::vector<MultiSlotType>* instance);
virtual void PutToFeedVec(const std::vector<MultiSlotType>& ins_vec); virtual void PutToFeedVec(const std::vector<MultiSlotType>& ins_vec);
private: private:

@ -13,12 +13,15 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/framework/executor_thread_worker.h" #include "paddle/fluid/framework/executor_thread_worker.h"
#include <stdio_ext.h>
#include <algorithm> #include <algorithm>
#include "google/protobuf/io/zero_copy_stream_impl.h" #include "google/protobuf/io/zero_copy_stream_impl.h"
#include "google/protobuf/message.h" #include "google/protobuf/message.h"
#include "google/protobuf/text_format.h" #include "google/protobuf/text_format.h"
#include "gflags/gflags.h" #include "gflags/gflags.h"
#include "paddle/fluid/framework/common/fs.h"
#include "paddle/fluid/framework/common/shell.h"
#include "paddle/fluid/framework/feed_fetch_method.h" #include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/feed_fetch_type.h" #include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/lod_rank_table.h" #include "paddle/fluid/framework/lod_rank_table.h"
@ -244,6 +247,8 @@ void ExecutorThreadWorker::TrainFilesWithTimer() {
platform::SetNumThreads(1); platform::SetNumThreads(1);
SetDevice(); SetDevice();
thread_reader_->Start(); thread_reader_->Start();
exit(0);
/*
std::vector<double> op_total_time; std::vector<double> op_total_time;
std::vector<std::string> op_name; std::vector<std::string> op_name;
for (auto& op : ops_) { for (auto& op : ops_) {
@ -287,13 +292,14 @@ void ExecutorThreadWorker::TrainFilesWithTimer() {
} }
timeline.Start(); timeline.Start();
} }
*/
} }
void ExecutorThreadWorker::TrainFiles() { void ExecutorThreadWorker::TrainFiles() {
platform::SetNumThreads(1); platform::SetNumThreads(1);
// todo: configurable // todo: configurable
SetDevice(); // SetDevice();
int fetch_var_num = fetch_var_names_.size(); int fetch_var_num = fetch_var_names_.size();
fetch_values_.clear(); fetch_values_.clear();

Loading…
Cancel
Save