add fs_local_open example

revert-16555-model_data_cryption_link_all_lib
dongdaxiang 6 years ago
parent cf1360643f
commit afaf937010

@ -23,7 +23,11 @@ endfunction()
add_subdirectory(ir)
add_subdirectory(details)
<<<<<<< HEAD
add_subdirectory(fleet)
=======
add_subdirectory(common)
>>>>>>> add fs_local_open example
#ddim lib
proto_library(framework_proto SRCS framework.proto)
proto_library(async_executor_proto SRCS data_feed.proto)

@ -18,6 +18,8 @@ limitations under the License. */
#include "google/protobuf/text_format.h"
#include "gflags/gflags.h"
#include "paddle/fluid/framework/common/fs.h"
#include "paddle/fluid/framework/common/shell.h"
#include "paddle/fluid/framework/data_feed_factory.h"
#include "paddle/fluid/framework/executor_thread_worker.h"
#include "paddle/fluid/framework/feed_fetch_method.h"

@ -0,0 +1,2 @@
cc_library(fs SRCS fs.cc DEPS glog boost)
cc_library(shell SRCS shell.cc DEPS glog)

File diff suppressed because it is too large Load Diff

@ -0,0 +1,100 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <stdio.h>
#include <string>
#include <vector>
#include "glog/logging.h"
#include "paddle/fluid/framework/common/ps_string.h"
#include "paddle/fluid/framework/common/shell.h"
namespace paddle {
namespace framework {
int fs_select_internal(const std::string& path);
// localfs
extern size_t localfs_buffer_size();
extern void localfs_set_buffer_size(size_t x);
extern std::shared_ptr<FILE> localfs_open_read(std::string path,
const std::string& converter);
extern std::shared_ptr<FILE> localfs_open_write(std::string path,
const std::string& converter);
extern int64_t localfs_file_size(const std::string& path);
extern void localfs_remove(const std::string& path);
extern std::vector<std::string> localfs_list(const std::string& path);
extern std::string localfs_tail(const std::string& path);
extern bool localfs_exists(const std::string& path);
extern void localfs_mkdir(const std::string& path);
// hdfs
extern size_t hdfs_buffer_size();
extern void hdfs_set_buffer_size(size_t x);
extern const std::string& hdfs_command();
extern void hdfs_set_command(const std::string& x);
extern std::shared_ptr<FILE> hdfs_open_read(std::string path, int* err_no,
const std::string& converter);
extern std::shared_ptr<FILE> hdfs_open_write(std::string path, int* err_no,
const std::string& converter);
extern void hdfs_remove(const std::string& path);
extern std::vector<std::string> hdfs_list(const std::string& path);
extern std::string hdfs_tail(const std::string& path);
extern bool hdfs_exists(const std::string& path);
extern void hdfs_mkdir(const std::string& path);
// aut-detect fs
extern std::shared_ptr<FILE> fs_open_read(const std::string& path, int* err_no,
const std::string& converter);
extern std::shared_ptr<FILE> fs_open_write(const std::string& path, int* err_no,
const std::string& converter);
extern std::shared_ptr<FILE> fs_open(const std::string& path,
const std::string& mode, int* err_no,
const std::string& converter = "");
extern int64_t fs_file_size(const std::string& path);
extern void fs_remove(const std::string& path);
extern std::vector<std::string> fs_list(const std::string& path);
extern std::string fs_tail(const std::string& path);
extern bool fs_exists(const std::string& path);
extern void fs_mkdir(const std::string& path);
} // namespace framework
} // namespace paddle

@ -0,0 +1,238 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <ctype.h>
#include <stdio.h>
#include <cstring>
#include <string>
#include <vector>
#include "boost/lexical_cast.hpp"
#include "glog/logging.h"
namespace paddle {
namespace framework {
inline size_t count_spaces(const char* s) {
size_t count = 0;
while (*s != 0 && isspace(*s++)) {
count++;
}
return count;
}
inline size_t count_nonspaces(const char* s) {
size_t count = 0;
while (*s != 0 && !isspace(*s++)) {
count++;
}
return count;
}
template <class... ARGS>
void format_string_append(std::string& str, const char* fmt, // NOLINT
ARGS&&... args) { // use VA_ARGS may be better ?
int len = snprintf(NULL, 0, fmt, args...);
CHECK_GE(len, 0);
size_t oldlen = str.length();
str.resize(oldlen + len + 1);
CHECK(snprintf(&str[oldlen], (size_t)len + 1, fmt, args...) == len);
str.resize(oldlen + len);
}
template <class... ARGS>
void format_string_append(std::string& str, const std::string& fmt, // NOLINT
ARGS&&... args) {
format_string_append(str, fmt.c_str(), args...);
}
template <class... ARGS>
std::string format_string(const char* fmt, ARGS&&... args) {
std::string str;
format_string_append(str, fmt, args...);
return std::move(str);
}
template <class... ARGS>
std::string format_string(const std::string& fmt, ARGS&&... args) {
return format_string(fmt.c_str(), args...);
}
// remove leading and tailing spaces
inline std::string trim_spaces(const std::string& str) {
const char* p = str.c_str();
while (*p != 0 && isspace(*p)) {
p++;
}
size_t len = strlen(p);
while (len > 0 && isspace(p[len - 1])) {
len--;
}
return std::string(p, len);
}
inline int str_to_float(const char* str, float* v) {
const char* head = str;
char* cursor = NULL;
int index = 0;
while (*(head += count_spaces(head)) != 0) {
v[index++] = std::strtof(head, &cursor);
if (head == cursor) {
break;
}
head = cursor;
}
return index;
}
// split string by delim
template <class T = std::string>
std::vector<T> split_string(const std::string& str, const std::string& delim) {
size_t pre_pos = 0;
size_t pos = 0;
std::string tmp_str;
std::vector<T> res_list;
res_list.clear();
if (str.empty()) {
return res_list;
}
while ((pos = str.find(delim, pre_pos)) != std::string::npos) {
tmp_str.assign(str, pre_pos, pos - pre_pos);
res_list.push_back(tmp_str);
pre_pos = pos + 1;
}
tmp_str.assign(str, pre_pos, str.length() - pre_pos);
if (!tmp_str.empty()) {
res_list.push_back(tmp_str);
}
return res_list;
/*
size_t num = 1;
const char* p;
for (p = str.c_str(); *p != 0; p++) {
if (*p == delim) {
num++;
}
}
std::vector<T> list(num);
const char* last = str.c_str();
num = 0;
for (p = str.c_str(); *p != 0; p++) {
if (*p == delim) {
list[num++] = boost::lexical_cast<T>(last, p - last);
last = p + 1;
}
}
list[num] = boost::lexical_cast<T>(last, p - last);
return list;
*/
}
// split string by spaces. Leading and tailing spaces are ignored. Consecutive
// spaces are treated as one delim.
template <class T = std::string>
std::vector<T> split_string(const std::string& str) {
std::vector<T> list;
const char* p;
int pre_pos = 0;
int pos = 0;
std::string tmp_str;
if (str.empty()) {
return list;
}
for (p = str.c_str(); *p != 0;) {
if (!isspace(*p)) {
pos = pre_pos;
p++;
while (*p != 0 && !isspace(*p)) {
pos++;
p++;
}
tmp_str.assign(str, pre_pos, pos - pre_pos + 1);
list.push_back(tmp_str);
pre_pos = pos + 1;
} else {
pre_pos++;
p++;
}
}
return list;
}
template <class T>
std::string join_strings(const std::vector<T>& strs, char delim) {
std::string str;
for (size_t i = 0; i < strs.size(); i++) {
if (i > 0) {
str += delim;
}
str += boost::lexical_cast<std::string>(strs[i]);
}
return str;
}
// A helper class for reading lines from file. A line buffer is maintained. It
// doesn't need to know the maximum possible length of a line.
class LineFileReader {
public:
LineFileReader() {}
LineFileReader(LineFileReader&&) = delete;
LineFileReader(const LineFileReader&) = delete;
~LineFileReader() { ::free(_buffer); }
char* getline(FILE* f) { return this->getdelim(f, '\n'); }
char* getdelim(FILE* f, char delim) {
ssize_t ret = ::getdelim(&_buffer, &_buf_size, delim, f);
if (ret >= 0) {
if (ret >= 1 && _buffer[ret - 1] == delim) {
_buffer[--ret] = 0;
}
_length = (size_t)ret;
return _buffer;
} else {
_length = 0;
CHECK(feof(f));
return NULL;
}
}
char* get() { return _buffer; }
size_t length() { return _length; }
private:
char* _buffer = NULL;
size_t _buf_size = 0;
size_t _length = 0;
};
} // end namespace framework
} // end namespace paddle

File diff suppressed because it is too large Load Diff

@ -0,0 +1,60 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <memory>
#include <string>
#include <utility>
#include "glog/logging.h"
#include "paddle/fluid/framework/common/ps_string.h"
namespace paddle {
namespace framework {
inline bool& shell_verbose_internal() {
static bool x = false;
return x;
}
inline bool shell_verbose() { return shell_verbose_internal(); }
inline void shell_set_verbose(bool x) { shell_verbose_internal() = x; }
extern std::shared_ptr<FILE> shell_fopen(const std::string& path,
const std::string& mode);
extern std::shared_ptr<FILE> shell_popen(const std::string& cmd,
const std::string& mode, int* err_no);
extern std::pair<std::shared_ptr<FILE>, std::shared_ptr<FILE>> shell_p2open(
const std::string& cmd);
inline void shell_execute(const std::string& cmd) {
int err_no = 0;
do {
err_no = 0;
shell_popen(cmd, "w", &err_no);
} while (err_no == -1);
}
extern std::string shell_get_command_output(const std::string& cmd);
} // namespace framework
} // namespace paddle

@ -12,10 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <stdio_ext.h>
#include "google/protobuf/io/zero_copy_stream_impl.h"
#include "google/protobuf/message.h"
#include "google/protobuf/text_format.h"
#include "common/fs.h"
#include "common/shell.h"
#include "gflags/gflags.h"
#include "paddle/fluid/framework/data_feed.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
@ -64,7 +67,7 @@ bool DataFeed::PickOneFile(std::string* filename) {
return false;
}
*filename = filelist_[file_idx_++];
LOG(ERROR) << "pick file:" << *filename;
// LOG(ERROR) << "pick file:" << *filename;
return true;
}
@ -91,8 +94,24 @@ void PrivateQueueDataFeed<T>::SetQueueSize(int queue_size) {
template <typename T>
bool PrivateQueueDataFeed<T>::Start() {
CheckSetFileList();
read_thread_ = std::thread(&PrivateQueueDataFeed::ReadThread, this);
read_thread_.detach();
std::string filename;
while (PickOneFile(&filename)) {
int err_no = 0;
std::string pipeline_cmd = "cat";
std::string path =
"/home/users/dongdaxiang/pslib_ctr/local/data_mod/part-00012";
fp_ = fs_open_read(path, &err_no, pipeline_cmd);
__fsetlocking(&*fp_, FSETLOCKING_BYCALLER);
thread_local LineFileReader reader;
while (reader.getline(&*(fp_.get()))) {
LOG(ERROR) << "read a line";
}
read_thread_ = std::thread(&PrivateQueueDataFeed::ReadThread, this);
read_thread_.detach();
}
queue_->Close();
finish_start_ = true;
return true;
@ -100,17 +119,10 @@ bool PrivateQueueDataFeed<T>::Start() {
template <typename T>
void PrivateQueueDataFeed<T>::ReadThread() {
std::string filename;
while (PickOneFile(&filename)) {
file_.open(filename.c_str()); // is_text_feed
PADDLE_ENFORCE(file_.good(), "Open file<%s> fail.", filename.c_str());
T instance;
while (ParseOneInstance(&instance)) {
queue_->Send(instance);
}
file_.close();
T instance;
while (ParseOneInstanceFromPipe(&instance)) {
queue_->Send(instance);
}
queue_->Close();
}
template <typename T>
@ -168,6 +180,14 @@ void MultiSlotDataFeed::Init(
finish_init_ = true;
}
void MultiSlotDataFeed::ReadThread() {
LOG(ERROR) << "Haha";
std::vector<MultiSlotType> instance;
while (ParseOneInstanceFromPipe(&instance)) {
queue_->Send(instance);
}
}
bool MultiSlotDataFeed::CheckFile(const char* filename) {
CheckInit(); // get info of slots
std::ifstream fin(filename);
@ -279,6 +299,65 @@ bool MultiSlotDataFeed::CheckFile(const char* filename) {
return true;
}
bool MultiSlotDataFeed::ParseOneInstanceFromPipe(
std::vector<MultiSlotType>* instance) {
LOG(ERROR) << "hehe";
thread_local LineFileReader reader;
while (reader.getline(&*(fp_.get()))) {
/*
const char* str = reader.get();
std::string line = std::string(str);
LOG(ERROR) << line;
*/
LOG(ERROR) << "read a line";
}
return true;
/*
if (!reader.getline(fp_.get())) {
return false;
} else {
// std::string& line = reader_.get();
// const char* str = line.c_str();
const char* str = reader.get();
std::string line = std::string(str);
LOG(ERROR) << line;
char* endptr = const_cast<char*>(str);
int pos = 0;
for (size_t i = 0; i < use_slots_index_.size(); ++i) {
int idx = use_slots_index_[i];
int num = strtol(&str[pos], &endptr, 10);
PADDLE_ENFORCE(
num,
"The number of ids can not be zero, you need padding "
"it in data generator; or if there is something wrong with "
"the data, please check if the data contains unresolvable "
"characters.\nplease check this error line: %s",
str);
if (idx != -1) {
(*instance)[idx].Init(all_slots_type_[i]);
if ((*instance)[idx].GetType()[0] == 'f') { // float
for (int j = 0; j < num; ++j) {
float feasign = strtof(endptr, &endptr);
(*instance)[idx].AddValue(feasign);
}
} else if ((*instance)[idx].GetType()[0] == 'u') { // uint64
for (int j = 0; j < num; ++j) {
uint64_t feasign = (uint64_t)strtoull(endptr, &endptr, 10);
(*instance)[idx].AddValue(feasign);
}
}
pos = endptr - str;
} else {
for (int j = 0; j <= num; ++j) {
pos = line.find_first_of(' ', pos + 1);
}
}
}
return true;
}
*/
}
bool MultiSlotDataFeed::ParseOneInstance(std::vector<MultiSlotType>* instance) {
std::string line;
if (getline(file_, line)) {

@ -21,6 +21,7 @@ limitations under the License. */
#include <thread> // NOLINT
#include <vector>
#include "paddle/fluid/framework/common/ps_string.h"
#include "paddle/fluid/framework/data_feed.pb.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/reader.h"
@ -136,6 +137,7 @@ class PrivateQueueDataFeed : public DataFeed {
virtual void SetQueueSize(int queue_size);
// The reading and parsing method called in the ReadThread.
virtual bool ParseOneInstance(T* instance) = 0;
virtual bool ParseOneInstanceFromPipe(T* instance) = 0;
// This function is used to put instance to vec_ins
virtual void AddInstanceToInsVec(T* vec_ins, const T& instance,
int index) = 0;
@ -150,7 +152,9 @@ class PrivateQueueDataFeed : public DataFeed {
// ifstream one line and one line parse: 6034 ms
// fread one buffer and one buffer parse: 7097 ms
std::ifstream file_;
std::shared_ptr<FILE> fp_;
size_t queue_size_;
LineFileReader reader_;
// The queue for store parsed data
std::unique_ptr<paddle::operators::reader::BlockingQueue<T>> queue_;
};
@ -228,12 +232,15 @@ class MultiSlotDataFeed
virtual ~MultiSlotDataFeed() {}
virtual void Init(const paddle::framework::DataFeedDesc& data_feed_desc);
virtual bool CheckFile(const char* filename);
// virtual void ReadThread();
protected:
virtual void ReadThread();
virtual void AddInstanceToInsVec(std::vector<MultiSlotType>* vec_ins,
const std::vector<MultiSlotType>& instance,
int index);
virtual bool ParseOneInstance(std::vector<MultiSlotType>* instance);
virtual bool ParseOneInstanceFromPipe(std::vector<MultiSlotType>* instance);
virtual void PutToFeedVec(const std::vector<MultiSlotType>& ins_vec);
private:

@ -13,12 +13,15 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/executor_thread_worker.h"
#include <stdio_ext.h>
#include <algorithm>
#include "google/protobuf/io/zero_copy_stream_impl.h"
#include "google/protobuf/message.h"
#include "google/protobuf/text_format.h"
#include "gflags/gflags.h"
#include "paddle/fluid/framework/common/fs.h"
#include "paddle/fluid/framework/common/shell.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/lod_rank_table.h"
@ -244,6 +247,8 @@ void ExecutorThreadWorker::TrainFilesWithTimer() {
platform::SetNumThreads(1);
SetDevice();
thread_reader_->Start();
exit(0);
/*
std::vector<double> op_total_time;
std::vector<std::string> op_name;
for (auto& op : ops_) {
@ -287,13 +292,14 @@ void ExecutorThreadWorker::TrainFilesWithTimer() {
}
timeline.Start();
}
*/
}
void ExecutorThreadWorker::TrainFiles() {
platform::SetNumThreads(1);
// todo: configurable
SetDevice();
// SetDevice();
int fetch_var_num = fetch_var_names_.size();
fetch_values_.clear();

Loading…
Cancel
Save