Merge branch 'develop' of github.com:PaddlePaddle/Paddle into new_api_about_cpkt

Branch: wangkuiyi-patch-1
tangwei12 committed 8 years ago
commit 55d908c9c0

@@ -58,6 +58,8 @@ PaddlePaddle uses this [Git branching model](http://nvie.com/posts/a-successful-
  create mode 100644 233
 ```
+
+NOTE: The `yapf` installed by `pip install pre-commit` and `conda install -c conda-forge pre-commit` is slightly different. Paddle developers use `pip install pre-commit`.
 1. Build and test
 Users can build PaddlePaddle natively on Linux and Mac OS X. But to unify the building environment and to make it easy for debugging, the recommended way is [using Docker](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/dev/build_en.md).

@@ -29,7 +29,7 @@ RUN apt-get update && \
     wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \
     curl sed grep graphviz libjpeg-dev zlib1g-dev \
     python-matplotlib gcc-4.8 g++-4.8 \
-    automake locales clang-format swig doxygen cmake \
+    automake locales clang-format swig cmake \
     liblapack-dev liblapacke-dev \
     clang-3.8 llvm-3.8 libclang-3.8-dev \
     net-tools libtool ccache && \

@@ -98,6 +98,8 @@ def parse_args():
         '--use_fake_data',
         action='store_true',
         help='If set ommit the actual read data operators.')
+    parser.add_argument(
+        '--profile', action='store_true', help='If set, profile a few steps.')
     parser.add_argument(
         '--update_method',
         type=str,

@@ -108,8 +110,8 @@ def parse_args():
     return args

-def append_nccl2_prepare():
-    if os.getenv("PADDLE_TRAINER_ID", None) != None:
+def append_nccl2_prepare(trainer_id):
+    if trainer_id >= 0:
         # append gen_nccl_id at the end of startup program
         trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
         port = os.getenv("PADDLE_PSERVER_PORT")

@@ -136,12 +138,12 @@ def append_nccl2_prepare():
             })
         return nccl_id_var, num_trainers, trainer_id
     else:
-        raise Exception(
-            "must set PADDLE_TRAINER_ID env variables for dist train.")
+        raise Exception("must set positive PADDLE_TRAINER_ID env variables for "
+                        "nccl-based dist train.")

-def dist_transpile():
-    if "PADDLE_TRAINING_ROLE" not in os.environ:
+def dist_transpile(trainer_id):
+    if trainer_id < 0:
         return None, None
     # the port of all pservers, needed by both trainer and pserver

@@ -158,9 +160,6 @@ def dist_transpile():
     trainers = int(os.getenv("PADDLE_TRAINERS"))
     # the IP of the local machine, needed by pserver only
     current_endpoint = os.getenv("PADDLE_CURRENT_IP", "") + ":" + port
-    # the unique trainer id, starting from 0, needed by trainer
-    # only
-    trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
     # the role, should be either PSERVER or TRAINER
     training_role = os.getenv("PADDLE_TRAINING_ROLE")

@@ -295,6 +294,11 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
     iters = 0
     start_time = time.time()
     for batch_id, data in enumerate(train_reader()):
+        if args.profile and pass_id == 0 and batch_id == 5:
+            profiler.start_profiler("All")
+        elif args.profile and pass_id == 0 and batch_id == 10:
+            profiler.stop_profiler("total", "/tmp/profile_%d" % trainer_id)
         if iters == args.skip_batch_num:
             start_time = time.time()
             num_samples = 0

@@ -334,7 +338,11 @@ def print_arguments(args):
 def main():
     args = parse_args()
     print_arguments(args)
-    nccl_id_var, num_trainers, trainer_id = None, 1, 0
+    # the unique trainer id, starting from 0, needed by trainer
+    # only
+    nccl_id_var, num_trainers, trainer_id = (
+        None, 1, int(os.getenv("PADDLE_TRAINER_ID", "-1")))
     if args.use_cprof:
         pr = cProfile.Profile()

@@ -348,7 +356,7 @@ def main():
     fluid.memory_optimize(fluid.default_main_program())
     if args.update_method == "pserver":
-        train_prog, startup_prog = dist_transpile()
+        train_prog, startup_prog = dist_transpile(trainer_id)
         if not train_prog:
             raise Exception(
                 "Must configure correct environments to run dist train.")

@@ -364,7 +372,7 @@ def main():
         train_args.append(fluid.default_startup_program())
     if args.update_method == "nccl2":
-        nccl_id_var, num_trainers, trainer_id = append_nccl2_prepare()
+        nccl_id_var, num_trainers, trainer_id = append_nccl2_prepare(trainer_id)
     if args.gpus == 1:
         # NOTE: parallel executor use profiler interanlly
         if args.use_nvprof and args.device == 'GPU':

@@ -49,7 +49,7 @@ def parse_args():
     parser.add_argument(
         '--fluid', default=1, type=int, help='whether is fluid job')
     parser.add_argument(
-        '--rdma', action='store_ture', help='whether mount rdma libs')
+        '--rdma', action='store_true', help='whether mount rdma libs')
     parser.add_argument(
         '--disttype',
         default="pserver",

@@ -86,7 +86,7 @@
 <br>
 <p align="center">
-<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/fluid_compiler.png" width=100%>
+<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/fluid-compiler.png" width=100%>
 </p>
 ---

@@ -17,3 +17,4 @@
   :maxdepth: 1
   concepts/use_concepts_cn.rst
+  developer's_guide_to_paddle_fluid.md

@@ -16,3 +16,4 @@ Here is an example of linear regression. It introduces workflow of PaddlePaddle,
   :maxdepth: 1
   concepts/index_en.rst
+  developer's_guide_to_paddle_fluid.md

@@ -11,7 +11,7 @@ PaddlePaddle supports quick installation with pip. Currently it supports CentOS 6 and above, Ubuntu 14.
 pip install paddlepaddle

-If you need to install the GPU-enabled version (cuda7.5_cudnn5_avx_openblas), run:
+If you need to install the GPU-enabled version (cuda8.0_cudnn5_avx_openblas), run:

 .. code-block:: bash

@@ -12,7 +12,7 @@ Simply run the following command to install, the version is cpu_avx_openblas:
 pip install paddlepaddle

-If you need to install GPU version (cuda7.5_cudnn5_avx_openblas), run:
+If you need to install GPU version (cuda8.0_cudnn5_avx_openblas), run:

 .. code-block:: bash

@@ -51,6 +51,8 @@ Paddle developers use the [pre-commit](http://pre-commit.com/) tool to manage Git
 Paddle uses `clang-format` to format C/C++ source code; please make sure your `clang-format` version is 3.8 or above.
+
+Note: the `yapf` installed by `pip install pre-commit` differs slightly from the one installed by `conda install -c conda-forge pre-commit`; Paddle developers use `pip install pre-commit`.

 ## Start developing
 In this example, I delete one line from README.md and create a new file.

@@ -13,7 +13,11 @@
 # limitations under the License.
 #

-function(inference_api_test TARGET_NAME TEST_SRC DEP_TEST)
+if(APPLE)
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=pessimizing-move")
+endif(APPLE)
+
+function(inference_api_test TARGET_NAME TEST_SRC)
   set(options "")
   set(oneValueArgs "")
   set(multiValueArgs ARGS)

@@ -32,8 +36,10 @@ function(inference_api_test TARGET_NAME TEST_SRC DEP_TEST)
     string(REGEX REPLACE "^_$" "" arg "${arg}")
     cc_test(${TARGET_NAME}
             SRCS ${TEST_SRC}
-            DEPS paddle_fluid_api paddle_inference_api paddle_inference_api_impl
+            DEPS paddle_fluid_api paddle_inference_api
             ARGS --dirname=${PYTHON_TESTS_DIR}/book/)
+    # TODO(panyx0178): Figure out how to add word2vec and image_classification
+    # as deps.
     # set_tests_properties(${TARGET_NAME}
     #                      PROPERTIES DEPENDS ${DEP_TEST})
   endforeach()

@@ -41,17 +47,12 @@ endfunction(inference_api_test)
 cc_library(paddle_inference_api
-    SRCS paddle_inference_api.cc
+    SRCS paddle_inference_api.cc paddle_inference_api_impl.cc
     DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})

-cc_library(paddle_inference_api_impl
-    SRCS paddle_inference_api_impl.cc
-    DEPS paddle_inference_api paddle_fluid_api)
-
 cc_test(test_paddle_inference_api
         SRCS test_paddle_inference_api.cc
         DEPS paddle_inference_api)

 inference_api_test(test_paddle_inference_api_impl
-                   test_paddle_inference_api_impl.cc
-                   test_word2vec)
+                   test_paddle_inference_api_impl.cc)

@@ -45,9 +45,9 @@ struct PaddleTensor {
 };

 /*
- * A simple Inference API for Paddle. Currently this API might just be used by
+ * A simple Inference API for Paddle. Currently this API can be used by
  * non-sequence scenerios.
- * TODO(Superjomn) Prepare another API for NLP-related usages.
+ * TODO(Superjomn) Support another API for NLP-related usages.
  */
 class PaddlePredictor {
  public:

@@ -66,34 +66,38 @@ class PaddlePredictor {
   // be thread-safe.
   virtual std::unique_ptr<PaddlePredictor> Clone() = 0;

-  virtual bool InitShared() { return false; }
   // Destroy the Predictor.
   virtual ~PaddlePredictor() {}

-  friend std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(
-      const PaddlePredictor::Config& config);
+  enum class EngineKind {
+    kNative = -1,  // Use the native Fluid facility.
+    // TODO(Superjomn) support latter.
+    // kAnakin,             // Use Anakin for inference.
+    // kTensorRT,           // Use TensorRT for inference.
+    // kAutoMixedAnakin,    // Automatically mix Fluid with Anakin.
+    // kAutoMixedTensorRT,  // Automatically mix Fluid with TensorRT.
+  };

   // The common configs for all the predictors.
   struct Config {
-    enum class EngineKind;
     std::string model_dir;  // path to the model directory.
     bool enable_engine{false};  // Enable to execute (part of) the model on
-                                // third-party engines.
-    EngineKind engine_kind{Config::EngineKind::kNone};
-    enum class EngineKind {
-      kNone = -1,          // Use the native Fluid facility.
-      kAnakin,             // Use Anakin for inference.
-      kTensorRT,           // Use TensorRT for inference.
-      kAutoMixedAnakin,    // Automatically mix Fluid with Anakin.
-      kAutoMixedTensorRT,  // Automatically mix Fluid with TensorRT.
-    };
   };
 };

+struct NativeConfig : public PaddlePredictor::Config {
+  bool use_gpu{false};
+  int device;
+  float fraction_of_gpu_memory;
+  std::string prog_file;
+  std::string param_file;
+  bool share_variables;
+};
+
 // A factory to help create difference predictor.
-template <typename ConfigT>
+template <
+    typename ConfigT,
+    PaddlePredictor::EngineKind engine = PaddlePredictor::EngineKind::kNative>
 std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);

 }  // namespace paddle
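
Taken together with the implementation and test changes later in this commit, the new header is used roughly as follows. This is a hedged sketch assembled only from pieces visible in this diff, not a file from the repository; the include path and the model path are assumptions, and the single-id feed mirrors the updated word2vec test.

```cpp
#include <cstdlib>
#include <vector>
// Assumed include path for the header changed above.
#include "paddle/contrib/inference/paddle_inference_api.h"

int main() {
  // Configure the native (Fluid) engine; these fields come from the
  // NativeConfig struct added in this diff.
  paddle::NativeConfig config;
  config.model_dir = "/path/to/word2vec.inference.model";  // placeholder
  config.use_gpu = false;
  config.device = 0;
  config.fraction_of_gpu_memory = 0.15;
  config.share_variables = true;

  // EngineKind::kNative is the default template argument, so only the
  // config type needs to be spelled out.
  auto predictor = paddle::CreatePaddlePredictor<paddle::NativeConfig>(config);

  // Feed one int64 word id, the way the word2vec test does.
  std::vector<int64_t> ids = {1};
  paddle::PaddleTensor input;
  input.shape = {1, 1};
  input.data.data = ids.data();
  input.data.length = ids.size() * sizeof(int64_t);
  input.dtype = paddle::PaddleDType::INT64;

  std::vector<paddle::PaddleTensor> feeds = {input};
  std::vector<paddle::PaddleTensor> outputs;
  if (!predictor->Run(feeds, &outputs) || outputs.empty()) return 1;

  // Output buffers are allocated by the predictor and released by the
  // caller, as the test's free(outputs[0].data.data) shows.
  free(outputs[0].data.data);
  return 0;
}
```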

@@ -54,7 +54,7 @@ std::string num2str(T a) {
 }
 }  // namespace

-bool PaddlePredictorImpl::Init() {
+bool NativePaddlePredictor::Init() {
   VLOG(3) << "Predictor::init()";

   // TODO(panyx0718): Should CPU vs GPU device be decided by id?

@@ -96,14 +96,14 @@ bool PaddlePredictorImpl::Init() {
   return true;
 }

-bool PaddlePredictorImpl::Run(const std::vector<PaddleTensor> &inputs,
+bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
                                 std::vector<PaddleTensor> *output_data) {
   VLOG(3) << "Predictor::predict";
   Timer timer;
   timer.tic();
   // set feed variable
-  std::map<std::string, const paddle::framework::LoDTensor *> feed_targets;
-  std::vector<paddle::framework::LoDTensor> feeds;
+  std::map<std::string, const framework::LoDTensor *> feed_targets;
+  std::vector<framework::LoDTensor> feeds;
   if (!SetFeed(inputs, &feeds)) {
     LOG(ERROR) << "fail to set feed";
     return false;

@@ -112,8 +112,8 @@ bool PaddlePredictorImpl::Run(const std::vector<PaddleTensor> &inputs,
     feed_targets[feed_target_names_[i]] = &feeds[i];
   }
   // get fetch variable
-  std::map<std::string, paddle::framework::LoDTensor *> fetch_targets;
-  std::vector<paddle::framework::LoDTensor> fetchs;
+  std::map<std::string, framework::LoDTensor *> fetch_targets;
+  std::vector<framework::LoDTensor> fetchs;
   fetchs.resize(fetch_target_names_.size());
   for (size_t i = 0; i < fetch_target_names_.size(); ++i) {
     fetch_targets[fetch_target_names_[i]] = &fetchs[i];

@@ -133,76 +133,33 @@ bool PaddlePredictorImpl::Run(const std::vector<PaddleTensor> &inputs,
   return true;
 }

-std::unique_ptr<PaddlePredictor> PaddlePredictorImpl::Clone() {
+std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone() {
   VLOG(3) << "Predictor::clone";
-  std::unique_ptr<PaddlePredictor> cls(new PaddlePredictorImpl(config_));
-  if (!cls->InitShared()) {
-    LOG(ERROR) << "fail to call InitShared";
+  std::unique_ptr<PaddlePredictor> cls(new NativePaddlePredictor(config_));
+
+  if (!dynamic_cast<NativePaddlePredictor *>(cls.get())->Init()) {
+    LOG(ERROR) << "fail to call Init";
     return nullptr;
   }
   // fix manylinux compile error.
   return std::move(cls);
 }

-// TODO(panyx0718): Consider merge with Init()?
-bool PaddlePredictorImpl::InitShared() {
-  VLOG(3) << "Predictor::init_shared";
-  // 1. Define place, executor, scope
-  if (this->config_.device >= 0) {
-    place_ = paddle::platform::CUDAPlace();
-  } else {
-    place_ = paddle::platform::CPUPlace();
-  }
-  this->executor_.reset(new paddle::framework::Executor(this->place_));
-  this->scope_.reset(new paddle::framework::Scope());
-  // Initialize the inference program
-  if (!this->config_.model_dir.empty()) {
-    // Parameters are saved in separate files sited in
-    // the specified `dirname`.
-    this->inference_program_ = paddle::inference::Load(
-        this->executor_.get(), this->scope_.get(), this->config_.model_dir);
-  } else if (!this->config_.prog_file.empty() &&
-             !this->config_.param_file.empty()) {
-    // All parameters are saved in a single file.
-    // The file names should be consistent with that used
-    // in Python API `fluid.io.save_inference_model`.
-    this->inference_program_ =
-        paddle::inference::Load(this->executor_.get(),
-                                this->scope_.get(),
-                                this->config_.prog_file,
-                                this->config_.param_file);
-  }
-  this->ctx_ = this->executor_->Prepare(*this->inference_program_, 0);
-  // 3. create variables
-  // TODO(panyx0718): why test share_variables.
-  if (config_.share_variables) {
-    this->executor_->CreateVariables(
-        *this->inference_program_, this->scope_.get(), 0);
-  }
-  // 4. Get the feed_target_names and fetch_target_names
-  this->feed_target_names_ = this->inference_program_->GetFeedTargetNames();
-  this->fetch_target_names_ = this->inference_program_->GetFetchTargetNames();
-  return true;
-}
-
-bool PaddlePredictorImpl::SetFeed(
-    const std::vector<PaddleTensor> &inputs,
-    std::vector<paddle::framework::LoDTensor> *feeds) {
+bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
+                                    std::vector<framework::LoDTensor> *feeds) {
   VLOG(3) << "Predictor::set_feed";
   if (inputs.size() != feed_target_names_.size()) {
     LOG(ERROR) << "wrong feed input size.";
     return false;
   }
   for (size_t i = 0; i < feed_target_names_.size(); ++i) {
-    paddle::framework::LoDTensor input;
-    paddle::framework::DDim ddim =
-        paddle::framework::make_ddim(inputs[i].shape);
+    framework::LoDTensor input;
+    framework::DDim ddim = framework::make_ddim(inputs[i].shape);
     void *input_ptr;
     if (inputs[i].dtype == PaddleDType::INT64) {
-      input_ptr =
-          input.mutable_data<int64_t>(ddim, paddle::platform::CPUPlace());
+      input_ptr = input.mutable_data<int64_t>(ddim, platform::CPUPlace());
     } else if (inputs[i].dtype == PaddleDType::FLOAT32) {
-      input_ptr = input.mutable_data<float>(ddim, paddle::platform::CPUPlace());
+      input_ptr = input.mutable_data<float>(ddim, platform::CPUPlace());
     } else {
       LOG(ERROR) << "unsupported feed type " << inputs[i].dtype;
       return false;

@@ -213,13 +170,12 @@ bool PaddlePredictorImpl::SetFeed(
                 inputs[i].data.data,
                 inputs[i].data.length);
     feeds->push_back(input);
-    LOG(ERROR) << "Actual feed type " << feeds->back().type().name();
   }
   return true;
 }

-bool PaddlePredictorImpl::GetFetch(
-    const std::vector<paddle::framework::LoDTensor> &fetchs,
+bool NativePaddlePredictor::GetFetch(
+    const std::vector<framework::LoDTensor> &fetchs,
     std::vector<PaddleTensor> *outputs) {
   VLOG(3) << "Predictor::get_fetch";
   outputs->resize(fetchs.size());

@@ -284,9 +240,12 @@ bool PaddlePredictorImpl::GetFetch(
   return true;
 }

-std::unique_ptr<PaddlePredictorImpl> CreatePaddlePredictorImpl(
-    const VisConfig &config) {
-  VLOG(3) << "create PaddlePredictorImpl";
+template <>
+std::unique_ptr<PaddlePredictor>
+CreatePaddlePredictor<NativeConfig, PaddlePredictor::EngineKind::kNative>(
+    const NativeConfig &config) {
+  VLOG(3) << "create NativePaddlePredictor";
+  if (config.use_gpu) {
     // 1. GPU memeroy
     std::vector<std::string> flags;
     if (config.fraction_of_gpu_memory >= 0.0f ||

@@ -298,13 +257,13 @@ std::unique_ptr<PaddlePredictorImpl> CreatePaddlePredictorImpl(
       VLOG(3) << "set flag: " << flag;
       framework::InitGflags(flags);
     }
+  }

-  std::unique_ptr<PaddlePredictorImpl> predictor(
-      new PaddlePredictorImpl(config));
-  if (!predictor->Init()) {
+  std::unique_ptr<PaddlePredictor> predictor(new NativePaddlePredictor(config));
+  if (!dynamic_cast<NativePaddlePredictor *>(predictor.get())->Init()) {
     return nullptr;
   }
-  return predictor;
+  return std::move(predictor);
 }

 }  // namespace paddle
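
The replacement factory is an explicit specialization of a function template whose second parameter is a non-type EngineKind defaulting to kNative. The same mechanism in isolation, as a minimal standard-C++ sketch; every name below is an illustrative stand-in, not Paddle's:

```cpp
#include <iostream>
#include <memory>

// Trimmed-down stand-ins for the predictor hierarchy.
struct Predictor {
  virtual ~Predictor() = default;
  virtual const char* name() const = 0;
  enum class EngineKind { kNative = -1 };
};

struct NativePredictor : Predictor {
  const char* name() const override { return "native"; }
};

struct NativeConfig {};  // stand-in for the real config struct

// Primary template: declared for every (ConfigT, engine) pair...
template <typename ConfigT,
          Predictor::EngineKind engine = Predictor::EngineKind::kNative>
std::unique_ptr<Predictor> CreatePredictor(const ConfigT& config);

// ...but only pairs with a defined specialization can actually link, which
// keeps not-yet-supported engines from being instantiated by accident.
template <>
std::unique_ptr<Predictor>
CreatePredictor<NativeConfig, Predictor::EngineKind::kNative>(
    const NativeConfig& /*config*/) {
  return std::unique_ptr<Predictor>(new NativePredictor);
}

int main() {
  NativeConfig config;
  // The default template argument lets callers spell only the config type.
  auto p = CreatePredictor<NativeConfig>(config);
  std::cout << p->name() << "\n";
  return 0;
}
```

Adding another engine later (TensorRT, Anakin) would then mean adding another specialization rather than touching existing call sites.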

@@ -29,20 +29,10 @@

 namespace paddle {

-struct VisConfig : public PaddlePredictor::Config {
-  int device;
-  float fraction_of_gpu_memory;
-  std::string prog_file;
-  std::string param_file;
-  bool share_variables;
-};
-
-/*
- * Do not use this, just a demo indicating how to customize a Predictor.
- */
-class PaddlePredictorImpl : public PaddlePredictor {
+class NativePaddlePredictor : public PaddlePredictor {
  public:
-  explicit PaddlePredictorImpl(const VisConfig &config) : config_(config) {}
+  explicit NativePaddlePredictor(const NativeConfig &config)
+      : config_(config) {}

   bool Init();

@@ -51,26 +41,22 @@ class PaddlePredictorImpl : public PaddlePredictor {
   std::unique_ptr<PaddlePredictor> Clone() override;

-  ~PaddlePredictorImpl() override{};
+  ~NativePaddlePredictor() override{};

  private:
-  bool InitShared() override;
   bool SetFeed(const std::vector<PaddleTensor> &input_datas,
-               std::vector<paddle::framework::LoDTensor> *feeds);
-  bool GetFetch(const std::vector<paddle::framework::LoDTensor> &fetchs,
+               std::vector<framework::LoDTensor> *feeds);
+  bool GetFetch(const std::vector<framework::LoDTensor> &fetchs,
                 std::vector<PaddleTensor> *output_data);

-  VisConfig config_;
-  paddle::platform::Place place_;
-  std::unique_ptr<paddle::framework::Executor> executor_;
-  std::unique_ptr<paddle::framework::Scope> scope_;
-  std::unique_ptr<paddle::framework::ExecutorPrepareContext> ctx_;
-  std::unique_ptr<paddle::framework::ProgramDesc> inference_program_;
+  NativeConfig config_;
+  platform::Place place_;
+  std::unique_ptr<framework::Executor> executor_;
+  std::unique_ptr<framework::Scope> scope_;
+  std::unique_ptr<framework::ExecutorPrepareContext> ctx_;
+  std::unique_ptr<framework::ProgramDesc> inference_program_;
   std::vector<std::string> feed_target_names_;
   std::vector<std::string> fetch_target_names_;
 };

-std::unique_ptr<PaddlePredictorImpl> CreatePaddlePredictorImpl(
-    const VisConfig &config);
-
 }  // namespace paddle

@@ -40,16 +40,20 @@ PaddleTensor LodTensorToPaddleTensor(framework::LoDTensor* t) {
   return pt;
 }

-TEST(paddle_inference_api_impl, word2vec) {
-  VisConfig config;
+NativeConfig GetConfig() {
+  NativeConfig config;
   config.model_dir = FLAGS_dirname + "word2vec.inference.model";
   LOG(INFO) << "dirname " << config.model_dir;
   config.fraction_of_gpu_memory = 0.15;
+  config.use_gpu = true;
   config.device = 0;
   config.share_variables = true;
+  return config;
+}

-  std::unique_ptr<PaddlePredictorImpl> predictor =
-      CreatePaddlePredictorImpl(config);
+TEST(paddle_inference_api_impl, word2vec) {
+  NativeConfig config = GetConfig();
+  auto predictor = CreatePaddlePredictor<NativeConfig>(config);

   framework::LoDTensor first_word, second_word, third_word, fourth_word;
   framework::LoD lod{{0, 1}};

@@ -60,24 +64,90 @@ TEST(paddle_inference_api_impl, word2vec) {
   SetupLoDTensor(&third_word, lod, static_cast<int64_t>(0), dict_size - 1);
   SetupLoDTensor(&fourth_word, lod, static_cast<int64_t>(0), dict_size - 1);

-  std::vector<PaddleTensor> cpu_feeds;
-  cpu_feeds.push_back(LodTensorToPaddleTensor(&first_word));
-  cpu_feeds.push_back(LodTensorToPaddleTensor(&second_word));
-  cpu_feeds.push_back(LodTensorToPaddleTensor(&third_word));
-  cpu_feeds.push_back(LodTensorToPaddleTensor(&fourth_word));
+  std::vector<PaddleTensor> paddle_tensor_feeds;
+  paddle_tensor_feeds.push_back(LodTensorToPaddleTensor(&first_word));
+  paddle_tensor_feeds.push_back(LodTensorToPaddleTensor(&second_word));
+  paddle_tensor_feeds.push_back(LodTensorToPaddleTensor(&third_word));
+  paddle_tensor_feeds.push_back(LodTensorToPaddleTensor(&fourth_word));

   std::vector<PaddleTensor> outputs;
-  ASSERT_TRUE(predictor->Run(cpu_feeds, &outputs));
+  ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs));
   ASSERT_EQ(outputs.size(), 1UL);
-  for (size_t i = 0; i < outputs.size(); ++i) {
-    size_t len = outputs[i].data.length;
-    float* data = static_cast<float*>(outputs[i].data.data);
-    for (size_t j = 0; j < len / sizeof(float); ++j) {
-      ASSERT_LT(data[j], 1.0);
-      ASSERT_GT(data[j], -1.0);
-    }
-    free(outputs[i].data.data);
-  }
+  size_t len = outputs[0].data.length;
+  float* data = static_cast<float*>(outputs[0].data.data);
+  for (int j = 0; j < len / sizeof(float); ++j) {
+    ASSERT_LT(data[j], 1.0);
+    ASSERT_GT(data[j], -1.0);
+  }
+
+  std::vector<paddle::framework::LoDTensor*> cpu_feeds;
+  cpu_feeds.push_back(&first_word);
+  cpu_feeds.push_back(&second_word);
+  cpu_feeds.push_back(&third_word);
+  cpu_feeds.push_back(&fourth_word);
+
+  framework::LoDTensor output1;
+  std::vector<paddle::framework::LoDTensor*> cpu_fetchs1;
+  cpu_fetchs1.push_back(&output1);
+
+  TestInference<platform::CPUPlace>(config.model_dir, cpu_feeds, cpu_fetchs1);
+
+  float* lod_data = output1.data<float>();
+  for (size_t i = 0; i < output1.numel(); ++i) {
+    EXPECT_LT(lod_data[i] - data[i], 1e-3);
+    EXPECT_GT(lod_data[i] - data[i], -1e-3);
+  }
+
+  free(outputs[0].data.data);
+}
+
+TEST(paddle_inference_api_impl, image_classification) {
+  int batch_size = 2;
+  bool use_mkldnn = false;
+  bool repeat = false;
+  NativeConfig config = GetConfig();
+  config.model_dir =
+      FLAGS_dirname + "image_classification_resnet.inference.model";
+
+  const bool is_combined = false;
+  std::vector<std::vector<int64_t>> feed_target_shapes =
+      GetFeedTargetShapes(config.model_dir, is_combined);
+
+  framework::LoDTensor input;
+  // Use normilized image pixels as input data,
+  // which should be in the range [0.0, 1.0].
+  feed_target_shapes[0][0] = batch_size;
+  framework::DDim input_dims = framework::make_ddim(feed_target_shapes[0]);
+  SetupTensor<float>(
+      &input, input_dims, static_cast<float>(0), static_cast<float>(1));
+
+  std::vector<framework::LoDTensor*> cpu_feeds;
+  cpu_feeds.push_back(&input);
+
+  framework::LoDTensor output1;
+  std::vector<framework::LoDTensor*> cpu_fetchs1;
+  cpu_fetchs1.push_back(&output1);
+
+  TestInference<platform::CPUPlace, false, true>(config.model_dir,
+                                                 cpu_feeds,
+                                                 cpu_fetchs1,
+                                                 repeat,
+                                                 is_combined,
+                                                 use_mkldnn);
+
+  auto predictor = CreatePaddlePredictor(config);
+  std::vector<PaddleTensor> paddle_tensor_feeds;
+  paddle_tensor_feeds.push_back(LodTensorToPaddleTensor(&input));
+
+  std::vector<PaddleTensor> outputs;
+  ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs));
+  ASSERT_EQ(outputs.size(), 1UL);
+  size_t len = outputs[0].data.length;
+  float* data = static_cast<float*>(outputs[0].data.data);
+  float* lod_data = output1.data<float>();
+  for (size_t j = 0; j < len / sizeof(float); ++j) {
+    EXPECT_NEAR(lod_data[j], data[j], 1e-3);
+  }
+  free(data);
 }

 }  // namespace paddle

@@ -469,6 +469,7 @@ class RuntimeInferShapeContext : public InferShapeContext {
  protected:
   DDim GetDim(const std::string& name) const override {
     Variable* var = scope_.FindVar(name);
+    PADDLE_ENFORCE_NOT_NULL(var);
     if (var->IsType<LoDTensor>()) {
       return var->Get<LoDTensor>().dims();
     } else if (var->IsType<SelectedRows>()) {
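
The added PADDLE_ENFORCE_NOT_NULL(var) makes a failed lookup abort with a message at the call site instead of crashing on the var->IsType<...>() dereference one line later. A stand-alone illustration of the same guard; the EnforceNotNull helper below is hypothetical, not Paddle's macro:

```cpp
#include <map>
#include <stdexcept>
#include <string>

struct Variable { int dims = 0; };

// Illustrative stand-in for a not-null enforcement: fail loudly, with
// context, rather than dereferencing a null pointer.
template <typename T>
T* EnforceNotNull(T* ptr, const std::string& what) {
  if (ptr == nullptr) throw std::runtime_error(what + " should not be null");
  return ptr;
}

int GetDim(const std::map<std::string, Variable*>& scope,
           const std::string& name) {
  auto it = scope.find(name);
  Variable* var = (it == scope.end()) ? nullptr : it->second;
  // Without the check, `var->dims` for a missing name is undefined behavior;
  // with it, the caller sees which lookup failed.
  return EnforceNotNull(var, "variable '" + name + "'")->dims;
}

int main() {
  Variable v{3};
  std::map<std::string, Variable*> scope{{"x", &v}};
  return GetDim(scope, "x") == 3 ? 0 : 1;
}
```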

@@ -25,8 +25,10 @@ void FileReader::ReadNext(std::vector<LoDTensor> *out) {
   if (out->empty()) {
     return;
   }
+
+  PADDLE_ENFORCE_EQ(out->size(), dims_.size());
   for (size_t i = 0; i < dims_.size(); ++i) {
-    auto &actual = out->at(i).dims();
+    auto &actual = (*out)[i].dims();
     auto &expect = dims_[i];

     PADDLE_ENFORCE_EQ(actual.size(), expect.size());
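
Two small things change in ReadNext: the sizes of *out and dims_ are now checked once up front, and element access switches from out->at(i) to (*out)[i], whose per-element bounds check would be redundant after that. A stand-alone sketch of the difference, in plain STL rather than Paddle types:

```cpp
#include <cassert>
#include <iostream>
#include <stdexcept>
#include <vector>

int main() {
  std::vector<int> actual = {1, 2, 3};
  std::vector<int> expect = {1, 2, 3};

  // at() does its own bounds check and throws on a bad index.
  try {
    (void)actual.at(7);
  } catch (const std::out_of_range&) {
    std::cout << "at(7) threw out_of_range\n";
  }

  // The pattern in the diff: validate the sizes once up front...
  assert(actual.size() == expect.size());
  // ...then use unchecked operator[] inside the loop.
  for (size_t i = 0; i < expect.size(); ++i) {
    assert(actual[i] == expect[i]);
  }
  return 0;
}
```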

@@ -18,8 +18,8 @@ namespace paddle {
 namespace framework {

 struct ReAllocateVisitor {
-  ReAllocateVisitor(framework::Tensor* tensor, const framework::DDim& dims)
-      : tensor_(tensor), dims_(dims) {}
+  ReAllocateVisitor(const framework::DDim& dims, framework::Tensor* tensor)
+      : dims_(dims), tensor_(tensor) {}

   template <typename T>
   void operator()() const {

@@ -34,8 +34,8 @@ struct ReAllocateVisitor {
     tensor_->ShareDataWith(cpu_tensor);
   }

-  framework::Tensor* tensor_;
   framework::DDim dims_;
+  framework::Tensor* tensor_;
 };

 struct TensorCopyVisitor {

@@ -158,6 +158,7 @@ bool SelectedRows::Set(int64_t key, const framework::Tensor& value) {
   }
   PADDLE_ENFORCE_EQ(value.dims()[0], static_cast<size_t>(1),
                     "The first dim of value should be 1.");
+  std::lock_guard<std::mutex> lock(*auto_grown_mutex_.get());
   auto index = Index(key);
   bool is_new_key = false;
   if (index == -1) {

@@ -169,7 +170,7 @@ bool SelectedRows::Set(int64_t key, const framework::Tensor& value) {
       auto dims = value_->dims();
       dims[0] = (dims[0] + 1) << 1;
       framework::VisitDataType(framework::ToDataType(value.type()),
-                               ReAllocateVisitor(value_.get(), dims));
+                               ReAllocateVisitor(dims, value_.get()));
     }
   }
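
When a new key does not fit, the value tensor is regrown with dims[0] = (dims[0] + 1) << 1, i.e. to a bit more than double its current row count, so a sequence of auto-grow inserts triggers O(log n) reallocations rather than one per insert. The same growth rule on a plain buffer, as a sketch rather than Paddle code:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Grow a row-major buffer the way the diff grows the value tensor: whenever
// the next row would not fit, regrow to (rows + 1) * 2.
void EnsureRows(std::vector<float>* data, int64_t* rows, int64_t row_width,
                int64_t needed_rows) {
  if (needed_rows <= *rows) return;
  int64_t new_rows = *rows;
  while (new_rows < needed_rows) {
    new_rows = (new_rows + 1) << 1;  // same rule as the diff
  }
  data->resize(new_rows * row_width, 0.0f);
  *rows = new_rows;
}

int main() {
  std::vector<float> value;
  int64_t rows = 0;
  for (int64_t i = 0; i < 100; ++i) {
    EnsureRows(&value, &rows, 8, i + 1);  // insert row i
  }
  // Only a handful of reallocations happened; capacity ends at 126 rows.
  std::cout << "capacity grew to " << rows << " rows\n";
  return 0;
}
```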

@@ -15,6 +15,8 @@ limitations under the License. */
 #pragma once

 #include <algorithm>
+#include <memory>
+#include <mutex>  // NOLINT
 #include <utility>
 #include <vector>

@@ -46,11 +48,13 @@ class SelectedRows {
   SelectedRows(const std::vector<int64_t>& rows, const int64_t& height)
       : rows_(rows), height_(height) {
     value_.reset(new Tensor());
+    auto_grown_mutex_.reset(new std::mutex);
   }

   SelectedRows() {
     height_ = 0;
     value_.reset(new Tensor());
+    auto_grown_mutex_.reset(new std::mutex);
   }

   platform::Place place() const { return value_->place(); }

@@ -125,6 +129,7 @@ class SelectedRows {
   Vector<int64_t> rows_;
   std::unique_ptr<Tensor> value_{nullptr};
   int64_t height_;
+  std::unique_ptr<std::mutex> auto_grown_mutex_{nullptr};
 };

 /*
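
These header changes pair with the std::lock_guard added in SelectedRows::Set above: every SelectedRows instance now owns a mutex that serializes the look-up-then-auto-grow path. Holding it through a std::unique_ptr rather than as a plain std::mutex member keeps the class movable (a std::mutex member would make it neither copyable nor movable). A reduced sketch of the same arrangement, using a toy auto-grown table in place of the real tensor storage:

```cpp
#include <cstdint>
#include <cstdio>
#include <memory>
#include <mutex>
#include <thread>
#include <unordered_map>
#include <vector>

// A reduced model of the auto-growing table in SelectedRows: Set() may both
// look up and grow shared storage, so the whole operation takes one lock.
class AutoGrownTable {
 public:
  AutoGrownTable() : mutex_(new std::mutex) {}

  void Set(int64_t key, float value) {
    std::lock_guard<std::mutex> lock(*mutex_);  // mirrors auto_grown_mutex_
    auto it = index_.find(key);
    if (it == index_.end()) {
      index_[key] = data_.size();
      data_.push_back(value);  // the "auto-grow" path
    } else {
      data_[it->second] = value;
    }
  }

  size_t size() const { return data_.size(); }

 private:
  std::unordered_map<int64_t, size_t> index_;
  std::vector<float> data_;
  // unique_ptr keeps the holder movable; each instance still owns one mutex.
  std::unique_ptr<std::mutex> mutex_;
};

int main() {
  AutoGrownTable table;
  std::vector<std::thread> workers;
  for (int t = 0; t < 4; ++t) {
    workers.emplace_back([&table, t] {
      for (int64_t k = 0; k < 1000; ++k) table.Set(k % 257, float(t));
    });
  }
  for (auto& w : workers) w.join();
  std::printf("distinct keys: %zu\n", table.size());  // 257
  return 0;
}
```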

@@ -39,7 +39,7 @@ template <typename T>
 inline const T* Tensor::data() const {
   check_memory_size();
   PADDLE_ENFORCE(std::is_same<T, void>::value ||
-                     holder_->type().hash_code() == typeid(T).hash_code(),
+                     holder_->type() == std::type_index(typeid(T)),
                  "Tensor holds the wrong type, it holds %s",
                  this->holder_->type().name());

@@ -53,7 +53,7 @@ template <typename T>
 inline T* Tensor::data() {
   check_memory_size();
   PADDLE_ENFORCE(std::is_same<T, void>::value ||
-                     holder_->type().hash_code() == typeid(T).hash_code(),
+                     holder_->type() == std::type_index(typeid(T)),
                  "Tensor holds the wrong type, it holds %s",
                  this->holder_->type().name());
   return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
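
Both Tensor::data() overloads now compare the stored std::type_index directly against std::type_index(typeid(T)) instead of comparing hash_code() values. Equal types always hash equally, but distinct types are not guaranteed to hash differently, so the old check could in principle report a false match. A stand-alone illustration of the two comparisons, in standard C++ only:

```cpp
#include <iostream>
#include <typeindex>
#include <typeinfo>

template <typename T>
bool HoldsType(const std::type_index& stored) {
  // Preferred: type_index equality, as in the updated Tensor::data().
  return stored == std::type_index(typeid(T));
}

template <typename T>
bool HoldsTypeByHash(const std::type_index& stored) {
  // The old check: hashes of equal types match, but unequal types are not
  // required to have different hashes, so this can give a false positive.
  return stored.hash_code() == typeid(T).hash_code();
}

int main() {
  std::type_index stored(typeid(float));
  std::cout << std::boolalpha
            << HoldsType<float>(stored) << ' '          // true
            << HoldsType<double>(stored) << ' '         // false
            << HoldsTypeByHash<float>(stored) << '\n';  // true
  return 0;
}
```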

@@ -5,14 +5,19 @@ cc_library(paddle_fluid_api
     SRCS io.cc
     DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})

-# Create static library
 get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
-cc_library(paddle_fluid DEPS ${fluid_modules})
+if(WITH_CONTRIB)
+  set(fluid_modules "${fluid_modules}" paddle_inference_api)
+endif()
+
+# Create static library
+cc_library(paddle_fluid DEPS ${fluid_modules} paddle_fluid_api)

 # Create shared library
 cc_library(paddle_fluid_shared SHARED
     SRCS io.cc
-    DEPS ${fluid_modules})
+    DEPS ${fluid_modules} paddle_fluid_api)

 set_target_properties(paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid)
 if(NOT APPLE)
   # TODO(liuyiqun): Temporarily disable the link flag because it is not support on Mac.

@@ -21,7 +21,10 @@ limitations under the License. */

 #include <deque>
 #include <stack>
+#include <string>
 #include <unordered_set>
+#include <utility>
+#include <vector>

 #include "paddle/fluid/inference/analysis/graph_traits.h"
 #include "paddle/fluid/inference/analysis/node.h"

@@ -44,6 +44,6 @@ TEST_F(DFG_Tester, Test) {
   LOG(INFO) << graph.nodes.size();
 }

-}  // analysis
-}  // inference
-}  // paddle
+};  // namespace analysis
+};  // namespace inference
+};  // namespace paddle

@@ -12,9 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
+#include <string>
 #include <vector>

+#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
+
 namespace paddle {
 namespace inference {
 namespace analysis {

@@ -19,6 +19,8 @@

 #pragma once

+#include <string>
+
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/inference/analysis/data_flow_graph.h"
 #include "paddle/fluid/inference/analysis/pass.h"

Some files were not shown because too many files have changed in this diff.
