Merge branch 'develop' of https://github.com/PaddlePaddle/paddle into fix-transpose-doc

test=develop
fix_recordio_link
guosheng 7 years ago
commit d0662bc924

@ -127,6 +127,9 @@ set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
set(FLUID_INSTALL_DIR "${CMAKE_BINARY_DIR}/fluid_install_dir" CACHE STRING set(FLUID_INSTALL_DIR "${CMAKE_BINARY_DIR}/fluid_install_dir" CACHE STRING
"A path setting fluid shared and static libraries") "A path setting fluid shared and static libraries")
set(FLUID_INFERENCE_INSTALL_DIR "${CMAKE_BINARY_DIR}/fluid_inference_install_dir" CACHE STRING
"A path setting fluid inference shared and static libraries")
if (WITH_C_API AND WITH_PYTHON) if (WITH_C_API AND WITH_PYTHON)
message(WARNING "It is suggest not embedded a python interpreter in Paddle " message(WARNING "It is suggest not embedded a python interpreter in Paddle "
"when using C-API. It will give an unpredictable behavior when using a " "when using C-API. It will give an unpredictable behavior when using a "

@ -19,7 +19,7 @@ Our vision is to enable deep learning for everyone via PaddlePaddle.
Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest feature of PaddlePaddle. Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest feature of PaddlePaddle.
### Latest PaddlePaddle Release: [Fluid 0.15.0](https://github.com/PaddlePaddle/Paddle/tree/v0.15.0) ### Latest PaddlePaddle Release: [Fluid 1.0.0](https://github.com/PaddlePaddle/Paddle/tree/release/1.0.0)
### Install Latest Stable Release: ### Install Latest Stable Release:
``` ```
# Linux CPU # Linux CPU
@ -76,26 +76,26 @@ pip install paddlepaddle-gpu==0.15.0.post85
## Installation ## Installation
It is recommended to read [this doc](http://paddlepaddle.org/documentation/docs/zh/0.15.0/new_docs/beginners_guide/install/install_doc.html) on our website. It is recommended to read [this doc](http://paddlepaddle.org/documentation/docs/zh/1.0/beginners_guide/index.html) on our website.
## Documentation ## Documentation
We provide [English](http://paddlepaddle.org/documentation/docs/en/0.15.0/getstarted/index_en.html) and We provide [English](http://paddlepaddle.org/documentation/docs/en/1.0.0/getstarted/index_en.html) and
[Chinese](http://paddlepaddle.org/documentation/docs/zh/0.15.0/new_docs/beginners_guide/index.html) documentation. [Chinese](http://paddlepaddle.org/documentation/docs/zh/1.0/beginners_guide/index.html) documentation.
- [Deep Learning 101](https://github.com/PaddlePaddle/book) - [Deep Learning 101](https://github.com/PaddlePaddle/book)
You might want to start from this online interactive book that can run in a Jupyter Notebook. You might want to start from this online interactive book that can run in a Jupyter Notebook.
- [Distributed Training](http://paddlepaddle.org/documentation/docs/zh/0.15.0/new_docs/user_guides/howto/training/cluster_howto.html) - [Distributed Training](http://paddlepaddle.org/documentation/docs/zh/1.0/user_guides/howto/training/cluster_howto.html)
You can run distributed training jobs on MPI clusters. You can run distributed training jobs on MPI clusters.
- [Python API](http://paddlepaddle.org/documentation/api/zh/0.15.0/fluid.html) - [Python API](http://paddlepaddle.org/documentation/api/zh/1.0/fluid.html)
Our new API enables much shorter programs. Our new API enables much shorter programs.
- [How to Contribute](http://paddlepaddle.org/documentation/docs/zh/0.15.0/new_docs/advanced_usage/development/contribute_to_paddle.html) - [How to Contribute](http://paddlepaddle.org/documentation/docs/zh/1.0/advanced_usage/development/contribute_to_paddle.html)
We appreciate your contributions! We appreciate your contributions!

@ -150,16 +150,16 @@ if (WITH_ANAKIN AND WITH_MKL)
SRCS SRCS
${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/libinference_anakin_api* # compiled anakin api ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/libinference_anakin_api* # compiled anakin api
${ANAKIN_INSTALL_DIR} # anakin release ${ANAKIN_INSTALL_DIR} # anakin release
DSTS ${dst_dir}/inference/anakin ${FLUID_INSTALL_DIR}/third_party/install/anakin) DSTS ${FLUID_INSTALL_DIR}/third_party/install/anakin ${FLUID_INSTALL_DIR}/third_party/install/anakin)
list(APPEND inference_deps anakin_inference_lib) list(APPEND inference_deps anakin_inference_lib)
endif() endif()
set(module "inference") set(module "inference")
copy(inference_lib DEPS ${inference_deps} copy(inference_lib DEPS ${inference_deps}
SRCS ${src_dir}/${module}/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/inference/libpaddle_fluid.* SRCS ${src_dir}/${module}/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/inference/libpaddle_fluid.*
${src_dir}/${module}/api/paddle_inference_api.h ${src_dir}/${module}/api/demo_ci ${src_dir}/${module}/api/paddle_inference_api.h
${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h
DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module}
) )
set(module "platform") set(module "platform")
@ -188,18 +188,38 @@ copy(cmake_cache
# This command generates a complete fluid library for both train and inference # This command generates a complete fluid library for both train and inference
add_custom_target(fluid_lib_dist DEPENDS ${fluid_lib_dist_dep}) add_custom_target(fluid_lib_dist DEPENDS ${fluid_lib_dist_dep})
# Following commands generate a inference-only fluid library
# third_party, version.txt and CMakeCache.txt are the same position with ${FLUID_INSTALL_DIR}
copy(third_party DEPS fluid_lib_dist
SRCS ${FLUID_INSTALL_DIR}/third_party ${FLUID_INSTALL_DIR}/CMakeCache.txt
DSTS ${FLUID_INFERENCE_INSTALL_DIR} ${FLUID_INFERENCE_INSTALL_DIR}
)
# only need libpaddle_fluid.so/a and paddle_inference_api.h for inference-only library
copy(inference_api_lib DEPS fluid_lib_dist
SRCS ${FLUID_INSTALL_DIR}/paddle/fluid/inference/libpaddle_fluid.*
${FLUID_INSTALL_DIR}/paddle/fluid/inference/paddle_inference_api.h
DSTS ${FLUID_INFERENCE_INSTALL_DIR}/paddle/lib ${FLUID_INFERENCE_INSTALL_DIR}/paddle/include
)
add_custom_target(inference_lib_dist DEPENDS third_party inference_api_lib)
# paddle fluid version # paddle fluid version
execute_process( function(version version_file)
COMMAND ${GIT_EXECUTABLE} log --pretty=format:%H -1 execute_process(
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR} COMMAND ${GIT_EXECUTABLE} log --pretty=format:%H -1
OUTPUT_VARIABLE PADDLE_GIT_COMMIT) WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}
set(version_file ${FLUID_INSTALL_DIR}/version.txt) OUTPUT_VARIABLE PADDLE_GIT_COMMIT)
file(WRITE ${version_file} file(WRITE ${version_file}
"GIT COMMIT ID: ${PADDLE_GIT_COMMIT}\n" "GIT COMMIT ID: ${PADDLE_GIT_COMMIT}\n"
"WITH_MKL: ${WITH_MKL}\n" "WITH_MKL: ${WITH_MKL}\n"
"WITH_GPU: ${WITH_GPU}\n") "WITH_MKLDNN: ${WITH_MKLDNN}\n"
if(WITH_GPU) "WITH_GPU: ${WITH_GPU}\n")
file(APPEND ${version_file} if(WITH_GPU)
"CUDA version: ${CUDA_VERSION}\n" file(APPEND ${version_file}
"CUDNN version: v${CUDNN_MAJOR_VERSION}\n") "CUDA version: ${CUDA_VERSION}\n"
endif() "CUDNN version: v${CUDNN_MAJOR_VERSION}\n")
endif()
endfunction()
version(${FLUID_INSTALL_DIR}/version.txt)
version(${FLUID_INFERENCE_INSTALL_DIR}/version.txt)

@ -85,6 +85,7 @@ paddle.fluid.layers.reduce_min ArgSpec(args=['input', 'dim', 'keep_dim', 'name']
paddle.fluid.layers.reduce_prod ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)) paddle.fluid.layers.reduce_prod ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None))
paddle.fluid.layers.sequence_first_step ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.sequence_first_step ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.sequence_last_step ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.sequence_last_step ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.sequence_slice ArgSpec(args=['input', 'offset', 'length', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.dropout ArgSpec(args=['x', 'dropout_prob', 'is_test', 'seed', 'name'], varargs=None, keywords=None, defaults=(False, None, None)) paddle.fluid.layers.dropout ArgSpec(args=['x', 'dropout_prob', 'is_test', 'seed', 'name'], varargs=None, keywords=None, defaults=(False, None, None))
paddle.fluid.layers.split ArgSpec(args=['input', 'num_or_sections', 'dim', 'name'], varargs=None, keywords=None, defaults=(-1, None)) paddle.fluid.layers.split ArgSpec(args=['input', 'num_or_sections', 'dim', 'name'], varargs=None, keywords=None, defaults=(-1, None))
paddle.fluid.layers.ctc_greedy_decoder ArgSpec(args=['input', 'blank', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.ctc_greedy_decoder ArgSpec(args=['input', 'blank', 'name'], varargs=None, keywords=None, defaults=(None,))

@ -101,7 +101,7 @@ void InitializeVariable(Variable* var, proto::VarType::Type var_type) {
} else if (var_type == proto::VarType::FETCH_LIST) { } else if (var_type == proto::VarType::FETCH_LIST) {
var->GetMutable<FeedFetchList>(); var->GetMutable<FeedFetchList>();
} else if (var_type == proto::VarType::STEP_SCOPES) { } else if (var_type == proto::VarType::STEP_SCOPES) {
var->GetMutable<std::vector<framework::Scope>>(); var->GetMutable<std::vector<framework::Scope*>>();
} else if (var_type == proto::VarType::LOD_RANK_TABLE) { } else if (var_type == proto::VarType::LOD_RANK_TABLE) {
var->GetMutable<LoDRankTable>(); var->GetMutable<LoDRankTable>();
} else if (var_type == proto::VarType::LOD_TENSOR_ARRAY) { } else if (var_type == proto::VarType::LOD_TENSOR_ARRAY) {

@ -27,8 +27,7 @@ void SetFeedVariable(Scope* scope, const LoDTensor& input,
// be created. // be created.
VLOG(3) << "SetFeedVariable name=" << var_name << " index=" << index; VLOG(3) << "SetFeedVariable name=" << var_name << " index=" << index;
Variable* g_feed_value = scope->Var(var_name); Variable* g_feed_value = scope->Var(var_name);
auto& feed_inputs = auto& feed_inputs = *(g_feed_value->GetMutable<FeedFetchList>());
*(g_feed_value->GetMutable<std::vector<paddle::framework::LoDTensor>>());
if (index >= feed_inputs.size()) { if (index >= feed_inputs.size()) {
feed_inputs.resize(index + 1); feed_inputs.resize(index + 1);
} }

@ -37,7 +37,7 @@ static void InitializeVariable(Variable *var, proto::VarType::Type var_type) {
} else if (var_type == proto::VarType::FETCH_LIST) { } else if (var_type == proto::VarType::FETCH_LIST) {
var->GetMutable<FeedFetchList>(); var->GetMutable<FeedFetchList>();
} else if (var_type == proto::VarType::STEP_SCOPES) { } else if (var_type == proto::VarType::STEP_SCOPES) {
var->GetMutable<std::vector<framework::Scope>>(); var->GetMutable<std::vector<framework::Scope *>>();
} else if (var_type == proto::VarType::LOD_RANK_TABLE) { } else if (var_type == proto::VarType::LOD_RANK_TABLE) {
var->GetMutable<LoDRankTable>(); var->GetMutable<LoDRankTable>();
} else if (var_type == proto::VarType::LOD_TENSOR_ARRAY) { } else if (var_type == proto::VarType::LOD_TENSOR_ARRAY) {

@ -100,16 +100,6 @@ class OpDesc {
std::vector<std::string> InputNames() const { return MapKeys(inputs_); } std::vector<std::string> InputNames() const { return MapKeys(inputs_); }
std::vector<std::string> OutputNames() const { return MapKeys(outputs_); } std::vector<std::string> OutputNames() const { return MapKeys(outputs_); }
void SetInputMap(const VariableNameMap &input) {
this->inputs_ = input;
this->need_update_ = true;
}
void SetOutputMap(const VariableNameMap &output) {
this->outputs_ = output;
this->need_update_ = true;
}
const VariableNameMap &Inputs() const { return inputs_; } const VariableNameMap &Inputs() const { return inputs_; }
const VariableNameMap &Outputs() const { return outputs_; } const VariableNameMap &Outputs() const { return outputs_; }

@ -149,9 +149,17 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
platform::SetDeviceId(dev_id); platform::SetDeviceId(dev_id);
#endif #endif
} }
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
platform::RecordEvent record_event(Type(), pool.Get(place)); // The profile has a process-wide mutex, results in serious performance issue
RunImpl(scope, place); // in concurrency scenerio. Here use an `if` to fix this issue.
// Please not remove the `if`, ask @Superjomn if there are any concern.
if (platform::IsProfileEnabled()) {
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
platform::RecordEvent record_event(Type(), pool.Get(place));
RunImpl(scope, place);
} else {
RunImpl(scope, place);
}
VLOG(3) << place << " " << DebugStringEx(&scope); VLOG(3) << place << " " << DebugStringEx(&scope);
} }

@ -59,6 +59,7 @@ class VarDesc {
public: public:
explicit VarDesc(const std::string &name) { explicit VarDesc(const std::string &name) {
desc_.set_name(name); desc_.set_name(name);
// TODO(paddle-dev): Why default to lodtensor.
desc_.mutable_type()->set_type(proto::VarType::LOD_TENSOR); desc_.mutable_type()->set_type(proto::VarType::LOD_TENSOR);
} }

@ -38,8 +38,12 @@ class Variable {
template <typename T> template <typename T>
T* GetMutable() { T* GetMutable() {
if (!IsType<T>()) { if (!holder_) {
holder_.reset(new PlaceholderImpl<T>(new T())); holder_.reset(new PlaceholderImpl<T>(new T()));
} else {
PADDLE_ENFORCE(IsType<T>(),
"Variable must be type %s, the holding type is %s",
typeid(T).name(), holder_->Type().name());
} }
return static_cast<T*>(holder_->Ptr()); return static_cast<T*>(holder_->Ptr());
} }

@ -33,9 +33,10 @@ TEST(Variable, GetMutable) {
const Tensor& tt = v->Get<Tensor>(); const Tensor& tt = v->Get<Tensor>();
EXPECT_EQ(1234, tt.content_); EXPECT_EQ(1234, tt.content_);
std::string* s = v->GetMutable<std::string>(); try {
*s = "hello"; v->GetMutable<std::string>();
} catch (std::exception& e) {
const std::string& ss = v->Get<std::string>(); return;
EXPECT_EQ("hello", ss); }
EXPECT_TRUE(false);
} }

@ -51,9 +51,7 @@ void TestWord2vecPrediction(const std::string& model_path) {
config.model_dir = model_path; config.model_dir = model_path;
config.use_gpu = false; config.use_gpu = false;
config.device = 0; config.device = 0;
auto predictor = auto predictor = ::paddle::CreatePaddlePredictor<NativeConfig>(config);
::paddle::CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(
config);
// One single batch // One single batch

@ -340,6 +340,19 @@ bool AnalysisPredictor::LoadProgramDesc() {
} }
return true; return true;
} }
AnalysisPredictor::~AnalysisPredictor() {
#if !defined(_WIN32)
if (FLAGS_profile) {
platform::DisableProfiler(platform::EventSortingKey::kTotal,
"./profile.log");
}
#endif
if (sub_scope_) {
scope_->DeleteScope(sub_scope_);
}
}
std::unique_ptr<PaddlePredictor> AnalysisPredictor::Clone() { std::unique_ptr<PaddlePredictor> AnalysisPredictor::Clone() {
auto *x = new AnalysisPredictor(config_); auto *x = new AnalysisPredictor(config_);
x->Init(scope_, inference_program_); x->Init(scope_, inference_program_);

@ -72,6 +72,7 @@ class AnalysisPredictor : public PaddlePredictor {
template <typename T> template <typename T>
void GetFetchOne(const framework::LoDTensor &fetchs, void GetFetchOne(const framework::LoDTensor &fetchs,
PaddleTensor *output_data); PaddleTensor *output_data);
~AnalysisPredictor();
private: private:
contrib::AnalysisConfig config_; contrib::AnalysisConfig config_;

@ -27,9 +27,7 @@ TEST(AnalysisPredictor, ZeroCopy) {
config.model_dir = FLAGS_dirname + "/word2vec.inference.model"; config.model_dir = FLAGS_dirname + "/word2vec.inference.model";
config.use_feed_fetch_ops = false; config.use_feed_fetch_ops = false;
auto predictor = auto predictor = CreatePaddlePredictor<AnalysisConfig>(config);
CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
config);
auto w0 = predictor->GetInputTensor("firstw"); auto w0 = predictor->GetInputTensor("firstw");
auto w1 = predictor->GetInputTensor("secondw"); auto w1 = predictor->GetInputTensor("secondw");

@ -41,11 +41,8 @@ void CompareTensorRTWithFluid(bool enable_tensorrt) {
config1.device = 0; config1.device = 0;
config1.max_batch_size = 10; config1.max_batch_size = 10;
auto predictor0 = auto predictor0 = CreatePaddlePredictor<NativeConfig>(config0);
CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config0); auto predictor1 = CreatePaddlePredictor<MixedRTConfig>(config1);
auto predictor1 =
CreatePaddlePredictor<MixedRTConfig,
PaddleEngineKind::kAutoMixedTensorRT>(config1);
for (int batch_id = 0; batch_id < 1; batch_id++) { for (int batch_id = 0; batch_id < 1; batch_id++) {
//# 2. Prepare input. //# 2. Prepare input.

@ -77,7 +77,7 @@ endif(NOT WIN32)
link_directories("${PADDLE_LIB}/third_party/install/protobuf/lib") link_directories("${PADDLE_LIB}/third_party/install/protobuf/lib")
link_directories("${PADDLE_LIB}/third_party/install/glog/lib") link_directories("${PADDLE_LIB}/third_party/install/glog/lib")
link_directories("${PADDLE_LIB}/third_party/install/gflags/lib") link_directories("${PADDLE_LIB}/third_party/install/gflags/lib")
link_directories("${PADDLE_LIB}/paddle/fluid/inference") link_directories("${PADDLE_LIB}/paddle/lib")
add_executable(${DEMO_NAME} ${DEMO_NAME}.cc) add_executable(${DEMO_NAME} ${DEMO_NAME}.cc)
@ -97,10 +97,10 @@ endif()
# Note: libpaddle_inference_api.so/a must put before libpaddle_fluid.so/a # Note: libpaddle_inference_api.so/a must put before libpaddle_fluid.so/a
if(WITH_STATIC_LIB) if(WITH_STATIC_LIB)
set(DEPS set(DEPS
${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX}) ${PADDLE_LIB}/paddle/lib/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX})
else() else()
set(DEPS set(DEPS
${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX}) ${PADDLE_LIB}/paddle/lib/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX})
endif() endif()
if (NOT WIN32) if (NOT WIN32)

@ -5,12 +5,13 @@ TEST_GPU_CPU=$3 # test both GPU/CPU mode or only CPU mode
DATA_DIR=$4 # dataset DATA_DIR=$4 # dataset
TENSORRT_INCLUDE_DIR=$5 # TensorRT header file dir, defalut to /usr/local/TensorRT/include TENSORRT_INCLUDE_DIR=$5 # TensorRT header file dir, defalut to /usr/local/TensorRT/include
TENSORRT_LIB_DIR=$6 # TensorRT lib file dir, default to /usr/local/TensorRT/lib TENSORRT_LIB_DIR=$6 # TensorRT lib file dir, default to /usr/local/TensorRT/lib
inference_install_dir=${PADDLE_ROOT}/build/fluid_inference_install_dir
cd `dirname $0` cd `dirname $0`
current_dir=`pwd` current_dir=`pwd`
if [ $2 == ON ]; then if [ $2 == ON ]; then
# You can export yourself if move the install path # You can export yourself if move the install path
MKL_LIB=${PADDLE_ROOT}/build/fluid_install_dir/third_party/install/mklml/lib MKL_LIB=${inference_install_dir}/third_party/install/mklml/lib
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${MKL_LIB} export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${MKL_LIB}
fi fi
if [ $3 == ON ]; then if [ $3 == ON ]; then
@ -55,7 +56,7 @@ cd build
for WITH_STATIC_LIB in ON OFF; do for WITH_STATIC_LIB in ON OFF; do
# -----simple_on_word2vec----- # -----simple_on_word2vec-----
rm -rf * rm -rf *
cmake .. -DPADDLE_LIB=${PADDLE_ROOT}/build/fluid_install_dir/ \ cmake .. -DPADDLE_LIB=${inference_install_dir} \
-DWITH_MKL=$TURN_ON_MKL \ -DWITH_MKL=$TURN_ON_MKL \
-DDEMO_NAME=simple_on_word2vec \ -DDEMO_NAME=simple_on_word2vec \
-DWITH_GPU=$TEST_GPU_CPU \ -DWITH_GPU=$TEST_GPU_CPU \
@ -75,7 +76,7 @@ for WITH_STATIC_LIB in ON OFF; do
fi fi
# ---------vis_demo--------- # ---------vis_demo---------
rm -rf * rm -rf *
cmake .. -DPADDLE_LIB=${PADDLE_ROOT}/build/fluid_install_dir/ \ cmake .. -DPADDLE_LIB=${inference_install_dir} \
-DWITH_MKL=$TURN_ON_MKL \ -DWITH_MKL=$TURN_ON_MKL \
-DDEMO_NAME=vis_demo \ -DDEMO_NAME=vis_demo \
-DWITH_GPU=$TEST_GPU_CPU \ -DWITH_GPU=$TEST_GPU_CPU \
@ -98,7 +99,7 @@ for WITH_STATIC_LIB in ON OFF; do
# --------tensorrt mobilenet------ # --------tensorrt mobilenet------
if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then
rm -rf * rm -rf *
cmake .. -DPADDLE_LIB=${PADDLE_ROOT}/build/fluid_install_dir/ \ cmake .. -DPADDLE_LIB=${inference_install_dir} \
-DWITH_MKL=$TURN_ON_MKL \ -DWITH_MKL=$TURN_ON_MKL \
-DDEMO_NAME=trt_mobilenet_demo \ -DDEMO_NAME=trt_mobilenet_demo \
-DWITH_GPU=$TEST_GPU_CPU \ -DWITH_GPU=$TEST_GPU_CPU \

@ -23,7 +23,7 @@ limitations under the License. */
#include <memory> #include <memory>
#include <thread> //NOLINT #include <thread> //NOLINT
#include "paddle/fluid/inference/paddle_inference_api.h" #include "paddle/include/paddle_inference_api.h"
DEFINE_string(dirname, "", "Directory of the inference model."); DEFINE_string(dirname, "", "Directory of the inference model.");
DEFINE_bool(use_gpu, false, "Whether use gpu."); DEFINE_bool(use_gpu, false, "Whether use gpu.");
@ -42,8 +42,7 @@ void Main(bool use_gpu) {
config.use_gpu = use_gpu; config.use_gpu = use_gpu;
config.fraction_of_gpu_memory = 0.15; config.fraction_of_gpu_memory = 0.15;
config.device = 0; config.device = 0;
auto predictor = auto predictor = CreatePaddlePredictor<NativeConfig>(config);
CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
for (int batch_id = 0; batch_id < 3; batch_id++) { for (int batch_id = 0; batch_id < 3; batch_id++) {
//# 2. Prepare input. //# 2. Prepare input.
@ -85,8 +84,7 @@ void MainThreads(int num_threads, bool use_gpu) {
config.use_gpu = use_gpu; config.use_gpu = use_gpu;
config.fraction_of_gpu_memory = 0.15; config.fraction_of_gpu_memory = 0.15;
config.device = 0; config.device = 0;
auto main_predictor = auto main_predictor = CreatePaddlePredictor<NativeConfig>(config);
CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
std::vector<std::thread> threads; std::vector<std::thread> threads;
for (int tid = 0; tid < num_threads; ++tid) { for (int tid = 0; tid < num_threads; ++tid) {

@ -18,7 +18,7 @@ limitations under the License. */
#include <gflags/gflags.h> #include <gflags/gflags.h>
#include <glog/logging.h> // use glog instead of CHECK to avoid importing other paddle header files. #include <glog/logging.h> // use glog instead of CHECK to avoid importing other paddle header files.
#include "paddle/fluid/inference/demo_ci/utils.h" #include "utils.h" // NOLINT
DECLARE_double(fraction_of_gpu_memory_to_use); DECLARE_double(fraction_of_gpu_memory_to_use);
DEFINE_string(modeldir, "", "Directory of the inference model."); DEFINE_string(modeldir, "", "Directory of the inference model.");

@ -18,7 +18,7 @@
#include <iostream> #include <iostream>
#include <string> #include <string>
#include <vector> #include <vector>
#include "paddle/fluid/inference/paddle_inference_api.h" #include "paddle/include/paddle_inference_api.h"
namespace paddle { namespace paddle {
namespace demo { namespace demo {

@ -18,7 +18,7 @@ limitations under the License. */
#include <gflags/gflags.h> #include <gflags/gflags.h>
#include <glog/logging.h> // use glog instead of CHECK to avoid importing other paddle header files. #include <glog/logging.h> // use glog instead of CHECK to avoid importing other paddle header files.
#include "paddle/fluid/inference/demo_ci/utils.h" #include "utils.h" // NOLINT
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
DECLARE_double(fraction_of_gpu_memory_to_use); DECLARE_double(fraction_of_gpu_memory_to_use);
@ -34,12 +34,13 @@ DEFINE_bool(use_gpu, false, "Whether use gpu.");
namespace paddle { namespace paddle {
namespace demo { namespace demo {
using contrib::AnalysisConfig;
/* /*
* Use the native fluid engine to inference the demo. * Use the native and analysis fluid engine to inference the demo.
*/ */
void Main(bool use_gpu) { void Main(bool use_gpu) {
std::unique_ptr<PaddlePredictor> predictor; std::unique_ptr<PaddlePredictor> predictor, analysis_predictor;
NativeConfig config; AnalysisConfig config;
config.param_file = FLAGS_modeldir + "/__params__"; config.param_file = FLAGS_modeldir + "/__params__";
config.prog_file = FLAGS_modeldir + "/__model__"; config.prog_file = FLAGS_modeldir + "/__model__";
config.use_gpu = use_gpu; config.use_gpu = use_gpu;
@ -49,8 +50,8 @@ void Main(bool use_gpu) {
} }
VLOG(3) << "init predictor"; VLOG(3) << "init predictor";
predictor = predictor = CreatePaddlePredictor<NativeConfig>(config);
CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config); analysis_predictor = CreatePaddlePredictor<AnalysisConfig>(config);
VLOG(3) << "begin to process data"; VLOG(3) << "begin to process data";
// Just a single batch of data. // Just a single batch of data.
@ -68,7 +69,7 @@ void Main(bool use_gpu) {
input.dtype = PaddleDType::FLOAT32; input.dtype = PaddleDType::FLOAT32;
VLOG(3) << "run executor"; VLOG(3) << "run executor";
std::vector<PaddleTensor> output; std::vector<PaddleTensor> output, analysis_output;
predictor->Run({input}, &output, 1); predictor->Run({input}, &output, 1);
VLOG(3) << "output.size " << output.size(); VLOG(3) << "output.size " << output.size();
@ -77,6 +78,10 @@ void Main(bool use_gpu) {
// compare with reference result // compare with reference result
CheckOutput(FLAGS_refer, tensor); CheckOutput(FLAGS_refer, tensor);
// the analysis_output has some diff with native_output,
// TODO(luotao): add CheckOutput for analysis_output later.
analysis_predictor->Run({input}, &analysis_output, 1);
} }
} // namespace demo } // namespace demo

@ -308,18 +308,13 @@ TEST(Analyzer_rnn1, ZeroCopy) {
PaddlePlace place; PaddlePlace place;
int output_size{0}; int output_size{0};
auto predictor = auto predictor = CreatePaddlePredictor<AnalysisConfig>(config);
CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
config);
config.use_feed_fetch_ops = true; config.use_feed_fetch_ops = true;
auto native_predictor = auto native_predictor = CreatePaddlePredictor<NativeConfig>(config);
CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
config.use_feed_fetch_ops = true; // the analysis predictor needs feed/fetch. config.use_feed_fetch_ops = true; // the analysis predictor needs feed/fetch.
auto analysis_predictor = auto analysis_predictor = CreatePaddlePredictor<AnalysisConfig>(config);
CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
config);
#define NEW_TENSOR(name__) \ #define NEW_TENSOR(name__) \
auto name__##_tensor = predictor->GetInputTensor(#name__); auto name__##_tensor = predictor->GetInputTensor(#name__);

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save