Merge branch 'develop' of https://github.com/PaddlePaddle/paddle into fix-transpose-doc

test=develop
7 years ago · d0662bc924
parent 1c1e5ffb1a 30dfbdee7f
commit d0662bc924
68 changed files with 3304 additions and 659 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -127,6 +127,9 @@ set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
 set(FLUID_INSTALL_DIR "${CMAKE_BINARY_DIR}/fluid_install_dir" CACHE STRING
  "A path setting fluid shared and static libraries")
 set(FLUID_INFERENCE_INSTALL_DIR "${CMAKE_BINARY_DIR}/fluid_inference_install_dir" CACHE STRING
  "A path setting fluid inference shared and static libraries")
 if (WITH_C_API AND WITH_PYTHON)
  message(WARNING "It is suggest not embedded a python interpreter in Paddle "
    "when using C-API. It will give an unpredictable behavior when using a "
--- a/README.md
+++ b/README.md
@ -19,7 +19,7 @@ Our vision is to enable deep learning for everyone via PaddlePaddle.
 Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest features of PaddlePaddle.
-### Latest PaddlePaddle Release: [Fluid 0.15.0](https://github.com/PaddlePaddle/Paddle/tree/v0.15.0)
+### Latest PaddlePaddle Release: [Fluid 1.0.0](https://github.com/PaddlePaddle/Paddle/tree/release/1.0.0)
 ### Install Latest Stable Release:
 ```
 # Linux CPU
@ -76,26 +76,26 @@ pip install paddlepaddle-gpu==0.15.0.post85
 ## Installation
-It is recommended to read [this doc](http://paddlepaddle.org/documentation/docs/zh/0.15.0/new_docs/beginners_guide/install/install_doc.html) on our website.
+It is recommended to read [this doc](http://paddlepaddle.org/documentation/docs/zh/1.0/beginners_guide/index.html) on our website.
 ## Documentation
-We provide [English](http://paddlepaddle.org/documentation/docs/en/0.15.0/getstarted/index_en.html) and
+We provide [English](http://paddlepaddle.org/documentation/docs/en/1.0.0/getstarted/index_en.html) and
-[Chinese](http://paddlepaddle.org/documentation/docs/zh/0.15.0/new_docs/beginners_guide/index.html) documentation.
+[Chinese](http://paddlepaddle.org/documentation/docs/zh/1.0/beginners_guide/index.html) documentation.
 - [Deep Learning 101](https://github.com/PaddlePaddle/book)
  You might want to start from this online interactive book that can run in a Jupyter Notebook.
- [Distributed Training](http://paddlepaddle.org/documentation/docs/zh/0.15.0/new_docs/user_guides/howto/training/cluster_howto.html)
+- [Distributed Training](http://paddlepaddle.org/documentation/docs/zh/1.0/user_guides/howto/training/cluster_howto.html)
  You can run distributed training jobs on MPI clusters.
- [Python API](http://paddlepaddle.org/documentation/api/zh/0.15.0/fluid.html)
+- [Python API](http://paddlepaddle.org/documentation/api/zh/1.0/fluid.html)
   Our new API enables much shorter programs.
- [How to Contribute](http://paddlepaddle.org/documentation/docs/zh/0.15.0/new_docs/advanced_usage/development/contribute_to_paddle.html)
+- [How to Contribute](http://paddlepaddle.org/documentation/docs/zh/1.0/advanced_usage/development/contribute_to_paddle.html)
   We appreciate your contributions!
--- a/benchmark/fluid/run.sh
+++ b/benchmark/fluid/run.sh
--- a/cmake/inference_lib.cmake
+++ b/cmake/inference_lib.cmake
@ -150,16 +150,16 @@ if (WITH_ANAKIN AND WITH_MKL)
        SRCS
        ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/libinference_anakin_api* # compiled anakin api
        ${ANAKIN_INSTALL_DIR} # anakin release
-        DSTS ${dst_dir}/inference/anakin ${FLUID_INSTALL_DIR}/third_party/install/anakin)
+        DSTS ${FLUID_INSTALL_DIR}/third_party/install/anakin ${FLUID_INSTALL_DIR}/third_party/install/anakin)
     list(APPEND inference_deps anakin_inference_lib)
 endif()
 set(module "inference")
 copy(inference_lib DEPS ${inference_deps}
  SRCS ${src_dir}/${module}/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/inference/libpaddle_fluid.*
-       ${src_dir}/${module}/api/paddle_inference_api.h ${src_dir}/${module}/api/demo_ci
+       ${src_dir}/${module}/api/paddle_inference_api.h
       ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h
-  DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module}
+  DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module}
 )
 set(module "platform")
@ -188,18 +188,38 @@ copy(cmake_cache
 # This command generates a complete fluid library for both train and inference
 add_custom_target(fluid_lib_dist DEPENDS ${fluid_lib_dist_dep}) 
 # The following commands generate an inference-only fluid library
 # third_party, version.txt and CMakeCache.txt are in the same position as in ${FLUID_INSTALL_DIR}
 copy(third_party DEPS fluid_lib_dist
  SRCS ${FLUID_INSTALL_DIR}/third_party ${FLUID_INSTALL_DIR}/CMakeCache.txt
  DSTS ${FLUID_INFERENCE_INSTALL_DIR} ${FLUID_INFERENCE_INSTALL_DIR}
 )
 # only need libpaddle_fluid.so/a and paddle_inference_api.h for inference-only library
 copy(inference_api_lib DEPS fluid_lib_dist
  SRCS ${FLUID_INSTALL_DIR}/paddle/fluid/inference/libpaddle_fluid.*
       ${FLUID_INSTALL_DIR}/paddle/fluid/inference/paddle_inference_api.h
  DSTS ${FLUID_INFERENCE_INSTALL_DIR}/paddle/lib ${FLUID_INFERENCE_INSTALL_DIR}/paddle/include
 )
 add_custom_target(inference_lib_dist DEPENDS third_party inference_api_lib)
 # paddle fluid version
-execute_process(
+function(version version_file)
-  COMMAND ${GIT_EXECUTABLE} log --pretty=format:%H -1
+  execute_process(
-  WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}
+    COMMAND ${GIT_EXECUTABLE} log --pretty=format:%H -1
-  OUTPUT_VARIABLE PADDLE_GIT_COMMIT)
+    WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}
-set(version_file ${FLUID_INSTALL_DIR}/version.txt)
+    OUTPUT_VARIABLE PADDLE_GIT_COMMIT)
-file(WRITE ${version_file}
+  file(WRITE ${version_file}
-  "GIT COMMIT ID: ${PADDLE_GIT_COMMIT}\n"
+    "GIT COMMIT ID: ${PADDLE_GIT_COMMIT}\n"
-  "WITH_MKL: ${WITH_MKL}\n"
+    "WITH_MKL: ${WITH_MKL}\n"
-  "WITH_GPU: ${WITH_GPU}\n")
+    "WITH_MKLDNN: ${WITH_MKLDNN}\n"
-if(WITH_GPU)
+    "WITH_GPU: ${WITH_GPU}\n")
-  file(APPEND ${version_file}
+  if(WITH_GPU)
-    "CUDA version: ${CUDA_VERSION}\n"
+    file(APPEND ${version_file}
-    "CUDNN version: v${CUDNN_MAJOR_VERSION}\n")
+      "CUDA version: ${CUDA_VERSION}\n"
-endif()
+      "CUDNN version: v${CUDNN_MAJOR_VERSION}\n")
  endif()
 endfunction()
 version(${FLUID_INSTALL_DIR}/version.txt)
 version(${FLUID_INFERENCE_INSTALL_DIR}/version.txt)
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@ -85,6 +85,7 @@ paddle.fluid.layers.reduce_min ArgSpec(args=['input', 'dim', 'keep_dim', 'name']
 paddle.fluid.layers.reduce_prod ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None))
 paddle.fluid.layers.sequence_first_step ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.layers.sequence_last_step ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.layers.sequence_slice ArgSpec(args=['input', 'offset', 'length', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.dropout ArgSpec(args=['x', 'dropout_prob', 'is_test', 'seed', 'name'], varargs=None, keywords=None, defaults=(False, None, None))
 paddle.fluid.layers.split ArgSpec(args=['input', 'num_or_sections', 'dim', 'name'], varargs=None, keywords=None, defaults=(-1, None))
 paddle.fluid.layers.ctc_greedy_decoder ArgSpec(args=['input', 'blank', 'name'], varargs=None, keywords=None, defaults=(None,))
--- a/paddle/fluid/framework/executor.cc
+++ b/paddle/fluid/framework/executor.cc
@ -101,7 +101,7 @@ void InitializeVariable(Variable* var, proto::VarType::Type var_type) {
  } else if (var_type == proto::VarType::FETCH_LIST) {
    var->GetMutable<FeedFetchList>();
  } else if (var_type == proto::VarType::STEP_SCOPES) {
-    var->GetMutable<std::vector<framework::Scope>>();
+    var->GetMutable<std::vector<framework::Scope*>>();
  } else if (var_type == proto::VarType::LOD_RANK_TABLE) {
    var->GetMutable<LoDRankTable>();
  } else if (var_type == proto::VarType::LOD_TENSOR_ARRAY) {
--- a/paddle/fluid/framework/feed_fetch_method.cc
+++ b/paddle/fluid/framework/feed_fetch_method.cc
@ -27,8 +27,7 @@ void SetFeedVariable(Scope* scope, const LoDTensor& input,
  // be created.
  VLOG(3) << "SetFeedVariable name=" << var_name << " index=" << index;
  Variable* g_feed_value = scope->Var(var_name);
-  auto& feed_inputs =
+  auto& feed_inputs = *(g_feed_value->GetMutable<FeedFetchList>());
      *(g_feed_value->GetMutable<std::vector<paddle::framework::LoDTensor>>());
  if (index >= feed_inputs.size()) {
    feed_inputs.resize(index + 1);
  }
--- a/paddle/fluid/framework/naive_executor.cc
+++ b/paddle/fluid/framework/naive_executor.cc
@ -37,7 +37,7 @@ static void InitializeVariable(Variable *var, proto::VarType::Type var_type) {
  } else if (var_type == proto::VarType::FETCH_LIST) {
    var->GetMutable<FeedFetchList>();
  } else if (var_type == proto::VarType::STEP_SCOPES) {
-    var->GetMutable<std::vector<framework::Scope>>();
+    var->GetMutable<std::vector<framework::Scope *>>();
  } else if (var_type == proto::VarType::LOD_RANK_TABLE) {
    var->GetMutable<LoDRankTable>();
  } else if (var_type == proto::VarType::LOD_TENSOR_ARRAY) {
--- a/paddle/fluid/framework/op_desc.h
+++ b/paddle/fluid/framework/op_desc.h
@ -100,16 +100,6 @@ class OpDesc {
  std::vector<std::string> InputNames() const { return MapKeys(inputs_); }
  std::vector<std::string> OutputNames() const { return MapKeys(outputs_); }
  void SetInputMap(const VariableNameMap &input) {
    this->inputs_ = input;
    this->need_update_ = true;
  }
  void SetOutputMap(const VariableNameMap &output) {
    this->outputs_ = output;
    this->need_update_ = true;
  }
  const VariableNameMap &Inputs() const { return inputs_; }
  const VariableNameMap &Outputs() const { return outputs_; }
--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@ -149,9 +149,17 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
    platform::SetDeviceId(dev_id);
 #endif
  }
-  platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
+
-  platform::RecordEvent record_event(Type(), pool.Get(place));
+  // The profiler has a process-wide mutex, which results in a serious performance issue
-  RunImpl(scope, place);
+  // in concurrency scenarios. An `if` is used here to fix this issue.
  // Please do not remove the `if`; ask @Superjomn if there are any concerns.
  if (platform::IsProfileEnabled()) {
    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
    platform::RecordEvent record_event(Type(), pool.Get(place));
    RunImpl(scope, place);
  } else {
    RunImpl(scope, place);
  }
  VLOG(3) << place << " " << DebugStringEx(&scope);
 }
--- a/paddle/fluid/framework/var_desc.h
+++ b/paddle/fluid/framework/var_desc.h
@ -59,6 +59,7 @@ class VarDesc {
 public:
  explicit VarDesc(const std::string &name) {
    desc_.set_name(name);
    // TODO(paddle-dev): Why default to lodtensor.
    desc_.mutable_type()->set_type(proto::VarType::LOD_TENSOR);
  }
--- a/paddle/fluid/framework/variable.h
+++ b/paddle/fluid/framework/variable.h
@ -38,8 +38,12 @@ class Variable {
  template <typename T>
  T* GetMutable() {
-    if (!IsType<T>()) {
+    if (!holder_) {
      holder_.reset(new PlaceholderImpl<T>(new T()));
    } else {
      PADDLE_ENFORCE(IsType<T>(),
                     "Variable must be type %s, the holding type is %s",
                     typeid(T).name(), holder_->Type().name());
    }
    return static_cast<T*>(holder_->Ptr());
  }
--- a/paddle/fluid/framework/variable_test.cc
+++ b/paddle/fluid/framework/variable_test.cc
@ -33,9 +33,10 @@ TEST(Variable, GetMutable) {
  const Tensor& tt = v->Get<Tensor>();
  EXPECT_EQ(1234, tt.content_);
-  std::string* s = v->GetMutable<std::string>();
+  try {
-  *s = "hello";
+    v->GetMutable<std::string>();
-
+  } catch (std::exception& e) {
-  const std::string& ss = v->Get<std::string>();
+    return;
-  EXPECT_EQ("hello", ss);
+  }
  EXPECT_TRUE(false);
 }
--- a/paddle/fluid/inference/analysis/analyzer_tester.cc
+++ b/paddle/fluid/inference/analysis/analyzer_tester.cc
@ -51,9 +51,7 @@ void TestWord2vecPrediction(const std::string& model_path) {
  config.model_dir = model_path;
  config.use_gpu = false;
  config.device = 0;
-  auto predictor =
+  auto predictor = ::paddle::CreatePaddlePredictor<NativeConfig>(config);
      ::paddle::CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(
          config);
  // One single batch
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@ -340,6 +340,19 @@ bool AnalysisPredictor::LoadProgramDesc() {
  }
  return true;
 }
 AnalysisPredictor::~AnalysisPredictor() {
 #if !defined(_WIN32)
  if (FLAGS_profile) {
    platform::DisableProfiler(platform::EventSortingKey::kTotal,
                              "./profile.log");
  }
 #endif
  if (sub_scope_) {
    scope_->DeleteScope(sub_scope_);
  }
 }
 std::unique_ptr<PaddlePredictor> AnalysisPredictor::Clone() {
  auto *x = new AnalysisPredictor(config_);
  x->Init(scope_, inference_program_);
--- a/paddle/fluid/inference/api/analysis_predictor.h
+++ b/paddle/fluid/inference/api/analysis_predictor.h
@ -72,6 +72,7 @@ class AnalysisPredictor : public PaddlePredictor {
  template <typename T>
  void GetFetchOne(const framework::LoDTensor &fetchs,
                   PaddleTensor *output_data);
  ~AnalysisPredictor();
 private:
  contrib::AnalysisConfig config_;
--- a/paddle/fluid/inference/api/analysis_predictor_tester.cc
+++ b/paddle/fluid/inference/api/analysis_predictor_tester.cc
@ -27,9 +27,7 @@ TEST(AnalysisPredictor, ZeroCopy) {
  config.model_dir = FLAGS_dirname + "/word2vec.inference.model";
  config.use_feed_fetch_ops = false;
-  auto predictor =
+  auto predictor = CreatePaddlePredictor<AnalysisConfig>(config);
      CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
          config);
  auto w0 = predictor->GetInputTensor("firstw");
  auto w1 = predictor->GetInputTensor("secondw");
--- a/paddle/fluid/inference/api/api_tensorrt_subgraph_engine_tester.cc
+++ b/paddle/fluid/inference/api/api_tensorrt_subgraph_engine_tester.cc
@ -41,11 +41,8 @@ void CompareTensorRTWithFluid(bool enable_tensorrt) {
  config1.device = 0;
  config1.max_batch_size = 10;
-  auto predictor0 =
+  auto predictor0 = CreatePaddlePredictor<NativeConfig>(config0);
-      CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config0);
+  auto predictor1 = CreatePaddlePredictor<MixedRTConfig>(config1);
  auto predictor1 =
      CreatePaddlePredictor<MixedRTConfig,
                            PaddleEngineKind::kAutoMixedTensorRT>(config1);
  for (int batch_id = 0; batch_id < 1; batch_id++) {
    //# 2. Prepare input.
--- a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt
+++ b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt
@ -77,7 +77,7 @@ endif(NOT WIN32)
 link_directories("${PADDLE_LIB}/third_party/install/protobuf/lib")
 link_directories("${PADDLE_LIB}/third_party/install/glog/lib")
 link_directories("${PADDLE_LIB}/third_party/install/gflags/lib")
-link_directories("${PADDLE_LIB}/paddle/fluid/inference")
+link_directories("${PADDLE_LIB}/paddle/lib")
 add_executable(${DEMO_NAME} ${DEMO_NAME}.cc)
@ -97,10 +97,10 @@ endif()
 # Note: libpaddle_inference_api.so/a must be put before libpaddle_fluid.so/a
 if(WITH_STATIC_LIB)
  set(DEPS
-      ${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX})
+      ${PADDLE_LIB}/paddle/lib/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX})
 else()
  set(DEPS
-      ${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX})
+      ${PADDLE_LIB}/paddle/lib/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX})
 endif()
 if (NOT WIN32)
--- a/paddle/fluid/inference/api/demo_ci/run.sh
+++ b/paddle/fluid/inference/api/demo_ci/run.sh
@ -5,12 +5,13 @@ TEST_GPU_CPU=$3 # test both GPU/CPU mode or only CPU mode
 DATA_DIR=$4 # dataset
 TENSORRT_INCLUDE_DIR=$5 # TensorRT header file dir, default to /usr/local/TensorRT/include
 TENSORRT_LIB_DIR=$6 # TensorRT lib file dir, default to /usr/local/TensorRT/lib
 inference_install_dir=${PADDLE_ROOT}/build/fluid_inference_install_dir
 cd `dirname $0`
 current_dir=`pwd`
 if [ $2 == ON ]; then
  # You can export it yourself if you move the install path
-  MKL_LIB=${PADDLE_ROOT}/build/fluid_install_dir/third_party/install/mklml/lib
+  MKL_LIB=${inference_install_dir}/third_party/install/mklml/lib
  export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${MKL_LIB}
 fi
 if [ $3 == ON ]; then
@ -55,7 +56,7 @@ cd build
 for WITH_STATIC_LIB in ON OFF; do
  # -----simple_on_word2vec-----
  rm -rf *
-  cmake .. -DPADDLE_LIB=${PADDLE_ROOT}/build/fluid_install_dir/ \
+  cmake .. -DPADDLE_LIB=${inference_install_dir} \
    -DWITH_MKL=$TURN_ON_MKL \
    -DDEMO_NAME=simple_on_word2vec \
    -DWITH_GPU=$TEST_GPU_CPU \
@ -75,7 +76,7 @@ for WITH_STATIC_LIB in ON OFF; do
  fi
  # ---------vis_demo---------
  rm -rf *
-  cmake .. -DPADDLE_LIB=${PADDLE_ROOT}/build/fluid_install_dir/ \
+  cmake .. -DPADDLE_LIB=${inference_install_dir} \
    -DWITH_MKL=$TURN_ON_MKL \
    -DDEMO_NAME=vis_demo \
    -DWITH_GPU=$TEST_GPU_CPU \
@ -98,7 +99,7 @@ for WITH_STATIC_LIB in ON OFF; do
  # --------tensorrt mobilenet------
  if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then
    rm -rf *
-    cmake .. -DPADDLE_LIB=${PADDLE_ROOT}/build/fluid_install_dir/ \
+    cmake .. -DPADDLE_LIB=${inference_install_dir} \
      -DWITH_MKL=$TURN_ON_MKL \
      -DDEMO_NAME=trt_mobilenet_demo \
      -DWITH_GPU=$TEST_GPU_CPU \
--- a/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc
+++ b/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc
@ -23,7 +23,7 @@ limitations under the License. */
 #include <memory>
 #include <thread>  //NOLINT
-#include "paddle/fluid/inference/paddle_inference_api.h"
+#include "paddle/include/paddle_inference_api.h"
 DEFINE_string(dirname, "", "Directory of the inference model.");
 DEFINE_bool(use_gpu, false, "Whether use gpu.");
@ -42,8 +42,7 @@ void Main(bool use_gpu) {
  config.use_gpu = use_gpu;
  config.fraction_of_gpu_memory = 0.15;
  config.device = 0;
-  auto predictor =
+  auto predictor = CreatePaddlePredictor<NativeConfig>(config);
      CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
  for (int batch_id = 0; batch_id < 3; batch_id++) {
    //# 2. Prepare input.
@ -85,8 +84,7 @@ void MainThreads(int num_threads, bool use_gpu) {
  config.use_gpu = use_gpu;
  config.fraction_of_gpu_memory = 0.15;
  config.device = 0;
-  auto main_predictor =
+  auto main_predictor = CreatePaddlePredictor<NativeConfig>(config);
      CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
  std::vector<std::thread> threads;
  for (int tid = 0; tid < num_threads; ++tid) {
--- a/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc
+++ b/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc
@ -18,7 +18,7 @@ limitations under the License. */
 #include <gflags/gflags.h>
 #include <glog/logging.h>  // use glog instead of CHECK to avoid importing other paddle header files.
-#include "paddle/fluid/inference/demo_ci/utils.h"
+#include "utils.h"  // NOLINT
 DECLARE_double(fraction_of_gpu_memory_to_use);
 DEFINE_string(modeldir, "", "Directory of the inference model.");
--- a/paddle/fluid/inference/api/demo_ci/utils.h
+++ b/paddle/fluid/inference/api/demo_ci/utils.h
@ -18,7 +18,7 @@
 #include <iostream>
 #include <string>
 #include <vector>
-#include "paddle/fluid/inference/paddle_inference_api.h"
+#include "paddle/include/paddle_inference_api.h"
 namespace paddle {
 namespace demo {
--- a/paddle/fluid/inference/api/demo_ci/vis_demo.cc
+++ b/paddle/fluid/inference/api/demo_ci/vis_demo.cc
@ -18,7 +18,7 @@ limitations under the License. */
 #include <gflags/gflags.h>
 #include <glog/logging.h>  // use glog instead of CHECK to avoid importing other paddle header files.
-#include "paddle/fluid/inference/demo_ci/utils.h"
+#include "utils.h"  // NOLINT
 #ifdef PADDLE_WITH_CUDA
 DECLARE_double(fraction_of_gpu_memory_to_use);
@ -34,12 +34,13 @@ DEFINE_bool(use_gpu, false, "Whether use gpu.");
 namespace paddle {
 namespace demo {
 using contrib::AnalysisConfig;
 /*
- * Use the native fluid engine to inference the demo.
+ * Use the native and analysis fluid engine to inference the demo.
 */
 void Main(bool use_gpu) {
-  std::unique_ptr<PaddlePredictor> predictor;
+  std::unique_ptr<PaddlePredictor> predictor, analysis_predictor;
-  NativeConfig config;
+  AnalysisConfig config;
  config.param_file = FLAGS_modeldir + "/__params__";
  config.prog_file = FLAGS_modeldir + "/__model__";
  config.use_gpu = use_gpu;
@ -49,8 +50,8 @@ void Main(bool use_gpu) {
  }
  VLOG(3) << "init predictor";
-  predictor =
+  predictor = CreatePaddlePredictor<NativeConfig>(config);
-      CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
+  analysis_predictor = CreatePaddlePredictor<AnalysisConfig>(config);
  VLOG(3) << "begin to process data";
  // Just a single batch of data.
@ -68,7 +69,7 @@ void Main(bool use_gpu) {
  input.dtype = PaddleDType::FLOAT32;
  VLOG(3) << "run executor";
-  std::vector<PaddleTensor> output;
+  std::vector<PaddleTensor> output, analysis_output;
  predictor->Run({input}, &output, 1);
  VLOG(3) << "output.size " << output.size();
@ -77,6 +78,10 @@ void Main(bool use_gpu) {
  // compare with reference result
  CheckOutput(FLAGS_refer, tensor);
  // the analysis_output has some diff with native_output,
  // TODO(luotao): add CheckOutput for analysis_output later.
  analysis_predictor->Run({input}, &analysis_output, 1);
 }
 }  // namespace demo
--- a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
@ -308,18 +308,13 @@ TEST(Analyzer_rnn1, ZeroCopy) {
  PaddlePlace place;
  int output_size{0};
-  auto predictor =
+  auto predictor = CreatePaddlePredictor<AnalysisConfig>(config);
      CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
          config);
  config.use_feed_fetch_ops = true;
-  auto native_predictor =
+  auto native_predictor = CreatePaddlePredictor<NativeConfig>(config);
      CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
  config.use_feed_fetch_ops = true;  // the analysis predictor needs feed/fetch.
-  auto analysis_predictor =
+  auto analysis_predictor = CreatePaddlePredictor<AnalysisConfig>(config);
      CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
          config);
 #define NEW_TENSOR(name__) \
  auto name__##_tensor = predictor->GetInputTensor(#name__);
--- a/Show More
+++ b/Show More