Merge remote-tracking branch 'origin/develop' into feature/ir_inplace_pass

revert-15296-async_double_buffered_py_reader
dzhwinter 6 years ago
commit 06f2448848

@ -212,7 +212,7 @@ endif()
if (WITH_JEMALLOC)
find_package(JeMalloc REQUIRED)
include_directories(${JEMALLOC_INCLUDE_DIR})
add_definitions(-DWITH_JEMALLOC)
add_definitions(-DPADDLE_WITH_JEMALLOC)
endif()
include(generic) # simplify cmake module
@ -276,9 +276,3 @@ add_subdirectory(paddle)
if(WITH_PYTHON)
add_subdirectory(python)
endif()
if(WITH_DOC)
find_package(Sphinx REQUIRED)
find_python_module(recommonmark REQUIRED)
add_subdirectory(doc)
endif()

@ -11,12 +11,10 @@ RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ub
# ENV variables
ARG WITH_GPU
ARG WITH_AVX
ARG WITH_DOC
ENV WOBOQ OFF
ENV WITH_GPU=${WITH_GPU:-ON}
ENV WITH_AVX=${WITH_AVX:-ON}
ENV WITH_DOC=${WITH_DOC:-OFF}
ENV HOME /root
# Add bash enhancements

@ -1,147 +0,0 @@
# - This module looks for Sphinx
# Find the Sphinx documentation generator
#
# This module defines
# SPHINX_EXECUTABLE
# SPHINX_FOUND
# Locate the Sphinx command-line driver. The directories under PATHS are
# extra hints searched in addition to CMake's default program search
# locations; the result is stored in the SPHINX_EXECUTABLE cache entry.
find_program(SPHINX_EXECUTABLE
  NAMES sphinx-build
  PATHS
    /usr/bin
    /usr/local/bin
    /opt/local/bin
  DOC "Sphinx documentation generator"
)

# Fall back to version-suffixed launcher names (sphinx-build-<version>).
if( NOT SPHINX_EXECUTABLE )
  set(_Python_VERSIONS
    2.7 2.6 2.5 2.4 2.3 2.2 2.1 2.0 1.6 1.5
  )
  foreach( _version ${_Python_VERSIONS} )
    set( _sphinx_NAMES sphinx-build-${_version} )
    # find_program() does not search again once SPHINX_EXECUTABLE holds a
    # valid path, so the first version that matches wins.
    # The last hint directory used to read "/opt/loca/bin" (typo), which
    # silently dropped /opt/local/bin from the fallback search.
    find_program( SPHINX_EXECUTABLE
      NAMES ${_sphinx_NAMES}
      PATHS
        /usr/bin
        /usr/local/bin
        /opt/local/bin
      DOC "Sphinx documentation generator"
    )
  endforeach()
endif()
# Standard find-module epilogue: reports the result of the search for
# SPHINX_EXECUTABLE and sets the package's FOUND variable accordingly
# (failing the configure step when the package was requested as REQUIRED).
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Sphinx DEFAULT_MSG
SPHINX_EXECUTABLE
)
# One boolean cache switch per Sphinx builder. Only the HTML output is
# enabled by default.
option( SPHINX_HTML_OUTPUT "Build a single HTML with the whole content." ON )
option( SPHINX_DIRHTML_OUTPUT "Build HTML pages, but with a single directory per document." OFF )
option( SPHINX_HTMLHELP_OUTPUT "Build HTML pages with additional information for building a documentation collection in htmlhelp." OFF )
option( SPHINX_QTHELP_OUTPUT "Build HTML pages with additional information for building a documentation collection in qthelp." OFF )
option( SPHINX_DEVHELP_OUTPUT "Build HTML pages with additional information for building a documentation collection in devhelp." OFF )
option( SPHINX_EPUB_OUTPUT "Build HTML pages with additional information for building a documentation collection in epub." OFF )
option( SPHINX_LATEX_OUTPUT "Build LaTeX sources that can be compiled to a PDF document using pdflatex." OFF )
option( SPHINX_MAN_OUTPUT "Build manual pages in groff format for UNIX systems." OFF )
option( SPHINX_TEXT_OUTPUT "Build plain text files." OFF )
# Keep the executable path and the per-builder switches out of the default
# (non-advanced) view of the CMake cache editors.
mark_as_advanced(
SPHINX_EXECUTABLE
SPHINX_HTML_OUTPUT
SPHINX_DIRHTML_OUTPUT
SPHINX_HTMLHELP_OUTPUT
SPHINX_QTHELP_OUTPUT
SPHINX_DEVHELP_OUTPUT
SPHINX_EPUB_OUTPUT
SPHINX_LATEX_OUTPUT
SPHINX_MAN_OUTPUT
SPHINX_TEXT_OUTPUT
)
# Sphinx_add_target(<target_name> <builder> <conf> <cache> <source> <destination>)
#
# Adds a custom target, built as part of ALL, that runs sphinx-build once.
#   target_name  name of the custom target to create
#   builder      value passed to sphinx-build via -b
#   conf         value passed via -c
#   cache        value passed via -d
#   source       source directory argument of sphinx-build
#   destination  output directory argument of sphinx-build
#
# After the Sphinx run, a symlink index.html -> ./index_*.html is (re)created
# inside <destination>, and <destination> is registered so that the clean
# step removes it.
function( Sphinx_add_target target_name builder conf cache source destination )
  # Full sphinx-build command line, assembled once for readability.
  set( _sphinx_invocation
    ${SPHINX_EXECUTABLE}
    -b ${builder}
    -d ${cache}
    -c ${conf}
    ${source}
    ${destination}
  )

  add_custom_target( ${target_name} ALL
    COMMAND ${_sphinx_invocation}
    # The glob and "&&" are interpreted by the build shell.
    COMMAND cd ${destination} && ln -sf ./index_*.html index.html
    COMMENT "Generating sphinx documentation: ${builder}"
  )

  set_property( DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${destination} )
endfunction()
# Sphinx_add_targets(<target_base_name> <conf> <source> <base_destination> [<dependency>...])
#
# Creates one documentation target per enabled SPHINX_<BUILDER>_OUTPUT option,
# plus a "linkcheck" target when BUILD_TESTING is on. Each target is named
# <target_base_name>_<builder> and writes to <base_destination>/<builder>.
# Target dependencies can be optionally listed at the end; every generated
# target is made to depend on all of them.
function( Sphinx_add_targets target_base_name conf source base_destination )
  # Everything after the named parameters is a dependency.
  set( _dependencies ${ARGN} )

  # Collect the builders whose switch is on. The lowercase builder name is
  # both the target suffix and the output sub-directory.
  set( _builders )
  foreach( _builder html dirhtml htmlhelp qthelp devhelp epub latex man text )
    string( TOUPPER "${_builder}" _builder_upper )
    if( SPHINX_${_builder_upper}_OUTPUT )
      list( APPEND _builders ${_builder} )
    endif()
  endforeach()
  if( BUILD_TESTING )
    list( APPEND _builders linkcheck )
  endif()

  foreach( _builder IN LISTS _builders )
    set( _target ${target_base_name}_${_builder} )
    set( _destination ${base_destination}/${_builder} )
    # Sphinx_add_target() takes six arguments; the doctree cache directory
    # used to be omitted here, which made every call fail at configure time.
    # Use <destination>/.doctrees, one cache per builder.
    set( _cache ${_destination}/.doctrees )

    Sphinx_add_target( ${_target} ${_builder} ${conf} ${_cache} ${source} ${_destination} )

    # add_dependencies() rejects an empty dependency list.
    if( _dependencies )
      add_dependencies( ${_target} ${_dependencies} )
    endif()
  endforeach()
endfunction()

@ -325,6 +325,7 @@ paddle.fluid.layers.iou_similarity ArgSpec(args=['x', 'y', 'name'], varargs=None
paddle.fluid.layers.box_coder ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name'], varargs=None, keywords=None, defaults=('encode_center_size', True, None))
paddle.fluid.layers.polygon_box_transform ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.yolov3_loss ArgSpec(args=['x', 'gtbox', 'gtlabel', 'anchors', 'class_num', 'ignore_thresh', 'loss_weight_xy', 'loss_weight_wh', 'loss_weight_conf_target', 'loss_weight_conf_notarget', 'loss_weight_class', 'name'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None))
paddle.fluid.layers.multiclass_nms ArgSpec(args=['bboxes', 'scores', 'score_threshold', 'nms_top_k', 'keep_top_k', 'nms_threshold', 'normalized', 'nms_eta', 'background_label', 'name'], varargs=None, keywords=None, defaults=(0.3, True, 1.0, 0, None))
paddle.fluid.layers.accuracy ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None))
paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk', 'slide_steps'], varargs=None, keywords=None, defaults=('ROC', 4095, 1, 1))
paddle.fluid.layers.exponential_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))

@ -555,18 +555,17 @@ Tensor* ExecutionContext::LegacyOutput<Tensor>(const std::string& name) const {
template <>
std::vector<Tensor*> ExecutionContext::MultiOutput<Tensor>(
const std::string& name) const {
auto names = op().Outputs(name);
auto it = ctx_.outputs.find(name);
if (it == ctx_.outputs.end()) {
return {};
}
const std::vector<Variable*>& vars = it->second;
std::vector<Tensor*> res;
res.reserve(names.size());
std::transform(names.begin(), names.end(), std::back_inserter(res),
[&](const std::string& sub_name) -> Tensor* {
auto var = scope_.FindVar(sub_name);
if (var == nullptr) return nullptr;
PADDLE_ENFORCE(
var->IsType<LoDTensor>(),
"%s should be LoDTensor, but the received type is %s",
sub_name, ToTypeName(var->Type()));
return var->GetMutable<LoDTensor>();
res.reserve(vars.size());
std::transform(vars.begin(), vars.end(), std::back_inserter(res),
[&](Variable* var) -> Tensor* {
return var == nullptr ? nullptr
: var->GetMutable<LoDTensor>();
});
return res;
}

@ -156,6 +156,8 @@ class Autograd {
for (auto it : candidate->pre_ops_) {
for (OpBase* pre_op : it.second) {
if (!pre_op) continue;
VLOG(5) << "op dep " << candidate->op_desc_->Type() << " <---- "
<< it.first << " <---- " << pre_op->op_desc_->Type();
if (visited.find(pre_op) == visited.end()) {
visited.insert(pre_op);
queue.push_back(pre_op);

@ -28,6 +28,7 @@
#include "paddle/fluid/framework/var_desc.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/imperative/type_defs.h"
@ -140,16 +141,24 @@ class VarBase {
void RunBackward();
void TrackPreOp(OpBase* pre_op, const std::string& pre_op_out_name,
int pre_op_out_idx, bool stop_gradient) {
int pre_op_out_idx, bool pre_op_stop_gradient) {
pre_op_ = pre_op;
pre_op_out_name_ = pre_op_out_name;
pre_op_out_idx_ = pre_op_out_idx;
stop_gradient_ = stop_gradient;
if (pre_op_stop_gradient) {
stop_gradient_ = pre_op_stop_gradient;
}
}
void ClearGradient() {
delete grads_;
grads_ = new VarBase(true);
VLOG(1) << "clear gradient of " << var_desc_->Name();
if (grads_ && grads_->var_ && grads_->var_->IsInitialized()) {
auto grads_t = grads_->var_->GetMutable<framework::LoDTensor>();
operators::math::set_constant(
*(platform::DeviceContextPool::Instance().Get(
grads_->var_->Get<framework::LoDTensor>().place())),
grads_t, 0.0);
}
}
framework::LoDTensor& GradValue();

@ -31,6 +31,7 @@ void CreateGradOp(const framework::OpDesc& op_desc,
framework::OpInfoMap::Instance()
.Get(op_desc.Type())
.GradOpMaker()(op_desc, no_grad_set, grad_to_var, grad_sub_block);
for (auto& desc : descs) {
grad_op_descs->emplace_back(desc.release());
}
@ -84,11 +85,12 @@ void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
op->input_vars_ = inputs;
for (auto it : op->input_vars_) {
auto& invars = invars_map[it.first];
invars.reserve(it.second.size());
for (VarBase* inp : it.second) {
PADDLE_ENFORCE_NOT_NULL(inp->var_, "op %s input %s nullptr",
op->op_desc_->Type(), inp->var_desc_->Name());
invars.push_back(inp->var_);
invars.emplace_back(inp->var_);
vars[inp->var_desc_->Name()] = inp;
if (inp->PreOp()) {
op->pre_ops_[it.first].push_back(inp->PreOp());
@ -105,9 +107,10 @@ void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
for (auto it : op->output_vars_) {
auto& outvars = outvars_map[it.first];
const std::vector<VarBase*>& outputs = it.second;
outvars.reserve(outputs.size());
for (size_t i = 0; i < outputs.size(); ++i) {
VarBase* out = outputs[i];
outvars.push_back(out->var_);
outvars.emplace_back(out->var_);
vars[out->var_desc_->Name()] = out;
framework::VarDesc* var_desc = block->FindVar(out->var_desc_->Name());

@ -132,7 +132,7 @@ struct Argument {
DECL_ARGUMENT_FIELD(tensorrt_workspace_size, TensorRtWorkspaceSize, int);
DECL_ARGUMENT_FIELD(tensorrt_min_subgraph_size, TensorRtMinSubgraphSize, int);
DECL_ARGUMENT_FIELD(tensorrt_precision_mode, TensorRtPrecisionMode,
contrib::AnalysisConfig::Precision);
AnalysisConfig::Precision);
// Memory optimized related.
DECL_ARGUMENT_FIELD(enable_memory_optim, EnableMemoryOptim, bool);

@ -32,7 +32,7 @@ limitations under the License. */
#ifdef _WIN32
#include <direct.h>
#include <io.h>
#define GCC_ATTRIBUTE(attr__) ;
#define GCC_ATTRIBUTE(attr__)
#define MKDIR(path) _mkdir(path)
#else
#include <unistd.h>

@ -71,7 +71,7 @@ void IRPassManager::CreatePasses(Argument *argument,
new framework::ProgramDesc *(&argument->main_program()));
bool enable_int8 = argument->tensorrt_precision_mode() ==
contrib::AnalysisConfig::Precision::kInt8;
AnalysisConfig::Precision::kInt8;
pass->Set("enable_int8", new bool(enable_int8));
std::string model_opt_cache_dir =

@ -13,7 +13,9 @@
// limitations under the License.
#pragma once
#include <string>
#include <utility>
#include <vector>
#include "paddle/fluid/inference/analysis/analysis_pass.h"
#include "paddle/fluid/platform/port.h"

@ -22,7 +22,7 @@
namespace paddle {
PassStrategy *contrib::AnalysisConfig::pass_builder() const {
PassStrategy *AnalysisConfig::pass_builder() const {
if (!pass_builder_.get()) {
if (use_gpu_) {
LOG(INFO) << "Create GPU IR passes";
@ -42,27 +42,27 @@ PassStrategy *contrib::AnalysisConfig::pass_builder() const {
return pass_builder_.get();
}
contrib::AnalysisConfig::AnalysisConfig(const std::string &model_dir) {
AnalysisConfig::AnalysisConfig(const std::string &model_dir) {
model_dir_ = model_dir;
Update();
}
contrib::AnalysisConfig::AnalysisConfig(const std::string &prog_file,
const std::string &params_file) {
AnalysisConfig::AnalysisConfig(const std::string &prog_file,
const std::string &params_file) {
prog_file_ = prog_file;
params_file_ = params_file;
Update();
}
void contrib::AnalysisConfig::SetModel(const std::string &prog_file_path,
const std::string &params_file_path) {
void AnalysisConfig::SetModel(const std::string &prog_file_path,
const std::string &params_file_path) {
prog_file_ = prog_file_path;
params_file_ = params_file_path;
Update();
}
void contrib::AnalysisConfig::EnableUseGpu(uint64_t memory_pool_init_size_mb,
int device_id) {
void AnalysisConfig::EnableUseGpu(uint64_t memory_pool_init_size_mb,
int device_id) {
#ifdef PADDLE_WITH_CUDA
use_gpu_ = true;
memory_pool_init_size_mb_ = memory_pool_init_size_mb;
@ -74,13 +74,13 @@ void contrib::AnalysisConfig::EnableUseGpu(uint64_t memory_pool_init_size_mb,
Update();
}
void contrib::AnalysisConfig::DisableGpu() {
void AnalysisConfig::DisableGpu() {
use_gpu_ = false;
Update();
}
contrib::AnalysisConfig::AnalysisConfig(const contrib::AnalysisConfig &other) {
AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
#define CP_MEMBER(member__) member__ = other.member__;
// Model related.
@ -130,7 +130,7 @@ contrib::AnalysisConfig::AnalysisConfig(const contrib::AnalysisConfig &other) {
Update();
}
void contrib::AnalysisConfig::EnableMKLDNN() {
void AnalysisConfig::EnableMKLDNN() {
#ifdef PADDLE_WITH_MKLDNN
pass_builder()->EnableMKLDNN();
use_mkldnn_ = true;
@ -142,9 +142,9 @@ void contrib::AnalysisConfig::EnableMKLDNN() {
Update();
}
void contrib::AnalysisConfig::EnableTensorRtEngine(
void AnalysisConfig::EnableTensorRtEngine(
int workspace_size, int max_batch_size, int min_subgraph_size,
contrib::AnalysisConfig::Precision precision_mode) {
AnalysisConfig::Precision precision_mode) {
#ifdef PADDLE_WITH_CUDA
if (!use_gpu()) {
LOG(ERROR) << "To use TensorRT engine, please call EnableGpu() first";
@ -165,7 +165,7 @@ void contrib::AnalysisConfig::EnableTensorRtEngine(
}
// TODO(Superjomn) refactor this, buggy.
void contrib::AnalysisConfig::Update() {
void AnalysisConfig::Update() {
auto info = SerializeInfoCache();
if (info == serialized_info_cache_) return;
@ -225,7 +225,7 @@ void contrib::AnalysisConfig::Update() {
}
}
std::string contrib::AnalysisConfig::SerializeInfoCache() {
std::string AnalysisConfig::SerializeInfoCache() {
std::stringstream ss;
ss << model_dir_;
ss << prog_file_;
@ -260,14 +260,14 @@ std::string contrib::AnalysisConfig::SerializeInfoCache() {
return ss.str();
}
void contrib::AnalysisConfig::SetCpuMathLibraryNumThreads(
void AnalysisConfig::SetCpuMathLibraryNumThreads(
int cpu_math_library_num_threads) {
cpu_math_library_num_threads_ = cpu_math_library_num_threads;
Update();
}
float contrib::AnalysisConfig::fraction_of_gpu_memory_for_pool() const {
float AnalysisConfig::fraction_of_gpu_memory_for_pool() const {
#ifdef PADDLE_WITH_CUDA
// Get the GPU memory details and calculate the fraction of memory for the
// GPU memory pool.
@ -282,8 +282,8 @@ float contrib::AnalysisConfig::fraction_of_gpu_memory_for_pool() const {
#endif
}
void contrib::AnalysisConfig::EnableMemoryOptim(
bool static_optim, bool force_update_static_cache) {
void AnalysisConfig::EnableMemoryOptim(bool static_optim,
bool force_update_static_cache) {
enable_memory_optim_ = true;
static_memory_optim_ = static_optim;
static_memory_optim_force_update_ = force_update_static_cache;
@ -291,14 +291,14 @@ void contrib::AnalysisConfig::EnableMemoryOptim(
Update();
}
bool contrib::AnalysisConfig::enable_memory_optim() const {
bool AnalysisConfig::enable_memory_optim() const {
return enable_memory_optim_;
}
void contrib::AnalysisConfig::SetModelBuffer(const char *prog_buffer,
size_t prog_buffer_size,
const char *param_buffer,
size_t param_buffer_size) {
void AnalysisConfig::SetModelBuffer(const char *prog_buffer,
size_t prog_buffer_size,
const char *param_buffer,
size_t param_buffer_size) {
prog_file_ = std::string(prog_buffer, prog_buffer + prog_buffer_size);
params_file_ = std::string(param_buffer, param_buffer + param_buffer_size);
model_from_memory_ = true;
@ -306,7 +306,7 @@ void contrib::AnalysisConfig::SetModelBuffer(const char *prog_buffer,
Update();
}
NativeConfig contrib::AnalysisConfig::ToNativeConfig() const {
NativeConfig AnalysisConfig::ToNativeConfig() const {
NativeConfig config;
config.model_dir = model_dir_;
config.prog_file = prog_file_;

@ -47,7 +47,6 @@ DECLARE_bool(profile);
namespace paddle {
using contrib::AnalysisConfig;
using inference::Singleton;
#if PADDLE_WITH_TENSORRT
using inference::tensorrt::TRTInt8Calibrator;
@ -123,6 +122,15 @@ bool AnalysisPredictor::PrepareProgram(
if (!program) {
if (!LoadProgramDesc()) return false;
// If not cloned, the parameters should be loaded.
// If config_.ir_optim() is True, parameters are loaded in
// OptimizeInferenceProgram(), but other persistable variables
// (like RAW type var) are not created in scope.
// If config_.ir_optim() is False, parameters are loaded in LoadParameters(),
// but the other persistable variables still need to be created.
// So in both cases, create the persistable variables first.
executor_->CreateVariables(*inference_program_, 0, true, sub_scope_);
// Optimize the program, and load parameters and modify them in the
// scope_.
// This will change the scope_ address.
@ -130,15 +138,6 @@ bool AnalysisPredictor::PrepareProgram(
status_ir_optim_enabled_ = true;
OptimizeInferenceProgram();
} else {
// If the parent_scope is passed, we assert that the persistable variables
// are already created, so just create the no persistable variables.
// If not cloned, the parameters should be loaded
// OptimizeInferenceProgram.
// So in both cases, just the local variables are needed to load, not the
// parematers.
executor_->CreateVariables(*inference_program_, 0, true, sub_scope_);
// Load parameters
LOG(INFO) << "load parameters ";
LoadParameters();
@ -376,7 +375,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
}
argument_.SetIrAnalysisPasses(passes);
argument_.SetAnalysisPasses(config_.pass_builder()->AnalysisPasses());
argument_.SetScopeNotOwned(const_cast<framework::Scope *>(scope_.get()));
argument_.SetScopeNotOwned(scope_.get());
Analyzer().Run(&argument_);
PADDLE_ENFORCE(argument_.scope_valid());
@ -731,10 +730,10 @@ std::string AnalysisPredictor::GetSeriazlizedProgram() const {
}
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<contrib::AnalysisConfig>(
const contrib::AnalysisConfig &config) {
return CreatePaddlePredictor<contrib::AnalysisConfig,
PaddleEngineKind::kAnalysis>(config);
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<AnalysisConfig>(
const AnalysisConfig &config) {
return CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
config);
}
} // namespace paddle

@ -33,7 +33,6 @@ using inference::analysis::Argument;
using inference::analysis::Analyzer;
using framework::proto::ProgramDesc;
using framework::NaiveExecutor;
using contrib::AnalysisConfig;
/** \brief This predictor is based on the original native predictor with IR and
* Analysis support.
@ -123,7 +122,7 @@ class AnalysisPredictor : public PaddlePredictor {
#endif
private:
contrib::AnalysisConfig config_;
AnalysisConfig config_;
Argument argument_;
std::unique_ptr<NaiveExecutor> executor_;
platform::Place place_;

@ -24,7 +24,6 @@
DEFINE_string(dirname, "", "dirname to tests.");
namespace paddle {
using contrib::AnalysisConfig;
TEST(AnalysisPredictor, analysis_off) {
AnalysisConfig config;

@ -295,7 +295,7 @@ TEST(inference_api_native, image_classification_gpu) {
#endif
TEST(PassBuilder, Delete) {
contrib::AnalysisConfig config;
AnalysisConfig config;
config.DisableGpu();
config.pass_builder()->DeletePass("attention_lstm_fuse_pass");
const auto& passes = config.pass_builder()->AllPasses();

@ -36,7 +36,7 @@ namespace demo {
*/
void Main() {
std::unique_ptr<PaddlePredictor> predictor;
paddle::contrib::AnalysisConfig config;
paddle::AnalysisConfig config;
config.EnableUseGpu(100, 0);
config.SetModel(FLAGS_modeldir + "/__model__",
FLAGS_modeldir + "/__params__");

@ -34,7 +34,6 @@ DEFINE_bool(use_gpu, false, "Whether use gpu.");
namespace paddle {
namespace demo {
using contrib::AnalysisConfig;
/*
* Use the native and analysis fluid engine to inference the demo.
*/

@ -29,11 +29,6 @@
namespace paddle {
class AnalysisPredictor;
// ==
//
// -----------------------------------------------------------------------------------
// NOTE: The following APIs are not mature yet, we are still working on them.
namespace contrib {
// NOTE WIP, not stable yet.
struct AnalysisConfig {
@ -260,5 +255,4 @@ struct AnalysisConfig {
mutable std::unique_ptr<PassStrategy> pass_builder_;
};
} // namespace contrib
} // namespace paddle

@ -221,7 +221,7 @@ class PaddlePredictor {
virtual std::string GetSeriazlizedProgram() const {
assert(false); // Force raise error.
return "NotImplemented";
};
}
/** The common configs for all the predictors.
*/

@ -13,16 +13,16 @@
// limitations under the License.
#pragma once
#include <NvInfer.h>
#include <cuda_runtime_api.h>
#include <atomic>
#include <memory>
#include <mutex>
#include <mutex> // NOLINT
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include <NvInfer.h>
#include <cuda_runtime_api.h>
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/inference/tensorrt/engine.h"
#include "paddle/fluid/platform/place.h"

@ -128,6 +128,11 @@ inference_analysis_api_test_with_fake_data(test_analyzer_resnet50
inference_analysis_api_test_with_fake_data(test_analyzer_mobilenet_depthwise_conv
"${INFERENCE_DEMO_INSTALL_DIR}/mobilenet_depthwise_conv" analyzer_resnet50_tester.cc "mobilenet_model.tar.gz" SERIAL)
# bert, max_len=20
set(BERT_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/bert20")
download_model_and_data(${BERT_INSTALL_DIR} "bert_model.tar.gz" "bert_data_len20.txt.tar.gz")
inference_analysis_api_test(test_analyzer_bert ${BERT_INSTALL_DIR} analyzer_bert_tester.cc SERIAL)
# anakin
if (WITH_ANAKIN AND WITH_MKL) # only needed in CI
# anakin rnn1

@ -0,0 +1,223 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace paddle {
namespace inference {
using paddle::PaddleTensor;
// Extracts one value of type T from the stream using formatted input
// (operator>>).
template <typename T>
void GetValueFromStream(std::stringstream *ss, T *t) {
  std::stringstream &stream = *ss;
  stream >> *t;
}

// std::string specialization: takes the stream's entire buffer verbatim,
// so embedded whitespace is kept instead of ending the token.
template <>
void GetValueFromStream<std::string>(std::stringstream *ss, std::string *t) {
  t->assign(ss->str());
}
// Split string to vector.
//
// Characters of `line` are accumulated until `sep` is seen; the accumulated
// token is then converted with GetValueFromStream<T> and appended to `v`.
// A separator always emits a token (even an empty one), whereas the text
// after the last separator is emitted only when it is non-empty.
template <typename T>
void Split(const std::string &line, char sep, std::vector<T> *v) {
  std::stringstream token;
  T value;

  // Converts the pending token, stores it, and resets the buffer together
  // with any eof/fail bits left behind by the extraction.
  const auto emit = [&]() {
    GetValueFromStream<T>(&token, &value);
    v->push_back(std::move(value));
    token.str(std::string());
    token.clear();
  };

  for (const char ch : line) {
    if (ch == sep) {
      emit();
    } else {
      token << ch;
    }
  }

  if (!token.str().empty()) {
    emit();
  }
}
// Maps a C++ element type to the matching paddle::PaddleDType tag.
// Only the specializations below are defined; the primary template is
// declared but has no definition.
template <typename T>
constexpr paddle::PaddleDType GetPaddleDType();
// int64_t -> INT64
template <>
constexpr paddle::PaddleDType GetPaddleDType<int64_t>() {
return paddle::PaddleDType::INT64;
}
// float -> FLOAT32
template <>
constexpr paddle::PaddleDType GetPaddleDType<float>() {
return paddle::PaddleDType::FLOAT32;
}
// Parse tensor from string.
//
// `field` is expected to look like "<shape>:<values>", where both parts are
// space-separated lists: the shape is read as ints and the values as T.
// On success the tensor's shape, data buffer and dtype are filled in and
// true is returned. False is returned, and `tensor` is left untouched, when
//   * the ':' separated parts are missing,
//   * a dimension is negative, or
//   * more values are supplied than the shape has room for (copying them
//     would write past the end of the resized buffer).
// If fewer values than the shape's element count are supplied, only those
// values are written; the rest of the buffer is not written by this function.
template <typename T>
bool ParseTensor(const std::string &field, paddle::PaddleTensor *tensor) {
  std::vector<std::string> data;
  Split(field, ':', &data);
  if (data.size() < 2) return false;

  std::vector<int> shape;
  Split(data[0], ' ', &shape);

  std::vector<T> mat;
  Split(data[1], ' ', &mat);

  // Element count implied by the shape (1 for an empty shape, matching the
  // previous std::accumulate-based computation).
  size_t numel = 1;
  for (int dim : shape) {
    if (dim < 0) return false;
    numel *= static_cast<size_t>(dim);
  }
  if (mat.size() > numel) return false;

  tensor->shape = shape;
  tensor->data.Resize(numel * sizeof(T));
  std::copy(mat.begin(), mat.end(), static_cast<T *>(tensor->data.data()));
  tensor->dtype = GetPaddleDType<T>();
  return true;
}
// Parse input tensors from string
bool ParseLine(const std::string &line,
std::vector<paddle::PaddleTensor> *tensors) {
std::vector<std::string> fields;
Split(line, ';', &fields);
if (fields.size() < 5) return false;
tensors->clear();
tensors->reserve(5);
int i = 0;
// src_id
paddle::PaddleTensor src_id;
ParseTensor<int64_t>(fields[i++], &src_id);
tensors->push_back(src_id);
// pos_id
paddle::PaddleTensor pos_id;
ParseTensor<int64_t>(fields[i++], &pos_id);
tensors->push_back(pos_id);
// segment_id
paddle::PaddleTensor segment_id;
ParseTensor<int64_t>(fields[i++], &segment_id);
tensors->push_back(segment_id);
// self_attention_bias
paddle::PaddleTensor self_attention_bias;
ParseTensor<float>(fields[i++], &self_attention_bias);
tensors->push_back(self_attention_bias);
// next_segment_index
paddle::PaddleTensor next_segment_index;
ParseTensor<int64_t>(fields[i++], &next_segment_index);
tensors->push_back(next_segment_index);
return true;
}
// Reads samples from the file named by FLAGS_infer_data, one sample per
// line, and appends each parsed sample to `inputs`.
//
// Reading stops at end of file, or after FLAGS_batch_size samples unless
// FLAGS_test_all_data is set. Returns false (after logging an error) when
// the path flag is empty, the file cannot be opened, or a line cannot be
// parsed; returns true otherwise.
bool LoadInputData(std::vector<std::vector<paddle::PaddleTensor>> *inputs) {
  if (FLAGS_infer_data.empty()) {
    LOG(ERROR) << "please set input data path";
    return false;
  }

  std::ifstream fin(FLAGS_infer_data);
  if (!fin.is_open()) {
    LOG(ERROR) << "failed to open input data file: " << FLAGS_infer_data;
    return false;
  }

  std::string line;
  int sample = 0;
  // The unit-test dataset only has 10 samples; each sample has 5 feeds.
  while (std::getline(fin, line)) {
    std::vector<paddle::PaddleTensor> feed_data;
    if (!ParseLine(line, &feed_data)) {
      // Every line read so far was a sample, so the line number is the
      // sample count plus one.
      LOG(ERROR) << "failed to parse input data at line " << (sample + 1);
      return false;
    }
    inputs->push_back(std::move(feed_data));
    sample++;
    if (!FLAGS_test_all_data && sample == FLAGS_batch_size) break;
  }
  LOG(INFO) << "number of samples: " << sample;
  return true;
}
// Points `config` at the model location given by FLAGS_infer_model.
void SetConfig(AnalysisConfig *config) { config->SetModel(FLAGS_infer_model); }
void profile(bool use_mkldnn = false) {
AnalysisConfig config;
SetConfig(&config);
if (use_mkldnn) {
config.EnableMKLDNN();
}
std::vector<PaddleTensor> outputs;
std::vector<std::vector<PaddleTensor>> inputs;
LoadInputData(&inputs);
TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&config),
inputs, &outputs, FLAGS_num_threads);
}
// Profile with the default configuration.
TEST(Analyzer_bert, profile) { profile(); }
// Profile again with MKLDNN enabled; compiled only in MKLDNN builds.
#ifdef PADDLE_WITH_MKLDNN
TEST(Analyzer_bert, profile_mkldnn) { profile(true); }
#endif
// Check the fuse status: creates an analysis predictor for the model,
// queries its fuse statistics, and logs the reported operator count.
TEST(Analyzer_bert, fuse_statis) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
  // Fail here rather than hand a null predictor to GetFuseStatis().
  ASSERT_TRUE(predictor != nullptr);

  // Initialized so the log line below never prints an indeterminate value.
  int num_ops = 0;
  auto fuse_statis = GetFuseStatis(
      static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
  (void)fuse_statis;  // Only the operator count is reported by this test.
  LOG(INFO) << "num_ops: " << num_ops;
}
// Compare result of NativeConfig and AnalysisConfig
void compare(bool use_mkldnn = false) {
AnalysisConfig cfg;
SetConfig(&cfg);
if (use_mkldnn) {
cfg.EnableMKLDNN();
}
std::vector<std::vector<PaddleTensor>> inputs;
LoadInputData(&inputs);
CompareNativeAndAnalysis(
reinterpret_cast<const PaddlePredictor::Config *>(&cfg), inputs);
}
// Native-vs-analysis comparison with the default configuration.
TEST(Analyzer_bert, compare) { compare(); }
// Same comparison with MKLDNN enabled; compiled only in MKLDNN builds.
#ifdef PADDLE_WITH_MKLDNN
TEST(Analyzer_bert, compare_mkldnn) { compare(true /* use_mkldnn */); }
#endif
// Compare Deterministic result: runs CompareDeterministic() on the loaded
// input data with the configuration built by SetConfig().
TEST(Analyzer_bert, compare_determine) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  std::vector<std::vector<PaddleTensor>> inputs;
  // Abort the test if the data cannot be loaded instead of comparing on
  // missing inputs.
  ASSERT_TRUE(LoadInputData(&inputs));

  CompareDeterministic(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
                       inputs);
}
} // namespace inference
} // namespace paddle

@ -19,7 +19,6 @@ DEFINE_int32(max_turn_num, 9,
namespace paddle {
namespace inference {
using contrib::AnalysisConfig;
constexpr int32_t kMaxTurnLen = 50;
@ -165,7 +164,7 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
input_slots->push_back(std::move(response_mask_tensor));
}
void SetConfig(contrib::AnalysisConfig *cfg) {
void SetConfig(AnalysisConfig *cfg) {
cfg->SetModel(FLAGS_infer_model + "/__model__", FLAGS_infer_model + "/param");
cfg->SwitchSpecifyInputNames();
cfg->SwitchIrOptim(true);
@ -187,7 +186,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
// Easy for profiling independently.
void profile(bool use_mkldnn = false) {
contrib::AnalysisConfig cfg;
AnalysisConfig cfg;
SetConfig(&cfg);
if (use_mkldnn) {
@ -223,7 +222,7 @@ TEST(Analyzer_dam, profile_mkldnn) { profile(true /* use_mkldnn */); }
// Check the fuse status
TEST(Analyzer_dam, fuse_statis) {
contrib::AnalysisConfig cfg;
AnalysisConfig cfg;
SetConfig(&cfg);
int num_ops;
@ -256,7 +255,7 @@ void compare(bool use_mkldnn = false) {
TEST(Analyzer_dam, compare_with_static_memory_optim) {
// The small dam will core in CI, but works in local.
if (FLAGS_max_turn_num == 9) {
contrib::AnalysisConfig cfg, cfg1;
AnalysisConfig cfg, cfg1;
DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
std::vector<std::vector<PaddleTensor>> input_slots_all;
@ -282,7 +281,7 @@ TEST(Analyzer_dam, compare_with_static_memory_optim) {
TEST(Analyzer_dam, compare_with_dynamic_memory_optim) {
// The small dam will core in CI, but works in local.
if (FLAGS_max_turn_num == 9) {
contrib::AnalysisConfig cfg, cfg1;
AnalysisConfig cfg, cfg1;
DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
std::vector<std::vector<PaddleTensor>> input_slots_all;

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save