@@ -33,6 +33,7 @@
 #include "paddle/fluid/inference/utils/singleton.h"
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/fluid/platform/cpu_helper.h"
+#include "paddle/fluid/platform/gpu_info.h"
 #include "paddle/fluid/platform/profiler.h"
 
 DECLARE_bool(profile);
@@ -59,8 +60,8 @@ bool AnalysisPredictor::Init(
   if (FLAGS_profile) {
     LOG(WARNING) << "Profiler is actived, might affect the performance";
     LOG(INFO) << "You can turn off by set gflags '-profile false'";
-    auto tracking_device = config_.use_gpu ? platform::ProfilerState::kAll
-                                           : platform::ProfilerState::kCPU;
+    auto tracking_device = config_.use_gpu() ? platform::ProfilerState::kAll
+                                             : platform::ProfilerState::kCPU;
     platform::EnableProfiler(tracking_device);
   }
 
@@ -112,7 +113,7 @@ bool AnalysisPredictor::PrepareProgram(
     // Optimize the program, and load parameters and modify them in the
     // scope_.
     // This will change the scope_ address.
-    if (config_.enable_ir_optim) {
+    if (config_.ir_optim()) {
       status_ir_optim_enabled_ = true;
       OptimizeInferenceProgram();
     } else {
@@ -140,9 +141,9 @@ bool AnalysisPredictor::PrepareProgram(
   return true;
 }
 bool AnalysisPredictor::CreateExecutor() {
-  if (config_.use_gpu) {
+  if (config_.use_gpu_) {
     status_use_gpu_ = true;
-    place_ = paddle::platform::CUDAPlace(config_.device);
+    place_ = paddle::platform::CUDAPlace(config_.device_id_);
   } else {
     place_ = paddle::platform::CPUPlace();
   }
@@ -151,7 +152,7 @@ bool AnalysisPredictor::CreateExecutor() {
 }
 bool AnalysisPredictor::PrepareExecutor() {
   executor_->Prepare(sub_scope_, *inference_program_, 0,
-                     config_.use_feed_fetch_ops);
+                     config_.use_feed_fetch_ops_);
 
   PADDLE_ENFORCE_NOT_NULL(sub_scope_);
 
@@ -250,7 +251,7 @@ bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
     }
     input.set_lod(lod);
     int idx = -1;
-    if (config_.specify_input_name) {
+    if (config_.specify_input_name_) {
       auto name = inputs[i].name;
       if (feed_names_.find(name) == feed_names_.end()) {
         LOG(ERROR) << "feed names from program do not have name: [" << name
@@ -314,22 +315,22 @@ bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
 void AnalysisPredictor::OptimizeInferenceProgram() {
   status_program_optimized_ = true;
 
-  argument_.SetUseGPU(config_.use_gpu);
-  argument_.SetGPUDeviceId(config_.device);
+  argument_.SetUseGPU(config_.use_gpu());
+  argument_.SetGPUDeviceId(config_.gpu_device_id());
   argument_.SetModelFromMemory(config_.model_from_memory_);
   // Analyze inference_program
-  if (!config_.model_dir.empty()) {
-    argument_.SetModelDir(config_.model_dir);
+  if (!config_.model_dir().empty()) {
+    argument_.SetModelDir(config_.model_dir());
   } else {
     PADDLE_ENFORCE(
-        !config_.param_file.empty(),
+        !config_.params_file().empty(),
         "Either model_dir or (param_file, prog_file) should be set.");
-    PADDLE_ENFORCE(!config_.prog_file.empty());
-    argument_.SetModelProgramPath(config_.prog_file);
-    argument_.SetModelParamsPath(config_.param_file);
+    PADDLE_ENFORCE(!config_.prog_file().empty());
+    argument_.SetModelProgramPath(config_.prog_file());
+    argument_.SetModelParamsPath(config_.params_file());
   }
 
-  if (config_.use_gpu && config_.use_tensorrt_) {
+  if (config_.use_gpu() && config_.tensorrt_engine_enabled()) {
     argument_.SetUseTensorRT(true);
     argument_.SetTensorRtWorkspaceSize(config_.tensorrt_workspace_size_);
     argument_.SetTensorRtMaxBatchSize(config_.tensorrt_max_batchsize_);
@@ -341,7 +342,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
   }
 
   auto passes = config_.pass_builder()->AllPasses();
-  if (!config_.enable_ir_optim) passes.clear();
+  if (!config_.ir_optim()) passes.clear();
   argument_.SetIrAnalysisPasses(passes);
   argument_.SetScopeNotOwned(const_cast<framework::Scope *>(scope_.get()));
   Analyzer().Run(&argument_);
@@ -358,18 +359,26 @@ template <>
 std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
     AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig &config) {
   VLOG(3) << "create AnalysisConfig";
-  if (config.use_gpu) {
+  if (config.use_gpu()) {
     // 1. GPU memeroy
-    PADDLE_ENFORCE_GT(
-        config.fraction_of_gpu_memory, 0.f,
-        "fraction_of_gpu_memory in the config should be set to range (0., 1.]");
-    PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device);
+    PADDLE_ENFORCE_GT(config.memory_pool_init_size_mb(), 0.f);
+    PADDLE_ENFORCE_GE(config.gpu_device_id(), 0, "Invalid device id %d",
+                      config.gpu_device_id());
     std::vector<std::string> flags;
-    if (config.fraction_of_gpu_memory >= 0.0f ||
-        config.fraction_of_gpu_memory <= 0.95f) {
+
+    float fraction_of_gpu_memory = config.fraction_of_gpu_memory_for_pool();
+    if (fraction_of_gpu_memory > 0.95f) {
+      LOG(ERROR)
+          << "Allocate too much memory for the GPU memory pool, assigned "
+          << config.memory_pool_init_size_mb() << " MB";
+      LOG(ERROR)
+          << "Try to shink the value by setting AnalysisConfig::EnableGpu(...)";
+    }
+
+    if (fraction_of_gpu_memory >= 0.0f || fraction_of_gpu_memory <= 0.95f) {
       flags.push_back("dummpy");
       std::string flag = "--fraction_of_gpu_memory_to_use=" +
-                         std::to_string(config.fraction_of_gpu_memory);
+                         std::to_string(fraction_of_gpu_memory);
       flags.push_back(flag);
       VLOG(3) << "set flag: " << flag;
       framework::InitGflags(flags);
@@ -443,22 +452,22 @@ bool AnalysisPredictor::ZeroCopyRun() {
 bool AnalysisPredictor::LoadProgramDesc() {
   // Initialize the inference program
   std::string filename;
-  if (!config_.model_dir.empty()) {
-    filename = config_.model_dir + "/__model__";
-  } else if (!config_.prog_file.empty() && !config_.param_file.empty()) {
+  if (!config_.model_dir().empty()) {
+    filename = config_.model_dir() + "/__model__";
+  } else if (!config_.prog_file().empty() && !config_.params_file().empty()) {
     // All parameters are saved in a single file.
     // The file names should be consistent with that used
     // in Python API `fluid.io.save_inference_model`.
-    filename = config_.prog_file;
+    filename = config_.prog_file();
   } else {
-    if (config_.model_dir.empty() && config_.prog_file.empty()) {
+    if (config_.model_dir().empty() && config_.prog_file().empty()) {
       LOG(ERROR)
           << "Either model_dir or (prog_file, param_file) should be set.";
       return false;
     }
     LOG(ERROR) << string::Sprintf(
-        "not valid model path '%s' or program path '%s'.", config_.model_dir,
-        config_.param_file);
+        "not valid model path '%s' or program path '%s'.", config_.model_dir(),
+        config_.params_file());
     return false;
   }
 
@@ -478,7 +487,7 @@ bool AnalysisPredictor::LoadProgramDesc() {
 
     proto.ParseFromString(pb_content);
   } else {
-    proto.ParseFromString(config_.prog_file);
+    proto.ParseFromString(config_.prog_file());
   }
   inference_program_.reset(new framework::ProgramDesc(proto));
   return true;
@@ -508,27 +517,27 @@ bool AnalysisPredictor::LoadParameters() {
       new_var->SetLoDLevel(var->GetLoDLevel());
       new_var->SetPersistable(true);
 
-      if (!config_.param_file.empty()) {
+      if (!config_.params_file().empty()) {
         params.push_back(new_var->Name());
       } else {
         // append_op
        framework::OpDesc *op = load_block->AppendOp();
         op->SetType("load");
         op->SetOutput("Out", {new_var->Name()});
-        op->SetAttr("file_path", {config_.model_dir + "/" + new_var->Name()});
+        op->SetAttr("file_path", {config_.model_dir() + "/" + new_var->Name()});
         op->CheckAttrs();
       }
     }
   }
 
-  if (!config_.param_file.empty()) {
+  if (!config_.params_file().empty()) {
     // sort paramlist to have consistent ordering
     std::sort(params.begin(), params.end());
     // append just the load_combine op
     framework::OpDesc *op = load_block->AppendOp();
     op->SetType("load_combine");
     op->SetOutput("Out", params);
-    op->SetAttr("file_path", {config_.param_file});
+    op->SetAttr("file_path", {config_.params_file()});
     op->CheckAttrs();
   }
 
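For reference, a minimal sketch of reading the refactored AnalysisConfig through its accessors, as this patch does inside the predictor. The DescribeConfig helper, the paddle::AnalysisConfig spelling, and the header path are assumptions for illustration; only getters that appear in the diff above are used.

// Hypothetical helper: logs an AnalysisConfig using the accessor-style API
// introduced by this change (use_gpu(), gpu_device_id(), model_dir(), ...).
#include <iostream>

#include "paddle/fluid/inference/api/paddle_analysis_config.h"

void DescribeConfig(const paddle::AnalysisConfig &config) {
  if (config.use_gpu()) {
    // GPU settings now come from gpu_device_id() / memory_pool_init_size_mb()
    // rather than the old public fields `device` and `fraction_of_gpu_memory`.
    std::cout << "GPU device " << config.gpu_device_id() << ", memory pool "
              << config.memory_pool_init_size_mb() << " MB (fraction "
              << config.fraction_of_gpu_memory_for_pool() << ")\n";
  }
  if (!config.model_dir().empty()) {
    std::cout << "model_dir: " << config.model_dir() << "\n";
  } else {
    // Separate program/parameter files are exposed via prog_file() and
    // params_file(), replacing the old `prog_file` and `param_file` members.
    std::cout << "prog_file: " << config.prog_file()
              << ", params_file: " << config.params_file() << "\n";
  }
  std::cout << "IR optimization: " << (config.ir_optim() ? "on" : "off") << "\n";
}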