|
|
@ -19,8 +19,10 @@
|
|
|
|
#include <string>
|
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include "paddle/fluid/framework/op_registry.h"
|
|
|
|
#include "paddle/fluid/framework/operator.h"
|
|
|
|
#include "paddle/fluid/framework/operator.h"
|
|
|
|
#include "paddle/fluid/inference/analysis/helper.h"
|
|
|
|
#include "paddle/fluid/inference/analysis/helper.h"
|
|
|
|
|
|
|
|
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
|
|
|
|
#include "paddle/fluid/inference/tensorrt/engine.h"
|
|
|
|
#include "paddle/fluid/inference/tensorrt/engine.h"
|
|
|
|
|
|
|
|
|
|
|
|
namespace paddle {
|
|
|
|
namespace paddle {
|
|
|
@ -29,6 +31,35 @@ DECLARE_int32(tensorrt_engine_batch_size);
|
|
|
|
|
|
|
|
|
|
|
|
namespace operators {
|
|
|
|
namespace operators {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
using FluidDT = framework::proto::VarType_Type;
|
|
|
|
|
|
|
|
using TRT_DT = nvinfer1::DataType;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Maps a Fluid tensor element type to the matching TensorRT data type.
//
// Only FP32 and INT32 have a mapping here. Any other type raises through
// PADDLE_THROW instead of being silently reported as kINT32, which would make
// the engine misinterpret the tensor's bytes.
TRT_DT FluidDataType2TRT(FluidDT type) {
  switch (type) {
    case FluidDT::VarType_Type_FP32:
      return TRT_DT::kFLOAT;
    case FluidDT::VarType_Type_INT32:
      return TRT_DT::kINT32;
    default:
      PADDLE_THROW("unknown type");
  }
  // Not reached; kept so that every control path visibly returns a value.
  return TRT_DT::kINT32;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Converts a Fluid shape vector into TensorRT dimensions.
//
// The leading entry of `shape` is dropped and the remaining three entries are
// passed to nvinfer1::DimsCHW in order, so the shape must have exactly four
// entries. The two range checks are kept so that out-of-range ranks still get
// their specific messages; the final check rejects ranks 2 and 3, which this
// conversion does not handle.
nvinfer1::Dims Vec2TRT_Dims(const std::vector<int64_t>& shape) {
  PADDLE_ENFORCE_GT(shape.size(), 1UL,
                    "TensorRT tensor input requires at least 2 dimensions");
  PADDLE_ENFORCE_LE(shape.size(), 4UL,
                    "TensorRT tensor input requires at most 4 dimensions");
  PADDLE_ENFORCE_EQ(shape.size(), 4UL,
                    "TensorRT tensor input currently supports only "
                    "4-dimensional shapes");
  // The shape entries are int64_t; make the narrowing to int explicit.
  return nvinfer1::DimsCHW(static_cast<int>(shape[1]),
                           static_cast<int>(shape[2]),
                           static_cast<int>(shape[3]));
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
} // namespace
|
|
|
|
|
|
|
|
|
|
|
|
using inference::Singleton;
|
|
|
|
using inference::Singleton;
|
|
|
|
using inference::tensorrt::TRT_EngineManager;
|
|
|
|
using inference::tensorrt::TRT_EngineManager;
|
|
|
|
|
|
|
|
|
|
|
@ -47,7 +78,7 @@ class TensorRTEngineOp : public framework::OperatorWithKernel {
|
|
|
|
.FindVar(input0)
|
|
|
|
.FindVar(input0)
|
|
|
|
->GetMutable<framework::LoDTensor>()
|
|
|
|
->GetMutable<framework::LoDTensor>()
|
|
|
|
->type()),
|
|
|
|
->type()),
|
|
|
|
platform::CPUPlace());
|
|
|
|
ctx.GetPlace());
|
|
|
|
return kt;
|
|
|
|
return kt;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
};
|
|
|
@ -94,7 +125,9 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
|
|
|
|
|
|
|
|
|
|
|
|
// Convert output tensor from engine to fluid
|
|
|
|
// Convert output tensor from engine to fluid
|
|
|
|
int output_index = 0;
|
|
|
|
int output_index = 0;
|
|
|
|
|
|
|
|
VLOG(4) << "TensorRT Engine Op Outputs:";
|
|
|
|
for (const auto& y : context.Outputs("Ys")) {
|
|
|
|
for (const auto& y : context.Outputs("Ys")) {
|
|
|
|
|
|
|
|
VLOG(4) << y;
|
|
|
|
// convert output and copy to fluid.
|
|
|
|
// convert output and copy to fluid.
|
|
|
|
nvinfer1::ITensor* trt_t = engine->GetITensor(output_maps[output_index]);
|
|
|
|
nvinfer1::ITensor* trt_t = engine->GetITensor(output_maps[output_index]);
|
|
|
|
auto dims = trt_t->getDimensions();
|
|
|
|
auto dims = trt_t->getDimensions();
|
|
|
@ -113,9 +146,11 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
|
|
|
|
// TODO(Superjomn) change this float to dtype size.
|
|
|
|
// TODO(Superjomn) change this float to dtype size.
|
|
|
|
auto size = inference::analysis::AccuDims(dims.d, dims.nbDims) *
|
|
|
|
auto size = inference::analysis::AccuDims(dims.d, dims.nbDims) *
|
|
|
|
FLAGS_tensorrt_engine_batch_size;
|
|
|
|
FLAGS_tensorrt_engine_batch_size;
|
|
|
|
engine->GetOutputInCPU(output_maps[output_index],
|
|
|
|
engine->GetOutputInGPU(
|
|
|
|
fluid_t->mutable_data<float>(platform::CPUPlace()),
|
|
|
|
output_maps[output_index],
|
|
|
|
size * sizeof(float));
|
|
|
|
fluid_t->mutable_data<float>(platform::CUDAPlace(
|
|
|
|
|
|
|
|
boost::get<platform::CUDAPlace>(context.GetPlace()).device)),
|
|
|
|
|
|
|
|
size * sizeof(float));
|
|
|
|
//} else {
|
|
|
|
//} else {
|
|
|
|
// engine->GetOutputInGPU(
|
|
|
|
// engine->GetOutputInGPU(
|
|
|
|
// y, fluid_t->mutable_data<float>(platform::CUDAPlace()),
|
|
|
|
// y, fluid_t->mutable_data<float>(platform::CUDAPlace()),
|
|
|
@ -128,8 +163,67 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
protected:
|
|
|
|
protected:
|
|
|
|
// Build the engine.
|
|
|
|
// Steps performed, all driven by the op's attributes:
//   1. Parse the serialized "subgraph" BlockDesc.
//   2. Create an engine through TRT_EngineManager using "max_batch",
//      "max_workspace", "engine_uniq_key" and the CUDA device of the
//      execution place.
//   3. Declare every "Xs" input that is not listed in "parameters" as an
//      engine input.
//   4. Convert the block's ops with OpConverter.
//   5. Declare the outputs named in "output_name_mapping" and freeze the
//      network.
//
// Raises (via PADDLE_ENFORCE) when the subgraph cannot be parsed, when an
// input variable is missing from the block, or when an input is not a
// LoDTensor.
void Prepare(const framework::ExecutionContext& context) const {
  VLOG(4) << "Prepare engine";
  // Get the ProgramDesc and pass to convert.
  framework::proto::BlockDesc block_desc;
  // ParseFromString reports failure through its return value; do not build an
  // engine from a partially parsed description.
  PADDLE_ENFORCE(
      block_desc.ParseFromString(context.Attr<std::string>("subgraph")),
      "failed to parse the subgraph attribute of the TensorRT engine op");
  int max_batch = context.Attr<int>("max_batch");
  auto max_workspace = context.Attr<int>("max_workspace");

  // Names listed in "parameters" are skipped when declaring engine inputs and
  // are handed to the converter instead.
  const auto& params = context.Attr<std::vector<std::string>>("parameters");
  std::unordered_set<std::string> parameters(params.begin(), params.end());

  const auto& output_maps =
      context.Attr<std::vector<std::string>>("output_name_mapping");

  // TODO(Superjomn) replace this with a different stream
  auto* engine = Singleton<TRT_EngineManager>::Global().Create(
      max_batch, max_workspace, nullptr /*engine hold its own stream*/,
      context.Attr<std::string>("engine_uniq_key"),
      boost::get<platform::CUDAPlace>(context.GetPlace()).device);

  engine->InitNetwork();

  framework::BlockDesc block(nullptr /*programdesc*/, &block_desc);
  VLOG(4) << "parsed var size " << block.AllVars().size();
  // Add inputs
  VLOG(4) << "declare inputs";
  for (const auto& input : context.Inputs("Xs")) {
    if (parameters.count(input)) continue;
    VLOG(4) << "declare input " << input;
    auto* var = block.FindVar(input);
    // Every non-parameter input must have a variable description in the
    // parsed subgraph block, since its type and shape are read from there.
    PADDLE_ENFORCE(var, "no variable called %s", input);
    PADDLE_ENFORCE_EQ(var->GetType(), FluidDT::VarType_Type_LOD_TENSOR,
                      "TensorRT engine only takes LoDTensor as input");
    auto shape = var->GetShape();
    // For the special batch_size placeholder -1, drop it and pass the real
    // shape of data.
    // TODO(Superjomn) fix this with batch broadcast, or it can't handle
    // variational batch size.
    // Guard against an empty shape so that shape[0] is never read out of
    // bounds; Vec2TRT_Dims below still rejects unsupported ranks.
    if (!shape.empty() && shape[0] == -1) {
      shape[0] = FLAGS_tensorrt_engine_batch_size;
    }
    engine->DeclareInput(
        input, FluidDataType2TRT(
                   var->Proto()->type().lod_tensor().tensor().data_type()),
        Vec2TRT_Dims(shape));
  }

  inference::Singleton<inference::tensorrt::OpConverter>::Global()
      .ConvertBlock(block_desc, parameters, context.scope(), engine);

  // Add outputs
  for (const auto& output : output_maps) {
    engine->DeclareOutput(output);
  }

  engine->FreezeNetwork();
}
|
|
|
|
};
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
} // namespace operators
|
|
|
|
} // namespace operators
|
|
|
|