refine error message related to paddle-TRT (#27256)

Pei Yang 4 years ago committed by GitHub
parent d708b21074
commit aae41c6fca
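This commit migrates the Paddle-TRT code path from the untyped `PADDLE_ENFORCE(cond, msg)` form to the typed enforce macros (`PADDLE_ENFORCE_NOT_NULL`, `PADDLE_ENFORCE_EQ`, `PADDLE_ENFORCE_LT`, `PADDLE_ENFORCE_CUDA_SUCCESS`, `PADDLE_THROW`) paired with an error category from `platform::errors` such as `InvalidArgument`, `NotFound`, `AlreadyExists`, `OutOfRange`, `PreconditionNotMet`, `Unimplemented`, or `Fatal`. A minimal sketch of the before/after pattern, with an illustrative function that is not part of the diff:

```cpp
#include "paddle/fluid/platform/enforce.h"  // PADDLE_ENFORCE_* and platform::errors

void CheckBuilder(void *infer_builder) {
  // Before: untyped check, message passed as trailing printf-style varargs.
  //   PADDLE_ENFORCE(infer_builder != nullptr,
  //                  "Call InitNetwork first to initialize network.");

  // After: a dedicated null-check macro plus a typed, actionable error.
  PADDLE_ENFORCE_NOT_NULL(
      infer_builder, paddle::platform::errors::InvalidArgument(
                         "Inference builder of TRT is null. Please make "
                         "sure you call InitNetwork first."));
}
```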

@@ -63,11 +63,13 @@ void TensorRTEngine::Execute(int batch_size, std::vector<void *> *buffers,
 void TensorRTEngine::FreezeNetwork() {
   freshDeviceId();
   VLOG(3) << "TRT to freeze network";
-  PADDLE_ENFORCE(infer_builder_ != nullptr,
-                 "Call InitNetwork first to initialize network.");
-  PADDLE_ENFORCE_EQ(network() != nullptr, true,
-                    platform::errors::InvalidArgument(
-                        "Call InitNetwork first to initialize network."));
+  PADDLE_ENFORCE_NOT_NULL(infer_builder_,
+                          platform::errors::InvalidArgument(
+                              "Inference builder of TRT is null. Please make "
+                              "sure you call InitNetwork first."));
+  PADDLE_ENFORCE_NOT_NULL(network(),
+                          platform::errors::InvalidArgument(
+                              "Call InitNetwork first to initialize network."));
   // build engine.
   infer_builder_->setMaxBatchSize(max_batch_);
   infer_builder_->setMaxWorkspaceSize(max_workspace_);
@@ -210,7 +212,10 @@ void TensorRTEngine::FreezeNetwork() {
   } else {
     infer_engine_.reset(infer_builder_->buildCudaEngine(*network()));
   }
-  PADDLE_ENFORCE(infer_engine_ != nullptr, "build cuda engine failed!");
+  PADDLE_ENFORCE_NOT_NULL(
+      infer_engine_, platform::errors::Fatal(
+                         "Build TensorRT cuda engine failed! Please recheck "
+                         "your configurations related to paddle-TensorRT."));
 }
 
 nvinfer1::ITensor *TensorRTEngine::DeclareInput(const std::string &name,
@@ -220,8 +225,16 @@ nvinfer1::ITensor *TensorRTEngine::DeclareInput(const std::string &name,
                     platform::errors::InvalidArgument(
                         "The TRT network should be initialized first."));
   auto *input = network()->addInput(name.c_str(), dtype, dims);
-  PADDLE_ENFORCE(input, "infer network add input %s failed", name);
-  PADDLE_ENFORCE(input->isNetworkInput());
+  PADDLE_ENFORCE_NOT_NULL(
+      input, platform::errors::InvalidArgument("Adding input %s failed in "
+                                               "TensorRT inference network. "
+                                               "Please recheck your input.",
+                                               name));
+  PADDLE_ENFORCE_EQ(input->isNetworkInput(), true,
+                    platform::errors::InvalidArgument(
+                        "Input %s is not the input of TRT inference network. "
+                        "Please recheck your input.",
+                        name));
   TensorRTEngine::SetITensor(name, input);
   return input;
 }
@@ -230,31 +243,53 @@ void TensorRTEngine::DeclareOutput(const nvinfer1::ILayer *layer, int offset,
                                    const std::string &name) {
   auto *output = layer->getOutput(offset);
   SetITensor(name, output);
-  PADDLE_ENFORCE(output != nullptr);
+  PADDLE_ENFORCE_NOT_NULL(
+      output, platform::errors::InvalidArgument(
+                  "The output %s of TRT engine should not be null.", name));
   output->setName(name.c_str());
-  PADDLE_ENFORCE(!output->isNetworkInput());
+  PADDLE_ENFORCE_EQ(output->isNetworkInput(), false,
+                    platform::errors::InvalidArgument(
+                        "The output %s of TRT engine should not be the input "
+                        "of the network at the same time.",
+                        name));
   network()->markOutput(*output);
-  PADDLE_ENFORCE(output->isNetworkOutput());
+  PADDLE_ENFORCE_EQ(
+      output->isNetworkOutput(), true,
+      platform::errors::InvalidArgument(
+          "The output %s of TRT engine should be the output of the network.",
+          name));
 }
 
 void TensorRTEngine::DeclareOutput(const std::string &name) {
   auto *output = TensorRTEngine::GetITensor(name);
-  PADDLE_ENFORCE(output != nullptr);
+  PADDLE_ENFORCE_NOT_NULL(
+      output, platform::errors::InvalidArgument(
+                  "The output %s of TRT engine should not be null.", name));
   output->setName(name.c_str());
-  PADDLE_ENFORCE(!output->isNetworkInput());
+  PADDLE_ENFORCE_EQ(output->isNetworkInput(), false,
+                    platform::errors::InvalidArgument(
+                        "The output %s of TRT engine should not be the input "
+                        "of the network at the same time.",
+                        name));
   network()->markOutput(*output);
 }
 
 void TensorRTEngine::SetITensor(const std::string &name,
                                 nvinfer1::ITensor *tensor) {
-  PADDLE_ENFORCE(tensor != nullptr);
-  PADDLE_ENFORCE_EQ(0, itensor_map_.count(name), "duplicate ITensor name %s",
-                    name);
+  PADDLE_ENFORCE_NOT_NULL(
+      tensor, platform::errors::InvalidArgument(
+                  "Tensor named %s of TRT engine should not be null.", name));
+  PADDLE_ENFORCE_EQ(
+      0, itensor_map_.count(name),
+      platform::errors::InvalidArgument(
+          "Tensor named %s of TRT engine should not be duplicated.", name));
   itensor_map_[name] = tensor;
 }
 
 nvinfer1::ITensor *TensorRTEngine::GetITensor(const std::string &name) {
-  PADDLE_ENFORCE(itensor_map_.count(name), "no ITensor %s", name);
+  PADDLE_ENFORCE_EQ(itensor_map_.count(name), true,
+                    platform::errors::NotFound(
+                        "Tensor named %s is not found in TRT engine.", name));
   return itensor_map_[name];
 }
@@ -271,11 +306,11 @@ float *TensorRTEngine::GetWeightCPUData(const std::string &name,
   std::string splitter = "__";
   std::string name_with_suffix = name + splitter + name_suffix;
   platform::CPUPlace cpu_place;
-  PADDLE_ENFORCE_EQ(
-      weight_map.count(name_with_suffix), 0,
-      "During TRT Op converter: We set weight %s with the same name "
-      "twice into the weight_map",
-      name_with_suffix);
+  PADDLE_ENFORCE_EQ(weight_map.count(name_with_suffix), 0,
+                    platform::errors::AlreadyExists(
+                        "The weight named %s is set into the weight map "
+                        "twice in TRT OP converter.",
+                        name_with_suffix));
   weight_map[name_with_suffix].reset(new framework::Tensor());
   weight_map[name_with_suffix]->Resize(weight_tensor->dims());
   TensorCopySync(*weight_tensor, cpu_place, weight_map[name_with_suffix].get());
@@ -297,7 +332,10 @@ nvinfer1::IPluginLayer *TensorRTEngine::AddPlugin(
 void TensorRTEngine::freshDeviceId() {
   int count;
   cudaGetDeviceCount(&count);
-  PADDLE_ENFORCE_LT(device_id_, count);
+  PADDLE_ENFORCE_LT(device_id_, count,
+                    platform::errors::OutOfRange(
+                        "Device id %d exceeds the current device count: %d.",
+                        device_id_, count));
   cudaSetDevice(device_id_);
 }
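For context, the guard added to `freshDeviceId` can be exercised standalone. The sketch below mirrors the check with a plain return code instead of Paddle's `OutOfRange` exception; the function name is illustrative:

```cpp
#include <cstdio>
#include <cuda_runtime.h>

// Validate a device id against the visible device count before switching
// to it, mirroring the PADDLE_ENFORCE_LT(device_id_, count, ...) guard.
bool SetDeviceChecked(int device_id) {
  int count = 0;
  cudaGetDeviceCount(&count);
  if (device_id < 0 || device_id >= count) {
    std::fprintf(stderr,
                 "Device id %d exceeds the current device count: %d.\n",
                 device_id, count);
    return false;
  }
  return cudaSetDevice(device_id) == cudaSuccess;
}
```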

@@ -196,8 +196,10 @@ class TensorRTEngine {
   }
 
   nvinfer1::IHostMemory* Serialize() {
-    PADDLE_ENFORCE(infer_engine_ != nullptr,
-                   "You should build engine first and then serialize");
+    PADDLE_ENFORCE_NOT_NULL(
+        infer_engine_,
+        platform::errors::InvalidArgument(
+            "The TensorRT engine must be built first before serialization."));
     ihost_memory_.reset(infer_engine_->serialize());
     return ihost_memory_.get();
   }
@@ -222,8 +224,14 @@ class TensorRTEngine {
           engine_serialized_data.c_str(), engine_serialized_data.size(),
           &inference::Singleton<plugin::PluginFactoryTensorRT>::Global()));
     }
-    PADDLE_ENFORCE(infer_engine_ != nullptr,
-                   "build cuda engine failed when deserialize engine info.!");
+    PADDLE_ENFORCE_NOT_NULL(
+        infer_engine_,
+        platform::errors::Fatal(
+            "Building TRT cuda engine failed when deserializing engine info. "
+            "Please check:\n1. Your TRT serialization is generated and loaded "
+            "on the same GPU architecture;\n2. The Paddle Inference version "
+            "used to generate the serialization file and the version used "
+            "for inference are consistent."));
   }
 
   void SetRuntimeBatch(size_t batch_size);
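The expanded message reflects two genuine constraints of TRT engine caches: the serialized blob is specific to the GPU architecture and to the Paddle Inference version that produced it. A hedged round-trip sketch, assuming the `Serialize()` member shown above and a `Deserialize(const std::string&)` counterpart as suggested by the snippet:

```cpp
#include <string>
#include "paddle/fluid/inference/tensorrt/engine.h"

using paddle::inference::tensorrt::TensorRTEngine;

// Persist a built engine; Serialize() throws if the engine was not built.
std::string SaveEngine(TensorRTEngine *engine) {
  nvinfer1::IHostMemory *mem = engine->Serialize();
  return std::string(static_cast<const char *>(mem->data()), mem->size());
}

// Restore it later; this must run on the same GPU architecture and the same
// Paddle Inference version, otherwise the Fatal error above is raised.
void LoadEngine(TensorRTEngine *engine, const std::string &blob) {
  engine->Deserialize(blob);
}
```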

@@ -56,14 +56,27 @@ __global__ void elementwise_kernel(const size_t total, const T *x_data,
 
 nvinfer1::Dims ElementWisePlugin::getOutputDimensions(
     int index, const nvinfer1::Dims *input_dims, int num_inputs) {
-  PADDLE_ENFORCE_EQ(index, 0);
-  PADDLE_ENFORCE_EQ(num_inputs, 2);
-  PADDLE_ENFORCE_NOT_NULL(input_dims);
+  PADDLE_ENFORCE_EQ(index, 0, platform::errors::InvalidArgument(
+                                  "There is only one output in TRT elementwise "
+                                  "op plugin, but got output index: %d.",
+                                  index));
+  PADDLE_ENFORCE_EQ(num_inputs, 2, platform::errors::InvalidArgument(
+                                       "There are 2 inputs in TRT elementwise "
+                                       "op plugin, but got input number: %d.",
+                                       num_inputs));
+  PADDLE_ENFORCE_NOT_NULL(
+      input_dims,
+      platform::errors::InvalidArgument(
+          "The input dims of TRT elementwise op plugin should not be null."));
   return input_dims[0];
 }
 
 int ElementWisePlugin::initialize() {
-  PADDLE_ENFORCE_GT(dims_y_.nbDims, 0);
+  PADDLE_ENFORCE_GT(dims_y_.nbDims, 0,
+                    platform::errors::InvalidArgument(
+                        "The dimension of input Y of TRT elementwise op plugin "
+                        "should be greater than 0, but got %d.",
+                        dims_y_.nbDims));
   axis_ = (axis_ == -1) ? dims_x_.nbDims - dims_y_.nbDims : axis_;
 
   int trimed_nb_dims = dims_y_.nbDims;
@@ -74,8 +87,18 @@ int ElementWisePlugin::initialize() {
   }
   dims_y_.nbDims = trimed_nb_dims;
 
-  PADDLE_ENFORCE_GE(dims_x_.nbDims, dims_y_.nbDims + axis_);
-  PADDLE_ENFORCE_LT(axis_, dims_x_.nbDims);
+  PADDLE_ENFORCE_GE(dims_x_.nbDims, dims_y_.nbDims + axis_,
+                    platform::errors::InvalidArgument(
+                        "We expect [number of x dims] >= [number of y dims + "
+                        "axis] in TRT elementwise op plugin, but got [number "
+                        "of x dims] = %d, [number of y dims + axis] = %d.",
+                        dims_x_.nbDims, dims_y_.nbDims + axis_));
+  PADDLE_ENFORCE_LT(
+      axis_, dims_x_.nbDims,
+      platform::errors::InvalidArgument("We expect [axis] < [number of x dims] "
+                                        "in TRT elementwise op plugin, but got "
+                                        "[axis] = %d, [number of x dims] = %d.",
+                                        axis_, dims_x_.nbDims));
 
   prev_size_ = 1;
   midd_size_ = 1;
@@ -86,7 +109,9 @@ int ElementWisePlugin::initialize() {
 
   for (int i = 0; i < dims_y_.nbDims; ++i) {
     PADDLE_ENFORCE_EQ(dims_x_.d[i + axis_], dims_y_.d[i],
-                      "Broadcast dimension mismatch.");
+                      platform::errors::InvalidArgument(
+                          "Broadcast dimension mismatch. The dims of input Y "
+                          "should be a subsequence of X."));
     midd_size_ *= dims_y_.d[i];
   }
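Taken together, the checks in `initialize()` implement a simple broadcast rule: after trailing 1s of Y are trimmed, Y's dims must match a contiguous run of X's dims starting at `axis` (with `axis == -1` meaning right-aligned). A standalone sketch of that rule, simplified to `std::vector` instead of `nvinfer1::Dims`:

```cpp
#include <cassert>
#include <vector>

// Returns true when y broadcasts onto x at `axis` under the plugin's rule:
// axis == -1 means right-aligned; trailing 1s in y are ignored; each
// remaining y dim must equal the x dim at the same offset from `axis`.
bool BroadcastCompatible(const std::vector<int> &x, std::vector<int> y,
                         int axis) {
  if (axis == -1)
    axis = static_cast<int>(x.size()) - static_cast<int>(y.size());
  while (!y.empty() && y.back() == 1) y.pop_back();  // trim trailing 1s
  if (axis < 0 || axis >= static_cast<int>(x.size())) return false;
  if (static_cast<int>(x.size()) < static_cast<int>(y.size()) + axis)
    return false;
  for (size_t i = 0; i < y.size(); ++i) {
    if (x[i + axis] != y[i]) return false;  // broadcast dimension mismatch
  }
  return true;
}

int main() {
  assert(BroadcastCompatible({8, 16, 32}, {16, 32}, -1));  // axis -> 1
  assert(BroadcastCompatible({8, 16, 32}, {16, 1}, 1));    // trailing 1 trimmed
  assert(!BroadcastCompatible({8, 16, 32}, {16, 8}, -1));  // mismatch
  return 0;
}
```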
@@ -221,7 +246,10 @@ int ElementwisePluginDynamic::enqueue(
     elementwise_kernel<<<block, thread, 0, stream>>>(
         num, x, y, out, prev_size, midd_size, post_size, details::Mul<float>());
   } else {
-    PADDLE_THROW("Not implemented.");
+    PADDLE_THROW(platform::errors::Unimplemented(
+        "Paddle-TRT only supports elementwise operation: {add, mul} currently, "
+        "but got %s.",
+        type_));
   }
 
   return cudaGetLastError() != cudaSuccess;

@@ -74,7 +74,9 @@ TEST_F(TensorRTEngineTest, add_layer) {
                                   nvinfer1::DimsCHW{1, 1, 1});
   auto *fc_layer = TRT_ENGINE_ADD_LAYER(engine_, FullyConnected, *x, size,
                                         weight.get(), bias.get());
-  PADDLE_ENFORCE(fc_layer != nullptr);
+  PADDLE_ENFORCE_NOT_NULL(fc_layer,
+                          platform::errors::InvalidArgument(
+                              "TRT fully connected layer building failed."));
   engine_->DeclareOutput(fc_layer, 0, "y");
 
   LOG(INFO) << "freeze network";
@@ -116,7 +118,9 @@ TEST_F(TensorRTEngineTest, add_layer_multi_dim) {
                                   nvinfer1::DimsCHW{1, 2, 1});
   auto *fc_layer = TRT_ENGINE_ADD_LAYER(engine_, FullyConnected, *x, 2,
                                         weight.get(), bias.get());
-  PADDLE_ENFORCE(fc_layer != nullptr);
+  PADDLE_ENFORCE_NOT_NULL(fc_layer,
+                          platform::errors::InvalidArgument(
+                              "TRT fully connected layer building failed."));
   engine_->DeclareOutput(fc_layer, 0, "y");
 
   engine_->FreezeNetwork();
@@ -160,7 +164,9 @@ TEST_F(TensorRTEngineTest, test_conv2d) {
   auto *conv_layer =
       TRT_ENGINE_ADD_LAYER(engine_, Convolution, *x, 1, nvinfer1::DimsHW{3, 3},
                            weight.get(), bias.get());
-  PADDLE_ENFORCE(conv_layer != nullptr);
+  PADDLE_ENFORCE_NOT_NULL(conv_layer,
+                          platform::errors::InvalidArgument(
+                              "TRT convolution layer building failed."));
   conv_layer->setStride(nvinfer1::DimsHW{1, 1});
   conv_layer->setPadding(nvinfer1::DimsHW{1, 1});
@@ -199,7 +205,9 @@ TEST_F(TensorRTEngineTest, test_pool2d) {
   auto *pool_layer = TRT_ENGINE_ADD_LAYER(engine_, Pooling, *x, pool_t,
                                           nvinfer1::DimsHW{2, 2});
-  PADDLE_ENFORCE(pool_layer != nullptr);
+  PADDLE_ENFORCE_NOT_NULL(
+      pool_layer,
+      platform::errors::InvalidArgument("TRT pooling layer building failed."));
   pool_layer->setStride(nvinfer1::DimsHW{1, 1});
   pool_layer->setPadding(nvinfer1::DimsHW{0, 0});

@@ -83,9 +83,8 @@ bool TRTInt8Calibrator::setBatch(
                             engine_name_, it.first));
     }
     const auto& d = dataptr->second;
-    PADDLE_ENFORCE(
-        cudaMemcpy(d.first, it.second, d.second, cudaMemcpyDeviceToDevice),
-        "Fail to cudaMemcpy %s for %s", engine_name_, it.first);
+    PADDLE_ENFORCE_CUDA_SUCCESS(
+        cudaMemcpy(d.first, it.second, d.second, cudaMemcpyDeviceToDevice));
   }
 
   data_is_set_ = true;
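The hand-written message is dropped here because `PADDLE_ENFORCE_CUDA_SUCCESS` already reports the failing CUDA status in the exception it raises. A minimal sketch of the wrapper pattern; the helper name is illustrative:

```cpp
#include <cstddef>
#include <cuda_runtime.h>
#include "paddle/fluid/platform/enforce.h"

// Any CUDA runtime call that does not return cudaSuccess raises a Paddle
// exception carrying the CUDA error, so no custom message string is needed.
void CopyDeviceToDevice(void *dst, const void *src, size_t nbytes) {
  PADDLE_ENFORCE_CUDA_SUCCESS(
      cudaMemcpy(dst, src, nbytes, cudaMemcpyDeviceToDevice));
}
```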

@@ -208,8 +208,11 @@ class TensorRTEngineOp : public framework::OperatorBase {
     auto stream =
         reinterpret_cast<const platform::CUDADeviceContext &>(dev_ctx).stream();
 
-    PADDLE_ENFORCE_EQ(input_names_.empty(), false,
-                      "should pass at least one input");
+    PADDLE_ENFORCE_EQ(
+        input_names_.empty(), false,
+        platform::errors::PreconditionNotMet(
+            "TensorRT engine needs at least one input, but no input is found. "
+            "Please check if you set the input correctly."));
 
     std::vector<std::string> output_maps =
         Attr<std::vector<std::string>>("output_name_mapping");
@@ -295,12 +298,19 @@ class TensorRTEngineOp : public framework::OperatorBase {
 #endif
       }
       auto *fluid_v = scope.FindVar(y);
-      PADDLE_ENFORCE_NOT_NULL(fluid_v, "no output variable called %s", y);
+      PADDLE_ENFORCE_NOT_NULL(
+          fluid_v,
+          platform::errors::NotFound(
+              "Output variable %s is not found in TensorRT subgraph.", y));
       auto *fluid_t = fluid_v->GetMutable<framework::LoDTensor>();
       fluid_t->Resize(framework::make_ddim(ddim));
 
-      PADDLE_ENFORCE(bind_index < num_bindings,
-                     "The bind index should be less than num_bindings");
+      PADDLE_ENFORCE_LT(bind_index, num_bindings,
+                        platform::errors::InvalidArgument(
+                            "The binding index in TRT engine should be less "
+                            "than the number of bindings, but got binding "
+                            "index = %d, number of bindings = %d.",
+                            bind_index, num_bindings));
       buffers[bind_index] = static_cast<void *>(fluid_t->mutable_data<float>(
           BOOST_GET_CONST(platform::CUDAPlace, dev_place)));
