You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Paddle/paddle/fluid/inference/tensorrt/test_engine.cc

233 lines
7.5 KiB

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/inference/tensorrt/engine.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace inference {
namespace tensorrt {
class TensorRTEngineTest : public ::testing::Test {
protected:
void SetUp() override {
ctx_ = new platform::CUDADeviceContext(platform::CUDAPlace(0));
engine_ = new TensorRTEngine(10, 1 << 10);
engine_->InitNetwork();
}
void TearDown() override {
if (engine_) {
delete engine_;
engine_ = nullptr;
}
}
void PrepareInputOutput(const std::vector<float> &input,
std::vector<int> output_shape) {
TensorFromVector(input, *ctx_, &input_);
output_.Resize(framework::make_ddim(output_shape));
}
void GetOutput(std::vector<float> *output) {
TensorToVector(output_, *ctx_, output);
}
protected:
framework::Tensor input_;
framework::Tensor output_;
TensorRTEngine *engine_;
platform::CUDADeviceContext *ctx_;
};
TEST_F(TensorRTEngineTest, add_layer) {
const int size = 1;
float raw_weight[size] = {2.}; // Weight in CPU memory.
float raw_bias[size] = {3.};
std::vector<void *> buffers(2); // TRT binded inputs
LOG(INFO) << "create weights";
TensorRTEngine::Weight weight(nvinfer1::DataType::kFLOAT, raw_weight, size);
TensorRTEngine::Weight bias(nvinfer1::DataType::kFLOAT, raw_bias, size);
auto *x = engine_->DeclareInput("x", nvinfer1::DataType::kFLOAT,
nvinfer1::DimsCHW{1, 1, 1});
auto *fc_layer = TRT_ENGINE_ADD_LAYER(engine_, FullyConnected, *x, size,
weight.get(), bias.get());
PADDLE_ENFORCE(fc_layer != nullptr);
engine_->DeclareOutput(fc_layer, 0, "y");
LOG(INFO) << "freeze network";
engine_->FreezeNetwork();
ASSERT_EQ(engine_->engine()->getNbBindings(), 2);
// fill in real data
std::vector<float> x_v = {1234};
std::vector<float> y_cpu;
PrepareInputOutput(x_v, {1});
auto *x_v_gpu_data = input_.mutable_data<float>(ctx_->GetPlace());
auto *y_gpu_data = output_.mutable_data<float>(ctx_->GetPlace());
buffers[0] = reinterpret_cast<void *>(x_v_gpu_data);
buffers[1] = reinterpret_cast<void *>(y_gpu_data);
LOG(INFO) << "to execute";
engine_->Execute(1, &buffers, ctx_->stream());
LOG(INFO) << "to get output";
GetOutput(&y_cpu);
LOG(INFO) << "to checkout output";
ASSERT_EQ(y_cpu[0], x_v[0] * 2 + 3);
}
TEST_F(TensorRTEngineTest, add_layer_multi_dim) {
// Weight in CPU memory.
// It seems tensorrt FC use col-major: [[1.0, 3.3], [1.1, 4.4]]
// instead of row-major, which is [[1.0, 1.1], [3.3, 4.4]]
float raw_weight[4] = {1.0, 1.1, 3.3, 4.4};
float raw_bias[2] = {1.3, 2.4};
std::vector<void *> buffers(2); // TRT binded inputs
TensorRTEngine::Weight weight(nvinfer1::DataType::kFLOAT, raw_weight, 4);
TensorRTEngine::Weight bias(nvinfer1::DataType::kFLOAT, raw_bias, 2);
auto *x = engine_->DeclareInput("x", nvinfer1::DataType::kFLOAT,
nvinfer1::DimsCHW{1, 2, 1});
auto *fc_layer = TRT_ENGINE_ADD_LAYER(engine_, FullyConnected, *x, 2,
weight.get(), bias.get());
PADDLE_ENFORCE(fc_layer != nullptr);
engine_->DeclareOutput(fc_layer, 0, "y");
engine_->FreezeNetwork();
ASSERT_EQ(engine_->engine()->getNbBindings(), 2);
// fill in real data
std::vector<float> x_v = {1.0, 2.0};
std::vector<float> y_cpu;
PrepareInputOutput(x_v, {2});
auto *x_v_gpu_data = input_.mutable_data<float>(ctx_->GetPlace());
auto *y_gpu_data = output_.mutable_data<float>(ctx_->GetPlace());
buffers[0] = reinterpret_cast<void *>(x_v_gpu_data);
buffers[1] = reinterpret_cast<void *>(y_gpu_data);
engine_->Execute(1, &buffers, ctx_->stream());
LOG(INFO) << "to get output";
GetOutput(&y_cpu);
auto dims = engine_->GetITensor("y")->getDimensions();
ASSERT_EQ(dims.nbDims, 3);
ASSERT_EQ(dims.d[0], 2);
ASSERT_EQ(dims.d[1], 1);
ASSERT_EQ(y_cpu[0], 4.5);
ASSERT_EQ(y_cpu[1], 14.5);
}
TEST_F(TensorRTEngineTest, test_conv2d) {
// Weight in CPU memory.
float raw_weight[9] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
float raw_bias[1] = {0};
std::vector<void *> buffers(2); // TRT binded inputs
TensorRTEngine::Weight weight(nvinfer1::DataType::kFLOAT, raw_weight, 9);
TensorRTEngine::Weight bias(nvinfer1::DataType::kFLOAT, raw_bias, 1);
auto *x = engine_->DeclareInput("x", nvinfer1::DataType::kFLOAT,
nvinfer1::Dims3{1, 3, 3});
auto *conv_layer =
TRT_ENGINE_ADD_LAYER(engine_, Convolution, *x, 1, nvinfer1::DimsHW{3, 3},
weight.get(), bias.get());
PADDLE_ENFORCE(conv_layer != nullptr);
conv_layer->setStride(nvinfer1::DimsHW{1, 1});
conv_layer->setPadding(nvinfer1::DimsHW{1, 1});
engine_->DeclareOutput(conv_layer, 0, "y");
engine_->FreezeNetwork();
ASSERT_EQ(engine_->engine()->getNbBindings(), 2);
// fill in real data
std::vector<float> x_v = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
std::vector<float> y_cpu;
PrepareInputOutput(x_v, {18});
auto *x_v_gpu_data = input_.mutable_data<float>(ctx_->GetPlace());
auto *y_gpu_data = output_.mutable_data<float>(ctx_->GetPlace());
buffers[0] = reinterpret_cast<void *>(x_v_gpu_data);
buffers[1] = reinterpret_cast<void *>(y_gpu_data);
engine_->Execute(2, &buffers, ctx_->stream());
LOG(INFO) << "to get output";
GetOutput(&y_cpu);
ASSERT_EQ(y_cpu[0], 4.0);
ASSERT_EQ(y_cpu[1], 6.0);
}
TEST_F(TensorRTEngineTest, test_pool2d) {
// Weight in CPU memory.
auto *x = engine_->DeclareInput("x", nvinfer1::DataType::kFLOAT,
nvinfer1::Dims3{1, 2, 2});
std::vector<void *> buffers(2); // TRT binded inputs
nvinfer1::PoolingType pool_t = nvinfer1::PoolingType::kAVERAGE;
auto *pool_layer = TRT_ENGINE_ADD_LAYER(engine_, Pooling, *x, pool_t,
nvinfer1::DimsHW{2, 2});
PADDLE_ENFORCE(pool_layer != nullptr);
pool_layer->setStride(nvinfer1::DimsHW{1, 1});
pool_layer->setPadding(nvinfer1::DimsHW{0, 0});
engine_->DeclareOutput(pool_layer, 0, "y");
engine_->FreezeNetwork();
ASSERT_EQ(engine_->engine()->getNbBindings(), 2);
// fill in real data
std::vector<float> x_v = {1.0, 2.0, 5.0, 0.0, 2.0, 3.0, 5.0, 10.0};
std::vector<float> y_cpu;
PrepareInputOutput(x_v, {2});
auto *x_v_gpu_data = input_.mutable_data<float>(ctx_->GetPlace());
auto *y_gpu_data = output_.mutable_data<float>(ctx_->GetPlace());
buffers[0] = reinterpret_cast<void *>(x_v_gpu_data);
buffers[1] = reinterpret_cast<void *>(y_gpu_data);
engine_->Execute(2, &buffers, ctx_->stream());
LOG(INFO) << "to get output";
GetOutput(&y_cpu);
ASSERT_EQ(y_cpu[0], 2.0);
ASSERT_EQ(y_cpu[1], 5.0);
}
} // namespace tensorrt
} // namespace inference
} // namespace paddle