/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <cuda.h>
#include <cuda_runtime_api.h>
#include <glog/logging.h>
#include <gtest/gtest.h>

#include "paddle/fluid/inference/tensorrt/engine.h"
#include "paddle/fluid/platform/enforce.h"

namespace paddle {
namespace inference {
namespace tensorrt {

class TensorRTEngineTest : public ::testing::Test {
 protected:
  void SetUp() override {
    // The stream must actually be created here: it is passed to the engine
    // and destroyed in TearDown, so it cannot be left uninitialized.
    ASSERT_EQ(0, cudaStreamCreate(&stream_));
    engine_ = new TensorRTEngine(10, 1 << 10, &stream_);
    engine_->InitNetwork();
  }

  void TearDown() override {
    delete engine_;
    cudaStreamDestroy(stream_);
  }

 protected:
  TensorRTEngine* engine_;
  cudaStream_t stream_;
};
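
// Builds a single-input, single-output fully connected layer computing
// y = 2 * x + 3 and checks the result for one input value.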
TEST_F(TensorRTEngineTest, add_layer) {
  const int size = 1;

  float raw_weight[size] = {2.};  // Weight in CPU memory.
  float raw_bias[size] = {3.};

  LOG(INFO) << "create weights";
  TensorRTEngine::Weight weight(nvinfer1::DataType::kFLOAT, raw_weight, size);
  TensorRTEngine::Weight bias(nvinfer1::DataType::kFLOAT, raw_bias, size);
  auto* x = engine_->DeclareInput("x", nvinfer1::DataType::kFLOAT,
                                  nvinfer1::DimsCHW{1, 1, 1});
  auto* fc_layer = TRT_ENGINE_ADD_LAYER(engine_, FullyConnected, *x, size,
                                        weight.get(), bias.get());
  PADDLE_ENFORCE(fc_layer != nullptr);

  engine_->DeclareOutput(fc_layer, 0, "y");
  LOG(INFO) << "freeze network";
  engine_->FreezeNetwork();
  ASSERT_EQ(engine_->engine()->getNbBindings(), 2);

  // fill in real data
  float x_v = 1234;
  engine_->SetInputFromCPU("x", reinterpret_cast<void*>(&x_v),
                           1 * sizeof(float));
  LOG(INFO) << "to execute";
  engine_->Execute(1);

  LOG(INFO) << "to get output";
  float y_cpu;
  engine_->GetOutputInCPU("y", &y_cpu, 1 * sizeof(float));
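
  // The layer computes y = weight * x + bias = 2 * 1234 + 3 = 2471.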
LOG(INFO) << "to checkout output";
|
|
ASSERT_EQ(y_cpu, x_v * 2 + 3);
|
|
}
|
|
|
|
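
// Builds a 2-input, 2-output fully connected layer; also checks the output
// dimensions TensorRT reports for the bound output tensor.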
TEST_F(TensorRTEngineTest, add_layer_multi_dim) {
  // Weight in CPU memory.
  // It seems TensorRT FC uses col-major: [[1.0, 3.3], [1.1, 4.4]]
  // instead of row-major, which is [[1.0, 1.1], [3.3, 4.4]].
  float raw_weight[4] = {1.0, 1.1, 3.3, 4.4};
  float raw_bias[2] = {1.3, 2.4};

  TensorRTEngine::Weight weight(nvinfer1::DataType::kFLOAT, raw_weight, 4);
  TensorRTEngine::Weight bias(nvinfer1::DataType::kFLOAT, raw_bias, 2);
  auto* x = engine_->DeclareInput("x", nvinfer1::DataType::kFLOAT,
                                  nvinfer1::DimsCHW{1, 2, 1});
  auto* fc_layer = TRT_ENGINE_ADD_LAYER(engine_, FullyConnected, *x, 2,
                                        weight.get(), bias.get());
  PADDLE_ENFORCE(fc_layer != nullptr);

  engine_->DeclareOutput(fc_layer, 0, "y");
  engine_->FreezeNetwork();
  ASSERT_EQ(engine_->engine()->getNbBindings(), 2);

  float x_v[2] = {1.0, 2.0};
  engine_->SetInputFromCPU("x", reinterpret_cast<void*>(&x_v),
                           2 * sizeof(float));
  engine_->Execute(1);
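
  // With input x = [1, 2]: y[0] = 1.0 * 1 + 1.1 * 2 + 1.3 = 4.5 and
  // y[1] = 3.3 * 1 + 4.4 * 2 + 2.4 = 14.5. The reported output shape is
  // checked below before copying the result back.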
  LOG(INFO) << "to get output";
  float y_cpu[2] = {-1., -1.};

  auto dims = engine_->GetITensor("y")->getDimensions();
  ASSERT_EQ(dims.nbDims, 3);
  ASSERT_EQ(dims.d[0], 2);
  ASSERT_EQ(dims.d[1], 1);
  engine_->GetOutputInCPU("y", &y_cpu[0], 2 * sizeof(float));
  ASSERT_EQ(y_cpu[0], 4.5);
  ASSERT_EQ(y_cpu[1], 14.5);
}
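
// Convolves two all-ones 1x3x3 inputs with an all-ones 3x3 kernel
// (stride 1, padding 1) and spot-checks two border outputs.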
TEST_F(TensorRTEngineTest, test_conv2d) {
  // Weight in CPU memory.
  float raw_weight[9] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
  float raw_bias[1] = {0};

  TensorRTEngine::Weight weight(nvinfer1::DataType::kFLOAT, raw_weight, 9);
  TensorRTEngine::Weight bias(nvinfer1::DataType::kFLOAT, raw_bias, 1);
  auto* x = engine_->DeclareInput("x", nvinfer1::DataType::kFLOAT,
                                  nvinfer1::Dims3{1, 3, 3});
  auto* conv_layer =
      TRT_ENGINE_ADD_LAYER(engine_, Convolution, *x, 1, nvinfer1::DimsHW{3, 3},
                           weight.get(), bias.get());
  PADDLE_ENFORCE(conv_layer != nullptr);
  conv_layer->setStride(nvinfer1::DimsHW{1, 1});
  conv_layer->setPadding(nvinfer1::DimsHW{1, 1});

  engine_->DeclareOutput(conv_layer, 0, "y");
  engine_->FreezeNetwork();
  ASSERT_EQ(engine_->engine()->getNbBindings(), 2);

  // A batch of two 1x3x3 inputs, all ones.
  float x_v[18] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
                   1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
  engine_->SetInputFromCPU("x", reinterpret_cast<void*>(&x_v),
                           18 * sizeof(float));
  engine_->Execute(2);
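
  // An all-ones kernel with zero padding of 1 makes each output pixel the sum
  // of the overlapping input window: 4 taps at a corner (y[0]) and 6 along an
  // edge (y[1]).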
  LOG(INFO) << "to get output";
  float* y_cpu = new float[18];
  engine_->GetOutputInCPU("y", &y_cpu[0], 18 * sizeof(float));
  ASSERT_EQ(y_cpu[0], 4.0);
  ASSERT_EQ(y_cpu[1], 6.0);
  delete[] y_cpu;
}
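
// Average-pools each 2x2 input down to a single value for a batch of two.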
TEST_F(TensorRTEngineTest, test_pool2d) {
  auto* x = engine_->DeclareInput("x", nvinfer1::DataType::kFLOAT,
                                  nvinfer1::Dims3{1, 2, 2});

  nvinfer1::PoolingType pool_t = nvinfer1::PoolingType::kAVERAGE;
  auto* pool_layer =
      TRT_ENGINE_ADD_LAYER(engine_, Pooling, *const_cast<nvinfer1::ITensor*>(x),
                           pool_t, nvinfer1::DimsHW{2, 2});

  PADDLE_ENFORCE(pool_layer != nullptr);
  pool_layer->setStride(nvinfer1::DimsHW{1, 1});
  pool_layer->setPadding(nvinfer1::DimsHW{0, 0});

  engine_->DeclareOutput(pool_layer, 0, "y");
  engine_->FreezeNetwork();
  ASSERT_EQ(engine_->engine()->getNbBindings(), 2);

  float x_v[8] = {1.0, 2.0, 5.0, 0.0, 2.0, 3.0, 5.0, 10.0};
  engine_->SetInputFromCPU("x", reinterpret_cast<void*>(&x_v),
                           8 * sizeof(float));
  engine_->Execute(2);
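
  // Average pooling over each 2x2 sample: (1 + 2 + 5 + 0) / 4 = 2.0 for the
  // first sample and (2 + 3 + 5 + 10) / 4 = 5.0 for the second.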
  LOG(INFO) << "to get output";
  float* y_cpu = new float[2];
  engine_->GetOutputInCPU("y", &y_cpu[0], 2 * sizeof(float));

  ASSERT_EQ(y_cpu[0], 2.0);
  ASSERT_EQ(y_cpu[1], 5.0);
  delete[] y_cpu;
}

}  // namespace tensorrt
}  // namespace inference
}  // namespace paddle