rewrite unittest of trt_activation_op

wangkuiyi-patch-1
Luo Tao 7 years ago
parent df87e63baa
commit e116129f03

@ -92,6 +92,9 @@ if(WITH_GPU)
if(${CUDNN_MAJOR_VERSION} VERSION_LESS 7)
message(FATAL_ERROR "TensorRT needs CUDNN >= 7.0 to compile")
endif()
if(${TENSORRT_MAJOR_VERSION} VERSION_LESS 4)
message(FATAL_ERROR "Paddle needs TensorRT >= 4.0 to compile")
endif()
include_directories(${TENSORRT_INCLUDE_DIR})
endif()
elseif(WITH_AMD_GPU)

@ -1,12 +1,9 @@
# Add TRT tests
nv_test(test_op_converter SRCS test_op_converter.cc mul_op.cc conv2d_op.cc DEPS ${FLUID_CORE_MODULES} tensorrt_engine)
# This test is not stable
# See https://paddleci.ngrok.io/viewLog.html?tab=buildLog&buildTypeId=Paddle_PrCi2&buildId=36834&_focus=8828
#nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc io_converter.cc
# DEPS ${FLUID_CORE_MODULES} activation_op tensorrt_engine
# SERIAL)
nv_test(test_io_converter SRCS test_io_converter.cc io_converter.cc DEPS dynload_cuda dynamic_loader lod_tensor)
nv_test(test_trt_mul_op SRCS test_mul_op.cc mul_op.cc
DEPS ${FLUID_CORE_MODULES} tensorrt_engine mul_op SERIAL)
nv_test(test_trt_fc_op SRCS test_fc_op.cc fc_op.cc
DEPS ${FLUID_CORE_MODULES} tensorrt_engine mul_op SERIAL)
nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc
DEPS ${FLUID_CORE_MODULES} tensorrt_engine activation_op SERIAL)

@ -21,7 +21,8 @@ namespace tensorrt {
class ReluOpConverter : public OpConverter {
public:
ReluOpConverter() {}
void operator()(const framework::proto::OpDesc& op) override {
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope) override {
// Here the two nullptr looks strange, that's because the
// framework::OpDesc's constructor is strange.
framework::OpDesc op_desc(op, nullptr);
@ -32,7 +33,7 @@ class ReluOpConverter : public OpConverter {
nvinfer1::IActivationLayer* layer = TRT_ENGINE_ADD_LAYER(
engine_, Activation, *const_cast<nvinfer1::ITensor*>(input_tensor),
nvinfer1::ActivationType::kRELU);
engine_->SetITensor(op_desc.Output("Out")[0], layer->getOutput(0));
engine_->DeclareOutput(layer, 0, op_desc.Output("Out")[0]);
}
};

@ -13,94 +13,35 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/inference/tensorrt/convert/io_converter.h"
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/place.h"
USE_OP(relu);
#include "paddle/fluid/inference/tensorrt/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace tensorrt {
void Compare(const std::string op_type, float input, float expect) {
TEST(ReluOpConverter, main) {
framework::Scope scope;
platform::CUDAPlace place;
platform::CUDADeviceContext ctx(place);
// init fluid op and variable
auto x_var = scope.Var("X");
auto x_tensor = x_var->GetMutable<framework::LoDTensor>();
x_tensor->Resize({1, 1});
x_tensor->mutable_data<float>(place);
std::vector<float> init;
init.push_back(input);
framework::TensorFromVector(init, ctx, x_tensor);
auto out_var = scope.Var("Out");
auto out_tensor = out_var->GetMutable<framework::LoDTensor>();
out_tensor->Resize({1, 1});
out_tensor->mutable_data<float>(place);
framework::OpDesc op_desc;
op_desc.SetType(op_type);
op_desc.SetInput("X", {"X"});
op_desc.SetOutput("Out", {"Out"});
auto op = framework::OpRegistry::CreateOp(*op_desc.Proto());
// run fluid op
op->Run(scope, place);
// get fluid output
std::vector<float> out1;
framework::TensorToVector(*out_tensor, ctx, &out1);
// init tensorrt op
cudaStream_t stream;
ASSERT_EQ(0, cudaStreamCreate(&stream));
TensorRTEngine* engine = new TensorRTEngine(1, 1 << 10, &stream);
engine->InitNetwork();
engine->DeclareInput("X", nvinfer1::DataType::kFLOAT,
nvinfer1::DimsCHW{1, 1, 1});
// convert op
OpConverter op_converter;
op_converter.ConvertOp(*op_desc.Proto(), engine);
engine->DeclareOutput("Out");
engine->FreezeNetwork();
// convert LoDTensor to ITensor
size_t size = x_tensor->memory_size();
EngineIOConverter::ConvertInput(op_type, *x_tensor,
engine->buffer("X").buffer, size, &stream);
// run tensorrt Outp
engine->Execute(1);
// convert ITensor to LoDTensor
EngineIOConverter::ConvertOutput(op_type, engine->buffer("Out").buffer,
out_tensor, size, &stream);
// get tensorrt output
std::vector<float> out2;
framework::TensorToVector(*out_tensor, ctx, &out2);
// compare
ASSERT_EQ(out1[0], out2[0]);
ASSERT_EQ(out1[0], expect);
delete engine;
cudaStreamDestroy(stream);
}
TEST(OpConverter, ConvertRelu) {
Compare("relu", 1, 1); // relu(1) = 1
Compare("relu", -5, 0); // relu(-5) = 0
std::unordered_set<std::string> parameters;
TRTConvertValidation validator(10, parameters, scope, 1000);
validator.DeclInputVar("relu-X", nvinfer1::Dims2(10, 6));
validator.DeclOutputVar("relu-Out", nvinfer1::Dims2(10, 6));
// Prepare Op description
framework::OpDesc desc;
desc.SetType("relu");
desc.SetInput("X", {"relu-X"});
desc.SetOutput("Out", {"relu-Out"});
LOG(INFO) << "set OP";
validator.SetOp(*desc.Proto());
LOG(INFO) << "execute";
validator.Execute(10);
}
} // namespace tensorrt
} // namespace inference
} // namespace paddle
USE_OP(activation);
USE_OP(relu);

Loading…
Cancel
Save