[Paddle-TRT] trt affine channel converter (#31628)

* trt affine channel converter * add trt affine channel base test * add trt affine channel NHWC * remove asterisk for python2 compatibility * trt affine channel converter * add trt affine channel base test * add trt affine channel NHWC * remove asterisk for python2 compatibility * fix rebase * move LodTensor to Tensor * add dbg info * affine channel converter only support NCHW * scale,bias are parameters, use create_parameters api * reduce test input size to not exceed the timelimit of ci * refine affine channel unittest and add serialization/dynamic test * change super to InferencePassTest for python2 compatibility * change super to InferencePassTest for python2 compatibility * fix affine channel fp16 serialize setting
4 years ago · bfb5cf5567
parent b47478efc2
commit bfb5cf5567
5 changed files with 246 additions and 1 deletions
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@ -1192,6 +1192,7 @@ USE_TRT_CONVERTER(scale);
 USE_TRT_CONVERTER(stack);
 USE_TRT_CONVERTER(clip);
 USE_TRT_CONVERTER(gather);
+USE_TRT_CONVERTER(affine_channel);
 USE_TRT_CONVERTER(multiclass_nms);
 USE_TRT_CONVERTER(nearest_interp);
 #endif
--- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
@ -6,6 +6,7 @@ nv_library(tensorrt_converter
                shuffle_channel_op.cc swish_op.cc instance_norm_op.cc stack_op.cc transpose_op.cc flatten_op.cc
                emb_eltwise_layernorm.cc skip_layernorm.cc scale_op.cc slice_op.cc hard_sigmoid_op.cc hard_swish_op.cc clip_op.cc
                gather_op.cc
+                affine_channel_op.cc
                multiclass_nms_op.cc
                nearest_interp_op.cc
           DEPS tensorrt_engine tensorrt_plugin operator scope framework_proto op_registry)
--- a/paddle/fluid/inference/tensorrt/convert/affine_channel_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/affine_channel_op.cc
@ -0,0 +1,94 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/framework/data_layout.h"
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+
+namespace paddle {
+namespace framework {
+class Scope;
+
+namespace proto {
+class OpDesc;
+}  // namespace proto
+}  // namespace framework
+}  // namespace paddle
+
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+
+/*
+ * Affine Channel Op
+ */
+class AffineChannelOpConverter : public OpConverter {
+ public:
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope, bool test_mode) override {
+    VLOG(3) << "convert a fluid affine_channel op to tensorrt scale nd layer";
+
+    framework::OpDesc op_desc(op, nullptr);
+    std::string input_name = op_desc.Input("X").front();
+    std::string scale_name = op_desc.Input("Scale").front();
+    std::string bias_name = op_desc.Input("Bias").front();
+    std::string output_name = op_desc.Output("Out").front();
+
+    auto input_tensor = engine_->GetITensor(input_name);
+    auto idim = input_tensor->getDimensions();
+
+    auto* scale_v = scope.FindVar(scale_name);
+    auto* scale_t = scale_v->GetMutable<framework::LoDTensor>();
+    float* scale_ptr = engine_->GetWeightCPUData(scale_name, scale_t, false);
+
+    auto* bias_v = scope.FindVar(bias_name);
+    auto* bias_t = bias_v->GetMutable<framework::LoDTensor>();
+    float* bias_ptr = engine_->GetWeightCPUData(bias_name, bias_t, false);
+
+    auto data_layout = framework::StringToDataLayout(
+        BOOST_GET_CONST(std::string, op_desc.GetAttr("data_layout")));
+
+    PADDLE_ENFORCE_EQ(
+        data_layout, framework::DataLayout::kNCHW,
+        platform::errors::InvalidArgument(
+            "TensorRT affine channel converter can only convert NCHW format. "
+            "Other format should be run in fluid mode. Report a bug on github "
+            "issue if you see this line."));
+
+    // tensorrt scalend layer only support spatial dims >= 2,
+    // so nhwc is not availabe (spatial dims == 0)
+    const int channel_axis = engine_->with_dynamic_shape();
+
+    TensorRTEngine::Weight scale_weights{nvinfer1::DataType::kFLOAT,
+                                         static_cast<void*>(scale_ptr),
+                                         (size_t)idim.d[channel_axis]};
+    TensorRTEngine::Weight bias_weights{nvinfer1::DataType::kFLOAT,
+                                        static_cast<void*>(bias_ptr),
+                                        (size_t)idim.d[channel_axis]};
+    TensorRTEngine::Weight power_weights{nvinfer1::DataType::kFLOAT, nullptr,
+                                         0};
+
+    auto layer = TRT_ENGINE_ADD_LAYER(engine_, ScaleNd, *input_tensor,
+                                      nvinfer1::ScaleMode::kCHANNEL,
+                                      bias_weights.get(), scale_weights.get(),
+                                      power_weights.get(), channel_axis);
+
+    RreplenishLayerAndOutput(layer, "affine_channel", {output_name}, test_mode);
+  }
+};
+
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_TRT_OP_CONVERTER(affine_channel, AffineChannelOpConverter);
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@ -111,6 +111,7 @@ struct SimpleOpTypeSetTeller : public Teller {
      "flatten2",
      "flatten",
      "gather",
+      "affine_channel",
      "multiclass_nms",
      "nearest_interp",
  };
@ -196,6 +197,13 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
      if (!with_dynamic_shape || desc.Input("Axis").size() > 0) return false;
    }

+    if (op_type == "affine_channel") {
+      if (!desc.HasAttr("data_layout")) return false;
+      auto data_layout = framework::StringToDataLayout(
+          BOOST_GET_CONST(std::string, desc.GetAttr("data_layout")));
+      if (data_layout != framework::DataLayout::kNCHW) return false;
+    }
+
    if (op_type == "multiclass_nms") {
      if (with_dynamic_shape) return false;
      auto* block = desc.Block();
@ -238,6 +246,7 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
        return false;
      }
    }
+
    if (op_type == "nearest_interp") {
      std::vector<std::string> attrs{"data_layout",   "interp_method",
                                     "align_corners", "scale",
@ -254,7 +263,6 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
          BOOST_GET_CONST(std::string, desc.GetAttr("interp_method"));
      if (interp_method != "nearest") return false;
    }
-
    if ((*teller)(op_type, desc, use_no_calib_int8)) return true;
  }
  return false;
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_affine_channel_op.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_affine_channel_op.py
@ -0,0 +1,141 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import itertools
+import numpy as np
+from inference_pass_test import InferencePassTest
+import paddle.fluid as fluid
+import paddle.fluid.core as core
+from paddle.fluid.core import PassVersionChecker
+from paddle.fluid.core import AnalysisConfig
+
+
+class TRTAffineChannelTest(InferencePassTest):
+    def setUp(self):
+        self.bs = 2
+        self.channel = 8
+        self.height = 16
+        self.width = 16
+        self.data_layout = 'NCHW'
+        self.precision = AnalysisConfig.Precision.Float32
+        self.serialize = False
+        self.enable_trt = True
+
+    def build(self):
+        # set min_graph_size to 2, 
+        # because affine channel doesn't support nhwc format
+        self.trt_parameters = InferencePassTest.TensorRTParam(
+            1 << 30, self.bs, 2, self.precision, self.serialize, False)
+
+        with fluid.program_guard(self.main_program, self.startup_program):
+            if self.data_layout == 'NCHW':
+                shape = [-1, self.channel, self.height, self.width]
+            else:
+                shape = [-1, self.height, self.width, self.channel]
+
+            data = fluid.data(name='in', shape=shape, dtype='float32')
+            # set scale, bias by constant
+            scale = fluid.layers.create_parameter(
+                shape=[self.channel],
+                dtype='float32',
+                default_initializer=fluid.initializer.Constant(2.))
+            bias = fluid.layers.create_parameter(
+                shape=[self.channel],
+                dtype='float32',
+                default_initializer=fluid.initializer.Constant(.5))
+            affine_channel_out = fluid.layers.affine_channel(
+                data, scale=scale, bias=bias, data_layout=self.data_layout)
+            out = fluid.layers.batch_norm(affine_channel_out, is_test=True)
+
+        shape[0] = self.bs
+        self.feeds = {'in': np.random.random(shape).astype('float32'), }
+        self.fetch_list = [out]
+
+    def check_output(self):
+        if core.is_compiled_with_cuda():
+            use_gpu = True
+            atol = 1e-5
+            if self.trt_parameters.precision == AnalysisConfig.Precision.Half:
+                atol = 1e-3
+            self.check_output_with_option(use_gpu, atol, flatten=True)
+            self.assertTrue(
+                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
+
+    def run_test(self):
+        self.build()
+        self.check_output()
+
+    def run_test_all(self):
+        precision_opt = [
+            AnalysisConfig.Precision.Float32, AnalysisConfig.Precision.Half
+        ]
+        serialize_opt = [False, True]
+
+        if self.data_layout == 'NCHW':
+            min_shape = [
+                self.bs, self.channel, self.height // 2, self.width // 2
+            ]
+            max_shape = [self.bs, self.channel, self.height * 2, self.width * 2]
+            opt_shape = [self.bs, self.channel, self.height, self.width]
+
+        if self.data_layout == 'NHWC':
+            min_shape = [
+                self.bs, self.height // 2, self.width // 2, self.channel
+            ]
+            max_shape = [self.bs, self.height * 2, self.width * 2, self.channel]
+            opt_shape = [self.bs, self.height, self.width, self.channel]
+
+        dynamic_shape_profile = InferencePassTest.DynamicShapeParam({
+            'in': min_shape
+        }, {'in': max_shape}, {'in': opt_shape}, False)
+        dynamic_shape_opt = [None, dynamic_shape_profile]
+
+        for precision, serialize, dynamic_shape in itertools.product(
+                precision_opt, serialize_opt, dynamic_shape_opt):
+            self.precision = precision
+            self.serialize = serialize
+            self.dynamic_shape_params = dynamic_shape
+            self.run_test()
+
+    def test_base(self):
+        self.run_test()
+
+    def test_fp16(self):
+        self.precision = AnalysisConfig.Precision.Half
+        self.run_test()
+
+    def test_serialize(self):
+        self.serialize = True
+        self.run_test()
+
+    def test_dynamic(self):
+        self.dynamic_shape_params = InferencePassTest.DynamicShapeParam({
+            'in': [self.bs, self.channel, self.height // 2, self.width // 2]
+        }, {'in': [self.bs, self.channel, self.height * 2, self.width * 2]
+            }, {'in': [self.bs, self.channel, self.height, self.width]}, False)
+        self.run_test()
+
+    def test_nchw_all(self):
+        self.run_test_all()
+
+    def test_nhwc(self):
+        self.data_layout = 'NHWC'
+        self.run_test_all()
+
+
+if __name__ == "__main__":
+    unittest.main()