@@ -338,8 +338,6 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
    const auto& mkldnn_engine = dev_ctx.GetEngine();

    auto* input = ctx.Input<Tensor>("Input");
    auto* filter = ctx.Input<Tensor>("Filter");
    auto* bias = ctx.HasInput("Bias") ? ctx.Input<Tensor>("Bias") : nullptr;
    auto* output = ctx.Output<Tensor>("Output");

    PADDLE_ENFORCE_EQ(input->layout(), DataLayout::kMKLDNN,
@@ -347,11 +345,6 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
    PADDLE_ENFORCE_NE(input->format(), MKLDNNMemoryFormat::format_undef,
                      "Wrong format set for Input tensor");

    PADDLE_ENFORCE_EQ(filter->layout(), DataLayout::kMKLDNN,
                      "Wrong layout set for Filter tensor");
    PADDLE_ENFORCE_NE(filter->format(), MKLDNNMemoryFormat::format_undef,
                      "Wrong format set for Filter tensor");

    PADDLE_ENFORCE_GE(
        input->dims().size(), 4,
        "Input must be with 4 or 5 dimensions, i.e. NCHW or NCDHW");
@@ -359,57 +352,14 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
        input->dims().size(), 5,
        "Input must be with 4 or 5 dimensions, i.e. NCHW or NCDHW");

    PADDLE_ENFORCE_GE(
        filter->dims().size(), 4,
        "Filter must be with 4 or 5 dimensions, i.e. OIHW or OIDHW");
    PADDLE_ENFORCE_LE(
        filter->dims().size(), 5,
        "Filter must be with 4 or 5 dimensions, i.e. OIHW or OIDHW");

    if (bias) {
      PADDLE_ENFORCE_EQ(bias->layout(), DataLayout::kMKLDNN,
                        "Wrong layout set for Bias tensor");
      PADDLE_ENFORCE_NE(bias->format(), MKLDNNMemoryFormat::format_undef,
                        "Wrong format set for Bias tensor");

      PADDLE_ENFORCE_EQ(bias->dims().size(), 1,
                        "Bias must only have 1 dimension, i.e. X");
    }

    std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
    std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
    std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
    int groups = ctx.Attr<int>("groups");
    std::string fuse_activation = ctx.Attr<std::string>("fuse_activation");
    float fuse_alpha = ctx.Attr<float>("fuse_alpha");
    float fuse_beta = ctx.Attr<float>("fuse_beta");
    bool fuse_residual_conn = ctx.Attr<bool>("fuse_residual_connection");
    bool force_fp32_output = ctx.Attr<bool>("force_fp32_output");
    bool unsigned_output =
        (fuse_activation == "relu" || fuse_activation == "relu6");

    PADDLE_ENFORCE(!fuse_residual_conn || !force_fp32_output,
                   "residual fusion does not support force output with fp32");

    bool is_conv3d = strides.size() == 3U;
    PADDLE_ENFORCE(
        is_conv3d
            ? dilations.size() == 3 && dilations[0] == 1 && dilations[1] == 1 &&
                  dilations[2] == 1
            : dilations.size() == 2 && dilations[0] == 1 && dilations[1] == 1,
        "dilation in convolution is not implemented yet");

    PADDLE_ENFORCE_NE(is_conv3d, true,
                      "int8 does not support conv3d currently");

    const T* input_data = input->data<T>();

    auto src_tz = paddle::framework::vectorize<int>(input->dims());
    auto weights_tz = paddle::framework::vectorize<int>(filter->dims());
    int g = std::max(groups, 1);

    GetWeightsTz(weights_tz, g, is_conv3d);
    auto dst_tz = paddle::framework::vectorize<int>(output->dims());

    mkldnn::memory::data_type src_dt =
        paddle::framework::ToMKLDNNDataType(input->type());
@@ -448,6 +398,63 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
        dev_ctx.GetBlob(prim_key));

    if (conv_p == nullptr || !is_test) {
      float fuse_alpha = ctx.Attr<float>("fuse_alpha");
      float fuse_beta = ctx.Attr<float>("fuse_beta");
      bool force_fp32_output = ctx.Attr<bool>("force_fp32_output");

      auto* filter = ctx.Input<Tensor>("Filter");

      PADDLE_ENFORCE_EQ(filter->layout(), DataLayout::kMKLDNN,
                        "Wrong layout set for Filter tensor");
      PADDLE_ENFORCE_NE(filter->format(), MKLDNNMemoryFormat::format_undef,
                        "Wrong format set for Filter tensor");

      PADDLE_ENFORCE_GE(
          filter->dims().size(), 4,
          "Filter must be with 4 or 5 dimensions, i.e. OIHW or OIDHW");
      PADDLE_ENFORCE_LE(
          filter->dims().size(), 5,
          "Filter must be with 4 or 5 dimensions, i.e. OIHW or OIDHW");

      PADDLE_ENFORCE_EQ(
          !fuse_residual_conn || !force_fp32_output, true,
          "residual fusion does not support force output with fp32");

      auto* bias = ctx.HasInput("Bias") ? ctx.Input<Tensor>("Bias") : nullptr;

      if (bias) {
        PADDLE_ENFORCE_EQ(bias->layout(), DataLayout::kMKLDNN,
                          "Wrong layout set for Bias tensor");
        PADDLE_ENFORCE_NE(bias->format(), MKLDNNMemoryFormat::format_undef,
                          "Wrong format set for Bias tensor");

        PADDLE_ENFORCE_EQ(bias->dims().size(), 1,
                          "Bias must only have 1 dimension, i.e. X");
      }

      std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
      std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
      std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");

      bool is_conv3d = strides.size() == 3U;

      PADDLE_ENFORCE_NE(is_conv3d, true,
                        "int8 does not support conv3d currently");

      int groups = ctx.Attr<int>("groups");
      auto weights_tz = paddle::framework::vectorize<int>(filter->dims());
      int g = std::max(groups, 1);

      GetWeightsTz(weights_tz, g, is_conv3d);
      auto dst_tz = paddle::framework::vectorize<int>(output->dims());

      PADDLE_ENFORCE_EQ(
          is_conv3d
              ? dilations.size() == 3 && dilations[0] == 1 &&
                    dilations[1] == 1 && dilations[2] == 1
              : dilations.size() == 2 && dilations[0] == 1 && dilations[1] == 1,
          true, "dilation in convolution is not implemented yet");

      const K* filter_data = filter->data<K>();
      auto scale_in_data = ctx.Attr<float>("Scale_in");
      auto scale_in_eltwise_data = ctx.Attr<float>("Scale_in_eltwise");