Add int8 GRU kernel (#27220)
* Add int8 GRU kernel with UTs
* Lint fixes
* More lint fixes
parent 255e0cf978
commit cc3f4b813a
@@ -0,0 +1,145 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import numpy as np
from paddle.fluid.tests.unittests.op_test import OpTest
from paddle.fluid.tests.unittests.test_fusion_gru_op import fusion_gru
from paddle.fluid.tests.unittests.test_fusion_lstm_op import fc, ACTIVATION


class TestFusionGRUINT8MKLDNNOp(OpTest):
    def set_confs(self):
        pass

    def setUp(self):
        self.op_type = "fusion_gru"
        self.lod = [[2, 4, 3]]
        self.IC = 3
        self.OC = 5
        self.is_reverse = False
        self.with_h0 = False
        self.with_bias = True
        self.act_state = 'tanh'
        self.act_gate = 'sigmoid'
        self.origin_mode = True
        self.use_mkldnn = True
        self.force_fp32_output = True
        self.error_margin = 1e-5
        self.set_confs()

        # RNN dimensions
        T = sum(self.lod[0])
        N = len(self.lod[0])

        # Input data
        x_f32 = np.random.rand(T, self.IC).astype('float32') * 2 - 1
        scale_data = 63
        shift_data = 64
        x_u8 = (x_f32 * scale_data + shift_data).astype(np.uint8)
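        # With Scale_data=63 and Shift_data=64, x_f32 in [-1, 1) is mapped
        # affinely into [1, 127), so the whole input range fits into uint8
        # without saturation (astype truncates toward zero).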

        # WeightX/WeightH data
        wx = np.random.rand(self.IC, 3 * self.OC).astype('float32') * 2 - 1
        wh = np.random.rand(self.OC, 3 * self.OC).astype('float32') * 2 - 1

        # Calculating weight scales
        # scales = 63 / max(abs(channel_wise(weightsX + weightsH)))
        # WeightX data shape in PP: [IC, 3 * OC]
        # WeightH data shape in PP: [OC, 2 * OC] + [OC, OC]
        # Scales shape in oneDNN: [3, OC]
        scale_ur = 63 / np.max(np.abs(
            np.concatenate(
                [
                    wx[:, :2 * self.OC], wh.flatten()[:2 * self.OC * self.OC]
                    .reshape(self.OC, 2 * self.OC)
                ],
                axis=0)),
                               axis=0)
        scale_o = 63 / np.max(np.abs(
            np.concatenate(
                [
                    wx[:, 2 * self.OC:], wh.flatten()[2 * self.OC * self.OC:]
                    .reshape(self.OC, self.OC)
                ],
                axis=0)),
                              axis=0)

        scale_weights = np.concatenate([scale_ur, scale_o]).astype('float')
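        # scale_ur holds one scale per output channel of the update/reset
        # gates (shape [2 * OC]); scale_o covers the output-gate channels
        # (shape [OC]); concatenated they form the [3 * OC] channel-wise
        # scales passed below as the 'Scale_weights' attribute.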

        bias = np.random.rand(
            1, 3 * self.OC).astype('float32') if self.with_bias else np.zeros(
                (1, 3 * self.OC), dtype='float32')
        h0 = np.random.rand(
            N, self.OC).astype('float32') if self.with_h0 else np.zeros(
                (N, self.OC), dtype='float32')

        _, _, _, hidden_f32 = fusion_gru(x_f32, self.lod, h0, wx, wh, bias,
                                         self.is_reverse, self.origin_mode,
                                         ACTIVATION[self.act_state],
                                         ACTIVATION[self.act_gate])
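        # hidden_f32 is the fp32 reference produced by the pure-Python
        # fusion_gru helper; the quantized kernel output is checked against it.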

        self.inputs = {'X': (x_u8, self.lod), 'WeightX': wx, 'WeightH': wh}

        if self.with_bias:
            self.inputs['Bias'] = bias

        if self.with_h0:
            self.inputs['H0'] = h0

        if self.force_fp32_output:
            self.error_margin = 1e-1
            self.outputs = {'Hidden': (hidden_f32, self.lod)}
        else:
            self.error_margin = 1
            hidden_u8 = (hidden_f32 * scale_data + shift_data).astype(np.uint8)
            self.outputs = {'Hidden': (hidden_u8, self.lod)}
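        # Two output modes are covered: with force_fp32_output the kernel
        # dequantizes internally and the fp32 result is checked with a 1e-1
        # margin; otherwise the u8 output is requantized with the same
        # scale/shift as the input and a margin of 1 (one quantization step).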

        self.attrs = {
            'activation': self.act_state,
            'gate_activation': self.act_gate,
            'is_reverse': self.is_reverse,
            'origin_mode': self.origin_mode,
            'use_mkldnn': self.use_mkldnn,
            'force_fp32_output': self.force_fp32_output,
            'Scale_data': scale_data,
            'Shift_data': shift_data,
            'Scale_weights': scale_weights
        }

    def test_check_output(self):
        self.check_output(check_dygraph=False, atol=self.error_margin)
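        # check_dygraph=False skips the dygraph consistency check; this fused
        # MKL-DNN op is exercised through the static graph path only.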
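

# Each subclass below overrides set_confs() to flip one setting relative to
# the base test: quantized u8 output (Op2), non-origin-mode GRU (Op3), no
# bias (Op4), and no initial hidden state (Op5, which matches the base
# default configuration).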
class TestFusionGRUINT8MKLDNNOp2(TestFusionGRUINT8MKLDNNOp):
    def set_confs(self):
        self.force_fp32_output = False


class TestFusionGRUINT8MKLDNNOp3(TestFusionGRUINT8MKLDNNOp):
    def set_confs(self):
        self.origin_mode = False


class TestFusionGRUINT8MKLDNNOp4(TestFusionGRUINT8MKLDNNOp):
    def set_confs(self):
        self.with_bias = False


class TestFusionGRUINT8MKLDNNOp5(TestFusionGRUINT8MKLDNNOp):
    def set_confs(self):
        self.with_h0 = False


if __name__ == "__main__":
    unittest.main()