[DNNL][INT8][FP32] MatMul (#23395)
* Initial FP32 DNNL MatMul implementation
* Implement int8 DNNL MatMul
* Unify in-kernel naming, clean UTs
* MatMul: introduce op caching
* Final adjustments test=develop
* Remove dy_graph disablement test=develop
* Change dnnl header name to the new one test=develop
* Constrain the multi-head check to prevent failures test=develop
* Resolve dnnl header problems on MAC CI
* Add variable namings to kernel and skip_grad_ci test=develop
* Prevent MAC CI from failing
* Prevent Windows build from failing test=develop
* Modify UTs to conform to the rules
* Modify MatMul aux function namings test=develop
parent a2315d37cf
commit a63bcf9ae7
@@ -0,0 +1,165 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
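
# Unit tests for the oneDNN (DNNL) MatMul kernel: fp32 matmul with alpha and
# transpose attributes, plus int8 matmul with explicit scales, default scales,
# and force_fp32_output.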

from __future__ import print_function

import os
import unittest

import numpy as np
from paddle.fluid.tests.unittests.op_test import OpTest, skip_check_grad_ci


@skip_check_grad_ci(reason="DNNL's MatMul doesn't implement a grad kernel.")
class TestDnnlMatMulOp(OpTest):
    def generate_data(self):
        self.x = np.random.random((25, 2, 2)).astype("float32")
        self.y = np.random.random((25, 2, 2)).astype("float32")
        self.alpha = 1.0
        self.out = self.alpha * np.matmul(self.x, self.y)

    def set_attributes(self):
        self.alpha = getattr(self, 'alpha', 1.0)
        self.attrs = {'alpha': self.alpha}

    def setUp(self):
        # Set the max ISA, otherwise the test fails on SKX and earlier.
        os.environ["DNNL_MAX_CPU_ISA"] = "AVX"
        self.op_type = "matmul"
        self._cpu_only = True
        self.use_mkldnn = True
        self.generate_data()
        self.set_attributes()
        self.attrs['use_mkldnn'] = True

        self.inputs = {'X': self.x, 'Y': self.y}
        self.outputs = {'Out': self.out}

    def test_check_output(self):
        self.check_output()
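
# Each subclass below overrides generate_data() (and, where needed,
# set_attributes()) to exercise a different shape, transpose, alpha, or
# quantization variant of the DNNL matmul kernel.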


class TestDnnlMatMulOpAlpha(TestDnnlMatMulOp):
    def generate_data(self):
        self.x = np.random.random((17, 2, 3)).astype("float32")
        self.y = np.random.random((17, 3, 2)).astype("float32")
        self.alpha = 2.0
        self.out = self.alpha * np.matmul(self.x, self.y)


class TestDnnlMatMulOp2D(TestDnnlMatMulOp):
    def generate_data(self):
        self.x = np.random.random((12, 9)).astype("float32")
        self.y = np.random.random((9, 12)).astype("float32")
        self.out = np.matmul(self.x, self.y)


class TestDnnlMatMulOpTransposeX(TestDnnlMatMulOp):
    def generate_data(self):
        self.x = np.random.random((12, 9)).astype("float32")
        self.y = np.random.random((12, 9)).astype("float32")
        self.out = np.matmul(np.transpose(self.x), self.y)

    def set_attributes(self):
        self.attrs = {'transpose_X': True}


class TestDnnlMatMulOpTransposeY(TestDnnlMatMulOp):
    def generate_data(self):
        self.x = np.random.random((12, 9)).astype("float32")
        self.y = np.random.random((12, 9)).astype("float32")
        self.out = np.matmul(self.x, np.transpose(self.y))

    def set_attributes(self):
        self.attrs = {'transpose_Y': True}


class TestDnnlMatMulOpTransposeY3D(TestDnnlMatMulOp):
    def generate_data(self):
        self.x = np.random.random((17, 3, 2)).astype("float32")
        self.y = np.random.random((17, 3, 2)).astype("float32")
        # transpose_Y on a batched input swaps only the last two dims.
        self.out = np.matmul(self.x, np.transpose(self.y, (0, 2, 1)))

    def set_attributes(self):
        self.attrs = {'transpose_Y': True}


class TestDnnlMatMulOpInt8NoScales(TestDnnlMatMulOp):
    def generate_data(self):
        # np.random.random() yields floats in [0, 1), which an int8 cast
        # truncates to all zeros, so draw small integers instead.
        self.x = np.random.randint(0, 3, (12, 9)).astype("int8")
        self.y = np.random.randint(0, 3, (9, 12)).astype("int8")
        self.out = np.matmul(self.x, self.y)


class TestDnnlMatMulOpInt8(TestDnnlMatMulOp):
    def quantize(self, tensor):
        # Symmetric quantization: map the largest magnitude onto 127.
        scale = 127. / np.amax(np.abs(tensor))
        quantized = np.round(scale * tensor).astype("int8")
        return scale, quantized

    def generate_data(self):
        x_float = np.random.random((12, 9)).astype("float32")
        self.x_scale, self.x = self.quantize(x_float)

        y_float = np.random.random((9, 12)).astype("float32")
        self.y_scale, self.y = self.quantize(y_float)

        out_float = np.matmul(x_float, y_float)
        self.out_scale, self.out = self.quantize(out_float)

    def set_attributes(self):
        self.attrs = {
            'Scale_x': self.x_scale,
            'Scale_y': self.y_scale,
            'Scale_out': self.out_scale,
        }

    def test_check_output(self):
        # Quantization rounds to the nearest integer, so tolerate an
        # off-by-one difference in the int8 output.
        int_atol = 1
        self.check_output(atol=int_atol)
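
# A sketch of how the scale attributes are expected to relate the int8
# tensors to the float reference (inferred from the quantize() helper above,
# not taken from the kernel source):
#
#     self.x   ~= round(Scale_x * x_float)
#     self.y   ~= round(Scale_y * y_float)
#     self.out ~= round(Scale_out * (x_float @ y_float))
#
# so the kernel should rescale the raw integer product of X and Y by
# Scale_out / (Scale_x * Scale_y) before saturating to int8.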


class TestDnnlMatMulOpInt8ForceFP32(TestDnnlMatMulOpInt8):
    def generate_data(self):
        x_float = np.random.random((12, 9)).astype("float32")
        self.x_scale, self.x = self.quantize(x_float)

        y_float = np.random.random((9, 12)).astype("float32")
        self.y_scale, self.y = self.quantize(y_float)

        # With force_fp32_output the reference output stays unquantized.
        out_float = np.matmul(x_float, y_float)
        self.out = out_float

    def set_attributes(self):
        self.attrs = {
            'Scale_x': self.x_scale,
            'Scale_y': self.y_scale,
            'force_fp32_output': True
        }


class TestDnnlMatMulOpInt8ForceFP32BasicScales(TestDnnlMatMulOp):
    def generate_data(self):
        self.x = np.random.randint(0, 3, (12, 9)).astype("int8")
        self.y = np.random.randint(0, 3, (9, 12)).astype("int8")
        self.out = np.matmul(self.x, self.y).astype("float32")

    def set_attributes(self):
        self.attrs = {'force_fp32_output': True}


if __name__ == "__main__":
    unittest.main()