Add mkldnn int8 mul-op kernel (#17834)

Authored by Physher, committed by Tao Luo
parent ac81c81be1
commit 0caa08ea40

File diff suppressed because it is too large
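Note: the suppressed diff is presumably the new MKL-DNN INT8 mul kernel source added by this commit. As a hedged sketch (not the file's contents), the reference arithmetic the kernel is expected to match, reconstructed in numpy from the unit test added below:

import numpy as np

def int8_mul_reference(x_int8, y_fp32, scale_x, scale_y, scale_out, force_fp32_output):
    # Quantize Y with scale_y, as the test does with np.round(B_data * scale_y[0]).
    y_int = np.round(y_fp32 * scale_y).astype(np.int32)
    # Integer matrix multiply, accumulated in int32.
    acc = np.dot(x_int8.astype(np.int32), y_int)
    # Requantization factor: scale_out / (scale_x * scale_y).
    shift = scale_out / (scale_x * scale_y)
    if force_fp32_output:
        return (acc * shift).astype(np.float32)   # dequantized FP32 output
    return np.round(acc * shift).astype(np.int8)  # requantized INT8 output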

@@ -17,6 +17,9 @@ limitations under the License. */
#include <string>
#include <unordered_map>
#include <vector>
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
namespace paddle {
namespace operators {
@@ -76,6 +79,30 @@ class MulOp : public framework::OperatorWithKernel {
ctx->SetOutputDim("Out", framework::make_ddim(output_dims));
ctx->ShareLoD("X", /*->*/ "Out");
}
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const {
framework::LibraryType library = framework::LibraryType::kPlain;
framework::DataLayout layout = framework::DataLayout::kAnyLayout;
int customized_type_value =
framework::OpKernelType::kDefaultCustomizedTypeValue;
auto input_data_type = ctx.Input<Tensor>("X")->type();
#ifdef PADDLE_WITH_MKLDNN
if (library == framework::LibraryType::kPlain &&
platform::CanMKLDNNBeUsed(ctx)) {
library = framework::LibraryType::kMKLDNN;
layout = framework::DataLayout::kMKLDNN;
if (input_data_type == framework::DataTypeTrait<int8_t>::DataType() ||
input_data_type == framework::DataTypeTrait<uint8_t>::DataType()) {
customized_type_value = kMULMKLDNNINT8;
}
}
#endif
return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout,
library, customized_type_value);
}
};
class MulOpMaker : public framework::OpProtoAndCheckerMaker {
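For readers unfamiliar with Paddle's kernel dispatch: the hunk above selects the MKL-DNN kernel whenever it can be used and, for int8/uint8 input, tags it with kMULMKLDNNINT8 via customized_type_value so the INT8 variant is picked. A rough Python rendering of that decision (illustration only, not part of the patch):

import numpy as np

def expected_mul_kernel(x_dtype, mkldnn_can_be_used):
    # Mirrors GetExpectedKernelType above: library, layout, customized type value.
    library, layout, custom = "Plain", "AnyLayout", "default"
    if mkldnn_can_be_used:
        library, layout = "MKLDNN", "MKLDNN"
        if x_dtype in (np.int8, np.uint8):
            custom = "kMULMKLDNNINT8"
    return library, layout, custom

# e.g. expected_mul_kernel(np.uint8, True) -> ("MKLDNN", "MKLDNN", "kMULMKLDNNINT8")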
@@ -84,6 +111,9 @@ class MulOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "(Tensor), The first input tensor of mul op.");
AddInput("Y", "(Tensor), The second input tensor of mul op.");
AddOutput("Out", "(Tensor), The output tensor of mul op.");
AddAttr<bool>("use_mkldnn",
"(bool, default false) Only used in mkldnn kernel")
.SetDefault(false);
AddAttr<int>(
"x_num_col_dims",
R"DOC((int, default 1), The mul_op can take tensors with more than two
@@ -114,6 +144,23 @@ class MulOpMaker : public framework::OpProtoAndCheckerMaker {
)DOC")
.SetDefault(1)
.EqualGreaterThan(1);
AddAttr<float>("scale_x",
"scale_x to used for int8 input data x."
"Only used with MKL-DNN INT8")
.SetDefault(1.0f);
AddAttr<std::vector<float>>("scale_y",
"scale_y to used for int8 input data y."
"Only used with MKL-DNN INT8")
.SetDefault({1.0f});
AddAttr<float>("scale_out",
"scale_out to be used for int8 output data."
"Only used with MKL-DNN INT8")
.SetDefault(1.0f);
AddAttr<bool>(
"force_fp32_output",
"(bool, default false) Force quantize kernel output FP32, only "
"used in quantized MKL-DNN.")
.SetDefault(false);
AddComment(R"DOC(
Mul Operator.
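The four new attributes above are what a quantized graph sets on the mul op. A minimal sketch of the attribute values, mirroring the dictionary used by the unit test added later in this commit (values are illustrative):

attrs = {
    "use_mkldnn": True,         # route the op to the MKL-DNN kernel
    "scale_x": 0.6,             # quantization scale of input X
    "scale_y": [0.8],           # quantization scale(s) of input Y (vector attribute)
    "scale_out": 1.0,           # requantization scale of the output
    "force_fp32_output": False, # True: dequantize the result back to FP32
}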
@@ -237,14 +284,19 @@ class MulDoubleGradMaker : public framework::SingleGradOpDescMaker {
namespace ops = paddle::operators;
REGISTER_OPERATOR(mul, ops::MulOp, ops::MulOpMaker, ops::MulOpInferVarType,
ops::MulOpGradMaker);
REGISTER_OPERATOR(mul_grad, ops::MulGradOp, ops::MulDoubleGradMaker);
REGISTER_OPERATOR(mul_grad_grad, ops::MulDoubleGradOp);
REGISTER_OP_CPU_KERNEL(
mul, ops::MulKernel<paddle::platform::CPUDeviceContext, float>,
ops::MulKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
mul_grad, ops::MulGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::MulGradKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
mul_grad_grad,
ops::MulDoubleGradKernel<paddle::platform::CPUDeviceContext, float>,

@@ -24,6 +24,8 @@ namespace operators {
using Tensor = framework::Tensor;
constexpr int kMULMKLDNNINT8 = 1;
template <typename DeviceContext, typename T>
class MulKernel : public framework::OpKernel<T> {
public:

@@ -15,6 +15,7 @@ limitations under the License. */
#include <mkldnn.h>
#include <algorithm>
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/operator.h"
@@ -89,6 +90,16 @@ inline mkldnn::memory::data_type MKLDNNGetDataType<float>() {
return mkldnn::memory::f32;
}
template <>
inline mkldnn::memory::data_type MKLDNNGetDataType<int8_t>() {
return mkldnn::memory::s8;
}
template <>
inline mkldnn::memory::data_type MKLDNNGetDataType<uint8_t>() {
return mkldnn::memory::u8;
}
inline void Reorder(const mkldnn::memory& src, const mkldnn::memory& dst) {
auto reorder_prim = mkldnn::reorder(src, dst);
std::vector<mkldnn::primitive> pipeline;

@@ -0,0 +1,166 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
import paddle.fluid.core as core
from paddle.fluid.tests.unittests.op_test import OpTest
'''
test case for s8 * s8
'''
class TestMKLDNNMulOpS8S8(OpTest):
def setUp(self):
self.op_type = "mul"
self.init_kernel_type()
self.init_data_type()
self.init_data()
self.attrs = {
"use_mkldnn": self.use_mkldnn,
"scale_x": self.scale_x,
"scale_y": self.scale_y,
"scale_out": self.scale_out,
"force_fp32_output": self.force_fp32,
}
def init_kernel_type(self):
self.use_mkldnn = True
self.force_fp32 = True
def init_data_type(self):
self.srctype = np.int8
self.dsttype = np.float32 if self.force_fp32 else np.int8
def init_data(self):
self.scale_x = 0.6
self.scale_y = [0.8]
self.scale_out = 1.0
# limit random range to [-127, 127] to avoid overflow on SKL
if self.srctype == np.int8:
A_data = np.random.randint(-127, 127, (2, 5)).astype(np.int8)
else:
A_data = np.random.randint(0, 127, (2, 5)).astype(np.uint8)
B_data = np.random.uniform(-127, 127, (5, 3)).astype(np.float32)
quant_B = np.round(B_data * self.scale_y[0]).astype(np.int32)
output = np.dot(A_data, quant_B)
scale_output_shift = (self.scale_out) / \
(self.scale_x * self.scale_y[0])
if (self.force_fp32):
output = (output * scale_output_shift).astype(self.dsttype)
else:
output = np.round(output * scale_output_shift).astype(self.dsttype)
self.inputs = {'X': A_data, 'Y': B_data}
self.outputs = {'Out': output}
def test_check_output(self):
self.check_output_with_place(core.CPUPlace(), atol=0)
def test_check_grad_normal(self):
pass
def test_check_grad_ingore_x(self):
pass
def test_check_grad_ingore_y(self):
pass
'''
test case for s8 * u8
'''
class TestMKLDNNMulOpS8U8(TestMKLDNNMulOpS8S8):
def init_data_type(self):
self.srctype = np.uint8
self.dsttype = np.float32 if self.force_fp32 else np.int8
'''
test case for s8 * s8
'''
class TestMKLDNNMulOpS8S8WithFlatten(TestMKLDNNMulOpS8S8):
def setUp(self):
self.op_type = "mul"
self.init_kernel_type()
self.init_data_type()
self.init_data()
self.attrs = {
"use_mkldnn": self.use_mkldnn,
"scale_x": self.scale_x,
"scale_y": self.scale_y,
"scale_out": self.scale_out,
"force_fp32_output": self.force_fp32,
"x_num_col_dims": 2,
"y_num_col_dims": 2,
}
def init_data(self):
self.scale_x = 0.6
self.scale_y = [0.8]
self.scale_out = 1.0
# limit random range to [-127, 127] to avoid overflow on SKL
if self.srctype == np.int8:
A_data = np.random.randint(-127, 127, (3, 4, 4, 3)).astype(np.int8)
else:
A_data = np.random.randint(0, 127, (3, 4, 4, 3)).astype(np.uint8)
B_data = np.random.uniform(-127, 127,
(2, 6, 1, 2, 3)).astype(np.float32)
A_data_reshape = A_data.reshape(3 * 4, 4 * 3)
B_data_reshape = B_data.reshape(2 * 6, 1 * 2 * 3)
quant_B = np.round(B_data_reshape * self.scale_y[0]).astype(np.int32)
output = np.dot(A_data_reshape, quant_B)
scale_output_shift = (self.scale_out) / \
(self.scale_x * self.scale_y[0])
if (self.force_fp32):
output = (output * scale_output_shift).astype(self.dsttype)
else:
output = np.round(output * scale_output_shift).astype(self.dsttype)
output = output.reshape(3, 4, 1, 2, 3)
self.inputs = {'X': A_data, 'Y': B_data}
self.outputs = {'Out': output}
'''
test case for s8 * u8
'''
class TestMKLDNNMulOpS8U8WithFlatten(TestMKLDNNMulOpS8S8WithFlatten):
def init_data_type(self):
self.srctype = np.uint8
self.dsttype = np.float32 if self.force_fp32 else np.int8
if __name__ == '__main__':
unittest.main()

@@ -69,7 +69,8 @@ class TestOperator(unittest.TestCase):
set(mul_op.attr_names),
set([
"x_num_col_dims", "y_num_col_dims", "op_role", "op_role_var",
"op_namescope", "op_callstack"
"use_mkldnn", "scale_x", "scale_y", "scale_out",
"force_fp32_output", "op_namescope", "op_callstack"
]))
self.assertEqual(mul_op.has_attr("x_num_col_dims"), True)
self.assertEqual(mul_op.attr_type("x_num_col_dims"), core.AttrType.INT)
