Add mkldnn int8 mul-op kernel (#17834)

Authored by Physher, committed by Tao Luo
parent ac81c81be1
commit 0caa08ea40

File diff suppressed because it is too large
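Note: the suppressed diff is presumably the new MKL-DNN INT8 mul kernel source added by this commit. As a hedged sketch (not the file's contents), the reference arithmetic the kernel is expected to match, reconstructed in numpy from the unit test added below:

import numpy as np

def int8_mul_reference(x_int8, y_fp32, scale_x, scale_y, scale_out, force_fp32_output):
    # Quantize Y with scale_y, as the test does with np.round(B_data * scale_y[0]).
    y_int = np.round(y_fp32 * scale_y).astype(np.int32)
    # Integer matrix multiply, accumulated in int32.
    acc = np.dot(x_int8.astype(np.int32), y_int)
    # Requantization factor: scale_out / (scale_x * scale_y).
    shift = scale_out / (scale_x * scale_y)
    if force_fp32_output:
        return (acc * shift).astype(np.float32)   # dequantized FP32 output
    return np.round(acc * shift).astype(np.int8)  # requantized INT8 output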

@@ -17,6 +17,9 @@ limitations under the License. */
#include <string>
#include <unordered_map>
#include <vector>
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
namespace paddle {
namespace operators {
@@ -76,6 +79,30 @@ class MulOp : public framework::OperatorWithKernel {
ctx->SetOutputDim("Out", framework::make_ddim(output_dims));
ctx->ShareLoD("X", /*->*/ "Out");
}
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const {
framework::LibraryType library = framework::LibraryType::kPlain;
framework::DataLayout layout = framework::DataLayout::kAnyLayout;
int customized_type_value =
framework::OpKernelType::kDefaultCustomizedTypeValue;
auto input_data_type = ctx.Input<Tensor>("X")->type();
#ifdef PADDLE_WITH_MKLDNN
if (library == framework::LibraryType::kPlain &&
platform::CanMKLDNNBeUsed(ctx)) {
library = framework::LibraryType::kMKLDNN;
layout = framework::DataLayout::kMKLDNN;
if (input_data_type == framework::DataTypeTrait<int8_t>::DataType() ||
input_data_type == framework::DataTypeTrait<uint8_t>::DataType()) {
customized_type_value = kMULMKLDNNINT8;
}
}
#endif
return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout,
library, customized_type_value);
}
};
class MulOpMaker : public framework::OpProtoAndCheckerMaker {
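For readers unfamiliar with Paddle's kernel dispatch: the hunk above selects the MKL-DNN kernel whenever it can be used and, for int8/uint8 input, tags it with kMULMKLDNNINT8 via customized_type_value so the INT8 variant is picked. A rough Python rendering of that decision (illustration only, not part of the patch):

import numpy as np

def expected_mul_kernel(x_dtype, mkldnn_can_be_used):
    # Mirrors GetExpectedKernelType above: library, layout, customized type value.
    library, layout, custom = "Plain", "AnyLayout", "default"
    if mkldnn_can_be_used:
        library, layout = "MKLDNN", "MKLDNN"
        if x_dtype in (np.int8, np.uint8):
            custom = "kMULMKLDNNINT8"
    return library, layout, custom

# e.g. expected_mul_kernel(np.uint8, True) -> ("MKLDNN", "MKLDNN", "kMULMKLDNNINT8")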
@@ -84,6 +111,9 @@ class MulOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "(Tensor), The first input tensor of mul op.");
AddInput("Y", "(Tensor), The second input tensor of mul op.");
AddOutput("Out", "(Tensor), The output tensor of mul op.");
AddAttr<bool>("use_mkldnn",
"(bool, default false) Only used in mkldnn kernel")
.SetDefault(false);
AddAttr<int>(
"x_num_col_dims",
R"DOC((int, default 1), The mul_op can take tensors with more than two
@@ -114,6 +144,23 @@ class MulOpMaker : public framework::OpProtoAndCheckerMaker {
)DOC")
.SetDefault(1)
.EqualGreaterThan(1);
AddAttr<float>("scale_x",
"scale_x to used for int8 input data x."
"Only used with MKL-DNN INT8")
.SetDefault(1.0f);
AddAttr<std::vector<float>>("scale_y",
"scale_y to used for int8 input data y."
"Only used with MKL-DNN INT8")
.SetDefault({1.0f});
AddAttr<float>("scale_out",
"scale_out to be used for int8 output data."
"Only used with MKL-DNN INT8")
.SetDefault(1.0f);
AddAttr<bool>(
"force_fp32_output",
"(bool, default false) Force quantize kernel output FP32, only "
"used in quantized MKL-DNN.")
.SetDefault(false);
AddComment(R"DOC(
Mul Operator.
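The four new attributes above are what a quantized graph sets on the mul op. A minimal sketch of the attribute values, mirroring the dictionary used by the unit test added later in this commit (values are illustrative):

attrs = {
    "use_mkldnn": True,         # route the op to the MKL-DNN kernel
    "scale_x": 0.6,             # quantization scale of input X
    "scale_y": [0.8],           # quantization scale(s) of input Y (vector attribute)
    "scale_out": 1.0,           # requantization scale of the output
    "force_fp32_output": False, # True: dequantize the result back to FP32
}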
@@ -237,14 +284,19 @@ class MulDoubleGradMaker : public framework::SingleGradOpDescMaker {
namespace ops = paddle::operators;
REGISTER_OPERATOR(mul, ops::MulOp, ops::MulOpMaker, ops::MulOpInferVarType,
ops::MulOpGradMaker);
REGISTER_OPERATOR(mul_grad, ops::MulGradOp, ops::MulDoubleGradMaker);
REGISTER_OPERATOR(mul_grad_grad, ops::MulDoubleGradOp);
REGISTER_OP_CPU_KERNEL(
mul, ops::MulKernel<paddle::platform::CPUDeviceContext, float>,
ops::MulKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
mul_grad, ops::MulGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::MulGradKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
mul_grad_grad,
ops::MulDoubleGradKernel<paddle::platform::CPUDeviceContext, float>,

@@ -24,6 +24,8 @@ namespace operators {
using Tensor = framework::Tensor;
constexpr int kMULMKLDNNINT8 = 1;
template <typename DeviceContext, typename T>
class MulKernel : public framework::OpKernel<T> {
public:

@@ -15,6 +15,7 @@ limitations under the License. */
#include <mkldnn.h>
#include <algorithm>
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/operator.h"
@@ -89,6 +90,16 @@ inline mkldnn::memory::data_type MKLDNNGetDataType<float>() {
return mkldnn::memory::f32;
}
template <>
inline mkldnn::memory::data_type MKLDNNGetDataType<int8_t>() {
return mkldnn::memory::s8;
}
template <>
inline mkldnn::memory::data_type MKLDNNGetDataType<uint8_t>() {
return mkldnn::memory::u8;
}
inline void Reorder(const mkldnn::memory& src, const mkldnn::memory& dst) {
auto reorder_prim = mkldnn::reorder(src, dst);
std::vector<mkldnn::primitive> pipeline;

@@ -0,0 +1,166 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
import paddle.fluid.core as core
from paddle.fluid.tests.unittests.op_test import OpTest
'''
test case for s8 * s8
'''
class TestMKLDNNMulOpS8S8(OpTest):
def setUp(self):
self.op_type = "mul"
self.init_kernel_type()
self.init_data_type()
self.init_data()
self.attrs = {
"use_mkldnn": self.use_mkldnn,
"scale_x": self.scale_x,
"scale_y": self.scale_y,
"scale_out": self.scale_out,
"force_fp32_output": self.force_fp32,
}
def init_kernel_type(self):
self.use_mkldnn = True
self.force_fp32 = True
def init_data_type(self):
self.srctype = np.int8
self.dsttype = np.float32 if self.force_fp32 else np.int8
def init_data(self):
self.scale_x = 0.6
self.scale_y = [0.8]
self.scale_out = 1.0
# limit random range to [-127, 127] to avoid overflow on SKL
if self.srctype == np.int8:
A_data = np.random.randint(-127, 127, (2, 5)).astype(np.int8)
else:
A_data = np.random.randint(0, 127, (2, 5)).astype(np.uint8)
B_data = np.random.uniform(-127, 127, (5, 3)).astype(np.float32)
quant_B = np.round(B_data * self.scale_y[0]).astype(np.int32)
output = np.dot(A_data, quant_B)
scale_output_shift = (self.scale_out) / \
(self.scale_x * self.scale_y[0])
if (self.force_fp32):
output = (output * scale_output_shift).astype(self.dsttype)
else:
output = np.round(output * scale_output_shift).astype(self.dsttype)
self.inputs = {'X': A_data, 'Y': B_data}
self.outputs = {'Out': output}
def test_check_output(self):
self.check_output_with_place(core.CPUPlace(), atol=0)
def test_check_grad_normal(self):
pass
def test_check_grad_ingore_x(self):
pass
def test_check_grad_ingore_y(self):
pass
'''
test case for s8 * u8
'''
class TestMKLDNNMulOpS8U8(TestMKLDNNMulOpS8S8):
def init_data_type(self):
self.srctype = np.uint8
self.dsttype = np.float32 if self.force_fp32 else np.int8
'''
test case for s8 * s8
'''
class TestMKLDNNMulOpS8S8WithFlatten(TestMKLDNNMulOpS8S8):
def setUp(self):
self.op_type = "mul"
self.init_kernel_type()
self.init_data_type()
self.init_data()
self.attrs = {
"use_mkldnn": self.use_mkldnn,
"scale_x": self.scale_x,
"scale_y": self.scale_y,
"scale_out": self.scale_out,
"force_fp32_output": self.force_fp32,
"x_num_col_dims": 2,
"y_num_col_dims": 2,
}
def init_data(self):
self.scale_x = 0.6
self.scale_y = [0.8]
self.scale_out = 1.0
# limit random range to [-127, 127] to avoid overflow on SKL
if self.srctype == np.int8:
A_data = np.random.randint(-127, 127, (3, 4, 4, 3)).astype(np.int8)
else:
A_data = np.random.randint(0, 127, (3, 4, 4, 3)).astype(np.uint8)
B_data = np.random.uniform(-127, 127,
(2, 6, 1, 2, 3)).astype(np.float32)
A_data_reshape = A_data.reshape(3 * 4, 4 * 3)
B_data_reshape = B_data.reshape(2 * 6, 1 * 2 * 3)
quant_B = np.round(B_data_reshape * self.scale_y[0]).astype(np.int32)
output = np.dot(A_data_reshape, quant_B)
scale_output_shift = (self.scale_out) / \
(self.scale_x * self.scale_y[0])
if (self.force_fp32):
output = (output * scale_output_shift).astype(self.dsttype)
else:
output = np.round(output * scale_output_shift).astype(self.dsttype)
output = output.reshape(3, 4, 1, 2, 3)
self.inputs = {'X': A_data, 'Y': B_data}
self.outputs = {'Out': output}
'''
test case for s8 * u8
'''
class TestMKLDNNMulOpS8U8WithFlatten(TestMKLDNNMulOpS8S8WithFlatten):
def init_data_type(self):
self.srctype = np.uint8
self.dsttype = np.float32 if self.force_fp32 else np.int8
if __name__ == '__main__':
unittest.main()

@@ -69,7 +69,8 @@ class TestOperator(unittest.TestCase):
set(mul_op.attr_names),
set([
"x_num_col_dims", "y_num_col_dims", "op_role", "op_role_var",
"op_namescope", "op_callstack"
"use_mkldnn", "scale_x", "scale_y", "scale_out",
"force_fp32_output", "op_namescope", "op_callstack"
]))
self.assertEqual(mul_op.has_attr("x_num_col_dims"), True)
self.assertEqual(mul_op.attr_type("x_num_col_dims"), core.AttrType.INT)
