[DNNL][INT8][FP32] MatMul (#23395)

* Initial FP32 DNNL MatMul Implementation

* Implement int8 DNNL MatMul

* Unify in-kernel naming, clean up UTs

* MatMul: Introduce op caching

* Final adjustments

test=develop

* Remove dy_graph disablement

test=develop

* Change dnnl header name to new one

test=develop

* Constrain multi-head check to prevent failures

test=develop

* Resolve dnnl header problems on MAC CI

* Improve variable naming in kernel, add skip_grad_ci

test=develop

* Prevent MAC CI from failing

* Prevent windows build from failing

test=develop

* Modify UTs to conform to the rules

* Modify MatMul aux function names

test=develop
Michał Gallus 6 years ago committed by GitHub
parent a2315d37cf
commit a63bcf9ae7

@@ -17,6 +17,9 @@ limitations under the License. */
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/blas.h"
+#ifdef PADDLE_WITH_MKLDNN
+#include "paddle/fluid/platform/mkldnn_helper.h"
+#endif
 
 namespace paddle {
 namespace operators {
@@ -409,6 +412,21 @@ class MatMulOp : public framework::OperatorWithKernel {
     context->SetOutputDim("Out", framework::make_ddim(dim_out));
     context->ShareLoD("X", /*->*/ "Out");
   }
+
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext &ctx) const override {
+    auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
+
+#ifdef PADDLE_WITH_MKLDNN
+    using mkldnn::memory;
+    if (platform::CanMKLDNNBeUsed(ctx)) {
+      return framework::OpKernelType(input_data_type, ctx.GetPlace(),
+                                     framework::DataLayout::kMKLDNN,
+                                     framework::LibraryType::kMKLDNN);
+    }
+#endif
+    return framework::OpKernelType(input_data_type, ctx.GetPlace());
+  }
 };
 
 class MatMulOpMaker : public framework::OpProtoAndCheckerMaker {
@@ -426,6 +444,30 @@ class MatMulOpMaker : public framework::OpProtoAndCheckerMaker {
 )DOC")
         .SetDefault(false);
     AddAttr<float>("alpha", "The scale of Out").SetDefault(1.0f);
+    AddAttr<bool>(
+        "use_mkldnn",
+        "(bool, default false) Indicates if MKL-DNN kernel will be used")
+        .SetDefault(false);
+    /* int8 parameters */
+    AddAttr<bool>("use_quantizer",
+                  "(bool, default false) "
+                  "Set to true for operators that should be quantized and use "
+                  "int8 kernel. "
+                  "Only used on CPU.")
+        .SetDefault(false);
+    AddAttr<float>("Scale_x",
+                   "(float, default 1.0f), The quantize scale of X tensor")
+        .SetDefault(1.0f);
+    AddAttr<float>("Scale_y",
+                   "(float, default 1.0f), The quantize scale of Y tensor")
+        .SetDefault(1.0f);
+    AddAttr<float>("Scale_out",
+                   "(float, default 1.0f), The quantize scale of output data")
+        .SetDefault(1.0f);
+    AddAttr<bool>("force_fp32_output",
+                  "(bool, default false) Force INT8 kernel output FP32, only "
+                  "used in MKL-DNN INT8")
+        .SetDefault(false);
 #if defined(PADDLE_WITH_MKLML) && !defined(PADDLE_WITH_CUDA)
     AddAttr<int>("head_number", "The number of heads of the matrix")
         .SetDefault(1);
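These attributes are normally set by the framework's quantization and MKL-DNN placement passes rather than by hand. As a minimal sketch of exercising the new FP32 kernel from Python (assuming a Paddle 1.8/2.0-era fluid build with MKL-DNN enabled; `_set_attr` is a private framework helper used here only for illustration):

import numpy as np
import paddle.fluid as fluid

main = fluid.Program()
with fluid.program_guard(main):
    x = fluid.data(name='x', shape=[12, 9], dtype='float32')
    y = fluid.data(name='y', shape=[9, 12], dtype='float32')
    out = fluid.layers.matmul(x, y)
    # Route the freshly appended matmul op to the DNNL kernel
    # via the use_mkldnn attribute added above.
    main.current_block().ops[-1]._set_attr('use_mkldnn', True)

exe = fluid.Executor(fluid.CPUPlace())
result, = exe.run(main,
                  feed={'x': np.random.rand(12, 9).astype('float32'),
                        'y': np.random.rand(9, 12).astype('float32')},
                  fetch_list=[out])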

File diff suppressed because it is too large.
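That suppressed diff adds the DNNL MatMul kernel itself (the FP32 and INT8 compute paths). Conceptually, the INT8 path folds the three scale attributes into a single output rescale; the following numpy sketch illustrates that arithmetic only and is not the kernel's actual code:

import numpy as np

def int8_matmul_reference(x_int8, y_int8, scale_x, scale_y, scale_out):
    # real_x ~= x_int8 / scale_x and real_y ~= y_int8 / scale_y, so the
    # quantized output is scale_out / (scale_x * scale_y) times the
    # int32 accumulator.
    acc = x_int8.astype(np.int32) @ y_int8.astype(np.int32)
    rescaled = acc * (scale_out / (scale_x * scale_y))
    return np.clip(np.round(rescaled), -128, 127).astype(np.int8)

With force_fp32_output, the Scale_out factor drops out and the rescaled accumulator is returned as float32 instead of being rounded back to int8, which is what the ForceFP32 tests below expect.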

@@ -13,12 +13,12 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
-#include <mkldnn.h>
 #include <algorithm>
 #include <memory>
 #include <string>
 #include <utility>
 #include <vector>
+#include "mkldnn.hpp"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/platform/place.h"
 
 namespace paddle {

@@ -0,0 +1,165 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

import os
import unittest

import numpy as np
from paddle.fluid.tests.unittests.op_test import OpTest, skip_check_grad_ci


@skip_check_grad_ci(reason="DNNL's MatMul doesn't implement grad kernel.")
class TestDnnlMatMulOp(OpTest):
    def generate_data(self):
        self.x = np.random.random((25, 2, 2)).astype("float32")
        self.y = np.random.random((25, 2, 2)).astype("float32")
        self.alpha = 1.0
        self.out = self.alpha * np.matmul(self.x, self.y)

    def set_attributes(self):
        self.alpha = self.alpha if hasattr(self, 'alpha') else 1.0
        self.attrs = {'alpha': self.alpha}

    def setUp(self):
        # Set max isa, otherwise fails on SKX and earlier
        os.environ["DNNL_MAX_CPU_ISA"] = "AVX"
        self.op_type = "matmul"
        self._cpu_only = True
        self.use_mkldnn = True
        self.generate_data()
        self.set_attributes()
        self.attrs['use_mkldnn'] = True

        self.inputs = {'X': self.x, 'Y': self.y}
        self.outputs = {'Out': self.out}

    def test_check_output(self):
        self.check_output()
class TestDnnlMatMulOpAlpha(TestDnnlMatMulOp):
    def generate_data(self):
        self.x = np.random.random((17, 2, 3)).astype("float32")
        self.y = np.random.random((17, 3, 2)).astype("float32")
        self.alpha = 2.0
        self.out = self.alpha * np.matmul(self.x, self.y)


class TestDnnlMatMulOp2D(TestDnnlMatMulOp):
    def print_tensor(self, name, tensor):
        print(name)
        print(tensor)

    def generate_data(self):
        self.x = np.random.random((12, 9)).astype("float32")
        self.y = np.random.random((9, 12)).astype("float32")
        self.out = np.matmul(self.x, self.y)


class TestDnnlMatMulOpTransposeX(TestDnnlMatMulOp):
    def generate_data(self):
        self.x = np.random.random((12, 9)).astype("float32")
        self.y = np.random.random((12, 9)).astype("float32")
        self.out = np.matmul(np.transpose(self.x), self.y)

    def set_attributes(self):
        self.attrs = {'transpose_X': True}


class TestDnnlMatMulOpTransposeY(TestDnnlMatMulOp):
    def generate_data(self):
        self.x = np.random.random((12, 9)).astype("float32")
        self.y = np.random.random((12, 9)).astype("float32")
        self.out = np.matmul(self.x, np.transpose(self.y))

    def set_attributes(self):
        self.attrs = {'transpose_Y': True}


class TestDnnlMatMulOpTransposeY3D(TestDnnlMatMulOp):
    def generate_data(self):
        self.x = np.random.random((17, 3, 2)).astype("float32")
        self.y = np.random.random((17, 3, 2)).astype("float32")
        self.out = np.matmul(self.x, np.transpose(self.y, (0, 2, 1)))

    def set_attributes(self):
        self.attrs = {'transpose_Y': True}
class TestDnnlMatMulOpInt8NoScales(TestDnnlMatMulOp):
    def generate_data(self):
        self.x = np.random.random((12, 9)).astype("int8")
        self.y = np.random.random((9, 12)).astype("int8")
        self.out = np.matmul(self.x, self.y)


class TestDnnlMatMulOpInt8(TestDnnlMatMulOp):
    def quantize(self, tensor):
        scale = 127. / np.abs(np.amax(tensor))
        quantized = np.round(scale * tensor).astype("int8")
        return scale, quantized

    def generate_data(self):
        x_float = np.random.random((12, 9)).astype("float32")
        self.x_scale, self.x = self.quantize(x_float)

        y_float = np.random.random((9, 12)).astype("float32")
        self.y_scale, self.y = self.quantize(y_float)

        out_float = np.matmul(x_float, y_float)
        self.out_scale, self.out = self.quantize(out_float)

    def set_attributes(self):
        self.attrs = {
            'Scale_x': self.x_scale,
            'Scale_y': self.y_scale,
            'Scale_out': self.out_scale,
        }

    def test_check_output(self):
        int_atol = 1
        self.check_output(atol=int_atol)


class TestDnnlMatMulOpInt8ForceFP32(TestDnnlMatMulOpInt8):
    def generate_data(self):
        x_float = np.random.random((12, 9)).astype("float32")
        self.x_scale, self.x = self.quantize(x_float)

        y_float = np.random.random((9, 12)).astype("float32")
        self.y_scale, self.y = self.quantize(y_float)

        out_float = np.matmul(x_float, y_float)
        self.out = out_float

    def set_attributes(self):
        self.attrs = {
            'Scale_x': self.x_scale,
            'Scale_y': self.y_scale,
            'force_fp32_output': True
        }


class TestDnnlMatMulOpInt8ForceFP32BasicScales(TestDnnlMatMulOp):
    def generate_data(self):
        self.x = np.random.randint(0, 3, (12, 9)).astype("int8")
        self.y = np.random.randint(0, 3, (9, 12)).astype("int8")
        self.out = np.matmul(self.x, self.y).astype("float32")

    def set_attributes(self):
        self.attrs = {'force_fp32_output': True}


if __name__ == "__main__":
    unittest.main()
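As a sanity check on the symmetric scheme used by the quantize helper above: dequantization recovers the input up to half a quantization step. A self-contained numpy sketch (no Paddle required):

import numpy as np

def quantize(tensor):
    # Same scheme as the tests: map the largest value to 127.
    scale = 127. / np.abs(np.amax(tensor))
    return scale, np.round(scale * tensor).astype("int8")

x = np.random.random((12, 9)).astype("float32")
scale, x_int8 = quantize(x)
# Rounding error is at most half a step, i.e. 0.5 / scale ~= 0.004 here.
assert np.allclose(x_int8.astype("float32") / scale, x, atol=0.5 / scale)

This per-element bound is also why TestDnnlMatMulOpInt8 checks its output with atol=1: multiplying two quantized tensors can shift the quantized int8 result by one unit.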