From ccd6b9a41569c212722ca48e214b2eda46dcdc52 Mon Sep 17 00:00:00 2001
From: zhanyuan
Date: Mon, 3 Aug 2020 17:49:05 +0800
Subject: [PATCH] Add fp32 & int8 ops of Matmul(Batchmatmul)

---
 mindspore/lite/src/ops/matmul.cc              |  29 +--
 mindspore/lite/src/ops/ops.cc                 |   2 +
 .../runtime/kernel/arm/base/matmul_base.cc    |  72 ++++++++
 .../src/runtime/kernel/arm/base/matmul_base.h |  49 +++++
 .../src/runtime/kernel/arm/fp32/matmul.cc     | 108 ++++++++---
 .../lite/src/runtime/kernel/arm/fp32/matmul.h |  19 +-
 .../kernel/arm/int8/fullconnection_int8.h     |   2 +-
 .../runtime/kernel/arm/int8/matmul_int8.cc    | 142 +++++++++++++++
 .../src/runtime/kernel/arm/int8/matmul_int8.h |  47 +++++
 .../runtime/kernel/arm/opclib/common_func.cc  |  17 ++
 .../runtime/kernel/arm/opclib/common_func.h   |   2 +
 .../runtime/kernel/arm/opclib/fp32/matmul.cc  |   4 +-
 .../runtime/kernel/arm/opclib/int8/matmul.cc  |  13 +-
 .../runtime/kernel/arm/opclib/int8/matmul.h   |   2 +-
 .../src/runtime/kernel/arm/opclib/matmul.h    |   1 +
 .../kernel/arm/opclib/quantization/quantize.h |  21 ++-
 .../kernel/arm/fp32/matmul_fp32_tests.cc      | 169 ++++++++++++++++++
 .../arm/int8/fullconnection_int8_tests.cc     |  19 +-
 .../kernel/arm/int8/matmul_int8_tests.cc      | 126 +++++++++++++
 19 files changed, 769 insertions(+), 75 deletions(-)
 create mode 100644 mindspore/lite/src/runtime/kernel/arm/base/matmul_base.cc
 create mode 100644 mindspore/lite/src/runtime/kernel/arm/base/matmul_base.h
 create mode 100644 mindspore/lite/src/runtime/kernel/arm/int8/matmul_int8.cc
 create mode 100644 mindspore/lite/src/runtime/kernel/arm/int8/matmul_int8.h
 create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/matmul_fp32_tests.cc
 create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/int8/matmul_int8_tests.cc

diff --git a/mindspore/lite/src/ops/matmul.cc b/mindspore/lite/src/ops/matmul.cc
index 2d031378bf..619fcade8c 100644
--- a/mindspore/lite/src/ops/matmul.cc
+++ b/mindspore/lite/src/ops/matmul.cc
@@ -33,29 +33,30 @@ int MatMul::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tenso
-  std::vector<int> x_shape = input0->shape();
-  std::vector<int> w_shape = input1->shape();
-  if (x_shape.size() < 2 || w_shape.size() < 2) {
+  std::vector<int> a_shape = input0->shape();
+  std::vector<int> b_shape = input1->shape();
+  if (a_shape.size() < 3 || b_shape.size() < 3) {
     MS_LOG(ERROR) << "inputs shape is invalid";
     return RET_INPUT_TENSOR_ERROR;
   }
+  for (int i = 0; i < a_shape.size() - 2; ++i) {
+    if (a_shape[i] != b_shape[i]) {
+      MS_LOG(ERROR) << "Op MatMul's batch dimensions must be equal";
+      return RET_INPUT_TENSOR_ERROR;
+    }
+  }
+
   auto matmul_prim = this->primitive->value_as_MatMul();
   if (matmul_prim->transposeA()) {
-    int tmp = x_shape.back();
-    x_shape[x_shape.size() - 1] = x_shape[x_shape.size() - 2];
-    x_shape[x_shape.size() - 2] = tmp;
+    std::swap(a_shape[a_shape.size() - 1], a_shape[a_shape.size() - 2]);
   }
   if (matmul_prim->transposeB()) {
-    int tmp = w_shape.back();
-    w_shape[w_shape.size() - 1] = w_shape[w_shape.size() - 2];
-    w_shape[w_shape.size() - 2] = tmp;
+    std::swap(b_shape[b_shape.size() - 1], b_shape[b_shape.size() - 2]);
   }
-  auto y_shape_size = std::max(x_shape.size(), w_shape.size());
-  std::vector<int> y_shape(y_shape_size);
-  y_shape = x_shape;
-  y_shape[y_shape_size - 1] = w_shape[w_shape.size() - 1];
-  output->set_shape(y_shape);
+  std::vector<int> c_shape(a_shape);
+  c_shape[c_shape.size() - 1] = b_shape[b_shape.size() - 1];
+  output->set_shape(c_shape);
   output->set_data_type(input0->data_type());
   output->SetFormat(input0->GetFormat());
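The new InferShape treats MatMul as a batched op: both inputs must carry the same leading batch dimensions, the optional transposes swap only the two trailing dimensions, and the output copies A's shape with the last dimension taken from B. A minimal standalone sketch of that rule, in plain C++ rather than the MindSpore API (function name hypothetical):

  #include <cassert>
  #include <utility>
  #include <vector>

  // (..., m, k) x (..., k, n) -> (..., m, n), with optional transposes.
  std::vector<int> InferMatMulShape(std::vector<int> a, std::vector<int> b, bool ta, bool tb) {
    assert(a.size() >= 3 && a.size() == b.size());
    for (size_t i = 0; i + 2 < a.size(); ++i) assert(a[i] == b[i]);  // batch dims must match
    if (ta) std::swap(a[a.size() - 1], a[a.size() - 2]);
    if (tb) std::swap(b[b.size() - 1], b[b.size() - 2]);
    std::vector<int> c(a);
    c[c.size() - 1] = b[b.size() - 1];
    return c;
  }

For example, a = (3, 2, 8) and b = (3, 8, 5) give c = (3, 2, 5); with tb = true the same result comes from b supplied as (3, 5, 8).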
diff --git a/mindspore/lite/src/ops/ops.cc b/mindspore/lite/src/ops/ops.cc
index a90f1a75d9..85c20267ee 100644
--- a/mindspore/lite/src/ops/ops.cc
+++ b/mindspore/lite/src/ops/ops.cc
@@ -139,6 +139,8 @@ Primitive *Primitive::CreatePrimitive(schema::Primitive *primitive) {
       return new lite::SpaceToBatch(const_cast<schema::Primitive *>(primitive));
     case schema::PrimitiveType_QuantDTypeCast:
       return new lite::QuantDTypeCast(const_cast<schema::Primitive *>(primitive));
+    case schema::PrimitiveType_MatMul:
+      return new lite::MatMul(const_cast<schema::Primitive *>(primitive));
     default:
       break;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.cc
new file mode 100644
index 0000000000..eb88cfb4b3
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.cc
@@ -0,0 +1,72 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "src/runtime/kernel/arm/base/matmul_base.h"
+#include "src/runtime/kernel/arm/fp32/matmul.h"
+#include "src/runtime/kernel/arm/int8/matmul_int8.h"
+#include "src/kernel_factory.h"
+#include "include/errorcode.h"
+#include "include/context.h"
+
+using mindspore::lite::KernelRegistrar;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
+using mindspore::schema::PrimitiveType_MatMul;
+
+namespace mindspore::kernel {
+kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
+                                           const std::vector<lite::tensor::Tensor *> &outputs,
+                                           OpParameter *opParameter, const lite::Context *ctx,
+                                           const kernel::KernelKey &desc) {
+  MS_ASSERT(opParameter != nullptr);
+  MS_ASSERT(desc.type == schema::PrimitiveType_MatMul);
+  auto input_tensor = inputs.at(kInputIndex);
+  auto data_type = input_tensor->data_type();
+  kernel::LiteKernel *kernel = nullptr;
+  switch (data_type) {
+    case kNumberTypeInt8:
+    case kNumberTypeUInt8: {
+      kernel = new (std::nothrow) MatmulInt8CPUKernel(opParameter, inputs, outputs, ctx);
+      if (!kernel) {
+        MS_LOG(ERROR) << "kernel is nullptr.";
+        return nullptr;
+      }
+      break;
+    }
+    case kNumberTypeFloat32: {
+      kernel = new (std::nothrow) MatmulCPUKernel(opParameter, inputs, outputs, ctx);
+      if (!kernel) {
+        MS_LOG(ERROR) << "kernel is nullptr.";
+        return nullptr;
+      }
+      break;
+    }
+    default:
+      MS_LOG(ERROR) << "unsupported matmul data type: " << data_type;
+      return nullptr;
+  }
+
+  auto ret = kernel->Init();
+  if (ret != RET_OK) {
+    delete kernel;
+    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
+                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
+    return nullptr;
+  }
+  return kernel;
+}
+
+REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_MatMul, CpuMatmulKernelCreator)
+}  // namespace mindspore::kernel
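Note that the creator dispatches on the input dtype (int8/uint8 as well as float32), yet only the float32 entry is registered above. If the int8 kernel is meant to be reachable through the registry too, a matching registration would presumably be needed, e.g. (hypothetical, not part of this patch):

  REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_MatMul, CpuMatmulKernelCreator)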
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.h b/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.h
new file mode 100644
index 0000000000..92c8dd07e3
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.h
@@ -0,0 +1,49 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_MATMUL_BASE_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_MATMUL_BASE_H_
+
+#include <vector>
+#include "src/lite_kernel.h"
+#include "include/context.h"
+#include "src/runtime/kernel/arm/opclib/matmul.h"
+
+using mindspore::lite::Context;
+
+namespace mindspore::kernel {
+class MatmulBaseCPUKernel : public LiteKernel {
+ public:
+  MatmulBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
+                      const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
+      : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->threadNum) {
+    params_ = reinterpret_cast<MatMulParameter *>(opParameter);
+  }
+  ~MatmulBaseCPUKernel() = default;
+
+  int Init() override { return 0; }
+  int ReSize() override { return 0; }
+  int Run() override { return 0; }
+
+ protected:
+  MatMulParameter *params_;
+  int thread_count_;
+  int thread_stride_;
+  const Context *ctx_;
+};
+}  // namespace mindspore::kernel
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_MATMUL_BASE_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.cc
index 79f7f65b58..ee6606d66c 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.cc
@@ -15,44 +15,102 @@
  */
 
 #include "src/runtime/kernel/arm/fp32/matmul.h"
-#include <vector>
-#include "schema/model_generated.h"
-#include "src/kernel_registry.h"
+#include "src/runtime/kernel/arm/opclib/fp32/matmul.h"
+#include "src/runtime/runtime_api.h"
 #include "include/errorcode.h"
 
-using mindspore::kernel::KERNEL_ARCH::kCPU;
-using mindspore::lite::KernelRegistrar;
 using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_MEMORY_FAILED;
 using mindspore::lite::RET_OK;
-using mindspore::schema::PrimitiveType_MatMul;
 
 namespace mindspore::kernel {
+MatmulCPUKernel::~MatmulCPUKernel() {
+  ctx_->allocator->Free(a_c8_ptr_);
+  ctx_->allocator->Free(b_r8_ptr_);
+  ctx_->allocator->Free(c_r8x8_ptr_);
+}
+
 int MatmulCPUKernel::ReSize() { return RET_OK; }
 
-int MatmulCPUKernel::Run() { return RET_OK; }
-
-int MatmulCPUKernel::Init() { return RET_OK; }
+int MatmulCPUKernel::Init() {
+  int batch = 1;
+  auto x_shape = inputs_[0]->shape();
+  auto o_shape = outputs_[0]->shape();
+  for (int i = 0; i < x_shape.size() - 2; ++i) {
+    batch *= x_shape[i];
+  }
+  params_->batch = batch;
+  params_->row_ = o_shape[o_shape.size() - 2];
+  params_->col_ = o_shape[o_shape.size() - 1];
+  params_->deep_ = params_->a_transpose_ ? x_shape[x_shape.size() - 2] : x_shape[x_shape.size() - 1];
+  params_->row_8_ = UP_ROUND(params_->row_, 8);
+  params_->col_8_ = UP_ROUND(params_->col_, 8);
+  thread_count_ = MSMIN(thread_count_, UP_DIV(params_->col_8_, 8));
+  thread_stride_ = UP_DIV(UP_DIV(params_->col_8_, 8), thread_count_);
+
+  a_c8_ptr_ = reinterpret_cast<float *>(ctx_->allocator->Malloc(params_->row_8_ * params_->deep_ * sizeof(float)));
+  if (!a_c8_ptr_) {
+    return RET_MEMORY_FAILED;
+  }
+  memset(a_c8_ptr_, 0, params_->row_8_ * params_->deep_ * sizeof(float));
+  b_r8_ptr_ = reinterpret_cast<float *>(ctx_->allocator->Malloc(params_->col_8_ * params_->deep_ * sizeof(float)));
+  if (!b_r8_ptr_) {
+    return RET_MEMORY_FAILED;
+  }
+  memset(b_r8_ptr_, 0, params_->col_8_ * params_->deep_ * sizeof(float));
+  c_r8x8_ptr_ = reinterpret_cast<float *>(ctx_->allocator->Malloc(params_->row_8_ * params_->col_8_ * sizeof(float)));
+  if (!c_r8x8_ptr_) {
+    return RET_MEMORY_FAILED;
+  }
+  memset(c_r8x8_ptr_, 0, params_->row_8_ * params_->col_8_ * sizeof(float));
+  return RET_OK;
+}
 
-kernel::LiteKernel *CpuMatmulFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
-                                               const std::vector<lite::tensor::Tensor *> &outputs,
-                                               OpParameter *opParameter, const lite::Context *ctx,
-                                               const kernel::KernelKey &desc) {
-  MS_ASSERT(desc.type == schema::PrimitiveType_MatMul);
-  auto *kernel = new (std::nothrow) MatmulCPUKernel(opParameter, inputs, outputs);
-  if (kernel == nullptr) {
-    MS_LOG(ERROR) << "new MatmulCPUKernel fail!";
-    return nullptr;
+int MatmulCPUKernel::RunImpl(int task_id) {
+  int cur_oc = MSMIN(thread_stride_, UP_DIV(params_->col_8_, 8) - task_id * thread_stride_);
+  if (cur_oc <= 0) {
+    return RET_OK;
   }
-  auto ret = kernel->Init();
-  if (ret != RET_OK) {
-    delete kernel;
-    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
-                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
-    return nullptr;
+  auto cur_b = b_r8_ptr_ + task_id * thread_stride_ * C8NUM * params_->deep_;
+  auto cur_c = c_r8x8_ptr_ + task_id * thread_stride_ * C8NUM * params_->row_8_;
+  MatMul(a_c8_ptr_, cur_b, cur_c, nullptr, ActType_No, params_->deep_, params_->row_8_, cur_oc * 8);
+  return RET_OK;
+}
+
+int MatmulFloatRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
+  auto op = reinterpret_cast<MatmulCPUKernel *>(cdata);
+  auto error_code = op->RunImpl(task_id);
+  if (error_code != RET_OK) {
+    MS_LOG(ERROR) << "MatmulFloatRun error task_id[" << task_id << "] error_code[" << error_code << "]";
+    return RET_ERROR;
   }
-  return kernel;
+  return RET_OK;
 }
 
-REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_MatMul, CpuMatmulFp32KernelCreator)
+int MatmulCPUKernel::Run() {
+  auto a_ptr = reinterpret_cast<float *>(inputs_[0]->Data());
+  auto b_ptr = reinterpret_cast<float *>(inputs_[1]->Data());
+  auto c_ptr = reinterpret_cast<float *>(outputs_[0]->Data());
+  auto a_stride = params_->row_ * params_->deep_;
+  auto b_stride = params_->deep_ * params_->col_;
+  auto c_stride = params_->row_ * params_->col_;
+  for (int i = 0; i < params_->batch; ++i) {
+    auto cur_a_ptr = a_ptr + i * a_stride;
+    auto cur_b_ptr = b_ptr + i * b_stride;
+    auto cur_c_ptr = c_ptr + i * c_stride;
+    if (params_->a_transpose_) {
+      RowMajor2Row8Major(cur_a_ptr, a_c8_ptr_, params_->deep_, params_->row_);
+    } else {
+      RowMajor2Col8Major(cur_a_ptr, a_c8_ptr_, params_->row_, params_->deep_);
+    }
+    if (params_->b_transpose_) {
+      RowMajor2Col8Major(cur_b_ptr, b_r8_ptr_, params_->col_, params_->deep_);
+    } else {
+      RowMajor2Row8Major(cur_b_ptr, b_r8_ptr_, params_->deep_, params_->col_);
+    }
+    LiteBackendParallelLaunch(MatmulFloatRun, this, thread_count_);
+    Row8x8Major2RowMajor(c_r8x8_ptr_, cur_c_ptr, params_->row_, params_->col_);
+  }
+  return RET_OK;
+}
 }  // namespace mindspore::kernel
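Run() repacks A into 8-row blocks (8-column blocks when transposed) and B symmetrically, so the 8x8 micro-kernel reads both operands contiguously. The fp32 packing routines themselves are not in this diff; as a reference for the layout, here is a sketch mirroring the int8 RowMajor2Col8MajorInt8 added later in this patch (sketch only, not the shipped code):

  // Pack a row-major (row x col) matrix into blocks of 8 rows:
  // element (r, c) lands in block r/8, column c, lane r%8.
  void RowMajor2Col8MajorSketch(const float *src, float *dst, int row, int col) {
    for (int r = 0; r < row; r++) {
      int rd8 = r / 8, rm8 = r % 8;
      for (int c = 0; c < col; c++) {
        dst[rd8 * col * 8 + c * 8 + rm8] = src[r * col + c];
      }
    }
  }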
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.h b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.h
index 3dfc4521eb..14594dc3f0 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.h
@@ -19,27 +19,26 @@
 
 #include <vector>
 #include "src/lite_kernel.h"
-
 #include "src/runtime/kernel/arm/opclib/matmul.h"
+#include "src/runtime/kernel/arm/base/matmul_base.h"
 
 namespace mindspore::kernel {
-class MatmulCPUKernel : public LiteKernel {
+class MatmulCPUKernel : public MatmulBaseCPUKernel {
  public:
   explicit MatmulCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                           const std::vector<lite::tensor::Tensor *> &outputs)
-      : LiteKernel(parameter, inputs, outputs) {
-    matmul_param_ = reinterpret_cast<MatMulParameter *>(parameter);
-  }
-  ~MatmulCPUKernel() override = default;
-
+                           const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
+      : MatmulBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+  ~MatmulCPUKernel() override;
   int Init() override;
   int ReSize() override;
   int Run() override;
+  int RunImpl(int task_id);
 
  private:
-  MatMulParameter *matmul_param_;
+  float *a_c8_ptr_;
+  float *b_r8_ptr_;
+  float *c_r8x8_ptr_;
 };
 }  // namespace mindspore::kernel
 
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_MATMUL_H_
-
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/fullconnection_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/fullconnection_int8.h
index 4bc62f1b08..08c9673b71 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/fullconnection_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/fullconnection_int8.h
@@ -42,7 +42,7 @@ class FullconnectionInt8CPUKernel : public FullconnectionBaseCPUKernel {
   int RunImpl(int task_id);
 
  private:
-  FcQuantArg quant_params_;
+  MatmulQuantArg quant_params_;
   int8_t *a_c8_ptr_;
   int8_t *b_r8_ptr_;
   int *c_r8x8_ptr_;
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/matmul_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/matmul_int8.cc
new file mode 100644
index 0000000000..926d282679
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/matmul_int8.cc
@@ -0,0 +1,142 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/runtime/kernel/arm/int8/matmul_int8.h"
+#include "src/runtime/kernel/arm/opclib/int8/matmul.h"
+#include "src/runtime/kernel/arm/opclib/common_func.h"
+#include "src/runtime/runtime_api.h"
+#include "include/errorcode.h"
+
+using mindspore::lite::RET_MEMORY_FAILED;
+using mindspore::lite::RET_OK;
+
+namespace mindspore::kernel {
+MatmulInt8CPUKernel::~MatmulInt8CPUKernel() {
+  ctx_->allocator->Free(a_c8_ptr_);
+  ctx_->allocator->Free(b_r8_ptr_);
+  ctx_->allocator->Free(c_r8x8_ptr_);
+}
+
+int MatmulInt8CPUKernel::Init() {
+  int batch = 1;
+  auto x_shape = inputs_[0]->shape();
+  auto o_shape = outputs_[0]->shape();
+  for (int i = 0; i < x_shape.size() - 2; ++i) {
+    batch *= x_shape[i];
+  }
+  params_->batch = batch;
+  params_->row_ = o_shape[o_shape.size() - 2];
+  params_->col_ = o_shape[o_shape.size() - 1];
+  params_->deep_ = params_->a_transpose_ ? x_shape[x_shape.size() - 2] : x_shape[x_shape.size() - 1];
+  params_->row_8_ = UP_ROUND(params_->row_, 8);
+  params_->col_8_ = UP_ROUND(params_->col_, 8);
+  thread_count_ = MSMIN(thread_count_, UP_DIV(params_->col_8_, 8));
+  thread_stride_ = UP_DIV(UP_DIV(params_->col_8_, 8), thread_count_);
+
+  a_c8_ptr_ = reinterpret_cast<int8_t *>(ctx_->allocator->Malloc(params_->row_8_ * params_->deep_ * sizeof(int8_t)));
+  if (!a_c8_ptr_) {
+    return RET_MEMORY_FAILED;
+  }
+  memset(a_c8_ptr_, 0, params_->row_8_ * params_->deep_ * sizeof(int8_t));
+  b_r8_ptr_ = reinterpret_cast<int8_t *>(ctx_->allocator->Malloc(params_->col_8_ * params_->deep_ * sizeof(int8_t)));
+  if (!b_r8_ptr_) {
+    return RET_MEMORY_FAILED;
+  }
+  memset(b_r8_ptr_, 0, params_->col_8_ * params_->deep_ * sizeof(int8_t));
+  c_r8x8_ptr_ = reinterpret_cast<int *>(ctx_->allocator->Malloc(params_->row_8_ * params_->col_8_ * sizeof(int)));
+  if (!c_r8x8_ptr_) {
+    return RET_MEMORY_FAILED;
+  }
+  memset(c_r8x8_ptr_, 0, params_->row_8_ * params_->col_8_ * sizeof(int));
+
+  auto input_tensor = inputs_[0];
+  auto params = input_tensor->GetQuantParams();
+  MS_ASSERT(params.size() == 1);
+  quant_params_.input.zp_ = params.front().zeroPoint;
+  quant_params_.input.scale_ = params.front().scale;
+  auto weight_tensor = inputs_[1];
+  params = weight_tensor->GetQuantParams();
+  MS_ASSERT(params.size() == 1);
+  quant_params_.weight.zp_ = params.front().zeroPoint;
+  quant_params_.weight.scale_ = params.front().scale;
+  auto output_tensor = outputs_[0];
+  params = output_tensor->GetQuantParams();
+  MS_ASSERT(params.size() == 1);
+  quant_params_.output.zp_ = params.front().zeroPoint;
+  quant_params_.output.scale_ = params.front().scale;
+
+  double real_multiplier = quant_params_.input.scale_ * quant_params_.weight.scale_ / quant_params_.output.scale_;
+  QuantizeRoundParameter(real_multiplier, &quant_params_.quant_multiplier, &quant_params_.left_shift,
+                         &quant_params_.right_shift);
+  return RET_OK;
+}
+
+int MatmulInt8CPUKernel::ReSize() { return RET_OK; }
+
+int MatmulInt8CPUKernel::RunImpl(int task_id) {
+  int cur_oc = MSMIN(thread_stride_, UP_DIV(params_->col_8_, 8) - task_id * thread_stride_);
+  if (cur_oc <= 0) {
+    return RET_OK;
+  }
+  auto cur_b = b_r8_ptr_ + task_id * thread_stride_ * C8NUM * params_->deep_;
+  auto cur_c = c_r8x8_ptr_ + task_id * thread_stride_ * C8NUM * params_->row_8_;
+  MatMulInt8(a_c8_ptr_, cur_b, cur_c, params_->row_8_, cur_oc * 8, params_->deep_, quant_params_.input.zp_,
+             quant_params_.weight.zp_);
+  return RET_OK;
+}
+
+int MatmulInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
+  auto op = reinterpret_cast<MatmulInt8CPUKernel *>(cdata);
+  auto ret = op->RunImpl(task_id);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "MatmulInt8Run error task_id[" << task_id << "] error_code[" << ret << "]";
+    return ret;
+  }
+  return RET_OK;
+}
+
+int MatmulInt8CPUKernel::Run() {
+  auto a_ptr = reinterpret_cast<int8_t *>(inputs_[0]->Data());
+  auto b_ptr = reinterpret_cast<int8_t *>(inputs_[1]->Data());
+  auto c_ptr = reinterpret_cast<int8_t *>(outputs_[0]->Data());
+  auto a_stride = params_->row_ * params_->deep_;
+  auto b_stride = params_->deep_ * params_->col_;
+  auto c_stride = params_->row_ * params_->col_;
+
+  for (int i = 0; i < params_->batch; ++i) {
+    auto cur_a_ptr = a_ptr + i * a_stride;
+    auto cur_b_ptr = b_ptr + i * b_stride;
+    auto cur_c_ptr = c_ptr + i * c_stride;
+    if (params_->a_transpose_) {
+      RowMajor2Row8MajorInt8(cur_a_ptr, a_c8_ptr_, params_->deep_, params_->row_);
+    } else {
+      RowMajor2Col8MajorInt8(cur_a_ptr, a_c8_ptr_, params_->row_, params_->deep_);
+    }
+    if (params_->b_transpose_) {
+      RowMajor2Col8MajorInt8(cur_b_ptr, b_r8_ptr_, params_->col_, params_->deep_);
+    } else {
+      RowMajor2Row8MajorInt8(cur_b_ptr, b_r8_ptr_, params_->deep_, params_->col_);
+    }
+    LiteBackendParallelLaunch(MatmulInt8Run, this, thread_count_);
+    auto &q = quant_params_;
+    SimplePostFuncInt8(c_r8x8_ptr_, cur_c_ptr, params_->col_, params_->row_, params_->row_8_, q.quant_multiplier,
+                       q.left_shift, q.right_shift, q.output.zp_);
+  }
+
+  return RET_OK;
+}
+}  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/matmul_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/matmul_int8.h
new file mode 100644
index 0000000000..9081babe85
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/matmul_int8.h
@@ -0,0 +1,47 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_MATMUL_INT8_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_MATMUL_INT8_H_
+
+#include <vector>
+#include "include/context.h"
+#include "src/runtime/kernel/arm/opclib/quantization/quantize.h"
+#include "src/runtime/kernel/arm/base/matmul_base.h"
+
+using mindspore::lite::Context;
+
+namespace mindspore::kernel {
+class MatmulInt8CPUKernel : public MatmulBaseCPUKernel {
+ public:
+  MatmulInt8CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
+                      const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
+      : MatmulBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+  ~MatmulInt8CPUKernel() override;
+  int Init() override;
+  int ReSize() override;
+  int Run() override;
+  int RunImpl(int task_id);
+
+ private:
+  MatmulQuantArg quant_params_;
+  int8_t *a_c8_ptr_;
+  int8_t *b_r8_ptr_;
+  int *c_r8x8_ptr_;
+};
+}  // namespace mindspore::kernel
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_MATMUL_INT8_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/common_func.cc b/mindspore/lite/src/runtime/kernel/arm/opclib/common_func.cc
index ae2eaf554f..d2f7929fd4 100644
--- a/mindspore/lite/src/runtime/kernel/arm/opclib/common_func.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/opclib/common_func.cc
@@ -236,3 +236,20 @@ void PostFuncInt8(const int *in, const int *bias, int8_t *out, int oc, int plane
   }
   return;
 }
+
+void SimplePostFuncInt8(const int *in, int8_t *out, int oc, int plane, int plane8, int32_t multiplier,
+                        int32_t left_shift, int32_t right_shift, int32_t zp) {
+  /* (int32_t)row8x8-major * multiplier => (int8_t)row-major */
+  for (int r = 0; r < plane; r++) {
+    for (int c = 0; c < oc; c++) {
+      int c8div = c / 8, c8mod = c % 8;
+      int src_index = c8div * plane8 * 8 + r * 8 + c8mod;
+      int dst_index = r * oc + c;
+      int32_t value = in[src_index];
+      value = MultiplyByQuantizedMultiplier(value, multiplier, left_shift, right_shift) + zp;
+      value = MSMIN(CHAR_MAX, value);
+      value = MSMAX(CHAR_MIN, value);
+      out[dst_index] = (int8_t)value;
+    }
+  }
+}
diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/common_func.h b/mindspore/lite/src/runtime/kernel/arm/opclib/common_func.h
index e8c3f587f0..f35d158dfb 100644
--- a/mindspore/lite/src/runtime/kernel/arm/opclib/common_func.h
+++ b/mindspore/lite/src/runtime/kernel/arm/opclib/common_func.h
@@ -33,6 +33,8 @@ void ReluFp32(float *data, int ele_num);
 void Relu6Fp32(float *data, int ele_num);
 void PostFuncInt8(const int *in, const int *bias, int8_t *out, int oc, int plane, int plane8, int32_t multiplier,
                   int32_t left_shift, int32_t right_shift, int32_t zp, int8_t mini, int8_t maxi);
+void SimplePostFuncInt8(const int *in, int8_t *out, int oc, int plane, int plane8, int32_t multiplier,
+                        int32_t left_shift, int32_t right_shift, int32_t zp);
 void IndirectGemmFp32_8x8(float *output, const float *input, const float *weight, const float *bias, size_t step,
                           size_t ic4, size_t output_channel, size_t offset, size_t mode, size_t writeC4, size_t relu,
                           size_t relu6);
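SimplePostFuncInt8 walks the row8x8-major int32 accumulators tile by tile and requantizes each one back to row-major int8. The fixed-point multiply in MultiplyByQuantizedMultiplier (from fixed_point.h) is meant to have the same net effect as scaling by the real multiplier input_scale * weight_scale / output_scale computed in MatmulInt8CPUKernel::Init. A floating-point reference of that intended behavior (sketch with a hypothetical name, not the shipped fixed-point code):

  #include <algorithm>
  #include <cmath>
  #include <cstdint>

  // Scale an int32 accumulator by the real multiplier, add the output
  // zero point, and saturate to the int8 range.
  int8_t RequantizeSketch(int32_t acc, double real_multiplier, int32_t zp) {
    int32_t v = static_cast<int32_t>(std::lround(acc * real_multiplier)) + zp;
    return static_cast<int8_t>(std::min(127, std::max(-128, v)));
  }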
diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/matmul.cc b/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/matmul.cc
index 8f396cd44a..8c64a4a66f 100644
--- a/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/matmul.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/matmul.cc
@@ -65,9 +65,7 @@ void MatMul8x8(const float *a, const float *b, float *c, const float *bias, ActT
       size_t bi = c8div * deep * 8 + d * 8 + c8mod;
       value = value + a[ai] * b[bi];
     }
-    if (bias != nullptr) {
-      value += bias[col];
-    }
+    if (bias != nullptr) value += bias[col];
     if (act_type == ActType_Relu6) value = MSMIN(6.0f, value);
     if (act_type != ActType_No) value = MSMAX(0.0f, value);
     c[ci] = value;
diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/int8/matmul.cc b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/matmul.cc
index 0517f8b5fd..67dfb40f4b 100644
--- a/mindspore/lite/src/runtime/kernel/arm/opclib/int8/matmul.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/matmul.cc
@@ -18,6 +18,17 @@
 #include <limits.h>
 #include "src/runtime/kernel/arm/opclib/quantization/fixed_point.h"
 
+void RowMajor2Row8MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col) {
+  for (int r = 0; r < row; r++) {
+    int8_t *src = src_ptr + r * col;
+    for (int c = 0; c < col; c++) {
+      int cd8 = c / 8;
+      int cm8 = c % 8;
+      dst_ptr[cd8 * 8 * row + r * 8 + cm8] = src[c];
+    }
+  }
+}
+
 void RowMajor2Col8MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col) {
   for (int r = 0; r < row; r++) {
     int rd8 = r / 8;
@@ -26,7 +37,6 @@ void RowMajor2Col8MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col)
       dst_ptr[rd8 * col * 8 + c * 8 + rm8] = src_ptr[r * col + c];
     }
   }
-  return;
 }
 
 void MatMulInt8(const int8_t *a, const int8_t *b, int32_t *c, const int row8, const int col8, const int deep,
@@ -46,5 +56,4 @@ void MatMulInt8(const int8_t *a, const int8_t *b, int32_t *c, const int row8, co
       c[ci] = value;
     }
   }
-  return;
 }
diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/int8/matmul.h b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/matmul.h
index 6fc2166461..d51b783932 100644
--- a/mindspore/lite/src/runtime/kernel/arm/opclib/int8/matmul.h
+++ b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/matmul.h
@@ -22,7 +22,7 @@
 void MatMulInt8(const int8_t *a, const int8_t *b, int32_t *c, const int row8, const int col8, const int deep,
                 const int32_t a_zp, const int32_t b_zp);
+void RowMajor2Row8MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col);
 void RowMajor2Col8MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col);
 
 #endif  // MINDSPORE_LITE_SRC_BACKEND_ARM_OPCLIB_INT8_MATMUL_H_
-
diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/matmul.h b/mindspore/lite/src/runtime/kernel/arm/opclib/matmul.h
index ad105d8d25..2851bd55ae 100644
--- a/mindspore/lite/src/runtime/kernel/arm/opclib/matmul.h
+++ b/mindspore/lite/src/runtime/kernel/arm/opclib/matmul.h
@@ -29,6 +29,7 @@ struct MatMulParameter {
   int col_8_;
   int deep_;
   bool has_bias_;
+  int batch;
   bool a_transpose_;  /* false : row-major  */
   bool b_transpose_;  /* true  : col-major  */
   ActType act_type_;
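A note on the zero-point handling in MatMulInt8: with real_a = input_scale * (qa - input_zp) and real_b = weight_scale * (qb - weight_zp), each real product is input_scale * weight_scale * (qa - input_zp) * (qb - weight_zp). Accumulating (qa - input_zp) * (qb - weight_zp) in int32 therefore yields the real dot product up to the constant factor input_scale * weight_scale, and that factor (divided by output_scale, plus the output zero point) is exactly the rescale SimplePostFuncInt8 applies above.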
diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/quantization/quantize.h b/mindspore/lite/src/runtime/kernel/arm/opclib/quantization/quantize.h
index b0bf995535..aafa2b6883 100644
--- a/mindspore/lite/src/runtime/kernel/arm/opclib/quantization/quantize.h
+++ b/mindspore/lite/src/runtime/kernel/arm/opclib/quantization/quantize.h
@@ -22,6 +22,7 @@
 #include <stdint.h>
 #include <limits.h>
 #include <math.h>
+#include "src/runtime/kernel/arm/opclib/op_base.h"
 
 struct QuantArg {
   double scale_;
@@ -49,7 +50,7 @@ struct ConcatQuantArg {
   QuantArg out_quant_args_;
 };
 
-struct FcQuantArg {
+struct MatmulQuantArg {
   QuantArg input;
   QuantArg weight;
   QuantArg output;
@@ -130,4 +131,22 @@ inline void CalculateActivationRangeQuantized(bool is_relu, bool is_relu6, int32
   *mini = min;
   *maxi = max;
 }
+
+// quantize from float to int8
+inline void Quantize(float *input_data, int length, float scale, int zero_point, int8_t *output_data) {
+  for (int i = 0; i < length; ++i) {
+    int r = (int)round(input_data[i] / scale + zero_point);
+    r = r > CHAR_MAX ? CHAR_MAX : r;
+    r = r < CHAR_MIN ? CHAR_MIN : r;
+    output_data[i] = (int8_t)r;
+  }
+}
+
+// dequantize from int8 to float
+inline void Dequantize(int8_t *input_data, int length, float scale, int zero_point, float *output_data) {
+  for (int i = 0; i < length; ++i) {
+    output_data[i] = scale * (input_data[i] - zero_point);
+  }
+}
+
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_QUANTIZATION_QUANTIZE_H_
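Quantize and Dequantize now live here so the fullconnection and matmul int8 tests below can share them. A round trip loses at most about half a quantization step; for instance (illustrative values only):

  float in[] = {0.0f, 1.0f, -2.53f};
  int8_t q[3];
  float out[3];
  Quantize(in, 3, /*scale=*/0.1f, /*zero_point=*/0, q);  // q = {0, 10, -25}
  Dequantize(q, 3, 0.1f, 0, out);                        // out = {0.0, 1.0, -2.5}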
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/matmul_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/matmul_fp32_tests.cc
new file mode 100644
index 0000000000..0f22149ed4
--- /dev/null
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/matmul_fp32_tests.cc
@@ -0,0 +1,169 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <iostream>
+#include "mindspore/core/utils/log_adapter.h"
+#include "common/common_test.h"
+#include "mindspore/lite/src/runtime/kernel/arm/fp32/matmul.h"
+#include "src/kernel_registry.h"
+#include "src/lite_kernel.h"
+
+namespace mindspore {
+class TestMatMulFp32 : public mindspore::Common {
+ public:
+  TestMatMulFp32() {}
+};
+
+int MMTestInit(std::vector<lite::tensor::Tensor *> *inputs_, std::vector<lite::tensor::Tensor *> *outputs_,
+               float *a_ptr, float *b_ptr, std::vector<int> a_shape, std::vector<int> b_shape,
+               std::vector<int> c_shape) {
+  auto in_t =
+    new lite::tensor::Tensor(kNumberTypeFloat, a_shape, schema::Format_NHWC, static_cast<schema::NodeType>(1));
+  in_t->MallocData();
+  memcpy(in_t->Data(), a_ptr, sizeof(float) * in_t->ElementsNum());
+  inputs_->push_back(in_t);
+
+  auto weight_t =
+    new lite::tensor::Tensor(kNumberTypeFloat, b_shape, schema::Format_NHWC, static_cast<schema::NodeType>(1));
+  weight_t->MallocData();
+  memcpy(weight_t->Data(), b_ptr, sizeof(float) * weight_t->ElementsNum());
+  inputs_->push_back(weight_t);
+
+  auto out_t =
+    new lite::tensor::Tensor(kNumberTypeFloat, c_shape, schema::Format_NHWC, static_cast<schema::NodeType>(1));
+  out_t->MallocData();
+  outputs_->push_back(out_t);
+
+  return out_t->ElementsNum();
+}
+
+TEST_F(TestMatMulFp32, simple) {
+  std::vector<lite::tensor::Tensor *> inputs_;
+  std::vector<lite::tensor::Tensor *> outputs_;
+  auto matmul_param = new MatMulParameter();
+  matmul_param->a_transpose_ = false;
+  matmul_param->b_transpose_ = false;
+  matmul_param->has_bias_ = false;
+  float a[] = {-3.2366564, -4.7733846, -7.8329225, 16.146885, 5.060793, -6.1471, -1.7680453, -6.5721383,
+               17.87506, -5.1192183, 10.742863, 1.4536934, 19.693445, 19.45783, 5.063163, 0.5234792};
+  float b[] = {-0.0024438887, 0.0006738146, -0.008169129, 0.0021510671, -0.012470592, -0.0053063435,
+               0.006050155, 0.008656233, 0.012911413, -0.0028635843, -0.00034080597, -0.0010622552,
+               -0.012254699, -0.01312836, 0.0025241964, -0.004706142, 0.002451482, -0.009558459,
+               0.004481974, 0.0033251503, -0.011705584, -0.001720293, -0.0039410214, -0.0073637343};
+  std::vector<int> a_shape = {1, 2, 8};
+  std::vector<int> b_shape = {1, 8, 3};
+  std::vector<int> c_shape = {1, 2, 3};
+  int total_size = MMTestInit(&inputs_, &outputs_, a, b, a_shape, b_shape, c_shape);
+  auto ctx = new lite::Context;
+  ctx->threadNum = 2;
+  auto mm = new kernel::MatmulCPUKernel(reinterpret_cast<OpParameter *>(matmul_param), inputs_, outputs_, ctx);
+  mm->Init();
+  mm->Run();
+  float correct[] = {-0.1256939023733139, -0.07744802534580231, 0.07410638779401779,
+                     -0.3049793541431427, -0.027687929570674896, -0.18109679222106934};
+  CompareOutputData(reinterpret_cast<float *>(outputs_[0]->Data()), correct, total_size, 0.0001);
+  delete matmul_param;
+  delete mm;
+  for (auto t : inputs_) delete t;
+  for (auto t : outputs_) delete t;
+}
+
+TEST_F(TestMatMulFp32, simple_transb) {
+  std::vector<lite::tensor::Tensor *> inputs_;
+  std::vector<lite::tensor::Tensor *> outputs_;
+  auto matmul_param = new MatMulParameter();
+  matmul_param->a_transpose_ = false;
+  matmul_param->b_transpose_ = true;
+  matmul_param->has_bias_ = false;
+  float a[] = {-3.2366564, -4.7733846, -7.8329225, 16.146885, 5.060793, -6.1471, -1.7680453, -6.5721383,
+               17.87506, -5.1192183, 10.742863, 1.4536934, 19.693445, 19.45783, 5.063163, 0.5234792};
+  float b[] = {-0.0024438887, 0.0006738146, -0.008169129, 0.0021510671, -0.012470592, -0.0053063435,
+               0.006050155, 0.008656233, 0.012911413, -0.0028635843, -0.00034080597, -0.0010622552,
+               -0.012254699, -0.01312836, 0.0025241964, -0.004706142, 0.002451482, -0.009558459,
+               0.004481974, 0.0033251503, -0.011705584, -0.001720293, -0.0039410214, -0.0073637343};
+  std::vector<int> a_shape = {1, 2, 8};
+  std::vector<int> b_shape = {1, 3, 8};
+  std::vector<int> c_shape = {1, 2, 3};
+  int total_size = MMTestInit(&inputs_, &outputs_, a, b, a_shape, b_shape, c_shape);
+  auto ctx = new lite::Context;
+  ctx->threadNum = 2;
+  auto mm = new kernel::MatmulCPUKernel(reinterpret_cast<OpParameter *>(matmul_param), inputs_, outputs_, ctx);
+  mm->Init();
+  mm->Run();
+  float correct[] = {0.00533547, 0.002545945, 0.062974121, -0.445441471, -0.246223617, -0.142070031};
+  CompareOutputData(reinterpret_cast<float *>(outputs_[0]->Data()), correct, total_size, 0.0001);
+  delete matmul_param;
+  delete mm;
+  for (auto t : inputs_) delete t;
+  for (auto t : outputs_) delete t;
+}
+
+TEST_F(TestMatMulFp32, batch) {
+  std::vector<lite::tensor::Tensor *> inputs_;
+  std::vector<lite::tensor::Tensor *> outputs_;
+  auto matmul_param = new MatMulParameter();
+  matmul_param->a_transpose_ = false;
+  matmul_param->b_transpose_ = true;
+  matmul_param->has_bias_ = false;
+  float a[] = {-4.946672525326248, 11.154420027909701, -7.831129637356922, 17.309845099949953, -10.46177877610444,
+               2.5412751480833897, 2.700113860276929, -12.616715572097341, -15.513316568881574, -9.513294738065516,
+               17.931148376418896, -10.83801964632579, -14.023733862948017, -14.50805001403956, 0.7952221556310306,
+               6.619720423569035, -19.277904230909357, -13.450479287024839, 19.914652156692625, 16.542571697048878,
+               -2.9715041389268926, 4.949555349889412, -1.9408110276290103, -15.062828261031868, 0.20012569643335,
+               8.260383531209776, 3.1092344458607357, 16.742272486091487, 17.31277252415167, -16.60303202099434,
+               -8.980314693173042, -11.735087989358268, -14.918976184088514, -11.347592686892733, 11.808756029220604,
+               -18.76179414554809, 7.579758962360987, 3.13240880962163, 6.528181981442103, -16.802624652419794,
+               -14.323146919914901, -16.197579076296144, 9.738053920125779, -12.245780062949866, 8.817905278096319,
+               0.5261391331275007, -18.26152522535471, -2.400461208771226};
+  float b[] = {
+    -0.895183867395529, -0.8146900207660068, -0.27931593219652817, 0.783554361201179, -0.05080215007779798,
+    -0.9879631271568501, 0.07710949009001333, -0.9562579726211344, 0.29505553318356825, -0.26651960351085124,
+    -0.12755456259718279, -0.8221417897250098, -0.5094334041431876, -0.9117373380256013, 0.991501784215064,
+    0.20131976450979394, 0.07889260559412059, -0.8138407752750305, -0.047622075866657454, -0.2778043115153188,
+    -0.6269973420163957, -0.44345812666611617, -0.8571568605933642, 0.020192166011526735, 0.4860054298402434,
+    0.41525925469513614, -0.40270506445219967, -0.8716538067535347, 0.5276448387223114, 0.6064500154192936,
+    -0.9553204135772526, 0.3253219646257437, -0.7237956595774822, 0.3271284879679077, -0.534543967339336,
+    -0.4076498484281894, 0.01574797075171963, -0.37322004720586244, 0.16425071396119928, -0.5328652244800547,
+    0.7389336170615435, -0.6552069958923377, -0.042305872596973604, -0.6714941466767734, -0.9281411415119043,
+    -0.7748558258281224, -0.6209799945964443, 0.02526428593887675, -0.44984776800225856, 0.6281401952319337,
+    0.9907258228680276, 0.6288646615999687, -0.82076880150175, 0.3065944740797497, -0.29201038744043584,
+    -0.025685501802048982, -0.07273175145419652, 0.9370449239208709, -0.8233807408078093, -0.4195634619023012,
+    0.9799555630257346, -0.23461882935715228, -0.8884793313829993, -0.4760267734754635, -0.2874539543614072,
+    -0.8795685985480997, -0.08099698251915255, -0.1626521023321741, -0.9337167240793414, 0.40924842916829207,
+    -0.7375713045221615, -0.0065659291539015285};
+  std::vector<int> a_shape = {3, 2, 8};
+  std::vector<int> b_shape = {3, 3, 8};
+  std::vector<int> c_shape = {3, 2, 3};
+  int total_size = MMTestInit(&inputs_, &outputs_, a, b, a_shape, b_shape, c_shape);
+  auto ctx = new lite::Context;
+  ctx->threadNum = 1;
+  auto mm = new kernel::MatmulCPUKernel(reinterpret_cast<OpParameter *>(matmul_param), inputs_, outputs_, ctx);
+  mm->Init();
+  mm->Run();
+  float correct[] = {21.38518524169922, -14.514888763427734, -11.040614128112793, 16.91403579711914,
+                     27.07421112060547, 23.35394287109375, -39.006141662597656, -2.021998405456543,
+                     -17.63555145263672, -8.490625381469727, 5.317771911621094, -14.561882019042969,
+                     -7.251564025878906, -2.508212089538574, 5.86458683013916, -3.466249465942383,
+                     8.869029998779297, 25.034008026123047};
+  CompareOutputData(reinterpret_cast<float *>(outputs_[0]->Data()), correct, total_size, 0.0001);
+  delete matmul_param;
+  delete mm;
+  for (auto t : inputs_) delete t;
+  for (auto t : outputs_) delete t;
+}
+}  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/fullconnection_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/fullconnection_int8_tests.cc
index 4c789e63fd..e4739ff921 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/fullconnection_int8_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/fullconnection_int8_tests.cc
@@ -13,13 +13,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include <limits>
-#include <cmath>
 #include "utils/log_adapter.h"
 #include "common/common_test.h"
 #include "mindspore/lite/src/runtime/kernel/arm/int8/fullconnection_int8.h"
-#include "mindspore/lite/src/runtime/kernel/arm/opclib/int8/matmul.h"
 #include "mindspore/lite/src/runtime/kernel/arm/opclib/common_func.h"
+#include "mindspore/lite/src/runtime/kernel/arm/opclib/quantization/quantize.h"
 #include "mindspore/lite/src/kernel_registry.h"
 #include "mindspore/lite/src/lite_kernel.h"
 
@@ -30,21 +28,6 @@ class TestFcInt8 : public mindspore::Common {
   TestFcInt8() {}
 };
 
-void Quantize(float *input_data, int length, float scale, int zero_point, int8_t *output_data) {
-  for (int i = 0; i < length; ++i) {
-    int8_t q = static_cast<int8_t>(std::max<double>(
-      std::numeric_limits<int8_t>::min(),
-      std::min<double>(std::numeric_limits<int8_t>::max(), std::round(zero_point + (input_data[i] / scale)))));
-    output_data[i] = q;
-  }
-}
-
-void Dequantize(int8_t *input_data, int length, float scale, int zero_point, float *output_data) {
-  for (int i = 0; i < length; ++i) {
-    output_data[i] = scale * (input_data[i] - zero_point);
-  }
-}
-
 int FcInt8TestInit(std::vector<lite::tensor::Tensor *> *inputs_, std::vector<lite::tensor::Tensor *> *outputs_,
                    MatMulParameter *matmal_param, float **correct, double *scale, int *zeropoint) {
   float input_max = 20;
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/matmul_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/matmul_int8_tests.cc
new file mode 100644
index 0000000000..db4ea4054f
--- /dev/null
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/matmul_int8_tests.cc
@@ -0,0 +1,126 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "utils/log_adapter.h"
+#include "common/common_test.h"
+#include "mindspore/lite/src/runtime/kernel/arm/int8/matmul_int8.h"
+#include "mindspore/lite/src/runtime/kernel/arm/opclib/quantization/quantize.h"
+#include "mindspore/lite/src/runtime/kernel/arm/opclib/common_func.h"
+#include "mindspore/lite/src/kernel_registry.h"
+#include "mindspore/lite/src/lite_kernel.h"
+
+namespace mindspore {
+class TestMatmulInt8 : public mindspore::Common {
+ public:
+  TestMatmulInt8() {}
+};
+
+int MMInt8TestInit(std::vector<lite::tensor::Tensor *> *inputs_, std::vector<lite::tensor::Tensor *> *outputs_,
+                   MatMulParameter *matmul_param, float **correct, double *scale, int *zeropoint) {
+  float input_max = 20;
+  float input_min = -20;
+  float weight_max = 1;
+  float weight_min = -1;
+  float output_max = 30;
+  float output_min = -30;
+
+  double input_scale =
+    (input_max - input_min) / (std::numeric_limits<int8_t>::max() - std::numeric_limits<int8_t>::min());
+  int input_zp = std::numeric_limits<int8_t>::max() - input_max / input_scale;
+  double weight_scale =
+    (weight_max - weight_min) / (std::numeric_limits<int8_t>::max() - std::numeric_limits<int8_t>::min());
+  int weight_zp = std::numeric_limits<int8_t>::max() - weight_max / weight_scale;
+  double output_scale =
+    (output_max - output_min) / (std::numeric_limits<int8_t>::max() - std::numeric_limits<int8_t>::min());
+  int output_zp = std::numeric_limits<int8_t>::max() - output_max / output_scale;
+  *scale = output_scale;
+  *zeropoint = output_zp;
+
+  auto in_t =
+    new lite::tensor::Tensor(kNumberTypeInt8, {1, 2, 8}, schema::Format_NHWC, static_cast<schema::NodeType>(1));
+  in_t->MallocData();
+  float in[] = {6.583835634764597, 11.337275140963907, -4.125256949459629, 10.994337291530833,
+                19.086065139532636, 3.620842999158455, 13.167624585590346, -18.326739299407755,
+                14.877693740734841, -17.092677920571653, 19.24147072807235, -15.14805323833401,
+                -18.075654829688737, -0.9164404591894204, -3.836646280336332, -10.870298671273918};
+  Quantize(in, in_t->ElementsNum(), input_scale, input_zp, reinterpret_cast<int8_t *>(in_t->Data()));
+  auto in_quant_arg = new mindspore::lite::tensor::QuantArg();
+  in_quant_arg->zeroPoint = input_zp;
+  in_quant_arg->scale = input_scale;
+  in_t->AddQuantParam(*in_quant_arg);
+  inputs_->push_back(in_t);
+
+  auto weight_t =
+    new lite::tensor::Tensor(kNumberTypeInt8, {1, 3, 8}, schema::Format_NHWC, static_cast<schema::NodeType>(1));
+  weight_t->MallocData();
+  float weight[] = {0.3651070698591563, -0.5856943921727129, -0.7472032663840145, 0.9489992871641959,
+                    -0.8179490270358738, -0.873058811259344, 0.39876672713807215, -0.1816769383004213,
+                    -0.13584645926733696, -0.7614673836659709, -0.2535825872616164, -0.05265760030895916,
+                    0.28558728305658754, 0.15404213943520118, -0.1634824450738006, -0.5068199082730189,
+                    -0.026961256849111326, -0.1508441942453307, 0.9375335677537737, 0.3304690744194263,
+                    -0.5091563780251127, 0.029887336278646925, -0.39540496207319276, 0.46094065001445084};
+  Quantize(weight, weight_t->ElementsNum(), weight_scale, weight_zp, reinterpret_cast<int8_t *>(weight_t->Data()));
+  auto weight_quant_arg = new mindspore::lite::tensor::QuantArg();
+  weight_quant_arg->zeroPoint = weight_zp;
+  weight_quant_arg->scale = weight_scale;
+  weight_t->AddQuantParam(*weight_quant_arg);
+  inputs_->push_back(weight_t);
+
+  auto out_t =
+    new lite::tensor::Tensor(kNumberTypeInt8, {1, 2, 3}, schema::Format_NHWC, static_cast<schema::NodeType>(1));
+  out_t->MallocData();
+  auto output_quant_arg = new mindspore::lite::tensor::QuantArg();
+  output_quant_arg->zeroPoint = output_zp;
+  output_quant_arg->scale = output_scale;
+  out_t->AddQuantParam(*output_quant_arg);
+  outputs_->push_back(out_t);
+
+  *correct = reinterpret_cast<float *>(malloc(out_t->ElementsNum() * sizeof(float)));
+  float nchw_co[] = {-0.912632942, 4.08398056, -25.385608673, 2.720281124, 7.745952606, 20.893184662};
+  memcpy(*correct, nchw_co, out_t->ElementsNum() * sizeof(float));
+
+  matmul_param->b_transpose_ = true;
+  matmul_param->a_transpose_ = false;
+  matmul_param->has_bias_ = false;
+  return out_t->ElementsNum();
+}
+
+TEST_F(TestMatmulInt8, mmint8) {
+  std::vector<lite::tensor::Tensor *> inputs_;
+  std::vector<lite::tensor::Tensor *> outputs_;
+  auto matmul_param = new MatMulParameter();
+  float *correct;
+  double output_scale;
+  int output_zp;
+  int total_size = MMInt8TestInit(&inputs_, &outputs_, matmul_param, &correct, &output_scale, &output_zp);
+  auto ctx = new lite::Context;
+  ctx->threadNum = 2;
+  kernel::MatmulInt8CPUKernel *mm =
+    new kernel::MatmulInt8CPUKernel(reinterpret_cast<OpParameter *>(matmul_param), inputs_, outputs_, ctx);
+
+  mm->Init();
+  mm->Run();
+  float fout[6] = {0};
+  Dequantize(reinterpret_cast<int8_t *>(outputs_[0]->Data()), outputs_[0]->ElementsNum(), output_scale, output_zp,
+             fout);
+  CompareOutputData(fout, correct, total_size, 0.3);
+  delete matmul_param;
+  delete mm;
+  for (auto t : inputs_) delete t;
+  for (auto t : outputs_) delete t;
+  free(correct);
+}
+
+}  // namespace mindspore
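A closing remark on the int8 test's loose tolerance: CompareOutputData is called with 0.3, which is roughly one output quantization step. The output range [-30, 30] is spread over the 255 representable int8 levels, so output_scale = 60 / 255 ≈ 0.235, and a correctly requantized result can legitimately land up to about one step away from the float32 reference.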