diff --git a/mindspore/lite/src/ops/matmul.cc b/mindspore/lite/src/ops/matmul.cc
index 53ea430a9d..f56d1c9a14 100644
--- a/mindspore/lite/src/ops/matmul.cc
+++ b/mindspore/lite/src/ops/matmul.cc
@@ -22,10 +22,6 @@ namespace mindspore::lite {
 int MatMul::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor::Tensor *> outputs_) {
   MS_ASSERT(this->primitive != nullptr);
-  if (inputs_.size() != kDoubleNum) {
-    MS_LOG(ERROR) << "OpMatMul inputs size: " << inputs_.size();
-    return RET_INPUT_TENSOR_ERROR;
-  }
   auto input0 = inputs_.front();
   MS_ASSERT(input0 != nullptr);
   auto input1 = inputs_.at(1);
   MS_ASSERT(input1 != nullptr);
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.cc
index 88a0bc7f8c..2e35323ba6 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.cc
@@ -20,6 +20,7 @@
 #include "include/errorcode.h"
 
 using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_INPUT_TENSOR_ERROR;
 using mindspore::lite::RET_MEMORY_FAILED;
 using mindspore::lite::RET_OK;
 
@@ -28,6 +29,7 @@ MatmulCPUKernel::~MatmulCPUKernel() {
   ctx_->allocator->Free(a_c8_ptr_);
   ctx_->allocator->Free(b_r8_ptr_);
   ctx_->allocator->Free(c_r8x8_ptr_);
+  ctx_->allocator->Free(bias_ptr_);
 }
 
 int MatmulCPUKernel::ReSize() { return RET_OK; }
@@ -40,6 +42,14 @@ int MatmulCPUKernel::Init() {
   int batch = 1;
   auto a_shape = in_tensors_[0]->shape();
   auto c_shape = out_tensors_[0]->shape();
+  if (in_tensors_.size() == 3) {
+    auto bias_shape = in_tensors_[2]->shape();
+    if (bias_shape[bias_shape.size() - 1] != c_shape[c_shape.size() - 1]) {
+      MS_LOG(ERROR) << "The bias' dimension is not equal with column";
+      return RET_INPUT_TENSOR_ERROR;
+    }
+  }
+
   for (int i = 0; i < a_shape.size() - 2; ++i) {
     batch *= a_shape[i];
   }
@@ -67,6 +77,15 @@ int MatmulCPUKernel::Init() {
     return RET_MEMORY_FAILED;
   }
   memset(c_r8x8_ptr_, 0, params_->row_8_ * params_->col_8_ * sizeof(float));
+
+  if (in_tensors_.size() == 3) {
+    bias_ptr_ = reinterpret_cast<float *>(malloc(params_->col_8_ * sizeof(float)));
+    memset(bias_ptr_, 0, params_->col_8_ * sizeof(float));
+    memcpy(bias_ptr_, in_tensors_[2]->Data(), params_->col_ * sizeof(float));
+  } else {
+    bias_ptr_ = nullptr;
+  }
+
   return RET_OK;
 }
 
@@ -77,7 +96,12 @@ int MatmulCPUKernel::RunImpl(int task_id) {
   }
   auto cur_b = b_r8_ptr_ + task_id * thread_stride_ * C8NUM * params_->deep_;
   auto cur_c = c_r8x8_ptr_ + task_id * thread_stride_ * C8NUM * params_->row_8_;
-  MatMul(a_c8_ptr_, cur_b, cur_c, NULL, ActType_No, params_->deep_, params_->row_8_, cur_oc * 8, 0, false);
+  if (bias_ptr_) {
+    auto cur_bias = bias_ptr_ + task_id * thread_stride_ * C8NUM;
+    MatMul(a_c8_ptr_, cur_b, cur_c, cur_bias, ActType_No, params_->deep_, params_->row_8_, cur_oc * 8, 0, false);
+  } else {
+    MatMul(a_c8_ptr_, cur_b, cur_c, NULL, ActType_No, params_->deep_, params_->row_8_, cur_oc * 8, 0, false);
+  }
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.h b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.h
index 6efb35bfe4..654950c9ff 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.h
@@ -39,6 +39,7 @@ class MatmulCPUKernel : public MatmulBaseCPUKernel {
   float *a_c8_ptr_;
   float *b_r8_ptr_;
   float *c_r8x8_ptr_;
+  float *bias_ptr_;
 };
 }  // namespace mindspore::kernel
 
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/matmul_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/matmul_fp32_tests.cc
index 5f55cfbbc5..0ed7a05f73 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/matmul_fp32_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/matmul_fp32_tests.cc
@@ -202,6 +202,35 @@ int MMTestInit(std::vector<lite::tensor::Tensor *> *inputs_, std::vector<lite::
   return out_t->ElementsNum();
 }
 
+int MMTestInit2(std::vector<lite::tensor::Tensor *> *inputs_, std::vector<lite::tensor::Tensor *> *outputs_,
+                float *a_ptr, float *b_ptr, float *bias_ptr, std::vector<int> a_shape, std::vector<int> b_shape,
+                std::vector<int> bias_shape, std::vector<int> c_shape) {
+  auto in_t =
+    new lite::tensor::Tensor(kNumberTypeFloat, a_shape, schema::Format_NHWC, static_cast<schema::NodeType>(1));
+  in_t->MallocData();
+  memcpy(in_t->Data(), a_ptr, sizeof(float) * in_t->ElementsNum());
+  inputs_->push_back(in_t);
+
+  auto weight_t =
+    new lite::tensor::Tensor(kNumberTypeFloat, b_shape, schema::Format_NHWC, static_cast<schema::NodeType>(1));
+  weight_t->MallocData();
+  memcpy(weight_t->Data(), b_ptr, sizeof(float) * weight_t->ElementsNum());
+  inputs_->push_back(weight_t);
+
+  auto bias_t =
+    new lite::tensor::Tensor(kNumberTypeFloat, bias_shape, schema::Format_NHWC, static_cast<schema::NodeType>(1));
+  bias_t->MallocData();
+  memcpy(bias_t->Data(), bias_ptr, sizeof(float) * bias_t->ElementsNum());
+  inputs_->push_back(bias_t);
+
+  auto out_t =
+    new lite::tensor::Tensor(kNumberTypeFloat, c_shape, schema::Format_NHWC, static_cast<schema::NodeType>(1));
+  out_t->MallocData();
+  outputs_->push_back(out_t);
+
+  return out_t->ElementsNum();
+}
+
 TEST_F(TestMatMulFp32, simple) {
   std::vector<lite::tensor::Tensor *> inputs_;
   std::vector<lite::tensor::Tensor *> outputs_;
@@ -232,6 +261,38 @@ TEST_F(TestMatMulFp32, simple) {
   for (auto t : outputs_) delete t;
 }
 
+TEST_F(TestMatMulFp32, simple_bias) {
+  std::vector<lite::tensor::Tensor *> inputs_;
+  std::vector<lite::tensor::Tensor *> outputs_;
+  auto matmul_param = new MatMulParameter();
+  matmul_param->a_transpose_ = false;
+  matmul_param->b_transpose_ = false;
+  matmul_param->has_bias_ = false;
+  float a[] = {-3.2366564, -4.7733846, -7.8329225, 16.146885, 5.060793, -6.1471, -1.7680453, -6.5721383,
+               17.87506, -5.1192183, 10.742863, 1.4536934, 19.693445, 19.45783, 5.063163, 0.5234792};
+  float b[] = {-0.0024438887, 0.0006738146, -0.008169129, 0.0021510671, -0.012470592, -0.0053063435,
+               0.006050155, 0.008656233, 0.012911413, -0.0028635843, -0.00034080597, -0.0010622552,
+               -0.012254699, -0.01312836, 0.0025241964, -0.004706142, 0.002451482, -0.009558459,
+               0.004481974, 0.0033251503, -0.011705584, -0.001720293, -0.0039410214, -0.0073637343};
+  float bias[] = {1, 2, 3};
+  std::vector<int> a_shape = {2, 8};
+  std::vector<int> b_shape = {8, 3};
+  std::vector<int> bias_shape = {1, 3};
+  std::vector<int> c_shape = {2, 3};
+  int total_size = MMTestInit2(&inputs_, &outputs_, a, b, bias, a_shape, b_shape, bias_shape, c_shape);
+  auto ctx = new lite::Context;
+  ctx->thread_num_ = 1;
+  auto mm = new kernel::MatmulCPUKernel(reinterpret_cast<OpParameter *>(matmul_param), inputs_, outputs_, ctx, nullptr);
+  mm->Init();
+  mm->Run();
+  float correct[] = {-0.1256939023733139 + 1, -0.07744802534580231 + 2, 0.07410638779401779 + 3,
+                     -0.3049793541431427 + 1, -0.027687929570674896 + 2, -0.18109679222106934 + 3};
+  CompareOutputData(reinterpret_cast<float *>(outputs_[0]->Data()), correct, total_size, 0.0001);
+  delete mm;
+  for (auto t : inputs_) delete t;
+  for (auto t : outputs_) delete t;
+}
+
 TEST_F(TestMatMulFp32, simple2) {
   std::vector<lite::tensor::Tensor *> inputs_;
   std::vector<lite::tensor::Tensor *> outputs_;
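Note on the bias layout used by this patch: Init() rounds the output column count up to a multiple of C8NUM (8), allocates col_8_ floats, zeroes them, and copies the real col_ bias values in, so RunImpl() can hand each task an aligned 8-wide bias slice with no tail check. The following is a minimal standalone C++ sketch of that padding and slicing scheme; it is not MindSpore code, and the names (col, thread_stride, num_tasks) are illustrative only.

#include <cstdio>
#include <cstdlib>
#include <cstring>

constexpr int C8NUM = 8;

int main() {
  const int col = 3;                                    // real output columns (col_ in the kernel)
  const int col_8 = (col + C8NUM - 1) / C8NUM * C8NUM;  // padded width (col_8_ in the kernel)
  const float bias[3] = {1.0f, 2.0f, 3.0f};

  // Mirrors Init(): allocate the padded buffer, zero it, copy the real bias.
  float *bias_ptr = static_cast<float *>(malloc(col_8 * sizeof(float)));
  memset(bias_ptr, 0, col_8 * sizeof(float));
  memcpy(bias_ptr, bias, col * sizeof(float));

  // Mirrors RunImpl(): each task reads an aligned C8NUM-wide slice starting at
  // task_id * thread_stride * C8NUM; the padded entries are zero, so adding
  // them to the padded output columns is harmless.
  const int thread_stride = 1;
  const int num_tasks = col_8 / (thread_stride * C8NUM);
  for (int task_id = 0; task_id < num_tasks; ++task_id) {
    const float *cur_bias = bias_ptr + task_id * thread_stride * C8NUM;
    for (int i = 0; i < C8NUM; ++i) {
      printf("task %d, bias[%d] = %f\n", task_id, i, cur_bias[i]);
    }
  }
  free(bias_ptr);
  return 0;
}

Zero-padding keeps the 8-wide vectorized bias add branch-free: the padded output columns pick up a zero bias and are dropped when the C8-blocked result in c_r8x8_ptr_ is unpacked to the real output shape.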