parent
5247fe7315
commit
6a9eead482
@ -0,0 +1,65 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "nnacl/fp16/matrix_fp16.h"
|
||||
|
||||
void MatrixMultiplyFp16(const float16_t *matrix_a, const float16_t *matrix_b, float16_t *matrix_c, int m, int k,
|
||||
int n) {
|
||||
int count = 0;
|
||||
for (int h = 0; h < m; h++) {
|
||||
int h_offset = h * k;
|
||||
for (int w = 0; w < n; w++) {
|
||||
float16_t res = 0;
|
||||
for (int i = 0; i < k; i++) {
|
||||
res += *(matrix_a + h_offset + i) * *(matrix_b + w + i * n);
|
||||
}
|
||||
*(matrix_c + count) = res;
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void MatrixMultiplyVecFp16(const float16x8_t *matrix_a, const float16x8_t *matrix_b, float16x8_t *matrix_c,
|
||||
const float16_t *bias, int m, int k, int n) {
|
||||
if (bias == NULL) {
|
||||
int count = 0;
|
||||
for (int h = 0; h < m; h++) {
|
||||
int h_offset = h * k;
|
||||
for (int w = 0; w < n; w++) {
|
||||
float16x8_t res = vmovq_n_f16(0);
|
||||
for (int i = 0; i < k; i++) {
|
||||
res = vaddq_f16(res, vmulq_f16(matrix_a[h_offset + i], matrix_b[w + i * n]));
|
||||
}
|
||||
matrix_c[count] = res;
|
||||
count++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
int count = 0;
|
||||
float16x8_t bias_ptr = vld1q_f16(bias);
|
||||
for (int h = 0; h < m; h++) {
|
||||
int h_offset = h * k;
|
||||
for (int w = 0; w < n; w++) {
|
||||
float16x8_t res = vmovq_n_f16(0);
|
||||
for (int i = 0; i < k; i++) {
|
||||
res = vaddq_f16(res, vmulq_f16(matrix_a[h_offset + i], matrix_b[w + i * n]));
|
||||
}
|
||||
matrix_c[count] = vaddq_f16(res, bias_ptr);
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,54 +0,0 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_NNACL_MATRIX_TABLE_H_
|
||||
#define MINDSPORE_LITE_NNACL_MATRIX_TABLE_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
void MatrixG4x2(float *matrix_data);
|
||||
|
||||
void MatrixGT2x4(float *matrix_data);
|
||||
|
||||
void MatrixG8x2(float *matrix_data);
|
||||
|
||||
void MatrixGT2x8(float *matrix_data);
|
||||
|
||||
void MatrixG8x3(float *matrix_data);
|
||||
|
||||
void MatrixGT3x8(float *matrix_data);
|
||||
|
||||
void MatrixG8x4(float *matrix_data);
|
||||
|
||||
void MatrixGT4x8(float *matrix_data);
|
||||
|
||||
void MatrixG8x5(float *matrix_data);
|
||||
|
||||
void MatrixGT5x8(float *matrix_data);
|
||||
|
||||
void MatrixG8x6(float *matrix_data);
|
||||
|
||||
void MatrixGT6x8(float *matrix_data);
|
||||
|
||||
void MatrixG8x7(float *matrix_data);
|
||||
|
||||
void MatrixGT7x8(float *matrix_data);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // MINDSPORE_LITE_NNACL_MATRIX_TABLE_H_
|
@ -0,0 +1,233 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "nnacl/minimal_filtering_generator.h"
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
void Polynomial(float *interval, float *m, int degree) {
|
||||
for (int i = 0; i < degree; ++i) {
|
||||
float mul = 1;
|
||||
for (int j = 0; j < degree; ++j) {
|
||||
if (i == j) continue;
|
||||
mul *= (interval[i] - interval[j]);
|
||||
}
|
||||
m[i] = mul;
|
||||
}
|
||||
}
|
||||
|
||||
void DiagonalPlusMatrix(float *matrix, float *diagonal_matrix, int degree) {
|
||||
int data_num = (degree + 1) * (degree + 1);
|
||||
memset(diagonal_matrix, 0, data_num * sizeof(float));
|
||||
for (int i = 0; i < (degree + 1); ++i) {
|
||||
for (int j = 0; j < (degree + 1); ++j) {
|
||||
if (j == i) diagonal_matrix[i * (degree + 1) + j] = matrix[i];
|
||||
}
|
||||
}
|
||||
diagonal_matrix[data_num - 1] = 1;
|
||||
}
|
||||
|
||||
void ResidueMatrix(float *interval, float *b, int row, int col) {
|
||||
// row : input unit, col : output_unit
|
||||
// result : matrix b
|
||||
int len = row * col;
|
||||
memset(b, 0, len * sizeof(float));
|
||||
for (int i = 0; i < row - 1; ++i) {
|
||||
for (int j = 0; j < col; ++j) {
|
||||
b[i * col + j] = pow(interval[i], j);
|
||||
}
|
||||
}
|
||||
b[len - 1] = 1;
|
||||
}
|
||||
|
||||
void LT(float *poly_array, float *matrix_lt, int n) {
|
||||
float *coefficient_array = (float *)malloc(n * sizeof(float));
|
||||
float *poly = (float *)malloc(n * sizeof(float));
|
||||
Polynomial(poly_array, poly, n);
|
||||
for (int i = 0; i < n; ++i) {
|
||||
// get coefficient
|
||||
int index = 1;
|
||||
memset(coefficient_array, 0, n * sizeof(float));
|
||||
coefficient_array[0] = 1;
|
||||
for (int j = 0; j < n; ++j) {
|
||||
if (j == i) continue;
|
||||
float poly_coe = poly_array[j] == 0 ? 0 : -poly_array[j];
|
||||
coefficient_array[index] = 1;
|
||||
for (int k = index - 1; k > 0; --k) {
|
||||
coefficient_array[k] = coefficient_array[k] * poly_coe + coefficient_array[k - 1];
|
||||
}
|
||||
coefficient_array[0] *= poly_coe;
|
||||
index++;
|
||||
}
|
||||
|
||||
// lx[i, 0].nth(j) / f[i]
|
||||
int setp = i * n;
|
||||
for (int l = 0; l < n; ++l) {
|
||||
matrix_lt[setp + l] = coefficient_array[l] / poly[i];
|
||||
}
|
||||
} // matrix L row loop
|
||||
free(coefficient_array);
|
||||
free(poly);
|
||||
}
|
||||
|
||||
void T(float *poly_array, float *matrix_t, int n) {
|
||||
memset(matrix_t, 0, n * (n + 1) * sizeof(float));
|
||||
for (int i = 0; i < n; ++i) {
|
||||
for (int j = 0; j < n + 1; ++j) {
|
||||
if (j == i) matrix_t[i * (n + 1) + j] = 1;
|
||||
if (j == n) {
|
||||
if (poly_array[i] == 0) {
|
||||
matrix_t[i * (n + 1) + j] = 0;
|
||||
} else {
|
||||
matrix_t[i * (n + 1) + j] = -pow(poly_array[i], n);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void B(float *poly_array, float *matrix_b, int in_unit) {
|
||||
memset(matrix_b, 0, in_unit * in_unit * sizeof(float));
|
||||
int n = in_unit - 1;
|
||||
float *matrix_l = (float *)malloc(n * n * sizeof(float));
|
||||
float *matrix_lt = (float *)malloc(n * n * sizeof(float));
|
||||
float *matrix_t = (float *)malloc(n * in_unit * sizeof(float));
|
||||
T(poly_array, matrix_t, n);
|
||||
LT(poly_array, matrix_lt, n);
|
||||
MatrixTranspose(matrix_lt, matrix_l, n, n);
|
||||
MatrixMultiply(matrix_l, matrix_t, matrix_b, n, n, in_unit);
|
||||
matrix_b[in_unit * in_unit - 1] = 1;
|
||||
free(matrix_l);
|
||||
free(matrix_lt);
|
||||
free(matrix_t);
|
||||
}
|
||||
|
||||
void GenerateIntervalArray(float *array, float interval, int degree) {
|
||||
array[0] = 0;
|
||||
for (int i = 1; i < degree; ++i) {
|
||||
int coefficient = pow(-1, i - 1);
|
||||
array[i] = array[i - 1] + interval * i * coefficient;
|
||||
}
|
||||
}
|
||||
|
||||
void MatrixTranspose(float *matrix, float *trans_matrix, int row, int col) {
|
||||
for (int i = 0; i < col; ++i) {
|
||||
for (int j = 0; j < row; ++j) {
|
||||
trans_matrix[i * row + j] = matrix[j * col + i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void MatrixMultiply(const float *matrix_a, const float *matrix_b, float *matrix_c, int m, int k, int n) {
|
||||
int count = 0;
|
||||
for (int h = 0; h < m; h++) {
|
||||
int h_offset = h * k;
|
||||
for (int w = 0; w < n; w++) {
|
||||
float res = 0;
|
||||
for (int i = 0; i < k; i++) {
|
||||
res += *(matrix_a + h_offset + i) * *(matrix_b + w + i * n);
|
||||
}
|
||||
*(matrix_c + count) = res;
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CookToomFilter(float *matrix_a, float *matrix_at, float *matrix_b, float *matrix_bt, float *matrix_g,
|
||||
float *matrix_gt, float coefficient, int out_unit, int filter_size) {
|
||||
int in_unit = out_unit + filter_size - 1;
|
||||
int degree = in_unit - 1;
|
||||
float *polynomial_m = malloc(degree * sizeof(float));
|
||||
float *diagonal_matrix = malloc(in_unit * in_unit * sizeof(float));
|
||||
float *inverse_diagonal_matrix = malloc(in_unit * in_unit * sizeof(float));
|
||||
|
||||
// get diagonal matrix
|
||||
float *interval = malloc(degree * sizeof(float));
|
||||
GenerateIntervalArray(interval, coefficient, degree);
|
||||
Polynomial(interval, polynomial_m, degree);
|
||||
DiagonalPlusMatrix(polynomial_m, diagonal_matrix, degree);
|
||||
if (diagonal_matrix[0] < 0) {
|
||||
for (int i = 0; i < in_unit; ++i) {
|
||||
if (diagonal_matrix[i] != 0) diagonal_matrix[i] *= -1;
|
||||
}
|
||||
}
|
||||
|
||||
// inverse diagonal matrix
|
||||
for (int j = 0; j < in_unit * in_unit; ++j) {
|
||||
if (diagonal_matrix[j] != 0) {
|
||||
inverse_diagonal_matrix[j] = 1.0 / diagonal_matrix[j];
|
||||
} else {
|
||||
inverse_diagonal_matrix[j] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// get matrix A && AT
|
||||
ResidueMatrix(interval, matrix_a, in_unit, out_unit);
|
||||
MatrixTranspose(matrix_a, matrix_at, in_unit, out_unit);
|
||||
|
||||
// get matrix B
|
||||
B(interval, matrix_bt, in_unit);
|
||||
MatrixTranspose(matrix_bt, matrix_b, in_unit, in_unit);
|
||||
MatrixMultiply(diagonal_matrix, matrix_b, matrix_bt, in_unit, in_unit, in_unit);
|
||||
MatrixTranspose(matrix_bt, matrix_b, in_unit, in_unit);
|
||||
|
||||
// get matrix G && GT
|
||||
float *tmp_g = malloc(in_unit * filter_size * sizeof(float));
|
||||
ResidueMatrix(interval, matrix_g, in_unit, filter_size);
|
||||
MatrixTranspose(matrix_g, tmp_g, in_unit, filter_size);
|
||||
MatrixMultiply(tmp_g, inverse_diagonal_matrix, matrix_gt, filter_size, in_unit, in_unit);
|
||||
MatrixTranspose(matrix_gt, matrix_g, filter_size, in_unit);
|
||||
|
||||
free(interval);
|
||||
free(polynomial_m);
|
||||
free(diagonal_matrix);
|
||||
free(inverse_diagonal_matrix);
|
||||
free(tmp_g);
|
||||
}
|
||||
|
||||
#ifdef ENABLE_ARM
|
||||
void MatrixMultiplyVec(const float32x4_t *matrix_a, const float32x4_t *matrix_b, float32x4_t *matrix_c,
|
||||
const float *bias, int m, int k, int n) {
|
||||
if (bias == NULL) {
|
||||
int count = 0;
|
||||
for (int h = 0; h < m; h++) {
|
||||
int h_offset = h * k;
|
||||
for (int w = 0; w < n; w++) {
|
||||
float32x4_t res = vmovq_n_f32(0);
|
||||
for (int i = 0; i < k; i++) {
|
||||
res = vmlaq_f32(res, matrix_a[h_offset + i], matrix_b[w + i * n]);
|
||||
}
|
||||
matrix_c[count] = res;
|
||||
count++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
int count = 0;
|
||||
float32x4_t bias_ptr = vld1q_f32(bias);
|
||||
for (int h = 0; h < m; h++) {
|
||||
int h_offset = h * k;
|
||||
for (int w = 0; w < n; w++) {
|
||||
float32x4_t res = vmovq_n_f32(0);
|
||||
for (int i = 0; i < k; i++) {
|
||||
res = vmlaq_f32(res, matrix_a[h_offset + i], matrix_b[w + i * n]);
|
||||
}
|
||||
matrix_c[count] = vaddq_f32(res, bias_ptr);
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
@ -0,0 +1,56 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_NNACL_MINIMAL_FILTERING_GENERATOR_H_
|
||||
#define MINDSPORE_LITE_NNACL_MINIMAL_FILTERING_GENERATOR_H_
|
||||
|
||||
#ifdef ENABLE_ARM
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
void Polynomial(float *interval, float *m, int degree);
|
||||
|
||||
void DiagonalPlusMatrix(float *matrix, float *diagonal_matrix, int degree);
|
||||
|
||||
void ResidueMatrix(float *interval, float *b, int row, int col);
|
||||
|
||||
void LT(float *poly_array, float *matrix_lt, int n);
|
||||
|
||||
void T(float *poly_array, float *matrix_t, int n);
|
||||
|
||||
void B(float *poly_array, float *matrix_b, int in_unit);
|
||||
|
||||
void GenerateIntervalArray(float *array, float interval, int degree);
|
||||
|
||||
void MatrixTranspose(float *matrix, float *trans_matrix, int row, int col);
|
||||
|
||||
void MatrixMultiply(const float *matrix_a, const float *matrix_b, float *matrix_c, int m, int k, int n);
|
||||
|
||||
void CookToomFilter(float *matrix_a, float *matrix_at, float *matrix_b, float *matrix_bt, float *matrix_g,
|
||||
float *matrix_gt, float coefficient, int out_unit, int filter_size);
|
||||
|
||||
#ifdef ENABLE_ARM
|
||||
void MatrixMultiplyVec(const float32x4_t *matrix_a, const float32x4_t *matrix_b, float32x4_t *matrix_c,
|
||||
const float *bias, int m, int k, int n);
|
||||
#endif
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // MINDSPORE_LITE_NNACL_MINIMAL_FILTERING_GENERATOR_H_
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,86 +0,0 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/kernel/arm/base/matrix.h"
|
||||
#include "utils/log_adapter.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
Matrix *TransformMatrixGenerator(int m, int k) {
|
||||
auto matrix = new (std::nothrow) Matrix;
|
||||
if (matrix == nullptr) {
|
||||
MS_LOG(ERROR) << "matrix is nullptr.";
|
||||
return nullptr;
|
||||
}
|
||||
auto data = malloc(m * k * sizeof(float));
|
||||
if (data == nullptr) {
|
||||
MS_LOG(ERROR) << "Malloc matrix data failed.";
|
||||
return nullptr;
|
||||
}
|
||||
matrix->SetData(data);
|
||||
matrix->SetNum(m, k);
|
||||
return matrix;
|
||||
}
|
||||
|
||||
void ChooseMatrixG(Matrix *matrix_g, Matrix *matrix_gt) {
|
||||
int m = matrix_g->GetM();
|
||||
int k = matrix_g->GetK();
|
||||
auto matrix_g_data = reinterpret_cast<float *>(matrix_g->GetData());
|
||||
auto matrix_gt_data = reinterpret_cast<float *>(matrix_gt->GetData());
|
||||
// m represents input unit, only 4 or 8 can be accepted for input unit.
|
||||
// k represents kernel unit, varies from 2 to 7.
|
||||
if (m == 4 && k == 2) {
|
||||
MatrixG4x2(matrix_g_data);
|
||||
MatrixGT2x4(matrix_gt_data);
|
||||
} else if (m == 8 && k == 2) {
|
||||
MatrixG8x2(matrix_g_data);
|
||||
MatrixGT2x8(matrix_gt_data);
|
||||
} else if (m == 8 && k == 3) {
|
||||
MatrixG8x3(matrix_g_data);
|
||||
MatrixGT3x8(matrix_gt_data);
|
||||
} else if (m == 8 && k == 4) {
|
||||
MatrixG8x4(matrix_g_data);
|
||||
MatrixGT4x8(matrix_gt_data);
|
||||
} else if (m == 8 && k == 5) {
|
||||
MatrixG8x5(matrix_g_data);
|
||||
MatrixGT5x8(matrix_gt_data);
|
||||
} else if (m == 8 && k == 6) {
|
||||
MatrixG8x6(matrix_g_data);
|
||||
MatrixGT6x8(matrix_gt_data);
|
||||
} else if (m == 8 && k == 7) {
|
||||
MatrixG8x7(matrix_g_data);
|
||||
MatrixGT7x8(matrix_gt_data);
|
||||
} else {
|
||||
MS_LOG(ERROR) << "Unsupported input unit or kernel unit.";
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void MatrixMultiply(const float *matrix_a, const float *matrix_b, float *matrix_c, int m, int k, int n, bool row) {
|
||||
// row-major implementation
|
||||
int count = 0;
|
||||
for (int h = 0; h < m; h++) {
|
||||
int h_offset = h * k;
|
||||
for (int w = 0; w < n; w++) {
|
||||
float res = 0;
|
||||
for (int i = 0; i < k; i++) {
|
||||
res += *(matrix_a + h_offset + i) * *(matrix_b + w + i * n);
|
||||
}
|
||||
*(matrix_c + count) = res;
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace mindspore::kernel
|
@ -1,77 +0,0 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_MATRIX_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_MATRIX_H_
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <vector>
|
||||
#include "nnacl/winograd_utils.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
class Matrix {
|
||||
public:
|
||||
Matrix() = default;
|
||||
~Matrix() {
|
||||
if (data_ != nullptr) {
|
||||
free(data_);
|
||||
}
|
||||
}
|
||||
|
||||
void SetData(void *data) { this->data_ = data; }
|
||||
|
||||
void *GetData() { return this->data_; }
|
||||
|
||||
void SetNDim(int dim) { this->n_dim_ = dim; }
|
||||
|
||||
int GetNDim() { return this->n_dim_; }
|
||||
|
||||
void SetShape(std::vector<int> shape) { this->shape_ = shape; }
|
||||
|
||||
std::vector<int> GetShape() { return this->shape_; }
|
||||
|
||||
void SetStride(std::vector<int> stride) { this->stride_ = stride; }
|
||||
|
||||
std::vector<int> GetStride() { return this->stride_; }
|
||||
|
||||
void SetNum(int m, int k) {
|
||||
this->m_ = m;
|
||||
this->k_ = k;
|
||||
}
|
||||
|
||||
int GetM() { return this->m_; }
|
||||
|
||||
int GetK() { return this->k_; }
|
||||
|
||||
protected:
|
||||
void *data_ = nullptr;
|
||||
std::vector<int> shape_;
|
||||
std::vector<int> stride_;
|
||||
int m_;
|
||||
int k_;
|
||||
int n_dim_;
|
||||
bool row_major_;
|
||||
};
|
||||
|
||||
Matrix *TransformMatrixGenerator(int m, int k);
|
||||
|
||||
// Chinese Remainder Theorem interp: 0.5
|
||||
void ChooseMatrixG(Matrix *matrix_g, Matrix *matrix_gt);
|
||||
|
||||
void MatrixMultiply(const float *matrix_a, const float *matrix_b, float *matrix_c, int m, int k, int n, bool row);
|
||||
} // namespace mindspore::kernel
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_MATRIX_H_
|
File diff suppressed because it is too large
Load Diff
@ -1,36 +0,0 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "src/runtime/kernel/arm/fp16/matrix_fp16.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
|
||||
void MatrixMultiplyFp16(const float16_t *matrix_a, const float16_t *matrix_b, float16_t *matrix_c, int m, int k, int n,
|
||||
bool row) {
|
||||
// row-major implementation
|
||||
int count = 0;
|
||||
for (int h = 0; h < m; h++) {
|
||||
int h_offset = h * k;
|
||||
for (int w = 0; w < n; w++) {
|
||||
float16_t res = 0;
|
||||
for (int i = 0; i < k; i++) {
|
||||
res += *(matrix_a + h_offset + i) * *(matrix_b + w + i * n);
|
||||
}
|
||||
*(matrix_c + count) = res;
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace mindspore::kernel
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue