parent
c54a4a4494
commit
a9c90f71e4
@ -1,99 +0,0 @@
|
|||||||
/**
|
|
||||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef MINDSPORE_LITE_NNACL_OPTIMIZED_KERNEL_H_
|
|
||||||
#define MINDSPORE_LITE_NNACL_OPTIMIZED_KERNEL_H_
|
|
||||||
|
|
||||||
#ifndef _WIN32
|
|
||||||
#include <dlfcn.h>
|
|
||||||
#endif
|
|
||||||
#ifdef __ANDROID__
|
|
||||||
#include <asm/hwcap.h>
|
|
||||||
#include "nnacl/nnacl_utils.h"
|
|
||||||
#endif
|
|
||||||
#include "src/common/log_adapter.h"
|
|
||||||
|
|
||||||
#define OPTIMIZE_SHARED_LIBRARY_PATH "libmindspore-lite-optimize.so"
|
|
||||||
#define FLOAT16_SHARED_LIBRARY_PATH "libmindspore-lite-fp16.so"
|
|
||||||
|
|
||||||
/**
 * Holder for the handle of the optional "optimize" kernel shared library.
 *
 * When built with ENABLE_ARM64, the constructor reads a hardware-capability word through getHwCap() and,
 * only if the HWCAP_ASIMDDP bit is set, tries to dlopen() OPTIMIZE_SHARED_LIBRARY_PATH. In every other
 * situation (other builds, capability bit missing, dlopen() failure) optimized_op_handler_ keeps its
 * nullptr default, so users of the handle must check it before calling dlsym() on it.
 */
class OptimizeModule {
 public:
  OptimizeModule() {
#ifdef ENABLE_ARM64
    // NOTE(review): 16 is presumably the AT_HWCAP auxiliary-vector key expected by getHwCap() - confirm.
    const int hwcap_type = 16;
    const uint32_t hwcap = getHwCap(hwcap_type);
    // NOTE(review): the messages carry a "0x" prefix but no hex manipulator is applied to hwcap here;
    // confirm how MS_LOG formats integers.
    if ((hwcap & HWCAP_ASIMDDP) == 0) {
      MS_LOG(INFO) << "Hw cap NOT support SIMD Dot Product, hwcap: 0x" << hwcap;
      return;
    }
    MS_LOG(INFO) << "Hw cap support SIMD Dot Product, hwcap: 0x" << hwcap;

    optimized_op_handler_ = dlopen(OPTIMIZE_SHARED_LIBRARY_PATH, RTLD_LAZY);
    if (optimized_op_handler_ == nullptr) {
      // A missing library is not treated as an error: it is only logged and the handle stays null.
      MS_LOG(INFO) << "Open optimize shared library failed: " << dlerror();
    }
#endif
  }

  // The destructor is defaulted; this class never calls dlclose() on the handle.
  ~OptimizeModule() = default;

  /** Returns the address of a function-local static instance, constructed on the first call. */
  static OptimizeModule *GetInstance() {
    static OptimizeModule opt_module;
    return &opt_module;
  }

  // Handle returned by dlopen(), or nullptr when the library was not loaded.
  void *optimized_op_handler_ = nullptr;
};
|
|
||||||
|
|
||||||
/**
 * Holder for the handle of the optional float16 kernel shared library.
 *
 * When built with ENABLE_ARM64, the constructor reads a hardware-capability word through getHwCap() and,
 * only if the HWCAP_FPHP bit is set, tries to dlopen() FLOAT16_SHARED_LIBRARY_PATH. In every other
 * situation (other builds, capability bit missing, dlopen() failure) float16_op_handler_ keeps its
 * nullptr default, so users of the handle must check it before calling dlsym() on it.
 */
class Float16Module {
 public:
  Float16Module() {
#ifdef ENABLE_ARM64
    // NOTE(review): 16 is presumably the AT_HWCAP auxiliary-vector key expected by getHwCap() - confirm.
    const int hwcap_type = 16;
    const uint32_t hwcap = getHwCap(hwcap_type);
    if ((hwcap & HWCAP_FPHP) == 0) {
      // Mirrors the "NOT support" message logged by OptimizeModule so both probes are traceable.
      MS_LOG(INFO) << "Hw cap NOT support FP16, hwcap: 0x" << hwcap;
      return;
    }
    MS_LOG(INFO) << "Hw cap support FP16, hwcap: 0x" << hwcap;

    float16_op_handler_ = dlopen(FLOAT16_SHARED_LIBRARY_PATH, RTLD_LAZY);
    if (float16_op_handler_ == nullptr) {
      // A missing library is not treated as an error: it is only logged and the handle stays null.
      MS_LOG(INFO) << "Open float16 shared library failed: " << dlerror();
    }
#endif
  }

  // The destructor is defaulted; this class never calls dlclose() on the handle.
  ~Float16Module() = default;

  /** Returns the address of a function-local static instance, constructed on the first call. */
  static Float16Module *GetInstance() {
    static Float16Module fp16_module;
    return &fp16_module;
  }

  // Handle returned by dlopen(), or nullptr when the library was not loaded.
  void *float16_op_handler_ = nullptr;
};
|
|
||||||
|
|
||||||
#endif // MINDSPORE_LITE_NNACL_OPTIMIZED_KERNEL_H_
|
|
@ -0,0 +1,40 @@
|
|||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include "nnacl/op_base.h"
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
#ifdef ENABLE_ARM64
|
||||||
|
/*
 * Prototype only; the definition is not part of this header.
 * NOTE(review): judging by the name and parameter list this is presumably an int8 GEMM kernel with
 * requantization (multiplier / shift arrays, activation clamp, output zero point) - confirm the exact
 * meaning and units of each parameter against the implementation.
 */
void IndirectGemmInt8_optimize_handler(int8_t *dst, const int8_t *src, const int8_t *weight,
                                       const int32_t *bias, size_t ksize, size_t ic4,
                                       size_t output_channel, size_t offset,
                                       const int32_t *input_sum, size_t act_min, size_t act_max,
                                       size_t out_zp, int32_t *out_multiplier,
                                       int32_t *shift_before, int32_t *shift_after,
                                       size_t asymmetric, size_t per_channel,
                                       size_t per_channel_offset);
|
||||||
|
/*
 * Prototype only; the definition is not part of this header.
 * NOTE(review): presumably an int8 matrix multiply that writes int accumulators to dst, with row4 / col4 /
 * deep16 being dimensions padded to multiples of 4 / 4 / 16 - confirm against the implementation.
 */
void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst,
                                   int row4, int col4, int deep16,
                                   const int *input_sum, const int *bias);
|
||||||
|
|
||||||
|
/*
 * Prototype only; the definition is not part of this header.
 * NOTE(review): presumably an int8 matrix multiply that requantizes to int8 output (left/right shift,
 * multiplier, output zero point, mini/maxi clamp, optional per-channel mode) - confirm the exact meaning
 * of each parameter against the implementation.
 */
void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst,
                                  size_t row, size_t col, size_t deep_4, size_t stride,
                                  const int32_t *input_sum, const int32_t *bias,
                                  int32_t *left_shift, int32_t *right_shift, int32_t *multiplier,
                                  int32_t output_zp, int32_t mini, int32_t maxi,
                                  size_t per_channel);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue