parent
c54a4a4494
commit
a9c90f71e4
@ -1,99 +0,0 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_NNACL_OPTIMIZED_KERNEL_H_
|
||||
#define MINDSPORE_LITE_NNACL_OPTIMIZED_KERNEL_H_
|
||||
|
||||
#ifndef _WIN32
|
||||
#include <dlfcn.h>
|
||||
#endif
|
||||
#ifdef __ANDROID__
|
||||
#include <asm/hwcap.h>
|
||||
#include "nnacl/nnacl_utils.h"
|
||||
#endif
|
||||
#include "src/common/log_adapter.h"
|
||||
|
||||
#define OPTIMIZE_SHARED_LIBRARY_PATH "libmindspore-lite-optimize.so"
|
||||
#define FLOAT16_SHARED_LIBRARY_PATH "libmindspore-lite-fp16.so"
|
||||
|
||||
/**
 * Holder for the handle of the optional optimized-kernel shared library.
 *
 * On ENABLE_ARM64 builds the constructor reads the hardware capability word and,
 * when the HWCAP_ASIMDDP bit is set, tries to dlopen OPTIMIZE_SHARED_LIBRARY_PATH.
 * In every other case (non-ARM64 build, bit not set, dlopen failure)
 * optimized_op_handler_ remains nullptr, so users of the handle must check it
 * before passing it to dlsym.
 */
class OptimizeModule {
 public:
  OptimizeModule() {
#ifdef ENABLE_ARM64
    // 16 is the numeric value of AT_HWCAP on Linux. getHwCap is declared outside
    // this file; presumably it forwards the key to getauxval -- TODO confirm.
    const int kHwcapType = 16;
    const uint32_t hwcap = getHwCap(kHwcapType);
    if ((hwcap & HWCAP_ASIMDDP) == 0) {
      MS_LOG(INFO) << "Hw cap NOT support SIMD Dot Product, hwcap: 0x" << hwcap;
      return;
    }
    MS_LOG(INFO) << "Hw cap support SIMD Dot Product, hwcap: 0x" << hwcap;

    // RTLD_LAZY: symbols are resolved on first use rather than at load time.
    optimized_op_handler_ = dlopen(OPTIMIZE_SHARED_LIBRARY_PATH, RTLD_LAZY);
    if (optimized_op_handler_ == nullptr) {
      // A missing library is reported at INFO level only; the handle stays nullptr.
      MS_LOG(INFO) << "Open optimize shared library failed: " << dlerror();
    }
#endif
  }

  // This class never calls dlclose on the handle it opened.
  ~OptimizeModule() = default;

  /** Returns the address of a function-local static instance, built on first call. */
  static OptimizeModule *GetInstance() {
    static OptimizeModule opt_module;
    return &opt_module;
  }

  // dlopen handle of the optimized library, or nullptr when it was not loaded.
  void *optimized_op_handler_ = nullptr;
};
|
||||
|
||||
/**
 * Holder for the handle of the optional FP16 kernel shared library.
 *
 * On ENABLE_ARM64 builds the constructor reads the hardware capability word and,
 * when the HWCAP_FPHP bit is set, tries to dlopen FLOAT16_SHARED_LIBRARY_PATH.
 * In every other case (non-ARM64 build, bit not set, dlopen failure)
 * float16_op_handler_ remains nullptr, so users of the handle must check it
 * before passing it to dlsym.
 */
class Float16Module {
 public:
  Float16Module() {
#ifdef ENABLE_ARM64
    // 16 is the numeric value of AT_HWCAP on Linux. getHwCap is declared outside
    // this file; presumably it forwards the key to getauxval -- TODO confirm.
    const int kHwcapType = 16;
    const uint32_t hwcap = getHwCap(kHwcapType);
    if ((hwcap & HWCAP_FPHP) == 0) {
      // Mirrors the "NOT support" message logged by OptimizeModule.
      MS_LOG(INFO) << "Hw cap NOT support FP16, hwcap: 0x" << hwcap;
      return;
    }
    MS_LOG(INFO) << "Hw cap support FP16, hwcap: 0x" << hwcap;

    // RTLD_LAZY: symbols are resolved on first use rather than at load time.
    float16_op_handler_ = dlopen(FLOAT16_SHARED_LIBRARY_PATH, RTLD_LAZY);
    if (float16_op_handler_ == nullptr) {
      // The message names the FP16 library so it cannot be confused with the
      // optimize-library failure logged by OptimizeModule.
      MS_LOG(INFO) << "Open float16 shared library failed: " << dlerror();
    }
#endif
  }

  // This class never calls dlclose on the handle it opened.
  ~Float16Module() = default;

  /** Returns the address of a function-local static instance, built on first call. */
  static Float16Module *GetInstance() {
    static Float16Module fp16_module;
    return &fp16_module;
  }

  // dlopen handle of the FP16 library, or nullptr when it was not loaded.
  void *float16_op_handler_ = nullptr;
};
|
||||
|
||||
#endif // MINDSPORE_LITE_NNACL_OPTIMIZED_KERNEL_H_
|
@ -0,0 +1,40 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include "nnacl/op_base.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
#ifdef ENABLE_ARM64
|
||||
/*
 * Prototype only; the definition is not part of this header.
 * NOTE(review): judging by the parameter names this is an int8 GEMM that also
 * receives requantization data (multiplier, shifts, zero point, activation
 * bounds) -- confirm the exact contract against the implementation.
 */
void IndirectGemmInt8_optimize_handler(
  int8_t *dst,
  const int8_t *src,
  const int8_t *weight,
  const int32_t *bias,
  size_t ksize,
  size_t ic4,
  size_t output_channel,
  size_t offset,
  const int32_t *input_sum,
  size_t act_min,
  size_t act_max,
  size_t out_zp,
  int32_t *out_multiplier,
  int32_t *shift_before,
  int32_t *shift_after,
  size_t asymmetric,
  size_t per_channel,
  size_t per_channel_offset);
|
||||
/*
 * Prototype only; the definition is not part of this header.
 * NOTE(review): the names suggest an int8 matrix multiply writing int results
 * into dst, with row4/col4/deep16 being padded dimensions -- confirm against
 * the implementation.
 */
void MatMulR4Int8_optimize_handler(
  const int8_t *a,
  const int8_t *b,
  int *dst,
  int row4,
  int col4,
  int deep16,
  const int *input_sum,
  const int *bias);
|
||||
|
||||
/*
 * Prototype only; the definition is not part of this header.
 * NOTE(review): the names suggest an int8 matrix multiply that requantizes to
 * int8 output using left_shift / right_shift / multiplier and clamps to
 * [mini, maxi] -- confirm against the implementation.
 */
void MatMulRInt8_optimize_handler(
  const int8_t *a,
  const int8_t *b,
  int8_t *dst,
  size_t row,
  size_t col,
  size_t deep_4,
  size_t stride,
  const int32_t *input_sum,
  const int32_t *bias,
  int32_t *left_shift,
  int32_t *right_shift,
  int32_t *multiplier,
  int32_t output_zp,
  int32_t mini,
  int32_t maxi,
  size_t per_channel);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue