optimize int8 sigmoid(logistic)

pull/7290/head
fuzhiye 4 years ago
parent 11058ad0ef
commit 8549266616

@ -16,15 +16,11 @@
#include "nnacl/int8/sigmoid_int8.h" #include "nnacl/int8/sigmoid_int8.h"
// Int8 sigmoid (logistic) via a 256-entry lookup table.
//
// Each quantized input byte is reinterpreted as an unsigned index into
// `table`, which holds the precomputed quantized sigmoid of every possible
// int8 input value (built by CalculateTableList in the CPU kernel).
//
// src:    quantized int8 input buffer
// length: number of elements to process (<= 0 processes nothing)
// dst:    quantized int8 output buffer
// table:  256-entry lookup table indexed by (uint8_t)input
// Returns 0 on success.
int SigmoidInt8(const int8_t *src, int length, int8_t *dst, int8_t *table) {
  for (int i = 0; i < length; i++) {
    const int8_t input_value = src[i];
    // The cast maps [-128, 127] onto [0, 255] (two's-complement reindexing),
    // matching how the table was filled.
    uint8_t index = (uint8_t)input_value;
    dst[i] = table[index];
  }
  return 0;
}

@ -21,21 +21,10 @@
#include "nnacl/errorcode.h" #include "nnacl/errorcode.h"
#include "nnacl/quantization/fixed_point.h" #include "nnacl/quantization/fixed_point.h"
// NOTE: SigmoidQuantArg was removed — the kernel now precomputes a 256-entry
// lookup table instead of doing per-element quantized arithmetic.

#ifdef __cplusplus
extern "C" {
#endif
// Applies the int8 sigmoid lookup table to `length` elements of `src`,
// writing results to `dst`. `table` has 256 entries and is indexed by the
// input byte reinterpreted as uint8_t. Returns 0 on success.
int SigmoidInt8(const int8_t *src, int length, int8_t *dst, int8_t *table);
#ifdef __cplusplus
}
#endif

@ -16,6 +16,7 @@
#include "src/runtime/kernel/arm/int8/sigmoid_int8.h"
#include <algorithm>
#include <cmath>
#include <limits>
#include "nnacl/int8/sigmoid_int8.h"
#include "nnacl/quantization/quantize.h"
#include "schema/model_generated.h"
@ -30,39 +31,36 @@ using mindspore::lite::RET_OK;
using mindspore::schema::ActivationType_SIGMOID; using mindspore::schema::ActivationType_SIGMOID;
namespace mindspore::kernel { namespace mindspore::kernel {
// Precomputes the 256-entry int8 sigmoid lookup table used by SigmoidInt8.
//
// For every possible quantized input byte i in [-128, 127] the real-valued
// input is recovered as input_scale * (i - input_zp), sigmoid is evaluated,
// and the result is re-quantized with the fixed output parameters
// scale = 1/256, zero_point = -128 (the only output quantization this kernel
// supports — Init() rejects anything else). The table is indexed by the input
// byte reinterpreted as uint8_t, matching the lookup in SigmoidInt8.
//
// table:       destination buffer of exactly 256 entries
// input_scale: quantization scale of the input tensor
// input_zp:    quantization zero point of the input tensor
void CalculateTableList(int8_t *table, const float input_scale, const int32_t input_zp) {
  const int32_t min_value = std::numeric_limits<int8_t>::min();
  const int32_t max_value = std::numeric_limits<int8_t>::max();
  const float output_scale = 1.0f / 256;
  const int32_t output_zp = -128;

  // The loop bound must be inclusive of max_value (127): with `<` the entry
  // table[(uint8_t)127] would never be written and stay uninitialized.
  for (int i = min_value; i <= max_value; ++i) {
    const float real_input_value = input_scale * (i - input_zp);
    const float sigmoid_value = 1.0f / (1.0f + std::exp(-real_input_value));
    // Quantize, then clamp into the representable int8 range.
    const int32_t quantized = std::round(sigmoid_value / output_scale) + output_zp;
    const int8_t out_value = static_cast<int8_t>(std::max(std::min(quantized, max_value), min_value));
    const uint8_t index = static_cast<uint8_t>(i);
    table[index] = out_value;
  }
}
void SigmoidInt8CPUKernel::MultiplierInt32ToInt16(int32_t input, int16_t *output) { int SigmoidInt8CPUKernel::Init() {
MS_ASSERT(input >= 0); lite::Tensor *input = in_tensors_.at(0);
if (input >= std::numeric_limits<int32_t>::max() - (1 << 15)) { lite::Tensor *output = out_tensors_.at(0);
*output = std::numeric_limits<int16_t>::max(); const float input_scale = input->GetQuantParams().front().scale;
return; const int32_t input_zp = input->GetQuantParams().front().zeroPoint;
const float output_scale = output->GetQuantParams().front().scale;
const int32_t output_zp = output->GetQuantParams().front().zeroPoint;
if (output_scale != (1.0f / 256) || output_zp != -128) {
MS_LOG(ERROR) << "Output scale is : " << output_scale << ", should be 1/256. Output zp is : " << output_zp
<< ", should be -128.";
return RET_ERROR;
} }
*output = (input + (1 << 15)) >> 16; CalculateTableList(table_list_, input_scale, input_zp);
return RET_OK;
} }
// The lookup table depends only on quantization parameters, not on tensor
// shape, so a resize requires no recomputation.
int SigmoidInt8CPUKernel::ReSize() { return RET_OK; }
@ -71,11 +69,10 @@ int SigmoidInt8CPUKernel::DoActivation(int task_id) {
auto input_addr = reinterpret_cast<int8_t *>(in_tensors_.at(0)->MutableData()); auto input_addr = reinterpret_cast<int8_t *>(in_tensors_.at(0)->MutableData());
auto output_addr = reinterpret_cast<int8_t *>(out_tensors_.at(0)->MutableData()); auto output_addr = reinterpret_cast<int8_t *>(out_tensors_.at(0)->MutableData());
auto length = in_tensors_.at(0)->ElementsNum(); auto length = in_tensors_.at(0)->ElementsNum();
int stride = UP_DIV(length, op_parameter_->thread_num_); int stride = UP_DIV(length, op_parameter_->thread_num_);
int count = MSMIN(stride, length - stride * task_id); int count = MSMIN(stride, length - stride * task_id);
SigmoidInt8(input_addr + stride * task_id, count, output_addr + stride * task_id, &quant_arg_); SigmoidInt8(input_addr + stride * task_id, count, output_addr + stride * task_id, table_list_);
return RET_OK; return RET_OK;
} }

@ -36,8 +36,7 @@ class SigmoidInt8CPUKernel : public LiteKernel {
int DoActivation(int task_id); int DoActivation(int task_id);
private: private:
SigmoidQuantArg quant_arg_; int8_t table_list_[256]{0};
void MultiplierInt32ToInt16(int32_t input, int16_t *output);
}; };
} // namespace mindspore::kernel } // namespace mindspore::kernel

Loading…
Cancel
Save