!3835 add lite kernel relu_int8 and hswish_int8
Merge pull request !3835 from sunsuodong/int8_relu_hswish
commit d7bc28dcde
@@ -0,0 +1,66 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/kernel/arm/fp32/activation.h"
#include "src/runtime/kernel/arm/int8/relu_int8.h"
#include "src/runtime/kernel/arm/int8/hswish_int8.h"
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "src/runtime/runtime_api.h"
#include "include/errorcode.h"

using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Activation;

namespace mindspore::kernel {
kernel::LiteKernel *CpuActivationInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                   const std::vector<lite::tensor::Tensor *> &outputs,
                                                   OpParameter *parameter, const lite::Context *ctx,
                                                   const KernelKey &desc) {
  if (parameter == nullptr) {
    MS_LOG(ERROR) << "parameter is nullptr";
    return nullptr;
  }
  MS_ASSERT(inputs.at(0));
  auto type = (reinterpret_cast<ActivationParameter *>(parameter))->type_;
  kernel::LiteKernel *kernel = nullptr;
  switch (static_cast<schema::ActivationType>(type)) {
    case schema::ActivationType_RELU:
      kernel = new (std::nothrow) ReluInt8CPUKernel(parameter, inputs, outputs, ctx);
      break;
    case schema::ActivationType_HSWISH:
      kernel = new (std::nothrow) HswishInt8CPUKernel(parameter, inputs, outputs, ctx);
      break;
    default:
      break;
  }
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "Create kernel failed";
    return nullptr;
  }
  auto ret = kernel->Init();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Init kernel failed, name: " << parameter->name_
                  << ", type: " << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(parameter->type_));
  }
  return kernel;
}

REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Activation, CpuActivationInt8KernelCreator)
}  // namespace mindspore::kernel
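For context: the creator above is the single registration point for both new kernels. It reads the ActivationType out of ActivationParameter, switches on it, and returns nullptr for any activation the int8 path does not cover. A compact stand-alone sketch of that factory shape (all names below are illustrative stand-ins, not the MindSpore Lite API):

#include <cstdio>
#include <memory>

// Illustrative stand-ins only; not the MindSpore Lite API.
enum class ActType { kRelu, kHswish };

struct Kernel {
  virtual ~Kernel() = default;
  virtual const char *Name() const = 0;
};
struct ReluKernel : Kernel {
  const char *Name() const override { return "relu_int8"; }
};
struct HswishKernel : Kernel {
  const char *Name() const override { return "hswish_int8"; }
};

// Same shape as CpuActivationInt8KernelCreator: switch on the activation type,
// return nullptr for anything the int8 path does not handle yet.
std::unique_ptr<Kernel> CreateActivationKernel(ActType type) {
  switch (type) {
    case ActType::kRelu:
      return std::make_unique<ReluKernel>();
    case ActType::kHswish:
      return std::make_unique<HswishKernel>();
    default:
      return nullptr;
  }
}

int main() {
  auto kernel = CreateActivationKernel(ActType::kHswish);
  std::printf("created: %s\n", kernel ? kernel->Name() : "none");
  return 0;
}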
@@ -0,0 +1,99 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/kernel/arm/int8/hswish_int8.h"
#include <limits>
#include "src/runtime/kernel/arm/opclib/int8/hswish_int8.h"
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "src/runtime/runtime_api.h"
#include "include/errorcode.h"

using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::ActivationType_HSWISH;

namespace mindspore::kernel {
int HswishInt8CPUKernel::Init() {
  lite::tensor::Tensor *input = inputs_.at(0);
  lite::tensor::Tensor *output = outputs_.at(0);
  MS_ASSERT(input);
  MS_ASSERT(output);

  quant_arg_.input_scale = input->GetQuantParams().front().scale;
  quant_arg_.input_zp = input->GetQuantParams().front().zeroPoint;
  quant_arg_.output_scale = output->GetQuantParams().front().scale;
  quant_arg_.output_zp = output->GetQuantParams().front().zeroPoint;

  const float output_multiplier = (1.0f / 128.0f) * quant_arg_.input_scale / quant_arg_.output_scale;

  int32_t output_multiplier_fixedpoint;
  QuantizeMultiplier(output_multiplier, &output_multiplier_fixedpoint, &quant_arg_.output_multiplier_exponent);
  MS_ASSERT(quant_arg_.output_multiplier_exponent <= 0);
  MultiplierInt32ToInt16(output_multiplier_fixedpoint, &quant_arg_.output_multiplier_fixedpoint_int16);

  const float relu6_multiplier = (1.0f / 128.0f) * quant_arg_.input_scale / (3.0f / 32768.0f);
  int32_t relu6_multiplier_fixedpoint;
  QuantizeMultiplier(relu6_multiplier, &relu6_multiplier_fixedpoint, &quant_arg_.relu6_multiplier_exponent);
  MultiplierInt32ToInt16(relu6_multiplier_fixedpoint, &quant_arg_.relu6_multiplier_fixedpoint_int16);

  return RET_OK;
}

void HswishInt8CPUKernel::MultiplierInt32ToInt16(int32_t input, int16_t *output) {
  MS_ASSERT(input >= 0);
  if (input >= std::numeric_limits<int32_t>::max() - (1 << 15)) {
    *output = std::numeric_limits<int16_t>::max();
    return;
  }
  *output = (input + (1 << 15)) >> 16;
}

int HswishInt8CPUKernel::ReSize() { return RET_OK; }

int HswishInt8CPUKernel::DoActivation(int task_id) {
  auto input_addr = reinterpret_cast<int8_t *>(inputs_.at(0)->Data());
  auto output_addr = reinterpret_cast<int8_t *>(outputs_.at(0)->Data());
  auto length = inputs_.at(0)->ElementsNum();

  int stride = UP_DIV(length, thread_count_);
  int count = MSMIN(stride, length - stride * task_id);

  HSwishInt8(input_addr + stride * task_id, count, output_addr + stride * task_id, &quant_arg_);
  return RET_OK;
}

int HswishInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
  auto activation_kernel = reinterpret_cast<HswishInt8CPUKernel *>(cdata);
  auto error_code = activation_kernel->DoActivation(task_id);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "HswishInt8Run error task_id[" << task_id << "] error_code[" << error_code << "]";
    return RET_ERROR;
  }
  return RET_OK;
}

int HswishInt8CPUKernel::Run() {
  int error_code = LiteBackendParallelLaunch(HswishInt8Run, this, thread_count_);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "HswishInt8Run function error error_code[" << error_code << "]";
    return RET_ERROR;
  }
  return RET_OK;
}
}  // namespace mindspore::kernel
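Init() above folds the extra 1/128 factor (the op promotes inputs to Q7 internally) into the output and relu6 multipliers and then narrows the 32-bit fixed-point values to int16. A rough stand-alone sketch of that bookkeeping, assuming QuantizeMultiplier performs the usual frexp-style decomposition real_multiplier ~= q31 * 2^exponent; the helper below is illustrative, not the opclib API:

#include <cmath>
#include <cstdint>
#include <cstdio>

// Illustrative decomposition: value ~= q31 * 2^exponent, with q31 in [2^30, 2^31).
void DecomposeMultiplier(double value, int32_t *q31, int *exponent) {
  const double q = std::frexp(value, exponent);  // value = q * 2^exponent, q in [0.5, 1)
  int64_t q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
  if (q_fixed == (1ll << 31)) {  // rounding pushed q up to exactly 1.0
    q_fixed /= 2;
    ++(*exponent);
  }
  *q31 = static_cast<int32_t>(q_fixed);
}

int main() {
  // Same shape as HswishInt8CPUKernel::Init(): scale the output multiplier by 1/128.
  const double input_scale = 0.0431373, output_scale = 0.0392157;
  const double output_multiplier = (1.0 / 128.0) * input_scale / output_scale;
  int32_t q31 = 0;
  int exponent = 0;
  DecomposeMultiplier(output_multiplier, &q31, &exponent);
  // Round the Q31 value down to Q15, as MultiplierInt32ToInt16 does.
  const int16_t q15 = static_cast<int16_t>((q31 + (1 << 15)) >> 16);
  std::printf("multiplier=%f -> q31=%d q15=%d exponent=%d\n", output_multiplier,
              static_cast<int>(q31), static_cast<int>(q15), exponent);
  return 0;
}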
@@ -0,0 +1,44 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_BACKEND_ARM_INT8_HSWISH_INT8_H_
#define MINDSPORE_LITE_SRC_BACKEND_ARM_INT8_HSWISH_INT8_H_

#include <vector>
#include "src/lite_kernel.h"
#include "src/runtime/kernel/arm/opclib/int8/hswish_int8.h"

namespace mindspore::kernel {
class HswishInt8CPUKernel : public LiteKernel {
 public:
  HswishInt8CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                      const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
      : LiteKernel(parameter, inputs, outputs), thread_count_(ctx->threadNum) {}
  ~HswishInt8CPUKernel() override = default;

  int Init() override;
  int ReSize() override;
  int Run() override;
  int DoActivation(int task_id);

 private:
  int thread_count_;
  HswishQuantArg quant_arg_;
  void MultiplierInt32ToInt16(int32_t input, int16_t *output);
};
}  // namespace mindspore::kernel

#endif  // MINDSPORE_LITE_SRC_BACKEND_ARM_INT8_HSWISH_INT8_H_
@@ -0,0 +1,83 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/kernel/arm/int8/relu_int8.h"
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "src/runtime/runtime_api.h"
#include "include/errorcode.h"

using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::ActivationType_RELU;

namespace mindspore::kernel {
int ReluInt8CPUKernel::Init() {
  lite::tensor::Tensor *input = inputs_.at(0);
  lite::tensor::Tensor *output = outputs_.at(0);
  MS_ASSERT(input);
  MS_ASSERT(output);

  quant_arg_.input_arg.scale_ = input->GetQuantParams().front().scale;
  quant_arg_.input_arg.zp_ = input->GetQuantParams().front().zeroPoint;
  quant_arg_.output_arg.scale_ = output->GetQuantParams().front().scale;
  quant_arg_.output_arg.zp_ = output->GetQuantParams().front().zeroPoint;

  const double multiplier = quant_arg_.input_arg.scale_ / quant_arg_.output_arg.scale_;
  QuantizeMultiplierSmallerThanOne(multiplier, &quant_arg_.input_multiplier_, &quant_arg_.input_shift_);

  int left_shift = -quant_arg_.input_shift_ > 0 ? -quant_arg_.input_shift_ : 0;
  quant_arg_.right_shift_ = -quant_arg_.input_shift_ > 0 ? 0 : quant_arg_.input_shift_;
  quant_arg_.left_shift_result_ = (1 << left_shift);

  return RET_OK;
}

int ReluInt8CPUKernel::ReSize() { return RET_OK; }

int ReluInt8CPUKernel::DoActivation(int task_id) {
  auto input_addr = reinterpret_cast<int8_t *>(inputs_.at(0)->Data());
  auto output_addr = reinterpret_cast<int8_t *>(outputs_.at(0)->Data());
  auto length = inputs_.at(0)->ElementsNum();

  int stride = UP_DIV(length, thread_count_);
  int count = MSMIN(stride, length - stride * task_id);

  ReluInt8(input_addr + stride * task_id, count, output_addr + stride * task_id, &quant_arg_);
  return RET_OK;
}

int ReluInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
  auto activation_kernel = reinterpret_cast<ReluInt8CPUKernel *>(cdata);
  auto error_code = activation_kernel->DoActivation(task_id);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "ReluInt8Run error task_id[" << task_id << "] error_code[" << error_code << "]";
    return RET_ERROR;
  }
  return RET_OK;
}

int ReluInt8CPUKernel::Run() {
  int error_code = LiteBackendParallelLaunch(ReluInt8Run, this, thread_count_);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "ReluInt8Run function error error_code[" << error_code << "]";
    return RET_ERROR;
  }
  return RET_OK;
}
}  // namespace mindspore::kernel
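Both DoActivation() methods split the tensor the same way: UP_DIV rounds the per-task stride up and MSMIN clips the final task to whatever remains. A small stand-alone sketch of that partition, using made-up sizes:

#include <cstdio>

// The UP_DIV / MSMIN split used by DoActivation: each task handles a contiguous stride of elements.
int main() {
  const int length = 10;        // total elements (illustrative)
  const int thread_count = 4;   // ctx->threadNum in the kernel
  const int stride = (length + thread_count - 1) / thread_count;  // UP_DIV(length, thread_count)
  for (int task_id = 0; task_id < thread_count; ++task_id) {
    const int remaining = length - stride * task_id;
    const int count = remaining < stride ? remaining : stride;    // MSMIN(stride, remaining)
    std::printf("task %d: offset=%d count=%d\n", task_id, stride * task_id, count);
  }
  return 0;
}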
@@ -0,0 +1,47 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_BACKEND_ARM_INT8_ACTIVATION_H_
#define MINDSPORE_LITE_SRC_BACKEND_ARM_INT8_ACTIVATION_H_

#include <vector>
#include "src/lite_kernel.h"
#include "src/runtime/kernel/arm/opclib/fp32/activation.h"
#include "src/runtime/kernel/arm/opclib/int8/relu_int8.h"

namespace mindspore::kernel {
class ReluInt8CPUKernel : public LiteKernel {
 public:
  ReluInt8CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                    const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
      : LiteKernel(parameter, inputs, outputs), thread_count_(ctx->threadNum) {
    type_ = (reinterpret_cast<ActivationParameter *>(parameter))->type_;
  }
  ~ReluInt8CPUKernel() override = default;

  int Init() override;
  int ReSize() override;
  int Run() override;
  int DoActivation(int task_id);

 private:
  int thread_count_;
  int type_;
  ReluQuantArg quant_arg_;
};
}  // namespace mindspore::kernel

#endif  // MINDSPORE_LITE_SRC_BACKEND_ARM_INT8_ACTIVATION_H_
@@ -0,0 +1,53 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/kernel/arm/opclib/int8/hswish_int8.h"

int16_t SaturatingLeftShift(int16_t value, int shift_num) {
  int32_t result = (int32_t)value * (1 << shift_num);
  return MSMAX(MSMIN(result, SHRT_MAX), SHRT_MIN);
}

int HSwishInt8(const int8_t *src, int length, int8_t *dst, HswishQuantArg *arg) {
  for (int i = 0; i < length; i++) {
    const int16_t input_value = src[i] - arg->input_zp;
    const int16_t input_value_scale = input_value * (1 << 7);
    const int16_t input_value_on_preshift_output_scale =
        SaturatingRoundingDoublingHighMulInt16(input_value_scale, arg->output_multiplier_fixedpoint_int16);
    int16_t relu6_value = input_value_scale;
    if (arg->relu6_multiplier_exponent > 0) {
      relu6_value = SaturatingLeftShift(relu6_value, arg->relu6_multiplier_exponent - 1);
    }
    relu6_value = SaturatingRoundingDoublingHighMulInt16(relu6_value, arg->relu6_multiplier_fixedpoint_int16);

    if (arg->relu6_multiplier_exponent > 0) {
      relu6_value = SaturatingLeftShift(relu6_value, 1);
    }
    if (arg->relu6_multiplier_exponent < 0) {
      relu6_value = RoundingDivideByPOT(relu6_value, -arg->relu6_multiplier_exponent);
    }
    relu6_value = (relu6_value + (1 << 15)) >> 1;
    const int16_t preshift_output_value =
        SaturatingRoundingDoublingHighMulInt16(relu6_value, input_value_on_preshift_output_scale);

    int16_t output = RoundingDivideByPOT(preshift_output_value, -arg->output_multiplier_exponent);
    output += arg->output_zp;
    output = MSMIN(output, 127);
    output = MSMAX(output, -128);
    dst[i] = (int8_t)output;
  }
  return OPCLIB_OK;
}
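The routine above is a Q7/Q15 fixed-point rendering of the usual hard-swish definition, hswish(x) = x * relu6(x + 3) / 6. A float reference of the same element-wise transform may help when sanity-checking the quantized path (a sketch, not the production code); with the quantization parameters from the hswish unit test further down it reproduces that test's expected outputs:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Float reference: dequantize, apply hard-swish, requantize to int8.
int8_t HSwishInt8Reference(int8_t x, float in_scale, int32_t in_zp, float out_scale, int32_t out_zp) {
  const float real = in_scale * (x - in_zp);
  const float relu6 = std::min(std::max(real + 3.0f, 0.0f), 6.0f);
  const float hswish = real * relu6 / 6.0f;
  const int32_t q = static_cast<int32_t>(std::round(hswish / out_scale)) + out_zp;
  return static_cast<int8_t>(std::min(127, std::max(-128, q)));
}

int main() {
  // Quantization parameters taken from the hswish unit test in this pull request.
  const int8_t inputs[] = {-116, -105, -93, -35, 23, 35, 46, 104};
  for (int8_t v : inputs) {
    std::printf("%d -> %d\n", v, HSwishInt8Reference(v, 0.0431373f, -35, 0.0392157f, -52));
  }
  return 0;
}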
@@ -0,0 +1,37 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_HSWISH_INT8_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_HSWISH_INT8_H_

#include <math.h>
#include "src/runtime/kernel/arm/opclib/op_base.h"
#include "src/runtime/kernel/arm/opclib/errorcode.h"
#include "src/runtime/kernel/arm/opclib/quantization/fixed_point.h"

struct HswishQuantArg {
  double input_scale;
  int32_t input_zp;
  double output_scale;
  int32_t output_zp;
  int16_t relu6_multiplier_fixedpoint_int16;
  int32_t relu6_multiplier_exponent;
  int16_t output_multiplier_fixedpoint_int16;
  int32_t output_multiplier_exponent;
};

int HSwishInt8(const int8_t *src, int length, int8_t *dst, HswishQuantArg *arg);

#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_HSWISH_INT8_H_
@@ -0,0 +1,47 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_RELU_INT8_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_RELU_INT8_H_

#include <math.h>
#include "src/runtime/kernel/arm/opclib/op_base.h"
#include "src/runtime/kernel/arm/opclib/errorcode.h"
#include "src/runtime/kernel/arm/opclib/quantization/fixed_point.h"

struct ReluQuantArg {
  QuantArg input_arg;
  QuantArg output_arg;
  int input_multiplier_;
  int input_shift_;
  int right_shift_;
  int left_shift_result_;
};

inline void ReluInt8(const int8_t *src, int length, int8_t *dst, ReluQuantArg *arg) {
  for (int i = 0; i < length; ++i) {
    if (src[i] <= arg->input_arg.zp_) {
      dst[i] = arg->output_arg.zp_;
      continue;
    }
    const int32_t input_val = src[i] - arg->input_arg.zp_;
    const int32_t scaled_input = SaturatingRoundingDoublingHighMul(input_val, arg->input_multiplier_);
    const int32_t shifted_input = RoundingDivideByPOT(scaled_input * arg->left_shift_result_, -arg->right_shift_);
    const int32_t output = shifted_input + arg->output_arg.zp_;
    dst[i] = (int8_t)output;
  }
}

#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_RELU_INT8_H_
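In float terms, the inline routine above is ReLU followed by a rescale from the input scale to the output scale. A reference sketch of that per-element math follows; it ignores the fixed-point rounding, which is why the relu unit test further down expects 26 and 90 where this reference yields 25 and 89:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Float reference for one element: dequantize, relu, requantize.
int8_t ReluInt8Reference(int8_t x, float in_scale, int32_t in_zp, float out_scale, int32_t out_zp) {
  const float real = in_scale * (x - in_zp);
  const float relu = std::max(real, 0.0f);
  const int32_t q = static_cast<int32_t>(std::round(relu / out_scale)) + out_zp;
  return static_cast<int8_t>(std::min(127, std::max(-128, q)));
}

int main() {
  // Values from the relu unit test: scale 0.00784314 and zero point 0 on both sides.
  const int8_t inputs[] = {-102, 25, -51, 89};
  for (int8_t v : inputs) {
    std::printf("%d -> %d\n", v, ReluInt8Reference(v, 0.00784314f, 0, 0.00784314f, 0));
  }
  return 0;
}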
@@ -0,0 +1,73 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <iostream>
#include <memory>
#include "common/common_test.h"
#include "mindspore/lite/src/runtime/kernel/arm/fp32/activation.h"
#include "mindspore/lite/src/runtime/kernel/arm/opclib/fp32/activation.h"
#include "mindspore/lite/src/runtime/kernel/arm/int8/hswish_int8.h"
#include "mindspore/lite/src/kernel_registry.h"
#include "mindspore/lite/include/context.h"

namespace mindspore {
class TestHSwishInt8 : public mindspore::Common {
 public:
  TestHSwishInt8() {}
};

TEST_F(TestHSwishInt8, HSwish) {
  lite::tensor::Tensor in_tensor(kNumberTypeInt8, {4, 4});
  lite::tensor::Tensor out_tensor(kNumberTypeInt8, {4, 4});

  int8_t input_data[] = {-116, -105, -93, -35, 23, 35, 46, 104};  // -3.5f, -3.0f, -2.5f, 0.f, 2.5f, 3.0f, 3.5f, 6.0f
  int8_t output_data[8] = {0};
  in_tensor.SetData(input_data);
  out_tensor.SetData(output_data);

  const lite::tensor::QuantArg quant_in = {0.0431373f, -35};   // -4.0 -- 7.0
  const lite::tensor::QuantArg quant_out = {0.0392157f, -52};  // -3.0 -- 7.0
  in_tensor.AddQuantParam(quant_in);
  out_tensor.AddQuantParam(quant_out);

  std::vector<lite::tensor::Tensor *> inputs = {&in_tensor};
  std::vector<lite::tensor::Tensor *> outputs = {&out_tensor};

  ActivationParameter parameter = {0};
  parameter.op_parameter_.type_ = schema::PrimitiveType_Activation;
  parameter.type_ = schema::ActivationType_HSWISH;

  kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Activation};

  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
  ASSERT_NE(creator, nullptr);

  auto ctx = std::make_shared<lite::Context>();
  auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(&parameter), ctx.get(), desc);
  ASSERT_NE(kernel, nullptr);

  auto ret = kernel->Run();
  EXPECT_EQ(0, ret);

  int8_t expect[8] = {-52, -52, -57, -52, 7, 25, 37, 101};  // 0, 0, -0.208333, 0, 2.29167, 3, 3.5, 6
  for (int i = 0; i < 8; ++i) {
    EXPECT_EQ(output_data[i], expect[i]);
  }

  in_tensor.SetData(nullptr);
  out_tensor.SetData(nullptr);
}
}  // namespace mindspore
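The quantization constants in this test look consistent with the common asymmetric int8 convention that maps the commented float range onto [-128, 127]. A short sketch of that derivation (an assumption about how the constants were chosen, not something stated in the patch):

#include <cmath>
#include <cstdio>

// Assumed convention: 256 int8 buckets spanning [min, max], with min mapped to -128.
void QuantParamsFromRange(float min, float max, float *scale, int *zero_point) {
  *scale = (max - min) / 255.0f;
  *zero_point = static_cast<int>(std::round(-128.0f - min / *scale));
}

int main() {
  float scale;
  int zp;
  QuantParamsFromRange(-4.0f, 7.0f, &scale, &zp);   // input range from the test comment
  std::printf("in:  scale=%f zp=%d\n", scale, zp);  // ~0.043137, -35
  QuantParamsFromRange(-3.0f, 7.0f, &scale, &zp);   // output range from the test comment
  std::printf("out: scale=%f zp=%d\n", scale, zp);  // ~0.039216, about -52
  return 0;
}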
@@ -0,0 +1,71 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <iostream>
#include <memory>
#include "common/common_test.h"
#include "mindspore/lite/src/runtime/kernel/arm/int8/relu_int8.h"
#include "mindspore/lite/src/kernel_registry.h"
#include "mindspore/lite/include/context.h"

namespace mindspore {
class TestReluInt8 : public mindspore::Common {
 public:
  TestReluInt8() {}
};

TEST_F(TestReluInt8, Relu) {
  lite::tensor::Tensor in_tensor(kNumberTypeInt8, {2, 2});
  lite::tensor::Tensor out_tensor(kNumberTypeInt8, {2, 2});

  int8_t input_data[] = {-102, 25, -51, 89};  // -0.8 0.2 -0.4 0.7
  int8_t output_data[4] = {0};
  in_tensor.SetData(input_data);
  out_tensor.SetData(output_data);

  const lite::tensor::QuantArg quant_in = {0.00784314f, 0};  // -1.0--1.0 ->
  const lite::tensor::QuantArg quant_out = {0.00784314f, 0};
  in_tensor.AddQuantParam(quant_in);
  out_tensor.AddQuantParam(quant_out);

  std::vector<lite::tensor::Tensor *> inputs = {&in_tensor};
  std::vector<lite::tensor::Tensor *> outputs = {&out_tensor};

  ActivationParameter parameter = {0};
  parameter.op_parameter_.type_ = schema::PrimitiveType_Activation;
  parameter.type_ = schema::ActivationType_RELU;

  kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Activation};

  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
  ASSERT_NE(creator, nullptr);

  auto ctx = std::make_shared<lite::Context>();
  auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(&parameter), ctx.get(), desc);
  ASSERT_NE(kernel, nullptr);

  auto ret = kernel->Run();
  EXPECT_EQ(0, ret);

  int8_t expect0[4] = {0, 26, 0, 90};  //
  for (int i = 0; i < 4; ++i) {
    EXPECT_EQ(output_data[i], expect0[i]);
  }

  in_tensor.SetData(nullptr);
  out_tensor.SetData(nullptr);
}
}  // namespace mindspore