!8115 [MSLITE] int8 transpose op & int8 layer norm op
Merge pull request !8115 from ling/srpull/8115/MERGE
commit
1797396aa8
@ -0,0 +1,35 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_NNACL_FP32_TRANSPOSE_H_
|
||||
#define MINDSPORE_LITE_NNACL_FP32_TRANSPOSE_H_
|
||||
|
||||
#include <string.h>
|
||||
#include "nnacl/transpose.h"
|
||||
#include "nnacl/errorcode.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
int DoTransposeFp32(const float *in_data, float *out_data, int *input_shape, const int *output_shape,
|
||||
TransposeParameter *transpose_param, int h_start, int h_end, int *size, int *position);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // MINDSPORE_LITE_NNACL_FP32_TRANSPOSE_H_
|
@ -0,0 +1,64 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "nnacl/int8/layer_norm_int8.h"
|
||||
|
||||
/*
|
||||
* origin : (x-mean) / sqrt(variance + epsilon) * gamma + beta
|
||||
* quant : (x-mean) / sqrt(mean(x * x) - mean * mean) * gamma + beta
|
||||
*
|
||||
* */
|
||||
// Quantized layer normalization over `outer_size` rows of `inner_size` int8 values each.
//
// For every row the zero-point-corrected mean and mean of squares are computed in integer arithmetic,
// the inverse square root of the variance is obtained as a fixed-point multiplier/shift pair, and each
// centred element is scaled by it. When `affine` is true the result is additionally multiplied by the
// zero-point-corrected gamma and offset by beta. The value is then requantized with
// quant_->multiplier_/shift_left_/shift_right_ and clamped to
// [output_activation_min_, output_activation_max_].
//
// Returns NNACL_NULL_PTR when a required pointer is NULL, NNACL_ERR when inner_size is not positive
// while there are rows to process, and NNACL_OK otherwise.
//
// NOTE(review): the output zero point (quant_->out_quant_arg_.zp_) is never read here - confirm that
// an output zero point of 0 is intended.
int LayerNormInt8(const int8_t *src_data, const int8_t *gamma_data, const int32_t *beta_data, int8_t *dst_data,
                  bool affine, int outer_size, int inner_size, LayerNormQuantArg *quant_) {
  if (src_data == NULL || dst_data == NULL || quant_ == NULL) {
    return NNACL_NULL_PTR;
  }

  if (affine && (gamma_data == NULL || beta_data == NULL)) {
    return NNACL_NULL_PTR;
  }

  if (outer_size <= 0) {
    // Nothing to normalize.
    return NNACL_OK;
  }
  if (inner_size <= 0) {
    // The mean computation below divides by inner_size.
    return NNACL_ERR;
  }

  for (int out_index = 0; out_index < outer_size; out_index++) {
    const int8_t *src = src_data + out_index * inner_size;
    int8_t *dst = dst_data + out_index * inner_size;

    int32_t sum = 0;
    // 64-bit accumulator: a squared int8 difference can reach 255 * 255, which overflows int32 for long rows.
    int64_t square_sum = 0;
    for (int in_index = 0; in_index < inner_size; in_index++) {
      int32_t tmp_src = src[in_index] - quant_->in_quant_arg_.zp_;
      sum += tmp_src;
      square_sum += (int64_t)tmp_src * tmp_src;
    }
    // Integer (truncating) division, exactly as before: round() applied to an integer quotient was a no-op.
    int32_t mean = sum / inner_size;
    int32_t square_mean = (int32_t)(square_sum / inner_size);

    int32_t variance_value = square_mean - mean * mean;

    int32_t multiplier;
    int32_t shift;
    GetSqrtQuantMultiplierExp(variance_value, -1, &multiplier, &shift);

    for (int in_index = 0; in_index < inner_size; in_index++) {
      int32_t in = src[in_index] - quant_->in_quant_arg_.zp_ - mean;
      // Pre-scale by 2^7 to keep precision through the fixed-point multiplication.
      int32_t tmp = RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(in * (1 << 7), multiplier), -shift);
      if (affine) {
        tmp = tmp * (gamma_data[in_index] - quant_->gamma_quant_arg_.zp_) + beta_data[in_index];
      }
      int32_t out = MultiplyByQuantizedMultiplier(tmp, quant_->multiplier_, quant_->shift_left_, quant_->shift_right_);
      // Clamp to [min, max]; the lower bound must be the activation minimum, otherwise every output is max.
      dst[in_index] = (int8_t)MSMIN(quant_->output_activation_max_, MSMAX(quant_->output_activation_min_, out));
    }
  }
  return NNACL_OK;
}
|
@ -0,0 +1,34 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_NNACL_INT8_LAYER_NORM_H_
|
||||
#define MINDSPORE_LITE_NNACL_INT8_LAYER_NORM_H_
|
||||
|
||||
#include "nnacl/errorcode.h"
|
||||
#include "nnacl/layer_norm_parameter.h"
|
||||
#include "nnacl/quantization/fixed_point.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Quantized (int8) layer normalization over outer_size rows of inner_size elements each.
// gamma_data / beta_data are only required when affine is true; quant_ carries the quantization
// parameters used by the implementation.
int LayerNormInt8(const int8_t *src_data,
                  const int8_t *gamma_data,
                  const int32_t *beta_data,
                  int8_t *dst_data,
                  bool affine,
                  int outer_size,
                  int inner_size,
                  LayerNormQuantArg *quant_);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // MINDSPORE_LITE_NNACL_INT8_LAYER_NORM_H_
|
@ -0,0 +1,200 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "nnacl/int8/transpose_int8.h"
|
||||
// Transposes a 2-D int8 tensor.
//
// in_data/out_data : source and destination buffers (must not overlap).
// strides          : element strides of the input tensor, indexed by input axis.
// out_strides      : unused for rank 2 (the output row stride equals output_shape[1]); taken as
//                    const so the signature matches the other TransposeDimNInt8 helpers.
// perm             : perm[i] is the input axis that becomes output axis i.
// output_shape     : extents of the output tensor.
// h_start/h_end    : currently ignored - every call copies the whole tensor.
void TransposeDim2Int8(const int8_t *in_data, int8_t *out_data, const int *strides, const int *out_strides,
                       const int *perm, const int *output_shape, int h_start, int h_end) {
  (void)out_strides;
  (void)h_start;
  (void)h_end;

  const int stride0 = strides[perm[0]];
  const int stride1 = strides[perm[1]];
  const int output0 = output_shape[0];
  const int output1 = output_shape[1];
  for (int i = 0; i < output0; ++i) {
    const int out_stride0_i = i * output1;
    const int stride0_i = i * stride0;
    for (int j = 0; j < output1; ++j) {
      out_data[out_stride0_i + j] = in_data[stride0_i + j * stride1];
    }
  }
}
|
||||
|
||||
// Transposes a 3-D int8 tensor: output element (a, b, c) is read from the input at offset
// a * strides[perm[0]] + b * strides[perm[1]] + c * strides[perm[2]].
// The innermost output axis is written contiguously; h_start/h_end are not used.
void TransposeDim3Int8(const int8_t *in_data, int8_t *out_data, const int *strides, const int *out_strides,
                       const int *perm, const int *output_shape, int h_start, int h_end) {
  const int in_step0 = strides[perm[0]];
  const int in_step1 = strides[perm[1]];
  const int in_step2 = strides[perm[2]];
  const int out_step0 = out_strides[0];
  const int out_step1 = out_strides[1];
  const int dim0 = output_shape[0];
  const int dim1 = output_shape[1];
  const int dim2 = output_shape[2];

  for (int a = 0; a < dim0; ++a) {
    const int src_a = a * in_step0;
    const int dst_a = a * out_step0;
    for (int b = 0; b < dim1; ++b) {
      const int src_ab = src_a + b * in_step1;
      const int dst_ab = dst_a + b * out_step1;
      for (int c = 0; c < dim2; ++c) {
        out_data[dst_ab + c] = in_data[src_ab + c * in_step2];
      }
    }
  }
}
|
||||
|
||||
// Transposes a 4-D int8 tensor. For each output coordinate the matching input offset is the sum of
// the coordinate components multiplied by the permuted input strides. h_start/h_end are not used.
void TransposeDim4Int8(const int8_t *in_data, int8_t *out_data, const int *strides, const int *out_strides,
                       const int *perm, const int *output_shape, int h_start, int h_end) {
  const int src_s0 = strides[perm[0]];
  const int src_s1 = strides[perm[1]];
  const int src_s2 = strides[perm[2]];
  const int src_s3 = strides[perm[3]];
  const int dst_s0 = out_strides[0];
  const int dst_s1 = out_strides[1];
  const int dst_s2 = out_strides[2];
  const int n0 = output_shape[0];
  const int n1 = output_shape[1];
  const int n2 = output_shape[2];
  const int n3 = output_shape[3];

  for (int i0 = 0; i0 < n0; ++i0) {
    for (int i1 = 0; i1 < n1; ++i1) {
      // Offsets shared by every element of the (i0, i1) plane.
      const int dst_plane = i0 * dst_s0 + i1 * dst_s1;
      const int src_plane = i0 * src_s0 + i1 * src_s1;
      for (int i2 = 0; i2 < n2; ++i2) {
        const int dst_row = dst_plane + i2 * dst_s2;
        const int src_row = src_plane + i2 * src_s2;
        for (int i3 = 0; i3 < n3; ++i3) {
          out_data[dst_row + i3] = in_data[src_row + i3 * src_s3];
        }
      }
    }
  }
}
|
||||
|
||||
// Transposes a 5-D int8 tensor. Output axes are walked in order; the input offset for each element
// is built from the permuted input strides. h_start/h_end are not used.
void TransposeDim5Int8(const int8_t *in_data, int8_t *out_data, const int *strides, const int *out_strides,
                       const int *perm, const int *output_shape, int h_start, int h_end) {
  const int in_s0 = strides[perm[0]];
  const int in_s1 = strides[perm[1]];
  const int in_s2 = strides[perm[2]];
  const int in_s3 = strides[perm[3]];
  const int in_s4 = strides[perm[4]];
  const int out_s0 = out_strides[0];
  const int out_s1 = out_strides[1];
  const int out_s2 = out_strides[2];
  const int out_s3 = out_strides[3];
  const int d0 = output_shape[0];
  const int d1 = output_shape[1];
  const int d2 = output_shape[2];
  const int d3 = output_shape[3];
  const int d4 = output_shape[4];

  for (int a = 0; a < d0; ++a) {
    const int out_a = a * out_s0;
    const int in_a = a * in_s0;
    for (int b = 0; b < d1; ++b) {
      const int out_ab = out_a + b * out_s1;
      const int in_ab = in_a + b * in_s1;
      for (int c = 0; c < d2; ++c) {
        const int out_abc = out_ab + c * out_s2;
        const int in_abc = in_ab + c * in_s2;
        for (int d = 0; d < d3; ++d) {
          const int out_abcd = out_abc + d * out_s3;
          const int in_abcd = in_abc + d * in_s3;
          for (int e = 0; e < d4; ++e) {
            out_data[out_abcd + e] = in_data[in_abcd + e * in_s4];
          }
        }
      }
    }
  }
}
|
||||
|
||||
// Generic N-D int8 transpose used when no rank-specialized helper applies.
//
// size     : caller-provided scratch of `dims` ints; filled with the number of output elements
//            spanned by one step along each output axis (size[dims - 1] == 1).
// position : caller-provided scratch of `dims` ints; holds the output coordinate being copied.
// For every flat output index the coordinate is recovered by repeated division, then mapped to the
// output offset (via out_strides, last axis stride 1) and the input offset (via strides[perm[i]]).
// h_start/h_end are currently ignored - every call copies the whole tensor.
void TransposeCommInt8(const int8_t *in_data, int8_t *out_data, const int *strides, const int *out_strides,
                       const int *perm, const int *output_shape, int h_start, int h_end, int dims, int *size,
                       int *position) {
  (void)h_start;
  (void)h_end;
  if (dims <= 0) {
    // Avoids writing size[-1] below.
    return;
  }

  size[dims - 1] = 1;
  for (int i = dims - 1; i > 0; --i) {
    size[i - 1] = size[i] * output_shape[i];
  }

  // Total element count, computed once and kept signed so the loop bound is never reinterpreted
  // as a huge unsigned value. A zero extent makes the count zero, so size[i] is never a zero divisor.
  const int total = size[0] * output_shape[0];
  for (int idx = 0; idx < total; ++idx) {
    int pos = idx;
    int output_idx = 0;
    int input_idx = 0;
    for (int i = 0; i < dims; ++i) {
      position[i] = pos / size[i];
      const int out_stride = i < dims - 1 ? out_strides[i] : 1;
      output_idx += position[i] * out_stride;
      input_idx += position[i] * strides[perm[i]];
      pos -= position[i] * size[i];
    }
    out_data[output_idx] = in_data[input_idx];
  }
}
|
||||
|
||||
int DoTransposeInt8(const int8_t *in_data, int8_t *out_data, int *input_shape, const int *output_shape,
|
||||
TransposeParameter *transpose_param, int h_start, int h_end, int *dim_size, int *position) {
|
||||
if (in_data == NULL || out_data == NULL) {
|
||||
return NNACL_ERR;
|
||||
}
|
||||
|
||||
int *perm = transpose_param->perm_;
|
||||
int *strides = transpose_param->strides_;
|
||||
int *out_strides = transpose_param->out_strides_;
|
||||
int num_axes = transpose_param->num_axes_;
|
||||
|
||||
if (num_axes < 2) {
|
||||
return NNACL_ERR;
|
||||
}
|
||||
|
||||
// check if transpose is needed
|
||||
bool needTranspose = false;
|
||||
for (int i = 1; i < num_axes; i++) {
|
||||
if (perm[i] - perm[i - 1] != 1) {
|
||||
needTranspose = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!needTranspose) {
|
||||
(void)memcpy(out_data, in_data, transpose_param->data_size_);
|
||||
return NNACL_OK;
|
||||
}
|
||||
|
||||
switch (num_axes) {
|
||||
case 2:
|
||||
TransposeDim2Int8(in_data, out_data, strides, out_strides, perm, output_shape, h_start, h_end);
|
||||
break;
|
||||
case 3:
|
||||
TransposeDim3Int8(in_data, out_data, strides, out_strides, perm, output_shape, h_start, h_end);
|
||||
break;
|
||||
case 4:
|
||||
TransposeDim4Int8(in_data, out_data, strides, out_strides, perm, output_shape, h_start, h_end);
|
||||
break;
|
||||
case 5:
|
||||
TransposeDim5Int8(in_data, out_data, strides, out_strides, perm, output_shape, h_start, h_end);
|
||||
break;
|
||||
default:
|
||||
TransposeCommInt8(in_data, out_data, strides, out_strides, perm, output_shape, h_start, h_end, num_axes, dim_size,
|
||||
position);
|
||||
break;
|
||||
}
|
||||
|
||||
return NNACL_OK;
|
||||
}
|
@ -0,0 +1,35 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_NNACL_INT8_TRANSPOSE_INT8_H_
|
||||
#define MINDSPORE_LITE_NNACL_INT8_TRANSPOSE_INT8_H_
|
||||
|
||||
#include <string.h>
|
||||
#include "nnacl/transpose.h"
|
||||
#include "nnacl/errorcode.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
int DoTransposeInt8(const int8_t *in_data, int8_t *out_data, int *input_shape, const int *output_shape,
|
||||
TransposeParameter *transpose_param, int h_start, int h_end, int *dim_size, int *position);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // MINDSPORE_LITE_NNACL_INT8_TRANSPOSE_INT8_H_
|
@ -0,0 +1,140 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "src/runtime/kernel/arm/int8/layer_norm_int8.h"
|
||||
#include "src/runtime/runtime_api.h"
|
||||
|
||||
using mindspore::lite::KernelRegistrar;
|
||||
using mindspore::lite::RET_ERROR;
|
||||
using mindspore::lite::RET_OK;
|
||||
using mindspore::schema::PrimitiveType_LayerNorm;
|
||||
|
||||
namespace mindspore::kernel {
|
||||
void LayerNormInt8CPUKernel::SetQuantArgs() {
|
||||
lite::Tensor *input = in_tensors_.at(0);
|
||||
lite::Tensor *output = out_tensors_.at(0);
|
||||
|
||||
quant_param_.in_quant_arg_.zp_ = input->GetQuantParams().front().zeroPoint;
|
||||
quant_param_.in_quant_arg_.scale_ = input->GetQuantParams().front().scale;
|
||||
quant_param_.out_quant_arg_.zp_ = output->GetQuantParams().front().zeroPoint;
|
||||
quant_param_.out_quant_arg_.scale_ = output->GetQuantParams().front().scale;
|
||||
|
||||
quant_param_.output_activation_min_ = std::numeric_limits<int8_t>::min();
|
||||
quant_param_.output_activation_max_ = std::numeric_limits<int8_t>::max();
|
||||
|
||||
if (param_->elementwise_affine_) {
|
||||
lite::Tensor *gamma_tensor = out_tensors_.at(1);
|
||||
quant_param_.gamma_quant_arg_.zp_ = gamma_tensor->GetQuantParams().front().zeroPoint;
|
||||
quant_param_.gamma_quant_arg_.scale_ = gamma_tensor->GetQuantParams().front().scale;
|
||||
}
|
||||
|
||||
double in_scale;
|
||||
if (param_->elementwise_affine_) {
|
||||
in_scale = static_cast<double>(quant_param_.in_quant_arg_.scale_ * quant_param_.gamma_quant_arg_.scale_);
|
||||
} else {
|
||||
in_scale = static_cast<double>(quant_param_.in_quant_arg_.scale_);
|
||||
}
|
||||
double real_multiplier = in_scale / static_cast<double>(quant_param_.out_quant_arg_.scale_);
|
||||
|
||||
QuantizeRoundParameter(real_multiplier, &quant_param_.multiplier_, &quant_param_.shift_left_,
|
||||
&quant_param_.shift_right_);
|
||||
return;
|
||||
}
|
||||
|
||||
int LayerNormInt8CPUKernel::Init() {
|
||||
SetQuantArgs();
|
||||
|
||||
if (!InferShapeDone()) {
|
||||
return RET_OK;
|
||||
}
|
||||
return ReSize();
|
||||
}
|
||||
|
||||
int LayerNormInt8CPUKernel::ReSize() {
|
||||
auto shape = in_tensors_.front()->shape();
|
||||
outer_size_ = 1;
|
||||
inner_size_ = 1;
|
||||
for (size_t i = 0; i < shape.size(); ++i) {
|
||||
if (i + param_->normalized_dims_ < shape.size()) {
|
||||
outer_size_ *= shape[i];
|
||||
} else {
|
||||
inner_size_ *= shape[i];
|
||||
}
|
||||
}
|
||||
|
||||
param_->thread_count_ = MSMIN(outer_size_, op_parameter_->thread_num_);
|
||||
param_->thread_outsize_ = UP_DIV(outer_size_, param_->thread_count_);
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int LayerNormInt8Run(void *cdata, int task_id) {
|
||||
auto kernel = reinterpret_cast<LayerNormInt8CPUKernel *>(cdata);
|
||||
kernel->DoExecute(task_id);
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int LayerNormInt8CPUKernel::DoExecute(int task_id) {
|
||||
int current_out_size = outer_size_ - task_id * param_->thread_outsize_;
|
||||
current_out_size = MSMIN(current_out_size, param_->thread_outsize_);
|
||||
if (current_out_size <= 0) {
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
const int8_t *thread_src = src_ptr_ + task_id * param_->thread_outsize_ * inner_size_;
|
||||
int8_t *thread_dst = dst_ptr_ + task_id * param_->thread_outsize_ * inner_size_;
|
||||
|
||||
LayerNormInt8(thread_src, gamma_ptr_, beta_ptr_, thread_dst, param_->elementwise_affine_, current_out_size,
|
||||
inner_size_, &quant_param_);
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int LayerNormInt8CPUKernel::Run() {
|
||||
src_ptr_ = reinterpret_cast<int8_t *>(in_tensors_.at(0)->MutableData());
|
||||
dst_ptr_ = reinterpret_cast<int8_t *>(out_tensors_.at(0)->MutableData());
|
||||
if (param_->elementwise_affine_) {
|
||||
gamma_ptr_ = reinterpret_cast<int8_t *>(in_tensors_.at(1)->MutableData());
|
||||
beta_ptr_ = reinterpret_cast<int32_t *>(in_tensors_.at(2)->MutableData());
|
||||
}
|
||||
|
||||
auto ret = ParallelLaunch(this->context_->thread_pool_, LayerNormInt8Run, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "LayerNormInt8Run error error_code[" << ret << "]";
|
||||
return ret;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
// Factory registered for int8 LayerNorm on CPU.
// Returns the initialized kernel, or nullptr when `parameter` is null, allocation fails (the
// parameter is freed in that case) or Init() fails (the kernel is deleted).
kernel::LiteKernel *CpuLayerNormInt8KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                                  const std::vector<lite::Tensor *> &outputs, OpParameter *parameter,
                                                  const lite::InnerContext *ctx, const KernelKey &desc,
                                                  const mindspore::lite::PrimitiveC *primitive) {
  if (parameter == nullptr) {
    // The kernel constructor and the error path below both dereference the parameter.
    MS_LOG(ERROR) << "parameter is nullptr.";
    return nullptr;
  }
  auto *kernel = new (std::nothrow) LayerNormInt8CPUKernel(parameter, inputs, outputs, ctx, primitive);
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "kernel is nullptr.";
    free(parameter);
    return nullptr;
  }
  auto ret = kernel->Init();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Init kernel failed, name: " << parameter->name_
                  << ", type: " << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(parameter->type_));
    delete kernel;
    return nullptr;
  }
  return kernel;
}
|
||||
|
||||
// Registers CpuLayerNormInt8KernelCreator under the (kCPU, kNumberTypeInt8, PrimitiveType_LayerNorm) key.
REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_LayerNorm, CpuLayerNormInt8KernelCreator)
|
||||
} // namespace mindspore::kernel
|
@ -0,0 +1,58 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_LAYERNORM_INT8_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_LAYERNORM_INT8_H_
|
||||
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
#include "nnacl/int8/layer_norm_int8.h"
|
||||
#include "src/kernel_registry.h"
|
||||
#include "include/errorcode.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
class LayerNormInt8CPUKernel : public LiteKernel {
|
||||
public:
|
||||
LayerNormInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
|
||||
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
|
||||
const mindspore::lite::PrimitiveC *primitive)
|
||||
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {
|
||||
param_ = reinterpret_cast<LayerNormParameter *>(parameter);
|
||||
}
|
||||
~LayerNormInt8CPUKernel() override{};
|
||||
|
||||
int Init() override;
|
||||
int ReSize() override;
|
||||
int Run() override;
|
||||
|
||||
public:
|
||||
int DoExecute(int task_id);
|
||||
|
||||
private:
|
||||
void SetQuantArgs();
|
||||
|
||||
private:
|
||||
LayerNormParameter *param_ = nullptr;
|
||||
LayerNormQuantArg quant_param_;
|
||||
int outer_size_;
|
||||
int inner_size_;
|
||||
int8_t *src_ptr_ = nullptr;
|
||||
int8_t *dst_ptr_ = nullptr;
|
||||
int8_t *gamma_ptr_ = nullptr;
|
||||
int32_t *beta_ptr_ = nullptr;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_LAYERNORM_INT8_H_
|
@ -0,0 +1,184 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/kernel/arm/int8/transpose_int8.h"
|
||||
#include "src/runtime/runtime_api.h"
|
||||
|
||||
using mindspore::lite::KernelRegistrar;
|
||||
using mindspore::lite::RET_ERROR;
|
||||
using mindspore::lite::RET_OK;
|
||||
using mindspore::lite::RET_OP_EXECUTE_FAILURE;
|
||||
using mindspore::schema::PrimitiveType_Transpose;
|
||||
|
||||
namespace mindspore::kernel {
|
||||
|
||||
// Nothing to release here: the scratch buffers are freed at the end of every Run().
TransposeInt8CPUKernel::~TransposeInt8CPUKernel() = default;
|
||||
|
||||
int TransposeInt8CPUKernel::Init() {
|
||||
if (!InferShapeDone()) {
|
||||
return RET_OK;
|
||||
}
|
||||
return ReSize();
|
||||
}
|
||||
|
||||
int TransposeInt8Run(void *cdata, int task_id) {
|
||||
auto transpose_int8 = reinterpret_cast<TransposeInt8CPUKernel *>(cdata);
|
||||
auto ret = transpose_int8->DoTranspose(task_id);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "DoTranspose error task_id[" << task_id << "] error_code[" << ret << "]";
|
||||
return RET_OP_EXECUTE_FAILURE;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
void TransposeInt8CPUKernel::FreeTmpBuf() {
|
||||
if (!extra_dims_) {
|
||||
return;
|
||||
}
|
||||
if (dim_size_ != nullptr) {
|
||||
context_->allocator->Free(dim_size_);
|
||||
dim_size_ = nullptr;
|
||||
}
|
||||
if (position_ != nullptr) {
|
||||
context_->allocator->Free(position_);
|
||||
position_ = nullptr;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
int TransposeInt8CPUKernel::MallocTmpBuf() {
|
||||
if (!extra_dims_) {
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int dims = out_tensors_[0]->shape().size();
|
||||
|
||||
dim_size_ = reinterpret_cast<int *>(context_->allocator->Malloc(dims * thread_h_num_ * sizeof(int)));
|
||||
if (dim_size_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Malloc data failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
position_ = reinterpret_cast<int *>(context_->allocator->Malloc(dims * thread_h_num_ * sizeof(int)));
|
||||
if (position_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Malloc data failed";
|
||||
context_->allocator->Free(dim_size_);
|
||||
dim_size_ = nullptr;
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int TransposeInt8CPUKernel::ReSize() {
|
||||
auto in_tensor = in_tensors_.front();
|
||||
auto out_tensor = out_tensors_.front();
|
||||
auto in_shape = in_tensor->shape();
|
||||
auto out_shape = out_tensor->shape();
|
||||
|
||||
transpose_param_->data_size_ = in_tensor->Size();
|
||||
|
||||
transpose_param_->strides_[transpose_param_->num_axes_ - 1] = 1;
|
||||
transpose_param_->out_strides_[transpose_param_->num_axes_ - 1] = 1;
|
||||
for (int i = transpose_param_->num_axes_ - 2; i >= 0; i--) {
|
||||
transpose_param_->strides_[i] = in_shape[i + 1] * transpose_param_->strides_[i + 1];
|
||||
transpose_param_->out_strides_[i] = out_shape[i + 1] * transpose_param_->out_strides_[i + 1];
|
||||
}
|
||||
|
||||
extra_dims_ = out_shape.size() > MAX_TRANSPOSE_DIM_SIZE;
|
||||
|
||||
num_unit_ = static_cast<int>(in_shape.at(transpose_param_->perm_[kNHWC_H]));
|
||||
thread_h_num_ = MSMIN(thread_num_, num_unit_);
|
||||
thread_h_stride_ = UP_DIV(num_unit_, thread_h_num_);
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int TransposeInt8CPUKernel::DoTranspose(int task_id) {
|
||||
int num_unit_thread = MSMIN(thread_h_stride_, num_unit_ - task_id * thread_h_stride_);
|
||||
if (num_unit_thread <= 0) {
|
||||
return RET_OK;
|
||||
}
|
||||
int thread_offset = task_id * thread_h_stride_;
|
||||
|
||||
int *dim_size = nullptr;
|
||||
int *position = nullptr;
|
||||
if (extra_dims_) {
|
||||
dim_size = dim_size_ + task_id * transpose_param_->num_axes_;
|
||||
position = position_ + task_id * transpose_param_->num_axes_;
|
||||
}
|
||||
|
||||
auto ret = DoTransposeInt8(in_ptr_, out_ptr_, in_shape_, out_shape_, transpose_param_, thread_offset,
|
||||
thread_offset + num_unit_thread, dim_size, position);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Transpose error task_id[" << task_id << "] error_code[" << ret << "]";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int TransposeInt8CPUKernel::Run() {
|
||||
auto in_tensor = in_tensors_.front();
|
||||
auto out_tensor = out_tensors_.front();
|
||||
|
||||
in_ptr_ = reinterpret_cast<int8_t *>(in_tensor->data_c());
|
||||
out_ptr_ = reinterpret_cast<int8_t *>(out_tensor->data_c());
|
||||
|
||||
in_shape_ = in_tensor->shape().data();
|
||||
out_shape_ = out_tensor->shape().data();
|
||||
|
||||
int ret = MallocTmpBuf();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "MallocTmpBuf error_code[" << ret << "]";
|
||||
}
|
||||
|
||||
ret = ParallelLaunch(this->context_->thread_pool_, TransposeInt8Run, this, thread_h_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Tranpose error error_code[" << ret << "]";
|
||||
}
|
||||
|
||||
FreeTmpBuf();
|
||||
in_shape_ = nullptr;
|
||||
out_shape_ = nullptr;
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Factory registered for int8 Transpose on CPU.
// Returns the initialized kernel, or nullptr when opParameter is null, allocation fails (the
// parameter is freed in that case) or Init() fails (the kernel is deleted).
kernel::LiteKernel *CpuTransposeInt8KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                                  const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
                                                  const lite::InnerContext *ctx, const kernel::KernelKey &desc,
                                                  const mindspore::lite::PrimitiveC *primitive) {
  MS_ASSERT(desc.type == schema::PrimitiveType_Transpose);
  if (opParameter == nullptr) {
    // The previous message ("desc type is not Transpose") described a different failure.
    MS_LOG(ERROR) << "opParameter is nullptr";
    return nullptr;
  }
  auto *kernel = new (std::nothrow) TransposeInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "New kernel fails.";
    free(opParameter);
    return nullptr;
  }

  auto ret = kernel->Init();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
    delete kernel;
    return nullptr;
  }
  return kernel;
}
|
||||
|
||||
// Registers CpuTransposeInt8KernelCreator under the (kCPU, kNumberTypeInt8, PrimitiveType_Transpose) key.
REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Transpose, CpuTransposeInt8KernelCreator)
|
||||
} // namespace mindspore::kernel
|
@ -0,0 +1,63 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_TRANSPOSE_INT8_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_TRANSPOSE_INT8_H_
|
||||
|
||||
#include <vector>
|
||||
#include "nnacl/int8/transpose_int8.h"
|
||||
#include "src/kernel_registry.h"
|
||||
#include "src/lite_kernel.h"
|
||||
#include "include/errorcode.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
class TransposeInt8CPUKernel : public LiteKernel {
|
||||
public:
|
||||
TransposeInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
|
||||
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
|
||||
const mindspore::lite::PrimitiveC *primitive)
|
||||
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {
|
||||
transpose_param_ = reinterpret_cast<TransposeParameter *>(op_parameter_);
|
||||
}
|
||||
~TransposeInt8CPUKernel() override;
|
||||
|
||||
int Init() override;
|
||||
int ReSize() override;
|
||||
int Run() override;
|
||||
|
||||
public:
|
||||
int DoTranspose(int task_id);
|
||||
|
||||
private:
|
||||
int MallocTmpBuf();
|
||||
void FreeTmpBuf();
|
||||
|
||||
private:
|
||||
TransposeParameter *transpose_param_;
|
||||
int8_t *in_ptr_ = nullptr;
|
||||
int8_t *out_ptr_ = nullptr;
|
||||
int *in_shape_ = nullptr;
|
||||
int *out_shape_ = nullptr;
|
||||
int *dim_size_ = nullptr;
|
||||
int *position_ = nullptr;
|
||||
bool extra_dims_ = false;
|
||||
int thread_num_ = 1;
|
||||
int thread_h_stride_ = 0;
|
||||
int thread_h_num_ = 0;
|
||||
int num_unit_ = 0;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_TRANSPOSE_INT8_H_
|
Loading…
Reference in new issue