!3936 [lite] compare, batch_to_space, depth_to_space, argmin, argmax support int8
Merge pull request !3936 from chenjianping/lite_dev
commit 00fa08ba84
@ -0,0 +1,194 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/kernel/arm/int8/arithmetic_int8.h"
#include "src/runtime/kernel/arm/opclib/int8/arithmetic_int8.h"
#include "src/runtime/kernel/arm/opclib/arithmetic_common.h"
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "src/runtime/runtime_api.h"
#include "include/errorcode.h"

using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_PARAM_INVALID;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;

using mindspore::schema::PrimitiveType_Equal;
using mindspore::schema::PrimitiveType_NotEqual;
using mindspore::schema::PrimitiveType_LessEqual;
using mindspore::schema::PrimitiveType_Greater;
using mindspore::schema::PrimitiveType_GreaterEqual;
using mindspore::schema::PrimitiveType_Less;

namespace mindspore::kernel {
namespace {
int ArithmeticsInt8Launch(int thread_id, LiteParallelGroupEnv *penv, void *cdata) {
  auto arithmetic_kernel = reinterpret_cast<ArithmeticInt8CPUKernel *>(cdata);
  auto error_code = arithmetic_kernel->DoArithmetic(thread_id);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "ArithmeticsRun error thread_id[" << thread_id << "] error_code[" << error_code << "]";
    return RET_ERROR;
  }
  return RET_OK;
}
}  // namespace

ArithmeticInt8CPUKernel::~ArithmeticInt8CPUKernel() {
  auto param = reinterpret_cast<ArithmeticParameter *>(opParameter);
  if (!param->broadcasting_) {
    return;
  }
  if (context_->allocator != nullptr) {
    if (tile_data0_ != nullptr) {
      context_->allocator->Free(tile_data0_);
    }
    if (tile_data1_ != nullptr) {
      context_->allocator->Free(tile_data1_);
    }
  } else {
    if (tile_data0_ != nullptr) {
      free(tile_data0_);
    }
    if (tile_data1_ != nullptr) {
      free(tile_data1_);
    }
  }
  tile_data0_ = nullptr;
  tile_data1_ = nullptr;
}

int ArithmeticInt8CPUKernel::Init() {
  switch (opParameter->type_) {
    case PrimitiveType_Equal:
      arithmetic_run_ = ElementEqual;
      break;
    case PrimitiveType_NotEqual:
      arithmetic_run_ = ElementNotEqual;
      break;
    case PrimitiveType_Less:
      arithmetic_run_ = ElementLess;
      break;
    case PrimitiveType_LessEqual:
      arithmetic_run_ = ElementLessEqual;
      break;
    case PrimitiveType_Greater:
      arithmetic_run_ = ElementGreater;
      break;
    case PrimitiveType_GreaterEqual:
      arithmetic_run_ = ElementGreaterEqual;
      break;
    default:
      MS_LOG(ERROR) << "Error Operator type " << opParameter->type_;
      arithmetic_run_ = nullptr;
      return RET_PARAM_INVALID;
  }
  auto data_size = outputs_[0]->Size();
  auto param = reinterpret_cast<ArithmeticParameter *>(opParameter);
  if (param->broadcasting_) {
    if (context_->allocator != nullptr) {
      tile_data0_ = reinterpret_cast<int8_t *>(context_->allocator->Malloc(data_size));
      tile_data1_ = reinterpret_cast<int8_t *>(context_->allocator->Malloc(data_size));
    } else {
      tile_data0_ = reinterpret_cast<int8_t *>(malloc(data_size));
      tile_data1_ = reinterpret_cast<int8_t *>(malloc(data_size));
    }
  } else {
    tile_data0_ = nullptr;
    tile_data1_ = nullptr;
  }
  return RET_OK;
}

int ArithmeticInt8CPUKernel::ReSize() { return RET_OK; }

int ArithmeticInt8CPUKernel::DoArithmetic(int thread_id) {
  auto input0_data = reinterpret_cast<int8_t *>(inputs_[0]->Data());
  auto input1_data = reinterpret_cast<int8_t *>(inputs_[1]->Data());
  auto output_data = reinterpret_cast<int8_t *>(outputs_[0]->Data());
  auto element_num = outputs_[0]->ElementsNum();
  auto param = reinterpret_cast<ArithmeticParameter *>(opParameter);
  if (param->broadcasting_ && arithmetic_run_ != nullptr) {
    MS_ASSERT(thread_count_ != 0);
    int stride = UP_DIV(element_num, thread_count_);
    int count = MSMIN(stride, element_num - stride * thread_id);

    int error_code = arithmetic_run_(tile_data0_ + stride * thread_id, tile_data1_ + stride * thread_id,
                                     output_data + stride * thread_id, count);
    if (error_code != RET_OK) {
      MS_LOG(ERROR) << "Arithmetic run fail! ret: " << error_code;
      return RET_ERROR;
    }
  } else if (arithmetic_run_ != nullptr) {
    int error_code = arithmetic_run_(input0_data, input1_data, output_data, element_num);
    if (error_code != RET_OK) {
      MS_LOG(ERROR) << "Arithmetic run fail! ret: " << error_code;
      return RET_ERROR;
    }
  } else {
    MS_LOG(ERROR) << "arithmetic_run function is nullptr!";
    return RET_ERROR;
  }
  return RET_OK;
}

int ArithmeticInt8CPUKernel::Run() {
  auto param = reinterpret_cast<ArithmeticParameter *>(opParameter);
  if (param->broadcasting_) {
    auto input_data0 = reinterpret_cast<int8_t *>(inputs_[0]->Data());
    auto input_data1 = reinterpret_cast<int8_t *>(inputs_[1]->Data());
    TileDimensionsInt8(input_data0, input_data1, tile_data0_, tile_data1_, param);
  }
  int error_code = LiteBackendParallelLaunch(ArithmeticsInt8Launch, this, thread_count_);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "Arithmetic launch function fail! ret: " << error_code;
    return RET_ERROR;
  }
  return RET_OK;
}

kernel::LiteKernel *CpuArithmeticInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                   const std::vector<lite::tensor::Tensor *> &outputs,
                                                   OpParameter *parameter, const lite::Context *ctx,
                                                   const kernel::KernelKey &desc) {
  if (parameter == nullptr) {
    MS_LOG(ERROR) << "Input parameter is null!";
    return nullptr;
  }
  auto kernel = new (std::nothrow) ArithmeticInt8CPUKernel(parameter, inputs, outputs, ctx);
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "Create ArithmeticInt8CPUKernel failed, name: " << parameter->name_;
    return nullptr;
  }
  auto ret = kernel->Init();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Init kernel failed, name: " << parameter->name_ << ", type: "
                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(parameter->type_));
    delete kernel;
    return nullptr;
  }
  return kernel;
}

REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Equal, CpuArithmeticInt8KernelCreator)
REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_NotEqual, CpuArithmeticInt8KernelCreator)
REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Less, CpuArithmeticInt8KernelCreator)
REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_LessEqual, CpuArithmeticInt8KernelCreator)
REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Greater, CpuArithmeticInt8KernelCreator)
REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_GreaterEqual, CpuArithmeticInt8KernelCreator)

}  // namespace mindspore::kernel
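// Illustrative sketch (not part of this PR): how DoArithmetic() above splits the flattened
// output across threads. UP_DIV and MSMIN are redefined locally so this compiles standalone;
// the real macros come from the opclib headers.
#include <cstdio>

#define UP_DIV(x, y) (((x) + (y) - 1) / (y))
#define MSMIN(a, b) ((a) < (b) ? (a) : (b))

int main() {
  const int element_num = 10;  // hypothetical output element count
  const int thread_count = 4;  // hypothetical thread pool size
  const int stride = UP_DIV(element_num, thread_count);  // 3 elements per slice
  for (int thread_id = 0; thread_id < thread_count; ++thread_id) {
    const int count = MSMIN(stride, element_num - stride * thread_id);
    // threads 0..2 process 3 elements each, thread 3 processes the remaining 1
    std::printf("thread %d: offset=%d count=%d\n", thread_id, stride * thread_id, count);
  }
  return 0;
}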
@ -0,0 +1,47 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_ARITHMETIC_INT8_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_ARITHMETIC_INT8_H_

#include <vector>
#include "src/lite_kernel.h"
#include "schema/model_generated.h"

namespace mindspore::kernel {
class ArithmeticInt8CPUKernel : public LiteKernel {
  typedef int (*ArithmeticRunInt8)(int8_t *input0, int8_t *input1, int8_t *output, int element_size);

 public:
  ArithmeticInt8CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                          const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
      : LiteKernel(parameter, inputs, outputs), thread_count_(ctx->thread_num_), context_(ctx) {}
  ~ArithmeticInt8CPUKernel();

  int Init() override;
  int ReSize() override;
  int Run() override;
  int DoArithmetic(int thread_id);

 private:
  int thread_count_;
  // Default-initialize the pointers so the destructor is safe even if Init() fails early.
  int8_t *tile_data0_ = nullptr;
  int8_t *tile_data1_ = nullptr;
  const lite::Context *context_;
  ArithmeticRunInt8 arithmetic_run_ = nullptr;
};
}  // namespace mindspore::kernel
#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_ARITHMETIC_INT8_H_
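// Illustrative sketch (not part of this PR): the ArithmeticRunInt8 typedef above is a plain
// function pointer; Init() selects one of the opclib comparison functions by primitive type and
// DoArithmetic() calls through it. ToyElementGreater is a stand-in with the same signature.
#include <cstdint>
#include <cstdio>

typedef int (*ArithmeticRunInt8)(int8_t *input0, int8_t *input1, int8_t *output, int element_size);

int ToyElementGreater(int8_t *input0, int8_t *input1, int8_t *output, int element_size) {
  for (int i = 0; i < element_size; ++i) {
    output[i] = static_cast<int8_t>(input0[i] > input1[i]);
  }
  return 0;
}

int main() {
  int8_t a[4] = {1, 5, 3, -2};
  int8_t b[4] = {2, 5, 1, -4};
  int8_t out[4] = {0};
  ArithmeticRunInt8 run = ToyElementGreater;  // the kernel stores the selected op the same way
  run(a, b, out, 4);
  for (int i = 0; i < 4; ++i) {
    std::printf("%d ", out[i]);  // prints: 0 0 1 1
  }
  std::printf("\n");
  return 0;
}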
@ -0,0 +1,32 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_DEPTH_TO_SPACE_PARAMETER_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_DEPTH_TO_SPACE_PARAMETER_H_
#include "src/runtime/kernel/arm/opclib/op_base.h"

struct DepthToSpaceParameter {
  OpParameter op_parameter_;
  int32_t block_size_;
  int32_t in_stride_dim0_;
  int32_t in_stride_dim1_;
  int32_t in_stride_dim2_;
  int32_t out_stride_dim0_;
  int32_t out_stride_dim1_;
  int32_t out_stride_dim2_;
  uint8_t data_type_size_;
};

#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_DEPTH_TO_SPACE_PARAMETER_H_
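// Illustrative sketch (not part of this PR): one plausible way the stride fields above could be
// filled for an NHWC tensor of shape {N, H, W, C} with the given block size. The code that
// actually populates DepthToSpaceParameter is outside this diff, so treat this as an assumption.
#include <cstdint>
#include <cstdio>

struct Strides {
  int32_t dim0;
  int32_t dim1;
  int32_t dim2;
};

Strides NhwcStrides(int h, int w, int c) { return Strides{h * w * c, w * c, c}; }

int main() {
  const int H = 2, W = 2, C = 8, block = 2;
  Strides in = NhwcStrides(H, W, C);                                     // {32, 16, 8}
  Strides out = NhwcStrides(H * block, W * block, C / (block * block));  // {32, 8, 2}
  std::printf("in strides:  %d %d %d\n", in.dim0, in.dim1, in.dim2);
  std::printf("out strides: %d %d %d\n", out.dim0, out.dim1, out.dim2);
  return 0;
}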
File diff suppressed because it is too large
@ -1,33 +0,0 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARG_MIN_MAX_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARG_MIN_MAX_H_

#include "src/runtime/kernel/arm/opclib/arg_min_max_parameter.h"

void ArgMax(const int8_t *input, int8_t *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
            int after_axis_count);
void ArgMin(const int8_t *input, int8_t *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
            int after_axis_count);
void ArgMaxDim0(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param);
void ArgMinDim0(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param);
void ArgMaxDim1(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param);
void ArgMinDim1(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param);
void ArgMaxDim2(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param);
void ArgMinDim2(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param);
void ArgMaxDim3(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param);
void ArgMinDim3(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param);
#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARG_MIN_MAX_H_
@ -0,0 +1,221 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "src/runtime/kernel/arm/opclib/int8/arg_min_max_int8.h"
#include <float.h>

void CalcParameter(const int *shape, int dims_number, int axis, int *pre_axis_count, int *axis_count,
                   int *after_axis_count) {
  *pre_axis_count = 1;
  for (int i = 0; i < axis; ++i) {
    *pre_axis_count = (*pre_axis_count) * shape[i];
  }

  *axis_count = shape[axis];

  *after_axis_count = 1;
  for (int i = axis + 1; i < dims_number; ++i) {
    *after_axis_count = (*after_axis_count) * shape[i];
  }
}

void ArgMinMaxQuant(const int8_t *input, int8_t *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
                    int after_axis_count, QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
  bool out_value = param->out_value_;
  float output_inverse_scale = 1.f / out_quant_arg->scale_;
  float bias = -in_quant_arg->zp_ * in_quant_arg->scale_;
  int32_t output_zp = out_quant_arg->zp_;
  for (int i = 0; i < pre_axis_count; ++i) {
    size_t output_offset = i * after_axis_count;
    size_t input_offset = output_offset * axis_count;
    for (int j = 0; j < after_axis_count; ++j) {
      float value = -FLT_MAX;
      if (!param->get_max_) {
        value = FLT_MAX;
      }
      float index = 0.0f;
      for (int k = 0; k < axis_count; ++k) {
        float value_tmp = input[input_offset + k * after_axis_count + j] * in_quant_arg->scale_ + bias;
        if (param->get_max_) {
          if (value_tmp > value) {
            value = value_tmp;
            index = k;
          }
        } else {
          if (value_tmp < value) {
            value = value_tmp;
            index = k;
          }
        }
      }
      float real_out = out_value ? value : index;
      output[output_offset + j] = real_out * output_inverse_scale + output_zp;
    }
  }
}

void ArgMinMaxQuant(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
                    QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
  int pre_axis_count = 1;
  int axis_count = 1;
  int after_axis_count = 1;
  CalcParameter(in_shape, param->dims_size_, param->axis_, &pre_axis_count, &axis_count, &after_axis_count);
  ArgMinMaxQuant(input, output, param, pre_axis_count, axis_count, after_axis_count, in_quant_arg, out_quant_arg);
  return;
}

int ArgCompareAscInt8(const void *a, const void *b) {
  return reinterpret_cast<const ArgElement *>(a)->data_.f_data_
    - reinterpret_cast<const ArgElement *>(b)->data_.f_data_;
}

int ArgCompareDescInt8(const void *a, const void *b) {
  return reinterpret_cast<const ArgElement *>(b)->data_.f_data_
    - reinterpret_cast<const ArgElement *>(a)->data_.f_data_;
}

int8_t GetInt8Output(float real_out, float output_inverse_scale, int32_t output_zp) {
  return real_out * output_inverse_scale + output_zp;
}

void ArgMinMaxDim0(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
                   QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
  bool out_value = param->out_value_;
  float output_inverse_scale = 1.f / out_quant_arg->scale_;
  float bias = -in_quant_arg->zp_ * in_quant_arg->scale_;
  int32_t output_zp = out_quant_arg->zp_;
  for (int32_t i = 0; i < param->in_strides_[0]; ++i) {
    for (int j = 0; j < in_shape[0]; ++j) {
      size_t offset = param->in_strides_[0] * j + i;
      param->arg_elements_[j].index_ = j;
      param->arg_elements_[j].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias;
    }
    if (param->get_max_) {
      qsort(param->arg_elements_, in_shape[0], sizeof(ArgElement), ArgCompareDescInt8);
    } else {
      qsort(param->arg_elements_, in_shape[0], sizeof(ArgElement), ArgCompareAscInt8);
    }

    for (int j = 0; j < param->topk_; ++j) {
      size_t out_offset = j * param->out_strides_[0] + i;
      float real_out = out_value ? param->arg_elements_[j].data_.f_data_ : param->arg_elements_[j].index_;
      output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp);
    }
  }
}

void ArgMinMaxDim1(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
                   QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
  bool out_value = param->out_value_;
  float output_inverse_scale = 1.f / out_quant_arg->scale_;
  float bias = -in_quant_arg->zp_ * in_quant_arg->scale_;
  int32_t output_zp = out_quant_arg->zp_;
  int in_shape1 = in_shape[1];
  for (int i = 0; i < in_shape[0]; ++i) {
    size_t in_dim0_offset = i * param->in_strides_[0];
    size_t out_dim0_offset = i * param->out_strides_[0];
    for (int j = 0; j < param->in_strides_[1]; ++j) {
      for (int k = 0; k < in_shape1; ++k) {
        size_t offset = param->in_strides_[1] * k + in_dim0_offset + j;
        param->arg_elements_[k].index_ = k;
        param->arg_elements_[k].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias;
      }
      if (param->get_max_) {
        qsort(param->arg_elements_, in_shape1, sizeof(ArgElement), ArgCompareDescInt8);
      } else {
        qsort(param->arg_elements_, in_shape1, sizeof(ArgElement), ArgCompareAscInt8);
      }

      for (int k = 0; k < param->topk_; ++k) {
        size_t out_offset = out_dim0_offset + j + k * param->out_strides_[1];
        float real_out = out_value ? param->arg_elements_[k].data_.f_data_ : param->arg_elements_[k].index_;
        output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp);
      }
    }
  }
}

void ArgMinMaxDim2(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
                   QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
  bool out_value = param->out_value_;
  float output_inverse_scale = 1.f / out_quant_arg->scale_;
  float bias = -in_quant_arg->zp_ * in_quant_arg->scale_;
  int32_t output_zp = out_quant_arg->zp_;
  int in_shape1 = in_shape[1];
  int in_shape2 = in_shape[2];
  for (int i = 0; i < in_shape[0]; ++i) {
    size_t in_dim0_offset = i * param->in_strides_[0];
    size_t out_dim0_offset = i * param->out_strides_[0];
    for (int j = 0; j < in_shape1; ++j) {
      size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset;
      size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset;
      for (int k = 0; k < param->in_strides_[2]; ++k) {
        for (int l = 0; l < in_shape2; ++l) {
          size_t offset = param->in_strides_[2] * l + k + in_dim1_offset;
          param->arg_elements_[l].index_ = l;
          param->arg_elements_[l].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias;
        }
        if (param->get_max_) {
          qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), ArgCompareDescInt8);
        } else {
          qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), ArgCompareAscInt8);
        }
        for (int l = 0; l < param->topk_; ++l) {
          size_t out_offset = out_dim1_offset + k + l * param->out_strides_[2];
          float real_out = out_value ? param->arg_elements_[l].data_.f_data_ : param->arg_elements_[l].index_;
          output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp);
        }
      }
    }
  }
}

void ArgMinMaxDim3(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
                   QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
  bool out_value = param->out_value_;
  float output_inverse_scale = 1.f / out_quant_arg->scale_;
  float bias = -in_quant_arg->zp_ * in_quant_arg->scale_;
  int32_t output_zp = out_quant_arg->zp_;
  int in_shape1 = in_shape[1];
  int in_shape2 = in_shape[2];
  int in_shape3 = in_shape[3];
  for (int i = 0; i < in_shape[0]; ++i) {
    size_t in_dim0_offset = i * param->in_strides_[0];
    size_t out_dim0_offset = i * param->out_strides_[0];
    for (int j = 0; j < in_shape1; ++j) {
      size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset;
      size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset;
      for (int k = 0; k < in_shape2; ++k) {
        size_t in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset;
        size_t out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset;
        for (int l = 0; l < in_shape3; ++l) {
          size_t offset = l + in_dim2_offset;
          param->arg_elements_[l].index_ = l;
          param->arg_elements_[l].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias;
        }
        if (param->get_max_) {
          qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), ArgCompareDescInt8);
        } else {
          qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), ArgCompareAscInt8);
        }
        for (int l = 0; l < param->topk_; ++l) {
          size_t out_offset = out_dim2_offset + l;
          float real_out = out_value ? param->arg_elements_[l].data_.f_data_ : param->arg_elements_[l].index_;
          output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp);
        }
      }
    }
  }
}
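// Worked example (not part of this PR) of the dequantize -> select -> requantize round trip used
// above, with made-up quantization parameters: input scale 0.5 / zero point 10, output scale 0.25 /
// zero point 0. It mirrors value_tmp = q * scale + bias and real_out * output_inverse_scale + zp.
#include <cstdint>
#include <cstdio>

int main() {
  const int8_t input[4] = {12, 20, 8, 14};  // quantized codes along the reduced axis
  const float in_scale = 0.5f;
  const float in_bias = -10 * in_scale;     // -zp * scale
  const float out_inverse_scale = 1.f / 0.25f;
  const int32_t out_zp = 0;

  float value = -1e30f;
  float index = 0.0f;
  for (int k = 0; k < 4; ++k) {
    float value_tmp = input[k] * in_scale + in_bias;  // dequantized: 1.0, 5.0, -1.0, 2.0
    if (value_tmp > value) {
      value = value_tmp;
      index = k;
    }
  }
  // out_value_ == true  -> requantized max value:    5.0 * 4 + 0 = 20
  // out_value_ == false -> requantized argmax index: 1.0 * 4 + 0 = 4
  std::printf("max -> %d, argmax -> %d\n",
              static_cast<int>(value * out_inverse_scale + out_zp),
              static_cast<int>(index * out_inverse_scale + out_zp));
  return 0;
}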
@ -0,0 +1,32 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARG_MIN_MAX_INT8_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARG_MIN_MAX_INT8_H_

#include "src/runtime/kernel/arm/opclib/arg_min_max_parameter.h"
#include "src/runtime/kernel/arm/opclib/quantization/quantize.h"

void ArgMinMaxQuant(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
                    QuantArg *in_quant, QuantArg *out_quant);
void ArgMinMaxDim0(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
                   QuantArg *in_quant, QuantArg *out_quant);
void ArgMinMaxDim1(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
                   QuantArg *in_quant, QuantArg *out_quant);
void ArgMinMaxDim2(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
                   QuantArg *in_quant, QuantArg *out_quant);
void ArgMinMaxDim3(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
                   QuantArg *in_quant, QuantArg *out_quant);
#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARG_MIN_MAX_INT8_H_
@ -0,0 +1,63 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/kernel/arm/opclib/int8/arithmetic_int8.h"
#ifdef ENABLE_NEON
#include <arm_neon.h>
#endif
#include "src/runtime/kernel/arm/opclib/errorcode.h"

int ElementNotEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size) {
  for (int index = 0; index < element_size; ++index) {
    output[index] = (int8_t)(input0[index] != input1[index]);
  }
  return OPCLIB_OK;
}

int ElementEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size) {
  for (int index = 0; index < element_size; ++index) {
    output[index] = (int8_t)(input0[index] == input1[index]);
  }
  return OPCLIB_OK;
}

int ElementLess(int8_t *input0, int8_t *input1, int8_t *output, int element_size) {
  for (int index = 0; index < element_size; ++index) {
    output[index] = (int8_t)(input0[index] < input1[index]);
  }
  return OPCLIB_OK;
}

int ElementLessEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size) {
  for (int index = 0; index < element_size; ++index) {
    output[index] = (int8_t)(input0[index] <= input1[index]);
  }
  return OPCLIB_OK;
}

int ElementGreater(int8_t *input0, int8_t *input1, int8_t *output, int element_size) {
  for (int index = 0; index < element_size; ++index) {
    output[index] = (int8_t)(input0[index] > input1[index]);
  }
  return OPCLIB_OK;
}

int ElementGreaterEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size) {
  for (int index = 0; index < element_size; ++index) {
    output[index] = (int8_t)(input0[index] >= input1[index]);
  }
  return OPCLIB_OK;
}
@ -0,0 +1,32 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARITHMETIC_INT8_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARITHMETIC_INT8_H_

#include "src/runtime/kernel/arm/opclib/op_base.h"

int ElementNotEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size);

int ElementEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size);

int ElementLess(int8_t *input0, int8_t *input1, int8_t *output, int element_size);

int ElementLessEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size);

int ElementGreater(int8_t *input0, int8_t *input1, int8_t *output, int element_size);

int ElementGreaterEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size);
#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARITHMETIC_INT8_H_
@ -0,0 +1,111 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/kernel/arm/opclib/int8/batch_to_space_int8.h"
#include "src/runtime/kernel/arm/opclib/arithmetic_common.h"

void BatchToSpaceNoCropForNHWC(const int8_t *input, int8_t *output, const int *in_shape, int out_n, const int *block,
                               QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
  int block_h = block[0];
  int block_w = block[1];
  int in_h = in_shape[1];
  int in_w = in_shape[2];
  int in_c = in_shape[3];
  size_t stride_h = block_w * out_n;
  size_t output_offset = 0;
  size_t in_stride_h = in_w * in_c;
  size_t in_stride_n = in_stride_h * in_h;
  float output_inverse_scale = 1.f / out_quant_arg->scale_;
  float scale = in_quant_arg->scale_ * output_inverse_scale;
  float bias = -in_quant_arg->zp_ * scale;
  int32_t output_zp = out_quant_arg->zp_;

  for (int n = 0; n < out_n; ++n) {
    for (int h = 0; h < in_h; ++h) {
      size_t h_offset = h * in_stride_h;
      for (int bh = 0; bh < block_h; ++bh) {
        for (int w = 0; w < in_w; ++w) {
          size_t w_offset = w * in_c;
          for (int bw = 0; bw < block_w; ++bw) {
            size_t in_offset = in_stride_n * (bh * stride_h + bw * out_n + n) + w_offset + h_offset;
            for (int c = 0; c < in_c; ++c) {
              int32_t output_tmp = round(input[in_offset + c] * scale + bias) + output_zp;
              output_tmp = output_tmp > 127 ? 127 : output_tmp;
              output_tmp = output_tmp < -128 ? -128 : output_tmp;
              output[output_offset++] = output_tmp;
            }
          }
        }
      }
    }
  }
}

void BatchToSpaceForNHWC(const int8_t *input, int8_t *output, const int *in_shape, int out_n, const int *block,
                         const int *crops, QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
  int block_h = block[0];
  int block_w = block[1];
  int in_n = in_shape[0];
  int in_h = in_shape[1];
  int in_w = in_shape[2];
  int in_c = in_shape[3];
  int h_start = crops[0] / block_h;
  int h_valid_begin = crops[0];
  int h_end = MSMIN((in_h * block_h - crops[1]) / block_h + 1, in_h);
  int h_valid_end = in_h * block_h - crops[1] - 1;
  int w_start = crops[2] / block_w;
  int w_valid_begin = crops[2];
  int w_end = MSMIN((in_w * block_w - crops[3]) / block_w + 1, in_w);
  int w_valid_end = in_w * block_w - crops[3] - 1;

  size_t stride_h = block_w * out_n;
  size_t output_offset = 0;
  size_t in_stride_h = in_w * in_c;
  size_t in_stride_n = in_stride_h * in_h;

  float output_inverse_scale = 1.f / out_quant_arg->scale_;
  float scale = in_quant_arg->scale_ * output_inverse_scale;
  float bias = -in_quant_arg->zp_ * scale;
  int32_t output_zp = out_quant_arg->zp_;

  for (int n = 0; n < out_n; ++n) {
    for (int h = h_start; h < h_end; ++h) {
      size_t h_offset = h * in_stride_h;
      for (int bh = 0; bh < block_h; ++bh) {
        size_t h_index = h * block_h + bh;
        if (h_index < h_valid_begin || h_index > h_valid_end) {
          continue;
        }
        for (int w = w_start; w < w_end; ++w) {
          size_t w_offset = w * in_c;
          for (int bw = 0; bw < block_w; ++bw) {
            size_t w_index = w * block_w + bw;
            if (w_index < w_valid_begin || w_index > w_valid_end) {
              continue;
            }
            size_t in_offset = in_stride_n * (bh * stride_h + bw * out_n + n) + w_offset + h_offset;
            for (int c = 0; c < in_c; ++c) {
              int32_t output_tmp = round(input[in_offset + c] * scale + bias) + output_zp;
              output_tmp = output_tmp > 127 ? 127 : output_tmp;
              output_tmp = output_tmp < -128 ? -128 : output_tmp;
              output[output_offset++] = output_tmp;
            }
          }
        }
      }
    }
  }
}
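// Illustrative sketch (not part of this PR): the batch-index mapping used by the loops above,
// with the same shapes as the first fp32 unit test in this PR (input {4, 1, 1, 3}, block {2, 2},
// out_n = 4 / (2 * 2) = 1). Quantization is omitted; only the offset arithmetic is shown.
#include <cstdio>

int main() {
  const int out_n = 1, in_h = 1, in_w = 1, in_c = 3;
  const int block_h = 2, block_w = 2;
  const int stride_h = block_w * out_n;        // 2
  const int in_stride_h = in_w * in_c;         // 3
  const int in_stride_n = in_stride_h * in_h;  // 3
  for (int n = 0; n < out_n; ++n) {
    for (int h = 0; h < in_h; ++h) {
      for (int bh = 0; bh < block_h; ++bh) {
        for (int w = 0; w < in_w; ++w) {
          for (int bw = 0; bw < block_w; ++bw) {
            int src_batch = bh * stride_h + bw * out_n + n;  // input batch feeding this output cell
            int in_offset = in_stride_n * src_batch + w * in_c + h * in_stride_h;
            std::printf("output block cell (bh=%d, bw=%d) <- input batch %d, offset %d\n",
                        bh, bw, src_batch, in_offset);
          }
        }
      }
    }
  }
  return 0;
}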
@ -0,0 +1,25 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_BATCH_TO_SPACE_INT8_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_BATCH_TO_SPACE_INT8_H_
#include "src/runtime/kernel/arm/opclib/op_base.h"
#include "src/runtime/kernel/arm/opclib/quantization/quantize.h"

void BatchToSpaceNoCropForNHWC(const int8_t *input, int8_t *output, const int *in_shape, int out_n, const int *block,
                               QuantArg *in_quant_arg, QuantArg *out_quant_arg);
void BatchToSpaceForNHWC(const int8_t *input, int8_t *output, const int *in_shape, int out_n, const int *block,
                         const int *crops, QuantArg *in_quant_arg, QuantArg *out_quant_arg);
#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_BATCH_TO_SPACE_INT8_H_
@ -0,0 +1,51 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "src/runtime/kernel/arm/opclib/int8/depth_to_space_int8.h"
#include <string.h>

void DepthToSpaceForNHWC(const int8_t *input, int8_t *output, int *in_shape, DepthToSpaceParameter *param,
                         QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
  int32_t block_size = param->block_size_;
  int32_t in_shape_dim2 = in_shape[2];
  int32_t in_shape_dim1 = in_shape[1];
  size_t copy_size = block_size * param->out_stride_dim2_;
  float output_inverse_scale = 1.f / out_quant_arg->scale_;
  float scale = in_quant_arg->scale_ * output_inverse_scale;
  float bias = -in_quant_arg->zp_ * scale;
  int32_t output_zp = out_quant_arg->zp_;
  for (int i = 0; i < in_shape[0]; ++i) {
    size_t in_offset_n = i * param->in_stride_dim0_;
    size_t out_offset_n = i * param->out_stride_dim0_;
    for (int j = 0; j < in_shape_dim1; ++j) {
      size_t in_offset_h = in_offset_n + j * param->in_stride_dim1_;
      size_t out_offset_h = out_offset_n + j * block_size * param->out_stride_dim1_;
      for (int k = 0; k < in_shape_dim2; ++k) {
        size_t in_offset_w = in_offset_h + k * param->in_stride_dim2_;
        size_t out_offset_w = out_offset_h + k * block_size * param->out_stride_dim2_;
        for (int l = 0; l < block_size; ++l) {
          size_t out_offset = out_offset_w + l * param->out_stride_dim1_;
          size_t in_offset = in_offset_w + l * block_size * param->out_stride_dim2_;
          for (int m = 0; m < copy_size; ++m) {
            int32_t output_tmp = round(input[in_offset + m] * scale + bias) + output_zp;
            output_tmp = output_tmp > 127 ? 127 : output_tmp;
            output_tmp = output_tmp < -128 ? -128 : output_tmp;
            output[out_offset + m] = output_tmp;
          }
        }
      }
    }
  }
}
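// Worked example (not part of this PR): the requantization-with-clamping used by both the int8
// batch_to_space and depth_to_space kernels above, factored out with made-up quantization
// parameters (input scale 0.1 / zero point 5, output scale 0.05 / zero point 0).
#include <cmath>
#include <cstdint>
#include <cstdio>

int8_t Requantize(int8_t q_in, float in_scale, int32_t in_zp, float out_scale, int32_t out_zp) {
  const float scale = in_scale / out_scale;  // in_quant scale * (1 / out_quant scale)
  const float bias = -in_zp * scale;
  int32_t q_out = static_cast<int32_t>(std::round(q_in * scale + bias)) + out_zp;
  q_out = q_out > 127 ? 127 : q_out;   // clamp to the int8 range
  q_out = q_out < -128 ? -128 : q_out;
  return static_cast<int8_t>(q_out);
}

int main() {
  // Code 30 represents (30 - 5) * 0.1 = 2.5, which requantizes to 2.5 / 0.05 = 50.
  std::printf("%d\n", Requantize(30, 0.1f, 5, 0.05f, 0));   // prints 50
  // Code 100 represents 9.5, i.e. 190 in the output scale, so it is clamped to 127.
  std::printf("%d\n", Requantize(100, 0.1f, 5, 0.05f, 0));  // prints 127
  return 0;
}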
@ -0,0 +1,24 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_DEPTH_TO_SPACE_INT8_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_DEPTH_TO_SPACE_INT8_H_

#include "src/runtime/kernel/arm/opclib/depth_to_space_parameter.h"
#include "src/runtime/kernel/arm/opclib/quantization/quantize.h"

void DepthToSpaceForNHWC(const int8_t *input, int8_t *output, int *in_shape, DepthToSpaceParameter *param,
                         QuantArg *in_quant_arg, QuantArg *out_quant_arg);
#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_DEPTH_TO_SPACE_INT8_H_
File diff suppressed because it is too large
@ -0,0 +1,197 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "mindspore/core/utils/log_adapter.h"
#include "common/common_test.h"
#include "mindspore/lite/src/runtime/kernel/arm/opclib/batch_to_space.h"
#include "mindspore/lite/src/runtime/kernel/arm/opclib/arithmetic_common.h"

namespace mindspore {

class BatchToSpaceTestFp32 : public mindspore::Common {
 public:
  BatchToSpaceTestFp32() = default;
};

TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest1) {
  float input[12] = {10, 30, 90, 2, 20, 120, 5, 50, 150, 6, 16, 160};
  constexpr int kOutSize = 12;
  float expect_out[kOutSize] = {10, 30, 90, 2, 20, 120, 5, 50, 150, 6, 16, 160};

  float output[kOutSize];
  int in_shape[4] = {4, 1, 1, 3};
  int out_n = 1;
  int block[2] = {2, 2};
  BatchToSpaceNoCropForNHWC(input, output, in_shape, out_n, block, sizeof(float));
  for (int i = 0; i < kOutSize; ++i) {
    std::cout << output[i] << " ";
  }
  std::cout << "\n";
  CompareOutputData(output, expect_out, kOutSize, 0.000001);
}

TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest_crop_1) {
  float input[12] = {10, 30, 90, 2, 20, 120, 5, 50, 150, 6, 16, 160};
  constexpr int kOutSize = 3;
  float expect_out[kOutSize] = {5, 50, 150};

  float output[kOutSize];
  int in_shape[4] = {4, 1, 1, 3};
  int out_n = 1;
  int block[2] = {2, 2};
  int crops[4] = {1, 0, 0, 1};
  BatchToSpaceForNHWC(input, output, in_shape, out_n, block, crops, sizeof(float));
  for (int i = 0; i < kOutSize; ++i) {
    std::cout << output[i] << " ";
  }
  std::cout << "\n";
  CompareOutputData(output, expect_out, kOutSize, 0.000001);
}

TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest2) {
  float input[32] = {1, 10, 3, 30, 9, 90, 11, 110, 2, 20, 4, 40, 10, 100, 12, 120,
                     5, 50, 7, 70, 13, 130, 15, 150, 6, 60, 8, 80, 14, 140, 16, 160};
  constexpr int kOutSize = 32;
  float expect_out[kOutSize] = {1, 10, 2, 20, 3, 30, 4, 40, 5, 50, 6, 60, 7, 70, 8, 80,
                                9, 90, 10, 100, 11, 110, 12, 120, 13, 130, 14, 140, 15, 150, 16, 160};

  float output[kOutSize];
  int in_shape[4] = {4, 2, 2, 2};
  int out_n = 1;
  int block[2] = {2, 2};
  BatchToSpaceNoCropForNHWC(input, output, in_shape, out_n, block, sizeof(float));
  for (int i = 0; i < kOutSize; ++i) {
    std::cout << output[i] << " ";
  }
  std::cout << "\n";
  CompareOutputData(output, expect_out, kOutSize, 0.000001);
}

TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest_crop_2) {
  float input[32] = {1, 10, 3, 30, 9, 90, 11, 110, 2, 20, 4, 40, 10, 100, 12, 120,
                     5, 50, 7, 70, 13, 130, 15, 150, 6, 60, 8, 80, 14, 140, 16, 160};
  constexpr int kOutSize = 12;
  float expect_out[kOutSize] = {6, 60, 7, 70, 8, 80,
                                10, 100, 11, 110, 12, 120};

  float output[kOutSize];
  int in_shape[4] = {4, 2, 2, 2};
  int out_n = 1;
  int block[2] = {2, 2};
  int crops[4] = {1, 1, 1, 0};
  BatchToSpaceForNHWC(input, output, in_shape, out_n, block, crops, sizeof(float));
  for (int i = 0; i < kOutSize; ++i) {
    std::cout << output[i] << " ";
  }
  std::cout << "\n";
  CompareOutputData(output, expect_out, kOutSize, 0.000001);
}

TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest3) {
  float input[64] = {1, 10, 3, 30, 9, 90, 11, 110, 2, 20, 4, 40, 10, 100, 12, 120,
                     5, 50, 7, 70, 13, 130, 15, 150, 6, 60, 8, 80, 14, 140, 16, 160,
                     21, 10, 23, 30, 29, 90, 211, 110, 22, 20, 24, 40, 210, 100, 212, 120,
                     25, 50, 27, 70, 213, 130, 215, 150, 26, 60, 28, 80, 214, 140, 216, 160};
  constexpr int kOutSize = 64;
  float expect_out[kOutSize] = {1, 10, 5, 50, 3, 30, 7, 70, 21, 10, 25, 50, 23, 30, 27, 70,
                                9, 90, 13, 130, 11, 110, 15, 150, 29, 90, 213, 130, 211, 110, 215, 150,
                                2, 20, 6, 60, 4, 40, 8, 80, 22, 20, 26, 60, 24, 40, 28, 80,
                                10, 100, 14, 140, 12, 120, 16, 160, 210, 100, 214, 140, 212, 120, 216, 160};

  float output[kOutSize];
  int in_shape[4] = {8, 2, 2, 2};
  int out_n = 2;
  int block[2] = {2, 2};
  BatchToSpaceNoCropForNHWC(input, output, in_shape, out_n, block, sizeof(float));
  for (int i = 0; i < kOutSize && i < 32; ++i) {
    std::cout << output[i] << " ";
  }
  std::cout << "\n";
  CompareOutputData(output, expect_out, kOutSize, 0.000001);
}

TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest_crop_3) {
  float input[64] = {1, 10, 3, 30, 9, 90, 11, 110, 2, 20, 4, 40, 10, 100, 12, 120,
                     5, 50, 7, 70, 13, 130, 15, 150, 6, 60, 8, 80, 14, 140, 16, 160,
                     21, 10, 23, 30, 29, 90, 211, 110, 22, 20, 24, 40, 210, 100, 212, 120,
                     25, 50, 27, 70, 213, 130, 215, 150, 26, 60, 28, 80, 214, 140, 216, 160};
  constexpr int kOutSize = 16;
  float expect_out[kOutSize] = {9, 90, 13, 130, 29, 90, 213, 130,
                                10, 100, 14, 140, 210, 100, 214, 140};

  float output[kOutSize];
  int in_shape[4] = {8, 2, 2, 2};
  int out_n = 2;
  int block[2] = {2, 2};
  int crops[4] = {2, 0, 0, 2};
  BatchToSpaceForNHWC(input, output, in_shape, out_n, block, crops, sizeof(float));
  for (int i = 0; i < kOutSize && i < 32; ++i) {
    std::cout << output[i] << " ";
  }
  std::cout << "\n";
  CompareOutputData(output, expect_out, kOutSize, 0.000001);
}

TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest4) {
  float input[96] = {1, 10, 3, 30, 9, 90, 11, 110, 2, 20, 4, 40, 10, 100, 12, 120, 5, 50, 7, 70,
                     13, 130, 15, 150, 6, 60, 8, 80, 14, 140, 16, 160, 21, 10, 23, 30, 29, 90, 211, 110,
                     22, 20, 24, 40, 210, 100, 212, 120, 25, 50, 27, 70, 213, 130, 215, 150, 26, 60, 28, 80,
                     214, 140, 216, 160, 31, 10, 33, 30, 39, 90, 311, 110, 32, 20, 34, 40, 310, 100, 312, 120,
                     35, 50, 37, 70, 313, 130, 315, 150, 36, 60, 38, 80, 314, 140, 316, 160};
  constexpr int kOutSize = 96;
  float expect_out[kOutSize] = {
    1, 10, 5, 50, 3, 30, 7, 70, 21, 10, 25, 50, 23, 30, 27, 70, 31, 10, 35, 50, 33, 30, 37, 70,
    9, 90, 13, 130, 11, 110, 15, 150, 29, 90, 213, 130, 211, 110, 215, 150, 39, 90, 313, 130, 311, 110, 315, 150,
    2, 20, 6, 60, 4, 40, 8, 80, 22, 20, 26, 60, 24, 40, 28, 80, 32, 20, 36, 60, 34, 40, 38, 80,
    10, 100, 14, 140, 12, 120, 16, 160, 210, 100, 214, 140, 212, 120, 216, 160, 310, 100, 314, 140, 312, 120, 316, 160};

  float output[kOutSize];
  int in_shape[4] = {12, 2, 2, 2};
  int out_n = 2;
  int block[2] = {3, 2};
  BatchToSpaceNoCropForNHWC(input, output, in_shape, out_n, block, sizeof(float));
  for (int i = 0; i < kOutSize && i < 32; ++i) {
    std::cout << output[i] << " ";
  }
  std::cout << "\n";
  CompareOutputData(output, expect_out, kOutSize, 0.000001);
}

TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest_crop_4) {
  float input[96] = {1, 10, 3, 30, 9, 90, 11, 110, 2, 20, 4, 40, 10, 100, 12, 120, 5, 50, 7, 70,
                     13, 130, 15, 150, 6, 60, 8, 80, 14, 140, 16, 160, 21, 10, 23, 30, 29, 90, 211, 110,
                     22, 20, 24, 40, 210, 100, 212, 120, 25, 50, 27, 70, 213, 130, 215, 150, 26, 60, 28, 80,
                     214, 140, 216, 160, 31, 10, 33, 30, 39, 90, 311, 110, 32, 20, 34, 40, 310, 100, 312, 120,
                     35, 50, 37, 70, 313, 130, 315, 150, 36, 60, 38, 80, 314, 140, 316, 160};
  constexpr int kOutSize = 24;
  float expect_out[kOutSize] = {
    25, 50, 23, 30, 35, 50, 33, 30,
    13, 130, 11, 110, 26, 60, 24, 40, 36, 60, 34, 40, 14, 140, 12, 120};

  float output[kOutSize];
  int in_shape[4] = {12, 2, 2, 2};
  int out_n = 2;
  int block[2] = {3, 2};
  int crops[4] = {1, 2, 1, 1};
  BatchToSpaceForNHWC(input, output, in_shape, out_n, block, crops, sizeof(float));
  for (int i = 0; i < kOutSize && i < 32; ++i) {
    std::cout << output[i] << " ";
  }
  std::cout << "\n";
  CompareOutputData(output, expect_out, kOutSize, 0.000001);
}

}  // namespace mindspore
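// Side note (not part of the test file): the out_n values in the tests above follow the standard
// BatchToSpace relation out_n = in_n / (block_h * block_w); a tiny check that reproduces them.
#include <cstdio>

int main() {
  const int in_n[3] = {4, 8, 12};
  const int block[3][2] = {{2, 2}, {2, 2}, {3, 2}};
  for (int i = 0; i < 3; ++i) {
    std::printf("in_n=%d block={%d,%d} -> out_n=%d\n", in_n[i], block[i][0], block[i][1],
                in_n[i] / (block[i][0] * block[i][1]));
  }
  return 0;
}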
Some files were not shown because too many files have changed in this diff.