fix code review fp32 int8

pull/8959/head
zhaozhenlong 4 years ago
parent 0e58beea2f
commit 1224fa13f3

@ -17,16 +17,15 @@
#include "nnacl/int8/squeeze_int8.h"
#include <string.h>
void Squeeze(int8_t **inputs, int8_t *output_ptr, int task_id, SqueezeQuantArg *quant_Squeeze_parm,
SqueezeParameter *para_, size_t osize) {
float output_scale = quant_Squeeze_parm->out_quant_args_.scale_;
void SqueezeInt8(const int8_t *input_ptr, int8_t *output_ptr, int task_id, SqueezeQuantArg *quant_Squeeze_parm,
SqueezeParameter *para_, const int num) {
float output_scale = quant_Squeeze_parm->out_quant_args_->scale_;
const float output_inverse_scale = 1.f / output_scale;
QuantArg *input_quant = quant_Squeeze_parm->in_quant_args_;
int output_zp = quant_Squeeze_parm->out_quant_args_.zp_;
int output_zp = quant_Squeeze_parm->out_quant_args_->zp_;
const int i = 0;
int8_t *input_ptr = inputs[0];
for (int j = task_id; j < osize; j += para_->op_parameter_.thread_num_) {
for (int j = task_id; j < num; j += para_->op_parameter_.thread_num_) {
float scale = input_quant[i].scale_ * output_inverse_scale;
float bias = -input_quant[i].zp_ * scale;
int32_t output_tmp = round(input_ptr[j] * scale + bias) + output_zp;

@ -23,8 +23,8 @@
#ifdef __cplusplus
extern "C" {
#endif
void Squeeze(int8_t **inputs, int8_t *output_ptr, int task_id, SqueezeQuantArg *quant_Squeeze_parm,
SqueezeParameter *para_, size_t osize);
// Requantizes int8 input elements from the input quantization (scale / zero-point)
// to the output quantization: round(in * scale + bias) + output zero-point, where
// scale = in_scale / out_scale and bias = -in_zp * scale.
//
// input_ptr:            source int8 elements, read at index j.
// output_ptr:           destination buffer; the store is not visible in the
//                       definition shown here -- presumably written at the same
//                       index j (TODO confirm).
// task_id:              index of the first element handled by this call; the loop
//                       then advances by para_->op_parameter_.thread_num_.
// quant_Squeeze_parm:   supplies in_quant_args_ (only entry 0 is used) and
//                       out_quant_args_ (dereferenced as a pointer).
// para_:                supplies op_parameter_.thread_num_, the loop stride.
// num:                  exclusive upper bound on the element index.
void SqueezeInt8(const int8_t *input_ptr, int8_t *output_ptr, int task_id, SqueezeQuantArg *quant_Squeeze_parm,
SqueezeParameter *para_, const int num);
#ifdef __cplusplus
}
#endif

@ -58,16 +58,8 @@ typedef struct ConcatQuantArg {
} ConcatQuantArg;
// Quantization arguments consumed by the int8 Squeeze kernel.
typedef struct SqueezeQuantArg {
  // NOTE(review): the size/shape/axis fields below are not read by the
  // SqueezeInt8 code visible alongside this struct -- confirm whether they
  // are still needed.
  int *input_sizes_;
  int output_size_;
  int **input_shapes_;
  int *output_shape_;
  float alpha;
  int axis_;
  size_t input_num_;
  size_t output_dim_;
  // Input quantization parameters; SqueezeInt8 indexes this as an array and
  // reads scale_ and zp_ from entry 0.
  QuantArg *in_quant_args_;
  // Output quantization parameters. Declared exactly once, as a pointer,
  // because SqueezeInt8 reads it through `->scale_` / `->zp_`; a second
  // by-value member with the same name would be a redefinition error.
  QuantArg *out_quant_args_;
} SqueezeQuantArg;
typedef struct UnSqueezeQuantArg {
@ -254,8 +246,8 @@ typedef struct LeakyReluQuantArg {
PreluQuantArg quant_arg;
float slope_;
int64_t axis_;
const int *in_shape_;
const int *out_shape_;
int *in_shape_;
int *out_shape_;
int input_dim_;
int element_num;
} LeakyReluQuantArg;

@ -90,8 +90,10 @@ void LshProjectionCPUKernel::FreeKeys() {
if (param_->hash_buffs_ != nullptr) {
for (int i = 0; i < op_parameter_->thread_num_; i++) {
context_->allocator->Free(param_->hash_buffs_[i]);
param_->hash_buffs_[i] = nullptr;
}
context_->allocator->Free(param_->hash_buffs_);
param_->hash_buffs_ = nullptr;
}
}

@ -135,12 +135,14 @@ int LstmCPUKernel::ReSize() {
ret = InitWeightBias();
if (ret != RET_OK) {
MS_LOG(ERROR) << "LstmCPUKernel InitWeightBias error.";
FreeTmpBuffer();
return RET_ERROR;
}
ret = InitBuffer();
if (ret != RET_OK) {
MS_LOG(ERROR) << "LstmCPUKernel InitBuffer error.";
FreeTmpBuffer();
return RET_ERROR;
}
return RET_OK;
@ -157,13 +159,18 @@ int LstmCPUKernel::Run() {
MS_ASSERT(output != nullptr);
auto input_ptr = reinterpret_cast<float *>(input->MutableData());
MS_ASSERT(input_ptr);
auto output_ptr = reinterpret_cast<float *>(output->MutableData());
MS_ASSERT(output_ptr);
auto output_hidden_state = out_tensors_[1];
memcpy(output_hidden_state->MutableData(), hidden_state->MutableData(), hidden_state->ElementsNum() * sizeof(float));
auto output_cell_state = out_tensors_[2];
memcpy(output_cell_state->MutableData(), cell_state->MutableData(), cell_state->ElementsNum() * sizeof(float));
MS_ASSERT(weight_h_ptr_);
MS_ASSERT(weight_i_ptr_);
MS_ASSERT(bias_ptr_);
MS_ASSERT(gate_buffer_);
Lstm(output_ptr, input_ptr, weight_i_ptr_, weight_h_ptr_, bias_ptr_,
reinterpret_cast<float *>(output_hidden_state->MutableData()),
reinterpret_cast<float *>(output_cell_state->MutableData()), gate_buffer_, lstm_parm_);

@ -258,6 +258,10 @@ int MatmulCPUKernel::RunImpl(int task_id) {
auto b = cur_b_ptr_ + task_id * thread_stride_ * C8NUM * params_->deep_;
auto c = cur_c_ptr_ + task_id * thread_stride_ * C8NUM;
auto bias = bias_ptr_ ? bias_ptr_ + task_id * thread_stride_ * C8NUM : NULL;
MS_ASSERT(cur_a_ptr_);
MS_ASSERT(b);
MS_ASSERT(c);
MS_ASSERT(bias);
if (is_vector_a_) {
MatVecMul(cur_a_ptr_, b, c, bias, ActType_No, params_->deep_, cur_oc);
} else {

@ -16,6 +16,7 @@
#include "src/runtime/kernel/arm/fp32/pad_fp32.h"
#include <string>
#include <cmath>
#include "src/kernel_registry.h"
#include "schema/model_generated.h"
#include "include/errorcode.h"
@ -111,10 +112,12 @@ int PadImpl(void *cdata, int task_id) {
int PadCPUKernel::RunImpl(int task_id) {
auto input = in_tensors_.at(0);
auto output = out_tensors_.at(0);
MS_ASSERT(input);
MS_ASSERT(output);
auto input_data = reinterpret_cast<float *>(input->MutableData());
auto output_data = reinterpret_cast<float *>(output->MutableData());
MS_ASSERT(input_data);
MS_ASSERT(output_data);
Pad(input_data, output_data, in_, out_, pad_param_->paddings_, task_id, context_->thread_num_);
return RET_OK;
@ -241,7 +244,7 @@ int PadCPUKernel::Run() {
auto output = out_tensors_.at(0);
int output_size = output->ElementsNum();
auto output_data = reinterpret_cast<float *>(output->MutableData());
if (pad_param_->constant_value_ - 0.0f < 1e-5) {
if (abs(pad_param_->constant_value_ - 0.0f) < 1e-5) {
memset(output_data, 0, output_size * sizeof(float));
} else {
for (auto i = 0; i < output_size; ++i) {

@ -51,7 +51,9 @@ int PowerCPUKernel::Run() {
int PowerCPUKernel::RunImpl(int task_id) {
auto x_addr = reinterpret_cast<float *>(in_tensors_[0]->MutableData());
MS_ASSERT(x_addr);
auto output_addr = reinterpret_cast<float *>(out_tensors_[0]->MutableData());
MS_ASSERT(output_addr);
auto size = in_tensors_[0]->ElementsNum();
int stride = UP_DIV(size, thread_count_);
int len = MSMIN(stride, size - stride * task_id);
@ -59,6 +61,7 @@ int PowerCPUKernel::RunImpl(int task_id) {
bool broadcast = true;
if (in_tensors_.size() == 2) {
exp_addr = reinterpret_cast<float *>(in_tensors_[1]->MutableData());
MS_ASSERT(exp_addr);
broadcast = in_tensors_[0]->shape() == in_tensors_[1]->shape() ? false : true;
}
float *cur_exp = nullptr;

@ -111,7 +111,8 @@ int PReluCPUKernel::Run() {
auto input_tensor = in_tensors_[0];
ori_input_ = reinterpret_cast<float *>(input_tensor->MutableData());
output_data_ = reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->MutableData());
MS_ASSERT(ori_input_);
MS_ASSERT(output_data_);
if (prelu_param_->channelShared) {
auto ret = ProcessShareChannelInput();
if (ret != RET_OK) {

@ -48,6 +48,7 @@ int RangeCPUKernel::Run() {
}
}
auto output_ptr = reinterpret_cast<float *>(out_tensors_.at(0)->data_c());
MS_ASSERT(output_ptr);
Range(output_ptr, start, limit, delta);
return RET_OK;
}

@ -33,6 +33,7 @@ int RankCPUKernel::ReSize() { return RET_OK; }
int RankCPUKernel::Run() {
auto output_ptr = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
MS_ASSERT(output_ptr);
auto in_shape = in_tensors_[0]->shape();
auto rank = in_shape.size();
Rank(output_ptr, rank);

@ -15,6 +15,7 @@
*/
#include "src/runtime/kernel/arm/fp32/reduce_fp32.h"
#include <cmath>
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"
@ -144,9 +145,10 @@ int ReduceCPUKernel::Run() {
}
src_data_ = dst_data_;
}
if (reduce_param_->reduce_to_end_ && reduce_param_->coeff - 1.0f > 1e-5) {
if (reduce_param_->reduce_to_end_ && abs(reduce_param_->coeff - 1.0f) > 1e-5) {
ret = CalculateCoeffOutput();
if (ret != RET_OK) {
FreeTmpBuffer();
return ret;
}
}
@ -213,7 +215,7 @@ int ReduceCPUKernel::MallocTmpBuffer() {
}
void ReduceCPUKernel::FreeTmpBuffer() {
for (auto buffer : data_buffers_) {
for (auto &buffer : data_buffers_) {
if (buffer != nullptr) {
context_->allocator->Free(buffer);
buffer = nullptr;

@ -39,6 +39,8 @@ int ReshapeCPUKernel::Run() {
auto input_ptr = in_tensors_.at(kInputIndex)->MutableData();
auto output_ptr = out_tensors_.at(kOutputIndex)->MutableData();
size_t data_size = in_tensors_.at(kInputIndex)->Size();
MS_ASSERT(input_ptr);
MS_ASSERT(output_ptr);
Reshape(input_ptr, output_ptr, data_size);
return RET_OK;
}

@ -32,7 +32,7 @@ int ReverseSequenceCPUKernel::Init() {
// Rewrites a negative axis in place as the equivalent non-negative axis for a
// tensor of the given shape.
//
// shape: dimensions of the tensor the axis refers to; only its rank
//        (shape.size()) is used.
// axis:  in/out parameter. Ignored when null; left untouched when the value is
//        already non-negative.
void ReverseSequenceCPUKernel::ConvertAxisToPositive(const std::vector<int> shape, int *axis) {
  if (axis != nullptr && *axis < 0) {
    // Add the rank exactly once. The explicit cast keeps the arithmetic in
    // signed int instead of mixing int with the unsigned size_type.
    *axis += static_cast<int>(shape.size());
  }
}
@ -91,7 +91,11 @@ int ReverseSequenceCPUKernel::Run() {
void *input1 = in_tensors_.at(1)->MutableData();
float *output = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
ReverseSequenceParameter *param = reinterpret_cast<ReverseSequenceParameter *>(op_parameter_);
MS_ASSERT(param);
param->is_seq_length_int32_ = in_tensors_.at(1)->data_type() == kNumberTypeInt32;
MS_ASSERT(input0);
MS_ASSERT(input1);
MS_ASSERT(output);
ReverseSequence(input0, input1, output, param);
return RET_OK;
}

@ -74,6 +74,11 @@ int ROIPoolingCPUKernel::ReSize() {
}
int ROIPoolingCPUKernel::DoExecute(int task_id) {
MS_ASSERT(in_ptr_);
MS_ASSERT(out_ptr_);
MS_ASSERT(roi_ptr_);
MS_ASSERT(max_c_);
MS_ASSERT(param_);
auto ret = ROIPooling(in_ptr_, out_ptr_, roi_ptr_, max_c_, task_id, param_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ROIPooling Execute error task_id[" << task_id << "] error_code[" << ret << "]";

@ -108,6 +108,7 @@ int ScatterNDCPUKernel::ReSize() {
}
int *indices_ptr = reinterpret_cast<int *>(indices->MutableData());
output_unit_offsets_.clear();
for (int i = 0; i < num_unit_; i++) {
int tmp_stride = 0;
for (int j = 0; j < indice_unit_rank; j++) {

@ -61,6 +61,7 @@ void ParseSentenceToWords(const StringPack &sentence, std::vector<StringPack> *w
int SkipGramCPUKernel::Run() {
skip_gram_parameter_ = reinterpret_cast<SkipGramParameter *>(op_parameter_);
MS_ASSERT(skip_gram_parameter_);
if (skip_gram_parameter_->ngram_size < 1) {
MS_LOG(ERROR) << "Skip Gram Parameter Error, NgramSize should be at least 1, get "
<< skip_gram_parameter_->ngram_size;

@ -65,6 +65,8 @@ int SliceCPUKernel::Init() {
int SliceCPUKernel::SliceParallelRun(int thread_id) {
const float *input_data = reinterpret_cast<const float *>(in_tensors_[0]->MutableData());
float *output_data = reinterpret_cast<float *>(out_tensors_[0]->MutableData());
MS_ASSERT(input_data);
MS_ASSERT(output_data);
DoSlice(input_data, output_data, param_, thread_id);
return RET_OK;
}

@ -93,7 +93,9 @@ int SoftmaxLastAxisRun(void *cdata, int task_id) {
int SoftmaxCPUKernel::Run() {
auto input_ptr = reinterpret_cast<float *>(in_tensors_.at(kInputIndex)->MutableData());
MS_ASSERT(input_ptr);
auto output_ptr = reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->MutableData());
MS_ASSERT(output_ptr);
int ret = RET_OK;
if (in_plane_size_ == 1) {
ret = ParallelLaunch(this->context_->thread_pool_, SoftmaxLastAxisRun, this, context_->thread_num_);
@ -101,7 +103,9 @@ int SoftmaxCPUKernel::Run() {
MS_LOG(ERROR) << "SoftmaxCPUKernel ParallelLaunch failed, ret: " << ret;
}
} else {
MS_ASSERT(sum_data_);
memset(sum_data_, 0, in_plane_size_ * out_plane_size_ * sizeof(float));
MS_ASSERT(softmax_param_);
Softmax(input_ptr, output_ptr, sum_data_, softmax_param_);
}
return ret;

@ -40,8 +40,10 @@ int SpaceToBatchFp32Run(void *cdata, int task_id) {
int SpaceToBatchCPUKernel::ReSize() {
auto input_tensor = in_tensors_.at(0);
MS_ASSERT(input_tensor);
auto output_tensor = out_tensors_.at(0);
MS_ASSERT(output_tensor);
MS_ASSERT(param);
for (size_t i = 0; i < DIMENSION_4D; i++) {
param_->input_shape_[i] = input_tensor->shape().at(i);
param_->output_shape_[i] = output_tensor->shape().at(i);

@ -65,6 +65,9 @@ int SpaceToDepthCPUKernel::SpaceToDepth(int task_id) {
auto in_shape = in_tensors_[0]->shape();
auto out_shape = out_tensors_[0]->shape();
SpaceToDepthParameter *param = reinterpret_cast<SpaceToDepthParameter *>(op_parameter_);
MS_ASSERT(param);
MS_ASSERT(input_ptr_);
MS_ASSERT(output_ptr_);
auto ret = SpaceToDepthForNHWC(input_ptr_, output_ptr_, in_shape.data(), out_shape.data(), in_shape.size(),
param->block_size_, thread_offset, thread_offset + num_unit_thread);
if (ret != RET_OK) {
@ -93,7 +96,6 @@ int SpaceToDepthCPUKernel::Run() {
MS_LOG(ERROR) << "SpaceToDepth error error_code[" << ret << "]";
return ret;
}
return RET_OK;
} else {
MS_LOG(ERROR) << "Only support NHWC now!";
return RET_ERROR;

@ -33,7 +33,9 @@ using mindspore::schema::PrimitiveType_SparseToDense;
namespace mindspore::kernel {
int SparseToDenseCPUKernel::Init() {
auto input2 = in_tensors_.at(2);
MS_ASSERT(input2);
auto input3 = in_tensors_.at(3);
MS_ASSERT(input3);
sparse_values = reinterpret_cast<float *>(input2->MutableData());
default_value = reinterpret_cast<float *>(input3->MutableData())[0];
if (input2->ElementsNum() == 1) {
@ -68,6 +70,10 @@ int SparseToDenseCPUKernel::DoExcute(int task_id) {
int index_start = task_id * count_unit_;
int index_end = index_start + real_dst_count;
int out_width = output_num / index_num;
MS_ASSERT(sparse_indices_vect);
MS_ASSERT(output_shape);
MS_ASSERT(sparse_values);
MS_ASSERT(output_data);
SparseToDense(sparse_indices_vect, output_shape, sparse_values, default_value, output_data, isScalar, index_start,
index_end, out_width);
return RET_OK;
@ -172,15 +178,16 @@ int SparseToDenseCPUKernel::Run() {
return RET_ERROR;
}
if (sparse_indices_vect != nullptr) {
for (int i = 0; i < index_num; i++) {
if (sparse_indices_vect[i] != nullptr) {
delete sparse_indices_vect[i];
}
}
if (sparse_indices_vect != nullptr) {
ctx_->allocator->Free(sparse_indices_vect);
sparse_indices_vect = nullptr;
}
return RET_OK;
}

@ -53,6 +53,7 @@ int SplitCPUKernel::Split(int task_id) {
return RET_OK;
}
int thread_offset = task_id * thread_n_stride_;
MS_ASSERT(input_ptr_);
auto ret =
DoSplit(input_ptr_, output_ptr_.data(), in_tensors_.front()->shape().data(), thread_offset, num_unit_thread, param);
if (ret != RET_OK) {

@ -34,6 +34,8 @@ int SqueezeCPUKernel::ReSize() { return RET_OK; }
int SqueezeCPUKernel::Run() {
mindspore::lite::STATUS ret = RET_ERROR;
size_t data_size = in_tensors_.front()->Size();
MS_ASSERT(input_ptr);
MS_ASSERT(output_ptr);
if (in_tensors_.front()->data_type() == kNumberTypeInt32) {
auto input_ptr = reinterpret_cast<int32_t *>(in_tensors_.front()->MutableData());
auto output_ptr = reinterpret_cast<int32_t *>(out_tensors_.front()->MutableData());

@ -47,22 +47,29 @@ int StackCPUKernel::Run() {
auto input0 = in_tensors_[0];
if (inputs_num == 1) {
auto *output_data = reinterpret_cast<int8_t *>(out_tensors_[0]->MutableData());
DoStackOneInput(reinterpret_cast<const int8_t *>(input0->MutableData()), output_data, input0->Size());
MS_ASSERT(output_data);
auto *input_data = reinterpret_cast<const int8_t *>(input0->MutableData());
MS_ASSERT(input_data);
DoStackOneInput(input_data, output_data, input0->Size());
return RET_OK;
}
auto input0_shape = in_tensors_[0]->shape();
if (in_tensors_[0]->data_type() == kNumberTypeFloat32 || in_tensors_[0]->data_type() == kNumberTypeFloat) {
auto *output_data = reinterpret_cast<float *>(out_tensors_[0]->MutableData());
MS_ASSERT(output_data);
float *inputs[inputs_num];
for (size_t i = 0; i < inputs_num; ++i) {
inputs[i] = reinterpret_cast<float *>(in_tensors_[i]->MutableData());
MS_ASSERT(inputs[i]);
}
DoStack(inputs, inputs_num, input0_shape.data(), input0_shape.size(), axis_, output_data);
} else {
auto *output_data = reinterpret_cast<int32_t *>(out_tensors_[0]->MutableData());
MS_ASSERT(output_data);
int32_t *inputs[inputs_num];
for (size_t i = 0; i < inputs_num; ++i) {
inputs[i] = reinterpret_cast<int32_t *>(in_tensors_[i]->MutableData());
MS_ASSERT(inputs[i]);
}
DoStackInt32(inputs, inputs_num, input0_shape.data(), input0_shape.size(), axis_, output_data);
}

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save