!8843 [MSLITE][Develop] fix code review

From: @sunsuodong
Reviewed-by: @ddwsky,@zhanghaibo5
Signed-off-by: @zhanghaibo5
pull/8843/MERGE
Committed by mindspore-ci-bot via Gitee, 4 years ago
commit 3ee7c035d4

@@ -23,7 +23,7 @@ void Calculate_Data(const float *input_data, float *output_data, int num, EluParameter
 }
 int Elu(const float *input_data, float *output_data, EluParameter *parameter, int task_id) {
-  for (size_t i = task_id; i < parameter->in_size_; i += parameter->thread_num_) {
+  for (size_t i = task_id; i < parameter->in_size_; i += parameter->op_parameter_.thread_num_) {
     Calculate_Data(input_data, output_data, i, parameter);
   }
   return NNACL_OK;

@@ -22,7 +22,6 @@
 typedef struct EluParameter {
   OpParameter op_parameter_;
   float alpha_;
-  int thread_num_;
   int in_size_;
 } EluParameter;
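Note: the two Elu hunks above drop the duplicated per-op thread_num_ field and read the thread count from the embedded OpParameter instead, so every worker strides over the tensor by the shared count. Below is a minimal compilable sketch of that layout and loop; the structs are stand-ins (the real nnacl OpParameter has more fields) and the textbook ELU formula stands in for the unshown Calculate_Data.

#include <cmath>

typedef struct OpParameter {
  int thread_num_;  // stand-in; shared by every op parameter struct
} OpParameter;

typedef struct EluParameter {
  OpParameter op_parameter_;  // kept as the first member, as in nnacl
  float alpha_;
  int in_size_;
} EluParameter;

// Worker task_id handles elements task_id, task_id + N, task_id + 2N, ...,
// where N is the shared thread count, so workers partition the tensor
// without overlapping.
void EluSlice(const float *in, float *out, const EluParameter *p, int task_id) {
  for (int i = task_id; i < p->in_size_; i += p->op_parameter_.thread_num_) {
    out[i] = in[i] < 0.0f ? p->alpha_ * std::expm1(in[i]) : in[i];  // ELU
  }
}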

@@ -47,7 +47,7 @@ int CopyData(float *input_data, int *ids, float *output_data, int num, Embedding
 }
 int EmbeddingLookup(float *input_data, int *ids, float *output_data, EmbeddingLookupParameter *parameter, int task_id) {
-  for (size_t i = task_id; i < parameter->ids_size_; i += parameter->thread_num) {
+  for (size_t i = task_id; i < parameter->ids_size_; i += parameter->op_parameter_.thread_num_) {
     int ret = CopyData(input_data, ids, output_data, i, parameter);
     if (ret != NNACL_OK) {
       return ret;

@@ -26,7 +26,6 @@ typedef struct EmbeddingLookupParameter {
   int ids_size_;
   int layer_size_;
   int layer_num_;
-  int thread_num;
 } EmbeddingLookupParameter;
 #ifdef __cplusplus

@@ -40,7 +40,7 @@ ConvolutionDepthwiseSWFp16CPUKernel::~ConvolutionDepthwiseSWFp16CPUKernel() {
   }
 }
-int ConvolutionDepthwiseSWFp16CPUKernel::InitBuffer() {
+int ConvolutionDepthwiseSWFp16CPUKernel::InitPackedInputOutput() {
   if (conv_param_->input_channel_ % C8NUM != 0) {
     need_align_ = true;
     int C8 = UP_DIV(conv_param_->input_channel_, C8NUM);
@@ -142,19 +142,17 @@ static int ConvDwSWFp16Run(void *cdata, int task_id) {
 }
 int ConvolutionDepthwiseSWFp16CPUKernel::Run() {
-  auto ret = InitBuffer();
+  auto ret = InitPackedInputOutput();
   if (ret != 0) {
-    MS_LOG(ERROR) << "Convolution depthwise fp16 InitBuffer failed.";
-    context_->allocator->Free(packed_input_);
-    context_->allocator->Free(packed_output_);
+    MS_LOG(ERROR) << "Convolution depthwise fp16 InitPackedInputOutput failed.";
+    FreePackedInputOutput();
     return ret;
   }
   ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Get Execute tensor failed.";
-    context_->allocator->Free(packed_input_);
-    context_->allocator->Free(packed_output_);
+    FreePackedInputOutput();
     ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
     return ret;
   }
@@ -173,11 +171,19 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() {
   if (need_align_) {
     PackNHWC8ToNHWCFp16(packed_output_, execute_output_, conv_param_->output_batch_,
                         conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
-    context_->allocator->Free(packed_input_);
-    context_->allocator->Free(packed_output_);
   }
   ConvolutionBaseFP16CPUKernel::IfCastOutput();
   ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
-  return RET_OK;
+  FreePackedInputOutput();
+  return ret;
 }
+void ConvolutionDepthwiseSWFp16CPUKernel::FreePackedInputOutput() {
+  if (need_align_) {
+    context_->allocator->Free(packed_input_);
+    context_->allocator->Free(packed_output_);
+    packed_input_ = nullptr;
+    packed_output_ = nullptr;
+  }
+}
 } // namespace mindspore::kernel
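Note: the new FreePackedInputOutput() helper replaces the Free calls that were copy-pasted on every exit path, and resetting the pointers to nullptr after freeing is what makes it safe to call from more than one path. A small self-contained sketch of the idiom, with a toy allocator standing in for context_->allocator:

#include <cstddef>
#include <cstdlib>

struct ToyAllocator {  // stand-in for the lite context allocator
  void *Malloc(std::size_t size) { return std::malloc(size); }
  void Free(void *ptr) { std::free(ptr); }  // std::free(nullptr) is a no-op
};

struct DwKernelSketch {  // stand-in for the depthwise kernel class
  ToyAllocator *allocator = nullptr;
  bool need_align_ = false;
  float *packed_input_ = nullptr;
  float *packed_output_ = nullptr;

  // Safe to call from the error path and the success path alike: after the
  // first call both pointers are nullptr, so a second call frees nothing
  // instead of double-freeing.
  void FreePackedInputOutput() {
    if (need_align_) {
      allocator->Free(packed_input_);
      allocator->Free(packed_output_);
      packed_input_ = nullptr;
      packed_output_ = nullptr;
    }
  }
};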

@@ -45,11 +45,12 @@ class ConvolutionDepthwiseSWFp16CPUKernel : public ConvolutionBaseFP16CPUKernel
   int ReSize() override;
   int Run() override;
-  int InitBuffer();
+  int InitPackedInputOutput();
   int InitWeightBias();
   int Execute(int task_id);
  private:
+  void FreePackedInputOutput();
   SlidingWindowParam *sliding_ = nullptr;
   float16_t *packed_weight_ = nullptr;
   float16_t *packed_input_ = nullptr;

@@ -53,7 +53,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::InitSlideParam() {
   return RET_OK;
 }
-int DeconvolutionDepthwiseFp16CPUKernel::InitBuffer() {
+int DeconvolutionDepthwiseFp16CPUKernel::InitPackedInputOutput() {
   if (conv_param_->input_channel_ % C8NUM != 0) {
     need_align_ = true;
     int C8 = UP_DIV(conv_param_->input_channel_, C8NUM);
@@ -156,19 +156,17 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() {
     MS_LOG(ERROR) << "Only support input channel equals output channel.";
     return RET_ERROR;
   }
-  auto ret = InitBuffer();
+  auto ret = InitPackedInputOutput();
   if (ret != 0) {
-    MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitBuffer failed.";
-    context_->allocator->Free(packed_input_);
-    context_->allocator->Free(packed_output_);
+    MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitPackedInputOutput failed.";
+    FreePackedInputOutput();
     return RET_ERROR;
   }
   ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Get Execute tensor failed.";
-    context_->allocator->Free(packed_input_);
-    context_->allocator->Free(packed_output_);
+    FreePackedInputOutput();
     ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
     return ret;
   }
@@ -191,14 +189,22 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() {
   if (need_align_) {
     PackNHWC8ToNHWCFp16(packed_output_, execute_output_, conv_param_->output_batch_,
                         conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
-    context_->allocator->Free(packed_input_);
-    context_->allocator->Free(packed_output_);
   }
   ConvolutionBaseFP16CPUKernel::IfCastOutput();
   ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
+  FreePackedInputOutput();
   return ret;
 }
+void DeconvolutionDepthwiseFp16CPUKernel::FreePackedInputOutput() {
+  if (need_align_) {
+    context_->allocator->Free(packed_input_);
+    context_->allocator->Free(packed_output_);
+    packed_input_ = nullptr;
+    packed_output_ = nullptr;
+  }
+}
 kernel::LiteKernel *CpuDeconvDwFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                                  const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
                                                  const lite::InnerContext *ctx, const kernel::KernelKey &desc,

@@ -46,12 +46,13 @@ class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseFP16CPUKernel
   int ReSize() override;
   int Run() override;
-  int InitBuffer();
+  int InitPackedInputOutput();
   int InitWeightBias();
   int InitSlideParam();
   int Execute(int task_id);
  private:
+  void FreePackedInputOutput();
   SlidingWindowParam *sliding_ = nullptr;
   float16_t *packed_weight_ = nullptr;
   float16_t *packed_input_ = nullptr;

@@ -183,7 +183,7 @@ int DeConvolutionFp16CPUKernel::Run() {
   int error_code = InitRunBuf();
   if (error_code != RET_OK) {
-    MS_LOG(ERROR) << "deconv fp32 InitRunBuf error! error_code[" << error_code << "]";
+    MS_LOG(ERROR) << "deconv fp16 InitRunBuf error! error_code[" << error_code << "]";
     ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
     FreeRunBuf();
     return RET_ERROR;
@@ -197,7 +197,7 @@ int DeConvolutionFp16CPUKernel::Run() {
     error_code = ParallelLaunch(this->context_->thread_pool_, DeConvFp16Run, this, thread_count_);
     if (error_code != RET_OK) {
-      MS_LOG(ERROR) << "deconv fp32 run error! error_code[" << error_code << "]";
+      MS_LOG(ERROR) << "deconv fp16 run error! error_code[" << error_code << "]";
     }
   }

@@ -70,7 +70,7 @@ int ConvolutionDepthwiseSWCPUKernel::InitWeightBias() {
   return RET_OK;
 }
-int ConvolutionDepthwiseSWCPUKernel::InitBuffer() {
+int ConvolutionDepthwiseSWCPUKernel::InitPackedInputOutput() {
   if (conv_param_->input_channel_ % C4NUM != 0) {
     need_align_ = true;
     int IC4 = UP_DIV(conv_param_->input_channel_, C4NUM);
@@ -134,9 +134,10 @@ int ConvDwSWRun(void *cdata, int task_id) {
 }
 int ConvolutionDepthwiseSWCPUKernel::Run() {
-  auto ret = InitBuffer();
+  auto ret = InitPackedInputOutput();
   if (ret != 0) {
-    MS_LOG(ERROR) << "Convolution depthwise fp32 InitBuffer failed.";
+    MS_LOG(ERROR) << "Convolution depthwise fp32 InitPackedInputOutput failed.";
+    FreePackedInputOutput();
     return RET_ERROR;
   }
   auto input_tensor = in_tensors_.at(kInputIndex);
@@ -159,16 +160,22 @@ int ConvolutionDepthwiseSWCPUKernel::Run() {
   ret = ParallelLaunch(this->context_->thread_pool_, ConvDwSWRun, this, conv_param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvDwSWRun error: error_code[" << ret << "]";
-    return RET_ERROR;
   }
   if (need_align_) {
     PackNHWC4ToNHWCFp32(packed_output_, output_ptr, conv_param_->output_batch_,
                         conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
   }
+  FreePackedInputOutput();
+  return ret;
+}
+void ConvolutionDepthwiseSWCPUKernel::FreePackedInputOutput() {
+  if (need_align_) {
+    context_->allocator->Free(packed_input_);
+    context_->allocator->Free(packed_output_);
+    packed_input_ = nullptr;
+    packed_output_ = nullptr;
+  }
-  return RET_OK;
 }
 } // namespace mindspore::kernel

@@ -35,11 +35,12 @@ class ConvolutionDepthwiseSWCPUKernel : public ConvolutionBaseCPUKernel {
   int ReSize() override;
   int Run() override;
-  int InitBuffer();
   int InitWeightBias();
   int Execute(int task_id);
  private:
+  int InitPackedInputOutput();
+  void FreePackedInputOutput();
   SlidingWindowParam *sliding_ = nullptr;
   float *packed_weight_ = nullptr;
   float *packed_input_ = nullptr;

@@ -146,21 +146,20 @@ int ConvolutionCPUKernel::Run() {
   auto ret = InitTmpBuffer();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init tmp buffer failed.";
+    FreeTmpBuffer();
     return RET_ERROR;
   }
-  int error_code = ParallelLaunch(this->context_->thread_pool_, ConvolutionImpl, this, thread_count_);
-  if (error_code != RET_OK) {
-    MS_LOG(ERROR) << "conv error error_code[" << error_code << "]";
-    FreeTmpBuffer();
-    return RET_ERROR;
+  ret = ParallelLaunch(this->context_->thread_pool_, ConvolutionImpl, this, thread_count_);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "conv error error_code[" << ret << "]";
   }
   FreeTmpBuffer();
-  return RET_OK;
+  return ret;
 }
 ConvParameter *CreateNewConvParameter(ConvParameter *parameter) {
-  auto conv_parameter = reinterpret_cast<ConvParameter *>(malloc(sizeof(ConvParameter)));
+  auto conv_parameter = new (std::nothrow) ConvParameter;
   if (conv_parameter == nullptr) {
     MS_LOG(ERROR) << "Malloc new conv parameter failed.";
     return nullptr;
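Note: CreateNewConvParameter switches from malloc plus reinterpret_cast to new (std::nothrow), which likewise returns nullptr on failure but is type-safe and needs no cast; the matching release then has to be delete rather than free(). A sketch of the idiom with a stand-in struct (the real ConvParameter is a larger plain struct):

#include <cstring>
#include <new>

struct ConvParamSketch {  // stand-in for ConvParameter
  int kernel_h_;
  int kernel_w_;
};

ConvParamSketch *Clone(const ConvParamSketch *src) {
  auto *copy = new (std::nothrow) ConvParamSketch;  // nullptr on failure, no exception
  if (copy == nullptr) {
    return nullptr;
  }
  std::memcpy(copy, src, sizeof(ConvParamSketch));  // fine for trivially copyable types
  return copy;  // caller must release with delete, not free()
}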

@@ -222,17 +222,16 @@ int ConvolutionWinogradCPUKernel::Run() {
   auto ret = InitTmpBuffer();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init tmp buffer failed.";
+    FreeTmpBuffer();
     return RET_ERROR;
   }
-  int error_code = ParallelLaunch(this->context_->thread_pool_, ConvolutionWinogradImpl, this, thread_count_);
-  if (error_code != RET_OK) {
-    MS_LOG(ERROR) << "conv winograd error error_code[" << error_code << "]";
-    FreeTmpBuffer();
-    return RET_ERROR;
+  ret = ParallelLaunch(this->context_->thread_pool_, ConvolutionWinogradImpl, this, thread_count_);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "conv winograd error error_code[" << ret << "]";
   }
   FreeTmpBuffer();
-  return RET_OK;
+  return ret;
 }
 } // namespace mindspore::kernel
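Note: both convolution Run() hunks converge on the same control flow: free the temporary buffers on the init failure path, log a launch failure without returning early, and fall through to a single FreeTmpBuffer() so cleanup runs exactly once while the launch status is still propagated. A compilable sketch of that shape, with trivial stand-ins for the kernel's own methods:

enum { RET_OK = 0, RET_ERROR = 1 };

// Trivial stand-ins so the control flow compiles on its own.
int InitTmpBuffer() { return RET_OK; }
void FreeTmpBuffer() {}
int LaunchKernels() { return RET_OK; }

int Run() {
  int ret = InitTmpBuffer();
  if (ret != RET_OK) {
    FreeTmpBuffer();  // release anything partially allocated before bailing out
    return RET_ERROR;
  }
  ret = LaunchKernels();
  // On failure: log only, no early return, so the shared cleanup below still runs.
  FreeTmpBuffer();
  return ret;  // RET_OK, or the launch error code
}

int main() { return Run(); }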

@@ -82,7 +82,7 @@ int DeconvolutionDepthwiseCPUKernel::InitWeightBias() {
   return RET_OK;
 }
-int DeconvolutionDepthwiseCPUKernel::InitBuffer() {
+int DeconvolutionDepthwiseCPUKernel::InitPackedInputOutput() {
   if (conv_param_->input_channel_ % C4NUM != 0) {
     need_align_ = true;
     int IC4 = UP_DIV(conv_param_->input_channel_, C4NUM);
@@ -151,9 +151,10 @@ int DeconvolutionDepthwiseCPUKernel::Run() {
     return RET_ERROR;
   }
-  auto ret = InitBuffer();
+  auto ret = InitPackedInputOutput();
   if (ret != 0) {
-    MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitBuffer failed.ret: " << ret;
+    MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitPackedInputOutput failed.ret: " << ret;
+    FreePackedInputOutput();
     return ret;
   }
@@ -176,16 +177,23 @@ int DeconvolutionDepthwiseCPUKernel::Run() {
   ret = ParallelLaunch(this->context_->thread_pool_, DeconvDwRun, this, conv_param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "DeconvDwRun error: error_code[" << ret << "]";
-    return RET_ERROR;
   }
   if (need_align_) {
     PackNHWC4ToNHWCFp32(packed_output_, output_addr, conv_param_->output_batch_,
                         conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
   }
+  FreePackedInputOutput();
+  return ret;
+}
+void DeconvolutionDepthwiseCPUKernel::FreePackedInputOutput() {
+  if (need_align_) {
+    context_->allocator->Free(packed_input_);
+    context_->allocator->Free(packed_output_);
+    packed_input_ = nullptr;
+    packed_output_ = nullptr;
+  }
-  return RET_OK;
 }
 kernel::LiteKernel *CpuDeconvDwFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,

@@ -36,11 +36,12 @@ class DeconvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel {
   int ReSize() override;
   int Run() override;
-  int InitBuffer();
   int InitWeightBias();
   int Execute(int task_id);
  private:
+  int InitPackedInputOutput();
+  void FreePackedInputOutput();
   SlidingWindowParam *sliding_ = nullptr;
   float *packed_weight_ = nullptr;
   float *packed_input_ = nullptr;

@@ -202,6 +202,7 @@ int DeConvolutionCPUKernel::Run() {
   int error_code = InitRunBuf();
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "deconv fp32 InitRunBuf error! error_code[" << error_code << "]";
+    FreeRunBuf();
     return error_code;
   }
@@ -218,6 +219,7 @@ int DeConvolutionCPUKernel::Run() {
     error_code = ParallelLaunch(this->context_->thread_pool_, DeConvFp32Run, this, thread_count_);
     if (error_code != RET_OK) {
       MS_LOG(ERROR) << "deconv fp32 run error! error_code[" << error_code << "]";
+      FreeRunBuf();
       return error_code;
     }
   }

@@ -390,6 +390,7 @@ int DeConvolutionWinogradCPUKernel::Run() {
   auto ret = InitRunBuf();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "InitRunBuf fail!ret: " << ret;
+    FreeRunBuf();
     return ret;
   }
@@ -410,5 +411,4 @@
   FreeRunBuf();
   return RET_OK;
 }
 } // namespace mindspore::kernel

@@ -26,13 +26,9 @@ using mindspore::schema::PrimitiveType_Elu;
 namespace mindspore::kernel {
 int EluCPUKernel::Init() {
-  elu_parameter_ = reinterpret_cast<EluParameter *>(op_parameter_);
-  elu_parameter_->thread_num_ = thread_count_;
   if (!InferShapeDone()) {
     return RET_OK;
   }
   return ReSize();
 }
@@ -42,6 +38,8 @@ int EluCPUKernel::ReSize() {
 }
 int EluCPUKernel::DoExcute(int task_id) {
+  auto input_addr = reinterpret_cast<float *>(in_tensors_.front()->MutableData());
+  auto output_addr = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
   Elu(input_addr, output_addr, elu_parameter_, task_id);
   return RET_OK;
 }
@@ -57,10 +55,7 @@ int EluRun(void *cdata, int task_id) {
 }
 int EluCPUKernel::Run() {
-  input_addr = reinterpret_cast<float *>(in_tensors_.front()->MutableData());
-  output_addr = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
-  auto ret = ParallelLaunch(this->context_->thread_pool_, EluRun, this, elu_parameter_->thread_num_);
+  auto ret = ParallelLaunch(this->context_->thread_pool_, EluRun, this, op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Elu error: error_code[" << ret << "]";
     return RET_ERROR;
@@ -72,16 +67,6 @@ kernel::LiteKernel *CpuEluFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                             const std::vector<lite::Tensor *> &outputs, OpParameter *parameter,
                                             const lite::InnerContext *ctx, const KernelKey &desc,
                                             const mindspore::lite::PrimitiveC *primitive) {
-  if (parameter == nullptr) {
-    MS_LOG(ERROR) << "parameter is nullptr";
-    return nullptr;
-  }
-  if (ctx == nullptr) {
-    MS_LOG(ERROR) << "ctx is nullptr";
-    free(parameter);
-    return nullptr;
-  }
   MS_ASSERT(desc.type == PrimitiveType_Elu);
   auto *kernel = new (std::nothrow) EluCPUKernel(parameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "Create Kernel failed, name: " << parameter->name_;

@@ -24,25 +24,21 @@
 namespace mindspore::kernel {
 class EluCPUKernel : public LiteKernel {
  public:
-  explicit EluCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
-                        const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
-                        const mindspore::lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {}
-  ~EluCPUKernel() override{};
+  EluCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
+               const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
+               const mindspore::lite::PrimitiveC *primitive)
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {
+    elu_parameter_ = reinterpret_cast<EluParameter *>(op_parameter_);
+  }
+  ~EluCPUKernel() = default;
   int Init() override;
   int ReSize() override;
   int Run() override;
   int DoExcute(int task_id);
- protected:
-  const lite::InnerContext *ctx_ = nullptr;
-  int thread_count_ = 1;
-  EluParameter *elu_parameter_ = nullptr;
  private:
-  float *input_addr = nullptr;
-  float *output_addr = nullptr;
+  EluParameter *elu_parameter_ = nullptr;
 };
 } // namespace mindspore::kernel
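Note: the rewritten constructor caches the EluParameter pointer once instead of re-deriving it in Init(). The reinterpret_cast is valid because OpParameter is kept as the first member of every nnacl parameter struct, so a pointer to it is pointer-interconvertible with a pointer to the enclosing struct. A minimal check of that layout rule with stand-in structs:

#include <cassert>

struct OpParameter { int type_; int thread_num_; };                // stand-in
struct EluParameter { OpParameter op_parameter_; float alpha_; };  // OpParameter first

int main() {
  EluParameter elu{{0, 4}, 1.0f};
  OpParameter *base = &elu.op_parameter_;                // what the framework stores
  auto *param = reinterpret_cast<EluParameter *>(base);  // what the constructor does
  assert(param == &elu);
  assert(param->alpha_ == 1.0f);
  return 0;
}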

@@ -26,9 +26,6 @@ using mindspore::schema::PrimitiveType_EmbeddingLookup;
 namespace mindspore::kernel {
 int EmbeddingLookupCPUKernel::Init() {
-  embedding_lookup_parameter_ = reinterpret_cast<EmbeddingLookupParameter *>(op_parameter_);
-  embedding_lookup_parameter_->thread_num = thread_count_;
   if (!InferShapeDone()) {
     return RET_OK;
   }
@@ -36,24 +33,24 @@ int EmbeddingLookupCPUKernel::Init() {
 }
 int EmbeddingLookupCPUKernel::ReSize() {
-  embedding_lookup_parameter_->ids_size_ = in_tensors_.back()->ElementsNum();
-  embedding_lookup_parameter_->layer_size_ = 1;
+  param_->ids_size_ = in_tensors_.back()->ElementsNum();
+  param_->layer_size_ = 1;
   auto in_shape = in_tensors_.front()->shape();
   for (size_t i = 1; i < in_shape.size(); ++i) {
-    embedding_lookup_parameter_->layer_size_ *= in_shape[i];
+    param_->layer_size_ *= in_shape[i];
   }
-  embedding_lookup_parameter_->layer_num_ = 0;
+  param_->layer_num_ = 0;
   for (size_t i = 0; i < in_tensors_.size() - 1; ++i) {
-    embedding_lookup_parameter_->layer_num_ += in_tensors_[i]->shape()[0];
+    param_->layer_num_ += in_tensors_[i]->shape()[0];
   }
   return RET_OK;
 }
 int EmbeddingLookupCPUKernel::DoExcute(int task_id) {
-  int error_code = EmbeddingLookup(input_addr_, ids_addr_, output_addr_, embedding_lookup_parameter_, task_id);
+  auto ids_addr = reinterpret_cast<int *>(in_tensors_.back()->MutableData());
+  auto output_addr = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
+  int error_code = EmbeddingLookup(input_addr_, ids_addr, output_addr, param_, task_id);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "embedding lookup error error_code[" << error_code << "]";
     return RET_ERROR;
@@ -62,8 +59,8 @@ int EmbeddingLookupCPUKernel::DoExcute(int task_id) {
 }
 int EmbeddingLookupRun(void *cdata, int task_id) {
-  auto EmbeddingLookupData = reinterpret_cast<EmbeddingLookupCPUKernel *>(cdata);
-  auto ret = EmbeddingLookupData->DoExcute(task_id);
+  auto kernel = reinterpret_cast<EmbeddingLookupCPUKernel *>(cdata);
+  auto ret = kernel->DoExcute(task_id);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "EmbeddingLookupRun error task_id[" << task_id << "] error_code[" << ret << "]";
     return RET_ERROR;
@@ -73,39 +70,38 @@ int EmbeddingLookupRun(void *cdata, int task_id) {
 int EmbeddingLookupCPUKernel::Run() {
   MS_ASSERT(context_->allocator != nullptr);
-  input_addr_ = reinterpret_cast<float *>(context_->allocator->Malloc(
-    sizeof(float) * embedding_lookup_parameter_->layer_size_ * embedding_lookup_parameter_->layer_num_));
-  embedding_lookup_parameter_->is_regulated_ =
-    reinterpret_cast<bool *>(context_->allocator->Malloc(sizeof(bool) * embedding_lookup_parameter_->layer_num_));
-  if (input_addr_ == nullptr || embedding_lookup_parameter_->is_regulated_ == nullptr) {
+  input_addr_ =
+    reinterpret_cast<float *>(context_->allocator->Malloc(sizeof(float) * param_->layer_size_ * param_->layer_num_));
+  param_->is_regulated_ = reinterpret_cast<bool *>(context_->allocator->Malloc(sizeof(bool) * param_->layer_num_));
+  if (input_addr_ == nullptr || param_->is_regulated_ == nullptr) {
     MS_LOG(ERROR) << "Memory allocation failed";
-    context_->allocator->Free(input_addr_);
-    context_->allocator->Free(embedding_lookup_parameter_->is_regulated_);
+    FreeRunBuff();
     return RET_ERROR;
   }
-  for (int i = 0; i < embedding_lookup_parameter_->layer_num_; ++i) {
-    embedding_lookup_parameter_->is_regulated_[i] = embedding_lookup_parameter_->max_norm_ == 0;
+  for (int i = 0; i < param_->layer_num_; ++i) {
+    param_->is_regulated_[i] = param_->max_norm_ == 0;
   }
   int dest_loc = 0;
   for (size_t i = 0; i < in_tensors_.size() - 1; i++) {
     auto input_t = reinterpret_cast<float *>(in_tensors_.at(i)->MutableData());
     memcpy(input_addr_ + dest_loc, input_t, sizeof(float) * in_tensors_.at(i)->ElementsNum());
     dest_loc += in_tensors_.at(i)->ElementsNum();
   }
-  output_addr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
-  ids_addr_ = reinterpret_cast<int *>(in_tensors_.back()->MutableData());
-  auto ret =
-    ParallelLaunch(this->context_->thread_pool_, EmbeddingLookupRun, this, embedding_lookup_parameter_->thread_num);
-  context_->allocator->Free(input_addr_);
-  context_->allocator->Free(embedding_lookup_parameter_->is_regulated_);
+  auto ret = ParallelLaunch(this->context_->thread_pool_, EmbeddingLookupRun, this, op_parameter_->thread_num_);
+  FreeRunBuff();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "EmbeddingLookup error: error_code[" << ret << "]";
   }
   return ret;
 }
+void EmbeddingLookupCPUKernel::FreeRunBuff() {
+  context_->allocator->Free(input_addr_);
+  context_->allocator->Free(param_->is_regulated_);
+  input_addr_ = nullptr;
+  param_->is_regulated_ = nullptr;
+}
 kernel::LiteKernel *CpuEmbeddingLookupFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                                         const std::vector<lite::Tensor *> &outputs,
                                                         OpParameter *parameter, const lite::InnerContext *ctx,

@@ -27,30 +27,20 @@ class EmbeddingLookupCPUKernel : public LiteKernel {
   explicit EmbeddingLookupCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                     const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                                     const mindspore::lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {}
-  ~EmbeddingLookupCPUKernel() override {
-    if (input_addr_ != nullptr) {
-      free(input_addr_);
-    }
-    if (embedding_lookup_parameter_->is_regulated_ != nullptr) {
-      free(embedding_lookup_parameter_->is_regulated_);
-    }
-  };
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {
+    param_ = reinterpret_cast<EmbeddingLookupParameter *>(parameter);
+  }
+  ~EmbeddingLookupCPUKernel() = default;
   int Init() override;
   int ReSize() override;
   int Run() override;
   int DoExcute(int task_id);
- protected:
-  const lite::InnerContext *ctx_ = nullptr;
-  int thread_count_ = 1;
-  EmbeddingLookupParameter *embedding_lookup_parameter_ = nullptr;
  private:
+  void FreeRunBuff();
+  EmbeddingLookupParameter *param_ = nullptr;
   float *input_addr_ = nullptr;
-  float *output_addr_ = nullptr;
-  int *ids_addr_ = nullptr;
 };
 } // namespace mindspore::kernel

@@ -44,7 +44,9 @@ void FullconnectionCPUKernel::FreeBuf() {
 int FullconnectionCPUKernel::ReSize() {
   FreeBuf();
   int row = 1;
-  for (size_t i = 0; i < out_tensors_[0]->shape().size() - 1; ++i) row *= (out_tensors_[0]->shape())[i];
+  for (size_t i = 0; i < out_tensors_[0]->shape().size() - 1; ++i) {
+    row *= (out_tensors_[0]->shape())[i];
+  }
   fc_param_->row_ = row;
   fc_param_->col_ = out_tensors_[0]->shape().back();
   fc_param_->deep_ = (in_tensors_[1]->shape())[1];
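Note: this hunk only adds braces, but the loop itself encodes the fully connected reshape: every output dimension except the last is folded into the matmul row count, while the last dimension is the column count. A tiny sketch of that computation:

#include <cassert>
#include <cstddef>
#include <vector>

// Product of all output dims except the last; the last dim is the matmul
// column count (fc_param_->col_ in the kernel).
int RowCount(const std::vector<int> &out_shape) {
  int row = 1;
  for (std::size_t i = 0; i + 1 < out_shape.size(); ++i) {
    row *= out_shape[i];
  }
  return row;
}

int main() {
  assert(RowCount({2, 8, 16}) == 16);  // batch 2 * 8 rows -> 16 matmul rows, col = 16
  return 0;
}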

@@ -56,13 +56,12 @@ int InstanceNormCPUKernel::DoInstanceNorm(int task_id) {
 }
 int InstanceNormRun(void *cdata, int task_id) {
-  auto InstanceNormData = reinterpret_cast<InstanceNormCPUKernel *>(cdata);
-  auto ret = InstanceNormData->DoInstanceNorm(task_id);
+  auto kernel = reinterpret_cast<InstanceNormCPUKernel *>(cdata);
+  auto ret = kernel->DoInstanceNorm(task_id);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "InstanceNormRun error task_id[" << task_id << "] error_code[" << ret << "]";
     return RET_ERROR;
   }
-  return RET_OK;
+  return ret;
 }
 int InstanceNormCPUKernel::Run() {

@@ -58,8 +58,8 @@ int LayerNormCPUKernel::DoLayerNorm(int thread_id) {
 }
 int LayerNormRun(void *cdata, int task_id) {
-  auto LayerNormData = reinterpret_cast<LayerNormCPUKernel *>(cdata);
-  auto ret = LayerNormData->DoLayerNorm(task_id);
+  auto kernel = reinterpret_cast<LayerNormCPUKernel *>(cdata);
+  auto ret = kernel->DoLayerNorm(task_id);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "LayerNormRun error task_id[" << task_id << "] error_code[" << ret << "]";
     return RET_ERROR;
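Note: InstanceNormRun and LayerNormRun follow the ParallelLaunch callback convention used throughout these files: the thread pool hands back an opaque cdata pointer plus a task id, and the callback casts it to the kernel and forwards the per-task status. A self-contained sketch with a stub kernel (all names here are placeholders):

#include <cstdio>

enum { RET_OK = 0, RET_ERROR = 1 };

struct KernelStub {  // placeholder for the CPU kernel class
  int DoTask(int task_id) {
    std::printf("task %d\n", task_id);
    return RET_OK;
  }
};

// Matches the int (*)(void *, int) shape that ParallelLaunch expects.
int KernelRun(void *cdata, int task_id) {
  auto *kernel = reinterpret_cast<KernelStub *>(cdata);
  auto ret = kernel->DoTask(task_id);
  if (ret != RET_OK) {
    std::printf("KernelRun error task_id[%d] error_code[%d]\n", task_id, ret);
    return RET_ERROR;
  }
  return ret;
}

int main() {
  KernelStub k;
  for (int t = 0; t < 2; ++t) {
    KernelRun(&k, t);  // ParallelLaunch would issue these across pool threads
  }
  return 0;
}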

@@ -72,7 +72,7 @@ int ConvolutionDepthwiseSWInt8CPUKernel::InitWeightBias() {
   return RET_OK;
 }
-int ConvolutionDepthwiseSWInt8CPUKernel::InitBuffer() {
+int ConvolutionDepthwiseSWInt8CPUKernel::InitPackedInputOutput() {
   if (conv_param_->input_channel_ % C8NUM != 0) {
     need_align_ = true;
@@ -319,15 +319,10 @@ int ConvDwSWInt8Run(void *cdata, int task_id) {
 }
 int ConvolutionDepthwiseSWInt8CPUKernel::Run() {
-  auto ret = InitBuffer();
+  auto ret = InitPackedInputOutput();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Depthwise int8 ReSize error!";
-    if (need_align_) {
-      context_->allocator->Free(packed_input_);
-      context_->allocator->Free(packed_output_);
-      packed_input_ = nullptr;
-      packed_output_ = nullptr;
-    }
+    FreePackedInputOutput();
     return ret;
   }
@@ -353,12 +348,17 @@ int ConvolutionDepthwiseSWInt8CPUKernel::Run() {
   if (need_align_) {
     PackNHWC8ToNHWCInt8(packed_output_, output_addr, conv_param_->output_batch_,
                         conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
   }
+  FreePackedInputOutput();
+  return ret;
+}
+void ConvolutionDepthwiseSWInt8CPUKernel::FreePackedInputOutput() {
+  if (need_align_) {
+    context_->allocator->Free(packed_input_);
+    context_->allocator->Free(packed_output_);
+    packed_input_ = nullptr;
+    packed_output_ = nullptr;
+  }
-  return ret;
 }
 } // namespace mindspore::kernel

Some files were not shown because too many files have changed in this diff.
