|
|
|
@ -24,6 +24,10 @@ using mindspore::lite::RET_OK;
|
|
|
|
|
namespace mindspore::kernel {
|
|
|
|
|
Convolution1x1CPUKernel::~Convolution1x1CPUKernel() {
|
|
|
|
|
FreeTmpBuffer();
|
|
|
|
|
if (weight_ptr_ != nullptr) {
|
|
|
|
|
free(weight_ptr_);
|
|
|
|
|
weight_ptr_ = nullptr;
|
|
|
|
|
}
|
|
|
|
|
if (matmul_param_ != nullptr) {
|
|
|
|
|
delete matmul_param_;
|
|
|
|
|
matmul_param_ = nullptr;
|
|
|
|
@ -31,18 +35,10 @@ Convolution1x1CPUKernel::~Convolution1x1CPUKernel() {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void Convolution1x1CPUKernel::FreeTmpBuffer() {
|
|
|
|
|
if (weight_ptr_ != nullptr) {
|
|
|
|
|
free(weight_ptr_);
|
|
|
|
|
weight_ptr_ = nullptr;
|
|
|
|
|
}
|
|
|
|
|
if (pack_input_ != nullptr) {
|
|
|
|
|
free(pack_input_);
|
|
|
|
|
pack_input_ = nullptr;
|
|
|
|
|
}
|
|
|
|
|
if (pre_trans_input_ && input_ptr_ != nullptr) {
|
|
|
|
|
free(input_ptr_);
|
|
|
|
|
input_ptr_ = nullptr;
|
|
|
|
|
}
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -51,12 +47,7 @@ int Convolution1x1CPUKernel::ReSize() {
|
|
|
|
|
ConvolutionBaseCPUKernel::Init();
|
|
|
|
|
InitConv1x1MatmulParam();
|
|
|
|
|
|
|
|
|
|
int error_code = InitConv1x1BiasWeight();
|
|
|
|
|
if (error_code != RET_OK) {
|
|
|
|
|
MS_LOG(ERROR) << "Convolution base init failed.";
|
|
|
|
|
return error_code;
|
|
|
|
|
}
|
|
|
|
|
error_code = InitConv1x1Param();
|
|
|
|
|
int error_code = InitConv1x1Param();
|
|
|
|
|
if (error_code != RET_OK) {
|
|
|
|
|
MS_LOG(ERROR) << "Convolution base init failed.";
|
|
|
|
|
return error_code;
|
|
|
|
@ -76,40 +67,35 @@ void Convolution1x1CPUKernel::InitConv1x1MatmulParam() {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int Convolution1x1CPUKernel::InitConv1x1BiasWeight() {
|
|
|
|
|
auto filter_tensor = in_tensors_.at(kWeightIndex);
|
|
|
|
|
auto input_channel = filter_tensor->Channel();
|
|
|
|
|
auto output_channel = filter_tensor->Batch();
|
|
|
|
|
|
|
|
|
|
int size = UP_ROUND(output_channel, C8NUM) * sizeof(float);
|
|
|
|
|
bias_data_ = malloc(size);
|
|
|
|
|
if (bias_data_ == nullptr) {
|
|
|
|
|
MS_LOG(ERROR) << "Conv1x1 Malloc bias_ptr_ error!";
|
|
|
|
|
return RET_ERROR;
|
|
|
|
|
}
|
|
|
|
|
memset(bias_data_, 0, size);
|
|
|
|
|
if (in_tensors_.size() == 3) {
|
|
|
|
|
bias_data_ = malloc(matmul_param_->col_8_ * sizeof(float));
|
|
|
|
|
if (bias_data_ == nullptr) {
|
|
|
|
|
MS_LOG(ERROR) << "Conv1x1 Malloc bias_ptr_ error!";
|
|
|
|
|
return RET_ERROR;
|
|
|
|
|
}
|
|
|
|
|
memset(bias_data_, 0, matmul_param_->col_8_ * sizeof(float));
|
|
|
|
|
memcpy(bias_data_, in_tensors_[2]->Data(), conv_param_->output_channel_ * sizeof(float));
|
|
|
|
|
} else {
|
|
|
|
|
bias_data_ = nullptr;
|
|
|
|
|
memcpy(bias_data_, in_tensors_[kBiasIndex]->Data(), output_channel * sizeof(float));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
weight_ptr_ = reinterpret_cast<float *>(malloc(matmul_param_->deep_ * matmul_param_->col_8_ * sizeof(float)));
|
|
|
|
|
size = input_channel * UP_ROUND(output_channel, C8NUM) * sizeof(float);
|
|
|
|
|
weight_ptr_ = reinterpret_cast<float *>(malloc(size));
|
|
|
|
|
if (weight_ptr_ == nullptr) {
|
|
|
|
|
MS_LOG(ERROR) << "Conv1x1 Malloc weight_ptr_ error!";
|
|
|
|
|
return RET_ERROR;
|
|
|
|
|
}
|
|
|
|
|
memset(weight_ptr_, 0, matmul_param_->deep_ * matmul_param_->col_8_ * sizeof(float));
|
|
|
|
|
RowMajor2Col8Major(reinterpret_cast<float *>(in_tensors_[1]->Data()), weight_ptr_, matmul_param_->col_,
|
|
|
|
|
matmul_param_->deep_);
|
|
|
|
|
memset(weight_ptr_, 0, size);
|
|
|
|
|
RowMajor2Col8Major(reinterpret_cast<float *>(filter_tensor->Data()), weight_ptr_, output_channel, input_channel);
|
|
|
|
|
return RET_OK;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int Convolution1x1CPUKernel::InitConv1x1Param() {
|
|
|
|
|
pre_trans_input_ = (conv_param_->pad_h_ != 0 || conv_param_->pad_w_ != 0 || conv_param_->stride_h_ != 1 ||
|
|
|
|
|
conv_param_->stride_w_ != 1);
|
|
|
|
|
if (pre_trans_input_) {
|
|
|
|
|
input_ptr_ = reinterpret_cast<float *>(malloc(matmul_param_->row_ * matmul_param_->deep_ * sizeof(float)));
|
|
|
|
|
if (input_ptr_ == nullptr) {
|
|
|
|
|
MS_LOG(ERROR) << "Conv1x1 Malloc input_ptr_ error!";
|
|
|
|
|
return RET_MEMORY_FAILED;
|
|
|
|
|
}
|
|
|
|
|
memset(input_ptr_, 0, matmul_param_->row_ * matmul_param_->deep_ * sizeof(float));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
thread_count_ = MSMIN(op_parameter_->thread_num_, UP_DIV(matmul_param_->col_, C8NUM));
|
|
|
|
|
thread_stride_ = UP_DIV(UP_DIV(matmul_param_->col_, C8NUM), thread_count_) * C8NUM;
|
|
|
|
@ -140,6 +126,12 @@ int Convolution1x1CPUKernel::Init() {
|
|
|
|
|
if (!InferShapeDone()) {
|
|
|
|
|
return RET_OK;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int error_code = InitConv1x1BiasWeight();
|
|
|
|
|
if (error_code != RET_OK) {
|
|
|
|
|
MS_LOG(ERROR) << "Convolution base init failed.";
|
|
|
|
|
return error_code;
|
|
|
|
|
}
|
|
|
|
|
return ReSize();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -177,6 +169,15 @@ int Convolution1x1CPUKernel::Run() {
|
|
|
|
|
auto src_in = reinterpret_cast<float *>(in_tensors_[0]->Data());
|
|
|
|
|
auto src_out = reinterpret_cast<float *>(out_tensors_[0]->Data());
|
|
|
|
|
|
|
|
|
|
if (pre_trans_input_) {
|
|
|
|
|
input_ptr_ =
|
|
|
|
|
reinterpret_cast<float *>(ctx_->allocator->Malloc(matmul_param_->row_ * matmul_param_->deep_ * sizeof(float)));
|
|
|
|
|
if (input_ptr_ == nullptr) {
|
|
|
|
|
MS_LOG(ERROR) << "Conv1x1 Malloc input_ptr_ error!";
|
|
|
|
|
return RET_MEMORY_FAILED;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) {
|
|
|
|
|
Pre1x1Trans(src_in + batch_index * conv_param_->input_h_ * conv_param_->input_w_ * conv_param_->input_channel_,
|
|
|
|
|
src_out + batch_index * matmul_param_->row_ * matmul_param_->col_);
|
|
|
|
@ -187,6 +188,11 @@ int Convolution1x1CPUKernel::Run() {
|
|
|
|
|
return RET_ERROR;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (pre_trans_input_) {
|
|
|
|
|
ctx_->allocator->Free(input_ptr_);
|
|
|
|
|
input_ptr_ = nullptr;
|
|
|
|
|
}
|
|
|
|
|
return RET_OK;
|
|
|
|
|
}
|
|
|
|
|
} // namespace mindspore::kernel
|
|
|
|
|