diff --git a/mindspore/lite/nnacl/fp32/conv_common_fp32.c b/mindspore/lite/nnacl/fp32/conv_common_fp32.c new file mode 100644 index 0000000000..fd314aae4b --- /dev/null +++ b/mindspore/lite/nnacl/fp32/conv_common_fp32.c @@ -0,0 +1,63 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "nnacl/fp32/conv_common_fp32.h" +#include <string.h> +#include "nnacl/fp32/common_func_fp32.h" +#include "nnacl/fp32/matmul_fp32.h" + +// fp32 conv common +void ConvFp32(const float *input_data, float *packed_input, const float *packed_weight, const float *bias_data, + float *col_major_input, float *output_data, int task_id, const ConvParameter *conv_param) { + int out_channel = conv_param->output_channel_; + int deep = conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_; + int output_count = conv_param->output_h_ * conv_param->output_w_; +#ifdef ENABLE_AVX + const int cal_num = C6NUM; +#elif defined(ENABLE_SSE) + const int cal_num = C4NUM; +#else + const int cal_num = C12NUM; +#endif + int output_tile_count = UP_DIV(output_count, cal_num); + + for (int b = 0; b < conv_param->input_batch_; b++) { + int in_batch_offset = b * conv_param->input_channel_ * conv_param->input_h_ * conv_param->input_w_; + int out_batch_offset = b * out_channel * output_count; + for (int thread_id = task_id; thread_id < output_tile_count; thread_id += conv_param->thread_num_) { + int start_index = thread_id * cal_num; + 
int real_cal_num = (output_count - start_index) < cal_num ? (output_count - start_index) : cal_num; + float *gemm_input = packed_input + task_id * deep * cal_num; + float *col_major_gemm_input = col_major_input + task_id * deep * cal_num; + size_t packed_input_size = deep * cal_num * sizeof(float); + memset(gemm_input, 0, packed_input_size); + memset(col_major_gemm_input, 0, packed_input_size); + Im2ColPackUnitFp32(input_data + in_batch_offset, conv_param, gemm_input, real_cal_num, start_index); + + int out_offset = thread_id * cal_num * out_channel + out_batch_offset; + float *gemm_output = output_data + out_offset; +#ifdef ENABLE_AVX + RowMajor2Col6Major(gemm_input, col_major_gemm_input, cal_num, deep); +#elif defined(ENABLE_SSE) + RowMajor2Col4Major(gemm_input, col_major_gemm_input, cal_num, deep); +#else + RowMajor2Col12Major(gemm_input, col_major_gemm_input, cal_num, deep); +#endif + MatMulOpt(col_major_gemm_input, packed_weight, gemm_output, bias_data, conv_param->act_type_, deep, real_cal_num, + out_channel, out_channel, OutType_Nhwc); + } + } +} diff --git a/mindspore/lite/nnacl/fp32/conv_common_fp32.h b/mindspore/lite/nnacl/fp32/conv_common_fp32.h new file mode 100644 index 0000000000..fe5bb1b9d1 --- /dev/null +++ b/mindspore/lite/nnacl/fp32/conv_common_fp32.h @@ -0,0 +1,40 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_NNACL_FP32_CONV_COMMON_H_ +#define MINDSPORE_LITE_NNACL_FP32_CONV_COMMON_H_ + +#ifdef ENABLE_NEON +#include <arm_neon.h> +#endif +#include "nnacl/pack.h" +#include "nnacl/op_base.h" +#include "nnacl/common_func.h" +#include "nnacl/conv_parameter.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// fp32 convolution common (im2col+gemm) +void ConvFp32(const float *input_data, float *packed_input, const float *packed_weight, const float *bias_data, + float *col_major_input, float *output_data, int task_id, const ConvParameter *conv_param); + +#ifdef __cplusplus +} +#endif + +#endif // MINDSPORE_LITE_NNACL_FP32_CONV_COMMON_H_ diff --git a/mindspore/lite/nnacl/fp32/conv_depthwise_fp32.c b/mindspore/lite/nnacl/fp32/conv_depthwise_fp32.c index 3f0d86e6cb..3e088002cb 100644 --- a/mindspore/lite/nnacl/fp32/conv_depthwise_fp32.c +++ b/mindspore/lite/nnacl/fp32/conv_depthwise_fp32.c @@ -15,6 +15,7 @@ */ #include "nnacl/fp32/conv_depthwise_fp32.h" +#include "nnacl/common_func.h" #include "nnacl/fp32/common_func_fp32.h" #include "nnacl/winograd_transform.h" #ifdef ENABLE_ARM64 diff --git a/mindspore/lite/nnacl/fp32/conv_fp32.c b/mindspore/lite/nnacl/fp32/conv_winograd_fp32.c similarity index 64% rename from mindspore/lite/nnacl/fp32/conv_fp32.c rename to mindspore/lite/nnacl/fp32/conv_winograd_fp32.c index 5e11a2cd65..9c1311c1e1 100644 --- a/mindspore/lite/nnacl/fp32/conv_fp32.c +++ b/mindspore/lite/nnacl/fp32/conv_winograd_fp32.c @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,55 +14,12 @@ * limitations under the License. 
 */ -#include "nnacl/fp32/conv_fp32.h" +#include "nnacl/fp32/conv_winograd_fp32.h" #include <string.h> #include "nnacl/fp32/common_func_fp32.h" #include "nnacl/winograd_transform.h" #include "nnacl/fp32/matmul_fp32.h" -// fp32 conv common -void ConvFp32(const float *input_data, float *packed_input, const float *packed_weight, const float *bias_data, - float *col_major_input, float *output_data, int task_id, const ConvParameter *conv_param) { - int out_channel = conv_param->output_channel_; - int deep = conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_; - int output_count = conv_param->output_h_ * conv_param->output_w_; -#ifdef ENABLE_AVX - const int cal_num = C6NUM; -#elif defined(ENABLE_SSE) - const int cal_num = C4NUM; -#else - const int cal_num = C12NUM; -#endif - int output_tile_count = UP_DIV(output_count, cal_num); - - for (int b = 0; b < conv_param->input_batch_; b++) { - int in_batch_offset = b * conv_param->input_channel_ * conv_param->input_h_ * conv_param->input_w_; - int out_batch_offset = b * out_channel * output_count; - for (int thread_id = task_id; thread_id < output_tile_count; thread_id += conv_param->thread_num_) { - int start_index = thread_id * cal_num; - int real_cal_num = (output_count - start_index) < cal_num ? 
(output_count - start_index) : cal_num; - float *gemm_input = packed_input + task_id * deep * cal_num; - float *col_major_gemm_input = col_major_input + task_id * deep * cal_num; - size_t packed_input_size = deep * cal_num * sizeof(float); - memset(gemm_input, 0, packed_input_size); - memset(col_major_gemm_input, 0, packed_input_size); - Im2ColPackUnitFp32(input_data + in_batch_offset, conv_param, gemm_input, real_cal_num, start_index); - - int out_offset = thread_id * cal_num * out_channel + out_batch_offset; - float *gemm_output = output_data + out_offset; -#ifdef ENABLE_AVX - RowMajor2Col6Major(gemm_input, col_major_gemm_input, cal_num, deep); -#elif defined(ENABLE_SSE) - RowMajor2Col4Major(gemm_input, col_major_gemm_input, cal_num, deep); -#else - RowMajor2Col12Major(gemm_input, col_major_gemm_input, cal_num, deep); -#endif - MatMulOpt(col_major_gemm_input, packed_weight, gemm_output, bias_data, conv_param->act_type_, deep, real_cal_num, - out_channel, out_channel, OutType_Nhwc); - } - } -} - // fp32 conv winograd void ConvWinogardFp32(const float *input_data, const float *trans_weight, const float *bias_data, float *output_data, TmpBufferAddress *buffer_list, int task_id, const ConvParameter *conv_param, diff --git a/mindspore/lite/nnacl/fp32/conv_fp32.h b/mindspore/lite/nnacl/fp32/conv_winograd_fp32.h similarity index 73% rename from mindspore/lite/nnacl/fp32/conv_fp32.h rename to mindspore/lite/nnacl/fp32/conv_winograd_fp32.h index a7736c8b52..aaa8402985 100644 --- a/mindspore/lite/nnacl/fp32/conv_fp32.h +++ b/mindspore/lite/nnacl/fp32/conv_winograd_fp32.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +14,8 @@ * limitations under the License. 
 */ -#ifndef MINDSPORE_LITE_NNACL_FP32_CONV_H_ -#define MINDSPORE_LITE_NNACL_FP32_CONV_H_ +#ifndef MINDSPORE_LITE_NNACL_FP32_CONV_WINOGRAD_H_ +#define MINDSPORE_LITE_NNACL_FP32_CONV_WINOGRAD_H_ #ifdef ENABLE_NEON #include <arm_neon.h> @@ -33,10 +33,6 @@ typedef float *TmpBufferAddress; extern "C" { #endif -// fp32 convolution common (im2col+gemm) -void ConvFp32(const float *input_data, float *packed_input, const float *packed_weight, const float *bias_data, - float *col_major_input, float *output_data, int task_id, const ConvParameter *conv_param); - // fp32 convolution winograd void ConvWinogardFp32(const float *input_data, const float *trans_weight, const float *bias_data, float *output_data, TmpBufferAddress *buffer_list, int task_id, const ConvParameter *conv_param, @@ -45,4 +41,4 @@ void ConvWinogardFp32(const float *input_data, const float *trans_weight, const } #endif -#endif // MINDSPORE_LITE_NNACL_FP32_CONV_H_ +#endif // MINDSPORE_LITE_NNACL_FP32_CONV_WINOGRAD_H_ diff --git a/mindspore/lite/nnacl/fp32/deconv_fp32.h b/mindspore/lite/nnacl/fp32/deconv_fp32.h index aa8869f974..ce53163484 100644 --- a/mindspore/lite/nnacl/fp32/deconv_fp32.h +++ b/mindspore/lite/nnacl/fp32/deconv_fp32.h @@ -22,7 +22,6 @@ #include "nnacl/conv_parameter.h" #include "nnacl/errorcode.h" #include "nnacl/fp32/common_func_fp32.h" -#include "nnacl/fp32/conv_fp32.h" #include "nnacl/minimal_filtering_generator.h" #ifdef __cplusplus diff --git a/mindspore/lite/nnacl/winograd_transform.h b/mindspore/lite/nnacl/winograd_transform.h index 98a0a00f03..39b4961e42 100644 --- a/mindspore/lite/nnacl/winograd_transform.h +++ b/mindspore/lite/nnacl/winograd_transform.h @@ -22,7 +22,6 @@ #endif #include <string.h> #include "nnacl/pack.h" -#include "nnacl/fp32/conv_fp32.h" #include "nnacl/winograd_utils.h" #include "mindspore/lite/nnacl/int8/fixed_point.h" diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/adder_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/adder_fp32.h index 468e369793..356a8ca2b1 100644 --- 
a/mindspore/lite/src/runtime/kernel/arm/fp32/adder_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/adder_fp32.h @@ -21,7 +21,6 @@ #include "src/lite_kernel.h" #include "nnacl/op_base.h" #include "src/runtime/kernel/arm/fp32/convolution_fp32.h" -#include "nnacl/fp32/conv_fp32.h" namespace mindspore::kernel { class AdderCPUKernel : public ConvolutionCPUKernel { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.cc index 06262b8a9a..fbaa442633 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.cc @@ -19,7 +19,7 @@ #include "schema/model_generated.h" #include "src/kernel_registry.h" #include "src/runtime/runtime_api.h" -#include "nnacl/fp32/conv_fp32.h" +#include "nnacl/fp32/conv_common_fp32.h" #include "nnacl/fp32/matmul_fp32.h" using mindspore::kernel::KERNEL_ARCH::kCPU; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.h index e3eb5a5649..e1beb5b500 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.h @@ -21,7 +21,6 @@ #include "src/lite_kernel.h" #include "nnacl/op_base.h" #include "src/runtime/kernel/arm/base/convolution_base.h" -#include "nnacl/fp32/conv_fp32.h" namespace mindspore::kernel { class ConvolutionCPUKernel : public ConvolutionBaseCPUKernel { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.cc index f6282df105..ed491047b2 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.cc @@ -15,7 +15,7 @@ */ #include "src/runtime/kernel/arm/fp32/convolution_winograd_fp32.h" -#include 
"nnacl/fp32/conv_fp32.h" +#include "nnacl/fp32/conv_winograd_fp32.h" #include "nnacl/pack.h" #include "schema/model_generated.h" #include "src/kernel_registry.h" @@ -85,7 +85,7 @@ int ConvolutionWinogradCPUKernel::InitWeightBias() { } ret = WinogradFilterTransform(origin_weight_, matrix_g, matrix_gt, oc_block); if (ret != RET_OK) { - MS_LOG(ERROR) << "winograd filter transfrom failed."; + MS_LOG(ERROR) << "winograd filter transform failed."; return ret; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.h index f9f2edb3c3..45ae9febd1 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.h @@ -21,6 +21,7 @@ #include "src/lite_kernel.h" #include "nnacl/winograd_transform.h" #include "nnacl/minimal_filtering_generator.h" +#include "nnacl/fp32/conv_winograd_fp32.h" #include "src/runtime/kernel/arm/base/convolution_base.h" namespace mindspore::kernel { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/group_convolution_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/group_convolution_fp32.h index fdfe8dce70..a20e9db448 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/group_convolution_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/group_convolution_fp32.h @@ -22,7 +22,7 @@ #include "src/lite_kernel.h" #include "nnacl/op_base.h" #include "src/runtime/kernel/arm/base/convolution_base.h" -#include "nnacl/fp32/conv_fp32.h" +#include "nnacl/fp32/conv_common_fp32.h" namespace mindspore::kernel { class GroupConvolutionCPUKernel : public ConvolutionBaseCPUKernel {