split conv_fp32.c

pull/11984/head
chengyuanwang 4 years ago
parent df2da5679d
commit 5e48c5af6d

@ -0,0 +1,63 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "nnacl/fp32/conv_common_fp32.h"
#include <string.h>
#include "nnacl/fp32/common_func_fp32.h"
#include "nnacl/fp32/matmul_fp32.h"
// fp32 conv common: lowers convolution to im2col packing + tiled GEMM.
// Each worker (task_id) handles every thread_num_-th output tile; packed_input and
// col_major_input are caller-provided scratch buffers in which each task owns a fixed
// deep * cal_num slice, so concurrent tasks never touch each other's data.
//   input_data      - input feature map (all batches).
//   packed_input    - row-major im2col scratch (per-task slices).
//   packed_weight   - pre-packed weights consumed by MatMulOpt.
//   bias_data       - bias passed through to MatMulOpt.
//   col_major_input - column-major scratch for the packed tile.
//   output_data     - output feature map, written in NHWC order (OutType_Nhwc).
//   task_id         - index of the calling worker; work is strided by thread_num_.
//   conv_param      - convolution shapes, thread count and activation type.
void ConvFp32(const float *input_data, float *packed_input, const float *packed_weight, const float *bias_data,
              float *col_major_input, float *output_data, int task_id, const ConvParameter *conv_param) {
  int out_channel = conv_param->output_channel_;
  // GEMM reduction depth: one input patch contributes kh * kw * in_channel values.
  int deep = conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_;
  int output_count = conv_param->output_h_ * conv_param->output_w_;
#ifdef ENABLE_AVX
  const int cal_num = C6NUM;  // tile width must match the RowMajor2Col*Major variant below
#elif defined(ENABLE_SSE)
  const int cal_num = C4NUM;
#else
  const int cal_num = C12NUM;
#endif
  int output_tile_count = UP_DIV(output_count, cal_num);
  // Hoisted loop invariants: this task's scratch slice and its byte size depend only on
  // task_id/deep/cal_num, not on the batch or tile being processed. The size is computed
  // in size_t so the multiplication cannot overflow int for large layers.
  float *gemm_input = packed_input + task_id * deep * cal_num;
  float *col_major_gemm_input = col_major_input + task_id * deep * cal_num;
  const size_t packed_input_size = (size_t)deep * (size_t)cal_num * sizeof(float);
  for (int b = 0; b < conv_param->input_batch_; b++) {
    int in_batch_offset = b * conv_param->input_channel_ * conv_param->input_h_ * conv_param->input_w_;
    int out_batch_offset = b * out_channel * output_count;
    for (int thread_id = task_id; thread_id < output_tile_count; thread_id += conv_param->thread_num_) {
      int start_index = thread_id * cal_num;
      // The last tile may be partial; only real_cal_num output units are valid.
      int real_cal_num = (output_count - start_index) < cal_num ? (output_count - start_index) : cal_num;
      // Zero both scratch tiles before packing: the packer writes real_cal_num units
      // (presumably skipping padded patch elements - the zeroing guarantees they read
      // as 0), and the col-major transpose below converts all cal_num rows.
      memset(gemm_input, 0, packed_input_size);
      memset(col_major_gemm_input, 0, packed_input_size);
      Im2ColPackUnitFp32(input_data + in_batch_offset, conv_param, gemm_input, real_cal_num, start_index);
      int out_offset = thread_id * cal_num * out_channel + out_batch_offset;
      float *gemm_output = output_data + out_offset;
#ifdef ENABLE_AVX
      RowMajor2Col6Major(gemm_input, col_major_gemm_input, cal_num, deep);
#elif defined(ENABLE_SSE)
      RowMajor2Col4Major(gemm_input, col_major_gemm_input, cal_num, deep);
#else
      RowMajor2Col12Major(gemm_input, col_major_gemm_input, cal_num, deep);
#endif
      MatMulOpt(col_major_gemm_input, packed_weight, gemm_output, bias_data, conv_param->act_type_, deep, real_cal_num,
                out_channel, out_channel, OutType_Nhwc);
    }
  }
}

@ -0,0 +1,40 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_NNACL_FP32_CONV_COMMON_H_
#define MINDSPORE_LITE_NNACL_FP32_CONV_COMMON_H_
#ifdef ENABLE_NEON
#include <arm_neon.h>
#endif
#include "nnacl/pack.h"
#include "nnacl/op_base.h"
#include "nnacl/common_func.h"
#include "nnacl/conv_parameter.h"
#ifdef __cplusplus
extern "C" {
#endif
// fp32 convolution common (im2col+gemm)
//   input_data      - input feature map.
//   packed_input    - caller-allocated scratch for im2col-packed patches; each worker
//                     task uses its own slice, so it must be sized for all threads.
//   packed_weight   - pre-packed convolution weights.
//   bias_data       - bias data forwarded to the GEMM.
//   col_major_input - caller-allocated scratch for the column-major copy of packed_input
//                     (same sizing requirement as packed_input).
//   output_data     - output feature map.
//   task_id         - index of the calling worker thread; tiles are strided by the
//                     thread count in conv_param.
//   conv_param      - convolution shapes, activation type and thread count.
void ConvFp32(const float *input_data, float *packed_input, const float *packed_weight, const float *bias_data,
              float *col_major_input, float *output_data, int task_id, const ConvParameter *conv_param);
#ifdef __cplusplus
}
#endif
#endif // MINDSPORE_LITE_NNACL_FP32_CONV_COMMON_H_

@ -15,6 +15,7 @@
*/
#include "nnacl/fp32/conv_depthwise_fp32.h"
#include "nnacl/common_func.h"
#include "nnacl/fp32/common_func_fp32.h"
#include "nnacl/winograd_transform.h"
#ifdef ENABLE_ARM64

@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -14,55 +14,12 @@
* limitations under the License.
*/
#include "nnacl/fp32/conv_fp32.h"
#include "nnacl/fp32/conv_winograd_fp32.h"
#include <string.h>
#include "nnacl/fp32/common_func_fp32.h"
#include "nnacl/winograd_transform.h"
#include "nnacl/fp32/matmul_fp32.h"
// fp32 conv common: im2col packing followed by a tiled GEMM, work-split by task_id.
void ConvFp32(const float *input_data, float *packed_input, const float *packed_weight, const float *bias_data,
              float *col_major_input, float *output_data, int task_id, const ConvParameter *conv_param) {
  const int channel_out = conv_param->output_channel_;
  // Reduction depth of the GEMM: one patch holds kh * kw * in_channel values.
  const int gemm_deep = conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_;
  const int plane_count = conv_param->output_h_ * conv_param->output_w_;
#ifdef ENABLE_AVX
  const int tile_unit = C6NUM;
#elif defined(ENABLE_SSE)
  const int tile_unit = C4NUM;
#else
  const int tile_unit = C12NUM;
#endif
  const int tile_count = UP_DIV(plane_count, tile_unit);
  for (int batch = 0; batch < conv_param->input_batch_; batch++) {
    const int src_batch_offset = batch * conv_param->input_channel_ * conv_param->input_h_ * conv_param->input_w_;
    const int dst_batch_offset = batch * channel_out * plane_count;
    // Each worker takes every thread_num_-th tile, starting at its own task_id.
    for (int tile = task_id; tile < tile_count; tile += conv_param->thread_num_) {
      const int plane_start = tile * tile_unit;
      const int planes_left = plane_count - plane_start;
      const int real_tile = planes_left < tile_unit ? planes_left : tile_unit;
      // This task's private slices of the shared scratch buffers.
      float *row_major_buf = packed_input + task_id * gemm_deep * tile_unit;
      float *col_major_buf = col_major_input + task_id * gemm_deep * tile_unit;
      const size_t buf_bytes = gemm_deep * tile_unit * sizeof(float);
      // Zero both tiles up front so tail / unpacked elements contribute nothing.
      memset(row_major_buf, 0, buf_bytes);
      memset(col_major_buf, 0, buf_bytes);
      Im2ColPackUnitFp32(input_data + src_batch_offset, conv_param, row_major_buf, real_tile, plane_start);
      float *dst = output_data + tile * tile_unit * channel_out + dst_batch_offset;
#ifdef ENABLE_AVX
      RowMajor2Col6Major(row_major_buf, col_major_buf, tile_unit, gemm_deep);
#elif defined(ENABLE_SSE)
      RowMajor2Col4Major(row_major_buf, col_major_buf, tile_unit, gemm_deep);
#else
      RowMajor2Col12Major(row_major_buf, col_major_buf, tile_unit, gemm_deep);
#endif
      MatMulOpt(col_major_buf, packed_weight, dst, bias_data, conv_param->act_type_, gemm_deep, real_tile,
                channel_out, channel_out, OutType_Nhwc);
    }
  }
}
// fp32 conv winograd
void ConvWinogardFp32(const float *input_data, const float *trans_weight, const float *bias_data, float *output_data,
TmpBufferAddress *buffer_list, int task_id, const ConvParameter *conv_param,

@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -14,8 +14,8 @@
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_NNACL_FP32_CONV_H_
#define MINDSPORE_LITE_NNACL_FP32_CONV_H_
#ifndef MINDSPORE_LITE_NNACL_FP32_CONV_WINOGRAD_H_
#define MINDSPORE_LITE_NNACL_FP32_CONV_WINOGRAD_H_
#ifdef ENABLE_NEON
#include <arm_neon.h>
@ -33,10 +33,6 @@ typedef float *TmpBufferAddress;
extern "C" {
#endif
// fp32 convolution common (im2col+gemm)
void ConvFp32(const float *input_data, float *packed_input, const float *packed_weight, const float *bias_data,
float *col_major_input, float *output_data, int task_id, const ConvParameter *conv_param);
// fp32 convolution winograd
void ConvWinogardFp32(const float *input_data, const float *trans_weight, const float *bias_data, float *output_data,
TmpBufferAddress *buffer_list, int task_id, const ConvParameter *conv_param,
@ -45,4 +41,4 @@ void ConvWinogardFp32(const float *input_data, const float *trans_weight, const
}
#endif
#endif // MINDSPORE_LITE_NNACL_FP32_CONV_H_
#endif // MINDSPORE_LITE_NNACL_FP32_CONV_WINOGRAD_H_

@ -22,7 +22,6 @@
#include "nnacl/conv_parameter.h"
#include "nnacl/errorcode.h"
#include "nnacl/fp32/common_func_fp32.h"
#include "nnacl/fp32/conv_fp32.h"
#include "nnacl/minimal_filtering_generator.h"
#ifdef __cplusplus

@ -22,7 +22,6 @@
#endif
#include <string.h>
#include "nnacl/pack.h"
#include "nnacl/fp32/conv_fp32.h"
#include "nnacl/winograd_utils.h"
#include "mindspore/lite/nnacl/int8/fixed_point.h"

@ -21,7 +21,6 @@
#include "src/lite_kernel.h"
#include "nnacl/op_base.h"
#include "src/runtime/kernel/arm/fp32/convolution_fp32.h"
#include "nnacl/fp32/conv_fp32.h"
namespace mindspore::kernel {
class AdderCPUKernel : public ConvolutionCPUKernel {

@ -19,7 +19,7 @@
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "src/runtime/runtime_api.h"
#include "nnacl/fp32/conv_fp32.h"
#include "nnacl/fp32/conv_common_fp32.h"
#include "nnacl/fp32/matmul_fp32.h"
using mindspore::kernel::KERNEL_ARCH::kCPU;

@ -21,7 +21,6 @@
#include "src/lite_kernel.h"
#include "nnacl/op_base.h"
#include "src/runtime/kernel/arm/base/convolution_base.h"
#include "nnacl/fp32/conv_fp32.h"
namespace mindspore::kernel {
class ConvolutionCPUKernel : public ConvolutionBaseCPUKernel {

@ -15,7 +15,7 @@
*/
#include "src/runtime/kernel/arm/fp32/convolution_winograd_fp32.h"
#include "nnacl/fp32/conv_fp32.h"
#include "nnacl/fp32/conv_winograd_fp32.h"
#include "nnacl/pack.h"
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
@ -85,7 +85,7 @@ int ConvolutionWinogradCPUKernel::InitWeightBias() {
}
ret = WinogradFilterTransform(origin_weight_, matrix_g, matrix_gt, oc_block);
if (ret != RET_OK) {
MS_LOG(ERROR) << "winograd filter transfrom failed.";
MS_LOG(ERROR) << "winograd filter transform failed.";
return ret;
}

@ -21,6 +21,7 @@
#include "src/lite_kernel.h"
#include "nnacl/winograd_transform.h"
#include "nnacl/minimal_filtering_generator.h"
#include "nnacl/fp32/conv_winograd_fp32.h"
#include "src/runtime/kernel/arm/base/convolution_base.h"
namespace mindspore::kernel {

@ -22,7 +22,7 @@
#include "src/lite_kernel.h"
#include "nnacl/op_base.h"
#include "src/runtime/kernel/arm/base/convolution_base.h"
#include "nnacl/fp32/conv_fp32.h"
#include "nnacl/fp32/conv_common_fp32.h"
namespace mindspore::kernel {
class GroupConvolutionCPUKernel : public ConvolutionBaseCPUKernel {

Loading…
Cancel
Save