parent
df2da5679d
commit
5e48c5af6d
@ -0,0 +1,63 @@
|
||||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "nnacl/fp32/conv_common_fp32.h"
|
||||
#include <string.h>
|
||||
#include "nnacl/fp32/common_func_fp32.h"
|
||||
#include "nnacl/fp32/matmul_fp32.h"
|
||||
|
||||
// fp32 conv common
|
||||
void ConvFp32(const float *input_data, float *packed_input, const float *packed_weight, const float *bias_data,
|
||||
float *col_major_input, float *output_data, int task_id, const ConvParameter *conv_param) {
|
||||
int out_channel = conv_param->output_channel_;
|
||||
int deep = conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_;
|
||||
int output_count = conv_param->output_h_ * conv_param->output_w_;
|
||||
#ifdef ENABLE_AVX
|
||||
const int cal_num = C6NUM;
|
||||
#elif defined(ENABLE_SSE)
|
||||
const int cal_num = C4NUM;
|
||||
#else
|
||||
const int cal_num = C12NUM;
|
||||
#endif
|
||||
int output_tile_count = UP_DIV(output_count, cal_num);
|
||||
|
||||
for (int b = 0; b < conv_param->input_batch_; b++) {
|
||||
int in_batch_offset = b * conv_param->input_channel_ * conv_param->input_h_ * conv_param->input_w_;
|
||||
int out_batch_offset = b * out_channel * output_count;
|
||||
for (int thread_id = task_id; thread_id < output_tile_count; thread_id += conv_param->thread_num_) {
|
||||
int start_index = thread_id * cal_num;
|
||||
int real_cal_num = (output_count - start_index) < cal_num ? (output_count - start_index) : cal_num;
|
||||
float *gemm_input = packed_input + task_id * deep * cal_num;
|
||||
float *col_major_gemm_input = col_major_input + task_id * deep * cal_num;
|
||||
size_t packed_input_size = deep * cal_num * sizeof(float);
|
||||
memset(gemm_input, 0, packed_input_size);
|
||||
memset(col_major_gemm_input, 0, packed_input_size);
|
||||
Im2ColPackUnitFp32(input_data + in_batch_offset, conv_param, gemm_input, real_cal_num, start_index);
|
||||
|
||||
int out_offset = thread_id * cal_num * out_channel + out_batch_offset;
|
||||
float *gemm_output = output_data + out_offset;
|
||||
#ifdef ENABLE_AVX
|
||||
RowMajor2Col6Major(gemm_input, col_major_gemm_input, cal_num, deep);
|
||||
#elif defined(ENABLE_SSE)
|
||||
RowMajor2Col4Major(gemm_input, col_major_gemm_input, cal_num, deep);
|
||||
#else
|
||||
RowMajor2Col12Major(gemm_input, col_major_gemm_input, cal_num, deep);
|
||||
#endif
|
||||
MatMulOpt(col_major_gemm_input, packed_weight, gemm_output, bias_data, conv_param->act_type_, deep, real_cal_num,
|
||||
out_channel, out_channel, OutType_Nhwc);
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,40 @@
|
||||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_NNACL_FP32_CONV_COMMON_H_
|
||||
#define MINDSPORE_LITE_NNACL_FP32_CONV_COMMON_H_
|
||||
|
||||
#ifdef ENABLE_NEON
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
#include "nnacl/pack.h"
|
||||
#include "nnacl/op_base.h"
|
||||
#include "nnacl/common_func.h"
|
||||
#include "nnacl/conv_parameter.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// fp32 convolution common (im2col+gemm)
|
||||
void ConvFp32(const float *input_data, float *packed_input, const float *packed_weight, const float *bias_data,
|
||||
float *col_major_input, float *output_data, int task_id, const ConvParameter *conv_param);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // MINDSPORE_LITE_NNACL_FP32_CONV_COMMON_H_
|
Loading…
Reference in new issue