!14182 split file list

From: @zhujingxuan
Reviewed-by: 
Signed-off-by:
pull/14182/MERGE
mindspore-ci-bot 4 years ago committed by Gitee
commit 5ce8a53cb9

@ -112,9 +112,15 @@ int DetectionPostProcessBaseCoder::AllocateBuffer() {
int DetectionPostProcessBaseCoder::DoCode(CoderContext *const context) { int DetectionPostProcessBaseCoder::DoCode(CoderContext *const context) {
Collect(context, Collect(context,
{"nnacl/detection_post_process_parameter.h", "nnacl/fp32/detection_post_process_fp32.h", {
"wrapper/base/detection_post_process_base_wrapper.h"}, "nnacl/detection_post_process_parameter.h",
{"detection_post_process_fp32.c", "detection_post_process_base_wrapper.c"}); "nnacl/fp32/detection_post_process_fp32.h",
"wrapper/base/detection_post_process_base_wrapper.h",
},
{
"detection_post_process_fp32.c",
"detection_post_process_base_wrapper.c",
});
Serializer code; Serializer code;
MS_CHECK_RET_CODE(GetInputData(context, &code), "GetInputData failed"); MS_CHECK_RET_CODE(GetInputData(context, &code), "GetInputData failed");

@ -43,13 +43,27 @@ int DTypeCastCoder::DoCode(CoderContext *const context) {
TypeId input_data_type = input_tensor_->data_type(); TypeId input_data_type = input_tensor_->data_type();
TypeId output_data_type = output_tensor_->data_type(); TypeId output_data_type = output_tensor_->data_type();
std::vector<std::string> asmFiles; Collect(context,
{
"nnacl/fp32/cast.h",
},
{
"nnacl/fp32/cast.c",
"nnacl/fp32/common_func.c",
});
if (target_ == kARM32A) { if (target_ == kARM32A) {
asmFiles = {"nnacl/assembly/arm32/PostFuncBiasReluC8.S", "nnacl/assembly/arm32/PostFuncBiasReluC4.S"}; Collect(context, {}, {},
{
"nnacl/assembly/arm32/PostFuncBiasReluC8.S",
"nnacl/assembly/arm32/PostFuncBiasReluC4.S",
});
} else if (target_ == kARM64) { } else if (target_ == kARM64) {
asmFiles = {"nnacl/assembly/arm64/PostFuncBiasReluC8.S", "nnacl/assembly/arm64/PostFuncBiasReluC4.S"}; Collect(context, {}, {},
{
"nnacl/assembly/arm64/PostFuncBiasReluC8.S",
"nnacl/assembly/arm64/PostFuncBiasReluC4.S",
});
} }
Collect(context, {"nnacl/fp32/cast.h"}, {"nnacl/fp32/cast.c", "nnacl/fp32/common_func.c"}, asmFiles);
Serializer code; Serializer code;
if (output_data_type != kNumberTypeFloat32) { if (output_data_type != kNumberTypeFloat32) {
if (input_data_type == kNumberTypeFloat32 && output_data_type == kNumberTypeInt32) { if (input_data_type == kNumberTypeFloat32 && output_data_type == kNumberTypeInt32) {

@ -46,7 +46,13 @@ int QuantDTypeCastCoder::DoCode(CoderContext *const context) {
: input_tensor_->quant_params().at(0); : input_tensor_->quant_params().at(0);
int num_unit_thread = input_tensor_->ElementsNum(); int num_unit_thread = input_tensor_->ElementsNum();
Collect(context, {"nnacl/int8/quant_dtype_cast_int8.h"}, {"quant_dtype_cast_int8.c"}); Collect(context,
{
"nnacl/int8/quant_dtype_cast_int8.h",
},
{
"quant_dtype_cast_int8.c",
});
Serializer code; Serializer code;
code.precision(kPrecision); code.precision(kPrecision);
if (src_dtype == TypeId::kNumberTypeInt8 && dst_dtype == TypeId::kNumberTypeFloat32) { if (src_dtype == TypeId::kNumberTypeInt8 && dst_dtype == TypeId::kNumberTypeFloat32) {

@ -75,7 +75,13 @@ int AddInt8Coder::DoCode(CoderContext *const context) {
Serializer code; Serializer code;
code.precision(kPrecision); code.precision(kPrecision);
Collect(context, {"CMSIS/NN/Include/arm_nnfunctions.h"}, {"arm_elementwise_add_s8.c"}); Collect(context,
{
"CMSIS/NN/Include/arm_nnfunctions.h",
},
{
"arm_elementwise_add_s8.c",
});
code.CodeFunction("arm_elementwise_add_s8", input1_, input2, input_1_offset_, input_1_mult_, input_1_shift_, code.CodeFunction("arm_elementwise_add_s8", input1_, input2, input_1_offset_, input_1_mult_, input_1_shift_,
input_2_offset_, input_2_mult_, input_2_shift_, left_shift_, output_tensor_, out_offset_, out_mult_, input_2_offset_, input_2_mult_, input_2_shift_, left_shift_, output_tensor_, out_offset_, out_mult_,

@ -39,9 +39,11 @@ int Conv2DInt8Coder::Prepare(CoderContext *const context) {
int Conv2DInt8Coder::DoCode(CoderContext *const context) { int Conv2DInt8Coder::DoCode(CoderContext *const context) {
Serializer code; Serializer code;
code.precision(kPrecision); code.precision(kPrecision);
std::vector<std::string> h_files; Collect(context,
std::vector<std::string> c_files; {
h_files.emplace_back("CMSIS/NN/Include/arm_nnfunctions.h"); "CMSIS/NN/Include/arm_nnfunctions.h",
},
{});
if (opt_ != Convolve_1x1_fast) { if (opt_ != Convolve_1x1_fast) {
code.CodeFunction("memset", buffer_, 0, buffer_size_); code.CodeFunction("memset", buffer_, 0, buffer_size_);
} }
@ -49,25 +51,36 @@ int Conv2DInt8Coder::DoCode(CoderContext *const context) {
code.CodeArray("output_mult", output_mult_, output_ch_); code.CodeArray("output_mult", output_mult_, output_ch_);
switch (opt_) { switch (opt_) {
case Basic: case Basic:
c_files = {"arm_convolve_s8.c", "arm_nn_mat_mult_kernel_s8_s16.c", "arm_q7_to_q15_with_offset.c"}; Collect(context, {},
Collect(context, h_files, c_files); {
"arm_convolve_s8.c",
"arm_nn_mat_mult_kernel_s8_s16.c",
"arm_q7_to_q15_with_offset.c",
});
code.CodeFunction("arm_convolve_s8", input_tensor_, input_x_, input_y_, input_ch_, input_batches_, filter_tensor_, code.CodeFunction("arm_convolve_s8", input_tensor_, input_x_, input_y_, input_ch_, input_batches_, filter_tensor_,
output_ch_, kernel_x_, kernel_y_, pad_x_, pad_y_, stride_x_, stride_y_, bias_tensor_, output_ch_, kernel_x_, kernel_y_, pad_x_, pad_y_, stride_x_, stride_y_, bias_tensor_,
output_tensor_, "output_shift", "output_mult", out_offset_, input_offset_, out_activation_min_, output_tensor_, "output_shift", "output_mult", out_offset_, input_offset_, out_activation_min_,
out_activation_max_, output_x_, output_y_, buffer_); out_activation_max_, output_x_, output_y_, buffer_);
break; break;
case Convolve_1_x_n: case Convolve_1_x_n:
c_files = {"arm_convolve_1_x_n_s8.c", "arm_nn_mat_mul_core_1x_s8.c"}; Collect(context, {},
Collect(context, h_files, c_files); {
"arm_convolve_1_x_n_s8.c",
"arm_nn_mat_mul_core_1x_s8.c",
});
code.CodeFunction("arm_convolve_1_x_n_s8", input_tensor_, input_x_, input_ch_, input_batches_, filter_tensor_, code.CodeFunction("arm_convolve_1_x_n_s8", input_tensor_, input_x_, input_ch_, input_batches_, filter_tensor_,
output_ch_, kernel_x_, pad_x_, stride_x_, bias_tensor_, output_tensor_, "output_shift", output_ch_, kernel_x_, pad_x_, stride_x_, bias_tensor_, output_tensor_, "output_shift",
"output_mult", out_offset_, input_offset_, out_activation_min_, out_activation_max_, output_x_, "output_mult", out_offset_, input_offset_, out_activation_min_, out_activation_max_, output_x_,
buffer_); buffer_);
break; break;
case Convolve_1x1_fast: case Convolve_1x1_fast:
c_files = {"arm_convolve_1x1_s8_fast.c", "arm_nn_mat_mult_nt_t_s8.c", "arm_nn_mat_mul_core_4x_s8.c", Collect(context, {},
"arm_nn_mat_mul_core_1x_s8.c"}; {
Collect(context, h_files, c_files); "arm_convolve_1x1_s8_fast.c",
"arm_nn_mat_mult_nt_t_s8.c",
"arm_nn_mat_mul_core_4x_s8.c",
"arm_nn_mat_mul_core_1x_s8.c",
});
code.CodeFunction("arm_convolve_1x1_s8_fast", input_tensor_, input_x_, input_y_, input_ch_, input_batches_, code.CodeFunction("arm_convolve_1x1_s8_fast", input_tensor_, input_x_, input_y_, input_ch_, input_batches_,
filter_tensor_, output_ch_, pad_x_, pad_y_, stride_x_, stride_y_, bias_tensor_, output_tensor_, filter_tensor_, output_ch_, pad_x_, pad_y_, stride_x_, stride_y_, bias_tensor_, output_tensor_,
"output_shift", "output_mult", out_offset_, input_offset_, out_activation_min_, "output_shift", "output_mult", out_offset_, input_offset_, out_activation_min_,

@ -38,16 +38,19 @@ int DWConvInt8Coder::DoCode(CoderContext *const context) {
Serializer code; Serializer code;
code.precision(kPrecision); code.precision(kPrecision);
std::vector<std::string> h_files; Collect(context,
std::vector<std::string> c_files; {
"CMSIS/NN/Include/arm_nnfunctions.h",
h_files.emplace_back("CMSIS/NN/Include/arm_nnfunctions.h"); },
{});
code.CodeArray("output_shift", output_shift_, output_ch_); code.CodeArray("output_shift", output_shift_, output_ch_);
code.CodeArray("output_mult", output_mult_, output_ch_); code.CodeArray("output_mult", output_mult_, output_ch_);
switch (optimize_) { switch (optimize_) {
case Conv_3x3: case Conv_3x3:
c_files.emplace_back("arm_depthwise_conv_3x3_s8.c"); Collect(context, {},
Collect(context, h_files, c_files); {
"arm_depthwise_conv_3x3_s8.c",
});
code.CodeFunction("arm_depthwise_conv_3x3_s8", input_tensor_, input_x_, input_y_, input_ch_, filter_tensor_, code.CodeFunction("arm_depthwise_conv_3x3_s8", input_tensor_, input_x_, input_y_, input_ch_, filter_tensor_,
output_ch_, pad_x_, pad_y_, stride_x_, stride_y_, bias_tensor_, output_tensor_, "output_shift", output_ch_, pad_x_, pad_y_, stride_x_, stride_y_, bias_tensor_, output_tensor_, "output_shift",
"output_mult", output_x_, output_y_, output_offset_, input_offset_, output_activation_min_, "output_mult", output_x_, output_y_, output_offset_, input_offset_, output_activation_min_,
@ -55,9 +58,11 @@ int DWConvInt8Coder::DoCode(CoderContext *const context) {
break; break;
case Conv_opt: case Conv_opt:
// arm_depthwise_conv_s8_opt also depends on arm_depthwise_conv_s8 // arm_depthwise_conv_s8_opt also depends on arm_depthwise_conv_s8
c_files.emplace_back("arm_depthwise_conv_s8.c"); Collect(context, {},
c_files.emplace_back("arm_depthwise_conv_s8_opt.c"); {
Collect(context, h_files, c_files); "arm_depthwise_conv_s8.c",
"arm_depthwise_conv_s8_opt.c",
});
code.CodeFunction("arm_depthwise_conv_s8_opt", input_tensor_, input_x_, input_y_, input_ch_, filter_tensor_, code.CodeFunction("arm_depthwise_conv_s8_opt", input_tensor_, input_x_, input_y_, input_ch_, filter_tensor_,
output_ch_, kernel_x_, kernel_y_, pad_x_, pad_y_, stride_x_, stride_y_, bias_tensor_, output_ch_, kernel_x_, kernel_y_, pad_x_, pad_y_, stride_x_, stride_y_, bias_tensor_,
output_tensor_, "output_shift", "output_mult", output_x_, output_y_, output_offset_, output_tensor_, "output_shift", "output_mult", output_x_, output_y_, output_offset_,
@ -65,8 +70,10 @@ int DWConvInt8Coder::DoCode(CoderContext *const context) {
"NULL"); "NULL");
break; break;
case Basic: case Basic:
c_files.emplace_back("arm_depthwise_conv_s8.c"); Collect(context, {},
Collect(context, h_files, c_files); {
"arm_depthwise_conv_s8.c",
});
code.CodeFunction("arm_depthwise_conv_s8", input_tensor_, input_x_, input_y_, input_ch_, filter_tensor_, code.CodeFunction("arm_depthwise_conv_s8", input_tensor_, input_x_, input_y_, input_ch_, filter_tensor_,
output_ch_, ch_mult_, kernel_x_, kernel_y_, pad_x_, pad_y_, stride_x_, stride_y_, bias_tensor_, output_ch_, ch_mult_, kernel_x_, kernel_y_, pad_x_, pad_y_, stride_x_, stride_y_, bias_tensor_,
output_tensor_, "output_shift", "output_mult", output_x_, output_y_, output_offset_, output_tensor_, "output_shift", "output_mult", output_x_, output_y_, output_offset_,

@ -35,7 +35,14 @@ int FullConnectionInt8Coder::DoCode(CoderContext *const context) {
Serializer code; Serializer code;
code.precision(kPrecision); code.precision(kPrecision);
Collect(context, {"CMSIS/NN/Include/arm_nnfunctions.h"}, {"arm_fully_connected_s8.c", "arm_nn_vec_mat_mult_t_s8.c"}); Collect(context,
{
"CMSIS/NN/Include/arm_nnfunctions.h",
},
{
"arm_fully_connected_s8.c",
"arm_nn_vec_mat_mult_t_s8.c",
});
code.CodeFunction("arm_fully_connected_s8", input_tensor_, filter_tensor_, col_dim_, row_dim_, nb_batches_, code.CodeFunction("arm_fully_connected_s8", input_tensor_, filter_tensor_, col_dim_, row_dim_, nb_batches_,
input_offset_, filter_offset_, out_multiplier_, out_shift_, output_offset_, bias_tensor_, input_offset_, filter_offset_, out_multiplier_, out_shift_, output_offset_, bias_tensor_,

@ -60,7 +60,13 @@ int MulInt8Coder::DoCode(CoderContext *const context) {
Serializer code; Serializer code;
code.precision(kPrecision); code.precision(kPrecision);
Collect(context, {"CMSIS/NN/Include/arm_nnfunctions.h"}, {"arm_elementwise_mul_s8.c"}); Collect(context,
{
"CMSIS/NN/Include/arm_nnfunctions.h",
},
{
"arm_elementwise_mul_s8.c",
});
code.CodeFunction("arm_elementwise_mul_s8", input1_, input2_, input_1_offset_, input_2_offset_, output_tensor_, code.CodeFunction("arm_elementwise_mul_s8", input1_, input2_, input_1_offset_, input_2_offset_, output_tensor_,
out_offset_, out_mult_, out_shift_, out_activation_min_, out_activation_max_, block_size_); out_offset_, out_mult_, out_shift_, out_activation_min_, out_activation_max_, block_size_);

@ -42,18 +42,27 @@ int PoolingInt8Coder::DoCode(CoderContext *const context) {
// init struct PoolingParameters // init struct PoolingParameters
std::string pooling_func; std::string pooling_func;
std::vector<std::string> cFiles;
if (pooling_parameter_->pool_mode_ == PoolMode_AvgPool) { if (pooling_parameter_->pool_mode_ == PoolMode_AvgPool) {
cFiles = {"arm_avgpool_s8.c"}; Collect(context, {},
{
"arm_avgpool_s8.c",
});
pooling_func = "arm_avgpool_s8"; pooling_func = "arm_avgpool_s8";
} else if (pooling_parameter_->pool_mode_ == PoolMode_MaxPool) { } else if (pooling_parameter_->pool_mode_ == PoolMode_MaxPool) {
cFiles = {"arm_max_pool_s8.c"}; Collect(context, {},
{
"arm_max_pool_s8.c",
});
pooling_func = "arm_max_pool_s8"; pooling_func = "arm_max_pool_s8";
} else { } else {
MS_LOG(ERROR) << "unsupported pad mode"; MS_LOG(ERROR) << "unsupported pad mode";
return RET_ERROR; return RET_ERROR;
} }
Collect(context, {"CMSIS/NN/Include/arm_nnfunctions.h"}, cFiles); Collect(context,
{
"CMSIS/NN/Include/arm_nnfunctions.h",
},
{});
Serializer code; Serializer code;
code.precision(kPrecision); code.precision(kPrecision);

@ -69,7 +69,13 @@ int SoftMaxInt8Coder::DoCode(CoderContext *const context) {
Serializer code; Serializer code;
code.precision(kPrecision); code.precision(kPrecision);
Collect(context, {"CMSIS/NN/Include/arm_nnfunctions.h"}, {"arm_softmax_s8.c"}); Collect(context,
{
"CMSIS/NN/Include/arm_nnfunctions.h",
},
{
"arm_softmax_s8.c",
});
code.CodeFunction("arm_softmax_s8", input_tensor_, num_rows_, row_size_, mult_, shift_, diff_min_, output_tensor_); code.CodeFunction("arm_softmax_s8", input_tensor_, num_rows_, row_size_, mult_, shift_, diff_min_, output_tensor_);
MS_LOG(INFO) << "SoftMaxInt8Coder has been called"; MS_LOG(INFO) << "SoftMaxInt8Coder has been called";

@ -33,7 +33,13 @@ int ActivationFP32Coder::DoCode(CoderContext *const context) {
int stride = UP_DIV(length, thread_num_); int stride = UP_DIV(length, thread_num_);
int count = MSMIN(stride, length - stride * task_id); int count = MSMIN(stride, length - stride * task_id);
Collect(context, {"nnacl/fp32/activation_fp32.h"}, {"activation_fp32.c"}); Collect(context,
{
"nnacl/fp32/activation_fp32.h",
},
{
"activation_fp32.c",
});
NNaclFp32Serializer code; NNaclFp32Serializer code;
switch (activation_parameter->type_) { switch (activation_parameter->type_) {
case schema::ActivationType_RELU: case schema::ActivationType_RELU:

@ -28,7 +28,15 @@ int AddNFP32Coder::DoCode(CoderContext *const context) {
int elements_num = input0->ElementsNum(); int elements_num = input0->ElementsNum();
// Get Tensor Pointer // Get Tensor Pointer
Collect(context, {"nnacl/kernel/fp32/add_fp32.h"}, {"add_fp32.c", "arithmetic_fp32.c", "arithmetic_base.c"}); Collect(context,
{
"nnacl/kernel/fp32/add_fp32.h",
},
{
"add_fp32.c",
"arithmetic_fp32.c",
"arithmetic_base.c",
});
NNaclFp32Serializer code; NNaclFp32Serializer code;
code.CodeFunction("ElementAdd", input0, input1, output_tensor_, elements_num); code.CodeFunction("ElementAdd", input0, input1, output_tensor_, elements_num);
if (input_tensors_.size() > 2) { if (input_tensors_.size() > 2) {

@ -19,6 +19,7 @@
#include <type_traits> #include <type_traits>
#include "coder/opcoders/file_collector.h" #include "coder/opcoders/file_collector.h"
#include "nnacl/fp32/arithmetic_fp32.h" #include "nnacl/fp32/arithmetic_fp32.h"
#include "coder/opcoders/parallel.h"
#include "coder/log.h" #include "coder/log.h"
namespace mindspore::lite::micro::nnacl { namespace mindspore::lite::micro::nnacl {
@ -245,8 +246,7 @@ int ArithmeticFP32Coder::Prepare(CoderContext *const context) {
return RET_OK; return RET_OK;
} }
int ArithmeticFP32Coder::DoCode(CoderContext *const context) { void ArithmeticFP32Coder::ComputeInOutStrides() {
int task_id = 0;
if (arithmetic_parameter_->broadcasting_) { if (arithmetic_parameter_->broadcasting_) {
outside_ = 1; outside_ = 1;
for (auto i = arithmetic_parameter_->ndim_ - 1; i >= 0; --i) { for (auto i = arithmetic_parameter_->ndim_ - 1; i >= 0; --i) {
@ -263,11 +263,15 @@ int ArithmeticFP32Coder::DoCode(CoderContext *const context) {
ComputeStrides(arithmetic_parameter_->out_shape_, arithmetic_parameter_->out_strides_, ComputeStrides(arithmetic_parameter_->out_shape_, arithmetic_parameter_->out_strides_,
arithmetic_parameter_->ndim_); arithmetic_parameter_->ndim_);
} }
}
int ArithmeticFP32Coder::DoCode(CoderContext *const context) {
ComputeInOutStrides();
int element_num = output_tensor_->ElementsNum(); int element_num = output_tensor_->ElementsNum();
MS_CHECK_TRUE(thread_num_ > 0, "thread_num_ <= 0"); MS_CHECK_TRUE(thread_num_ > 0, "thread_num_ <= 0");
int stride = UP_DIV(element_num, thread_num_); int stride = UP_DIV(element_num, thread_num_);
int count = MSMIN(stride, element_num - stride * task_id); int count = MSMIN(stride, element_num - stride * kDefaultTaskId);
MS_CHECK_TRUE(!arithmetic_run_.empty(), "arithmetic_run function is nullptr!"); MS_CHECK_TRUE(!arithmetic_run_.empty(), "arithmetic_run function is nullptr!");
NNaclFp32Serializer code; NNaclFp32Serializer code;
/** /**
@ -275,22 +279,55 @@ int ArithmeticFP32Coder::DoCode(CoderContext *const context) {
* this solution is not suitable for micro, for the size of package. * this solution is not suitable for micro, for the size of package.
* */ * */
if (arithmetic_opt_run_ == "ElementOptSub" || arithmetic_run_ == "ElementSub") { if (arithmetic_opt_run_ == "ElementOptSub" || arithmetic_run_ == "ElementSub") {
Collect(context, {"nnacl/fp32/sub_fp32.h"}, {"sub_fp32.c"}); Collect(context,
{
"nnacl/fp32/sub_fp32.h",
},
{
"sub_fp32.c",
});
} else if (arithmetic_opt_run_ == "ElementOptAdd" || arithmetic_run_ == "ElementAdd") { } else if (arithmetic_opt_run_ == "ElementOptAdd" || arithmetic_run_ == "ElementAdd") {
Collect(context, {"nnacl/fp32/add_fp32.h"}, {"add_fp32.c", "arithmetic_fp32.c", "arithmetic_base.c"}); Collect(context,
{
"nnacl/fp32/add_fp32.h",
},
{
"add_fp32.c",
"arithmetic_fp32.c",
"arithmetic_base.c",
});
} else if (arithmetic_opt_run_ == "ElementOptMul" || arithmetic_run_ == "ElementMul") { } else if (arithmetic_opt_run_ == "ElementOptMul" || arithmetic_run_ == "ElementMul") {
Collect(context, {"nnacl/fp32/mul_fp32.h"}, {"mul_fp32.c"}); Collect(context,
{
"nnacl/fp32/mul_fp32.h",
},
{
"mul_fp32.c",
});
} else if (arithmetic_run_ == "ElementAddRelu") { } else if (arithmetic_run_ == "ElementAddRelu") {
Collect(context, {"nnacl/fp32/add_relu_fp32.h"}, {"add_relu_fp32.c"}); Collect(context,
{
"nnacl/fp32/add_relu_fp32.h",
},
{
"add_relu_fp32.c",
});
} else { } else {
Collect(context, {"nnacl/arithmetic_common.h", "nnacl/fp32/arithmetic_fp32.h"}, Collect(context,
{"arithmetic_common.c", "arithmetic_fp32.c"}); {
"nnacl/arithmetic_common.h",
"nnacl/fp32/arithmetic_fp32.h",
},
{
"arithmetic_common.c",
"arithmetic_fp32.c",
});
} }
if (arithmetic_parameter_->broadcasting_) { if (arithmetic_parameter_->broadcasting_) {
stride = UP_DIV(outside_, thread_num_); stride = UP_DIV(outside_, thread_num_);
out_count_ = MSMIN(stride, outside_ - stride * task_id); out_count_ = MSMIN(stride, outside_ - stride * kDefaultTaskId);
out_thread_stride_ = stride * task_id; out_thread_stride_ = stride * kDefaultTaskId;
std::string input0_str = allocator_->GetRuntimeAddr(input_tensor_); std::string input0_str = allocator_->GetRuntimeAddr(input_tensor_);
std::string input1_str = allocator_->GetRuntimeAddr(filter_tensor_); std::string input1_str = allocator_->GetRuntimeAddr(filter_tensor_);
std::string output_str = allocator_->GetRuntimeAddr(output_tensor_); std::string output_str = allocator_->GetRuntimeAddr(output_tensor_);

@ -80,6 +80,8 @@ class ArithmeticFP32Coder final : public OperatorCoder {
private: private:
int Init(CoderContext *const context); int Init(CoderContext *const context);
void ComputeInOutStrides();
int BroadcastRun(const std::string &input0, const std::string &input1, const std::string &output, int dim, int BroadcastRun(const std::string &input0, const std::string &input1, const std::string &output, int dim,
int out_count, int out_thread_stride, NNaclFp32Serializer *const code); int out_count, int out_thread_stride, NNaclFp32Serializer *const code);

@ -66,7 +66,14 @@ int ArithmeticSelfFP32Coder::DoCode(CoderContext *const context) {
MS_CHECK_TRUE(!arithmetic_self_run_.empty(), "arithmetic_run function is nullptr!"); MS_CHECK_TRUE(!arithmetic_self_run_.empty(), "arithmetic_run function is nullptr!");
Collect(context, {"nnacl/arithmetic_common.h", "nnacl/fp32/arithmetic_self.h"}, {"nnacl/fp32/arithmetic_self.c"}); Collect(context,
{
"nnacl/arithmetic_common.h",
"nnacl/fp32/arithmetic_self.h",
},
{
"nnacl/fp32/arithmetic_self.c",
});
NNaclFp32Serializer code; NNaclFp32Serializer code;
code.CodeFunction(arithmetic_self_run_, input_tensor_, output_tensor_, size); code.CodeFunction(arithmetic_self_run_, input_tensor_, output_tensor_, size);

@ -54,7 +54,13 @@ int BatchnormFP32Coder::DoCode(CoderContext *const context) {
MS_CHECK_TRUE(input_tensors_.size() == 3, "inputs size is not equal to three"); MS_CHECK_TRUE(input_tensors_.size() == 3, "inputs size is not equal to three");
Tensor *mean_tensor = input_tensors_.at(1); Tensor *mean_tensor = input_tensors_.at(1);
Tensor *var_tensor = input_tensors_.at(2); Tensor *var_tensor = input_tensors_.at(2);
Collect(context, {"nnacl/fp32/batchnorm.h"}, {"nnacl/fp32/batchnorm.c"}); Collect(context,
{
"nnacl/fp32/batchnorm.h",
},
{
"nnacl/fp32/batchnorm.c",
});
NNaclFp32Serializer code; NNaclFp32Serializer code;
code.CodeStruct("bn_parameter", *bn_parameter); code.CodeStruct("bn_parameter", *bn_parameter);
code.CodeFunction("BatchNormFp32", input_tensor_, mean_tensor, var_tensor, "&bn_parameter", task_id, output_tensor_); code.CodeFunction("BatchNormFp32", input_tensor_, mean_tensor, var_tensor, "&bn_parameter", task_id, output_tensor_);

@ -38,9 +38,19 @@ int BiasAddFP32Coder::DoCode(CoderContext *ctx) {
size_t data_size = input_tensor_->ElementsNum(); size_t data_size = input_tensor_->ElementsNum();
std::string bias_str = allocator_->GetRuntimeAddr(input_tensors_.at(kWeightIndex), true); std::string bias_str = allocator_->GetRuntimeAddr(input_tensors_.at(kWeightIndex), true);
Collect(ctx, Collect(ctx,
{"nnacl/arithmetic.h", "nnacl/nnacl_utils.h", "nnacl/nnacl_common.h", "nnacl/base/arithmetic_base.h", {
"nnacl/fp32/add_fp32.h", "nnacl/fp32/arithmetic_fp32.h"}, "nnacl/arithmetic.h",
{"arithmetic_base.c", "arithmetic_fp32.c", "add_fp32.c"}); "nnacl/nnacl_utils.h",
"nnacl/nnacl_common.h",
"nnacl/base/arithmetic_base.h",
"nnacl/fp32/add_fp32.h",
"nnacl/fp32/arithmetic_fp32.h",
},
{
"arithmetic_base.c",
"arithmetic_fp32.c",
"add_fp32.c",
});
nnacl::NNaclFp32Serializer code; nnacl::NNaclFp32Serializer code;
std::vector<int> dims = input_tensor_->shape(); std::vector<int> dims = input_tensor_->shape();
arithmetic_parameter_->broadcasting_ = false; arithmetic_parameter_->broadcasting_ = false;

@ -35,7 +35,13 @@ int ConcatFP32Coder::ReSize() {
} }
int ConcatFP32Coder::DoCode(CoderContext *const context) { int ConcatFP32Coder::DoCode(CoderContext *const context) {
Collect(context, {"nnacl/fp32/concat.h"}, {"nnacl/fp32/concat.c"}); Collect(context,
{
"nnacl/fp32/concat.h",
},
{
"nnacl/fp32/concat.c",
});
size_t input_num = input_tensors_.size(); size_t input_num = input_tensors_.size();

@ -62,7 +62,16 @@ int ConvolutionDepthwiseFP32Coder::DoCode(CoderContext *const context) {
MS_CHECK_TRUE(conv_param_->input_channel_ == conv_param_->output_channel_, MS_CHECK_TRUE(conv_param_->input_channel_ == conv_param_->output_channel_,
"Only support input channel equals output channel."); "Only support input channel equals output channel.");
// generate code .h .c // generate code .h .c
Collect(context, {"nnacl/fp32/conv_depthwise_fp32.h"}, {"conv_depthwise_fp32.c"}, {"ConvDwFp32Row.S"}); Collect(context,
{
"nnacl/fp32/conv_depthwise_fp32.h",
},
{
"conv_depthwise_fp32.c",
},
{
"ConvDwFp32Row.S",
});
nnacl::NNaclFp32Serializer code; nnacl::NNaclFp32Serializer code;
// call the op function // call the op function

@ -19,12 +19,10 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include "coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.h" #include "coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.h"
#include "coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.h"
#include "nnacl/fp32/winograd_utils.h" #include "nnacl/fp32/winograd_utils.h"
#include "src/ops/populate/populate_register.h"
#include "coder/opcoders/file_collector.h" #include "coder/opcoders/file_collector.h"
#include "coder/log.h" #include "coder/log.h"
#include "src/common/prim_util.h" #include "coder/opcoders/parallel.h"
#include "src/common/version_manager.h" #include "src/common/version_manager.h"
#include "coder/opcoders/nnacl/dequant/de_quant.h" #include "coder/opcoders/nnacl/dequant/de_quant.h"
@ -109,37 +107,60 @@ int ConvolutionFP32Coder::InitWeightBias(CoderContext *const context) {
} }
int ConvolutionFP32Coder::DoCode(CoderContext *const context) { int ConvolutionFP32Coder::DoCode(CoderContext *const context) {
{ Collect(context,
std::vector<std::string> asmFiles; {
if (target_ == kARM32A) { "nnacl/fp32/conv_common_fp32.h",
asmFiles = {"MatmulFp32.S", "nnacl/fp32/matmul_fp32.h",
"MatmulFp32Opt.S", "nnacl/conv_parameter.h",
"PreSum4x16Int8Peroc.S", "nnacl/op_base.h",
"PreSum4x16Int8Pert.S", },
"IndirectGemmInt16to32_8x4.S", {
"MatmulInt8.S", "common_func.c",
"MatmulFp32Opt12x4.S"}; "conv_common_fp32.c",
} else if (target_ == kARM64) { "matmul_fp32.c",
asmFiles = {"MatmulFp32.S", "MatmulFp32Opt.S", "PreSum4x16Int8Peroc.S", "MatVecMulFp32.S", "pack_fp32.c",
"PreSum4x16Int8Peroc.S", "PreSum4x16Int8Pert.S", "IndirectGemmInt16to32_8x4.S", "MatmulInt8.S"}; });
} if (de_quant_flag_) {
std::vector<std::string> h_files = {"nnacl/fp32/conv_common_fp32.h", "nnacl/fp32/matmul_fp32.h", Collect(context,
"nnacl/conv_parameter.h", "nnacl/op_base.h"}; {
std::vector<std::string> c_files = {"common_func.c", "conv_common_fp32.c", "matmul_fp32.c", "pack_fp32.c"}; "wrapper/fp32/dequant_int8_to_fp32_wrapper.h",
if (de_quant_flag_) { },
h_files.emplace_back("wrapper/fp32/dequant_int8_to_fp32_wrapper.h"); {
c_files.emplace_back("dequant_int8_to_fp32_wrapper.c"); "dequant_int8_to_fp32_wrapper.c",
} });
Collect(context, h_files, c_files, asmFiles); }
if (target_ == kARM32A) {
Collect(context, {}, {},
{
"MatmulFp32.S",
"MatmulFp32Opt.S",
"PreSum4x16Int8Peroc.S",
"PreSum4x16Int8Pert.S",
"IndirectGemmInt16to32_8x4.S",
"MatmulInt8.S",
"MatmulFp32Opt12x4.S",
});
} else if (target_ == kARM64) {
Collect(context, {}, {},
{
"MatmulFp32.S",
"MatmulFp32Opt.S",
"PreSum4x16Int8Peroc.S",
"MatVecMulFp32.S",
"PreSum4x16Int8Peroc.S",
"PreSum4x16Int8Pert.S",
"IndirectGemmInt16to32_8x4.S",
"MatmulInt8.S",
});
} }
NNaclFp32Serializer code; NNaclFp32Serializer code;
// call the op function // call the op function
code.CodeFunction("memset", packed_input_, "0", packed_input_size_); code.CodeFunction("memset", packed_input_, "0", packed_input_size_);
code.CodeFunction("memset", col_major_input_, "0", col_major_input_size_); code.CodeFunction("memset", col_major_input_, "0", col_major_input_size_);
code.CodeStruct("conv_parameter", *conv_param_); code.CodeStruct("conv_parameter", *conv_param_);
int task_id = 0;
code.CodeFunction("ConvFp32", input_tensor_, packed_input_, packed_weight_, bias_data_, col_major_input_, code.CodeFunction("ConvFp32", input_tensor_, packed_input_, packed_weight_, bias_data_, col_major_input_,
output_tensor_, task_id, "(ConvParameter *)&conv_parameter"); output_tensor_, kDefaultTaskId, "&conv_parameter");
context->AppendCode(code.str()); context->AppendCode(code.str());
return RET_OK; return RET_OK;

@ -17,6 +17,7 @@
#include <array> #include <array>
#include "nnacl/base/minimal_filtering_generator.h" #include "nnacl/base/minimal_filtering_generator.h"
#include "coder/log.h" #include "coder/log.h"
#include "coder/opcoders/parallel.h"
#include "coder/opcoders/file_collector.h" #include "coder/opcoders/file_collector.h"
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" #include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
@ -213,20 +214,46 @@ std::string ConvolutionWinogradFP32Coder::GetOutputTransFunc(int input_unit, int
} }
int ConvolutionWinogradFP32Coder::DoCode(CoderContext *const context) { int ConvolutionWinogradFP32Coder::DoCode(CoderContext *const context) {
std::vector<std::string> asmFiles; Collect(context,
{
"nnacl/fp32/conv_winograd_fp32.h",
"nnacl/common_func.h",
},
{
"common_func.c",
"conv_int8.c",
"matmul_int8.c",
"pack_fp32.c",
"conv_winograd_fp32.c",
"winograd_transform.c",
"common_func_fp32.c",
"fixed_point.c",
"winograd_utils.c",
"minimal_filtering_generator.c",
});
if (target_ == kARM32A) { if (target_ == kARM32A) {
asmFiles = { Collect(context, {}, {},
"MatmulFp32.S", "MatmulFp32Opt.S", "PreSum4x16Int8Peroc.S", "PreSum4x16Int8Pert.S", "IndirectGemmInt16to32_8x4.S", {
"MatmulInt8.S"}; "MatmulFp32.S",
"MatmulFp32Opt.S",
"PreSum4x16Int8Peroc.S",
"PreSum4x16Int8Pert.S",
"IndirectGemmInt16to32_8x4.S",
"MatmulInt8.S",
});
} else if (target_ == kARM64) { } else if (target_ == kARM64) {
asmFiles = {"MatmulFp32.S", "MatmulFp32Opt.S", "PreSum4x16Int8Peroc.S", "MatVecMulFp32.S", Collect(context, {}, {},
"PreSum4x16Int8Peroc.S", "PreSum4x16Int8Pert.S", "IndirectGemmInt16to32_8x4.S", "MatmulInt8.S"}; {
"MatmulFp32.S",
"MatmulFp32Opt.S",
"PreSum4x16Int8Peroc.S",
"MatVecMulFp32.S",
"PreSum4x16Int8Peroc.S",
"PreSum4x16Int8Pert.S",
"IndirectGemmInt16to32_8x4.S",
"MatmulInt8.S",
});
} }
Collect(
context, {"nnacl/fp32/conv_winograd_fp32.h", "nnacl/common_func.h"},
{"common_func.c", "conv_int8.c", "matmul_int8.c", "pack_fp32.c", "conv_winograd_fp32.c", "winograd_transform.c",
"common_func_fp32.c", "fixed_point.c", "winograd_utils.c", "minimal_filtering_generator.c"},
asmFiles);
NNaclFp32Serializer code; NNaclFp32Serializer code;
// call the op function // call the op function
@ -239,9 +266,8 @@ int ConvolutionWinogradFP32Coder::DoCode(CoderContext *const context) {
<< allocator_->GetRuntimeAddr(col_buffer_) << "};\n"; << allocator_->GetRuntimeAddr(col_buffer_) << "};\n";
code.CodeStruct("conv_parameter", *conv_param_); code.CodeStruct("conv_parameter", *conv_param_);
// code operator func // code operator func
int task_id = 0;
code.CodeFunction("ConvWinogardFp32", input_tensor_, trans_weight_, new_bias_, output_tensor_, code.CodeFunction("ConvWinogardFp32", input_tensor_, trans_weight_, new_bias_, output_tensor_,
"tmp_buffer_address_list", task_id, "&conv_parameter", in_func_, out_func_); "tmp_buffer_address_list", kDefaultTaskId, "&conv_parameter", in_func_, out_func_);
context->AppendCode(code.str()); context->AppendCode(code.str());
return RET_OK; return RET_OK;
} }

@ -32,7 +32,13 @@ int GatherFP32Coder::DoCode(CoderContext *context) {
Tensor *input1 = input_tensors_.at(1); Tensor *input1 = input_tensors_.at(1);
// generate code .h .c // generate code .h .c
Collect(context, {"nnacl/fp32/gather.h"}, {"nnacl/fp32/gather.c"}); Collect(context,
{
"nnacl/fp32/gather.h",
},
{
"nnacl/fp32/gather.c",
});
NNaclFp32Serializer code; NNaclFp32Serializer code;
std::vector<int> in_shape = input0->shape(); std::vector<int> in_shape = input0->shape();

@ -124,19 +124,39 @@ int MatMulFP32BaseCoder::Prepare(CoderContext *const context) { return RET_OK; }
int MatMulFP32BaseCoder::DoCode(CoderContext *const context) { int MatMulFP32BaseCoder::DoCode(CoderContext *const context) {
// generate code .h .c // generate code .h .c
std::vector<std::string> asm_files; Collect(context,
{
"nnacl/fp32/matmul_fp32.h",
"wrapper/fp32/matmul_fp32_wrapper.h",
},
{
"matmul_fp32.c",
"matmul_fp32_wrapper.c",
});
if (target_ == kARM32A) { if (target_ == kARM32A) {
asm_files = {"MatmulFp32.S", "MatmulFp32Opt.S", "MatmulFp32Opt12x4.S"}; Collect(context, {}, {},
{
"MatmulFp32.S",
"MatmulFp32Opt.S",
"MatmulFp32Opt12x4.S",
});
} else if (target_ == kARM64) { } else if (target_ == kARM64) {
asm_files = {"MatmulFp32.S", "MatmulFp32Opt.S", "MatVecMulFp32.S"}; Collect(context, {}, {},
{
"MatmulFp32.S",
"MatmulFp32Opt.S",
"MatVecMulFp32.S",
});
} }
std::vector<std::string> h_files = {"nnacl/fp32/matmul_fp32.h", "wrapper/fp32/matmul_fp32_wrapper.h"};
std::vector<std::string> c_files = {"matmul_fp32.c", "matmul_fp32_wrapper.c"};
if (de_quant_flag_) { if (de_quant_flag_) {
h_files.emplace_back("wrapper/fp32/dequant_int8_to_fp32_wrapper.h"); Collect(context,
c_files.emplace_back("dequant_int8_to_fp32_wrapper.c"); {
"wrapper/fp32/dequant_int8_to_fp32_wrapper.h",
},
{
"dequant_int8_to_fp32_wrapper.c",
});
} }
Collect(context, h_files, c_files, asm_files);
NNaclFp32Serializer code; NNaclFp32Serializer code;
NNaclFp32Serializer init_code; NNaclFp32Serializer init_code;
code.CodeStruct("mat_mul_parameter", *params_); code.CodeStruct("mat_mul_parameter", *params_);

@ -27,7 +27,13 @@ int Nchw2NhwcFP32Coder::Prepare(CoderContext *const context) { return RET_OK; }
int Nchw2NhwcFP32Coder::DoCode(CoderContext *context) { int Nchw2NhwcFP32Coder::DoCode(CoderContext *context) {
// generate code .h .c // generate code .h .c
Collect(context, {"nnacl/pack.h"}, {"nnacl/pack.c"}); Collect(context,
{
"nnacl/pack.h",
},
{
"nnacl/pack.c",
});
NNaclFp32Serializer code; NNaclFp32Serializer code;
if (input_tensor_->shape().size() == 4) { if (input_tensor_->shape().size() == 4) {
if (input_tensor_->data_type() == kNumberTypeFloat32) { if (input_tensor_->data_type() == kNumberTypeFloat32) {

@ -25,7 +25,13 @@ int Nhwc2NchwFP32Coder::Prepare(CoderContext *const context) { return RET_OK; }
int Nhwc2NchwFP32Coder::DoCode(CoderContext *const context) { int Nhwc2NchwFP32Coder::DoCode(CoderContext *const context) {
// generate code .h .c // generate code .h .c
Collect(context, {"nnacl/pack.h"}, {"pack.c"}); Collect(context,
{
"nnacl/pack.h",
},
{
"pack.c",
});
NNaclFp32Serializer code; NNaclFp32Serializer code;
if (input_tensor_->shape().size() == 4) { if (input_tensor_->shape().size() == 4) {

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save