diff --git a/mindspore/lite/nnacl/fp16/pad_fp16.c b/mindspore/lite/nnacl/fp16/pad_fp16.c
index 2ff775a867..0dd833af6b 100644
--- a/mindspore/lite/nnacl/fp16/pad_fp16.c
+++ b/mindspore/lite/nnacl/fp16/pad_fp16.c
@@ -33,3 +33,10 @@ void PadFp16(const float16_t *input_data, float16_t *output_data, const int *inp
     }
   }
 }
+
+void MirrorPadFp16(const float16_t *input_data, float16_t *output_data, const int *input_shape,
+                   const PadParameter *pad_param, int begin, int end) {
+  for (int i = begin; i < end; ++i) {
+    output_data[i] = input_data[GetInputFlattenIndex(i, input_shape, pad_param)];
+  }
+}
diff --git a/mindspore/lite/nnacl/fp16/pad_fp16.h b/mindspore/lite/nnacl/fp16/pad_fp16.h
index e6c783829f..8954783c46 100644
--- a/mindspore/lite/nnacl/fp16/pad_fp16.h
+++ b/mindspore/lite/nnacl/fp16/pad_fp16.h
@@ -19,12 +19,15 @@
 #ifdef ENABLE_NEON
 #include <arm_neon.h>
 #endif
+#include "nnacl/fp32/pad.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 void PadFp16(const float16_t *input_data, float16_t *output_data, const int *input_shape, const int *output_shape,
              const int *paddings, const int tid, const int thread_num);
+void MirrorPadFp16(const float16_t *input_data, float16_t *output_data, const int *input_shape,
+                   const PadParameter *pad_param, int begin, int end);
 #ifdef __cplusplus
 }
 #endif
diff --git a/mindspore/lite/nnacl/fp32/pad.h b/mindspore/lite/nnacl/fp32/pad.h
index ad33109476..0b793442b2 100644
--- a/mindspore/lite/nnacl/fp32/pad.h
+++ b/mindspore/lite/nnacl/fp32/pad.h
@@ -31,6 +31,9 @@ void Pad(const float *input_data, float *output_data, const int *input_shape, co
          const int *paddings, const int tid, const int thread_num);
 void MirrorPad(const float *input_data, float *output_data, const int *input_shape, const PadParameter *pad_param,
                int begin, int end);
+
+int TransOut2InputDimIndex(int out_dim_index, int left_pad, int in_dim, int offset);
+int GetInputFlattenIndex(int out_flatten_index, const int *input_shape, const PadParameter *pad_param);
 #ifdef __cplusplus
 }
 #endif
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.cc
index 699c3f31ae..808935b449 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.cc
@@ -32,6 +32,14 @@ int PadFp16CPUKernel::RunImpl(int task_id) {
   return RET_OK;
 }
 
+int PadFp16CPUKernel::RunMirrorPadImpl(int task_id) {
+  int unit = UP_DIV(out_tensors_.at(0)->ElementsNum(), context_->thread_num_);
+  int begin = unit * task_id;
+  int end = MSMIN(begin + unit, out_tensors_.at(0)->ElementsNum());
+  MirrorPadFp16(input_, output_, in_, pad_param_, begin, end);
+  return RET_OK;
+}
+
 int PadFp16CPUKernel::Run() {
   auto input_tensor = in_tensors_.at(0);
   auto output_tensor = out_tensors_.at(0);
@@ -45,18 +53,26 @@ int PadFp16CPUKernel::Run() {
     MS_LOG(ERROR) << "input or output is nullptr";
     return RET_ERROR;
   }
-
-  if (pad_param_->constant_value_ - 0.0f < 1e-5) {
-    memset(output_, 0, output_tensor->ElementsNum() * sizeof(float16_t));
+  int ret = 0;
+  if (pad_param_->pad_mode_ == static_cast<int>(schema::PaddingMode_CONSTANT)) {
+    if (pad_param_->constant_value_ - 0.0f < 1e-5) {
+      memset(output_, 0, output_tensor->ElementsNum() * sizeof(float16_t));
+    } else {
+      for (int i = 0; i < output_tensor->ElementsNum(); ++i) {
+        output_[i] = pad_param_->constant_value_;
+      }
+    }
+    ret = ParallelLaunch(this->context_->thread_pool_, PadImpl, this, op_parameter_->thread_num_);
+    if (ret != RET_OK) {
+      MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
+    }
   } else {
-    for (int i = 0; i < output_tensor->ElementsNum(); ++i) {
-      output_[i] = pad_param_->constant_value_;
+    HandleMirrorPad();
+    ret = ParallelLaunch(this->context_->thread_pool_, MirrorPadImpl, this, context_->thread_num_);
+    if (ret != RET_OK) {
+      MS_LOG(ERROR) << "Pad Reflect or Symmetric mode run error, error_code[" << ret << "]";
     }
   }
-  auto ret = ParallelLaunch(this->context_->thread_pool_, PadImpl, this, op_parameter_->thread_num_);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
-  }
   if (is_output_fp32_) {
     Float16ToFloat32(output_, reinterpret_cast<float *>(output_tensor->MutableData()), output_tensor->ElementsNum());
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.h
index 5f96fb4b6e..fe3dd1021b 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.h
@@ -32,6 +32,7 @@ class PadFp16CPUKernel : public PadCPUKernel {
 
   int Run() override;
   int RunImpl(int task_id) override;
+  int RunMirrorPadImpl(int task_id) override;
 
  private:
   void FreeInputAndOutput();
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/pad.h b/mindspore/lite/src/runtime/kernel/arm/fp32/pad.h
index 4080927d02..c6bb02221f 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/pad.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/pad.h
@@ -38,10 +38,9 @@ class PadCPUKernel : public LiteKernel {
   int ReSize() override;
   int Run() override;
   virtual int RunImpl(int task_id);
-  int RunMirrorPadImpl(int task_id);
+  virtual int RunMirrorPadImpl(int task_id);
 
  private:
-  int HandleMirrorPad();
   int CheckPaddings(int *paddings, int length, int *input_shape, int mode);
   int CopyPaddingFromInput();
   void CalculateStrides();
@@ -49,6 +48,9 @@ class PadCPUKernel : public LiteKernel {
   int ExtendPaddings(int *paddings, int length, const int *ori_paddings, int ori_length);
 
  protected:
+  int HandleMirrorPad();
   PadParameter *pad_param_ = nullptr;
   int in_[4] = {0};
   int out_[4] = {0};
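Note: this patch only declares the fp32 helpers TransOut2InputDimIndex and GetInputFlattenIndex in nnacl/fp32/pad.h; their definitions are presumably in the existing fp32 pad code and are not part of this diff. As a rough, self-contained sketch of the kind of output-to-input index mapping that MirrorPadFp16 relies on, the C program below mirrors each per-dimension output coordinate back into the input range and re-flattens it. MirrorPadSketchParam, MirrorDimIndex, and MirrorFlattenIndex are hypothetical names for this illustration only, not nnacl APIs, and the real helpers work from PadParameter fields and precomputed strides.

```c
// A minimal sketch (not the nnacl implementation) of mapping a mirror-padded
// output element back to its source input element.
#include <stdio.h>

#define SKETCH_DIMS 4

typedef struct {
  int paddings[2 * SKETCH_DIMS];  // (left, right) pad pair per dimension
  int offset;                     // assumed: 1 for REFLECT, 0 for SYMMETRIC
} MirrorPadSketchParam;

// Reflect one per-dimension output coordinate back into [0, in_dim).
static int MirrorDimIndex(int out_dim_index, int left_pad, int in_dim, int offset) {
  int idx = out_dim_index - left_pad;
  if (idx < 0) {
    idx = -idx - 1 + offset;              // mirrored across the left edge
  } else if (idx >= in_dim) {
    idx = 2 * in_dim - 1 - idx - offset;  // mirrored across the right edge
  }
  return idx;
}

// Flattened output index -> flattened input index.
static int MirrorFlattenIndex(int out_flat, const int *in_shape, const MirrorPadSketchParam *param) {
  int in_strides[SKETCH_DIMS];
  int out_strides[SKETCH_DIMS];
  int in_stride = 1;
  int out_stride = 1;
  for (int i = SKETCH_DIMS - 1; i >= 0; --i) {
    int out_dim = in_shape[i] + param->paddings[2 * i] + param->paddings[2 * i + 1];
    in_strides[i] = in_stride;
    out_strides[i] = out_stride;
    in_stride *= in_shape[i];
    out_stride *= out_dim;
  }
  int in_flat = 0;
  for (int i = 0; i < SKETCH_DIMS; ++i) {
    int out_dim_index = out_flat / out_strides[i];
    out_flat %= out_strides[i];
    in_flat += MirrorDimIndex(out_dim_index, param->paddings[2 * i], in_shape[i], param->offset) * in_strides[i];
  }
  return in_flat;
}

int main(void) {
  // 1x1x1x4 input, REFLECT-padded by 2 on each side of the last axis -> 1x1x1x8 output.
  const int in_shape[SKETCH_DIMS] = {1, 1, 1, 4};
  const float input[4] = {1.0f, 2.0f, 3.0f, 4.0f};
  MirrorPadSketchParam param = {{0, 0, 0, 0, 0, 0, 2, 2}, 1};
  for (int i = 0; i < 8; ++i) {
    printf("%.0f ", input[MirrorFlattenIndex(i, in_shape, &param)]);
  }
  printf("\n");  // prints: 3 2 1 2 3 4 3 2
  return 0;
}
```

With offset = 1 the edge element is not repeated (REFLECT); with offset = 0 it is (SYMMETRIC), which is the usual distinction between the two mirror modes that the "Pad Reflect or Symmetric" branch above handles.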