diff --git a/mindspore/lite/nnacl/base/reshape_base.h b/mindspore/lite/nnacl/base/reshape_base.h deleted file mode 100644 index d2b12302c8..0000000000 --- a/mindspore/lite/nnacl/base/reshape_base.h +++ /dev/null @@ -1,35 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef MINDSPORE_LITE_NNACL_RESHAHPE_BASE_H_ -#define MINDSPORE_LITE_NNACL_RESHAHPE_BASE_H_ - -#include -#include "nnacl/op_base.h" - -#ifdef __cplusplus -extern "C" { -#endif - -inline void Reshape(const void *input_ptr, void *output_ptr, size_t data_size) { - memcpy(output_ptr, input_ptr, data_size); -} - -#ifdef __cplusplus -} -#endif - -#endif // MINDSPORE_LITE_NNACL_RESHAHPE_BASE_H_ diff --git a/mindspore/lite/nnacl/base/squeeze_base.h b/mindspore/lite/nnacl/base/squeeze_base.h deleted file mode 100644 index 5f3ea2da4e..0000000000 --- a/mindspore/lite/nnacl/base/squeeze_base.h +++ /dev/null @@ -1,38 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef MINDSPORE_LITE_NNACL_SQUEEZE_BASE_H_
-#define MINDSPORE_LITE_NNACL_SQUEEZE_BASE_H_
-
-#include "nnacl/errorcode.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-static inline int DoSqueeze(const void *input_ptr, void *output_ptr, size_t data_size) {
-  if (input_ptr == NULL || output_ptr == NULL) {
-    return NNACL_ERR;
-  }
-  (void)memcpy(output_ptr, input_ptr, data_size);
-  return NNACL_OK;
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif  // MINDSPORE_LITE_NNACL_SQUEEZE_BASE_H_
diff --git a/mindspore/lite/nnacl/op_base.h b/mindspore/lite/nnacl/op_base.h
index f9cb3d6c44..82105f8e97 100644
--- a/mindspore/lite/nnacl/op_base.h
+++ b/mindspore/lite/nnacl/op_base.h
@@ -64,6 +64,7 @@
 
 typedef enum LiteDataType {
   kDataTypeFloat,
+  kDataTypeFloat16,
   kDataTypeInt,
   kDataTypeInt8,
   KDataTypeBool,
diff --git a/mindspore/lite/nnacl/strided_slice.c b/mindspore/lite/nnacl/strided_slice.c
index f227a082f8..a7a14022bc 100644
--- a/mindspore/lite/nnacl/strided_slice.c
+++ b/mindspore/lite/nnacl/strided_slice.c
@@ -108,6 +108,10 @@ int DoStridedSlice(const void *in_data, void *out_data, StridedSliceParameter *p
                 *((int8_t *)out_data + out_offset) = *((int8_t *)in_data + in_offset);
               } else if (param->data_type == kDataTypeInt) {
                 *((int32_t *)out_data + out_offset) = *((int32_t *)in_data + in_offset);
+#ifdef ENABLE_ARM64
+              } else if (param->data_type == kDataTypeFloat16) {
+                *((float16_t *)out_data + out_offset) = *((float16_t *)in_data + in_offset);
+#endif
               } else {
                 return NNACL_ERR;
               }
@@ -120,3 +124,18 @@ int DoStridedSlice(const void *in_data, void *out_data, StridedSliceParameter *p
   }
   return NNACL_OK;
 }
+
+// Copies `split_len` chunks of `inner_size` bytes for each of the `outer` slices. Within a slice,
+// consecutive source chunks are `stride` chunks apart; slice `i` starts `i * in_offset` bytes into
+// `input`. The output is written contiguously.
+void FastStride(const uint8_t *input, uint8_t *output, int split_len, int stride, size_t outer, size_t inner_size,
+                size_t in_offset) {
+  for (size_t i = 0; i < outer; ++i) {
+    const uint8_t *input_ptr = input + i * in_offset;
+    for (int j = 0; j < split_len; ++j) {
+      memcpy(output, input_ptr, inner_size);
+      output += inner_size;
+      input_ptr += inner_size * stride;
+    }
+  }
+}
diff --git a/mindspore/lite/nnacl/strided_slice.h b/mindspore/lite/nnacl/strided_slice.h
index 9d3d353990..2e0ff78a4f 100644
--- a/mindspore/lite/nnacl/strided_slice.h
+++ b/mindspore/lite/nnacl/strided_slice.h
@@ -39,6 +39,9 @@ typedef struct StridedSliceParameter {
 extern "C" {
 #endif
 int DoStridedSlice(const void *inputs, void *output, StridedSliceParameter *param);
+
+void FastStride(const uint8_t *input, uint8_t *output, int split_len, int stride, size_t outer, size_t inner_size,
+                size_t in_offset);
 #ifdef __cplusplus
 }
 #endif
diff --git a/mindspore/lite/src/ops/populate/expand_dims_populate.cc b/mindspore/lite/src/ops/populate/expand_dims_populate.cc
index 63901a9993..bb62cc477c 100644
--- a/mindspore/lite/src/ops/populate/expand_dims_populate.cc
+++ b/mindspore/lite/src/ops/populate/expand_dims_populate.cc
@@ -26,6 +26,8 @@ OpParameter *PopulateExpandDimsParameter(const mindspore::lite::PrimitiveC *prim
     MS_LOG(ERROR) << "malloc ExpandDimsParameter failed.";
     return nullptr;
   }
   memset(expand_dims_param, 0, sizeof(OpParameter));
+  // Assign the type only after the header has been zeroed; setting it before the memset would wipe it.
+  expand_dims_param->type_ = primitive->Type();
   return reinterpret_cast<OpParameter *>(expand_dims_param);
 }
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc
new file mode 100644
index 0000000000..9da7d0e779
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc
@@ -0,0 +1,78 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "src/runtime/kernel/arm/base/reshape_base.h"
+#include <cstring>
+#include "schema/model_generated.h"
+#include "src/kernel_registry.h"
+#include "include/errorcode.h"
+
+using mindspore::kernel::KERNEL_ARCH::kCPU;
+using mindspore::lite::KernelRegistrar;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
+using mindspore::schema::PrimitiveType_Reshape;
+
+namespace mindspore::kernel {
+int ReshapeBaseCPUKernel::Init() { return ReSize(); }
+
+// Splits the input's byte size across the configured threads, rounding the per-thread share up.
+int ReshapeBaseCPUKernel::ReSize() {
+  int in_data_size = in_tensors_.front()->Size();
+  int thread_num = context_->thread_num_;
+  cal_max_num_per_thread_ = UP_DIV(in_data_size, thread_num);
+  return RET_OK;
+}
+
+// Copies the byte range assigned to `task_id` from the input buffer to the output buffer.
+int ReshapeBaseCPUKernel::RunImpl(int task_id) {
+  size_t total_size = in_tensors_.front()->Size();
+  size_t start_index = task_id * cal_max_num_per_thread_;
+  // The per-thread share is rounded up, so trailing tasks can start at or past the end of the data.
+  // Return early so the unsigned subtraction below cannot wrap around and copy out of bounds.
+  if (start_index >= total_size) {
+    return RET_OK;
+  }
+  size_t data_size = total_size - start_index;
+  data_size = data_size > cal_max_num_per_thread_ ? cal_max_num_per_thread_ : data_size;
+  memcpy(output_ptr_ + start_index, input_ptr_ + start_index, data_size);
+  return RET_OK;
+}
+
+int ReshapeRun(void *cdata, int task_id) {
+  auto reshape = reinterpret_cast<ReshapeBaseCPUKernel *>(cdata);
+  auto ret = reshape->RunImpl(task_id);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "ReshapeRun error task_id[" << task_id << "] error_code[" << ret << "]";
+    return ret;
+  }
+  return RET_OK;
+}
+
+int ReshapeBaseCPUKernel::Run() {
+  input_ptr_ = reinterpret_cast<uint8_t *>(in_tensors_.at(kInputIndex)->data_c());
+  output_ptr_ = reinterpret_cast<uint8_t *>(out_tensors_.at(kOutputIndex)->data_c());
+  auto ret = ParallelLaunch(this->context_->thread_pool_, ReshapeRun, this, context_->thread_num_);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Reshape run error error_code[" << ret << "]";
+    return ret;
+  }
+  return RET_OK;
+}
+
+REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Reshape, LiteKernelCreator<ReshapeBaseCPUKernel>)
+REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Reshape, LiteKernelCreator<ReshapeBaseCPUKernel>)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Reshape, LiteKernelCreator<ReshapeBaseCPUKernel>)
+}  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/reshape_fp16.h b/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.h
similarity index 62%
rename from mindspore/lite/src/runtime/kernel/arm/fp16/reshape_fp16.h
rename to mindspore/lite/src/runtime/kernel/arm/base/reshape_base.h
index 58a93984ba..064e11dec1 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/reshape_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.h
@@ -13,32 +13,33 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
*/ - -#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_RESHAPE_H_ -#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_RESHAPE_H_ +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_RESHAPE_BASE_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_RESHAPE_BASE_H_ #include -#include "nnacl/fp16/cast_fp16.h" -#include "nnacl/base/reshape_base.h" #include "src/lite_kernel.h" #include "include/context.h" -#include "src/runtime/kernel/arm/fp32/reshape_fp32.h" using mindspore::lite::InnerContext; - namespace mindspore::kernel { -class ReshapeFp16CPUKernel : public ReshapeCPUKernel { +class ReshapeBaseCPUKernel : public LiteKernel { public: - ReshapeFp16CPUKernel(OpParameter *parameter, const std::vector &inputs, + ReshapeBaseCPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) - : ReshapeCPUKernel(parameter, inputs, outputs, ctx, primitive) {} - ~ReshapeFp16CPUKernel() = default; + : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} + ~ReshapeBaseCPUKernel() override = default; + int Init() override; + int ReSize() override; int Run() override; + int RunImpl(int task_id); private: + size_t cal_max_num_per_thread_ = 0; + uint8_t *input_ptr_ = nullptr; + uint8_t *output_ptr_ = nullptr; }; } // namespace mindspore::kernel -#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_RESHAPE_H_ +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_RESHAPE_BASE_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/squeeze_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.cc similarity index 61% rename from mindspore/lite/src/runtime/kernel/arm/fp32/squeeze_fp32.cc rename to mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.cc index c5c3da59ba..2be895eddd 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/squeeze_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.cc @@ -13,34 +13,15 @@ * See the License for the 
specific language governing permissions and * limitations under the License. */ - -#include "src/runtime/kernel/arm/fp32/squeeze_fp32.h" -#include "schema/model_generated.h" +#include "src/runtime/kernel/arm/base/squeeze_base.h" #include "src/kernel_registry.h" -#include "include/errorcode.h" +#include "schema/model_generated.h" using mindspore::lite::KernelRegistrar; -using mindspore::lite::RET_ERROR; -using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_Squeeze; - namespace mindspore::kernel { -int SqueezeCPUKernel::Init() { return RET_OK; } - -int SqueezeCPUKernel::ReSize() { return RET_OK; } - -int SqueezeCPUKernel::Run() { - size_t data_size = in_tensors_.front()->Size(); - int ret = DoSqueeze(in_tensors_.front()->data_c(), out_tensors_.front()->data_c(), data_size); - - if (ret != RET_OK) { - MS_LOG(ERROR) << "Do squeeze fail!ret: " << ret; - return RET_ERROR; - } - return RET_OK; -} - -REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Squeeze, LiteKernelCreator) -REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Squeeze, LiteKernelCreator) -REG_KERNEL(kCPU, kNumberTypeBool, PrimitiveType_Squeeze, LiteKernelCreator) +REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Squeeze, LiteKernelCreator) +REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Squeeze, LiteKernelCreator) +REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Squeeze, LiteKernelCreator) +REG_KERNEL(kCPU, kNumberTypeBool, PrimitiveType_Squeeze, LiteKernelCreator) } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/reshape_fp32.h b/mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.h similarity index 50% rename from mindspore/lite/src/runtime/kernel/arm/fp32/reshape_fp32.h rename to mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.h index 400dfe1f3e..e9a3a1dd1a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/reshape_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.h @@ -13,30 +13,22 @@ * See the License 
for the specific language governing permissions and * limitations under the License. */ - -#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_RESHAPE_H_ -#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_RESHAPE_H_ +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_SQUEEZE_BASE_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_SQUEEZE_BASE_H_ #include -#include "src/lite_kernel.h" -#include "include/context.h" -#include "nnacl/base/reshape_base.h" +#include "src/runtime/kernel/arm/base/reshape_base.h" using mindspore::lite::InnerContext; - namespace mindspore::kernel { -class ReshapeCPUKernel : public LiteKernel { +class SqueezeBaseCPUKernel : public ReshapeBaseCPUKernel { public: - ReshapeCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const InnerContext *ctx, - const mindspore::lite::PrimitiveC *primitive) - : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} - ~ReshapeCPUKernel() = default; - - int Init() override; - int ReSize() override; - int Run() override; + SqueezeBaseCPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx, + const mindspore::lite::PrimitiveC *primitive) + : ReshapeBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} + ~SqueezeBaseCPUKernel() override = default; }; } // namespace mindspore::kernel -#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_RESHAPE_H_ +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_SQUEEZE_BASE_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/base/stack_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/stack_base.cc index a7ebac6b46..3b327a0e5a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/stack_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/stack_base.cc @@ -27,7 +27,7 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_Stack; namespace mindspore::kernel { -static int GetCopyNum(const std::vector &in_shape, int axis, int n_dim) { 
+static inline int GetCopyNum(const std::vector<int> &in_shape, int axis, int n_dim) {
   int copy_num = 1;
   if (axis > 0) {
     for (int j = n_dim - 1; j > axis - 1; j--) {
@@ -41,12 +41,12 @@ static int GetCopyNum(const std::vector<int> &in_shape, int axis, int n_dim) {
   return copy_num;
 }
 
-static size_t GetOutterSize(const std::vector<int> &in_shape, int axis) {
-  size_t outter_size = 1;
+static inline size_t GetOuterSize(const std::vector<int> &in_shape, int axis) {
+  size_t outer_size = 1;
   for (int i = 0; i < axis; ++i) {
-    outter_size *= in_shape[i];
+    outer_size *= in_shape[i];
   }
-  return outter_size;
+  return outer_size;
 }
 
 int StackBaseCPUKernel::ReSize() {
@@ -59,14 +59,13 @@ int StackBaseCPUKernel::ReSize() {
   } else {
     MS_ASSERT(input_nums > 1);
     copy_size_ = GetCopyNum(input0_shape, axis_, input0_shape.size()) * data_type_size_;
-    outter_size_ = GetOutterSize(input0_shape, axis_);
+    outer_size_ = GetOuterSize(input0_shape, axis_);
   }
   return RET_OK;
 }
 
 int StackBaseCPUKernel::Init() {
-  auto input0_tensor = in_tensors_.front();
-  data_type_size_ = input0_tensor->Size() / input0_tensor->ElementsNum();
+  data_type_size_ = sizeof(float);
   if (!InferShapeDone()) {
     return RET_OK;
   }
@@ -74,13 +73,21 @@ int StackBaseCPUKernel::Init() {
 }
 
 int StackBaseCPUKernel::Run() {
+  // malloc temporary memory to store all the inputs
   size_t inputs_num = in_tensors_.size();
   char **all_inputs = static_cast<char **>(context_->allocator->Malloc(inputs_num * sizeof(char *)));
+  if (all_inputs == nullptr) {
+    MS_LOG(ERROR) << "malloc all_inputs failed.";
+    return RET_ERROR;
+  }
   for (size_t j = 0; j < inputs_num; ++j) {
     all_inputs[j] = reinterpret_cast<char *>(in_tensors_.at(j)->data_c());
   }
+  // run stack
   auto output_data = reinterpret_cast<char *>(out_tensors_.at(0)->data_c());
-  Stack(all_inputs, output_data, in_tensors_.size(), copy_size_, outter_size_);
+  Stack(all_inputs, output_data, in_tensors_.size(), copy_size_, outer_size_);
+
+  // free temporary variable all_inputs
   context_->allocator->Free(all_inputs);
   return RET_OK;
 }
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/stack_base.h b/mindspore/lite/src/runtime/kernel/arm/base/stack_base.h
index d78ecf76a9..4ea68271c2 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/stack_base.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/stack_base.h
@@ -38,7 +38,7 @@ class StackBaseCPUKernel : public LiteKernel {
   int axis_ = 0;
   size_t data_type_size_ = 0;
   size_t copy_size_ = 0;
-  size_t outter_size_ = 1;
+  size_t outer_size_ = 1;
 };
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_STACK_BASE_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/strided_slice.cc b/mindspore/lite/src/runtime/kernel/arm/base/strided_slice.cc
index 77c896d405..31479ec782 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/strided_slice.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/strided_slice.cc
@@ -33,11 +33,43 @@ int StridedSliceCPUKernel::Init() {
   if (!InferShapeDone()) {
     return RET_OK;
   }
-
   return ReSize();
 }
 
+// Precomputes the fast-path geometry for the single sliced axis found by MatchFastPattern(): the
+// number of outer slices, the byte size of one inner chunk, and how work is split across threads.
+void StridedSliceCPUKernel::InitFastRunParam() {
+  auto in_shape = in_tensors_.front()->shape();
+  auto out_shape = out_tensors_.front()->shape();
+  // ReSize() can run more than once; reset derived state so it does not accumulate across calls.
+  outer_ = 1;
+  parallel_on_split_axis_ = false;
+  parallel_on_outer_ = false;
+  // cal inner, outer
+  for (int i = 0; i < split_axis_; ++i) {
+    outer_ *= in_shape[i];
+  }
+  int inner = 1;
+  for (size_t i = split_axis_ + 1; i < in_shape.size(); i++) {
+    inner *= in_shape[i];
+  }
+  inner_size_ = in_tensors_.front()->Size() / in_tensors_.front()->ElementsNum() * inner;
+
+  // decide multi-thread launch strategy
+  if (outer_ == 1) {
+    parallel_on_split_axis_ = true;
+    cal_num_per_thread_ = UP_DIV(out_shape[split_axis_], context_->thread_num_);
+  } else {
+    parallel_on_outer_ = true;
+    cal_num_per_thread_ = UP_DIV(outer_, context_->thread_num_);
+  }
+}
+
 int StridedSliceCPUKernel::ReSize() {
+  fast_run_ = MatchFastPattern();
+  if (fast_run_) {
+    InitFastRunParam();
+  }
   if (op_parameter_ != nullptr) {
     free(op_parameter_);
     op_parameter_ = nullptr;
@@ -51,7 +83,91 @@ int StridedSliceCPUKernel::ReSize() {
   return RET_OK;
 }
 
-int StridedSliceCPUKernel::Run() {
+bool StridedSliceCPUKernel::MatchFastPattern() {
+  // Check whether the input and output shapes differ in exactly one dimension.
+  // If so, the slice can be performed with plain strided block copies.
+  // Example 1:
+  // input shape info:  [1, 80, 46, 40]
+  // output shape info: [1, 80, 20, 40]
+  // Example 2:
+  // input shape info:  [1, 46, 40]
+  // output shape info: [1, 20, 40]
+  auto in_shape = in_tensors_.front()->shape();
+  auto out_shape = out_tensors_.front()->shape();
+  if (in_shape.size() != out_shape.size()) {
+    return false;
+  }
+  std::vector<int> axis_list;
+  for (size_t i = 0; i < in_shape.size(); ++i) {
+    if (in_shape[i] != out_shape[i]) {
+      axis_list.emplace_back(i);
+    }
+  }
+  if (axis_list.size() == 1) {
+    split_axis_ = axis_list.front();
+    return true;
+  }
+  return false;
+}
+
+// Fast-path worker: copies the share of the slice assigned to `task_id`.
+int StridedSliceCPUKernel::FastRunImpl(int task_id) {
+  auto in_shape = in_tensors_.front()->shape();
+  auto out_shape = out_tensors_.front()->shape();
+  int begin_index = param_->begins_[split_axis_];
+  int caled_num = task_id * cal_num_per_thread_;
+  if (parallel_on_outer_) {
+    int cur_outer = outer_ - caled_num;
+    // The per-thread share is rounded up, so trailing tasks may have no work left. Bail out here: a
+    // non-positive count would wrap to a huge size_t when passed to FastStride.
+    if (cur_outer <= 0) {
+      return RET_OK;
+    }
+    if (cur_outer > cal_num_per_thread_) {
+      cur_outer = cal_num_per_thread_;
+    }
+    uint8_t *cur_in_ptr = input_ptr_ + (caled_num * in_shape[split_axis_] + begin_index) * inner_size_;
+    uint8_t *cur_out_ptr = output_ptr_ + caled_num * out_shape[split_axis_] * inner_size_;
+    FastStride(cur_in_ptr, cur_out_ptr, out_shape[split_axis_], param_->strides_[split_axis_], cur_outer, inner_size_,
+               in_shape[split_axis_] * inner_size_);
+  } else {
+    MS_ASSERT(parallel_on_split_axis_);
+    int cal_axis_num = out_shape[split_axis_] - caled_num;
+    if (cal_axis_num <= 0) {
+      return RET_OK;
+    }
+    if (cal_axis_num > cal_num_per_thread_) {
+      cal_axis_num = cal_num_per_thread_;
+    }
+    uint8_t *cur_in_ptr = input_ptr_ + (caled_num * param_->strides_[split_axis_] + begin_index) * inner_size_;
+    uint8_t *cur_out_ptr = output_ptr_ + caled_num * inner_size_;
+    FastStride(cur_in_ptr, cur_out_ptr, cal_axis_num, param_->strides_[split_axis_], 1, inner_size_, 0);
+  }
+  return RET_OK;
+}
+
+int StrideRun(void *cdata, int task_id) {
+  auto stride = reinterpret_cast<StridedSliceCPUKernel *>(cdata);
+  auto ret = stride->FastRunImpl(task_id);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "StrideRun error task_id[" << task_id << "] error_code[" << ret << "]";
+    return ret;
+  }
+  return RET_OK;
+}
+
+int StridedSliceCPUKernel::FastRun() {
+  input_ptr_ = reinterpret_cast<uint8_t *>(in_tensors_.front()->data_c());
+  output_ptr_ = reinterpret_cast<uint8_t *>(out_tensors_.front()->data_c());
+  auto ret = ParallelLaunch(this->context_->thread_pool_, StrideRun, this, context_->thread_num_);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Stride run error error_code[" << ret << "]";
+    return ret;
+  }
+  return RET_OK;
+}
+
+int StridedSliceCPUKernel::NormalRun() {
   auto input = in_tensors_.at(0);
   MS_ASSERT(input);
   switch (input->data_type()) {
@@ -61,6 +177,9 @@ int StridedSliceCPUKernel::Run() {
     case kNumberTypeFloat32:
       param_->data_type = kDataTypeFloat;
       break;
+    case kNumberTypeFloat16:
+      param_->data_type = kDataTypeFloat16;
+      break;
     case kNumberTypeInt32:
       param_->data_type = kDataTypeInt;
       break;
@@ -78,7 +197,15 @@ int StridedSliceCPUKernel::Run() {
   return RET_OK;
 }
 
+int StridedSliceCPUKernel::Run() {
+  if (fast_run_) {
+    return FastRun();
+  }
+  return NormalRun();
+}
+
 REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_StridedSlice, LiteKernelCreator<StridedSliceCPUKernel>)
 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_StridedSlice, LiteKernelCreator<StridedSliceCPUKernel>)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_StridedSlice, LiteKernelCreator<StridedSliceCPUKernel>)
 REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_StridedSlice, LiteKernelCreator<StridedSliceCPUKernel>)
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/strided_slice.h b/mindspore/lite/src/runtime/kernel/arm/base/strided_slice.h
index 0de0becec2..2e9f228a0f 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/strided_slice.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/strided_slice.h
@@ -35,9 +35,23 @@ class StridedSliceCPUKernel : public LiteKernel {
   int Init() override;
   int ReSize() override;
   int Run() override;
+  bool MatchFastPattern();
+  void InitFastRunParam();
+  int NormalRun();
+  int FastRun();
+  int FastRunImpl(int task_id);
 
  private:
   StridedSliceParameter *param_;
+  uint8_t *input_ptr_ = nullptr;
+  uint8_t *output_ptr_ = nullptr;
+  int split_axis_{-1};
+  int outer_{1};
+  int cal_num_per_thread_{1};
+  size_t inner_size_{0};
+  bool fast_run_{false};
+  bool parallel_on_split_axis_{false};
+  bool parallel_on_outer_{false};
 };
 }  // namespace mindspore::kernel
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/reshape_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/reshape_fp16.cc
deleted file mode 100644
index 42d280768d..0000000000
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/reshape_fp16.cc
+++ /dev/null
@@ -1,43 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ - -#include "src/runtime/kernel/arm/fp16/reshape_fp16.h" -#include "schema/model_generated.h" -#include "src/kernel_registry.h" -#include "include/errorcode.h" - -using mindspore::kernel::KERNEL_ARCH::kCPU; -using mindspore::lite::KernelRegistrar; -using mindspore::lite::RET_ERROR; -using mindspore::lite::RET_OK; -using mindspore::schema::PrimitiveType_Reshape; - -namespace mindspore::kernel { - -int ReshapeFp16CPUKernel::Run() { - auto in_tensor = in_tensors_.at(kInputIndex); - auto out_tensor = out_tensors_.at(kOutputIndex); - - float16_t *input_ptr = reinterpret_cast(in_tensor->data_c()); - float16_t *output_ptr = reinterpret_cast(out_tensor->data_c()); - - Reshape(input_ptr, output_ptr, out_tensor->Size()); - - return RET_OK; -} - -REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Reshape, LiteKernelCreator) -} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc index 9864b33397..068478e51c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc @@ -82,7 +82,7 @@ int StackFp16CPUKernel::Run() { FreeBuffer(); return ret; } - Stack(buffers_.data(), reinterpret_cast(out_buffer_), in_tensors_.size(), copy_size_, outter_size_); + Stack(buffers_.data(), reinterpret_cast(out_buffer_), in_tensors_.size(), copy_size_, outer_size_); // if output tensor is fp32, we need to transform if (malloc_out_) { auto out_tensor = out_tensors_.at(0); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims_fp32.cc index a77031ef2c..93fac46927 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims_fp32.cc @@ -82,8 +82,8 @@ int ExpandDimsRun(void *cdata, int task_id) { } int ExpandDimsCPUKernel::Run() { - in_ptr_ = 
in_tensors_.at(0)->MutableData(); - out_ptr_ = out_tensors_.at(0)->MutableData(); + in_ptr_ = in_tensors_.at(0)->data_c(); + out_ptr_ = out_tensors_.at(0)->data_c(); auto ret = ParallelLaunch(this->context_->thread_pool_, ExpandDimsRun, this, thread_sz_count_); if (ret != RET_OK) { MS_LOG(ERROR) << "ExpandDimsRun error error_code[" << ret << "]"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/reshape_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/reshape_fp32.cc deleted file mode 100644 index d138775777..0000000000 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/reshape_fp32.cc +++ /dev/null @@ -1,45 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "src/runtime/kernel/arm/fp32/reshape_fp32.h" -#include "schema/model_generated.h" -#include "src/kernel_registry.h" -#include "include/errorcode.h" - -using mindspore::kernel::KERNEL_ARCH::kCPU; -using mindspore::lite::KernelRegistrar; -using mindspore::lite::RET_ERROR; -using mindspore::lite::RET_OK; -using mindspore::schema::PrimitiveType_Reshape; - -namespace mindspore::kernel { -int ReshapeCPUKernel::Init() { return RET_OK; } - -int ReshapeCPUKernel::ReSize() { return RET_OK; } - -int ReshapeCPUKernel::Run() { - auto input_ptr = in_tensors_.at(kInputIndex)->data_c(); - auto output_ptr = out_tensors_.at(kOutputIndex)->data_c(); - size_t data_size = in_tensors_.at(kInputIndex)->Size(); - MS_ASSERT(input_ptr); - MS_ASSERT(output_ptr); - Reshape(input_ptr, output_ptr, data_size); - return RET_OK; -} - -REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Reshape, LiteKernelCreator) -REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Reshape, LiteKernelCreator) -} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/split_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/split_fp32.cc index f0cf0e0272..31e0bc84b9 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/split_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/split_fp32.cc @@ -75,44 +75,18 @@ int SplitRun(void *cdata, int task_id) { int SplitCPUKernel::Run() { auto in_tensor = in_tensors_.front(); - input_ptr_ = reinterpret_cast(in_tensor->MutableData()); + input_ptr_ = reinterpret_cast(in_tensor->data_c()); for (int i = 0; i < param->num_split_; i++) { - output_ptr_.at(i) = reinterpret_cast(out_tensors_.at(i)->MutableData()); + output_ptr_.at(i) = reinterpret_cast(out_tensors_.at(i)->data_c()); } auto ret = ParallelLaunch(this->context_->thread_pool_, SplitRun, this, thread_n_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "Scale error error_code[" << ret << "]"; return RET_ERROR; } - return RET_OK; } -kernel::LiteKernel 
*CpuSplitInt32KernelCreator(const std::vector &inputs, - const std::vector &outputs, OpParameter *opParameter, - const InnerContext *ctx, const kernel::KernelKey &desc, - const mindspore::lite::PrimitiveC *primitive) { - if (opParameter == nullptr) { - MS_LOG(ERROR) << "Input opParameter is nullptr!"; - return nullptr; - } - MS_ASSERT(desc.type == schema::PrimitiveType_Split); - auto *kernel = new (std::nothrow) SplitCPUKernel(opParameter, inputs, outputs, ctx, primitive); - if (kernel == nullptr) { - MS_LOG(ERROR) << "new SplitCPUKernel fail!"; - free(opParameter); - return nullptr; - } - auto ret = kernel->Init(); - if (ret != RET_OK) { - delete kernel; - MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " - << schema::EnumNamePrimitiveType(static_cast(opParameter->type_)); - return nullptr; - } - return kernel; -} - REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Split, LiteKernelCreator) REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Split, LiteKernelCreator) } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/squeeze_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/squeeze_fp32.h deleted file mode 100644 index 5940d5c6e7..0000000000 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/squeeze_fp32.h +++ /dev/null @@ -1,40 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SQUEEZE_H_ -#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SQUEEZE_H_ - -#include -#include "src/lite_kernel.h" -#include "nnacl/base/squeeze_base.h" - -namespace mindspore::kernel { - -class SqueezeCPUKernel : public LiteKernel { - public: - explicit SqueezeCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::InnerContext *ctx, - const mindspore::lite::PrimitiveC *primitive) - : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} - ~SqueezeCPUKernel() override = default; - - int Init() override; - int ReSize() override; - int Run() override; -}; -} // namespace mindspore::kernel - -#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SQUEEZE_H_