optimize op performance

pull/11525/head
fuzhiye 4 years ago
parent 9d00e30ed0
commit e36e11e095

@ -1,35 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_NNACL_RESHAHPE_BASE_H_
#define MINDSPORE_LITE_NNACL_RESHAHPE_BASE_H_
#include <string.h>
#include "nnacl/op_base.h"
#ifdef __cplusplus
extern "C" {
#endif
/**
 * Reshape never changes the underlying data, so it is implemented as a raw byte copy.
 *
 * Declared `static inline` (like DoSqueeze) so that every translation unit including this
 * header gets its own definition. A plain `inline` definition in a C header provides no
 * external definition under C99 rules and can fail to link when the call is not inlined.
 *
 * @param input_ptr  source buffer, at least data_size bytes
 * @param output_ptr destination buffer, at least data_size bytes; must not overlap the source
 * @param data_size  number of bytes to copy
 */
static inline void Reshape(const void *input_ptr, void *output_ptr, size_t data_size) {
  memcpy(output_ptr, input_ptr, data_size);
}
#ifdef __cplusplus
}
#endif
#endif // MINDSPORE_LITE_NNACL_RESHAHPE_BASE_H_

@ -1,38 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_NNACL_SQUEEZE_BASE_H_
#define MINDSPORE_LITE_NNACL_SQUEEZE_BASE_H_
#include "nnacl/errorcode.h"
#ifdef __cplusplus
extern "C" {
#endif
/**
 * Squeeze leaves the element data untouched, so this is a straight byte copy.
 *
 * @param input_ptr  source buffer
 * @param output_ptr destination buffer
 * @param data_size  number of bytes to copy
 * @return NNACL_ERR when either pointer is NULL, NNACL_OK after the copy otherwise
 */
static inline int DoSqueeze(const void *input_ptr, void *output_ptr, size_t data_size) {
  const int buffers_valid = (input_ptr != NULL) && (output_ptr != NULL);
  if (!buffers_valid) {
    return NNACL_ERR;
  }
  (void)memcpy(output_ptr, input_ptr, data_size);
  return NNACL_OK;
}
#ifdef __cplusplus
}
#endif
#endif // MINDSPORE_LITE_NNACL_SQUEEZE_BASE_H_

@ -64,6 +64,7 @@
typedef enum LiteDataType {
kDataTypeFloat,
kDataTypeFloat16,
kDataTypeInt,
kDataTypeInt8,
KDataTypeBool,

@ -108,6 +108,10 @@ int DoStridedSlice(const void *in_data, void *out_data, StridedSliceParameter *p
*((int8_t *)out_data + out_offset) = *((int8_t *)in_data + in_offset);
} else if (param->data_type == kDataTypeInt) {
*((int32_t *)out_data + out_offset) = *((int32_t *)in_data + in_offset);
#ifdef ENABLE_ARM64
} else if (param->data_type == kDataTypeFloat16) {
*((float16_t *)out_data + out_offset) = *((float16_t *)in_data + in_offset);
#endif
} else {
return NNACL_ERR;
}
@ -120,3 +124,15 @@ int DoStridedSlice(const void *in_data, void *out_data, StridedSliceParameter *p
}
return NNACL_OK;
}
/**
 * Gathers fixed-size chunks from `input` into a densely packed `output`.
 *
 * For each of the `outer` groups, reading starts at `input + group * in_offset`; `split_len`
 * chunks of `inner_size` bytes are then copied, advancing the read position by
 * `inner_size * stride` bytes and the write position by `inner_size` bytes per chunk.
 *
 * @param input      first byte of the first chunk to read
 * @param output     destination, written contiguously
 * @param split_len  chunks copied per outer group (nothing is copied when <= 0)
 * @param stride     distance between consecutive source chunks, in units of chunks
 * @param outer      number of outer groups
 * @param inner_size chunk size in bytes
 * @param in_offset  byte distance between the starts of consecutive outer groups in the input
 */
void FastStride(const uint8_t *input, uint8_t *output, int split_len, int stride, size_t outer, size_t inner_size,
                size_t in_offset) {
  size_t group = 0;
  while (group < outer) {
    const uint8_t *src = input + group * in_offset;
    int copied = 0;
    while (copied < split_len) {
      memcpy(output, src, inner_size);
      output += inner_size;
      src += inner_size * stride;
      ++copied;
    }
    ++group;
  }
}

@ -39,6 +39,9 @@ typedef struct StridedSliceParameter {
extern "C" {
#endif
int DoStridedSlice(const void *inputs, void *output, StridedSliceParameter *param);
/* Byte-wise strided gather: for each of `outer` groups (group g starts at input + g * in_offset),
 * copies `split_len` chunks of `inner_size` bytes, stepping the source by `inner_size * stride`
 * bytes per chunk and writing the chunks back-to-back into `output`. */
void FastStride(const uint8_t *input, uint8_t *output, int split_len, int stride, size_t outer, size_t inner_size,
size_t in_offset);
#ifdef __cplusplus
}
#endif

@ -26,6 +26,7 @@ OpParameter *PopulateExpandDimsParameter(const mindspore::lite::PrimitiveC *prim
MS_LOG(ERROR) << "malloc ExpandDimsParameter failed.";
return nullptr;
}
// Zero the parameter first and only then record the op type; in the opposite order the
// memset wipes the type_ value that was just stored.
memset(expand_dims_param, 0, sizeof(OpParameter));
expand_dims_param->type_ = primitive->Type();
return reinterpret_cast<OpParameter *>(expand_dims_param);
}

@ -0,0 +1,71 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/kernel/arm/base/reshape_base.h"
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"
using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Reshape;
namespace mindspore::kernel {
// Initialisation has no work of its own; it forwards to ReSize(), which computes the
// per-thread copy size, and returns ReSize()'s status.
int ReshapeBaseCPUKernel::Init() {
  const int status = ReSize();
  return status;
}
// Splits the input tensor's byte size evenly (rounded up) across the configured threads and
// stores the per-thread share in cal_max_num_per_thread_. Always returns RET_OK.
int ReshapeBaseCPUKernel::ReSize() {
  const int worker_count = context_->thread_num_;
  const int total_bytes = in_tensors_.front()->Size();
  cal_max_num_per_thread_ = UP_DIV(total_bytes, worker_count);
  return RET_OK;
}
// Copies this task's slice of the input bytes to the same offset in the output.
//
// task_id selects the slice [task_id * cal_max_num_per_thread_, +cal_max_num_per_thread_),
// clamped to the end of the tensor. Because the per-thread share is rounded up, trailing tasks
// can start at or past the end of the data (e.g. 5 bytes over 4 threads: task 3 starts at 6).
// Those tasks must do nothing; without the guard the unsigned subtraction wraps around and a
// full slice would be copied out of bounds.
//
// Returns RET_OK in all cases.
int ReshapeBaseCPUKernel::RunImpl(int task_id) {
  const size_t total_size = in_tensors_.front()->Size();
  const size_t start_index = static_cast<size_t>(task_id) * cal_max_num_per_thread_;
  if (start_index >= total_size) {
    return RET_OK;
  }
  const size_t remaining = total_size - start_index;
  const size_t data_size = remaining > cal_max_num_per_thread_ ? cal_max_num_per_thread_ : remaining;
  memcpy(output_ptr_ + start_index, input_ptr_ + start_index, data_size);
  return RET_OK;
}
int ReshapeRun(void *cdata, int task_id) {
auto reshape = reinterpret_cast<ReshapeBaseCPUKernel *>(cdata);
auto ret = reshape->RunImpl(task_id);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ReshapeRun error task_id[" << task_id << "] error_code[" << ret << "]";
return ret;
}
return RET_OK;
}
int ReshapeBaseCPUKernel::Run() {
input_ptr_ = reinterpret_cast<uint8_t *>(in_tensors_.at(kInputIndex)->data_c());
output_ptr_ = reinterpret_cast<uint8_t *>(out_tensors_.at(kOutputIndex)->data_c());
auto ret = ParallelLaunch(this->context_->thread_pool_, ReshapeRun, this, context_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Reshape run error error_code[" << ret << "]";
return ret;
}
return RET_OK;
}
// One ReshapeBaseCPUKernel serves int32, float32 and float16 Reshape on CPU: the kernel only
// copies bytes, so it does not depend on the element type.
// NOTE(review): REG_KERNEL is defined outside this file; presumably it registers the creator
// under the (arch, data type, op type) key - confirm.
REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Reshape, LiteKernelCreator<ReshapeBaseCPUKernel>)
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Reshape, LiteKernelCreator<ReshapeBaseCPUKernel>)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Reshape, LiteKernelCreator<ReshapeBaseCPUKernel>)
} // namespace mindspore::kernel

@ -13,32 +13,33 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_RESHAPE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_RESHAPE_H_
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_RESHAPE_BASE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_RESHAPE_BASE_H_
#include <vector>
#include "nnacl/fp16/cast_fp16.h"
#include "nnacl/base/reshape_base.h"
#include "src/lite_kernel.h"
#include "include/context.h"
#include "src/runtime/kernel/arm/fp32/reshape_fp32.h"
using mindspore::lite::InnerContext;
namespace mindspore::kernel {
// ReshapeBaseCPUKernel: element-type-agnostic reshape kernel. Its implementation copies the
// input tensor's bytes to the output tensor, dividing the copy between threads.
// NOTE(review): in this hunk the removed ReshapeFp16CPUKernel lines appear directly before the
// ReshapeBaseCPUKernel lines that replace them.
class ReshapeFp16CPUKernel : public ReshapeCPUKernel {
class ReshapeBaseCPUKernel : public LiteKernel {
public:
ReshapeFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
ReshapeBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx,
const mindspore::lite::PrimitiveC *primitive)
: ReshapeCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
~ReshapeFp16CPUKernel() = default;
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
~ReshapeBaseCPUKernel() override = default;
// Init() forwards to ReSize(); ReSize() computes the per-thread byte share.
int Init() override;
int ReSize() override;
// Run() launches one RunImpl(task_id) per thread; each task copies one slice of bytes.
int Run() override;
int RunImpl(int task_id);
private:
// Upper bound, in bytes, on what a single task copies; computed in ReSize().
size_t cal_max_num_per_thread_ = 0;
// Raw byte views of the input/output tensor data, refreshed at the start of every Run().
uint8_t *input_ptr_ = nullptr;
uint8_t *output_ptr_ = nullptr;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_RESHAPE_H_
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_RESHAPE_BASE_H_

@ -13,34 +13,15 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/kernel/arm/fp32/squeeze_fp32.h"
#include "schema/model_generated.h"
#include "src/runtime/kernel/arm/base/squeeze_base.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"
#include "schema/model_generated.h"
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Squeeze;
namespace mindspore::kernel {
// Squeeze needs no preparation; initialisation always succeeds.
int SqueezeCPUKernel::Init() {
  return RET_OK;
}
// Nothing is cached per shape, so a resize is a no-op that always succeeds.
int SqueezeCPUKernel::ReSize() {
  return RET_OK;
}
int SqueezeCPUKernel::Run() {
size_t data_size = in_tensors_.front()->Size();
int ret = DoSqueeze(in_tensors_.front()->data_c(), out_tensors_.front()->data_c(), data_size);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Do squeeze fail!ret: " << ret;
return RET_ERROR;
}
return RET_OK;
}
// Squeeze registrations. The first three lines name SqueezeCPUKernel (float32/int32/bool); the
// last four name SqueezeBaseCPUKernel and additionally cover float16.
// NOTE(review): this hunk appears to list the removed registrations followed by their
// replacements; REG_KERNEL itself is defined outside this file - confirm its semantics there.
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Squeeze, LiteKernelCreator<SqueezeCPUKernel>)
REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Squeeze, LiteKernelCreator<SqueezeCPUKernel>)
REG_KERNEL(kCPU, kNumberTypeBool, PrimitiveType_Squeeze, LiteKernelCreator<SqueezeCPUKernel>)
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Squeeze, LiteKernelCreator<SqueezeBaseCPUKernel>)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Squeeze, LiteKernelCreator<SqueezeBaseCPUKernel>)
REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Squeeze, LiteKernelCreator<SqueezeBaseCPUKernel>)
REG_KERNEL(kCPU, kNumberTypeBool, PrimitiveType_Squeeze, LiteKernelCreator<SqueezeBaseCPUKernel>)
} // namespace mindspore::kernel

@ -13,30 +13,22 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_RESHAPE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_RESHAPE_H_
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_SQUEEZE_BASE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_SQUEEZE_BASE_H_
#include <vector>
#include "src/lite_kernel.h"
#include "include/context.h"
#include "nnacl/base/reshape_base.h"
#include "src/runtime/kernel/arm/base/reshape_base.h"
using mindspore::lite::InnerContext;
namespace mindspore::kernel {
// SqueezeBaseCPUKernel derives from ReshapeBaseCPUKernel and adds only a forwarding
// constructor and a defaulted destructor, so it inherits Init/ReSize/Run unchanged.
// NOTE(review): in this hunk the removed ReshapeCPUKernel declaration lines are interleaved
// with the SqueezeBaseCPUKernel lines that replace them.
class ReshapeCPUKernel : public LiteKernel {
class SqueezeBaseCPUKernel : public ReshapeBaseCPUKernel {
public:
ReshapeCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx,
const mindspore::lite::PrimitiveC *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
~ReshapeCPUKernel() = default;
int Init() override;
int ReSize() override;
int Run() override;
// Forwards every argument to the ReshapeBaseCPUKernel constructor.
SqueezeBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
const mindspore::lite::PrimitiveC *primitive)
: ReshapeBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
~SqueezeBaseCPUKernel() override = default;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_RESHAPE_H_
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_SQUEEZE_BASE_H_

@ -27,7 +27,7 @@ using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Stack;
namespace mindspore::kernel {
static int GetCopyNum(const std::vector<int> &in_shape, int axis, int n_dim) {
static inline int GetCopyNum(const std::vector<int> &in_shape, int axis, int n_dim) {
int copy_num = 1;
if (axis > 0) {
for (int j = n_dim - 1; j > axis - 1; j--) {
@ -41,12 +41,12 @@ static int GetCopyNum(const std::vector<int> &in_shape, int axis, int n_dim) {
return copy_num;
}
// Returns the product of in_shape[0] .. in_shape[axis - 1], i.e. the number of slices that
// precede `axis`; yields 1 when axis <= 0 because the loop body never runs.
// NOTE(review): this hunk interleaves the old GetOutterSize spelling with the renamed
// GetOuterSize lines; both versions compute the same product.
static size_t GetOutterSize(const std::vector<int> &in_shape, int axis) {
size_t outter_size = 1;
static inline size_t GetOuterSize(const std::vector<int> &in_shape, int axis) {
size_t outer_size = 1;
for (int i = 0; i < axis; ++i) {
outter_size *= in_shape[i];
outer_size *= in_shape[i];
}
return outter_size;
return outer_size;
}
int StackBaseCPUKernel::ReSize() {
@ -59,14 +59,13 @@ int StackBaseCPUKernel::ReSize() {
} else {
MS_ASSERT(input_nums > 1);
copy_size_ = GetCopyNum(input0_shape, axis_, input0_shape.size()) * data_type_size_;
outter_size_ = GetOutterSize(input0_shape, axis_);
outer_size_ = GetOuterSize(input0_shape, axis_);
}
return RET_OK;
}
int StackBaseCPUKernel::Init() {
auto input0_tensor = in_tensors_.front();
data_type_size_ = input0_tensor->Size() / input0_tensor->ElementsNum();
data_type_size_ = sizeof(float);
if (!InferShapeDone()) {
return RET_OK;
}
@ -74,13 +73,21 @@ int StackBaseCPUKernel::Init() {
}
// Stacks all input tensors into the first output tensor.
// Builds a temporary array of raw input data pointers (from the context allocator), hands it
// to Stack() together with copy_size_ and the outer size, then releases the array.
// Returns RET_ERROR if the pointer array cannot be allocated, RET_OK otherwise.
// NOTE(review): the two Stack(...) lines below are the old (outter_size_) and new
// (outer_size_) spelling of the same call as shown by the diff view.
int StackBaseCPUKernel::Run() {
// malloc temporary memory to store all the inputs
size_t inputs_num = in_tensors_.size();
char **all_inputs = static_cast<char **>(context_->allocator->Malloc(inputs_num * sizeof(char *)));
if (all_inputs == nullptr) {
MS_LOG(ERROR) << "malloc all_inputs failed.";
return RET_ERROR;
}
// Collect each input tensor's data pointer, in input order.
for (size_t j = 0; j < inputs_num; ++j) {
all_inputs[j] = reinterpret_cast<char *>(in_tensors_.at(j)->data_c());
}
// run stack
auto output_data = reinterpret_cast<char *>(out_tensors_.at(0)->data_c());
Stack(all_inputs, output_data, in_tensors_.size(), copy_size_, outter_size_);
Stack(all_inputs, output_data, in_tensors_.size(), copy_size_, outer_size_);
// free temporary variable all_inputs
context_->allocator->Free(all_inputs);
return RET_OK;
}

@ -38,7 +38,7 @@ class StackBaseCPUKernel : public LiteKernel {
int axis_ = 0;
size_t data_type_size_ = 0;
size_t copy_size_ = 0;
size_t outter_size_ = 1;
size_t outer_size_ = 1;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_STACK_BASE_H_

@ -33,11 +33,37 @@ int StridedSliceCPUKernel::Init() {
if (!InferShapeDone()) {
return RET_OK;
}
return ReSize();
}
// Derives the parameters of the fast (memcpy-based) path once MatchFastPattern() has chosen
// split_axis_:
//   outer_              - product of the input dimensions before split_axis_
//   inner_size_         - bytes covered by one index of split_axis_ (element size * product of
//                         the dimensions after split_axis_)
//   cal_num_per_thread_ - work items per thread, rounded up
// and decides whether threads divide the work along the outer dimensions or along the split
// axis itself.
//
// This runs on every ReSize(), so all derived state is reset first. Otherwise outer_ would keep
// multiplying onto its previous value, and a kernel that switched strategy between two shapes
// would end up with both parallel_on_* flags set.
void StridedSliceCPUKernel::InitFastRunParam() {
  auto in_shape = in_tensors_.front()->shape();
  auto out_shape = out_tensors_.front()->shape();

  // Start from a clean state; see the function comment.
  outer_ = 1;
  parallel_on_split_axis_ = false;
  parallel_on_outer_ = false;

  // cal inner, outer
  for (int i = 0; i < split_axis_; ++i) {
    outer_ *= in_shape[i];
  }
  int inner = 1;
  for (size_t i = split_axis_ + 1; i < in_shape.size(); i++) {
    inner *= in_shape[i];
  }
  // Element size is recovered as total bytes / element count; an empty tensor would otherwise
  // divide by zero, so it is treated as having zero-sized chunks.
  const auto element_count = in_tensors_.front()->ElementsNum();
  inner_size_ = element_count > 0 ? in_tensors_.front()->Size() / element_count * inner : 0;

  // decide multi-thread launch strategy
  if (outer_ == 1) {
    parallel_on_split_axis_ = true;
    cal_num_per_thread_ = UP_DIV(out_shape[split_axis_], context_->thread_num_);
  } else {
    parallel_on_outer_ = true;
    cal_num_per_thread_ = UP_DIV(outer_, context_->thread_num_);
  }
}
int StridedSliceCPUKernel::ReSize() {
fast_run_ = MatchFastPattern();
if (fast_run_) {
InitFastRunParam();
}
if (op_parameter_ != nullptr) {
free(op_parameter_);
op_parameter_ = nullptr;
@ -51,7 +77,82 @@ int StridedSliceCPUKernel::ReSize() {
return RET_OK;
}
int StridedSliceCPUKernel::Run() {
// Detects the case where input and output shapes have the same rank and differ in exactly one
// dimension; that dimension is stored in split_axis_ and the fast path can be used.
//   input [1, 80, 46, 40] -> output [1, 80, 20, 40] : match, split axis 2
//   input [1, 46, 40]     -> output [1, 20, 40]     : match, split axis 1
// Returns false (leaving split_axis_ untouched) when the ranks differ or when zero or more
// than one dimension differs.
// NOTE(review): only shapes are compared here; begins/strides of the other axes are not
// inspected - confirm callers never reach this with a non-trivial slice on an equal-sized axis.
bool StridedSliceCPUKernel::MatchFastPattern() {
  const auto in_shape = in_tensors_.front()->shape();
  const auto out_shape = out_tensors_.front()->shape();
  if (in_shape.size() != out_shape.size()) {
    return false;
  }

  int differing_axes = 0;
  int last_differing_axis = -1;
  for (size_t axis = 0; axis < in_shape.size(); ++axis) {
    if (in_shape[axis] == out_shape[axis]) {
      continue;
    }
    ++differing_axes;
    last_differing_axis = static_cast<int>(axis);
  }

  if (differing_axes != 1) {
    return false;
  }
  split_axis_ = last_differing_axis;
  return true;
}
// Executes one task of the fast strided-slice path.
//
// Work is divided either over the outer dimensions (parallel_on_outer_) or over the split axis
// itself; task_id selects the range starting at task_id * cal_num_per_thread_. Because the
// per-thread share is rounded up, trailing tasks can start past the end of the work. Such tasks
// return immediately: in the outer-parallel case a negative count would otherwise be converted
// to a huge size_t `outer` argument of FastStride and overrun both buffers.
//
// Returns RET_OK in all cases.
int StridedSliceCPUKernel::FastRunImpl(int task_id) {
  auto in_shape = in_tensors_.front()->shape();
  auto out_shape = out_tensors_.front()->shape();
  int begin_index = param_->begins_[split_axis_];
  int caled_num = task_id * cal_num_per_thread_;
  if (parallel_on_outer_) {
    int cur_outer = outer_ - caled_num;
    if (cur_outer <= 0) {
      // No outer slices left for this task.
      return RET_OK;
    }
    if (cur_outer > cal_num_per_thread_) {
      cur_outer = cal_num_per_thread_;
    }
    uint8_t *cur_in_ptr = input_ptr_ + (caled_num * in_shape[split_axis_] + begin_index) * inner_size_;
    uint8_t *cur_out_ptr = output_ptr_ + caled_num * out_shape[split_axis_] * inner_size_;
    FastStride(cur_in_ptr, cur_out_ptr, out_shape[split_axis_], param_->strides_[split_axis_], cur_outer, inner_size_,
               in_shape[split_axis_] * inner_size_);
  } else {
    MS_ASSERT(parallel_on_split_axis_);
    int cal_axis_num = out_shape[split_axis_] - caled_num;
    if (cal_axis_num <= 0) {
      // No output indices of the split axis left for this task.
      return RET_OK;
    }
    if (cal_axis_num > cal_num_per_thread_) {
      cal_axis_num = cal_num_per_thread_;
    }
    uint8_t *cur_in_ptr = input_ptr_ + (caled_num * param_->strides_[split_axis_] + begin_index) * inner_size_;
    uint8_t *cur_out_ptr = output_ptr_ + caled_num * inner_size_;
    FastStride(cur_in_ptr, cur_out_ptr, cal_axis_num, param_->strides_[split_axis_], 1, inner_size_, 0);
  }
  return RET_OK;
}
int StrideRun(void *cdata, int task_id) {
auto stride = reinterpret_cast<StridedSliceCPUKernel *>(cdata);
auto ret = stride->FastRunImpl(task_id);
if (ret != RET_OK) {
MS_LOG(ERROR) << "StrideRun error task_id[" << task_id << "] error_code[" << ret << "]";
return ret;
}
return RET_OK;
}
int StridedSliceCPUKernel::FastRun() {
input_ptr_ = reinterpret_cast<uint8_t *>(in_tensors_.front()->data_c());
output_ptr_ = reinterpret_cast<uint8_t *>(out_tensors_.front()->data_c());
auto ret = ParallelLaunch(this->context_->thread_pool_, StrideRun, this, context_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Stride run error error_code[" << ret << "]";
return ret;
}
return RET_OK;
}
int StridedSliceCPUKernel::NormalRun() {
auto input = in_tensors_.at(0);
MS_ASSERT(input);
switch (input->data_type()) {
@ -61,6 +162,9 @@ int StridedSliceCPUKernel::Run() {
case kNumberTypeFloat32:
param_->data_type = kDataTypeFloat;
break;
case kNumberTypeFloat16:
param_->data_type = kDataTypeFloat16;
break;
case kNumberTypeInt32:
param_->data_type = kDataTypeInt;
break;
@ -78,7 +182,15 @@ int StridedSliceCPUKernel::Run() {
return RET_OK;
}
// Dispatches to the memcpy-based fast path when ReSize() found a matching shape pattern
// (fast_run_), and to the general element-wise implementation otherwise.
int StridedSliceCPUKernel::Run() {
  return fast_run_ ? FastRun() : NormalRun();
}
// StridedSliceCPUKernel is named for int32, float32, float16 and int8 StridedSlice on CPU.
// NOTE(review): REG_KERNEL is defined outside this file; presumably it registers the creator
// under the (arch, data type, op type) key - confirm.
REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_StridedSlice, LiteKernelCreator<StridedSliceCPUKernel>)
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_StridedSlice, LiteKernelCreator<StridedSliceCPUKernel>)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_StridedSlice, LiteKernelCreator<StridedSliceCPUKernel>)
REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_StridedSlice, LiteKernelCreator<StridedSliceCPUKernel>)
} // namespace mindspore::kernel

@ -35,9 +35,23 @@ class StridedSliceCPUKernel : public LiteKernel {
int Init() override;
int ReSize() override;
int Run() override;
bool MatchFastPattern();
void InitFastRunParam();
int NormalRun();
int FastRun();
int FastRunImpl(int task_id);
private:
StridedSliceParameter *param_;
uint8_t *input_ptr_ = nullptr;
uint8_t *output_ptr_ = nullptr;
int split_axis_{-1};
int outer_{1};
int cal_num_per_thread_{1};
size_t inner_size_{0};
bool fast_run_{false};
bool parallel_on_split_axis_{false};
bool parallel_on_outer_{false};
};
} // namespace mindspore::kernel

@ -1,43 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/kernel/arm/fp16/reshape_fp16.h"
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"
using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Reshape;
namespace mindspore::kernel {
int ReshapeFp16CPUKernel::Run() {
auto in_tensor = in_tensors_.at(kInputIndex);
auto out_tensor = out_tensors_.at(kOutputIndex);
float16_t *input_ptr = reinterpret_cast<float16_t *>(in_tensor->data_c());
float16_t *output_ptr = reinterpret_cast<float16_t *>(out_tensor->data_c());
Reshape(input_ptr, output_ptr, out_tensor->Size());
return RET_OK;
}
// Names ReshapeFp16CPUKernel for float16 Reshape on CPU.
// NOTE(review): REG_KERNEL is defined outside this file - confirm its semantics there.
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Reshape, LiteKernelCreator<ReshapeFp16CPUKernel>)
} // namespace mindspore::kernel

@ -82,7 +82,7 @@ int StackFp16CPUKernel::Run() {
FreeBuffer();
return ret;
}
Stack(buffers_.data(), reinterpret_cast<char *>(out_buffer_), in_tensors_.size(), copy_size_, outter_size_);
Stack(buffers_.data(), reinterpret_cast<char *>(out_buffer_), in_tensors_.size(), copy_size_, outer_size_);
// if output tensor is fp32, we need to transform
if (malloc_out_) {
auto out_tensor = out_tensors_.at(0);

@ -82,8 +82,8 @@ int ExpandDimsRun(void *cdata, int task_id) {
}
int ExpandDimsCPUKernel::Run() {
in_ptr_ = in_tensors_.at(0)->MutableData();
out_ptr_ = out_tensors_.at(0)->MutableData();
in_ptr_ = in_tensors_.at(0)->data_c();
out_ptr_ = out_tensors_.at(0)->data_c();
auto ret = ParallelLaunch(this->context_->thread_pool_, ExpandDimsRun, this, thread_sz_count_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ExpandDimsRun error error_code[" << ret << "]";

@ -1,45 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/kernel/arm/fp32/reshape_fp32.h"
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"
using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Reshape;
namespace mindspore::kernel {
// Reshape needs no preparation; initialisation always succeeds.
int ReshapeCPUKernel::Init() {
  return RET_OK;
}
// Nothing is cached per shape, so a resize is a no-op that always succeeds.
int ReshapeCPUKernel::ReSize() {
  return RET_OK;
}
// Copies the whole input tensor (its Size() in bytes) into the output tensor in a single
// Reshape() call. Both data pointers are asserted non-null. Always returns RET_OK.
int ReshapeCPUKernel::Run() {
  auto *src = in_tensors_.at(kInputIndex)->data_c();
  auto *dst = out_tensors_.at(kOutputIndex)->data_c();
  const size_t byte_count = in_tensors_.at(kInputIndex)->Size();
  MS_ASSERT(src);
  MS_ASSERT(dst);
  Reshape(src, dst, byte_count);
  return RET_OK;
}
// Names ReshapeCPUKernel for int32 and float32 Reshape on CPU.
// NOTE(review): REG_KERNEL is defined outside this file - confirm its semantics there.
REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Reshape, LiteKernelCreator<ReshapeCPUKernel>)
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Reshape, LiteKernelCreator<ReshapeCPUKernel>)
} // namespace mindspore::kernel

@ -75,44 +75,18 @@ int SplitRun(void *cdata, int task_id) {
// Caches the input and per-split output data pointers, then launches SplitRun over
// thread_n_num_ tasks. Returns RET_OK on success and RET_ERROR if the parallel launch fails.
// NOTE(review): the MutableData()/data_c() line pairs below are the old and new form of the
// same assignment as shown by the diff view.
int SplitCPUKernel::Run() {
auto in_tensor = in_tensors_.front();
input_ptr_ = reinterpret_cast<float *>(in_tensor->MutableData());
input_ptr_ = reinterpret_cast<float *>(in_tensor->data_c());
// One output pointer per split, in output order.
for (int i = 0; i < param->num_split_; i++) {
output_ptr_.at(i) = reinterpret_cast<float *>(out_tensors_.at(i)->MutableData());
output_ptr_.at(i) = reinterpret_cast<float *>(out_tensors_.at(i)->data_c());
}
auto ret = ParallelLaunch(this->context_->thread_pool_, SplitRun, this, thread_n_num_);
if (ret != RET_OK) {
// NOTE(review): the message says "Scale" although this is the Split kernel - looks like a
// copy/paste leftover; confirm and correct the text.
MS_LOG(ERROR) << "Scale error error_code[" << ret << "]";
return RET_ERROR;
}
return RET_OK;
}
// Creates and initialises a SplitCPUKernel.
//
// Returns the new kernel, or nullptr when opParameter is null, when the kernel cannot be
// allocated (opParameter is freed in that case), or when kernel->Init() fails (the kernel is
// deleted).
//
// On Init() failure the error is logged BEFORE the kernel is deleted, because the message
// reads opParameter->name_ and opParameter->type_. NOTE(review): the kernel presumably takes
// ownership of opParameter once constructed (the allocation-failure path frees it by hand), in
// which case reading it after `delete kernel` would be a use-after-free - confirm against
// LiteKernel's destructor.
kernel::LiteKernel *CpuSplitInt32KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                               const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
                                               const InnerContext *ctx, const kernel::KernelKey &desc,
                                               const mindspore::lite::PrimitiveC *primitive) {
  if (opParameter == nullptr) {
    MS_LOG(ERROR) << "Input opParameter is nullptr!";
    return nullptr;
  }
  MS_ASSERT(desc.type == schema::PrimitiveType_Split);
  auto *kernel = new (std::nothrow) SplitCPUKernel(opParameter, inputs, outputs, ctx, primitive);
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "new SplitCPUKernel fail!";
    free(opParameter);
    return nullptr;
  }
  auto ret = kernel->Init();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
    delete kernel;
    return nullptr;
  }
  return kernel;
}
// Names SplitCPUKernel, via the generic LiteKernelCreator, for int32 and float32 Split on CPU.
// NOTE(review): REG_KERNEL is defined outside this file - confirm its semantics there.
REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Split, LiteKernelCreator<SplitCPUKernel>)
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Split, LiteKernelCreator<SplitCPUKernel>)
} // namespace mindspore::kernel

@ -1,40 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SQUEEZE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SQUEEZE_H_
#include <vector>
#include "src/lite_kernel.h"
#include "nnacl/base/squeeze_base.h"
namespace mindspore::kernel {
// CPU kernel for the Squeeze operator. It holds no state of its own: construction forwards
// every argument to LiteKernel, and the three lifecycle hooks are implemented out of line.
class SqueezeCPUKernel : public LiteKernel {
 public:
  // Forwards all arguments to the LiteKernel base constructor.
  explicit SqueezeCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                            const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                            const mindspore::lite::PrimitiveC *primitive)
      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
  ~SqueezeCPUKernel() override = default;

  // Lifecycle hooks overridden from LiteKernel.
  int Init() override;
  int ReSize() override;
  int Run() override;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SQUEEZE_H_
Loading…
Cancel
Save