Fix a bug in the arithmetic fp32 op

Store the data type of the weight and bias in the fp16 ops
pull/12647/head
fuzhiye 4 years ago
parent 79675d612e
commit 897a59e9c0

@ -136,6 +136,10 @@ int ArithmeticFP16CPUKernel::Execute(const void *input0, const void *input1, voi
} }
int ArithmeticFP16CPUKernel::Run() { int ArithmeticFP16CPUKernel::Run() {
if (CheckDataType() != RET_OK) {
MS_LOG(ERROR) << "ArithmeticFP16CPUKernel check dataType failed.";
return RET_ERROR;
}
if (!input0_broadcast_) { if (!input0_broadcast_) {
input0_ptr_ = ConvertInputFp32toFp16(in_tensors_.at(0), context_); input0_ptr_ = ConvertInputFp32toFp16(in_tensors_.at(0), context_);
} }

@ -31,11 +31,9 @@ class Convolution1x1FP16CPUKernel : public ConvolutionBaseFP16CPUKernel {
Convolution1x1FP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, Convolution1x1FP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx, void *origin_weight, const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx, void *origin_weight,
void *origin_bias, TypeId origin_weight_data_type, TypeId origin_bias_data_type) void *origin_bias, TypeId origin_weight_data_type, TypeId origin_bias_data_type)
: ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx), : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type),
origin_weight_(origin_weight), origin_weight_(origin_weight),
origin_bias_(origin_bias), origin_bias_(origin_bias) {}
origin_weight_data_type_(origin_weight_data_type),
origin_bias_data_type_(origin_bias_data_type) {}
~Convolution1x1FP16CPUKernel() override; ~Convolution1x1FP16CPUKernel() override;
int Init() override; int Init() override;
@ -64,8 +62,6 @@ class Convolution1x1FP16CPUKernel : public ConvolutionBaseFP16CPUKernel {
float16_t *pack_input_ = nullptr; float16_t *pack_input_ = nullptr;
float16_t *output_ptr_ = nullptr; float16_t *output_ptr_ = nullptr;
MatMulParameter *matmul_param_ = nullptr; MatMulParameter *matmul_param_ = nullptr;
TypeId origin_weight_data_type_;
TypeId origin_bias_data_type_;
}; };
} // namespace mindspore::kernel } // namespace mindspore::kernel

@ -39,9 +39,8 @@ int ConvolutionBaseFP16CPUKernel::GetExecuteTensor() {
} }
int ConvolutionBaseFP16CPUKernel::GetExecuteFilter(lite::Tensor *weight_tensor, void *origin_data) { int ConvolutionBaseFP16CPUKernel::GetExecuteFilter(lite::Tensor *weight_tensor, void *origin_data) {
auto weight_data_type = weight_tensor->data_type(); MS_ASSERT(origin_weight_data_type_ == kNumberTypeFloat32 || origin_weight_data_type_ == kNumberTypeFloat16);
MS_ASSERT(weight_data_type == kNumberTypeFloat32 || weight_data_type == kNumberTypeFloat16); if (origin_weight_data_type_ == kNumberTypeFloat32) {
if (weight_data_type == kNumberTypeFloat32) {
float *origin_weight = reinterpret_cast<float *>(origin_data); float *origin_weight = reinterpret_cast<float *>(origin_data);
size_t fp16_weight_size = weight_tensor->Channel() * weight_tensor->Batch() * weight_tensor->Height() * size_t fp16_weight_size = weight_tensor->Channel() * weight_tensor->Batch() * weight_tensor->Height() *
weight_tensor->Width() * sizeof(float16_t); weight_tensor->Width() * sizeof(float16_t);

@ -27,8 +27,11 @@ namespace mindspore::kernel {
class ConvolutionBaseFP16CPUKernel : public ConvolutionBaseCPUKernel { class ConvolutionBaseFP16CPUKernel : public ConvolutionBaseCPUKernel {
public: public:
ConvolutionBaseFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, ConvolutionBaseFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx) const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx,
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} TypeId origin_weight_data_type, TypeId origin_bias_data_type)
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx),
origin_weight_data_type_(origin_weight_data_type),
origin_bias_data_type_(origin_bias_data_type) {}
~ConvolutionBaseFP16CPUKernel() override; ~ConvolutionBaseFP16CPUKernel() override;
int Init() override { return mindspore::lite::RET_OK; } int Init() override { return mindspore::lite::RET_OK; }
@ -46,6 +49,8 @@ class ConvolutionBaseFP16CPUKernel : public ConvolutionBaseCPUKernel {
float16_t *execute_input_ = nullptr; float16_t *execute_input_ = nullptr;
float16_t *execute_weight_ = nullptr; float16_t *execute_weight_ = nullptr;
float16_t *execute_output_ = nullptr; float16_t *execute_output_ = nullptr;
TypeId origin_weight_data_type_;
TypeId origin_bias_data_type_;
}; };
} // namespace mindspore::kernel } // namespace mindspore::kernel

@ -69,17 +69,13 @@ int ConvolutionDelegateFP16CPUKernel::Init() {
if (in_tensors_.size() == 3) { if (in_tensors_.size() == 3) {
origin_bias_ = CopyData(in_tensors_.at(kBiasIndex)); origin_bias_ = CopyData(in_tensors_.at(kBiasIndex));
need_free_ = need_free_ | BIAS_NEED_FREE; need_free_ = need_free_ | BIAS_NEED_FREE;
origin_bias_data_type_ = in_tensors_.at(kBiasIndex)->data_type();
} }
origin_weight_data_type_ = in_tensors_.at(kWeightIndex)->data_type();
return RET_OK; return RET_OK;
} }
origin_weight_ = in_tensors_.at(kWeightIndex)->data_c(); origin_weight_ = in_tensors_.at(kWeightIndex)->data_c();
if (in_tensors_.size() == 3) { if (in_tensors_.size() == 3) {
origin_bias_ = in_tensors_.at(kBiasIndex)->data_c(); origin_bias_ = in_tensors_.at(kBiasIndex)->data_c();
origin_bias_data_type_ = in_tensors_.at(kBiasIndex)->data_type();
} }
origin_weight_data_type_ = in_tensors_.at(kWeightIndex)->data_type();
return ReSize(); return ReSize();
} }
@ -110,6 +106,28 @@ ConvParameter *CreateNewConvParameterFp16(ConvParameter *parameter) {
return conv_parameter; return conv_parameter;
} }
// Instantiates an fp16 depthwise-convolution kernel for the given tensors.
//
// The concrete class is picked from the input channel count stored in the
// ConvParameter: fewer than 32 channels selects ConvolutionDepthwiseSWFp16CPUKernel,
// anything else selects ConvolutionDepthwiseFp16CPUKernel. The original weight/bias
// pointers and their data types are forwarded unchanged to the chosen constructor.
//
// Returns the new kernel (Init() is NOT called here), or nullptr when allocation
// fails. On allocation failure opParameter is released with free().
kernel::LiteKernel *CpuConvDwFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                               const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
                                               const InnerContext *ctx, void *origin_weight, void *origin_bias,
                                               TypeId origin_weight_data_type, TypeId origin_bias_data_type) {
  MS_ASSERT(opParameter != nullptr);
  const auto *dw_param = reinterpret_cast<ConvParameter *>(opParameter);
  const bool few_input_channels = dw_param->input_channel_ < 32;

  kernel::LiteKernel *dw_kernel = nullptr;
  if (few_input_channels) {
    dw_kernel = new (std::nothrow) kernel::ConvolutionDepthwiseSWFp16CPUKernel(
      opParameter, inputs, outputs, ctx, origin_weight, origin_bias, origin_weight_data_type, origin_bias_data_type);
  } else {
    dw_kernel = new (std::nothrow) kernel::ConvolutionDepthwiseFp16CPUKernel(
      opParameter, inputs, outputs, ctx, origin_weight, origin_bias, origin_weight_data_type, origin_bias_data_type);
  }

  if (dw_kernel != nullptr) {
    return dw_kernel;
  }
  // Allocation failed: report it and release the parameter, as no kernel object took it over.
  MS_LOG(ERROR) << "kernel is nullptr.";
  free(opParameter);
  return nullptr;
}
kernel::LiteKernel *CpuConvFp16KernelSelect(const std::vector<lite::Tensor *> &inputs, kernel::LiteKernel *CpuConvFp16KernelSelect(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter, const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter,
const lite::InnerContext *ctx, void *origin_weight, void *origin_bias, const lite::InnerContext *ctx, void *origin_weight, void *origin_bias,
@ -119,12 +137,17 @@ kernel::LiteKernel *CpuConvFp16KernelSelect(const std::vector<lite::Tensor *> &i
int out_unit; int out_unit;
CheckIfUseWinogradFp16(&use_winograd, &out_unit, conv_param); CheckIfUseWinogradFp16(&use_winograd, &out_unit, conv_param);
kernel::LiteKernel *kernel = nullptr; kernel::LiteKernel *kernel = nullptr;
if (conv_param->kernel_h_ == 1 && conv_param->kernel_w_ == 1) {
if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) {
kernel = CpuConvDwFp16KernelCreator(inputs, outputs, op_parameter, ctx, origin_weight, origin_bias,
origin_weight_data_type, origin_bias_data_type);
} else if (conv_param->kernel_h_ == 1 && conv_param->kernel_w_ == 1) {
kernel = new (std::nothrow) kernel::Convolution1x1FP16CPUKernel( kernel = new (std::nothrow) kernel::Convolution1x1FP16CPUKernel(
op_parameter, inputs, outputs, ctx, origin_weight, origin_bias, origin_weight_data_type, origin_bias_data_type); op_parameter, inputs, outputs, ctx, origin_weight, origin_bias, origin_weight_data_type, origin_bias_data_type);
} else if (use_winograd) { } else if (use_winograd) {
kernel = new (std::nothrow) kernel::ConvolutionWinogradFP16CPUKernel( kernel = new (std::nothrow)
op_parameter, inputs, outputs, ctx, out_unit, origin_weight, origin_bias, origin_bias_data_type); kernel::ConvolutionWinogradFP16CPUKernel(op_parameter, inputs, outputs, ctx, out_unit, origin_weight, origin_bias,
origin_weight_data_type, origin_bias_data_type);
} else { } else {
kernel = new (std::nothrow) kernel::ConvolutionFP16CPUKernel( kernel = new (std::nothrow) kernel::ConvolutionFP16CPUKernel(
op_parameter, inputs, outputs, ctx, origin_weight, origin_bias, origin_weight_data_type, origin_bias_data_type); op_parameter, inputs, outputs, ctx, origin_weight, origin_bias, origin_weight_data_type, origin_bias_data_type);
@ -211,7 +234,13 @@ static lite::Tensor *CreateOutputTensorFp16(const std::vector<int> &out_shape,
kernel::LiteKernel *CreateDelegateConvFp16(const std::vector<lite::Tensor *> &inputs, kernel::LiteKernel *CreateDelegateConvFp16(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter, const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter,
const InnerContext *ctx) { const InnerContext *ctx) {
return new (std::nothrow) kernel::ConvolutionDelegateFP16CPUKernel(op_parameter, inputs, outputs, ctx); auto weight_data_type = inputs.at(1)->data_type();
TypeId bias_data_type = kTypeUnknown;
if (inputs.size() == 3) {
bias_data_type = inputs.at(2)->data_type();
}
return new (std::nothrow)
kernel::ConvolutionDelegateFP16CPUKernel(op_parameter, inputs, outputs, ctx, weight_data_type, bias_data_type);
} }
kernel::LiteKernel *CpuGroupConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs, kernel::LiteKernel *CpuGroupConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
@ -302,33 +331,6 @@ kernel::LiteKernel *CpuGroupConvFp16KernelCreator(const std::vector<lite::Tensor
GroupConvolutionFP16CPUKernel(op_parameter, inputs, outputs, ctx, group_convs, conv_param->group_); GroupConvolutionFP16CPUKernel(op_parameter, inputs, outputs, ctx, group_convs, conv_param->group_);
} }
// Creates and initializes an fp16 depthwise-convolution kernel.
//
// Fewer than 32 input channels selects ConvolutionDepthwiseSWFp16CPUKernel; otherwise
// ConvolutionDepthwiseFp16CPUKernel is used. `desc` is accepted but not read here.
//
// Returns the initialized kernel, or nullptr when allocation fails (opParameter is
// then released with free()) or when Init() does not return RET_OK (the kernel is
// then deleted).
kernel::LiteKernel *CpuConvDwFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                               const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
                                               const InnerContext *ctx, const kernel::KernelKey &desc) {
  MS_ASSERT(opParameter != nullptr);
  const auto *dw_param = reinterpret_cast<ConvParameter *>(opParameter);
  const bool few_input_channels = dw_param->input_channel_ < 32;

  kernel::LiteKernel *dw_kernel = nullptr;
  if (few_input_channels) {
    dw_kernel = new (std::nothrow) kernel::ConvolutionDepthwiseSWFp16CPUKernel(opParameter, inputs, outputs, ctx);
  } else {
    dw_kernel = new (std::nothrow) kernel::ConvolutionDepthwiseFp16CPUKernel(opParameter, inputs, outputs, ctx);
  }

  if (dw_kernel == nullptr) {
    MS_LOG(ERROR) << "kernel is nullptr.";
    free(opParameter);
    return nullptr;
  }

  const auto init_status = dw_kernel->Init();
  if (init_status == RET_OK) {
    return dw_kernel;
  }
  // Init failed: log which op could not be set up, then discard the kernel.
  MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
  delete dw_kernel;
  return nullptr;
}
kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs, kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter, const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
const InnerContext *ctx, const kernel::KernelKey &desc) { const InnerContext *ctx, const kernel::KernelKey &desc) {
@ -337,12 +339,13 @@ kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::Tensor *> &
auto conv_param = reinterpret_cast<ConvParameter *>(opParameter); auto conv_param = reinterpret_cast<ConvParameter *>(opParameter);
kernel::LiteKernel *kernel = nullptr; kernel::LiteKernel *kernel = nullptr;
if (conv_param->group_ == 1) { bool is_depthwise =
kernel = CreateDelegateConvFp16(inputs, outputs, opParameter, ctx); (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_);
} else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) {
kernel = CpuConvDwFp16KernelCreator(inputs, outputs, opParameter, ctx, desc); if (conv_param->group_ > 1 && !is_depthwise) {
} else {
kernel = CpuGroupConvFp16KernelCreator(inputs, outputs, opParameter, ctx); kernel = CpuGroupConvFp16KernelCreator(inputs, outputs, opParameter, ctx);
} else {
kernel = CreateDelegateConvFp16(inputs, outputs, opParameter, ctx);
} }
if (kernel == nullptr) { if (kernel == nullptr) {

@ -29,8 +29,11 @@ namespace mindspore::kernel {
class ConvolutionDelegateFP16CPUKernel : public LiteKernel { class ConvolutionDelegateFP16CPUKernel : public LiteKernel {
public: public:
ConvolutionDelegateFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, ConvolutionDelegateFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
: LiteKernel(parameter, inputs, outputs, ctx) {} TypeId origin_weight_data_type, TypeId origin_bias_data_type)
: LiteKernel(parameter, inputs, outputs, ctx),
origin_weight_data_type_(origin_weight_data_type),
origin_bias_data_type_(origin_bias_data_type) {}
~ConvolutionDelegateFP16CPUKernel() override { ~ConvolutionDelegateFP16CPUKernel() override {
FreeCopiedData(); FreeCopiedData();
if (fp16_conv_kernel_ != nullptr) { if (fp16_conv_kernel_ != nullptr) {

@ -42,7 +42,7 @@ int ConvolutionDepthwiseFp16CPUKernel::InitWeightBias() {
MS_LOG(ERROR) << "Malloc buffer failed."; MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR; return RET_ERROR;
} }
auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteFilter(weight_tensor, weight_tensor->data_c()); auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteFilter(weight_tensor, origin_weight_);
if (ret != RET_OK) { if (ret != RET_OK) {
MS_LOG(ERROR) << "get execute filter data failed."; MS_LOG(ERROR) << "get execute filter data failed.";
return ret; return ret;
@ -63,8 +63,8 @@ int ConvolutionDepthwiseFp16CPUKernel::InitWeightBias() {
auto bias_fp16 = reinterpret_cast<float16_t *>(bias_data_); auto bias_fp16 = reinterpret_cast<float16_t *>(bias_data_);
if (in_tensors_.size() == kInputSize2) { if (in_tensors_.size() == kInputSize2) {
auto bias_tensor = in_tensors_.at(kBiasIndex); auto bias_tensor = in_tensors_.at(kBiasIndex);
auto ori_bias = reinterpret_cast<float *>(bias_tensor->MutableData()); MS_ASSERT(origin_bias_);
MS_ASSERT(ori_bias); auto ori_bias = reinterpret_cast<float *>(origin_bias_);
for (int i = 0; i < bias_tensor->ElementsNum(); i++) { for (int i = 0; i < bias_tensor->ElementsNum(); i++) {
bias_fp16[i] = (float16_t)ori_bias[i]; bias_fp16[i] = (float16_t)ori_bias[i];
} }

@ -35,8 +35,12 @@ namespace mindspore::kernel {
class ConvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseFP16CPUKernel { class ConvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseFP16CPUKernel {
public: public:
ConvolutionDepthwiseFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, ConvolutionDepthwiseFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx) const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx,
: ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx) {} void *origin_weight, void *origin_bias, TypeId origin_weight_data_type,
TypeId origin_bias_data_type)
: ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type),
origin_weight_(origin_weight),
origin_bias_(origin_bias) {}
~ConvolutionDepthwiseFp16CPUKernel() override; ~ConvolutionDepthwiseFp16CPUKernel() override;
int Init() override; int Init() override;
@ -47,6 +51,8 @@ class ConvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseFP16CPUKernel {
int Execute(int task_id); int Execute(int task_id);
private: private:
void *origin_weight_; // do not free
void *origin_bias_; // do not free
float16_t *packed_weight_ = nullptr; float16_t *packed_weight_ = nullptr;
}; };
} // namespace mindspore::kernel } // namespace mindspore::kernel

@ -61,7 +61,6 @@ int ConvolutionDepthwiseSWFp16CPUKernel::InitWeightBias() {
// init weight: o, h, w, i; o == group, i == 1 // init weight: o, h, w, i; o == group, i == 1
auto weight_tensor = in_tensors_.at(kWeightIndex); auto weight_tensor = in_tensors_.at(kWeightIndex);
int OC8 = UP_DIV(weight_tensor->Batch(), C8NUM); int OC8 = UP_DIV(weight_tensor->Batch(), C8NUM);
auto origin_weight = reinterpret_cast<float *>(weight_tensor->MutableData());
int pack_weight_size = C8NUM * OC8 * weight_tensor->Height() * weight_tensor->Width(); int pack_weight_size = C8NUM * OC8 * weight_tensor->Height() * weight_tensor->Width();
packed_weight_ = reinterpret_cast<float16_t *>(malloc(pack_weight_size * sizeof(float16_t))); packed_weight_ = reinterpret_cast<float16_t *>(malloc(pack_weight_size * sizeof(float16_t)));
@ -69,8 +68,8 @@ int ConvolutionDepthwiseSWFp16CPUKernel::InitWeightBias() {
MS_LOG(ERROR) << "Malloc buffer failed."; MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR; return RET_ERROR;
} }
PackNCHWFp32ToNC8HW8Fp16(origin_weight, packed_weight_, 1, weight_tensor->Height() * weight_tensor->Width(), PackNCHWFp32ToNC8HW8Fp16(reinterpret_cast<float *>(origin_weight_), packed_weight_, 1,
weight_tensor->Batch()); weight_tensor->Height() * weight_tensor->Width(), weight_tensor->Batch());
bias_data_ = reinterpret_cast<float16_t *>(malloc(C8NUM * OC8 * sizeof(float16_t))); bias_data_ = reinterpret_cast<float16_t *>(malloc(C8NUM * OC8 * sizeof(float16_t)));
if (bias_data_ == nullptr) { if (bias_data_ == nullptr) {
@ -81,8 +80,8 @@ int ConvolutionDepthwiseSWFp16CPUKernel::InitWeightBias() {
auto bias_fp16 = reinterpret_cast<float16_t *>(bias_data_); auto bias_fp16 = reinterpret_cast<float16_t *>(bias_data_);
if (in_tensors_.size() == kInputSize2) { if (in_tensors_.size() == kInputSize2) {
auto bias_tensor = in_tensors_.at(kBiasIndex); auto bias_tensor = in_tensors_.at(kBiasIndex);
auto ori_bias = reinterpret_cast<float *>(bias_tensor->MutableData()); MS_ASSERT(origin_bias_);
MS_ASSERT(ori_bias); auto ori_bias = reinterpret_cast<float *>(origin_bias_);
for (int i = 0; i < bias_tensor->ElementsNum(); i++) { for (int i = 0; i < bias_tensor->ElementsNum(); i++) {
bias_fp16[i] = (float16_t)ori_bias[i]; bias_fp16[i] = (float16_t)ori_bias[i];
} }

@ -36,8 +36,12 @@ namespace mindspore::kernel {
class ConvolutionDepthwiseSWFp16CPUKernel : public ConvolutionBaseFP16CPUKernel { class ConvolutionDepthwiseSWFp16CPUKernel : public ConvolutionBaseFP16CPUKernel {
public: public:
ConvolutionDepthwiseSWFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, ConvolutionDepthwiseSWFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx) const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx,
: ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx) {} void *origin_weight, void *origin_bias, TypeId origin_weight_data_type,
TypeId origin_bias_data_type)
: ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type),
origin_weight_(origin_weight),
origin_bias_(origin_bias) {}
~ConvolutionDepthwiseSWFp16CPUKernel() override; ~ConvolutionDepthwiseSWFp16CPUKernel() override;
int Init() override; int Init() override;
@ -50,6 +54,8 @@ class ConvolutionDepthwiseSWFp16CPUKernel : public ConvolutionBaseFP16CPUKernel
private: private:
void FreePackedInputOutput(); void FreePackedInputOutput();
void *origin_weight_; // do not free
void *origin_bias_; // do not free
SlidingWindowParam *sliding_ = nullptr; SlidingWindowParam *sliding_ = nullptr;
float16_t *packed_weight_ = nullptr; float16_t *packed_weight_ = nullptr;
float16_t *packed_input_ = nullptr; float16_t *packed_input_ = nullptr;

@ -28,11 +28,9 @@ class ConvolutionFP16CPUKernel : public ConvolutionBaseFP16CPUKernel {
ConvolutionFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, ConvolutionFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx, void *origin_weight, const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx, void *origin_weight,
void *origin_bias, TypeId origin_weight_data_type, TypeId origin_bias_data_type) void *origin_bias, TypeId origin_weight_data_type, TypeId origin_bias_data_type)
: ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx), : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type),
origin_weight_(origin_weight), origin_weight_(origin_weight),
origin_bias_(origin_bias), origin_bias_(origin_bias) {}
origin_weight_data_type_(origin_weight_data_type),
origin_bias_data_type_(origin_bias_data_type) {}
~ConvolutionFP16CPUKernel() override { ~ConvolutionFP16CPUKernel() override {
if (packed_weight_ != nullptr) { if (packed_weight_ != nullptr) {
free(packed_weight_); free(packed_weight_);
@ -64,8 +62,6 @@ class ConvolutionFP16CPUKernel : public ConvolutionBaseFP16CPUKernel {
float16_t *packed_input_ = nullptr; float16_t *packed_input_ = nullptr;
float16_t *packed_weight_ = nullptr; float16_t *packed_weight_ = nullptr;
float16_t *col_major_input_ = nullptr; float16_t *col_major_input_ = nullptr;
TypeId origin_weight_data_type_;
TypeId origin_bias_data_type_;
}; };
} // namespace mindspore::kernel } // namespace mindspore::kernel

@ -31,12 +31,12 @@ class ConvolutionWinogradFP16CPUKernel : public ConvolutionBaseFP16CPUKernel {
public: public:
ConvolutionWinogradFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, ConvolutionWinogradFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx, int out_unit, const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx, int out_unit,
void *origin_weight, void *origin_bias, TypeId origin_bias_data_type) void *origin_weight, void *origin_bias, TypeId origin_weight_data_type,
: ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx), TypeId origin_bias_data_type)
: ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type),
output_unit_(out_unit), output_unit_(out_unit),
origin_weight_(origin_weight), origin_weight_(origin_weight),
origin_bias_(origin_bias), origin_bias_(origin_bias) {}
origin_bias_data_type_(origin_bias_data_type) {}
~ConvolutionWinogradFP16CPUKernel() override { ~ConvolutionWinogradFP16CPUKernel() override {
if (trans_weight_ != nullptr) { if (trans_weight_ != nullptr) {
free(trans_weight_); free(trans_weight_);
@ -86,7 +86,6 @@ class ConvolutionWinogradFP16CPUKernel : public ConvolutionBaseFP16CPUKernel {
TmpBufferAddressFp16 tmp_buffer_address_list_[4]; TmpBufferAddressFp16 tmp_buffer_address_list_[4];
InputTransFp16Func in_func_; InputTransFp16Func in_func_;
OutputTransFp16Func out_func_; OutputTransFp16Func out_func_;
TypeId origin_bias_data_type_;
}; };
} // namespace mindspore::kernel } // namespace mindspore::kernel

@ -37,8 +37,9 @@ namespace mindspore::kernel {
class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseFP16CPUKernel { class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseFP16CPUKernel {
public: public:
DeconvolutionDepthwiseFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, DeconvolutionDepthwiseFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx) const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx,
: ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx) {} TypeId origin_weight_data_type, TypeId origin_bias_data_type)
: ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type) {}
~DeconvolutionDepthwiseFp16CPUKernel() override; ~DeconvolutionDepthwiseFp16CPUKernel() override;
int Init() override; int Init() override;

@ -216,21 +216,25 @@ kernel::LiteKernel *CpuDeConvFp16KernelCreator(const std::vector<lite::Tensor *>
MS_ASSERT(op_parameter != nullptr); MS_ASSERT(op_parameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_Conv2dTransposeFusion); MS_ASSERT(desc.type == schema::PrimitiveType_Conv2dTransposeFusion);
auto weight_data_type = inputs.at(1)->data_type();
TypeId bias_data_type = kTypeUnknown;
if (inputs.size() == 3) {
bias_data_type = inputs.at(2)->data_type();
}
kernel::LiteKernel *kernel = nullptr; kernel::LiteKernel *kernel = nullptr;
auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter); auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter);
if (conv_param->group_ == 1) { if (conv_param->group_ == 1) {
if ((conv_param->stride_h_ != 1 || conv_param->stride_w_ != 1) && if ((conv_param->stride_h_ != 1 || conv_param->stride_w_ != 1) &&
(conv_param->dilation_w_ == 1 && conv_param->dilation_h_ == 1)) { (conv_param->dilation_h_ == 1 && conv_param->dilation_w_ == 1)) {
kernel = new (std::nothrow) kernel::DeConvWinogradFp16CPUKernel(op_parameter, inputs, outputs, ctx); kernel = new (std::nothrow)
kernel::DeConvWinogradFp16CPUKernel(op_parameter, inputs, outputs, ctx, weight_data_type, bias_data_type);
} else { } else {
kernel = new (std::nothrow) kernel::DeConvolutionFp16CPUKernel(op_parameter, inputs, outputs, ctx); kernel = new (std::nothrow)
kernel::DeConvolutionFp16CPUKernel(op_parameter, inputs, outputs, ctx, weight_data_type, bias_data_type);
} }
} else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) { } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) {
kernel = new (std::nothrow) DeconvolutionDepthwiseFp16CPUKernel(op_parameter, inputs, outputs, ctx); kernel = new (std::nothrow)
} else { DeconvolutionDepthwiseFp16CPUKernel(op_parameter, inputs, outputs, ctx, weight_data_type, bias_data_type);
MS_LOG(ERROR) << "deconv do not support group deconv!";
kernel = nullptr;
} }
if (kernel == nullptr) { if (kernel == nullptr) {

@ -27,8 +27,9 @@ namespace mindspore::kernel {
class DeConvolutionFp16CPUKernel : public ConvolutionBaseFP16CPUKernel { class DeConvolutionFp16CPUKernel : public ConvolutionBaseFP16CPUKernel {
public: public:
DeConvolutionFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, DeConvolutionFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
: ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx) {} TypeId origin_weight_data_type, TypeId origin_bias_data_type)
: ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type) {}
~DeConvolutionFp16CPUKernel() override; ~DeConvolutionFp16CPUKernel() override;
int Init() override; int Init() override;
int Run() override; int Run() override;

@ -28,8 +28,9 @@ namespace mindspore::kernel {
class DeConvWinogradFp16CPUKernel : public ConvolutionBaseFP16CPUKernel { class DeConvWinogradFp16CPUKernel : public ConvolutionBaseFP16CPUKernel {
public: public:
DeConvWinogradFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, DeConvWinogradFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
: ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx) {} TypeId origin_weight_data_type, TypeId origin_bias_data_type)
: ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type) {}
~DeConvWinogradFp16CPUKernel() override; ~DeConvWinogradFp16CPUKernel() override;
int Init() override; int Init() override;
int Run() override; int Run() override;

@ -49,11 +49,6 @@ int ArithmeticCPUKernel::Init() {
} }
int ArithmeticCPUKernel::ReSize() { int ArithmeticCPUKernel::ReSize() {
if (CheckDataType() != RET_OK) {
MS_LOG(ERROR) << "ArithmeticCPUKernel resize failed.";
return RET_ERROR;
}
CalcMultiplesAndStrides(param_); CalcMultiplesAndStrides(param_);
if (param_->broadcasting_) { if (param_->broadcasting_) {
outside_ = 1; outside_ = 1;
@ -359,6 +354,10 @@ int ArithmeticsRun(void *cdata, int task_id) {
} }
int ArithmeticCPUKernel::Run() { int ArithmeticCPUKernel::Run() {
if (CheckDataType() != RET_OK) {
MS_LOG(ERROR) << "ArithmeticCPUKernel check dataType failed.";
return RET_ERROR;
}
if (!input0_broadcast_) { if (!input0_broadcast_) {
input0_ptr_ = in_tensors_[0]->data_c(); input0_ptr_ = in_tensors_[0]->data_c();
} }

Loading…
Cancel
Save