commit
3763e201b5
@ -0,0 +1,54 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "nnacl/fp16/common_func_fp16.h"
|
||||
|
||||
void PostConvFuncCommFp16(float16_t *out_ptr, const float16_t *src_ptr_, const float16_t *bias_ptr,
|
||||
size_t output_channel, size_t plane_size, size_t oc_stride, size_t hw_stride,
|
||||
ActType act_type, int size) {
|
||||
if (size == 0) {
|
||||
return;
|
||||
}
|
||||
for (int oc = 0; oc < output_channel; oc++) {
|
||||
int oc_div = oc / size, oc_mod = oc % size;
|
||||
for (int hw = 0; hw < plane_size; hw++) {
|
||||
int src_index = oc_div * size * hw_stride + hw * size + oc_mod;
|
||||
int dst_index = hw * oc_stride + oc;
|
||||
float16_t value = src_ptr_[src_index];
|
||||
if (bias_ptr != NULL) {
|
||||
value = value + bias_ptr[oc];
|
||||
}
|
||||
value = (act_type == ActType_Relu || act_type == ActType_Relu6) ? (MSMAX(0.f, value)) : (value);
|
||||
value = (act_type == ActType_Relu6) ? (MSMIN(6.f, value)) : (value);
|
||||
out_ptr[dst_index] = value;
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/*
 * Post-processes a C8-blocked fp16 buffer into nhwc_out by delegating to
 * PostFuncBiasReluC8Fp16.
 *
 * The channel count is split into the part that is a whole multiple of C8NUM
 * and the leftover channels, and the output channel stride is converted from
 * elements to bytes before the call.
 */
void PostConvFuncFp16C8(const float16_t *c8_out, float16_t *nhwc_out, const float16_t *bias, size_t oc, size_t plane,
                        size_t oc_stride, ActType act_type) {
  const size_t tail_channels = oc % C8NUM;
  const size_t whole_block_channels = oc - tail_channels;
  const size_t stride_in_bytes = oc_stride * sizeof(float16_t);
  PostFuncBiasReluC8Fp16(nhwc_out, c8_out, bias, whole_block_channels, tail_channels, plane, stride_in_bytes,
                         act_type);
}
|
||||
|
||||
/*
 * Post-processes a C4-blocked fp16 buffer into nhwc_out via PostConvFuncCommFp16,
 * using C4NUM as the channel block width.
 *
 * The destination channel stride passed on is `oc` itself, i.e. consecutive
 * plane positions in nhwc_out are `oc` elements apart.
 */
void PostConvFuncFp16C4(const float16_t *c4_out, float16_t *nhwc_out, const float16_t *bias, size_t oc, size_t plane,
                        size_t plane_stride, ActType act_type) {
  const size_t dst_channel_stride = oc;
  PostConvFuncCommFp16(nhwc_out, c4_out, bias, oc, plane, dst_channel_stride, plane_stride, act_type, C4NUM);
}
|
@ -0,0 +1,39 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_NNACL_FP16_COMMON_FUNC_FP16_H_
|
||||
#define MINDSPORE_LITE_NNACL_FP16_COMMON_FUNC_FP16_H_
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include "nnacl/op_base.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* deconv common */
|
||||
/* Post-processes a C8-blocked fp16 buffer (c8_out_ptr) into out_ptr with optional bias and activation.
 * `stride` is the output channel stride in elements; the definition converts it to bytes
 * and forwards the work to PostFuncBiasReluC8Fp16. */
void PostConvFuncFp16C8(const float16_t *c8_out_ptr, float16_t *out_ptr, const float16_t *bias_ptr,
|
||||
size_t output_channel, size_t plane_size, size_t stride, ActType act_type);
|
||||
/* Bias + activation kernel used by PostConvFuncFp16C8, which calls it with:
 *   oc8div    = channel count rounded down to a multiple of C8NUM,
 *   oc8mod    = remaining channels (channel count % C8NUM),
 *   stride    = output channel stride in BYTES (elements * sizeof(float16_t)),
 *   relu_type = the ActType value of the requested activation.
 * NOTE(review): the implementation is not visible from this header; the exact handling of
 * bias == NULL and of each relu_type value should be confirmed against it. */
void PostFuncBiasReluC8Fp16(float16_t *dst, const float16_t *src, const float16_t *bias, size_t oc8div, size_t oc8mod,
|
||||
size_t plane_size, size_t stride, size_t relu_type);
|
||||
|
||||
/* deconv winograd */
|
||||
/* Post-processes a C4-blocked fp16 buffer (c4_out) into nhwc_out with optional bias and activation.
 * plane_stride is forwarded as the number of plane positions stored per channel block of the source;
 * the output is written with a channel stride equal to output_channel. */
void PostConvFuncFp16C4(const float16_t *c4_out, float16_t *nhwc_out, const float16_t *bias, size_t output_channel,
|
||||
size_t plane_size, size_t plane_stride, ActType act_type);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif // MINDSPORE_LITE_NNACL_FP16_COMMON_FUNC_FP16_H_
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,39 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_NNACL_FP16_DECONV_WINOGRAD_FP16_H_
|
||||
#define MINDSPORE_LITE_NNACL_FP16_DECONV_WINOGRAD_FP16_H_
|
||||
|
||||
#include "nnacl/fp16/winograd_transform_fp16.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* NOTE(review): implementation not visible here. Judging by the name and parameters, this presumably
 * packs the fp16 deconvolution weight (nhwc_weight) into the given compute unit for the winograd
 * path — confirm. Returns an int status; the meaning of the values should be checked in the definition. */
int PackDeConvWgDataFp16(float16_t *nhwc_weight, DeConvComputeUnit *unit, ConvParameter *conv_param,
|
||||
DeConvParam *deconv_param);
|
||||
|
||||
/* NOTE(review): implementation not visible here. Presumably runs the fp16 winograd deconvolution for
 * `calculate_count` tiles starting at `start_index`, using tile_in / tile_out as working buffers for
 * the worker identified by task_id — confirm against the definition. */
void DeconvWgFp16(float16_t *nhwc_input_, float16_t *tile_in, float16_t *tile_out, int start_index, int calculate_count,
|
||||
ConvParameter *conv_param, DeConvParam *deconv_param, int task_id);
|
||||
|
||||
/* NOTE(review): implementation not visible here. Presumably merges the per-tile results in tile_out
 * into nc4hw4_output for `calculate_count` tiles of the tile group `tile_index` — confirm against
 * the definition. */
void DeconvWgPostFp16(float16_t *tile_out, float16_t *nc4hw4_output, ConvParameter *conv_param,
|
||||
DeConvParam *deconv_param, int calculate_count, int tile_index);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // MINDSPORE_LITE_NNACL_FP16_DECONV_WINOGRAD_FP16_H_
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,67 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_DECONVOLUTION_WINOGRAD_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_DECONVOLUTION_WINOGRAD_H_
|
||||
|
||||
#include <mutex>
#include <vector>
#include "include/errorcode.h"
#include "nnacl/fp16/common_func_fp16.h"
#include "nnacl/fp16/deconv_winograd_fp16.h"
#include "nnacl/fp16/pack_fp16.h"
#include "src/runtime/kernel/arm/fp16/convolution_base_fp16.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
class DeConvWinogradFp16CPUKernel : public ConvolutionBaseFP16CPUKernel {
|
||||
public:
|
||||
DeConvWinogradFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
|
||||
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
|
||||
const mindspore::lite::PrimitiveC *primitive)
|
||||
: ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, primitive) {
|
||||
deconv_param_ = new DeConvParam();
|
||||
for (auto &wg : deconv_param_->a_buffer_) {
|
||||
wg.buf_init_ = false;
|
||||
}
|
||||
}
|
||||
~DeConvWinogradFp16CPUKernel() override;
|
||||
int Init() override;
|
||||
int Run() override;
|
||||
int ReSize() override;
|
||||
|
||||
public:
|
||||
int DoDeconv(int task_id);
|
||||
int DeDeconvPost(int task_id);
|
||||
|
||||
private:
|
||||
int InitComputeParam();
|
||||
int InitDataParam();
|
||||
int InitParameter();
|
||||
void FreeDeconvParam();
|
||||
void FreeResizeBuf();
|
||||
|
||||
private:
|
||||
DeConvParam *deconv_param_;
|
||||
std::mutex lock_;
|
||||
float16_t *nhwc_input_ = nullptr;
|
||||
float16_t *nhwc_output_ = nullptr;
|
||||
float16_t *nc4hw4_output_ = nullptr;
|
||||
float16_t *tile_input_ = nullptr;
|
||||
float16_t *tile_output_ = nullptr;
|
||||
int thread_num_hw_;
|
||||
int thread_stride_hw_;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_DECONVOLUTION_WINOGRAD_H_
|
Loading…
Reference in new issue