|
|
@ -51,6 +51,7 @@ int ConvolutionWinogradFP16CPUKernel::WinogradFilterTransformFp16(const float16_
|
|
|
|
}
|
|
|
|
}
|
|
|
|
auto matrix_gt_data_fp16 = reinterpret_cast<float16_t *>(malloc(input_unit_ * kernel_unit_ * sizeof(float16_t)));
|
|
|
|
auto matrix_gt_data_fp16 = reinterpret_cast<float16_t *>(malloc(input_unit_ * kernel_unit_ * sizeof(float16_t)));
|
|
|
|
if (matrix_gt_data_fp16 == nullptr) {
|
|
|
|
if (matrix_gt_data_fp16 == nullptr) {
|
|
|
|
|
|
|
|
free(matrix_g_data_fp16);
|
|
|
|
MS_LOG(ERROR) << "malloc matrix_gt_data_fp16 failed.";
|
|
|
|
MS_LOG(ERROR) << "malloc matrix_gt_data_fp16 failed.";
|
|
|
|
return RET_ERROR;
|
|
|
|
return RET_ERROR;
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -61,16 +62,25 @@ int ConvolutionWinogradFP16CPUKernel::WinogradFilterTransformFp16(const float16_
|
|
|
|
// separate into two steps ===> tmp = G*g ===> out = tmp * GT
|
|
|
|
// separate into two steps ===> tmp = G*g ===> out = tmp * GT
|
|
|
|
auto tmp_weight_data = reinterpret_cast<float16_t *>(malloc(kernel_unit_ * kernel_unit_ * sizeof(float16_t)));
|
|
|
|
auto tmp_weight_data = reinterpret_cast<float16_t *>(malloc(kernel_unit_ * kernel_unit_ * sizeof(float16_t)));
|
|
|
|
if (tmp_weight_data == nullptr) {
|
|
|
|
if (tmp_weight_data == nullptr) {
|
|
|
|
|
|
|
|
free(matrix_g_data_fp16);
|
|
|
|
|
|
|
|
free(matrix_gt_data_fp16);
|
|
|
|
MS_LOG(ERROR) << "malloc tmp_weight_data failed.";
|
|
|
|
MS_LOG(ERROR) << "malloc tmp_weight_data failed.";
|
|
|
|
return RET_ERROR;
|
|
|
|
return RET_ERROR;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
auto tmp_data = reinterpret_cast<float16_t *>(malloc(input_unit_ * kernel_unit_ * sizeof(float16_t)));
|
|
|
|
auto tmp_data = reinterpret_cast<float16_t *>(malloc(input_unit_ * kernel_unit_ * sizeof(float16_t)));
|
|
|
|
if (tmp_data == nullptr) {
|
|
|
|
if (tmp_data == nullptr) {
|
|
|
|
|
|
|
|
free(tmp_weight_data);
|
|
|
|
|
|
|
|
free(matrix_g_data_fp16);
|
|
|
|
|
|
|
|
free(matrix_gt_data_fp16);
|
|
|
|
MS_LOG(ERROR) << "malloc tmp_data failed.";
|
|
|
|
MS_LOG(ERROR) << "malloc tmp_data failed.";
|
|
|
|
return RET_ERROR;
|
|
|
|
return RET_ERROR;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
auto trans_out_data = reinterpret_cast<float16_t *>(malloc(input_unit_ * input_unit_ * sizeof(float16_t)));
|
|
|
|
auto trans_out_data = reinterpret_cast<float16_t *>(malloc(input_unit_ * input_unit_ * sizeof(float16_t)));
|
|
|
|
if (trans_out_data == nullptr) {
|
|
|
|
if (trans_out_data == nullptr) {
|
|
|
|
|
|
|
|
free(tmp_data);
|
|
|
|
|
|
|
|
free(tmp_weight_data);
|
|
|
|
|
|
|
|
free(matrix_g_data_fp16);
|
|
|
|
|
|
|
|
free(matrix_gt_data_fp16);
|
|
|
|
MS_LOG(ERROR) << "malloc trans_out_data failed.";
|
|
|
|
MS_LOG(ERROR) << "malloc trans_out_data failed.";
|
|
|
|
return RET_ERROR;
|
|
|
|
return RET_ERROR;
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -206,11 +216,14 @@ int ConvolutionWinogradFP16CPUKernel::InitWeightBias() {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
auto matrix_gt = reinterpret_cast<float *>(malloc(input_unit_ * kernel_unit_ * sizeof(float)));
|
|
|
|
auto matrix_gt = reinterpret_cast<float *>(malloc(input_unit_ * kernel_unit_ * sizeof(float)));
|
|
|
|
if (matrix_gt == nullptr) {
|
|
|
|
if (matrix_gt == nullptr) {
|
|
|
|
|
|
|
|
free(matrix_g);
|
|
|
|
MS_LOG(ERROR) << "malloc matrix_gt failed.";
|
|
|
|
MS_LOG(ERROR) << "malloc matrix_gt failed.";
|
|
|
|
return RET_ERROR;
|
|
|
|
return RET_ERROR;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
ret = MallocTransformMatrices();
|
|
|
|
ret = MallocTransformMatrices();
|
|
|
|
if (ret != RET_OK) {
|
|
|
|
if (ret != RET_OK) {
|
|
|
|
|
|
|
|
free(matrix_g);
|
|
|
|
|
|
|
|
free(matrix_gt);
|
|
|
|
MS_LOG(ERROR) << "Malloc transform matrices failed.";
|
|
|
|
MS_LOG(ERROR) << "Malloc transform matrices failed.";
|
|
|
|
return ret;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -221,6 +234,8 @@ int ConvolutionWinogradFP16CPUKernel::InitWeightBias() {
|
|
|
|
float matrix_bt[MAX_LEN];
|
|
|
|
float matrix_bt[MAX_LEN];
|
|
|
|
ret = CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g, matrix_gt, 0.5f, output_unit_, kernel_unit_);
|
|
|
|
ret = CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g, matrix_gt, 0.5f, output_unit_, kernel_unit_);
|
|
|
|
if (ret != RET_OK) {
|
|
|
|
if (ret != RET_OK) {
|
|
|
|
|
|
|
|
free(matrix_g);
|
|
|
|
|
|
|
|
free(matrix_gt);
|
|
|
|
MS_LOG(ERROR) << "get matrix g from CookToomFilter failed.";
|
|
|
|
MS_LOG(ERROR) << "get matrix g from CookToomFilter failed.";
|
|
|
|
return ret;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -235,6 +250,8 @@ int ConvolutionWinogradFP16CPUKernel::InitWeightBias() {
|
|
|
|
|
|
|
|
|
|
|
|
ret = WinogradFilterTransformFp16(execute_weight_, matrix_g, matrix_gt, oc_block);
|
|
|
|
ret = WinogradFilterTransformFp16(execute_weight_, matrix_g, matrix_gt, oc_block);
|
|
|
|
if (ret != RET_OK) {
|
|
|
|
if (ret != RET_OK) {
|
|
|
|
|
|
|
|
free(matrix_g);
|
|
|
|
|
|
|
|
free(matrix_gt);
|
|
|
|
MS_LOG(ERROR) << "winograd filter transfrom failed.";
|
|
|
|
MS_LOG(ERROR) << "winograd filter transfrom failed.";
|
|
|
|
return ret;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -242,6 +259,8 @@ int ConvolutionWinogradFP16CPUKernel::InitWeightBias() {
|
|
|
|
// init bias
|
|
|
|
// init bias
|
|
|
|
bias_data_ = malloc(oc_block_num * oc_block * sizeof(float16_t));
|
|
|
|
bias_data_ = malloc(oc_block_num * oc_block * sizeof(float16_t));
|
|
|
|
if (bias_data_ == nullptr) {
|
|
|
|
if (bias_data_ == nullptr) {
|
|
|
|
|
|
|
|
free(matrix_g);
|
|
|
|
|
|
|
|
free(matrix_gt);
|
|
|
|
MS_LOG(ERROR) << "malloc bias_data_ failed.";
|
|
|
|
MS_LOG(ERROR) << "malloc bias_data_ failed.";
|
|
|
|
return RET_ERROR;
|
|
|
|
return RET_ERROR;
|
|
|
|
}
|
|
|
|
}
|
|
|
|