|
|
|
@ -200,6 +200,13 @@ int ConvolutionWinogradCPUKernel::InitTmpBuffer() {
|
|
|
|
|
int ic4 = UP_DIV(conv_param_->input_channel_, C4NUM);
|
|
|
|
|
MS_ASSERT(ctx_->allocator != nullptr);
|
|
|
|
|
|
|
|
|
|
size_t tile_buffer_size = thread_count_ * C12NUM * input_unit_ * input_unit_ * ic4 * C4NUM * sizeof(float);
|
|
|
|
|
trans_input_ = reinterpret_cast<float *>(ctx_->allocator->Malloc(tile_buffer_size));
|
|
|
|
|
if (trans_input_ == nullptr) {
|
|
|
|
|
MS_LOG(ERROR) << "malloc trans_input_ failed.";
|
|
|
|
|
return RET_ERROR;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
gemm_out_ = reinterpret_cast<float *>(
|
|
|
|
|
ctx_->allocator->Malloc(thread_count_ * C12NUM * input_unit_ * input_unit_ * oc8 * C8NUM * sizeof(float)));
|
|
|
|
|
if (gemm_out_ == nullptr) {
|
|
|
|
@ -290,10 +297,6 @@ int ConvolutionWinogradCPUKernel::ReSize() {
|
|
|
|
|
free(nhwc4_input_);
|
|
|
|
|
nhwc4_input_ = nullptr;
|
|
|
|
|
}
|
|
|
|
|
if (trans_input_ != nullptr) {
|
|
|
|
|
free(trans_input_);
|
|
|
|
|
trans_input_ = nullptr;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ret = ConvolutionBaseCPUKernel::Init();
|
|
|
|
|
if (ret != RET_OK) {
|
|
|
|
@ -316,14 +319,6 @@ int ConvolutionWinogradCPUKernel::ReSize() {
|
|
|
|
|
}
|
|
|
|
|
memset(nhwc4_input_, 0, nhwc4_input_size);
|
|
|
|
|
|
|
|
|
|
size_t tile_buffer_size = thread_count_ * C12NUM * input_unit_ * input_unit_ * ic4 * C4NUM * sizeof(float);
|
|
|
|
|
trans_input_ = reinterpret_cast<float *>(malloc(tile_buffer_size));
|
|
|
|
|
if (trans_input_ == nullptr) {
|
|
|
|
|
MS_LOG(ERROR) << "malloc trans_input_ failed.";
|
|
|
|
|
return RET_ERROR;
|
|
|
|
|
}
|
|
|
|
|
memset(trans_input_, 0, tile_buffer_size);
|
|
|
|
|
|
|
|
|
|
ret = ConfigInputOutput();
|
|
|
|
|
if (ret != RET_OK) {
|
|
|
|
|
MS_LOG(ERROR) << "ConfigInputOutput failed.";
|
|
|
|
|