|
|
|
@ -38,11 +38,9 @@ constexpr size_t CI_TILE = C4NUM;
|
|
|
|
|
// Compile-time tile constant, set equal to C4NUM (declared next to CI_TILE,
// which is initialized from the same value).
// NOTE(review): presumably the number of output channels packed into one
// slice, i.e. the divisor behind CO_SLICES_ — confirm where CO_SLICES_ is computed.
constexpr size_t CO_TILE = C4NUM;
|
|
|
|
|
|
|
|
|
|
int ConvolutionOpenCLKernel::Init() {
|
|
|
|
|
static int init_count = 0;
|
|
|
|
|
auto allocator = ocl_runtime_->GetAllocator();
|
|
|
|
|
auto param = reinterpret_cast<ConvParameter *>(op_parameter_);
|
|
|
|
|
std::set<std::string> build_options;
|
|
|
|
|
init_count++;
|
|
|
|
|
use_fp16_ = ocl_runtime_->GetFp16Enable();
|
|
|
|
|
|
|
|
|
|
if (op_format_ != Format_NHWC4 && op_format_ != Format_NC4HW4) {
|
|
|
|
@ -70,23 +68,59 @@ int ConvolutionOpenCLKernel::Init() {
|
|
|
|
|
TILES_XY_ = TILES_X_ * TILES_Y_;
|
|
|
|
|
use_winograd_ = UseWinograd4x4To6x6();
|
|
|
|
|
|
|
|
|
|
std::vector<int> vpara{IH_,
|
|
|
|
|
IW_,
|
|
|
|
|
OH_,
|
|
|
|
|
OW_,
|
|
|
|
|
KH_,
|
|
|
|
|
KW_,
|
|
|
|
|
CI_SLICES_,
|
|
|
|
|
CO_SLICES_,
|
|
|
|
|
param->stride_h_,
|
|
|
|
|
param->stride_w_,
|
|
|
|
|
param->pad_u_,
|
|
|
|
|
param->pad_l_,
|
|
|
|
|
param->pad_d_,
|
|
|
|
|
param->pad_r_};
|
|
|
|
|
std::string code_id;
|
|
|
|
|
for (auto &iv : vpara) {
|
|
|
|
|
code_id += "_" + std::to_string(iv);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::vector<bool> vflag{
|
|
|
|
|
use_fp16_,
|
|
|
|
|
op_format_ == schema::Format_NC4HW4,
|
|
|
|
|
op_format_ == schema::Format_NHWC4,
|
|
|
|
|
param->act_type_ == ActType_Relu6,
|
|
|
|
|
param->act_type_ == ActType_Relu,
|
|
|
|
|
param->pad_u_ || param->pad_d_,
|
|
|
|
|
OW_ % 2 == 1,
|
|
|
|
|
OW_ * CO_SLICES_ <= MAX_IMAGE2D_SIZE,
|
|
|
|
|
};
|
|
|
|
|
unsigned int init_count = 0;
|
|
|
|
|
for (size_t i = 0; i < vflag.size(); ++i) {
|
|
|
|
|
init_count |= ((unsigned int)vflag[i]) << i;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
code_id += "_" + std::to_string(init_count);
|
|
|
|
|
|
|
|
|
|
// build kernel
|
|
|
|
|
if (use_winograd_) {
|
|
|
|
|
MS_LOG(DEBUG) << "use winograd";
|
|
|
|
|
std::string program_name;
|
|
|
|
|
program_name = "Winograd4x4To36" + std::to_string(init_count);
|
|
|
|
|
program_name = "Winograd4x4To36" + code_id;
|
|
|
|
|
ocl_runtime_->LoadSource(program_name, CodeGenWinograd4x4To36());
|
|
|
|
|
ocl_runtime_->BuildKernel(kernel_4x4to36_, program_name, "Winograd4x4To36", build_options);
|
|
|
|
|
|
|
|
|
|
program_name = "WinogradConvolution" + std::to_string(init_count);
|
|
|
|
|
program_name = "WinogradConvolution" + code_id;
|
|
|
|
|
ocl_runtime_->LoadSource(program_name, CodeGenWinogradConvolution());
|
|
|
|
|
ocl_runtime_->BuildKernel(kernel_conv_, program_name, "WinogradConvolution", build_options);
|
|
|
|
|
|
|
|
|
|
program_name = "Winograd36To4x4" + std::to_string(init_count);
|
|
|
|
|
program_name = "Winograd36To4x4" + code_id;
|
|
|
|
|
ocl_runtime_->LoadSource(program_name, CodeGenWinograd36To4x4());
|
|
|
|
|
ocl_runtime_->BuildKernel(kernel_36to4x4_, program_name, "Winograd36To4x4", build_options);
|
|
|
|
|
} else {
|
|
|
|
|
std::string program_name = "convolution" + std::to_string(init_count);
|
|
|
|
|
std::string program_name = "convolution" + code_id;
|
|
|
|
|
std::string source = op_format_ == Format_NHWC4 ? CodeGenConvolutionNHWC4() : CodeGenConvolutionNC4HW4();
|
|
|
|
|
ocl_runtime_->LoadSource(program_name, source);
|
|
|
|
|
ocl_runtime_->BuildKernel(kernel_conv_, program_name, "Convolution", build_options);
|
|
|
|
|