Add equality and relational operators for OpenCL

pull/6162/head
Corleone 4 years ago
parent 2f14c40934
commit 547fea05a6

File diff suppressed because it is too large.

@@ -28,6 +28,7 @@
 using mindspore::kernel::KERNEL_ARCH::kGPU;
 using mindspore::lite::KernelRegistrar;
+using mindspore::schema::PrimitiveType_Eltwise;
 namespace mindspore::kernel {
@@ -130,18 +131,18 @@ int ArithmeticOpenCLKernel::InitBuffer() {
       MS_LOG(ERROR) << "Malloc buffer failed!";
       return RET_ERROR;
     }
-    std::function<float(float)> to_dtype = [](float x) -> float { return (float)x; };
+    std::function<float(float)> to_dtype = [](float x) -> float { return x; };
     PackNHWCToNC4HW4<float, float>(in_tensors_[1]->MutableData(), weight, batch, plane, channel, to_dtype);
     weight_ptr_ = allocator->CreateImageFromHost(weight, in_tensors_[1]->ElementsNum(), img_size);
     delete[] weight;
   } else if (in_tensors_[0]->data_type() == kNumberTypeFloat16) {
-    int16_t *weight = new (std::nothrow) int16_t[pack_weight_size];
+    float16_t *weight = new (std::nothrow) float16_t[pack_weight_size];
     if (weight == nullptr) {
       MS_LOG(ERROR) << "Malloc buffer failed!";
       return RET_ERROR;
     }
-    std::function<int16_t(float)> to_dtype = Float32ToShort;
-    PackNHWCToNC4HW4<float, int16_t>(in_tensors_[1]->MutableData(), weight, batch, plane, channel, to_dtype);
+    std::function<float16_t(float)> to_dtype = [](float x) -> float16_t { return static_cast<float16_t>(x); };
+    PackNHWCToNC4HW4<float, float16_t>(in_tensors_[1]->MutableData(), weight, batch, plane, channel, to_dtype);
     weight_ptr_ = allocator->CreateImageFromHost(weight, in_tensors_[1]->ElementsNum(), img_size);
     delete[] weight;
   } else {
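
This hunk and the one below make the same fix for two weight layouts: PackNHWCToNC4HW4 packs into the NC4HW4 image layout, while PackNHWCToNHWC4 keeps NHWC order but pads the channel dimension up to a multiple of four, so every pixel occupies whole FLT4 vectors on the GPU. A minimal host-side sketch of the NHWC-to-NHWC4 packing, assuming batch/plane/channel mean N, H*W and C as in the calls above (the real MindSpore helpers are more general):

    #include <functional>

    // Sketch: pack an NHWC buffer into NHWC4, i.e. pad channels to a multiple
    // of 4 with zeros so each pixel maps onto whole FLT4 vectors.
    template <typename SRC_T, typename DST_T>
    void PackNHWCToNHWC4Sketch(const SRC_T *src, DST_T *dst, int batch, int plane, int channel,
                               const std::function<DST_T(SRC_T)> &to_dtype) {
      const int c4 = ((channel + 3) / 4) * 4;  // channel rounded up to a multiple of 4
      for (int b = 0; b < batch; ++b) {
        for (int p = 0; p < plane; ++p) {  // plane == H * W
          for (int c = 0; c < c4; ++c) {
            dst[(b * plane + p) * c4 + c] =
                c < channel ? to_dtype(src[(b * plane + p) * channel + c]) : DST_T(0);
          }
        }
      }
    }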
@@ -162,18 +163,18 @@ int ArithmeticOpenCLKernel::InitBuffer() {
       MS_LOG(ERROR) << "Malloc buffer failed!";
       return RET_ERROR;
     }
-    std::function<float(float)> to_dtype = [](float x) -> float { return (float)x; };
+    std::function<float(float)> to_dtype = [](float x) -> float { return x; };
     PackNHWCToNHWC4<float, float>(in_tensors_[1]->MutableData(), weight, batch, plane, channel, to_dtype);
     weight_ptr_ = allocator->CreateImageFromHost(weight, in_tensors_[1]->ElementsNum(), img_size);
     delete[] weight;
   } else if (in_tensors_[0]->data_type() == kNumberTypeFloat16) {
-    int16_t *weight = new (std::nothrow) int16_t[pack_weight_size];
+    float16_t *weight = new (std::nothrow) float16_t[pack_weight_size];
     if (weight == nullptr) {
       MS_LOG(ERROR) << "Malloc buffer failed!";
       return RET_ERROR;
     }
-    std::function<int16_t(float)> to_dtype = Float32ToShort;
-    PackNHWCToNHWC4<float, int16_t>(in_tensors_[1]->MutableData(), weight, batch, plane, channel, to_dtype);
+    std::function<float16_t(float)> to_dtype = [](float x) -> float16_t { return static_cast<float16_t>(x); };
+    PackNHWCToNHWC4<float, float16_t>(in_tensors_[1]->MutableData(), weight, batch, plane, channel, to_dtype);
     weight_ptr_ = allocator->CreateImageFromHost(weight, in_tensors_[1]->ElementsNum(), img_size);
     delete[] weight;
   } else {
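
In both fp16 branches above, int16_t was only an opaque carrier for half-precision bit patterns; the rewrite allocates genuine float16_t storage and converts by value with static_cast, so the buffer's element type matches its contents and later code can do arithmetic on the weights directly. A standalone illustration of the value-versus-bits distinction, assuming an ARM/Clang toolchain where __fp16 is a native arithmetic type (as in MindSpore Lite fp16 builds):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    using float16_t = __fp16;  // assumption: native half type on the target toolchain

    int main() {
      // New path: value conversion, rounding to the nearest representable half.
      float16_t h = static_cast<float16_t>(0.1f);  // ~0.0999756

      // Old path (assumed): the same 16 bits, but stored in an int16_t buffer.
      int16_t bits;
      std::memcpy(&bits, &h, sizeof(bits));
      std::printf("half %f has bit pattern 0x%04x, which misreads as %d\n",
                  static_cast<float>(h),
                  static_cast<unsigned>(static_cast<std::uint16_t>(bits)), bits);
      // -> bit pattern 0x2e66, i.e. 11878 if ever treated as an integer value.
      return 0;
    }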
@@ -197,28 +198,69 @@ int ArithmeticOpenCLKernel::Init() {
   std::string kernel_name;
   const ArithmeticParameter *arithmetic_parameter = reinterpret_cast<const ArithmeticParameter *>(op_parameter_);
   if (arithmetic_parameter->broadcasting_) {
     element_flag_ = false;
-    kernel_name = "BoardcastArith";
+    kernel_name = "Broadcast";
   } else {
     element_flag_ = true;
-    switch (op_parameter_->type_) {
-      case PrimitiveType_Mul:
-        kernel_name = "ElementMul";
-        break;
-      case PrimitiveType_Add:
-        kernel_name = "ElementAdd";
-        break;
-      case PrimitiveType_Sub:
-        kernel_name = "ElementSub";
-        break;
-      case PrimitiveType_Div:
-        kernel_name = "ElementDiv";
-        break;
-      default:
-        MS_LOG(ERROR) << "Error Operator type " << op_parameter_->type_;
-        break;
-    }
+    kernel_name = "Element";
   }
+  switch (op_parameter_->type_) {
+    case PrimitiveType_Mul:
+      kernel_name += "Mul";
+      break;
+    case PrimitiveType_Add:
+      kernel_name += "Add";
+      break;
+    case PrimitiveType_Sub:
+      kernel_name += "Sub";
+      break;
+    case PrimitiveType_Div:
+      kernel_name += "Div";
+      break;
+    case PrimitiveType_LogicalAnd:
+      kernel_name += "And";
+      break;
+    case PrimitiveType_LogicalOr:
+      kernel_name += "Or";
+      break;
+    case PrimitiveType_Maximum:
+      kernel_name += "Max";
+      break;
+    case PrimitiveType_Minimum:
+      kernel_name += "Min";
+      break;
+    case PrimitiveType_FloorDiv:
+      kernel_name += "FloorDiv";
+      break;
+    case PrimitiveType_FloorMod:
+      kernel_name += "FloorMod";
+      break;
+    case PrimitiveType_SquaredDifference:
+      kernel_name += "SquaredDifference";
+      break;
+    case PrimitiveType_Equal:
+      kernel_name += "Equal";
+      break;
+    case PrimitiveType_NotEqual:
+      kernel_name += "NotEqual";
+      break;
+    case PrimitiveType_Less:
+      kernel_name += "Less";
+      break;
+    case PrimitiveType_LessEqual:
+      kernel_name += "LessEqual";
+      break;
+    case PrimitiveType_Greater:
+      kernel_name += "Greater";
+      break;
+    case PrimitiveType_GreaterEqual:
+      kernel_name += "GreaterEqual";
+      break;
+    default:
+      MS_LOG(ERROR) << "Error Operator type " << op_parameter_->type_;
+      return RET_ERROR;
+  }
   lite::STATUS error_code = RET_OK;
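
Init() now assembles the kernel name from two parts: an "Element" or "Broadcast" prefix chosen by broadcasting_, and an operator suffix chosen by the switch, so PrimitiveType_Greater resolves to "ElementGreater" or "BroadcastGreater". The same dispatch could be table-driven; a sketch only, with kOpSuffixes a hypothetical name, assuming the schema headers defining the PrimitiveType_* enumerators are in scope as they are in this file:

    #include <string>
    #include <unordered_map>

    // Hypothetical lookup table equivalent to the switch above; the keys are
    // the mindspore::schema PrimitiveType enumerators from the diff.
    static const std::unordered_map<int, std::string> kOpSuffixes = {
        {PrimitiveType_Mul, "Mul"}, {PrimitiveType_Add, "Add"},
        {PrimitiveType_Sub, "Sub"}, {PrimitiveType_Div, "Div"},
        {PrimitiveType_LogicalAnd, "And"}, {PrimitiveType_LogicalOr, "Or"},
        {PrimitiveType_Maximum, "Max"}, {PrimitiveType_Minimum, "Min"},
        {PrimitiveType_FloorDiv, "FloorDiv"}, {PrimitiveType_FloorMod, "FloorMod"},
        {PrimitiveType_SquaredDifference, "SquaredDifference"},
        {PrimitiveType_Equal, "Equal"}, {PrimitiveType_NotEqual, "NotEqual"},
        {PrimitiveType_Less, "Less"}, {PrimitiveType_LessEqual, "LessEqual"},
        {PrimitiveType_Greater, "Greater"}, {PrimitiveType_GreaterEqual, "GreaterEqual"},
    };

    // Usage sketch:
    //   auto it = kOpSuffixes.find(op_parameter_->type_);
    //   if (it == kOpSuffixes.end()) {
    //     MS_LOG(ERROR) << "Error Operator type " << op_parameter_->type_;
    //     return RET_ERROR;
    //   }
    //   kernel_name += it->second;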
@@ -265,26 +307,8 @@ int ArithmeticOpenCLKernel::Run() {
     void *weight = weight_ptr_ == nullptr ? in_tensors_[1]->MutableData() : weight_ptr_;
     runtime_->SetKernelArg(kernel_, arg_idx++, weight);
   } else {
-    float value = static_cast<float *>(in_tensors_[1]->MutableData())[0];
-    switch (op_parameter_->type_) {
-      case PrimitiveType_Mul:
-        weight_ = value;
-        break;
-      case PrimitiveType_Add:
-        bias_ = value;
-        break;
-      case PrimitiveType_Sub:
-        bias_ = -1 * value;
-        break;
-      case PrimitiveType_Div:
-        weight_ = 1 / value;
-        break;
-      default:
-        MS_LOG(ERROR) << "Error Operator type " << op_parameter_->type_;
-        break;
-    }
-    runtime_->SetKernelArg(kernel_, arg_idx++, weight_);
-    runtime_->SetKernelArg(kernel_, arg_idx++, bias_);
+    float weight = static_cast<float *>(in_tensors_[1]->MutableData())[0];
+    runtime_->SetKernelArg(kernel_, arg_idx++, weight);
   }
   runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->MutableData());
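
The deleted switch folded a scalar second operand into the member pair (weight_, bias_) so every supported op could run as weight_ * x + bias_: Mul set weight_ = v, Add set bias_ = v, Sub set bias_ = -v, Div set weight_ = 1 / v. No such affine encoding exists for the new comparison and logical operators (no (w, b) makes w * x + b equal x > v for all x), so Run() now passes the raw scalar and each kernel applies its own operation. A hypothetical CPU reference for one of the new scalar-broadcast cases, handy in unit tests:

    #include <cstddef>

    // Hypothetical reference for ElementGreater with a scalar second operand,
    // assuming the relational kernels write 1.0f for true and 0.0f for false.
    void ElementGreaterScalarRef(const float *in, float scalar, float *out, std::size_t count) {
      for (std::size_t i = 0; i < count; ++i) {
        out[i] = in[i] > scalar ? 1.0f : 0.0f;
      }
    }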
@@ -345,4 +369,36 @@ REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Mul, OpenCLArithmeticKernelCreator)
 REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Add, OpenCLArithmeticKernelCreator)
 REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Sub, OpenCLArithmeticKernelCreator)
 REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Div, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_LogicalAnd, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_LogicalOr, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Maximum, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Minimum, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_FloorDiv, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_FloorMod, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_SquaredDifference, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Equal, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_NotEqual, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Less, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_LessEqual, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Greater, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_GreaterEqual, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Eltwise, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Mul, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Add, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Sub, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Div, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_LogicalAnd, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_LogicalOr, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Maximum, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Minimum, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_FloorDiv, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_FloorMod, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_SquaredDifference, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Equal, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_NotEqual, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Less, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_LessEqual, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Greater, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_GreaterEqual, OpenCLArithmeticKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Eltwise, OpenCLArithmeticKernelCreator)
 } // namespace mindspore::kernel
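
All seventeen operators are registered for both kNumberTypeFloat32 and kNumberTypeFloat16 through the same creator, which implies the relational and logical kernels also emit results in the tensor's FLT type (presumably 1 and 0) rather than a dedicated bool tensor. A few more hypothetical CPU references in the same style, covering the less obvious new ops:

    #include <cmath>
    #include <cstddef>

    // Hypothetical element-wise references for equal-shaped inputs.
    void FloorDivRef(const float *a, const float *b, float *out, std::size_t n) {
      for (std::size_t i = 0; i < n; ++i) out[i] = std::floor(a[i] / b[i]);
    }
    void FloorModRef(const float *a, const float *b, float *out, std::size_t n) {
      // FloorMod takes the sign of the divisor: a - floor(a / b) * b.
      for (std::size_t i = 0; i < n; ++i) out[i] = a[i] - std::floor(a[i] / b[i]) * b[i];
    }
    void SquaredDifferenceRef(const float *a, const float *b, float *out, std::size_t n) {
      for (std::size_t i = 0; i < n; ++i) out[i] = (a[i] - b[i]) * (a[i] - b[i]);
    }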

@@ -44,8 +44,6 @@ class ArithmeticOpenCLKernel : public OpenCLKernel {
   cl::Kernel kernel_;
   lite::opencl::OpenCLRuntime *runtime_;
   bool element_flag_{true};
-  float weight_{1.f};
-  float bias_{.0f};
   void *weight_ptr_{nullptr};
   std::vector<size_t> local_size_;

@@ -152,7 +152,7 @@ int ScaleOpenCLKernel::InitBuffer() {
       delete[] scale;
       return RET_ERROR;
     }
-    std::function<float(float)> to_dtype = [](float x) -> float { return (float)x; };
+    std::function<float(float)> to_dtype = [](float x) -> float { return x; };
     PackNHWCToNC4HW4<float, float>(in_tensors_[1]->MutableData(), scale, batch, plane, channel, to_dtype);
     PackNHWCToNC4HW4<float, float>(in_tensors_[2]->MutableData(), offset, batch, plane, channel, to_dtype);
     scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size);
@@ -160,20 +160,20 @@ int ScaleOpenCLKernel::InitBuffer() {
     delete[] scale;
     delete[] offset;
   } else if (in_tensors_[0]->data_type() == kNumberTypeFloat16) {
-    int16_t *scale = new (std::nothrow) int16_t[pack_weight_size];
+    float16_t *scale = new (std::nothrow) float16_t[pack_weight_size];
     if (scale == nullptr) {
       MS_LOG(ERROR) << "Malloc buffer failed!";
       return RET_ERROR;
     }
-    int16_t *offset = new (std::nothrow) int16_t[pack_weight_size];
+    float16_t *offset = new (std::nothrow) float16_t[pack_weight_size];
     if (offset == nullptr) {
       MS_LOG(ERROR) << "Malloc buffer failed!";
       delete[] scale;
       return RET_ERROR;
     }
-    std::function<int16_t(float)> to_dtype = Float32ToShort;
-    PackNHWCToNC4HW4<float, int16_t>(in_tensors_[1]->MutableData(), scale, batch, plane, channel, to_dtype);
-    PackNHWCToNC4HW4<float, int16_t>(in_tensors_[2]->MutableData(), offset, batch, plane, channel, to_dtype);
+    std::function<float16_t(float)> to_dtype = [](float x) -> float16_t { return static_cast<float16_t>(x); };
+    PackNHWCToNC4HW4<float, float16_t>(in_tensors_[1]->MutableData(), scale, batch, plane, channel, to_dtype);
+    PackNHWCToNC4HW4<float, float16_t>(in_tensors_[2]->MutableData(), offset, batch, plane, channel, to_dtype);
     scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size);
     offset_ptr_ = allocator->CreateImageFromHost(offset, in_tensors_[2]->ElementsNum(), img_size);
     delete[] scale;
@@ -202,7 +202,7 @@ int ScaleOpenCLKernel::InitBuffer() {
       delete[] scale;
       return RET_ERROR;
     }
-    std::function<float(float)> to_dtype = [](float x) -> float { return (float)x; };
+    std::function<float(float)> to_dtype = [](float x) -> float { return x; };
     PackNHWCToNHWC4<float, float>(in_tensors_[1]->MutableData(), scale, batch, plane, channel, to_dtype);
     PackNHWCToNHWC4<float, float>(in_tensors_[2]->MutableData(), offset, batch, plane, channel, to_dtype);
     scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size);
@@ -210,20 +210,20 @@ int ScaleOpenCLKernel::InitBuffer() {
     delete[] scale;
     delete[] offset;
   } else if (in_tensors_[0]->data_type() == kNumberTypeFloat16) {
-    int16_t *scale = new (std::nothrow) int16_t[pack_weight_size];
+    float16_t *scale = new (std::nothrow) float16_t[pack_weight_size];
     if (scale == nullptr) {
       MS_LOG(ERROR) << "Malloc buffer failed!";
       return RET_ERROR;
     }
-    int16_t *offset = new (std::nothrow) int16_t[pack_weight_size];
+    float16_t *offset = new (std::nothrow) float16_t[pack_weight_size];
     if (offset == nullptr) {
       MS_LOG(ERROR) << "Malloc buffer failed!";
       delete[] scale;
       return RET_ERROR;
     }
-    std::function<int16_t(float)> to_dtype = Float32ToShort;
-    PackNHWCToNHWC4<float, int16_t>(in_tensors_[1]->MutableData(), scale, batch, plane, channel, to_dtype);
-    PackNHWCToNHWC4<float, int16_t>(in_tensors_[2]->MutableData(), offset, batch, plane, channel, to_dtype);
+    std::function<float16_t(float)> to_dtype = [](float x) -> float16_t { return static_cast<float16_t>(x); };
+    PackNHWCToNHWC4<float, float16_t>(in_tensors_[1]->MutableData(), scale, batch, plane, channel, to_dtype);
+    PackNHWCToNHWC4<float, float16_t>(in_tensors_[2]->MutableData(), offset, batch, plane, channel, to_dtype);
     scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size);
     offset_ptr_ = allocator->CreateImageFromHost(offset, in_tensors_[2]->ElementsNum(), img_size);
     delete[] scale;
@@ -328,8 +328,8 @@ int ScaleOpenCLKernel::Run() {
     ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(scale));
     ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(offset));
   } else if (in_tensors_[1]->data_type() == kNumberTypeFloat16) {
-    int16_t scale = static_cast<int16_t *>(in_tensors_[1]->MutableData())[0];
-    int16_t offset = static_cast<int16_t *>(in_tensors_[2]->MutableData())[0];
+    float16_t scale = static_cast<float16_t *>(in_tensors_[1]->MutableData())[0];
+    float16_t offset = static_cast<float16_t *>(in_tensors_[2]->MutableData())[0];
     ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(scale));
     ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(offset));
   } else {
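
The fixed fp16 branch reads the scalar as a real float16_t yet still routes it through Float32ToShort before SetKernelArg: a half kernel argument has to cross the API boundary as its raw 16 bits, and since promoting a half to float is exact, the round trip is lossless. A sketch of what the encoder is assumed to do (Float32ToShortSketch is a hypothetical stand-in, consistent with how both call sites here use the real helper):

    #include <cstdint>
    #include <cstring>

    using float16_t = __fp16;  // assumption: native half type, as noted earlier

    // Assumed behaviour of Float32ToShort: round float32 to half and return the
    // raw 16-bit encoding, which is what a half kernel argument needs.
    inline std::int16_t Float32ToShortSketch(float x) {
      float16_t h = static_cast<float16_t>(x);
      std::int16_t bits;
      std::memcpy(&bits, &h, sizeof(bits));
      return bits;
    }

    // half -> float promotion is exact, so re-encoding restores the same bits
    // for every finite half value.
    inline bool RoundTripsExactly(float16_t v) {
      std::int16_t original;
      std::memcpy(&original, &v, sizeof(original));
      return Float32ToShortSketch(static_cast<float>(v)) == original;
    }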

@@ -300,12 +300,12 @@ int OpenCLRuntime::BuildKernel(cl::Kernel &kernel, const std::string &program_name
   if (fp16_enable_) {
     // fp16 enable, kernel will use half and read_imageh and write_imageh.
     build_options_str =
-      "-DFLT=half -DFLT4=half4 -DFLT16=half16 "
+      "-DFLT=half -DFLT4=half4 -DFLT16=half16 -DAS_FLT4=as_half4 "
       "-DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh -DTO_FLT=convert_half -DTO_FLT4=convert_half4 ";
   } else {
     // fp16 not enable, kernel will use float and read_imagef and write_imagef.
     build_options_str =
-      "-DFLT=float -DFLT4=float4 -DFLT16=float16 "
+      "-DFLT=float -DFLT4=float4 -DFLT16=float16 -DAS_FLT4=as_float4 "
       "-DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef -DTO_FLT=convert_float -DTO_FLT4=convert_float4 ";
   }
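
The new AS_FLT4 macro exposes OpenCL's as_half4/as_float4 builtins, which reinterpret bits, unlike TO_FLT4/convert_float4, which converts values. The relational kernels presumably rely on it because OpenCL vector comparisons return integer masks, -1 (all bits set) for true and 0 for false per lane, with the mask width matching the operand width: int4 for float4, short4 for half4. A host-side C++20 analogue of the reinterpret-versus-convert distinction:

    #include <bit>
    #include <cstdint>
    #include <cstdio>

    int main() {
      std::int32_t mask = -1;  // one all-true lane, as float4 == float4 would produce

      // AS_FLT4 / as_float4: reinterpret the same 32 bits -> NaN here.
      float reinterpreted = std::bit_cast<float>(mask);

      // TO_FLT4 / convert_float4: numeric conversion -> -1.0f.
      float converted = static_cast<float>(mask);

      std::printf("reinterpret: %f, convert: %f\n", reinterpreted, converted);
      return 0;
    }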
