add op reduce min max prod sumsquare

Branch: pull/8636/head
Author: chenzupeng, 4 years ago
Parent: 62f846c484
Commit: 6cbe44cd9f

File diff suppressed because it is too large.

@@ -41,8 +41,9 @@ using mindspore::schema::ReduceMode_ReduceSumSquare;
 namespace mindspore::kernel {
 std::string ReduceOpenCLKernel::GetReduceTypeStr(int type) {
-  static const std::map<int, std::string> reduce_type2str{{ReduceMode_ReduceMean, "mean"},
-                                                          {ReduceMode_ReduceSum, "sum"}};
+  static const std::map<int, std::string> reduce_type2str{
+    {ReduceMode_ReduceMean, "Mean"}, {ReduceMode_ReduceSum, "Sum"}, {ReduceMode_ReduceMin, "Min"},
+    {ReduceMode_ReduceMax, "Max"}, {ReduceMode_ReduceProd, "Prod"}, {ReduceMode_ReduceSumSquare, "SumSquare"}};
   auto result_iter = reduce_type2str.find(type);
   if (result_iter != reduce_type2str.end()) {
     return result_iter->second;
@@ -50,7 +51,26 @@ std::string ReduceOpenCLKernel::GetReduceTypeStr(int type) {
   return "";
 }
+cl_float4 ReduceOpenCLKernel::GenC4Mask() {
+  auto reduce_param = reinterpret_cast<ReduceParameter *>(op_parameter_);
+  int last_c4 = in_tensors_[0]->shape()[3] % C4NUM;
+  last_c4 = (C4NUM - last_c4) % 4;
+  static const std::map<int, float> reduce_type2init{
+    {ReduceMode_ReduceMean, 0.f}, {ReduceMode_ReduceSum, 0.f}, {ReduceMode_ReduceMin, 10000.f},
+    {ReduceMode_ReduceMax, -10000.f}, {ReduceMode_ReduceProd, 1.f}, {ReduceMode_ReduceSumSquare, 0.f}};
+  float init_float = reduce_type2init.find(reduce_param->mode_)->second;
+  cl_float4 mask = {0.f, 0.f, 0.f, 0.f};
+  for (int i = 0; i < last_c4; i++) {
+    mask.s[C4NUM - i - 1] = init_float;
+  }
+  return mask;
+}
 int ReduceOpenCLKernel::CheckSpecs() {
+  if (in_tensors_[0]->shape()[0] > 1) {
+    MS_LOG(ERROR) << "reduce op only support n = 1";
+    return RET_PARAM_INVALID;
+  }
   auto reduce_param = reinterpret_cast<ReduceParameter *>(op_parameter_);
   if (GetReduceTypeStr(reduce_param->mode_).empty()) {
     MS_LOG(ERROR) << "not supported reduce type:" << reduce_param->mode_;
@@ -82,17 +102,21 @@ int ReduceOpenCLKernel::Prepare() {
     return RET_NULL_PTR;
   }
-  std::string kernel_name = GetReduceTypeStr(reduce_param->mode_);
-  if (wc_reduce_) {
-    kernel_name += "_WC";
-  }
+  std::string kernel_name;
   if (in_tensors_[0]->shape()[reduce_param->axes_[0]] >= LOCAL_CACHE_THREAD ||
       in_tensors_[0]->shape()[reduce_param->axes_[1]] >= LOCAL_CACHE_THREAD) {
     use_local_ = true;
-    kernel_name += "_local";
+    kernel_name += "Local";
   } else {
     use_local_ = false;
+    kernel_name += "Global";
   }
-  kernel_name += "_NHWC4";
+  if (wc_reduce_) {
+    kernel_name += "WC";
+  } else {
+    kernel_name += "HW";
+  }
+  kernel_name += GetReduceTypeStr(reduce_param->mode_);
 #ifdef PROGRAM_WITH_IL
   kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
 #else
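With this change the OpenCL program name is composed as <Local|Global><WC|HW><ReduceType> instead of the old <type>[_WC][_local]_NHWC4 scheme, so each combination of work-group strategy, reduced axes, and reduce op selects its own kernel (e.g. GlobalHWMean, LocalWCSumSquare). A small sketch of the composition with hypothetical flag values; the real code derives use_local_ and wc_reduce_ from the tensor shape and the reduce axes.

// Illustrative composition of the kernel name built in Prepare() above;
// the flag values here are hypothetical.
#include <iostream>
#include <string>

int main() {
  bool use_local = false;                 // true when a reduced dim is >= LOCAL_CACHE_THREAD
  bool wc_reduce = true;                  // presumably: reducing over W and C (hence the WC kernels)
  std::string type_suffix = "SumSquare";  // result of GetReduceTypeStr(mode)

  std::string kernel_name;
  kernel_name += use_local ? "Local" : "Global";
  kernel_name += wc_reduce ? "WC" : "HW";
  kernel_name += type_suffix;
  std::cout << kernel_name << std::endl;  // prints GlobalWCSumSquare
  return 0;
}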
@@ -116,6 +140,9 @@ void ReduceOpenCLKernel::SetConstArgs() {
   cl_int4 size = {h, w, c4, c};
   int arg_idx = 2;
   ocl_runtime_->SetKernelArg(kernel_, arg_idx++, size);
+  if (wc_reduce_) {
+    ocl_runtime_->SetKernelArg(kernel_, arg_idx++, GenC4Mask());
+  }
 }
 void ReduceOpenCLKernel::SetGlobalLocal() {
   std::vector<int> shapex = in_tensors_[0]->shape();
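SetConstArgs() starts at argument index 2, so slots 0 and 1 (presumably the input and output images) are bound elsewhere per run; slot 2 carries the {h, w, c4, c} size vector and, for the WC kernels only, slot 3 carries the padding mask from GenC4Mask(). A hedged sketch of that layout with a stubbed SetKernelArg, since the matching .cl signatures are not visible in this diff:

// Stubbed illustration of the constant-argument slots set above; the argument
// names and the contents of slots 0/1 are assumptions, not taken from the diff.
#include <cstdio>

static void SetKernelArg(int idx, const char *what) { printf("arg %d <- %s\n", idx, what); }

int main() {
  bool wc_reduce = true;  // hypothetical: a WC reduce kernel was selected
  int arg_idx = 2;        // slots 0 and 1 (data tensors) are bound outside SetConstArgs()
  SetKernelArg(arg_idx++, "cl_int4 size = {h, w, c4, c}");
  if (wc_reduce) {
    SetKernelArg(arg_idx++, "cl_float4 mask = GenC4Mask()");
  }
  return 0;
}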

@@ -38,6 +38,7 @@ class ReduceOpenCLKernel : public OpenCLKernel {
   void SetGlobalLocal() override;
 private:
+  cl_float4 GenC4Mask();
   static std::string GetReduceTypeStr(int type);
   cl::Kernel kernel_;
   Image2DInfo outShape = Image2DInfo(nullptr);
