!7251 [MS][LITE][GPU] arithmetic support broadcast

Merge pull request !7251 from chenzupeng/master-lite
pull/7251/MERGE
mindspore-ci-bot 4 years ago committed by Gitee
commit 832b70961a

File diff suppressed because it is too large Load Diff

@ -42,7 +42,10 @@ class ArithmeticOpenCLKernel : public OpenCLKernel {
cl::Kernel kernel_;
bool element_flag_{true};
void *weight_ptr_{nullptr};
float activation_min_{-FLT_MAX};
float activation_max_{FLT_MAX};
std::vector<std::vector<int>> inputs_nhwc_shapes_;
std::vector<void *> inputs_weight_ptrs_;
std::vector<size_t> local_size_;
std::vector<size_t> global_size_;

@ -54,6 +54,12 @@ std::vector<size_t> ScaleOpenCLKernel::InitGlobalSize() const {
void ScaleOpenCLKernel::Image2dGetWorkGroupSize() {
local_size_ = {16, 16};
if (out_tensors_[0]->shape().size() == 2) {
size_t H = out_tensors_[0]->shape()[0];
size_t W = UP_DIV(out_tensors_[0]->shape()[1], C4NUM);
global_size_ = {W, H};
return;
}
if (out_tensors_[0]->GetFormat() == schema::Format_NC4HW4) {
size_t H = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * UP_DIV(out_tensors_[0]->Channel(), C4NUM);
size_t W = out_tensors_[0]->Width();
@ -78,18 +84,23 @@ void ScaleOpenCLKernel::BufferGetWorkGroupSize() {
int ScaleOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
size_t im_dst_x, im_dst_y;
if (out_tensors_[0]->GetFormat() == schema::Format_NC4HW4) {
im_dst_x = out_tensors_[0]->Width();
im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * UP_DIV(out_tensors_[0]->Channel(), C4NUM);
} else if (out_tensors_[0]->GetFormat() == schema::Format_NHWC4) {
im_dst_x = out_tensors_[0]->Width() * UP_DIV(out_tensors_[0]->Channel(), C4NUM);
im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height();
} else if (out_tensors_[0]->GetFormat() == schema::Format_NC4) {
im_dst_y = out_tensors_[0]->Batch();
im_dst_x = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
if (out_tensors_[0]->shape().size() == 2) {
im_dst_x = UP_DIV(out_tensors_[0]->shape()[1], C4NUM);
im_dst_y = out_tensors_[0]->shape()[0];
} else {
MS_LOG(ERROR) << "Unsupport data format " << out_tensors_[0]->GetFormat();
return RET_ERROR;
if (out_tensors_[0]->GetFormat() == schema::Format_NC4HW4) {
im_dst_x = out_tensors_[0]->Width();
im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * UP_DIV(out_tensors_[0]->Channel(), C4NUM);
} else if (out_tensors_[0]->GetFormat() == schema::Format_NHWC4) {
im_dst_x = out_tensors_[0]->Width() * UP_DIV(out_tensors_[0]->Channel(), C4NUM);
im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height();
} else if (out_tensors_[0]->GetFormat() == schema::Format_NC4) {
im_dst_y = out_tensors_[0]->Batch();
im_dst_x = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
} else {
MS_LOG(ERROR) << "Unsupport data format " << out_tensors_[0]->GetFormat();
return RET_ERROR;
}
}
size_t img_dtype = CL_FLOAT;
@ -114,7 +125,7 @@ int ScaleOpenCLKernel::InitBuffer() {
auto allocator = ocl_runtime_->GetAllocator();
std::vector<size_t> img_size;
GetImageSize(0, &img_size);
if (in_tensors_[1]->shape().size() == 1 && axis_ == 3) {
if (scale_C_flag_) {
img_size[1] = 1;
img_size[0] = UP_DIV(in_tensors_[1]->shape()[0], C4NUM);
scale_ptr_ = allocator->CreateImageFromHost(in_tensors_[1]->data_c(), in_tensors_[1]->ElementsNum(), img_size);
@ -256,8 +267,10 @@ int ScaleOpenCLKernel::Init() {
if (scale_tensor->ElementsNum() == 1) {
element_flag_ = false;
kernel_name = "BoardcastScale";
} else if (axis_ == 3 && scale_shape.size() == 1) {
} else if (((in_shape.size() == 4 && axis_ == 3) || (in_shape.size() == 2 && axis_ == 1)) &&
scale_shape.size() == 1) {
element_flag_ = true;
scale_C_flag_ = true;
kernel_name = "Scale_C";
}
} else {
@ -327,24 +340,9 @@ int ScaleOpenCLKernel::Run() {
}
}
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
int H = 0;
int W = 0;
if (out_tensors_[0]->GetFormat() == schema::Format_NC4HW4) {
H = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * UP_DIV(out_tensors_[0]->Channel(), C4NUM);
W = out_tensors_[0]->Width();
} else if (out_tensors_[0]->GetFormat() == schema::Format_NHWC4) {
H = out_tensors_[0]->Batch() * out_tensors_[0]->Height();
W = out_tensors_[0]->Width() * UP_DIV(out_tensors_[0]->Channel(), C4NUM);
} else if (out_tensors_[0]->GetFormat() == schema::Format_NC4) {
H = out_tensors_[0]->Batch();
W = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
} else {
MS_LOG(ERROR) << "Error output type " << out_tensors_[0]->GetFormat();
return RET_ERROR;
}
cl_int2 output_shape{W, H};
cl_int2 output_shape{static_cast<int>(global_size_[0]), static_cast<int>(global_size_[1])};
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape);
if (element_flag_ && axis_ == 3) {
if (element_flag_ && scale_C_flag_) {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, UP_DIV(in_tensors_[1]->shape()[0], C4NUM));
}
ocl_runtime_->RunKernel(kernel_, global_size_, local_size_, nullptr);

@ -42,6 +42,7 @@ class ScaleOpenCLKernel : public OpenCLKernel {
cl::Kernel kernel_;
bool element_flag_{true};
bool scale_C_flag_{false};
void *scale_ptr_{nullptr};
void *offset_ptr_{nullptr};
int axis_{0};

@ -27,6 +27,8 @@ using mindspore::kernel::KERNEL_ARCH::kGPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Nchw2Nhwc;
using mindspore::schema::PrimitiveType_Nhwc2Nchw;
using mindspore::schema::PrimitiveType_Transpose;
namespace mindspore::kernel {
@ -141,4 +143,8 @@ kernel::LiteKernel *OpenCLTransposeKernelCreator(const std::vector<lite::Tensor
REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Transpose, OpenCLTransposeKernelCreator)
REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Transpose, OpenCLTransposeKernelCreator)
REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Nhwc2Nchw, OpenCLTransposeKernelCreator)
REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Nhwc2Nchw, OpenCLTransposeKernelCreator)
REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Nchw2Nhwc, OpenCLTransposeKernelCreator)
REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Nchw2Nhwc, OpenCLTransposeKernelCreator)
} // namespace mindspore::kernel

@ -276,4 +276,41 @@ void PrintTensor(lite::Tensor *tensor, int num, const std::string &out_file) {
}
allocator->UnmapBuffer(origin_data);
}
// Expand a tensor shape of rank 1..4 into an explicit NHWC quadruple
// {N, H, W, C}, with missing dimensions defaulting to 1.
// Rank mapping: 1 -> {1,1,1,C}; 2 -> {N,1,1,C}; 3 -> {N,H,1,C} (no width
// axis); 4 -> {N,H,W,C}. Any other rank yields {1,1,1,1}.
std::vector<int> GetNHWCShape(const std::vector<int> &tensor_shape) {
  std::vector<int> nhwc{1, 1, 1, 1};
  switch (tensor_shape.size()) {
    case 1:
      nhwc[3] = tensor_shape[0];
      break;
    case 2:
      nhwc[0] = tensor_shape[0];
      nhwc[3] = tensor_shape[1];
      break;
    case 3:
      nhwc[0] = tensor_shape[0];
      nhwc[1] = tensor_shape[1];
      nhwc[3] = tensor_shape[2];
      break;
    case 4:
      // Already NHWC; copy through unchanged.
      nhwc = tensor_shape;
      break;
    default:
      // Rank 0 or >4: fall back to the all-ones shape, matching the
      // original behavior of leaving every dimension at 1.
      break;
  }
  return nhwc;
}
std::vector<size_t> GetImage2dShapeFromNHWC(const std::vector<int> &tensor_shape, schema::Format format) {
if (tensor_shape.size() != 4) {
return {1, 1};
}
size_t image_x, image_y;
image_x = image_y = 1;
if (format == schema::Format_NHWC4) {
image_x = tensor_shape[2] * UP_DIV(tensor_shape[3], C4NUM);
image_y = tensor_shape[0] * tensor_shape[1];
} else if (format == schema::Format_NC4HW4) {
image_x = tensor_shape[2];
image_y = tensor_shape[0] * tensor_shape[1] * UP_DIV(tensor_shape[3], C4NUM);
}
return {image_x, image_y};
}
} // namespace mindspore::kernel

@ -48,6 +48,10 @@ void Write2File(void *mem, const std::string &file_name, int size);
void PrintTensor(lite::Tensor *tensor, int num = 10, const std::string &out_file = "");
std::vector<int> GetNHWCShape(const std::vector<int> &tensor_shape);
std::vector<size_t> GetImage2dShapeFromNHWC(const std::vector<int> &tensor_shape, schema::Format format);
template <class T1, class T2>
void PackNCHWToNC4HW4(void *src, void *dst, int batch, int plane, int channel, const std::function<T2(T1)> &to_dtype) {
int c4 = UP_DIV(channel, C4NUM);

@ -37,8 +37,7 @@ cp -fr $TEST_DATA_DIR/testPK ./data
./lite-test --gtest_filter="TestBatchnormOpenCLCI.Batchnormfp32CI*"
./lite-test --gtest_filter="TestAvgPoolingOpenCL*"
./lite-test --gtest_filter="TestConv2dTransposeOpenCL*"
./lite-test --gtest_filter="TestMatMulOpenCL.MatMul2D*"
./lite-test --gtest_filter="TestMatMulOpenCL.MatMul4D*"
./lite-test --gtest_filter="TestMatMulOpenCL*"
./lite-test --gtest_filter="TestMaxPoolingOpenCL*"
./lite-test --gtest_filter="TestReduceOpenCL*"
./lite-test --gtest_filter="TestReshapeOpenCL*"
@ -46,3 +45,5 @@ cp -fr $TEST_DATA_DIR/testPK ./data
./lite-test --gtest_filter="TestTransposeOpenCL*"
./lite-test --gtest_filter="TestArithmeticOpenCL*"
./lite-test --gtest_filter="TestScaleOpenCL*"
./lite-test --gtest_filter="TestFullConnectionOpenCL*"
./lite-test --gtest_filter="TestResizeOpenCL*"

Loading…
Cancel
Save