Fix SetKernelArg bug for PReLU and fp16 bug for ArgMinMax

pull/8815/head
wandongdong 4 years ago
parent a574cdb4e5
commit 5c65180d0a

@ -80,7 +80,8 @@ void ArgMinMaxOpenCLKernel::SetGlobalLocal() {
auto out_shape_align = in_shape_align; auto out_shape_align = in_shape_align;
out_shape_align.at(param->axis_) = param->axis_ == 3 ? UP_ROUND(param->topk_, C4NUM) : param->topk_; out_shape_align.at(param->axis_) = param->axis_ == 3 ? UP_ROUND(param->topk_, C4NUM) : param->topk_;
int reduce_len = GetUpPow2(in_shape.at(param->axis_)); int reduce_len = GetUpPow2(in_shape.at(param->axis_));
cus_size_ = {reduce_len, static_cast<int>(im_in_.RowPitch() / C4NUM), 1, 1}; int dtype_size = in_tensors_[0]->data_type() == kNumberTypeFloat16 ? sizeof(int16_t) : sizeof(float);
cus_size_ = {reduce_len, static_cast<int>(im_in_.RowPitch() / dtype_size), 1, 1};
cus_size_.s[2] = UP_ROUND(im_in_.width * C4NUM, cus_size_.s[1]) - im_in_.width * C4NUM; cus_size_.s[2] = UP_ROUND(im_in_.width * C4NUM, cus_size_.s[1]) - im_in_.width * C4NUM;
cus_size_.s[3] = im_in_.W * UP_ROUND(param->topk_, C4NUM); cus_size_.s[3] = im_in_.W * UP_ROUND(param->topk_, C4NUM);
cus_size_.s[3] = UP_ROUND(cus_size_.s[3], cus_size_.s[1]) - cus_size_.s[3]; cus_size_.s[3] = UP_ROUND(cus_size_.s[3], cus_size_.s[1]) - cus_size_.s[3];

@ -125,7 +125,7 @@ int PReluOpenCLKernel::Run() {
if (weight_is_scalar) { if (weight_is_scalar) {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight_scalar_); ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight_scalar_);
} else { } else {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight_vector_); ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight_vector_, lite::opencl::MemType::BUF);
} }
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, shape); ocl_runtime_->SetKernelArg(kernel_, arg_idx++, shape);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, 2); ocl_runtime_->SetKernelArg(kernel_, arg_idx++, 2);

@ -211,14 +211,14 @@ int SubGraphOpenCLKernel::Init() {
} }
nodes_.insert(nodes_.end(), out_convert_ops_.begin(), out_convert_ops_.end()); nodes_.insert(nodes_.end(), out_convert_ops_.begin(), out_convert_ops_.end());
UpdateTensorDataType();
ret = SubGraphKernel::Prepare(); ret = SubGraphKernel::Prepare();
if (ret != RET_OK) { if (ret != RET_OK) {
MS_LOG(ERROR) << "OpenCL prepare fail"; MS_LOG(ERROR) << "OpenCL prepare fail";
return ret; return ret;
} }
UpdateTensorDataType();
MallocTensorWithReuse(); MallocTensorWithReuse();
return RET_OK; return RET_OK;
} }

@ -83,7 +83,7 @@ void *OpenCLAllocator::CreateBuffer(size_t size, void *data, size_t flags, cl::B
} }
void *OpenCLAllocator::CreateImage2D(size_t size, const std::vector<size_t> &img_size, void *data, size_t flags, void *OpenCLAllocator::CreateImage2D(size_t size, const std::vector<size_t> &img_size, void *data, size_t flags,
cl::Buffer **buffer, cl::Image2D **image) { bool is_map, cl::Buffer **buffer, cl::Image2D **image) {
cl_int ret = CL_SUCCESS; cl_int ret = CL_SUCCESS;
cl::ImageFormat image_format(CL_RGBA, img_size[2]); cl::ImageFormat image_format(CL_RGBA, img_size[2]);
if (data == nullptr) { if (data == nullptr) {
@ -99,16 +99,19 @@ void *OpenCLAllocator::CreateImage2D(size_t size, const std::vector<size_t> &img
return nullptr; return nullptr;
} }
MS_LOG(DEBUG) << "Malloc a new Image2D, width=" << img_size[0] << ", height=" << img_size[1]; MS_LOG(DEBUG) << "Malloc a new Image2D, width=" << img_size[0] << ", height=" << img_size[1];
std::vector<size_t> region{img_size[0], img_size[1], 1}; void *host_ptr = nullptr;
void *host_ptr = ocl_runtime_->MapBuffer(**image, 0, CL_MAP_READ | CL_MAP_WRITE, region); if (is_map) {
if (host_ptr == nullptr) { std::vector<size_t> region{img_size[0], img_size[1], 1};
delete *buffer; host_ptr = ocl_runtime_->MapBuffer(**image, 0, CL_MAP_READ | CL_MAP_WRITE, region);
delete *image; if (host_ptr == nullptr) {
MS_LOG(ERROR) << "Map image failed, can not found image :" << *image << ", host_ptr=" << host_ptr; delete *buffer;
return nullptr; delete *image;
MS_LOG(ERROR) << "Map image failed, can not found image :" << *image << ", host_ptr=" << host_ptr;
return nullptr;
}
cl::Memory *mem = *image;
ocl_runtime_->UnmapBuffer(*mem, host_ptr);
} }
cl::Memory *mem = *image;
ocl_runtime_->UnmapBuffer(*mem, host_ptr);
return host_ptr; return host_ptr;
} }
@ -149,11 +152,12 @@ void *OpenCLAllocator::Malloc(size_t size, const std::vector<size_t> &img_size,
} }
} }
if (!img_size.empty()) { if (!img_size.empty()) {
host_ptr = CreateImage2D(size, img_size, data, flags, &buffer, &image); void *host_ptr_im = CreateImage2D(size, img_size, data, flags, data != nullptr, &buffer, &image);
if (host_ptr == nullptr) { if (data != nullptr && host_ptr_im == nullptr) {
UnLock(); UnLock();
return nullptr; return nullptr;
} }
host_ptr = (data != nullptr) ? host_ptr_im : host_ptr;
} }
} }
MemBuf *mem_buf = new (std::nothrow) MemBuf; MemBuf *mem_buf = new (std::nothrow) MemBuf;

@ -72,8 +72,8 @@ class OpenCLAllocator : public Allocator {
void UnLock(); void UnLock();
void *MinimumFit(size_t size, const std::vector<size_t> &img_size); void *MinimumFit(size_t size, const std::vector<size_t> &img_size);
void *CreateBuffer(size_t size, void *data, size_t flags, cl::Buffer **buffer); void *CreateBuffer(size_t size, void *data, size_t flags, cl::Buffer **buffer);
void *CreateImage2D(size_t size, const std::vector<size_t> &img_size, void *data, size_t flags, cl::Buffer **buffer, void *CreateImage2D(size_t size, const std::vector<size_t> &img_size, void *data, size_t flags, bool is_map,
cl::Image2D **image); cl::Buffer **buffer, cl::Image2D **image);
struct MemBuf { struct MemBuf {
size_t size_; size_t size_;
void *device_ptr_; void *device_ptr_;

@ -44,7 +44,7 @@ TEST_F(TestOpenCL_ArgMinMax, axis0topk2index) {
std::vector<int> output_shape = {2, 2, 2, 2}; std::vector<int> output_shape = {2, 2, 2, 2};
float input_data[] = {100, 2, 4, 50, 11, 12, 34, 35, 10, 20, 40, 5, 7, 80, 10, 11, 55, 25, 5, 15, 18, 8, 15, 16}; float input_data[] = {100, 2, 4, 50, 11, 12, 34, 35, 10, 20, 40, 5, 7, 80, 10, 11, 55, 25, 5, 15, 18, 8, 15, 16};
float output_data[] = {0, 2, 1, 0, 2, 1, 0, 0, 2, 1, 2, 2, 0, 0, 2, 2}; float output_data[] = {0, 2, 1, 0, 2, 1, 0, 0, 2, 1, 2, 2, 0, 0, 2, 2};
for (auto fp16_enable : {false}) { for (auto fp16_enable : {false, true}) {
auto *param = CreateParameter(type, axis, topk, out_value); auto *param = CreateParameter(type, axis, topk, out_value);
TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable);
} }
@ -59,7 +59,7 @@ TEST_F(TestOpenCL_ArgMinMax, axis0topk2value) {
std::vector<int> output_shape = {2, 2, 2, 2}; std::vector<int> output_shape = {2, 2, 2, 2};
float input_data[] = {100, 2, 4, 50, 11, 12, 34, 35, 10, 20, 40, 5, 7, 80, 10, 11, 55, 25, 5, 15, 18, 8, 15, 16}; float input_data[] = {100, 2, 4, 50, 11, 12, 34, 35, 10, 20, 40, 5, 7, 80, 10, 11, 55, 25, 5, 15, 18, 8, 15, 16};
float output_data[] = {100, 25, 40, 50, 18, 80, 34, 35, 55, 20, 5, 15, 11, 12, 15, 16}; float output_data[] = {100, 25, 40, 50, 18, 80, 34, 35, 55, 20, 5, 15, 11, 12, 15, 16};
for (auto fp16_enable : {false}) { for (auto fp16_enable : {false, true}) {
auto *param = CreateParameter(type, axis, topk, out_value); auto *param = CreateParameter(type, axis, topk, out_value);
TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable);
} }
@ -75,7 +75,7 @@ TEST_F(TestOpenCL_ArgMinMax, axis1topk2index) {
float input_data[] = {100, 2, 200, 4, 50, 6, 11, 12, 13, 34, 35, 36, 9, 6, 17, 10, 20, 30, float input_data[] = {100, 2, 200, 4, 50, 6, 11, 12, 13, 34, 35, 36, 9, 6, 17, 10, 20, 30,
10, 20, 30, 40, 5, 60, 7, 80, 90, 10, 11, 120, 18, 5, 16, 9, 22, 23}; 10, 20, 30, 40, 5, 60, 7, 80, 90, 10, 11, 120, 18, 5, 16, 9, 22, 23};
float output_data[] = {0, 1, 0, 1, 0, 1, 1, 2, 2, 2, 1, 2, 2, 1, 1, 0, 2, 1, 0, 0, 0, 1, 1, 0}; float output_data[] = {0, 1, 0, 1, 0, 1, 1, 2, 2, 2, 1, 2, 2, 1, 1, 0, 2, 1, 0, 0, 0, 1, 1, 0};
for (auto fp16_enable : {false}) { for (auto fp16_enable : {false, true}) {
auto *param = CreateParameter(type, axis, topk, out_value); auto *param = CreateParameter(type, axis, topk, out_value);
TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable);
} }
@ -92,7 +92,7 @@ TEST_F(TestOpenCL_ArgMinMax, axis1topk2value) {
10, 20, 30, 40, 5, 60, 7, 80, 90, 10, 11, 120, 18, 5, 16, 9, 22, 23}; 10, 20, 30, 40, 5, 60, 7, 80, 90, 10, 11, 120, 18, 5, 16, 9, 22, 23};
float output_data[] = {100, 12, 200, 34, 50, 36, 11, 6, 17, 10, 35, 30, float output_data[] = {100, 12, 200, 34, 50, 36, 11, 6, 17, 10, 35, 30,
18, 80, 90, 40, 22, 120, 10, 20, 30, 10, 11, 60}; 18, 80, 90, 40, 22, 120, 10, 20, 30, 10, 11, 60};
for (auto fp16_enable : {false}) { for (auto fp16_enable : {false, true}) {
auto *param = CreateParameter(type, axis, topk, out_value); auto *param = CreateParameter(type, axis, topk, out_value);
TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable);
} }
@ -109,7 +109,7 @@ TEST_F(TestOpenCL_ArgMinMax, axis2topk1index) {
10, 20, 30, 11, 15, 10, 5, 10, 12, 10, 20, 30, 11, 15, 10, 5, 10, 12, 10, 20, 30, 11, 15, 10, 5, 10, 12, 10, 20, 30, 11, 15, 10, 5, 10, 12,
10, 20, 30, 11, 15, 10, 5, 10, 12, 10, 20, 30, 11, 15, 10, 5, 10, 12}; 10, 20, 30, 11, 15, 10, 5, 10, 12, 10, 20, 30, 11, 15, 10, 5, 10, 12};
float output_data[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}; float output_data[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0};
for (auto fp16_enable : {false}) { for (auto fp16_enable : {false, true}) {
auto *param = CreateParameter(type, axis, topk, out_value); auto *param = CreateParameter(type, axis, topk, out_value);
TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable);
} }
@ -121,12 +121,13 @@ TEST_F(TestOpenCL_ArgMinMax, axis2topk2value) {
int topk = 2; int topk = 2;
bool out_value = true; bool out_value = true;
std::vector<int> input_shape = {2, 2, 3, 5}; std::vector<int> input_shape = {2, 2, 3, 5};
std::vector<int> output_shape = {1, 2, 2, 5}; std::vector<int> output_shape = {2, 2, 2, 5};
float input_data[] = {10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, float input_data[] = {10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30, 10, 20, 30, 40, 90,
20, 11, 15, 1, 50, 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50,
30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30}; 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30};
float output_data[] = {30, 45, 30, 50, 90, 20, 20, 25, 40, 50, 30, 45, 30, 50, 90, 20, 20, 25, 40, 50}; float output_data[] = {30, 45, 30, 50, 90, 20, 20, 25, 40, 50, 30, 45, 30, 50, 90, 20, 20, 25, 40, 50,
for (auto fp16_enable : {false}) { 30, 45, 30, 50, 90, 20, 20, 25, 40, 50, 30, 45, 30, 50, 90, 20, 20, 25, 40, 50};
for (auto fp16_enable : {false, true}) {
auto *param = CreateParameter(type, axis, topk, out_value); auto *param = CreateParameter(type, axis, topk, out_value);
TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable);
} }
@ -144,7 +145,7 @@ TEST_F(TestOpenCL_ArgMinMax, axis2topk2index) {
30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30}; 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30};
float output_data[] = {2, 2, 0, 2, 0, 1, 0, 2, 0, 1, 2, 2, 0, 2, 0, 1, 0, 2, 0, 1, float output_data[] = {2, 2, 0, 2, 0, 1, 0, 2, 0, 1, 2, 2, 0, 2, 0, 1, 0, 2, 0, 1,
2, 2, 0, 2, 0, 1, 0, 2, 0, 1, 2, 2, 0, 2, 0, 1, 0, 2, 0, 1}; 2, 2, 0, 2, 0, 1, 0, 2, 0, 1, 2, 2, 0, 2, 0, 1, 0, 2, 0, 1};
for (auto fp16_enable : {false}) { for (auto fp16_enable : {false, true}) {
auto *param = CreateParameter(type, axis, topk, out_value); auto *param = CreateParameter(type, axis, topk, out_value);
TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable);
} }
@ -161,7 +162,7 @@ TEST_F(TestOpenCL_ArgMinMax, axis3topk2index) {
20, 11, 15, 1, 50, 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50,
30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30}; 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30};
float output_data[] = {4, 3, 4, 0, 3, 1, 4, 3, 4, 0, 3, 1, 4, 3, 4, 0, 3, 1, 4, 3, 4, 0, 3, 1}; float output_data[] = {4, 3, 4, 0, 3, 1, 4, 3, 4, 0, 3, 1, 4, 3, 4, 0, 3, 1, 4, 3, 4, 0, 3, 1};
for (auto fp16_enable : {false}) { for (auto fp16_enable : {false, true}) {
auto *param = CreateParameter(type, axis, topk, out_value); auto *param = CreateParameter(type, axis, topk, out_value);
TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable);
} }
@ -179,10 +180,9 @@ TEST_F(TestOpenCL_ArgMinMax, axis3topk2value) {
30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30}; 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30};
float output_data[] = {90, 40, 50, 20, 50, 45, 90, 40, 50, 20, 50, 45, float output_data[] = {90, 40, 50, 20, 50, 45, 90, 40, 50, 20, 50, 45,
90, 40, 50, 20, 50, 45, 90, 40, 50, 20, 50, 45}; 90, 40, 50, 20, 50, 45, 90, 40, 50, 20, 50, 45};
for (auto fp16_enable : {false}) { for (auto fp16_enable : {false, true}) {
auto *param = CreateParameter(type, axis, topk, out_value); auto *param = CreateParameter(type, axis, topk, out_value);
TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable);
} }
} }
} // namespace mindspore::lite::opencl::test } // namespace mindspore::lite::opencl::test

Loading…
Cancel
Save