!9196 [MS][LITE][5.0.5.100][GPU] fix bug of opencl gather that raised a segmentation fault

From: @pengyongrong
Reviewed-by: @ddwsky,@ddwsky
Signed-off-by: @ddwsky,@ddwsky
pull/9196/MERGE
mindspore-ci-bot 4 years ago committed by Gitee
commit 3a84eec4ba

@ -39,12 +39,11 @@ int GatherOpenCLKernel::CheckSpecs() {
MS_LOG(ERROR) << "GatherOpenCLKernel only supports 1 output Tensor but get " << out_tensors_.size(); MS_LOG(ERROR) << "GatherOpenCLKernel only supports 1 output Tensor but get " << out_tensors_.size();
return RET_ERROR; return RET_ERROR;
} }
enable_fp16_ = ocl_runtime_->GetFp16Enable();
if (in_tensors_.at(1)->category() == lite::Tensor::VAR) { if (!in_tensors_.at(1)->IsConst() && enable_fp16_) {
MS_LOG(ERROR) << "GatherOpenCLKernel only supports indices Tensor is weight."; MS_LOG(ERROR) << "GatherOpenCLKernel Unsupportted intensor1 = tensor and datatype = fp16 ";
return RET_ERROR; return RET_ERROR;
} }
int input_ndim = in_tensors_.front()->shape().size(); int input_ndim = in_tensors_.front()->shape().size();
if (input_ndim < 0 || input_ndim > 4) { if (input_ndim < 0 || input_ndim > 4) {
MS_LOG(ERROR) << "GatherOpenCLKernel only supports 1-4D input Tensor but get " << input_ndim << "D."; MS_LOG(ERROR) << "GatherOpenCLKernel only supports 1-4D input Tensor but get " << input_ndim << "D.";
@ -59,7 +58,7 @@ int GatherOpenCLKernel::CheckSpecs() {
TypeId data_type = in_tensors_.at(1)->data_type(); TypeId data_type = in_tensors_.at(1)->data_type();
if (data_type != kNumberTypeInt32 && data_type != kNumberTypeInt64 && data_type != kNumberTypeFloat32 && if (data_type != kNumberTypeInt32 && data_type != kNumberTypeInt64 && data_type != kNumberTypeFloat32 &&
data_type != kNumberTypeFloat16) { data_type != kNumberTypeFloat16) {
MS_LOG(ERROR) << "Conv2D only supports Int32/Int64/Float32/Float16 indices Tensor."; MS_LOG(ERROR) << "GatherOpenCLKernel only supports Int32/Int64/Float32/Float16 indices Tensor.";
return RET_ERROR; return RET_ERROR;
} }
@ -107,17 +106,51 @@ int GatherOpenCLKernel::Prepare() {
ocl_runtime_->LoadSource(program_name, gather_source); ocl_runtime_->LoadSource(program_name, gather_source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name);
#endif #endif
if (!in_tensors_.at(1)->IsConst()) {
intensor1_is_tensor = true;
}
int ret = InitWeights(); if (!intensor1_is_tensor) {
if (ret != RET_OK) { int ret = InitWeights();
return ret; if (ret != RET_OK) {
return ret;
}
} }
SetGlobalLocal(); SetGlobalLocal();
SetConstArgs(); SetConstArgs();
MS_LOG(DEBUG) << kernel_name << " Init Done!"; MS_LOG(DEBUG) << kernel_name << " Init Done!";
return RET_OK; return RET_OK;
} }
// Copies the runtime (non-const) indices tensor in_tensors_[1] into the
// OpenCL buffer indices_data_ so the gather kernel can consume it.
// Only Int32 indices are supported on this path.
// Returns RET_OK on success, RET_ERROR on allocation failure or unsupported dtype.
int GatherOpenCLKernel::ConvertTensorToweight() {
  auto allocator = ocl_runtime_->GetAllocator();
  GpuTensorInfo img_info(in_tensors_[1]);
  size_t dtype = sizeof(cl_int);
  // Row pitch of the image-backed tensor in elements: consecutive logical
  // elements in the source are separated by this stride.
  stride_w = img_info.RowPitch() / dtype;
  auto indices_tensor = in_tensors_.at(1);
  auto indices_num = indices_tensor->ElementsNum();
  indices_data_ = reinterpret_cast<int32_t *>(allocator->Malloc(sizeof(int32_t) * indices_num));
  // Check the allocation BEFORE mapping it; mapping a null buffer is invalid.
  if (indices_data_ == nullptr) {
    MS_LOG(ERROR) << "Memory allocation failed";
    return RET_ERROR;
  }
  allocator->MapBuffer(indices_data_, CL_MAP_WRITE, nullptr, true);
  auto data_type = indices_tensor->data_type();
  auto data = indices_tensor->data_c();
  if (data_type != kNumberTypeInt32) {
    MS_LOG(ERROR) << "Gather Only supported The DataType Of Intensor1 is Int32 "
                  << " But Your Type is :" << data_type;
    // Must unmap on the error path too, otherwise the mapping is leaked.
    allocator->UnmapBuffer(indices_data_);
    return RET_ERROR;
  }
  // De-stride the image-pitched source into a densely packed Int32 buffer.
  for (int i = 0; i < indices_num; i++) {
    indices_data_[i] = reinterpret_cast<int32_t *>(data)[i * stride_w];
  }
  allocator->UnmapBuffer(indices_data_);
  return RET_OK;
}
int GatherOpenCLKernel::InitWeights() { int GatherOpenCLKernel::InitWeights() {
auto indices_tensor = in_tensors_.at(1); auto indices_tensor = in_tensors_.at(1);
auto indices_num = indices_tensor->ElementsNum(); auto indices_num = indices_tensor->ElementsNum();
@ -152,6 +185,9 @@ int GatherOpenCLKernel::InitWeights() {
int GatherOpenCLKernel::Run() { int GatherOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running! "; MS_LOG(DEBUG) << this->name() << " Running! ";
if (intensor1_is_tensor) {
ConvertTensorToweight();
}
ocl_runtime_->SetKernelArg(kernel_, 0, out_tensors_.front()->data_c()); ocl_runtime_->SetKernelArg(kernel_, 0, out_tensors_.front()->data_c());
ocl_runtime_->SetKernelArg(kernel_, 1, in_tensors_.front()->data_c()); ocl_runtime_->SetKernelArg(kernel_, 1, in_tensors_.front()->data_c());
ocl_runtime_->SetKernelArg(kernel_, 2, indices_data_, lite::opencl::MemType::BUF); ocl_runtime_->SetKernelArg(kernel_, 2, indices_data_, lite::opencl::MemType::BUF);

@ -39,6 +39,7 @@ class GatherOpenCLKernel : public OpenCLKernel {
void SetConstArgs() override; void SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
int Tune() override { return lite::RET_OK; } int Tune() override { return lite::RET_OK; }
int ConvertTensorToweight();
protected: protected:
int UpdateWeights(); int UpdateWeights();
@ -46,6 +47,9 @@ class GatherOpenCLKernel : public OpenCLKernel {
private: private:
int32_t *indices_data_{nullptr}; int32_t *indices_data_{nullptr};
int axis_ = {0}; int axis_ = {0};
bool intensor1_is_tensor{false};
bool enable_fp16_{false};
cl_int stride_w{1};
}; };
} // namespace mindspore::kernel } // namespace mindspore::kernel
#endif #endif

@ -34,7 +34,7 @@ namespace mindspore::kernel {
int ToFormatOpenCLKernel::CheckSpecs() { int ToFormatOpenCLKernel::CheckSpecs() {
auto data_type = in_tensors_.front()->data_type(); auto data_type = in_tensors_.front()->data_type();
if (data_type != kNumberTypeFloat32 && data_type != kNumberTypeFloat16) { if (data_type != kNumberTypeFloat32 && data_type != kNumberTypeFloat16 && data_type != kNumberTypeInt32) {
MS_LOG(ERROR) << "Unsupported data type " << data_type; MS_LOG(ERROR) << "Unsupported data type " << data_type;
return RET_ERROR; return RET_ERROR;
} }
@ -61,7 +61,8 @@ void ToFormatOpenCLKernel::SetGlobalLocal() {
} }
int ToFormatOpenCLKernel::Prepare() { int ToFormatOpenCLKernel::Prepare() {
std::map<TypeId, std::string> dtype_str{{kNumberTypeFloat32, "float"}, {kNumberTypeFloat16, "half"}}; std::map<TypeId, std::string> dtype_str{
{kNumberTypeFloat32, "float"}, {kNumberTypeFloat16, "half"}, {kNumberTypeInt32, "float"}};
std::string kernel_name; std::string kernel_name;
if (out_mem_type_ == MemType::IMG) { if (out_mem_type_ == MemType::IMG) {
kernel_name = "to_format_NHWC_to_NHWC4_IMG_" + dtype_str[in_tensors_.front()->data_type()]; kernel_name = "to_format_NHWC_to_NHWC4_IMG_" + dtype_str[in_tensors_.front()->data_type()];

@ -68,7 +68,7 @@ void TestMain(const std::vector<ArgsTupleWithDtype> &input_infos, std::tuple<std
memcpy(tensor->MutableData(), input_data, tensor->Size()); memcpy(tensor->MutableData(), input_data, tensor->Size());
} }
} else { } else {
EXPECT_TRUE(tensor->data_type() == kNumberTypeFloat32); EXPECT_TRUE(tensor->data_type() == kNumberTypeFloat32 || tensor->data_type() == kNumberTypeInt32);
subgraph_inputs.push_back(tensor); subgraph_inputs.push_back(tensor);
subgraph_inputs_data[tensor] = reinterpret_cast<float *>(input_data); subgraph_inputs_data[tensor] = reinterpret_cast<float *>(input_data);
} }

@ -46,6 +46,22 @@ TEST_F(TestOpenCL_Gather, Axis0) {
} }
} }
// Gather on a 1-D tensor along axis 0, with the indices supplied as a
// runtime (VAR) Int32 tensor instead of a const weight.
TEST_F(TestOpenCL_Gather, Axis0_Tensor) {
  const int axis = 0;
  std::vector<int> in_shape = {10};
  std::vector<int> idx_shape = {2};
  std::vector<int> out_shape = {2};
  float in_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
  int32_t idx_data[] = {1, 3};
  float expect[] = {1, 3};
  // fp16 is deliberately not exercised: runtime indices + fp16 is rejected by CheckSpecs.
  for (bool fp16_enable : {false}) {
    auto *param = CreateParameter(axis);
    TestMain({{in_shape, in_data, VAR, kNumberTypeFloat32}, {idx_shape, idx_data, VAR, kNumberTypeInt32}},
             {out_shape, expect}, param, fp16_enable, fp16_enable ? 1e-3 : 1e-9);
  }
}
TEST_F(TestOpenCL_Gather, Axis1) { TEST_F(TestOpenCL_Gather, Axis1) {
int axis = 1; int axis = 1;
std::vector<int> input_shape = {1, 5, 4, 4}; std::vector<int> input_shape = {1, 5, 4, 4};
@ -75,6 +91,35 @@ TEST_F(TestOpenCL_Gather, Axis1) {
} }
} }
// Gather along axis 1 with a runtime (VAR) indices tensor.
// NOTE(review): four indices dtypes are declared below, but the outer loop runs
// only i = 0 (Int32) — presumably because ConvertTensorToweight only supports
// Int32 for runtime indices. Confirm before widening the loop bound.
TEST_F(TestOpenCL_Gather, Axis1_intensor1) {
int axis = 1;
std::vector<int> input_shape = {1, 5, 4, 4};
std::vector<int> indices_shape = {2};
std::vector<int> output_shape = {1, 2, 4, 4};
float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79};
float output_data[] = {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};
int32_t indices_int32[] = {1, 3};
int64_t indices_int64[] = {1, 3};
// NOTE(review): float32_t / float16_t are not standard C++ types (ARM
// extensions); this test only compiles on toolchains that provide them.
float32_t indices_fp32[] = {1, 3};
float16_t indices_fp16[] = {1, 3};
TypeId data_types[] = {kNumberTypeInt32, kNumberTypeInt64, kNumberTypeFloat32, kNumberTypeFloat16};
void *indices_datas[] = {indices_int32, indices_int64, indices_fp32, indices_fp16};
// Only the Int32 entry (i = 0) is run; fp16 is skipped as well.
for (int i = 0; i < 1; ++i) {
for (auto fp16_enable : {false}) {
auto *param = CreateParameter(axis);
TestMain(
{{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices_datas[i], VAR, data_types[i]}},
{output_shape, output_data}, param, fp16_enable, fp16_enable ? 1e-3 : 1e-9);
}
}
}
TEST_F(TestOpenCL_Gather, Axis2) { TEST_F(TestOpenCL_Gather, Axis2) {
int axis = 2; int axis = 2;
std::vector<int> input_shape = {1, 5, 4, 4}; std::vector<int> input_shape = {1, 5, 4, 4};
@ -96,6 +141,26 @@ TEST_F(TestOpenCL_Gather, Axis2) {
} }
} }
// Gather along axis 2 of a 4-D tensor, with the indices supplied as a
// runtime (VAR) Int32 tensor instead of a const weight.
TEST_F(TestOpenCL_Gather, Axis2_intensor1) {
  const int axis = 2;
  std::vector<int> in_shape = {1, 5, 4, 4};
  std::vector<int> idx_shape = {2};
  std::vector<int> out_shape = {1, 5, 2, 4};
  float in_data[] = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
                     20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
                     40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
                     60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79};
  int32_t idx_data[] = {1, 3};
  float expect[] = {4,  5,  6,  7,  12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31, 36, 37, 38, 39,
                    44, 45, 46, 47, 52, 53, 54, 55, 60, 61, 62, 63, 68, 69, 70, 71, 76, 77, 78, 79};
  // fp16 is deliberately not exercised: runtime indices + fp16 is rejected by CheckSpecs.
  for (bool fp16_enable : {false}) {
    auto *param = CreateParameter(axis);
    TestMain({{in_shape, in_data, VAR, kNumberTypeFloat32}, {idx_shape, idx_data, VAR, kNumberTypeInt32}},
             {out_shape, expect}, param, fp16_enable);
  }
}
TEST_F(TestOpenCL_Gather, Axis3) { TEST_F(TestOpenCL_Gather, Axis3) {
int axis = 3; int axis = 3;
std::vector<int> input_shape = {1, 5, 4, 4}; std::vector<int> input_shape = {1, 5, 4, 4};
@ -117,4 +182,24 @@ TEST_F(TestOpenCL_Gather, Axis3) {
} }
} }
// Gather along the innermost axis (3) of a 4-D tensor, with the indices
// supplied as a runtime (VAR) Int32 tensor instead of a const weight.
TEST_F(TestOpenCL_Gather, Axis3_intensor1) {
  const int axis = 3;
  std::vector<int> in_shape = {1, 5, 4, 4};
  std::vector<int> idx_shape = {2};
  std::vector<int> out_shape = {1, 5, 4, 2};
  float in_data[] = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
                     20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
                     40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
                     60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79};
  int32_t idx_data[] = {1, 3};
  float expect[] = {1,  3,  5,  7,  9,  11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39,
                    41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79};
  // fp16 is deliberately not exercised: runtime indices + fp16 is rejected by CheckSpecs.
  for (bool fp16_enable : {false}) {
    auto *param = CreateParameter(axis);
    TestMain({{in_shape, in_data, VAR, kNumberTypeFloat32}, {idx_shape, idx_data, VAR, kNumberTypeInt32}},
             {out_shape, expect}, param, fp16_enable);
  }
}
} // namespace mindspore::lite::opencl::test } // namespace mindspore::lite::opencl::test

Loading…
Cancel
Save