!4288 create image with buffer for opencl allocator

Merge pull request !4288 from liuchao/allocator
pull/4288/MERGE
mindspore-ci-bot 5 years ago committed by Gitee
commit 0c464ac324

@ -118,12 +118,12 @@ int ArithmeticOpenCLKernel::Init() {
int ArithmeticOpenCLKernel::Run() { int ArithmeticOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!"; MS_LOG(DEBUG) << this->name() << " Running!";
auto runtime_ = lite::opencl::OpenCLRuntime::GetInstance(); auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
int arg_idx = 0;
uint32_t element_num = out_tensors_[0]->ElementsC4Num(); uint32_t element_num = out_tensors_[0]->ElementsC4Num();
int arg_idx = 0;
runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->Data()); ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->Data());
if (element_flag_) { if (element_flag_) {
runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[1]->Data()); runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[1]->Data());
} else { } else {
@ -145,15 +145,15 @@ int ArithmeticOpenCLKernel::Run() {
MS_LOG(ERROR) << "Error Operator type " << op_parameter_->type_; MS_LOG(ERROR) << "Error Operator type " << op_parameter_->type_;
break; break;
} }
runtime_->SetKernelArg(kernel_, arg_idx++, weight_); ocl_runtime->SetKernelArg(kernel_, arg_idx++, weight_);
runtime_->SetKernelArg(kernel_, arg_idx++, bias_); ocl_runtime->SetKernelArg(kernel_, arg_idx++, bias_);
} }
runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->Data()); ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->Data());
int H = out_tensors_[0]->Batch() * out_tensors_[0]->Height(); int H = out_tensors_[0]->Batch() * out_tensors_[0]->Height();
int W = out_tensors_[0]->Width() * UP_DIV(out_tensors_[0]->Channel(), C4NUM); int W = out_tensors_[0]->Width() * UP_DIV(out_tensors_[0]->Channel(), C4NUM);
cl_int2 output_shape{W, H}; cl_int2 output_shape{W, H};
runtime_->SetKernelArg(kernel_, arg_idx++, output_shape); ocl_runtime->SetKernelArg(kernel_, arg_idx++, output_shape);
runtime_->RunKernel(kernel_, global_size_, local_size_, nullptr); ocl_runtime->RunKernel(kernel_, global_size_, local_size_, nullptr);
return 0; return 0;
} }

@ -95,11 +95,11 @@ int ConcatOpenCLKernel::Run_axis0() {
cl::CommandQueue *command_queue = ocl_runtime->GetDefaultCommandQueue(); cl::CommandQueue *command_queue = ocl_runtime->GetDefaultCommandQueue();
for (auto &tensor : in_tensors_) { for (auto &tensor : in_tensors_) {
auto buffer = static_cast<cl::Buffer *>(allocator_->GetDeviceBuffer(tensor->Data())); auto buffer = static_cast<cl::Buffer *>(allocator_->GetBuffer(tensor->Data()));
ocl_runtime->MapBuffer(*buffer, CL_MAP_READ, tensor->Size(), command_queue, true); ocl_runtime->MapBuffer(*buffer, CL_MAP_READ, tensor->Size(), command_queue, true);
} }
for (auto &tensor : out_tensors_) { for (auto &tensor : out_tensors_) {
auto buffer = static_cast<cl::Buffer *>(allocator_->GetDeviceBuffer(tensor->Data())); auto buffer = static_cast<cl::Buffer *>(allocator_->GetBuffer(tensor->Data()));
ocl_runtime->MapBuffer(*buffer, CL_MAP_WRITE, tensor->Size(), command_queue, true); ocl_runtime->MapBuffer(*buffer, CL_MAP_WRITE, tensor->Size(), command_queue, true);
} }
@ -109,7 +109,7 @@ int ConcatOpenCLKernel::Run_axis0() {
for (auto tensors : {&in_tensors_, &out_tensors_}) { for (auto tensors : {&in_tensors_, &out_tensors_}) {
for (auto &tensor : *tensors) { for (auto &tensor : *tensors) {
auto buffer = static_cast<cl::Buffer *>(allocator_->GetDeviceBuffer(tensor->Data())); auto buffer = static_cast<cl::Buffer *>(allocator_->GetBuffer(tensor->Data()));
ocl_runtime->UnmapBuffer(*buffer, tensor->Data()); ocl_runtime->UnmapBuffer(*buffer, tensor->Data());
} }
} }

@ -124,9 +124,7 @@ int SoftmaxOpenCLKernel::Init() {
int SoftmaxOpenCLKernel::Run() { int SoftmaxOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!"; MS_LOG(DEBUG) << this->name() << " Running!";
std::cout << "run" << std::endl;
// attribute
int arg_idx = 0; int arg_idx = 0;
if (onexone_flag_) { if (onexone_flag_) {
int channel_size = in_tensors_[0]->shape()[1]; int channel_size = in_tensors_[0]->shape()[1];

@ -43,7 +43,18 @@ void OpenCLAllocator::UnLock() {
} }
} }
void *OpenCLAllocator::Malloc(size_t size) { void *OpenCLAllocator::Malloc(size_t size) { return Malloc(size, std::vector<size_t>{}); }
void *OpenCLAllocator::Malloc(size_t size, const std::vector<size_t> &img_size) {
auto ocl_runtime = opencl::OpenCLRuntime::GetInstance();
auto svm_capabilities = ocl_runtime->GetSVMCapabilities();
size_t img_pitch = 0;
if (!img_size.empty()) {
uint32_t image_alignment = ocl_runtime->GetImagePitchAlignment();
img_pitch = (img_size[0] + image_alignment - 1) / image_alignment * image_alignment;
size = img_pitch * img_size[1] * sizeof(cl_float4);
}
if (size > MAX_MALLOC_SIZE) { if (size > MAX_MALLOC_SIZE) {
MS_LOG(ERROR) << "MallocData out of max_size, size: " << size; MS_LOG(ERROR) << "MallocData out of max_size, size: " << size;
return nullptr; return nullptr;
@ -52,17 +63,23 @@ void *OpenCLAllocator::Malloc(size_t size) {
auto iter = free_list_.lower_bound(size); auto iter = free_list_.lower_bound(size);
if (iter != free_list_.end() && (iter->second->size_ >= size) && (iter->second->size_ < (size << shift_factor_))) { if (iter != free_list_.end() && (iter->second->size_ >= size) && (iter->second->size_ < (size << shift_factor_))) {
auto mem_buf = iter->second; auto mem_buf = iter->second;
bool is_match{mem_buf->img_size.size() == img_size.size()};
for (int i = 0; i < img_size.size() && is_match; ++i) {
is_match &= img_size[i] == mem_buf->img_size[i];
}
if (is_match) {
free_list_.erase(iter); free_list_.erase(iter);
allocated_list_[mem_buf->host_ptr_] = mem_buf; allocated_list_[mem_buf->host_ptr_] = mem_buf;
UnLock(); UnLock();
MS_LOG(DEBUG) << "Malloc buffer from free list. size: " << mem_buf->size_ << ", host addr: " << mem_buf->host_ptr_ MS_LOG(DEBUG) << "Malloc Image2D from free list. size: " << mem_buf->size_
<< ", device addr: " << mem_buf->device_ptr_; << ", host addr: " << mem_buf->host_ptr_ << ", device addr: " << mem_buf->device_ptr_;
return mem_buf->host_ptr_; return mem_buf->host_ptr_;
} }
auto ocl_runtime = opencl::OpenCLRuntime::GetInstance(); }
auto svm_capabilities = ocl_runtime->GetSVMCapabilities();
void *host_ptr = nullptr; void *host_ptr = nullptr;
void *device_ptr = nullptr; void *device_ptr = nullptr;
void *image_ptr = nullptr;
if (svm_capabilities && svm_on_) { if (svm_capabilities && svm_on_) {
cl_svm_mem_flags flags = (svm_capabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0; cl_svm_mem_flags flags = (svm_capabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0;
flags |= (svm_capabilities & CL_DEVICE_SVM_ATOMICS) ? CL_MEM_SVM_ATOMICS : 0; flags |= (svm_capabilities & CL_DEVICE_SVM_ATOMICS) ? CL_MEM_SVM_ATOMICS : 0;
@ -70,9 +87,9 @@ void *OpenCLAllocator::Malloc(size_t size) {
host_ptr = clSVMAlloc((*ocl_runtime->Context())(), flags, size, 0); host_ptr = clSVMAlloc((*ocl_runtime->Context())(), flags, size, 0);
} else { } else {
cl_int ret = CL_SUCCESS; cl_int ret = CL_SUCCESS;
cl::Buffer *buffer = new cl::Buffer(*ocl_runtime->Context(), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, cl::Buffer *buffer = new (std::nothrow)
size, NULL, &ret); cl::Buffer(*ocl_runtime->Context(), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, size, NULL, &ret);
if (ret != CL_SUCCESS) { if (buffer == nullptr || ret != CL_SUCCESS) {
MS_LOG(ERROR) << "Create OpenCL buffer failed! (ERROR CODE: " << ret << ")"; MS_LOG(ERROR) << "Create OpenCL buffer failed! (ERROR CODE: " << ret << ")";
UnLock(); UnLock();
return nullptr; return nullptr;
@ -86,78 +103,32 @@ void *OpenCLAllocator::Malloc(size_t size) {
} }
cl::Memory *mem = buffer; cl::Memory *mem = buffer;
ocl_runtime->UnmapBuffer(*mem, host_ptr); ocl_runtime->UnmapBuffer(*mem, host_ptr);
} if (!img_size.empty()) {
std::unique_ptr<MemBuf> mem_buf = std::make_unique<MemBuf>();
mem_buf->size_ = size;
mem_buf->device_ptr_ = device_ptr;
mem_buf->host_ptr_ = host_ptr;
MS_LOG(DEBUG) << "Malloc a new buffer. size: " << mem_buf->size_ << ", host addr: " << mem_buf->host_ptr_
<< ", device addr: " << mem_buf->device_ptr_;
allocated_list_[host_ptr] = mem_buf.release();
UnLock();
return host_ptr;
}
void *OpenCLAllocator::Malloc(size_t size, const std::vector<size_t>& img_size) {
if (size > MAX_MALLOC_SIZE) {
MS_LOG(ERROR) << "MallocData out of max_size, size: " << size;
return nullptr;
}
auto ocl_runtime = opencl::OpenCLRuntime::GetInstance();
Lock();
auto iter = free_list_.lower_bound(size);
if (iter != free_list_.end() && (iter->second->size_ >= size) && (iter->second->size_ < (size << shift_factor_))) {
auto mem_buf = iter->second;
bool is_match{mem_buf->img_size.size() == img_size.size()};
for (int i = 0; i < img_size.size() && is_match; ++i) {
is_match &= img_size[i] == mem_buf->img_size[i];
}
if (is_match) {
free_list_.erase(iter);
allocated_list_[mem_buf->host_ptr_] = mem_buf;
UnLock();
MS_LOG(DEBUG) << "Malloc Image2D from free list. size: " << mem_buf->size_
<< ", host addr: " << mem_buf->host_ptr_ << ", device addr: " << mem_buf->device_ptr_;
return mem_buf->host_ptr_;
}
}
void *host_ptr = nullptr;
void *device_ptr = nullptr;
cl_int ret = CL_SUCCESS;
// CL_HALF_FLOAT, CL_FLOAT
cl::ImageFormat image_format(CL_RGBA, img_size[2]); cl::ImageFormat image_format(CL_RGBA, img_size[2]);
cl::Image2D *buffer = new cl::Image2D(*ocl_runtime->Context(), CL_MEM_READ_WRITE, image_format, cl::Image2D *image = new (std::nothrow) cl::Image2D(*ocl_runtime->Context(), image_format, *buffer, img_size[0],
img_size[0], img_size[1], 0, nullptr, &ret); img_size[1], img_pitch * sizeof(cl_float4), &ret);
if (ret != CL_SUCCESS) { if (image == nullptr || ret != CL_SUCCESS) {
MS_LOG(ERROR) << "Create OpenCL Image2D failed!" << kernel::CLErrorCode(ret); MS_LOG(ERROR) << "Create OpenCL Image2D failed! (ERROR CODE: " << ret << ")";
UnLock(); UnLock();
delete buffer;
return nullptr; return nullptr;
} }
device_ptr = static_cast<void *>(buffer); image_ptr = static_cast<void *>(image);
std::vector<size_t> region{img_size[0], img_size[1], 1}; }
host_ptr = ocl_runtime->MapBuffer(*buffer, 0, CL_MAP_READ | CL_MAP_WRITE, region);
if (host_ptr == nullptr) {
MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << device_ptr << ", host_ptr=" << host_ptr;
UnLock();
return nullptr;
} }
cl::Memory *mem = buffer;
ocl_runtime->UnmapBuffer(*mem, host_ptr);
std::unique_ptr<MemBuf> mem_buf = std::make_unique<MemBuf>(); std::unique_ptr<MemBuf> mem_buf = std::make_unique<MemBuf>();
mem_buf->size_ = size; mem_buf->size_ = size;
mem_buf->device_ptr_ = device_ptr; mem_buf->device_ptr_ = device_ptr;
mem_buf->host_ptr_ = host_ptr; mem_buf->host_ptr_ = host_ptr;
mem_buf->image_ptr_ = image_ptr;
mem_buf->img_size = img_size; mem_buf->img_size = img_size;
MS_LOG(DEBUG) << "Malloc a new Image2D. size: " << mem_buf->size_ << ", host addr: " << mem_buf->host_ptr_ MS_LOG(DEBUG) << "Malloc a new buffer. size: " << mem_buf->size_ << ", host addr: " << mem_buf->host_ptr_
<< ", device addr: " << mem_buf->device_ptr_; << ", device addr: " << mem_buf->device_ptr_ << ", image_addr: " << image_ptr;
allocated_list_[host_ptr] = mem_buf.release(); allocated_list_[host_ptr] = mem_buf.release();
UnLock(); UnLock();
return host_ptr; return host_ptr;
} }
void *OpenCLAllocator::CreateImageFromHost(void *data, size_t size, const std::vector<size_t>& img_size) { void *OpenCLAllocator::CreateImageFromHost(void *data, size_t size, const std::vector<size_t> &img_size) {
if (size > MAX_MALLOC_SIZE) { if (size > MAX_MALLOC_SIZE) {
MS_LOG(ERROR) << "MallocData out of max_size, size: " << size; MS_LOG(ERROR) << "MallocData out of max_size, size: " << size;
return nullptr; return nullptr;
@ -182,34 +153,36 @@ void *OpenCLAllocator::CreateImageFromHost(void *data, size_t size, const std::v
} }
void *host_ptr = nullptr; void *host_ptr = nullptr;
void *device_ptr = nullptr; void *device_ptr = nullptr;
void *image_ptr = nullptr;
cl_int ret = CL_SUCCESS; cl_int ret = CL_SUCCESS;
// CL_HALF_FLOAT, CL_FLOAT // CL_HALF_FLOAT, CL_FLOAT
cl::ImageFormat image_format(CL_RGBA, img_size[2]); cl::ImageFormat image_format(CL_RGBA, img_size[2]);
cl::Image2D *buffer = new cl::Image2D(*ocl_runtime->Context(), CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, cl::Image2D *image = new (std::nothrow) cl::Image2D(*ocl_runtime->Context(), CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
image_format, img_size[0], img_size[1], 0, data, &ret); image_format, img_size[0], img_size[1], 0, data, &ret);
if (ret != CL_SUCCESS) { if (image == nullptr || ret != CL_SUCCESS) {
MS_LOG(ERROR) << "Create OpenCL Image2D failed - " << kernel::CLErrorCode(ret); MS_LOG(ERROR) << "Create OpenCL Image2D failed! (ERROR CODE: " << ret << ")";
UnLock(); UnLock();
delete buffer; delete image;
return nullptr; return nullptr;
} }
device_ptr = static_cast<void *>(buffer); image_ptr = static_cast<void *>(image);
std::vector<size_t> region{img_size[0], img_size[1], 1}; std::vector<size_t> region{img_size[0], img_size[1], 1};
host_ptr = ocl_runtime->MapBuffer(*buffer, 0, CL_MAP_READ | CL_MAP_WRITE, region); host_ptr = ocl_runtime->MapBuffer(*image, 0, CL_MAP_READ | CL_MAP_WRITE, region);
if (host_ptr == nullptr) { if (host_ptr == nullptr) {
MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << device_ptr << ", host_ptr=" << host_ptr; MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << device_ptr << ", host_ptr=" << host_ptr;
UnLock(); UnLock();
return nullptr; return nullptr;
} }
cl::Memory *mem = buffer; cl::Memory *mem = image;
ocl_runtime->UnmapBuffer(*mem, host_ptr); ocl_runtime->UnmapBuffer(*mem, host_ptr);
std::unique_ptr<MemBuf> mem_buf = std::make_unique<MemBuf>(); std::unique_ptr<MemBuf> mem_buf = std::make_unique<MemBuf>();
mem_buf->size_ = size; mem_buf->size_ = size;
mem_buf->device_ptr_ = device_ptr; mem_buf->device_ptr_ = device_ptr;
mem_buf->image_ptr_ = image_ptr;
mem_buf->host_ptr_ = host_ptr; mem_buf->host_ptr_ = host_ptr;
mem_buf->img_size = img_size; mem_buf->img_size = img_size;
MS_LOG(DEBUG) << "Malloc a new Image2D. size: " << mem_buf->size_ << ", host addr: " << mem_buf->host_ptr_ MS_LOG(DEBUG) << "Malloc a new Image2D. size: " << mem_buf->size_ << ", host addr: " << mem_buf->host_ptr_
<< ", device addr: " << mem_buf->device_ptr_; << ", device addr: " << mem_buf->device_ptr_ << ", image addr: " << mem_buf->image_ptr_;
allocated_list_[host_ptr] = mem_buf.release(); allocated_list_[host_ptr] = mem_buf.release();
UnLock(); UnLock();
return host_ptr; return host_ptr;
@ -246,7 +219,15 @@ size_t OpenCLAllocator::GetTotalSize() {
return totalSize; return totalSize;
} }
void *OpenCLAllocator::GetDeviceBuffer(void *buffer) { void *OpenCLAllocator::GetImage(void *buffer) {
auto it = allocated_list_.find(buffer);
if (it != allocated_list_.end()) {
return it->second->image_ptr_;
}
return nullptr;
}
void *OpenCLAllocator::GetBuffer(void *buffer) {
auto it = allocated_list_.find(buffer); auto it = allocated_list_.find(buffer);
if (it != allocated_list_.end()) { if (it != allocated_list_.end()) {
return it->second->device_ptr_; return it->second->device_ptr_;
@ -313,8 +294,8 @@ void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue,
} else { } else {
cl::ImageFormat image_format(CL_RGBA, mem_buf->img_size[2]); cl::ImageFormat image_format(CL_RGBA, mem_buf->img_size[2]);
std::vector<size_t> region{mem_buf->img_size[0], mem_buf->img_size[1], 1}; std::vector<size_t> region{mem_buf->img_size[0], mem_buf->img_size[1], 1};
cl::Image2D *buffer = static_cast<cl::Image2D *>(mem_buf->device_ptr_); cl::Image2D *image = static_cast<cl::Image2D *>(mem_buf->image_ptr_);
new_host_ptr = ocl_runtime->MapBuffer(*buffer, 0, CL_MAP_READ | CL_MAP_WRITE, region); new_host_ptr = ocl_runtime->MapBuffer(*image, 0, CL_MAP_READ | CL_MAP_WRITE, region);
} }
if (new_host_ptr == nullptr) { if (new_host_ptr == nullptr) {
MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << mem_buf->device_ptr_ << ", host_ptr=" << host_ptr; MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << mem_buf->device_ptr_ << ", host_ptr=" << host_ptr;
@ -325,6 +306,7 @@ void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue,
allocated_list_.erase(it); allocated_list_.erase(it);
allocated_list_[new_host_ptr] = mem_buf; allocated_list_[new_host_ptr] = mem_buf;
UnLock(); UnLock();
MS_LOG(DEBUG) << "Map buffer form " << host_ptr << " to " << new_host_ptr;
return new_host_ptr; return new_host_ptr;
} }
@ -342,8 +324,9 @@ int OpenCLAllocator::UnmapBuffer(void *host_ptr, void *command_queue) {
MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << host_ptr; MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << host_ptr;
return 1; return 1;
} }
cl::Buffer *buffer = static_cast<cl::Buffer *>(it->second->device_ptr_); cl::Memory *mem =
return ocl_runtime->UnmapBuffer(*buffer, it->second->host_ptr_, static_cast<cl::CommandQueue *>(command_queue)); static_cast<cl::Memory *>(it->second->img_size.empty() ? it->second->device_ptr_ : it->second->image_ptr_);
return ocl_runtime->UnmapBuffer(*mem, it->second->host_ptr_, static_cast<cl::CommandQueue *>(command_queue));
} }
MEM_TYPE OpenCLAllocator::GetMemType(void *host_ptr) { MEM_TYPE OpenCLAllocator::GetMemType(void *host_ptr) {
@ -365,7 +348,7 @@ MEM_TYPE OpenCLAllocator::GetMemType(void *host_ptr) {
return mem_type; return mem_type;
} }
int OpenCLAllocator::GetImageSize(void *host_ptr, std::vector<size_t>* img_size) { int OpenCLAllocator::GetImageSize(void *host_ptr, std::vector<size_t> *img_size) {
Lock(); Lock();
auto it = allocated_list_.find(host_ptr); auto it = allocated_list_.find(host_ptr);
if (it == allocated_list_.end()) { if (it == allocated_list_.end()) {

@ -39,9 +39,7 @@ struct OpenclMemory {
OpenCLMemoryType mem_type{MS_HOST_BUFFER | MS_CL_BUFFER}; OpenCLMemoryType mem_type{MS_HOST_BUFFER | MS_CL_BUFFER};
}; };
enum class MEM_TYPE : char { enum class MEM_TYPE : char { BUF, IMG };
BUF, IMG
};
class OpenCLAllocator : public Allocator { class OpenCLAllocator : public Allocator {
public: public:
@ -49,17 +47,18 @@ class OpenCLAllocator : public Allocator {
~OpenCLAllocator() override; ~OpenCLAllocator() override;
void SetContext(const AllocatorContext &ctx) override; void SetContext(const AllocatorContext &ctx) override;
void *Malloc(size_t size) override; void *Malloc(size_t size) override;
void *Malloc(size_t size, const std::vector<size_t>& img_size); void *Malloc(size_t size, const std::vector<size_t> &img_size);
void *CreateImageFromHost(void *host_ptr, size_t size, const std::vector<size_t>& img_size); void *CreateImageFromHost(void *host_ptr, size_t size, const std::vector<size_t> &img_size);
void Free(void *ptr) override; void Free(void *ptr) override;
size_t GetTotalSize() override; size_t GetTotalSize() override;
void Clear() override; void Clear() override;
void *GetDeviceBuffer(void *buffer); void *GetImage(void *host_ptr);
void *GetBuffer(void *host_ptr);
void *MapBuffer(void *host_ptr, int flags, void *command_queue = nullptr, bool sync = true); void *MapBuffer(void *host_ptr, int flags, void *command_queue = nullptr, bool sync = true);
int UnmapBuffer(void *host_ptr, void *command_queue = nullptr); int UnmapBuffer(void *host_ptr, void *command_queue = nullptr);
MEM_TYPE GetMemType(void *host_ptr); MEM_TYPE GetMemType(void *host_ptr);
int GetImageSize(void *host_ptr, std::vector<size_t>* img_size); int GetImageSize(void *host_ptr, std::vector<size_t> *img_size);
private: private:
void Lock(); void Lock();
@ -68,6 +67,7 @@ class OpenCLAllocator : public Allocator {
size_t size_; size_t size_;
void *device_ptr_; void *device_ptr_;
void *host_ptr_; void *host_ptr_;
void *image_ptr_;
std::vector<size_t> img_size; std::vector<size_t> img_size;
}; };
@ -84,4 +84,3 @@ class OpenCLAllocator : public Allocator {
} // namespace mindspore::lite::opencl } // namespace mindspore::lite::opencl
#endif // MINDSPORE_LITE_SRC_OPENCL_ALLOCATOR_H_ #endif // MINDSPORE_LITE_SRC_OPENCL_ALLOCATOR_H_

@ -206,7 +206,7 @@ int OpenCLExecutor::TransformTensorLayoutFromImage(tensor::Tensor *tensor, schem
if (dst_format == schema::Format_NHWC) { if (dst_format == schema::Format_NHWC) {
auto src_data = tensor->Data(); auto src_data = tensor->Data();
auto dst_data = allocator_->Malloc(tensor->Size()); auto dst_data = allocator_->Malloc(tensor->Size());
cl::Image2D *out_mem = reinterpret_cast<cl::Image2D *>(allocator_->GetDeviceBuffer(src_data)); cl::Image2D *out_mem = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(src_data));
std::vector<size_t> img_size; std::vector<size_t> img_size;
allocator_->GetImageSize(src_data, &img_size); allocator_->GetImageSize(src_data, &img_size);
auto origin = cl::array < cl::size_type, 3U > {0, 0, 0}; auto origin = cl::array < cl::size_type, 3U > {0, 0, 0};

@ -124,17 +124,12 @@ int OpenCLRuntime::Init() {
const std::string device_name = device_->getInfo<CL_DEVICE_NAME>(); const std::string device_name = device_->getInfo<CL_DEVICE_NAME>();
const std::string device_version = device_->getInfo<CL_DEVICE_VERSION>(); const std::string device_version = device_->getInfo<CL_DEVICE_VERSION>();
const std::string opencl_version = device_->getInfo<CL_DEVICE_OPENCL_C_VERSION>(); const std::string opencl_version = device_->getInfo<CL_DEVICE_OPENCL_C_VERSION>();
cl_uint align; clGetDeviceInfo((*device_)(), CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof(cl_uint), &image_pitch_align_, nullptr);
size_t ret;
clGetDeviceInfo((*device_)(), CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof(cl_uint), &align, &ret);
MS_LOG(INFO) << "Device name:\t" << device_name; MS_LOG(INFO) << "Device name:\t" << device_name;
MS_LOG(INFO) << "Opencl version:\t" << device_version; MS_LOG(INFO) << "Opencl version:\t" << device_version;
MS_LOG(INFO) << "Image alignment:\t" << align; MS_LOG(INFO) << "Image pitch alignment:\t" << image_pitch_align_;
MS_LOG(INFO) << "Image ret:\t" << ret;
MS_LOG(INFO) << "Highest OpenCL c version:\t" << opencl_version; MS_LOG(INFO) << "Highest OpenCL c version:\t" << opencl_version;
MS_LOG(INFO) << "Max work item size:\t" MS_LOG(INFO) << "Max work item size:\t" << max_work_item_sizes_[0] << " : " << max_work_item_sizes_[1] << " : "
<< max_work_item_sizes_[0] << " : "
<< max_work_item_sizes_[1] << " : "
<< max_work_item_sizes_[2]; << max_work_item_sizes_[2];
gpu_info_ = ParseGpuInfo(device_name, device_version); gpu_info_ = ParseGpuInfo(device_name, device_version);
@ -540,8 +535,8 @@ int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::Command
return command_queue->enqueueMapSVM(host_ptr, sync, flags, size); return command_queue->enqueueMapSVM(host_ptr, sync, flags, size);
} }
void *OpenCLRuntime::MapBuffer(const cl::Image2D buffer, bool sync, int flags, void *OpenCLRuntime::MapBuffer(const cl::Image2D buffer, bool sync, int flags, const std::vector<size_t> &region,
const std::vector<size_t>& region, cl::CommandQueue *command_queue) const { cl::CommandQueue *command_queue) const {
if (command_queue == nullptr) { if (command_queue == nullptr) {
command_queue = default_command_queue_.get(); command_queue = default_command_queue_.get();
} }
@ -623,4 +618,3 @@ bool OpenCLRuntime::CreateProgramFromIL(const std::vector<u_char> program_binary
} }
} // namespace mindspore::lite::opencl } // namespace mindspore::lite::opencl

@ -66,6 +66,7 @@ class OpenCLRuntime {
bool GetFp16Enable() const; bool GetFp16Enable() const;
bool SetFp16Enable(bool enable); bool SetFp16Enable(bool enable);
const std::vector<size_t> &GetWorkItemSize() { return max_work_item_sizes_; } const std::vector<size_t> &GetWorkItemSize() { return max_work_item_sizes_; }
uint32_t GetImagePitchAlignment() { return image_pitch_align_; }
cl_device_svm_capabilities GetSVMCapabilities() const { return svm_capabilities_; } cl_device_svm_capabilities GetSVMCapabilities() const { return svm_capabilities_; }
template <typename T> template <typename T>
@ -77,13 +78,13 @@ class OpenCLRuntime {
} else { } else {
MEM_TYPE mem_type = allocator_->GetMemType(value); MEM_TYPE mem_type = allocator_->GetMemType(value);
if (mem_type == MEM_TYPE::BUF) { if (mem_type == MEM_TYPE::BUF) {
cl::Buffer *buffer = reinterpret_cast<cl::Buffer *>(allocator_->GetDeviceBuffer(value)); cl::Buffer *buffer = reinterpret_cast<cl::Buffer *>(allocator_->GetBuffer(value));
MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Buffer " << value; MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Buffer " << buffer << ", host_ptr: " << value;
return clSetKernelArg(kernel, index, sizeof((*buffer)()), &(*buffer)()); return clSetKernelArg(kernel, index, sizeof((*buffer)()), &(*buffer)());
} else { } else {
cl::Image2D *buffer = reinterpret_cast<cl::Image2D *>(allocator_->GetDeviceBuffer(value)); cl::Image2D *image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(value));
MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Image2D " << value; MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Image2D " << image << ", host_ptr: " << value;
return clSetKernelArg(kernel, index, sizeof((*buffer)()), &(*buffer)()); return clSetKernelArg(kernel, index, sizeof((*image)()), &(*image)());
} }
} }
} }
@ -114,8 +115,8 @@ class OpenCLRuntime {
bool sync = false) const; bool sync = false) const;
void *MapBuffer(const cl::Buffer buffer, int map_flags, size_t size, cl::CommandQueue *command_queue = nullptr, void *MapBuffer(const cl::Buffer buffer, int map_flags, size_t size, cl::CommandQueue *command_queue = nullptr,
bool sync = false) const; bool sync = false) const;
void *MapBuffer(const cl::Image2D buffer, bool sync, int flags, void *MapBuffer(const cl::Image2D buffer, bool sync, int flags, const std::vector<size_t> &region,
const std::vector<size_t>& region, cl::CommandQueue *command_queue = nullptr) const; cl::CommandQueue *command_queue = nullptr) const;
int MapBuffer(void *host_ptr, int map_flags, size_t size, cl::CommandQueue *command_queue = nullptr, int MapBuffer(void *host_ptr, int map_flags, size_t size, cl::CommandQueue *command_queue = nullptr,
bool sync = false) const; bool sync = false) const;
int UnmapBuffer(const cl::Memory buffer, void *host_ptr, cl::CommandQueue *command_queue = nullptr) const; int UnmapBuffer(const cl::Memory buffer, void *host_ptr, cl::CommandQueue *command_queue = nullptr) const;
@ -155,10 +156,10 @@ class OpenCLRuntime {
bool support_fp16_{false}; bool support_fp16_{false};
bool fp16_enable_{false}; bool fp16_enable_{false};
cl_device_svm_capabilities svm_capabilities_{0}; cl_device_svm_capabilities svm_capabilities_{0};
cl_uint image_pitch_align_{0};
std::vector<size_t> max_work_item_sizes_; std::vector<size_t> max_work_item_sizes_;
}; };
} // namespace mindspore::lite::opencl } // namespace mindspore::lite::opencl
#endif // MINDSPORE_LITE_SRC_OPENCL_RUNTIME_H_ #endif // MINDSPORE_LITE_SRC_OPENCL_RUNTIME_H_

Loading…
Cancel
Save