!4288 create image with buffer for opencl allocator

Merge pull request !4288 from liuchao/allocator
pull/4288/MERGE
mindspore-ci-bot committed via Gitee
commit 0c464ac324

@@ -118,12 +118,12 @@ int ArithmeticOpenCLKernel::Init() {
int ArithmeticOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!";
auto runtime_ = lite::opencl::OpenCLRuntime::GetInstance();
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
int arg_idx = 0;
uint32_t element_num = out_tensors_[0]->ElementsC4Num();
int arg_idx = 0;
runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->Data());
ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->Data());
if (element_flag_) {
runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[1]->Data());
} else {
@@ -145,15 +145,15 @@ int ArithmeticOpenCLKernel::Run() {
MS_LOG(ERROR) << "Error Operator type " << op_parameter_->type_;
break;
}
runtime_->SetKernelArg(kernel_, arg_idx++, weight_);
runtime_->SetKernelArg(kernel_, arg_idx++, bias_);
ocl_runtime->SetKernelArg(kernel_, arg_idx++, weight_);
ocl_runtime->SetKernelArg(kernel_, arg_idx++, bias_);
}
runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->Data());
ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->Data());
int H = out_tensors_[0]->Batch() * out_tensors_[0]->Height();
int W = out_tensors_[0]->Width() * UP_DIV(out_tensors_[0]->Channel(), C4NUM);
cl_int2 output_shape{W, H};
runtime_->SetKernelArg(kernel_, arg_idx++, output_shape);
runtime_->RunKernel(kernel_, global_size_, local_size_, nullptr);
ocl_runtime->SetKernelArg(kernel_, arg_idx++, output_shape);
ocl_runtime->RunKernel(kernel_, global_size_, local_size_, nullptr);
return 0;
}
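
Worth noting for reviewers: the cl_int2 output_shape passed above packs the NHWC output into a 2D extent with H = batch * height rows and W = width * ceil(channel / 4) columns, because four channels go into one RGBA pixel. A minimal standalone sketch of that arithmetic, assuming the usual MindSpore Lite definitions C4NUM == 4 and UP_DIV(x, y) == (x + y - 1) / y; the tensor shape below is made up for illustration:

// Sketch only: how the {W, H} image extent above is derived.
#include <cstdio>

constexpr int C4NUM = 4;                                        // channels packed per pixel
constexpr int UP_DIV(int x, int y) { return (x + y - 1) / y; }  // round-up division

int main() {
  // Hypothetical NHWC output tensor: batch=1, height=16, width=16, channel=3.
  int batch = 1, height = 16, width = 16, channel = 3;
  int H = batch * height;                  // image rows
  int W = width * UP_DIV(channel, C4NUM);  // image columns: channels packed 4-per-pixel
  printf("output_shape = {W=%d, H=%d}\n", W, H);  // -> {W=16, H=16}
  return 0;
}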

@@ -95,11 +95,11 @@ int ConcatOpenCLKernel::Run_axis0() {
cl::CommandQueue *command_queue = ocl_runtime->GetDefaultCommandQueue();
for (auto &tensor : in_tensors_) {
auto buffer = static_cast<cl::Buffer *>(allocator_->GetDeviceBuffer(tensor->Data()));
auto buffer = static_cast<cl::Buffer *>(allocator_->GetBuffer(tensor->Data()));
ocl_runtime->MapBuffer(*buffer, CL_MAP_READ, tensor->Size(), command_queue, true);
}
for (auto &tensor : out_tensors_) {
auto buffer = static_cast<cl::Buffer *>(allocator_->GetDeviceBuffer(tensor->Data()));
auto buffer = static_cast<cl::Buffer *>(allocator_->GetBuffer(tensor->Data()));
ocl_runtime->MapBuffer(*buffer, CL_MAP_WRITE, tensor->Size(), command_queue, true);
}
@@ -109,7 +109,7 @@ int ConcatOpenCLKernel::Run_axis0() {
for (auto tensors : {&in_tensors_, &out_tensors_}) {
for (auto &tensor : *tensors) {
auto buffer = static_cast<cl::Buffer *>(allocator_->GetDeviceBuffer(tensor->Data()));
auto buffer = static_cast<cl::Buffer *>(allocator_->GetBuffer(tensor->Data()));
ocl_runtime->UnmapBuffer(*buffer, tensor->Data());
}
}
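
Here the lookups switch from allocator_->GetDeviceBuffer(host_ptr) to the renamed allocator_->GetBuffer(host_ptr) before each tensor is mapped for host access and unmapped again. For reference, a standalone sketch of the map/unmap round trip those wrappers ultimately perform with the OpenCL C++ bindings; the helper names and the assumption of an already-created queue and buffer are illustrative, not part of the patch:

#include <CL/cl2.hpp>

// Sketch only: blocking map for reading; returns a host pointer backed by the device buffer.
void *MapForRead(cl::CommandQueue &queue, cl::Buffer &buffer, size_t size) {
  cl_int err = CL_SUCCESS;
  void *host = queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_READ, 0, size,
                                      nullptr, nullptr, &err);
  return (err == CL_SUCCESS) ? host : nullptr;
}

// Hands the mapped region back to the device; `host` must be the pointer returned by the map.
void Unmap(cl::CommandQueue &queue, cl::Buffer &buffer, void *host) {
  queue.enqueueUnmapMemObject(buffer, host);
  queue.finish();
}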

@@ -124,9 +124,7 @@ int SoftmaxOpenCLKernel::Init() {
int SoftmaxOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!";
std::cout << "run" << std::endl;
// attribute
int arg_idx = 0;
if (onexone_flag_) {
int channel_size = in_tensors_[0]->shape()[1];

File diff suppressed because it is too large.

@@ -39,9 +39,7 @@ struct OpenclMemory {
OpenCLMemoryType mem_type{MS_HOST_BUFFER | MS_CL_BUFFER};
};
enum class MEM_TYPE : char {
BUF, IMG
};
enum class MEM_TYPE : char { BUF, IMG };
class OpenCLAllocator : public Allocator {
public:
@@ -49,17 +47,18 @@ class OpenCLAllocator : public Allocator {
~OpenCLAllocator() override;
void SetContext(const AllocatorContext &ctx) override;
void *Malloc(size_t size) override;
void *Malloc(size_t size, const std::vector<size_t>& img_size);
void *CreateImageFromHost(void *host_ptr, size_t size, const std::vector<size_t>& img_size);
void *Malloc(size_t size, const std::vector<size_t> &img_size);
void *CreateImageFromHost(void *host_ptr, size_t size, const std::vector<size_t> &img_size);
void Free(void *ptr) override;
size_t GetTotalSize() override;
void Clear() override;
void *GetDeviceBuffer(void *buffer);
void *GetImage(void *host_ptr);
void *GetBuffer(void *host_ptr);
void *MapBuffer(void *host_ptr, int flags, void *command_queue = nullptr, bool sync = true);
int UnmapBuffer(void *host_ptr, void *command_queue = nullptr);
MEM_TYPE GetMemType(void *host_ptr);
int GetImageSize(void *host_ptr, std::vector<size_t>* img_size);
int GetImageSize(void *host_ptr, std::vector<size_t> *img_size);
private:
void Lock();
@@ -68,6 +67,7 @@ class OpenCLAllocator : public Allocator {
size_t size_;
void *device_ptr_;
void *host_ptr_;
void *image_ptr_;
std::vector<size_t> img_size;
};
@@ -84,4 +84,3 @@ class OpenCLAllocator : public Allocator {
} // namespace mindspore::lite::opencl
#endif // MINDSPORE_LITE_SRC_OPENCL_ALLOCATOR_H_
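
Taken together, the new allocator surface (Malloc(size, img_size), CreateImageFromHost, the GetBuffer/GetImage split, GetMemType, GetImageSize, and the extra image_ptr_ member) appears to be what the PR title refers to: an allocation can carry both a linear cl::Buffer and a cl::Image2D view of the same storage. A standalone sketch of the underlying "image backed by a buffer" idea with the plain OpenCL C API; the helper name and parameters are illustrative, and it assumes OpenCL 2.0 (or the cl_khr_image2d_from_buffer extension) plus a buffer sized for the pitch-aligned rows:

#include <CL/cl.h>

// Sketch only: create a 2D image whose storage is an existing buffer object.
cl_mem CreateImageOnBuffer(cl_context context, cl_mem buf, size_t width, size_t height,
                           size_t row_pitch_bytes, cl_int *err) {
  cl_image_format format{};
  format.image_channel_order = CL_RGBA;          // 4 channels per pixel (C4 layout)
  format.image_channel_data_type = CL_HALF_FLOAT;

  cl_image_desc desc{};
  desc.image_type = CL_MEM_OBJECT_IMAGE2D;
  desc.image_width = width;
  desc.image_height = height;
  desc.image_row_pitch = row_pitch_bytes;        // must honour the device pitch alignment
  desc.buffer = buf;                             // the image aliases this buffer's storage

  // flags = 0: inherit the access flags from the source buffer; no host_ptr needed.
  return clCreateImage(context, 0, &format, &desc, nullptr, err);
}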

@@ -206,7 +206,7 @@ int OpenCLExecutor::TransformTensorLayoutFromImage(tensor::Tensor *tensor, schem
if (dst_format == schema::Format_NHWC) {
auto src_data = tensor->Data();
auto dst_data = allocator_->Malloc(tensor->Size());
cl::Image2D *out_mem = reinterpret_cast<cl::Image2D *>(allocator_->GetDeviceBuffer(src_data));
cl::Image2D *out_mem = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(src_data));
std::vector<size_t> img_size;
allocator_->GetImageSize(src_data, &img_size);
auto origin = cl::array < cl::size_type, 3U > {0, 0, 0};
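
With the image handle now obtained through allocator_->GetImage(src_data) and its extent through GetImageSize, the rest of this function (not shown in the hunk) copies the Image2D contents back into a linear NHWC buffer. A standalone sketch of such a read-back with the OpenCL C++ bindings; illustrative only, the executor goes through the runtime/allocator wrappers, and img_size is assumed to hold {width, height} in pixels:

#include <CL/cl2.hpp>
#include <vector>

// Sketch only: blocking read of a full 2D image into tightly packed host memory.
int ReadImageToHost(cl::CommandQueue &queue, const cl::Image2D &image,
                    const std::vector<size_t> &img_size, void *dst) {
  auto origin = cl::array<cl::size_type, 3U>{0, 0, 0};
  auto region = cl::array<cl::size_type, 3U>{img_size[0], img_size[1], 1};
  // row_pitch/slice_pitch of 0 mean the host buffer is tightly packed.
  return queue.enqueueReadImage(image, CL_TRUE, origin, region, 0, 0, dst);
}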

@@ -124,17 +124,12 @@ int OpenCLRuntime::Init() {
const std::string device_name = device_->getInfo<CL_DEVICE_NAME>();
const std::string device_version = device_->getInfo<CL_DEVICE_VERSION>();
const std::string opencl_version = device_->getInfo<CL_DEVICE_OPENCL_C_VERSION>();
cl_uint align;
size_t ret;
clGetDeviceInfo((*device_)(), CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof(cl_uint), &align, &ret);
clGetDeviceInfo((*device_)(), CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof(cl_uint), &image_pitch_align_, nullptr);
MS_LOG(INFO) << "Device name:\t" << device_name;
MS_LOG(INFO) << "Opencl version:\t" << device_version;
MS_LOG(INFO) << "Image alignment:\t" << align;
MS_LOG(INFO) << "Image ret:\t" << ret;
MS_LOG(INFO) << "Image pitch alignment:\t" << image_pitch_align_;
MS_LOG(INFO) << "Highest OpenCL c version:\t" << opencl_version;
MS_LOG(INFO) << "Max work item size:\t"
<< max_work_item_sizes_[0] << " : "
<< max_work_item_sizes_[1] << " : "
MS_LOG(INFO) << "Max work item size:\t" << max_work_item_sizes_[0] << " : " << max_work_item_sizes_[1] << " : "
<< max_work_item_sizes_[2];
gpu_info_ = ParseGpuInfo(device_name, device_version);
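
Init() now caches the device's image pitch alignment in image_pitch_align_ instead of querying it into a throwaway local and logging it. The value is reported in pixels and matters when an image is laid over a linear buffer: each row must start on an aligned pixel boundary. A sketch of how such an alignment is typically applied; the helper name is illustrative and an OpenCL 2.0 device is assumed:

#include <CL/cl.h>
#include <cstddef>

// Sketch only: round an image row width (in pixels) up to the device pitch alignment.
size_t AlignedRowPixels(cl_device_id device, size_t width_in_pixels) {
  cl_uint align = 1;
  clGetDeviceInfo(device, CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof(cl_uint), &align, nullptr);
  if (align == 0) align = 1;  // defensive: treat "not reported" as no extra alignment
  return ((width_in_pixels + align - 1) / align) * align;
}
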
@@ -540,8 +535,8 @@ int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::Command
return command_queue->enqueueMapSVM(host_ptr, sync, flags, size);
}
void *OpenCLRuntime::MapBuffer(const cl::Image2D buffer, bool sync, int flags,
const std::vector<size_t>& region, cl::CommandQueue *command_queue) const {
void *OpenCLRuntime::MapBuffer(const cl::Image2D buffer, bool sync, int flags, const std::vector<size_t> &region,
cl::CommandQueue *command_queue) const {
if (command_queue == nullptr) {
command_queue = default_command_queue_.get();
}
@@ -623,4 +618,3 @@ bool OpenCLRuntime::CreateProgramFromIL(const std::vector<u_char> program_binary
}
} // namespace mindspore::lite::opencl
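
The reflowed MapBuffer overload above takes the image extent as a region rather than a byte size, because image mapping is region-based and returns a row pitch the caller has to respect. A standalone sketch of that call with the OpenCL C++ bindings; the helper name is illustrative, and region_wh is assumed to hold {width, height} in pixels:

#include <CL/cl2.hpp>
#include <vector>

// Sketch only: blocking map of a whole 2D image for writing.
void *MapImageForWrite(cl::CommandQueue &queue, const cl::Image2D &image,
                       const std::vector<size_t> &region_wh, size_t *row_pitch) {
  auto origin = cl::array<cl::size_type, 3U>{0, 0, 0};
  auto region = cl::array<cl::size_type, 3U>{region_wh[0], region_wh[1], 1};
  cl::size_type slice_pitch = 0;
  cl_int err = CL_SUCCESS;
  // The mapped rows are *row_pitch bytes apart, which may exceed width * pixel_size
  // because of the device's pitch alignment.
  void *host = queue.enqueueMapImage(image, CL_TRUE, CL_MAP_WRITE, origin, region,
                                     row_pitch, &slice_pitch, nullptr, nullptr, &err);
  return (err == CL_SUCCESS) ? host : nullptr;
}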

@@ -66,6 +66,7 @@ class OpenCLRuntime {
bool GetFp16Enable() const;
bool SetFp16Enable(bool enable);
const std::vector<size_t> &GetWorkItemSize() { return max_work_item_sizes_; }
uint32_t GetImagePitchAlignment() { return image_pitch_align_; }
cl_device_svm_capabilities GetSVMCapabilities() const { return svm_capabilities_; }
template <typename T>
@@ -77,13 +78,13 @@ class OpenCLRuntime {
} else {
MEM_TYPE mem_type = allocator_->GetMemType(value);
if (mem_type == MEM_TYPE::BUF) {
cl::Buffer *buffer = reinterpret_cast<cl::Buffer *>(allocator_->GetDeviceBuffer(value));
MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Buffer " << value;
cl::Buffer *buffer = reinterpret_cast<cl::Buffer *>(allocator_->GetBuffer(value));
MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Buffer " << buffer << ", host_ptr: " << value;
return clSetKernelArg(kernel, index, sizeof((*buffer)()), &(*buffer)());
} else {
cl::Image2D *buffer = reinterpret_cast<cl::Image2D *>(allocator_->GetDeviceBuffer(value));
MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Image2D " << value;
return clSetKernelArg(kernel, index, sizeof((*buffer)()), &(*buffer)());
cl::Image2D *image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(value));
MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Image2D " << image << ", host_ptr: " << value;
return clSetKernelArg(kernel, index, sizeof((*image)()), &(*image)());
}
}
}
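
SetKernelArg now asks the allocator which kind of memory backs a host pointer and fetches either the cl::Buffer (GetBuffer) or the cl::Image2D (GetImage); the updated log lines print the device object alongside the host pointer. Both branches end in the same clSetKernelArg call because buffers and images are both cl_mem handles, and the kernel argument is just that handle. A minimal sketch with the C API (helper name illustrative):

#include <CL/cl.h>

// Sketch only: a buffer and an image are passed to a kernel the same way.
cl_int SetMemArg(cl_kernel kernel, cl_uint index, cl_mem mem /* buffer or image */) {
  return clSetKernelArg(kernel, index, sizeof(cl_mem), &mem);
}
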
@@ -114,8 +115,8 @@ class OpenCLRuntime {
bool sync = false) const;
void *MapBuffer(const cl::Buffer buffer, int map_flags, size_t size, cl::CommandQueue *command_queue = nullptr,
bool sync = false) const;
void *MapBuffer(const cl::Image2D buffer, bool sync, int flags,
const std::vector<size_t>& region, cl::CommandQueue *command_queue = nullptr) const;
void *MapBuffer(const cl::Image2D buffer, bool sync, int flags, const std::vector<size_t> &region,
cl::CommandQueue *command_queue = nullptr) const;
int MapBuffer(void *host_ptr, int map_flags, size_t size, cl::CommandQueue *command_queue = nullptr,
bool sync = false) const;
int UnmapBuffer(const cl::Memory buffer, void *host_ptr, cl::CommandQueue *command_queue = nullptr) const;
@@ -155,10 +156,10 @@ class OpenCLRuntime {
bool support_fp16_{false};
bool fp16_enable_{false};
cl_device_svm_capabilities svm_capabilities_{0};
cl_uint image_pitch_align_{0};
std::vector<size_t> max_work_item_sizes_;
};
} // namespace mindspore::lite::opencl
#endif // MINDSPORE_LITE_SRC_OPENCL_RUNTIME_H_
