optimize opencl_runtime and opencl_allocator

pull/8382/head
wandongdong 4 years ago
parent 2ed8ed69e4
commit 72d5c88e1b

@ -107,7 +107,7 @@ int ArithmeticOpenCLKernel::InitWeights() {
// scalar
weight[3] = weight[2] = weight[1] = weight[0];
}
auto weight_ptr_ = allocator->CreateImageFromHost(weight, pack_weight_size, img_size);
auto weight_ptr_ = allocator->Malloc(pack_weight_size, img_size, weight);
inputs_weight_ptrs_.push_back(weight_ptr_);
delete[] weight;
} else {
@ -128,7 +128,7 @@ int ArithmeticOpenCLKernel::InitWeights() {
// scalar
weight[3] = weight[2] = weight[1] = weight[0];
}
auto weight_ptr_ = allocator->CreateImageFromHost(weight, pack_weight_size, img_size);
auto weight_ptr_ = allocator->Malloc(pack_weight_size, img_size, weight);
inputs_weight_ptrs_.push_back(weight_ptr_);
delete[] weight;
}

@ -65,8 +65,8 @@ int ScaleOpenCLKernel::InitWeights() {
if (broadcast_flag_) {
img_size[1] = 1;
img_size[0] = UP_DIV(in_tensors_[1]->shape()[0], C4NUM);
scale_ptr_ = allocator->CreateImageFromHost(in_tensors_[1]->data_c(), in_tensors_[1]->ElementsNum(), img_size);
offset_ptr_ = allocator->CreateImageFromHost(in_tensors_[2]->data_c(), in_tensors_[2]->ElementsNum(), img_size);
scale_ptr_ = allocator->Malloc(in_tensors_[1]->ElementsNum(), img_size, in_tensors_[1]->data_c());
offset_ptr_ = allocator->Malloc(in_tensors_[2]->ElementsNum(), img_size, in_tensors_[2]->data_c());
return RET_OK;
}
auto image2d_info = Image2DInfo(in_tensors_[1]);
@ -76,8 +76,8 @@ int ScaleOpenCLKernel::InitWeights() {
int batch = image2d_info.N;
if (in_tensors_[0]->GetFormat() == in_tensors_[1]->GetFormat()) {
if (in_tensors_[0]->data_type() == in_tensors_[1]->data_type()) {
scale_ptr_ = allocator->CreateImageFromHost(in_tensors_[1]->data_c(), in_tensors_[1]->ElementsNum(), img_size);
offset_ptr_ = allocator->CreateImageFromHost(in_tensors_[2]->data_c(), in_tensors_[2]->ElementsNum(), img_size);
scale_ptr_ = allocator->Malloc(in_tensors_[1]->ElementsNum(), img_size, in_tensors_[1]->data_c());
offset_ptr_ = allocator->Malloc(in_tensors_[2]->ElementsNum(), img_size, in_tensors_[2]->data_c());
} else {
MS_LOG(ERROR) << "Unsupport data type transpose from " << in_tensors_[1]->data_type() << "to "
<< in_tensors_[0]->data_type();
@ -100,8 +100,8 @@ int ScaleOpenCLKernel::InitWeights() {
std::function<float(float)> to_dtype = [](float x) -> float { return x; };
PackNHWCToNHWC4<float, float>(in_tensors_[1]->data_c(), scale, batch, plane, channel, to_dtype);
PackNHWCToNHWC4<float, float>(in_tensors_[2]->data_c(), offset, batch, plane, channel, to_dtype);
scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size);
offset_ptr_ = allocator->CreateImageFromHost(offset, in_tensors_[2]->ElementsNum(), img_size);
scale_ptr_ = allocator->Malloc(in_tensors_[1]->ElementsNum(), img_size, scale);
offset_ptr_ = allocator->Malloc(in_tensors_[2]->ElementsNum(), img_size, offset);
delete[] scale;
delete[] offset;
} else if (in_tensors_[0]->data_type() == kNumberTypeFloat16) {
@ -119,8 +119,8 @@ int ScaleOpenCLKernel::InitWeights() {
std::function<float16_t(float)> to_dtype = [](float x) -> float16_t { return static_cast<float16_t>(x); };
PackNHWCToNHWC4<float, float16_t>(in_tensors_[1]->data_c(), scale, batch, plane, channel, to_dtype);
PackNHWCToNHWC4<float, float16_t>(in_tensors_[2]->data_c(), offset, batch, plane, channel, to_dtype);
scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size);
offset_ptr_ = allocator->CreateImageFromHost(offset, in_tensors_[2]->ElementsNum(), img_size);
scale_ptr_ = allocator->Malloc(in_tensors_[1]->ElementsNum(), img_size, scale);
offset_ptr_ = allocator->Malloc(in_tensors_[2]->ElementsNum(), img_size, offset);
delete[] scale;
delete[] offset;
} else {

File diff suppressed because it is too large. [Load Diff]

@ -49,8 +49,7 @@ class OpenCLAllocator : public Allocator {
~OpenCLAllocator() override;
void SetContext(const AllocatorContext &ctx) override;
void *Malloc(size_t size) override;
void *Malloc(size_t size, const std::vector<size_t> &img_size);
void *CreateImageFromHost(void *host_ptr, size_t size, const std::vector<size_t> &img_size);
void *Malloc(size_t size, const std::vector<size_t> &img_size, void *data = nullptr);
void Free(void *ptr) override;
size_t GetTotalSize() override;
@ -71,6 +70,10 @@ class OpenCLAllocator : public Allocator {
private:
void Lock();
void UnLock();
void *MinimumFit(size_t size, const std::vector<size_t> &img_size);
void *CreateBuffer(size_t size, void *data, size_t flags, cl::Buffer **buffer);
void *CreateImage2D(size_t size, const std::vector<size_t> &img_size, void *data, size_t flags, cl::Buffer **buffer,
cl::Image2D **image);
struct MemBuf {
size_t size_;
void *device_ptr_;

@ -146,10 +146,11 @@ int OpenCLRuntime::Init() {
CL_EGL_DISPLAY_KHR, (cl_context_properties)eglGetCurrentDisplay(), 0};
context_ = new (std::nothrow) cl::Context(std::vector<cl::Device>{*device_}, context_prop, nullptr, nullptr, &ret);
if (ret != CL_SUCCESS || context_ == nullptr) {
if (ret != CL_SUCCESS) {
MS_LOG(ERROR) << "Create special OpenCL context failed, Create common OpenCL context then.";
context_ = new (std::nothrow) cl::Context(std::vector<cl::Device>{*device_}, nullptr, nullptr, nullptr, &ret);
if (context_ == nullptr) {
delete device_;
MS_LOG(ERROR) << "Create OpenCL context failed!";
return RET_ERROR;
}
@ -158,7 +159,8 @@ int OpenCLRuntime::Init() {
MS_LOG(INFO) << "Create common opencl context";
context_ = new (std::nothrow) cl::Context(std::vector<cl::Device>{*device_}, nullptr, nullptr, nullptr, &ret);
#endif
if (ret != CL_SUCCESS || context_ == nullptr) {
if (ret != CL_SUCCESS) {
delete device_;
MS_LOG(ERROR) << "Context create failed: " << CLErrorCode(ret);
return RET_ERROR;
}
@ -205,13 +207,18 @@ int OpenCLRuntime::Init() {
#endif
default_command_queue_ = new (std::nothrow) cl::CommandQueue(*context_, *device_, properties, &ret);
if (ret != CL_SUCCESS || default_command_queue_ == nullptr) {
if (ret != CL_SUCCESS) {
delete device_;
delete context_;
MS_LOG(ERROR) << "Command Queue create failed: " << CLErrorCode(ret);
return RET_ERROR;
}
allocator_ = new (std::nothrow) OpenCLAllocator(this);
if (allocator_ == nullptr) {
delete device_;
delete context_;
delete default_command_queue_;
MS_LOG(ERROR) << "Command OpenCL allocator failed!";
return RET_ERROR;
}

Loading…
Cancel
Save