Optimize concat ops

pull/9012/head
Pengyongrong 4 years ago
parent 4d9d5852aa
commit e62958bd05

@ -40,7 +40,7 @@ set(LITE_SRC
if (SUPPORT_GPU)
set(LITE_SRC
${LITE_SRC}
${CMAKE_CURRENT_SOURCE_DIR}/runtime/kernel/opencl/subgraph_opencl_kernel.cc
${CMAKE_CURRENT_SOURCE_DIR}/runtime/kernel/opencl/opencl_subgraph.cc
${CMAKE_CURRENT_SOURCE_DIR}/runtime/kernel/opencl/utils.cc
${CMAKE_CURRENT_SOURCE_DIR}/runtime/opencl/opencl_executor.cc
${CMAKE_CURRENT_SOURCE_DIR}/runtime/opencl/opencl_allocator.cc

File diff suppressed because it is too large Load Diff

@ -64,54 +64,107 @@ void ConcatGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t> *
}
int ConcatOpenCLKernel::CheckSpecs() {
if (in_tensors_[0]->shape().size() != 4) {
MS_LOG(ERROR) << " only support dim = 4 ";
return RET_ERROR;
}
auto param = reinterpret_cast<ConcatParameter *>(this->op_parameter_);
MS_LOG(DEBUG) << " concat at axis=: " << param->axis_;
if (param->axis_ < 0) {
param->axis_ += in_tensors_.front()->shape().size();
if (out_tensors_[0]->shape().size() > 4) {
MS_LOG(ERROR) << " GPU Unsupported shape.size > 4 "
<< "your shape().size()=: " << out_tensors_[0]->shape().size();
return RET_ERROR;
}
if (param->axis_ < 0 || param->axis_ > 3) {
axis_ = param->axis_;
if (axis_ < 0) {
axis_ += in_tensors_.front()->shape().size();
}
if (axis_ < 0 || axis_ > 3) {
MS_LOG(ERROR) << " only support axis >= 0 and axis <= 3 ";
return RET_ERROR;
}
if (out_tensors_[0]->shape().size() < 4 && op_parameter_->type_ == PrimitiveType_Concat && axis_ != 0) {
if (out_tensors_[0]->shape().size() == 2) {
axis_ = axis_ + 2;
} else if (out_tensors_[0]->shape().size() == 3) {
axis_ = axis_ + 1;
} else {
MS_LOG(ERROR) << " Unsupported axis =: " << axis_ << " shape().size()=: " << out_tensors_[0]->shape().size();
return RET_ERROR;
}
}
return RET_OK;
}
void ConcatOpenCLKernel::SetConstArgs() {
auto param = reinterpret_cast<ConcatParameter *>(this->op_parameter_);
auto output_shape = out_tensors_[0]->shape();
cl_int4 output_shape_ = {output_shape[0], output_shape[1], output_shape[2], UP_DIV(output_shape[3], C4NUM)};
int arg_cn = 2 * in_tensors_.size() + 1;
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->axis_);
GpuTensorInfo img_info(out_tensors_[0]);
size_t dtype = enable_fp16_ ? sizeof(cl_half) : sizeof(cl_float);
stride_w = img_info.RowPitch() / dtype;
cl_int4 output_shape_ = {};
for (int i = 0; i < out_tensors_[0]->shape().size(); ++i) {
output_shape_.s[i] = out_tensors_[0]->shape()[i];
}
Broadcast2GpuShape(out_shape_.s, output_shape_.s, out_tensors_[0]->shape().size(), 1);
int arg_cn = in_tensors_.size() + 1;
if (axis_ == 3 && !Align_) {
for (int i = 0; i < in_tensors_.size(); ++i) {
cl_int4 temp = {};
for (int j = 0; j < in_tensors_[i]->shape().size(); ++j) {
temp.s[j] = in_tensors_[i]->shape()[j];
}
Broadcast2GpuShape(in_shape_.s, temp.s, in_tensors_[i]->shape().size(), 1);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_);
}
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w);
} else {
for (int i = 0; i < in_tensors_.size(); ++i) {
cl_int4 temp = {};
for (int j = 0; j < in_tensors_[i]->shape().size(); ++j) {
temp.s[j] = in_tensors_[i]->shape()[j];
}
Broadcast2GpuShape(in_shape_.s, temp.s, in_tensors_[i]->shape().size(), 1);
in_shape_.s[3] = UP_DIV(in_shape_.s[3], C4NUM);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_);
}
}
out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_);
}
void ConcatOpenCLKernel::SetGlobalLocal() {
auto output_shape = out_tensors_[0]->shape();
const std::vector<size_t> &max_global = ocl_runtime_->GetWorkItemSize();
std::vector<size_t> local = {1, 1, 1};
uint32_t OH = output_shape[0] * output_shape[1];
uint32_t OW = output_shape[2];
uint32_t OC = output_shape[3];
std::vector<size_t> global = {OH, OW, OC};
if (axis_ == 3 && !Align_) {
OH = out_shape_.s[0] * out_shape_.s[1];
OW = out_shape_.s[2];
global = {OH, OW, 1};
local = {1, 1, 1};
} else {
OH = out_shape_.s[0] * out_shape_.s[1];
OW = out_shape_.s[2];
OC = out_shape_.s[3];
global = {OH, OW, OC};
local = {1, 1, 1};
}
ConcatGetWorkGroup(global, &local, max_global[0]);
OpenCLKernel::AlignGlobalLocal(global, local);
}
int ConcatOpenCLKernel::Prepare() {
auto param = reinterpret_cast<ConcatParameter *>(this->op_parameter_);
for (int i = 0; i < in_tensors_.size(); ++i) {
int length = in_tensors_[0]->shape().size();
if (in_tensors_[i]->shape()[length - 1] % C4NUM != 0) {
Align_ = false;
}
}
enable_fp16_ = ocl_runtime_->GetFp16Enable();
std::string kernel_name = "Concat";
if (in_tensors_.size() == 2 || in_tensors_.size() == 3 || in_tensors_.size() == 4 || in_tensors_.size() == 6) {
kernel_name += std::to_string(in_tensors_.size()) + "inputaxis" + std::to_string(param->axis_);
if (axis_ == 3 && !Align_) {
kernel_name += "Input" + std::to_string(in_tensors_.size()) + "UnAlign";
} else {
MS_LOG(ERROR) << " input must be 2 , 3 , 4 or 6";
return RET_ERROR;
if (2 <= in_tensors_.size() && in_tensors_.size() <= 6) {
kernel_name += std::to_string(in_tensors_.size()) + "inputaxis" + std::to_string(axis_);
} else {
MS_LOG(ERROR) << " input must be less than 6 and more than 2 ";
return RET_ERROR;
}
}
kernel_name += "_NHWC4";
MS_LOG(DEBUG) << "kernel_name=: " << kernel_name;
std::string source = concat_source;
@ -119,27 +172,25 @@ int ConcatOpenCLKernel::Prepare() {
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name);
MS_LOG(DEBUG) << kernel_name << " Init Done!";
SetGlobalLocal();
SetConstArgs();
SetGlobalLocal();
return RET_OK;
}
int ConcatOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running! ";
auto param = reinterpret_cast<ConcatParameter *>(this->op_parameter_);
if (param->axis_ == 0) {
if (axis_ == 0) {
return RunAxis0();
}
if (in_tensors_.size() == 2 || in_tensors_.size() == 3 || in_tensors_.size() == 4 || in_tensors_.size() == 6) {
if (2 <= in_tensors_.size() && in_tensors_.size() <= 6) {
int arg_cn = 0;
for (int i = 0; i < in_tensors_.size(); ++i) {
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[i]->data_c());
}
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c());
for (int i = 0; i < in_tensors_.size(); ++i) {
cl_int4 temp = {in_tensors_[i]->shape()[0], in_tensors_[i]->shape()[1], in_tensors_[i]->shape()[2],
UP_DIV(in_tensors_[i]->shape()[3], C4NUM)};
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, temp);
if (axis_ == 3 && !Align_) {
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF);
} else {
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c());
}
} else {
MS_LOG(ERROR) << "unsupported input size :" << in_tensors_.size();

@ -39,8 +39,20 @@ class ConcatOpenCLKernel : public OpenCLKernel {
int Run() override;
private:
int RunAxis0();
std::vector<size_t> local;
uint32_t OH = {1};
uint32_t OW = {1};
uint32_t OC = {1};
std::vector<size_t> global;
bool Align_{true};
bool enable_fp16_{false};
cl_int stride_w{1};
cl_int4 in_shape_{};
cl_int4 out_shape_{};
int axis_{0};
private:
int RunAxis0();
cl::Kernel kernel_;
};

@ -14,7 +14,7 @@
* limitations under the License.
*/
#include "src/runtime/kernel/opencl/subgraph_opencl_kernel.h"
#include "src/runtime/kernel/opencl/opencl_subgraph.h"
#include <set>
#include "src/runtime/opencl/opencl_executor.h"
#include "src/runtime/kernel/opencl/utils.h"
@ -26,11 +26,11 @@ using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::lite::opencl::MemType;
SubGraphOpenCLKernel::~SubGraphOpenCLKernel() { UnInit(); }
OpenCLSubGraph::~OpenCLSubGraph() { UnInit(); }
void SubGraphOpenCLKernel::ReplaceOutTensorAndKernelToNull(
const std::vector<lite::Tensor *> &in_tensors, const std::vector<std::vector<kernel::LiteKernel *>> &in_kernels,
MemType mem_type) {
void OpenCLSubGraph::ReplaceOutTensorAndKernelToNull(const std::vector<lite::Tensor *> &in_tensors,
const std::vector<std::vector<kernel::LiteKernel *>> &in_kernels,
MemType mem_type) {
for (size_t i = 0; i < in_tensors.size(); ++i) {
for (auto &jv : in_kernels.at(i)) {
MS_ASSERT(jv);
@ -62,10 +62,10 @@ void SubGraphOpenCLKernel::ReplaceOutTensorAndKernelToNull(
}
}
void SubGraphOpenCLKernel::ReplaceOutTensorAndKernelToConvert(const lite::Tensor *in_tensor,
const std::vector<kernel::LiteKernel *> &in_kernels,
lite::Tensor *new_tensor,
kernel::LiteKernel *in_convert_op, MemType mem_type) {
void OpenCLSubGraph::ReplaceOutTensorAndKernelToConvert(const lite::Tensor *in_tensor,
const std::vector<kernel::LiteKernel *> &in_kernels,
lite::Tensor *new_tensor, kernel::LiteKernel *in_convert_op,
MemType mem_type) {
MS_ASSERT(in_convert_op);
auto in_opencl_op = reinterpret_cast<OpenCLKernel *>(in_convert_op);
for (auto &iv : in_kernels) {
@ -96,11 +96,11 @@ void SubGraphOpenCLKernel::ReplaceOutTensorAndKernelToConvert(const lite::Tensor
}
}
int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::Tensor *> &in_tensors,
const std::vector<std::vector<kernel::LiteKernel *>> &in_kernels,
std::vector<lite::Tensor *> *out_tensors,
std::vector<OpenCLToFormatParameter *> *out_parameters,
std::vector<LiteKernel *> *out_convert_ops, MemType mem_type) {
int OpenCLSubGraph::GenToFormatOp(const std::vector<lite::Tensor *> &in_tensors,
const std::vector<std::vector<kernel::LiteKernel *>> &in_kernels,
std::vector<lite::Tensor *> *out_tensors,
std::vector<OpenCLToFormatParameter *> *out_parameters,
std::vector<LiteKernel *> *out_convert_ops, MemType mem_type) {
MS_ASSERT(out_tensors);
MS_ASSERT(out_parameters);
MS_ASSERT(out_convert_ops);
@ -120,7 +120,7 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::Tensor *> &in_te
auto *new_tensor = new (std::nothrow) lite::Tensor();
MS_ASSERT(new_tensor);
if (new_tensor == nullptr) {
MS_LOG(ERROR) << "SubGraphOpenCLKernel new tensor failed!";
MS_LOG(ERROR) << "OpenCLSubGraph new tensor failed!";
return RET_ERROR;
}
new_tensor->CopyTensor(*in_tensors[i]);
@ -141,7 +141,7 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::Tensor *> &in_te
auto *parameter = static_cast<OpenCLToFormatParameter *>(malloc(sizeof(OpenCLToFormatParameter)));
MS_ASSERT(parameter);
if (parameter == nullptr) {
MS_LOG(ERROR) << "SubGraphOpenCLKernel new parameter failed!";
MS_LOG(ERROR) << "OpenCLSubGraph new parameter failed!";
delete new_tensor;
new_tensor = nullptr;
return RET_ERROR;
@ -161,7 +161,7 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::Tensor *> &in_te
}
MS_ASSERT(in_convert_op);
if (in_convert_op == nullptr) {
MS_LOG(ERROR) << "SubGraphOpenCLKernel create op failed!";
MS_LOG(ERROR) << "OpenCLSubGraph create op failed!";
delete new_tensor;
new_tensor = nullptr;
free(parameter);
@ -189,7 +189,7 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::Tensor *> &in_te
return RET_OK;
}
int SubGraphOpenCLKernel::Init() {
int OpenCLSubGraph::Init() {
allocator_ = ocl_runtime_->GetAllocator();
MS_LOG(DEBUG) << "input num=" << in_tensors_.size() << ", output num=" << out_tensors_.size();
for (const auto tensor : in_tensors_) {
@ -233,7 +233,7 @@ int SubGraphOpenCLKernel::Init() {
return RET_OK;
}
void SubGraphOpenCLKernel::UpdateTensorDataType() {
void OpenCLSubGraph::UpdateTensorDataType() {
bool is_fp16 = ocl_runtime_->GetFp16Enable();
MS_ASSERT(in_tensors_[0]);
if (is_fp16 && (in_tensors_[0]->data_type() == kNumberTypeFloat32)) {
@ -253,7 +253,7 @@ void SubGraphOpenCLKernel::UpdateTensorDataType() {
}
}
int SubGraphOpenCLKernel::MallocTensorWithReuse() {
int OpenCLSubGraph::MallocTensorWithReuse() {
int ret;
kernel::LiteKernelUtil::InitTensorRefCount(nodes_);
for (auto *kernel : nodes_) {
@ -297,10 +297,9 @@ int SubGraphOpenCLKernel::MallocTensorWithReuse() {
return RET_OK;
}
void SubGraphOpenCLKernel::GetKernelFromToTensor(const std::vector<lite::Tensor *> &in_tensors,
const std::vector<kernel::LiteKernel *> &in_kernels,
std::vector<std::vector<kernel::LiteKernel *>> *out_kernels,
bool is_from) {
void OpenCLSubGraph::GetKernelFromToTensor(const std::vector<lite::Tensor *> &in_tensors,
const std::vector<kernel::LiteKernel *> &in_kernels,
std::vector<std::vector<kernel::LiteKernel *>> *out_kernels, bool is_from) {
std::vector<std::set<lite::Tensor *>> ksets;
for (auto jv : in_kernels) {
MS_ASSERT(jv);
@ -321,32 +320,24 @@ void SubGraphOpenCLKernel::GetKernelFromToTensor(const std::vector<lite::Tensor
}
}
void SubGraphOpenCLKernel::GetInOutNodes() {
std::vector<std::set<lite::Tensor *>> ksets_in;
std::vector<std::set<lite::Tensor *>> ksets_out;
for (auto jv : nodes_) {
MS_ASSERT(jv);
std::set<lite::Tensor *> kset;
kset.insert(jv->in_tensors().begin(), jv->in_tensors().end());
ksets_in.emplace_back(kset);
kset.clear();
kset.insert(jv->out_tensors().begin(), jv->out_tensors().end());
ksets_out.emplace_back(kset);
}
for (size_t j = 0; j < nodes_.size(); ++j) {
if (std::find_if(in_tensors_.begin(), in_tensors_.end(),
[&ksets_in, &j](lite::Tensor *val) { return ksets_in[j].count(val); }) != in_tensors_.end()) {
in_nodes_.emplace_back(nodes_.at(j));
void OpenCLSubGraph::GetInOutNodes() {
for (auto *node : nodes_) {
for (auto *tensor : node->in_tensors()) {
if (std::find(in_tensors_.begin(), in_tensors_.end(), tensor) != in_tensors_.end()) {
in_nodes_.emplace_back(node);
break;
}
}
if (std::find_if(out_tensors_.begin(), out_tensors_.end(),
[&ksets_out, &j](lite::Tensor *val) { return ksets_out[j].count(val); }) != out_tensors_.end()) {
out_nodes_.emplace_back(nodes_.at(j));
for (auto *tensor : node->out_tensors()) {
if (std::find(out_tensors_.begin(), out_tensors_.end(), tensor) != out_tensors_.end()) {
out_nodes_.emplace_back(node);
break;
}
}
}
}
int SubGraphOpenCLKernel::Prepare() {
int OpenCLSubGraph::Prepare() {
executor_ = new (std::nothrow) lite::opencl::OpenCLExecutor();
if (executor_ == nullptr) {
MS_LOG(ERROR) << "Create OpenCLExecutor fail";
@ -361,7 +352,7 @@ int SubGraphOpenCLKernel::Prepare() {
return RET_OK;
}
void SubGraphOpenCLKernel::UnInit() {
void OpenCLSubGraph::UnInit() {
for (const auto &tensor : in_convert_tensors_) {
delete tensor;
}
@ -379,11 +370,11 @@ void SubGraphOpenCLKernel::UnInit() {
delete this->executor_;
}
int SubGraphOpenCLKernel::InferShape() { return RET_OK; }
int OpenCLSubGraph::InferShape() { return RET_OK; }
int SubGraphOpenCLKernel::ReSize() { return RET_OK; }
int OpenCLSubGraph::ReSize() { return RET_OK; }
int SubGraphOpenCLKernel::Run() {
int OpenCLSubGraph::Run() {
if (executor_ == nullptr) {
MS_LOG(ERROR) << "executor is nullptr";
return RET_ERROR;

@ -14,8 +14,8 @@
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_SUBGRAPH_OPENCL_KENEL_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_SUBGRAPH_OPENCL_KENEL_H_
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_SUBGRAPH_OPENCL_KERNEL_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_SUBGRAPH_OPENCL_KERNEL_H_
#include <set>
#include <vector>
@ -25,25 +25,19 @@
#include "src/sub_graph_kernel.h"
namespace mindspore::kernel {
struct SubGraphOpenCLParameter {
OpParameter op_parameter;
int input_size;
int output_size;
};
class SubGraphOpenCLKernel : public SubGraphKernel {
class OpenCLSubGraph : public SubGraphKernel {
public:
SubGraphOpenCLKernel(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
const std::vector<kernel::LiteKernel *> &inKernels,
const std::vector<kernel::LiteKernel *> &outKernels,
const std::vector<kernel::LiteKernel *> &nodes, const lite::InnerContext *ctx = nullptr)
OpenCLSubGraph(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
const std::vector<kernel::LiteKernel *> &inKernels,
const std::vector<kernel::LiteKernel *> &outKernels, const std::vector<kernel::LiteKernel *> &nodes,
const lite::InnerContext *ctx = nullptr)
: SubGraphKernel(inputs, outputs, inKernels, outKernels, nodes, ctx) {
ocl_runtime_ = ocl_runtime_wrap_.GetInstance();
subgraph_type_ = kGpuSubGraph;
this->name_ = "GpuSubGraph";
nodes_set_.insert(nodes.begin(), nodes.end());
}
~SubGraphOpenCLKernel() override;
~OpenCLSubGraph() override;
int PreProcess() override { return mindspore::lite::RET_OK; }
int PostProcess() override { return mindspore::lite::RET_OK; }

@ -25,7 +25,7 @@
#include "src/kernel_registry.h"
#include "src/sub_graph_kernel.h"
#if SUPPORT_GPU
#include "src/runtime/kernel/opencl/subgraph_opencl_kernel.h"
#include "src/runtime/kernel/opencl/opencl_subgraph.h"
#include "src/runtime/opencl/opencl_runtime.h"
#endif
@ -241,7 +241,7 @@ kernel::SubGraphKernel *Scheduler::CreateSubGraphKernel(const std::vector<kernel
if (type == kernel::kGpuSubGraph) {
#if SUPPORT_GPU
auto sub_kernel = new (std::nothrow)
kernel::SubGraphOpenCLKernel(input_tensors, output_tensors, input_kernels, output_kernels, kernels, context_);
kernel::OpenCLSubGraph(input_tensors, output_tensors, input_kernels, output_kernels, kernels, context_);
return sub_kernel;
#else
return nullptr;

@ -83,7 +83,7 @@ if (SUPPORT_GPU)
set(KERNEL_OP_SRC
${KERNEL_OP_SRC}
${GPU_KERNEL_OP_SRC}
${LITE_DIR}/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc
${LITE_DIR}/src/runtime/kernel/opencl/opencl_subgraph.cc
${LITE_DIR}/src/runtime/kernel/opencl/utils.cc
)
endif()

@ -19,12 +19,12 @@
#include "common/common_test.h"
#include "mindspore/lite/src/common/file_utils.h"
#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h"
#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h"
#include "mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.h"
#include "mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.h"
using mindspore::kernel::BiasAddOpenCLKernel;
using mindspore::kernel::LiteKernel;
using mindspore::kernel::SubGraphOpenCLKernel;
using mindspore::kernel::OpenCLSubGraph;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
@ -157,7 +157,7 @@ TEST_F(TestBiasAddOpenCL, BiasAddFp32_dim4) {
MS_LOG(INFO) << "initialize sub_graph";
std::vector<kernel::LiteKernel *> kernels{biasadd_kernel};
auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel({input_tensor}, outputs, kernels, kernels, kernels);
auto *sub_graph = new (std::nothrow) kernel::OpenCLSubGraph({input_tensor}, outputs, kernels, kernels, kernels);
if (sub_graph == nullptr) {
MS_LOG(ERROR) << "Create sub_graph kernel error.";
delete input_tensor;

@ -19,7 +19,7 @@
#include "common/common_test.h"
#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h"
#include "mindspore/lite/src/common/file_utils.h"
#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h"
#include "mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.h"
#include "mindspore/lite/src/runtime/kernel/opencl/kernel/cast.h"
// PrimitiveType_Cast: src/ops/populate/cast_populate.cc
@ -93,9 +93,9 @@ TEST_F(TestCastSelfOpenCL, Castfp32tofp16) {
}
MS_LOG(INFO) << " initialize sub_graph ";
std::vector<kernel::LiteKernel *> kernels{cast_kernel};
auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
auto *sub_graph = new (std::nothrow) kernel::OpenCLSubGraph(inputs, outputs, kernels, kernels, kernels);
if (sub_graph == nullptr) {
MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed ";
MS_LOG(INFO) << " new kernel::OpenCLSubGraph failed ";
for (auto tensor : inputs) {
delete tensor;
}
@ -179,9 +179,9 @@ TEST_F(TestCastSelfOpenCL, Castfp16tofp32) {
}
MS_LOG(INFO) << " initialize sub_graph ";
std::vector<kernel::LiteKernel *> kernels{cast_kernel};
auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
auto *sub_graph = new (std::nothrow) kernel::OpenCLSubGraph(inputs, outputs, kernels, kernels, kernels);
if (sub_graph == nullptr) {
MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed ";
MS_LOG(INFO) << " new kernel::OpenCLSubGraph failed ";
for (auto tensor : inputs) {
delete tensor;
}

@ -17,11 +17,11 @@
#include <algorithm>
#include "ut/src/runtime/kernel/opencl/common.h"
#include "src/kernel_registry.h"
#include "src/runtime/kernel/opencl/subgraph_opencl_kernel.h"
#include "src/runtime/kernel/opencl/opencl_subgraph.h"
#include "nnacl/conv_parameter.h"
using mindspore::kernel::LiteKernel;
using mindspore::kernel::SubGraphOpenCLKernel;
using mindspore::kernel::OpenCLSubGraph;
using mindspore::lite::KernelRegistry;
using mindspore::schema::Format::Format_NHWC;
@ -99,12 +99,12 @@ void TestMain(const std::vector<ArgsTupleWithDtype> &input_infos, std::tuple<std
// simulating benchmark: session_->CompileGraph() -> scheduler.Schedule() -> ConstructSubGraphs()
MS_LOG(DEBUG) << "create SubGraph";
std::vector<LiteKernel *> kernels{kernel};
auto sub_graph = new (std::nothrow) SubGraphOpenCLKernel(subgraph_inputs, {&output}, kernels, kernels, kernels);
auto sub_graph = new (std::nothrow) OpenCLSubGraph(subgraph_inputs, {&output}, kernels, kernels, kernels);
if (sub_graph == nullptr) {
return;
}
// simulating benchmark: session_->CompileGraph() -> PrepareKernels() -> SubGraphOpenCLKernel.Prepare()
// simulating benchmark: session_->CompileGraph() -> PrepareKernels() -> OpenCLSubGraph.Prepare()
MS_LOG(DEBUG) << "call sub_graph->Prepare()";
EXPECT_TRUE(sub_graph->Prepare() == RET_OK); // will set Tensor's allocator be OpenCLAllocator
@ -128,7 +128,7 @@ void TestMain(const std::vector<ArgsTupleWithDtype> &input_infos, std::tuple<std
memcpy(input->data_c(), subgraph_inputs_data[input], input->Size());
}
// simulating benchmark: MarkAccuracy() -> session_->RunGraph() -> executor_->Run() -> SubGraphOpenCLKernel->Run()
// simulating benchmark: MarkAccuracy() -> session_->RunGraph() -> executor_->Run() -> OpenCLSubGraph->Run()
MS_LOG(DEBUG) << "run SubGraph & compare result";
EXPECT_TRUE(sub_graph->Run() == RET_OK); // will call UnmapBuffer() for input

@ -21,7 +21,7 @@ namespace mindspore::lite::opencl::test {
class TestOpenCL_Conv2D : public CommonTest {};
namespace {
// PrimitiveType_Concat: src/ops/populate/conv2d_populate.cc
// PrimitiveType_Conv2D: src/ops/populate/conv2d_populate.cc
ConvParameter *CreateParameter(const std::string &attr, ActType act_type) {
auto *param = test::CreateParameter<ConvParameter>(schema::PrimitiveType_Conv2D);
param->act_type_ = act_type;

@ -18,7 +18,7 @@
#include "src/common/log_adapter.h"
#include "common/common_test.h"
#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h"
#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h"
#include "mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.h"
#include "mindspore/lite/src/runtime/kernel/opencl/kernel/fill.h"
using mindspore::lite::Tensor;
using mindspore::schema::PrimitiveType_Fill;
@ -70,9 +70,9 @@ TEST_F(TestFillOpenCLCI, Fp32testfill) {
fill_kernel->Init();
MS_LOG(INFO) << " initialize sub_graph ";
std::vector<kernel::LiteKernel *> kernels{fill_kernel};
auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel({&in_tensor1}, outputs, kernels, kernels, kernels);
auto *sub_graph = new (std::nothrow) kernel::OpenCLSubGraph({&in_tensor1}, outputs, kernels, kernels, kernels);
if (sub_graph == nullptr) {
MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed ";
MS_LOG(INFO) << " new kernel::OpenCLSubGraph failed ";
delete param;
delete fill_kernel;
return;
@ -126,9 +126,9 @@ TEST_F(TestFillOpenCLCI, Fp32testshape) {
fill_kernel->Init();
MS_LOG(INFO) << " initialize sub_graph ";
std::vector<kernel::LiteKernel *> kernels{fill_kernel};
auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel({&in_tensor1}, outputs, kernels, kernels, kernels);
auto *sub_graph = new (std::nothrow) kernel::OpenCLSubGraph({&in_tensor1}, outputs, kernels, kernels, kernels);
if (sub_graph == nullptr) {
MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed ";
MS_LOG(INFO) << " new kernel::OpenCLSubGraph failed ";
delete param;
delete fill_kernel;
return;

@ -18,7 +18,7 @@
#include "src/common/log_adapter.h"
#include "common/common_test.h"
#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h"
#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h"
#include "mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.h"
#include "mindspore/lite/src/runtime/kernel/opencl/kernel/power.h"
// PrimitiveType_Power: src/ops/populate/power_populate.cc
@ -88,9 +88,9 @@ void TEST_MAIN(const T *input_data1, const T *input_data2, const T *expect_data,
MS_LOG(INFO) << " initialize sub_graph ";
std::vector<kernel::LiteKernel *> kernels{power_kernel};
auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
auto *sub_graph = new (std::nothrow) kernel::OpenCLSubGraph(inputs, outputs, kernels, kernels, kernels);
if (sub_graph == nullptr) {
MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed ";
MS_LOG(INFO) << " new kernel::OpenCLSubGraph failed ";
delete param;
delete power_kernel;
return;

@ -20,13 +20,13 @@
#include "common/common_test.h"
#include "mindspore/lite/src/common/file_utils.h"
#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h"
#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h"
#include "mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.h"
#include "mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h"
#include "mindspore/lite/nnacl/prelu_parameter.h"
using mindspore::kernel::LiteKernel;
using mindspore::kernel::OpenCLSubGraph;
using mindspore::kernel::PReluOpenCLKernel;
using mindspore::kernel::SubGraphOpenCLKernel;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
@ -150,7 +150,7 @@ TEST_F(TestPReluOpenCL, PReluFp32_dim4) {
MS_LOG(INFO) << "initialize sub_graph";
std::vector<kernel::LiteKernel *> kernels{prelu_kernel};
auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel({input_tensor}, outputs, kernels, kernels, kernels);
auto *sub_graph = new (std::nothrow) kernel::OpenCLSubGraph({input_tensor}, outputs, kernels, kernels, kernels);
if (sub_graph == nullptr) {
MS_LOG(ERROR) << "Create kernel sub_graph error";
delete input_tensor;

@ -19,7 +19,7 @@
#include "common/common_test.h"
#include "mindspore/lite/src/common/file_utils.h"
#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h"
#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h"
#include "mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.h"
#include "mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h"
namespace mindspore::lite::opencl::test {
@ -69,7 +69,7 @@ TEST_F(TestToFormatOpenCL, ToFormatNHWC2NCHW) {
inputs[0]->MallocData(allocator);
std::vector<kernel::LiteKernel *> kernels{arith_kernel};
auto pGraph_ptr = std::make_unique<kernel::SubGraphOpenCLKernel>(inputs, outputs, kernels, kernels, kernels);
auto pGraph_ptr = std::make_unique<kernel::OpenCLSubGraph>(inputs, outputs, kernels, kernels, kernels);
auto pGraph = pGraph_ptr.get();
if (pGraph == nullptr) {
MS_LOG(ERROR) << "pGraph create error.";

Loading…
Cancel
Save