!8443 [MS][LITE][GPU]lite gpu add spacetodepth

From: @chenzupeng
Reviewed-by: @ddwsky,@zhang_xue_tong
Signed-off-by: @ddwsky
pull/8443/MERGE
mindspore-ci-bot 5 years ago committed by Gitee
commit 8aa78c2c8e

@ -0,0 +1,56 @@
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#endif
#define C4NUM 4
__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
__kernel void SpaceToDepth(__read_only image2d_t src_data, __write_only image2d_t dst_data, int4 in_shape,
int4 out_shape, int block_size, int ci_size) {
int X = get_global_id(0); // C4
int Y = get_global_id(1); // W
int Z = get_global_id(2); // H * N
if (X >= out_shape.w || Y >= out_shape.z || Z >= out_shape.x * out_shape.y) return;
int N = Z / out_shape.y;
int H = Z % out_shape.y;
int co_base = X * C4NUM;
FLT result[C4NUM] = {0.f};
for (int i = 0; i < C4NUM; i++) {
int co = co_base + i;
int ci = co % ci_size;
int hw_block = co / ci_size;
int hi = H * block_size + hw_block / block_size;
int wi = Y * block_size + hw_block % block_size;
int ci4 = ci / C4NUM;
int ci4_ramainder = ci % C4NUM;
FLT4 tmp = READ_IMAGE(src_data, smp_zero, (int2)(wi * in_shape.w + ci4, N * in_shape.y + hi));
if (ci4_ramainder == 0) {
result[i] = tmp.x;
} else if (ci4_ramainder == 1) {
result[i] = tmp.y;
} else if (ci4_ramainder == 2) {
result[i] = tmp.z;
} else {
result[i] = tmp.w;
}
}
FLT4 result_flt4 = {result[0], result[1], result[2], result[3]};
WRITE_IMAGE(dst_data, (int2)(Y * out_shape.w + X, Z), result_flt4);
}
__kernel void SpaceToDepthAlign(__read_only image2d_t src_data, __write_only image2d_t dst_data, int4 in_shape,
int4 out_shape, int block_size, int ci_size) {
int X = get_global_id(0); // C4
int Y = get_global_id(1); // W
int Z = get_global_id(2); // H * N
if (X >= out_shape.w || Y >= out_shape.z || Z >= out_shape.x * out_shape.y) return;
int N = Z / out_shape.y;
int H = Z % out_shape.y;
int ni = N;
int ci = X % in_shape.w;
int hw_block = X / in_shape.w;
int hi = H * block_size + hw_block / block_size;
int wi = Y * block_size + hw_block % block_size;
WRITE_IMAGE(dst_data, (int2)(Y * out_shape.w + X, Z),
READ_IMAGE(src_data, smp_zero, (int2)(wi * in_shape.w + ci, ni * in_shape.y + hi)));
}

@ -0,0 +1,87 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <set>
#include <string>
#include <map>
#include "include/errorcode.h"
#include "src/kernel_registry.h"
#include "src/runtime/kernel/opencl/kernel/space_to_depth.h"
#include "src/runtime/kernel/opencl/cl/space_to_depth.cl.inc"
using mindspore::kernel::KERNEL_ARCH::kGPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_NULL_PTR;
using mindspore::lite::RET_OK;
using mindspore::lite::RET_PARAM_INVALID;
using mindspore::schema::PrimitiveType_SpaceToDepth;
namespace mindspore::kernel {
int SpaceToDepthOpenCLKernel::CheckSpecs() { return RET_OK; }
int SpaceToDepthOpenCLKernel::Prepare() {
std::string kernel_name;
in_shape_ = Image2DInfo(in_tensors_[0]);
out_shape_ = Image2DInfo(out_tensors_[0]);
if (in_shape_.C % 4 != 0) {
kernel_name = "SpaceToDepth";
} else {
kernel_name = "SpaceToDepthAlign";
}
#ifdef PROGRAM_WITH_IL
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
#else
std::set<std::string> build_options;
std::string source = space_to_depth_source;
std::string program_name = "SpaceToDepth";
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
SetConstArgs();
SetGlobalLocal();
MS_LOG(DEBUG) << kernel_name << " Init Done!";
return mindspore::lite::RET_OK;
}
void SpaceToDepthOpenCLKernel::SetConstArgs() {
cl_int4 cl_in_shape = {static_cast<cl_int>(in_shape_.N), static_cast<cl_int>(in_shape_.H),
static_cast<cl_int>(in_shape_.W), static_cast<cl_int>(in_shape_.Slice)};
cl_int4 cl_out_shape = {static_cast<cl_int>(out_shape_.N), static_cast<cl_int>(out_shape_.H),
static_cast<cl_int>(out_shape_.W), static_cast<cl_int>(out_shape_.Slice)};
auto param = reinterpret_cast<SpaceToDepthParameter *>(op_parameter_);
int arg_idx = 2;
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_in_shape);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_out_shape);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, param->block_size_);
int ci_size = in_shape_.C;
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, ci_size);
}
void SpaceToDepthOpenCLKernel::SetGlobalLocal() {
global_range_ = {out_shape_.Slice, out_shape_.W, out_shape_.H * out_shape_.N};
}
int SpaceToDepthOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!";
int arg_idx = 0;
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr);
return mindspore::lite::RET_OK;
}
REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_SpaceToDepth, OpenCLKernelCreator<SpaceToDepthOpenCLKernel>)
REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_SpaceToDepth, OpenCLKernelCreator<SpaceToDepthOpenCLKernel>)
} // namespace mindspore::kernel

@ -0,0 +1,47 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_SAPCE_TO_DEPTH_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_SAPCE_TO_DEPTH_H_
#include <vector>
#include <string>
#include "src/lite_kernel.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "nnacl/fp32/space_to_depth.h"
namespace mindspore::kernel {
class SpaceToDepthOpenCLKernel : public OpenCLKernel {
public:
SpaceToDepthOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: OpenCLKernel(parameter, inputs, outputs) {}
~SpaceToDepthOpenCLKernel() override = default;
int Run() override;
int Prepare() override;
int CheckSpecs() override;
void SetConstArgs() override;
void SetGlobalLocal() override;
private:
cl::Kernel kernel_;
Image2DInfo in_shape_ = Image2DInfo(nullptr);
Image2DInfo out_shape_ = Image2DInfo(nullptr);
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_SAPCE_TO_DEPTH_H_
Loading…
Cancel
Save