!10405 [MS][LITE][Develop] add new ops named layer_norm for gpu
From: @pengyongrong Reviewed-by: @ddwsky,@zhanghaibo5 Signed-off-by: @ddwskypull/10405/MERGE
commit
c243fb92bd
@ -0,0 +1,103 @@
|
|||||||
|
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||||
|
__constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
|
||||||
|
#define UP_DIV(x, y) (((x) + (y) - (1)) / (y))
|
||||||
|
#define C4NUM 4
|
||||||
|
|
||||||
|
__kernel void ComputeMeanVarDim1NHWC4(__read_only image2d_t src_data, __global FLT *mean_, __global FLT *variance_,
|
||||||
|
int4 in_shape, int normalized_shape_size) {
|
||||||
|
int X = get_global_id(0); // n*h
|
||||||
|
int Y = get_global_id(1); // w
|
||||||
|
if (X > in_shape.x * in_shape.y || Y > in_shape.z || in_shape.y == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
int n = X / in_shape.y;
|
||||||
|
int h = X % in_shape.y;
|
||||||
|
int w = Y;
|
||||||
|
int ci4 = UP_DIV(in_shape.w, C4NUM);
|
||||||
|
int remainder = in_shape.w % C4NUM;
|
||||||
|
FLT4 mean_temp = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||||
|
FLT4 var_temp = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||||
|
FLT mean = 0.0f;
|
||||||
|
FLT var = 0.0f;
|
||||||
|
|
||||||
|
// compute mean
|
||||||
|
for (int i = 0; i < ci4; ++i) {
|
||||||
|
FLT4 result_temp = READ_IMAGE(src_data, smp_none, (int2)(w * ci4 + i, n * in_shape.y + h));
|
||||||
|
mean_temp += result_temp;
|
||||||
|
}
|
||||||
|
mean = (mean_temp.x + mean_temp.y + mean_temp.z + mean_temp.w) / normalized_shape_size;
|
||||||
|
mean_temp.x = mean_temp.y = mean_temp.z = mean_temp.w = mean;
|
||||||
|
|
||||||
|
// compute var
|
||||||
|
for (int i = 0; i < ci4; ++i) {
|
||||||
|
FLT4 result_temp = READ_IMAGE(src_data, smp_none, (int2)(w * ci4 + i, n * in_shape.y + h));
|
||||||
|
if ((i + 1) * C4NUM <= in_shape.w) {
|
||||||
|
var_temp += (result_temp - mean_temp) * (result_temp - mean_temp);
|
||||||
|
} else {
|
||||||
|
if (remainder == 1) {
|
||||||
|
mean_temp.x = mean;
|
||||||
|
mean_temp.y = mean_temp.z = mean_temp.w = 0.0f;
|
||||||
|
} else if (remainder == 2) {
|
||||||
|
mean_temp.x = mean_temp.y = mean;
|
||||||
|
mean_temp.z = mean_temp.w = 0.0f;
|
||||||
|
} else {
|
||||||
|
mean_temp.x = mean_temp.y = mean_temp.z = mean;
|
||||||
|
mean_temp.w = 0.0f;
|
||||||
|
}
|
||||||
|
var_temp += (result_temp - mean_temp) * (result_temp - mean_temp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
var = (var_temp.x + var_temp.y + var_temp.z + var_temp.w) / normalized_shape_size;
|
||||||
|
|
||||||
|
// write result to dst
|
||||||
|
int postion = (n * in_shape.y + h) * in_shape.z + w;
|
||||||
|
mean_[postion] = mean;
|
||||||
|
variance_[postion] = var;
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel void LayerNormalization_NHWC4(__read_only image2d_t src_data, __write_only image2d_t dst_data,
|
||||||
|
__global FLT *mean_, __global FLT *variance_, __global FLT *gamma_,
|
||||||
|
__global FLT *beta_, int4 in_shape, float epsilon_, int normalized_dims_,
|
||||||
|
int elementwise_affine_) {
|
||||||
|
int X = get_global_id(0); // n*h
|
||||||
|
int Y = get_global_id(1); // w
|
||||||
|
int Z = get_global_id(2); // c4
|
||||||
|
if (X >= in_shape.x * in_shape.y || Y >= in_shape.z || Z >= in_shape.w || in_shape.y == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
int n = X / in_shape.y;
|
||||||
|
int h = X % in_shape.y;
|
||||||
|
int w = Y;
|
||||||
|
int c = Z;
|
||||||
|
int ci4 = UP_DIV(in_shape.w, C4NUM);
|
||||||
|
int postion_mv = 0;
|
||||||
|
int postion_gb = 0;
|
||||||
|
if (normalized_dims_ == 1) {
|
||||||
|
postion_mv = (n * in_shape.y + h) * in_shape.z + w;
|
||||||
|
postion_gb = c * C4NUM;
|
||||||
|
} else if (normalized_dims_ == 2) {
|
||||||
|
postion_mv = n * in_shape.y + h;
|
||||||
|
postion_gb = w * ci4 * C4NUM + c * C4NUM;
|
||||||
|
} else if (normalized_dims_ == 3) {
|
||||||
|
postion_mv = n;
|
||||||
|
postion_gb = (h * in_shape.z + w) * ci4 * C4NUM + c * C4NUM;
|
||||||
|
}
|
||||||
|
FLT4 result = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||||
|
FLT4 result_in = READ_IMAGE(src_data, smp_none, (int2)(w * ci4 + c, n * in_shape.y + h));
|
||||||
|
if (elementwise_affine_) {
|
||||||
|
result.x = ((result_in.x - mean_[postion_mv]) / sqrt(variance_[postion_mv] + epsilon_)) * gamma_[postion_gb] +
|
||||||
|
beta_[postion_gb];
|
||||||
|
result.y = ((result_in.y - mean_[postion_mv]) / sqrt(variance_[postion_mv] + epsilon_)) * gamma_[postion_gb + 1] +
|
||||||
|
beta_[postion_gb + 1];
|
||||||
|
result.z = ((result_in.z - mean_[postion_mv]) / sqrt(variance_[postion_mv] + epsilon_)) * gamma_[postion_gb + 2] +
|
||||||
|
beta_[postion_gb + 2];
|
||||||
|
result.w = ((result_in.w - mean_[postion_mv]) / sqrt(variance_[postion_mv] + epsilon_)) * gamma_[postion_gb + 3] +
|
||||||
|
beta_[postion_gb + 3];
|
||||||
|
} else {
|
||||||
|
result.x = ((result_in.x - mean_[postion_mv]) / sqrt(variance_[postion_mv] + epsilon_));
|
||||||
|
result.y = ((result_in.y - mean_[postion_mv]) / sqrt(variance_[postion_mv] + epsilon_));
|
||||||
|
result.z = ((result_in.z - mean_[postion_mv]) / sqrt(variance_[postion_mv] + epsilon_));
|
||||||
|
result.w = ((result_in.w - mean_[postion_mv]) / sqrt(variance_[postion_mv] + epsilon_));
|
||||||
|
}
|
||||||
|
WRITE_IMAGE(dst_data, (int2)((w * ci4 + c), (n * in_shape.y + h)), result);
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,61 @@
|
|||||||
|
/**
|
||||||
|
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_LAYER_NORM_H_
|
||||||
|
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_LAYER_NORM_H_
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include "src/runtime/kernel/opencl/opencl_kernel.h"
|
||||||
|
|
||||||
|
namespace mindspore::kernel {
|
||||||
|
|
||||||
|
class LayerNormOpenCLKernel : public OpenCLKernel {
|
||||||
|
public:
|
||||||
|
LayerNormOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
|
||||||
|
const std::vector<lite::Tensor *> &outputs)
|
||||||
|
: OpenCLKernel(parameter, inputs, outputs) {}
|
||||||
|
|
||||||
|
~LayerNormOpenCLKernel() override = default;
|
||||||
|
|
||||||
|
int Run() override;
|
||||||
|
int Prepare() override;
|
||||||
|
|
||||||
|
int CheckSpecs() override;
|
||||||
|
void SetConstArgs() override;
|
||||||
|
void SetGlobalLocal() override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
int Initweight();
|
||||||
|
void GetMeanVar();
|
||||||
|
|
||||||
|
private:
|
||||||
|
cl::Kernel kernel_mean_var_;
|
||||||
|
cl::NDRange global_mean_var_, local_mean_var_;
|
||||||
|
bool use_fp16_enable_{false};
|
||||||
|
void *gamma_{nullptr};
|
||||||
|
void *mean_{nullptr};
|
||||||
|
void *var_{nullptr};
|
||||||
|
void *beta_{nullptr};
|
||||||
|
cl_int4 in_shape_{};
|
||||||
|
int elementwise_affine_;
|
||||||
|
int32_t normalized_dims_{1};
|
||||||
|
int normalized_shape_size_{1};
|
||||||
|
float epsilon_{0.0f};
|
||||||
|
cl::Kernel kernel_;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace mindspore::kernel
|
||||||
|
#endif
|
@ -0,0 +1,62 @@
|
|||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#include "ut/src/runtime/kernel/opencl/common.h"
|
||||||
|
#include "nnacl/layer_norm_parameter.h"
|
||||||
|
|
||||||
|
namespace mindspore::lite::opencl::test {
|
||||||
|
|
||||||
|
class TestOpenCL_LayerNorm : public CommonTest {};
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
// PrimitiveType_Stack: src/ops/populate/stack_populate.cc
|
||||||
|
OpParameter *CreateParameter(float epsilon, int normalized_dims_, std::vector<int> normalizedShape) {
|
||||||
|
auto *param = test::CreateParameter<LayerNormParameter>(schema::PrimitiveType_LayerNorm);
|
||||||
|
param->elementwise_mode_ = ELEMENTWISE_PER_CHANNEL;
|
||||||
|
param->epsilon_ = epsilon;
|
||||||
|
param->normalized_dims_ = normalized_dims_;
|
||||||
|
for (int i = 0; i < normalizedShape.size() && i < normalized_dims_; ++i) {
|
||||||
|
param->normalized_shape_[i] = normalizedShape[i];
|
||||||
|
}
|
||||||
|
return reinterpret_cast<OpParameter *>(param);
|
||||||
|
}
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
TEST_F(TestOpenCL_LayerNorm, test1) {
|
||||||
|
float epsilon = 1e-5;
|
||||||
|
int normalized_dims_ = 1;
|
||||||
|
std::vector<int> normalizedShape = {5};
|
||||||
|
std::vector<int> input_shape = {2, 3, 4, 5};
|
||||||
|
std::vector<int> gamma_shape = {1, 1, 1, 5};
|
||||||
|
std::vector<int> beta_shape = {1, 1, 1, 5};
|
||||||
|
std::vector<int> output_shape = {2, 3, 4, 5};
|
||||||
|
size_t input_size, gamma_size, beta_size, output_size;
|
||||||
|
std::string inputPpath = "./test_data/layernormfp32_input.bin";
|
||||||
|
std::string gammaPpath = "./test_data/gammafp32_input.bin";
|
||||||
|
std::string betaPpath = "./test_data/betafp32_input.bin";
|
||||||
|
std::string correctOutputPath = "./test_data/layernormfp32_output.bin";
|
||||||
|
auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(inputPpath.c_str(), &input_size));
|
||||||
|
auto gamma_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(gammaPpath.c_str(), &gamma_size));
|
||||||
|
auto beta_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(betaPpath.c_str(), &beta_size));
|
||||||
|
auto output_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(correctOutputPath.c_str(), &output_size));
|
||||||
|
for (auto fp16_enable : {false}) {
|
||||||
|
auto *param = CreateParameter(epsilon, normalized_dims_, normalizedShape);
|
||||||
|
|
||||||
|
TestMain(
|
||||||
|
{{input_shape, input_data, VAR}, {gamma_shape, gamma_data, CONST_TENSOR}, {beta_shape, beta_data, CONST_TENSOR}},
|
||||||
|
{output_shape, output_data}, param, fp16_enable, fp16_enable ? 1e-3 : 1e-6);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} // namespace mindspore::lite::opencl::test
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in new issue