You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Paddle/paddle/fluid/inference/tensorrt/plugin/gelu_op_plugin.cu

77 lines
2.7 KiB

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cassert>
#include <cstring>
#include <vector>
#include "paddle/fluid/inference/tensorrt/plugin/gelu_op_plugin.h"
#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.h"
namespace paddle {
namespace inference {
namespace tensorrt {
namespace plugin {
// constants for approximating the normal cdf
constexpr float A = 1.41421356237309504; // sqrt(2)
GeluPlugin* CreateGeluPluginDeserialize(const void* buffer, size_t length) {
return new GeluPlugin(buffer, length);
}
REGISTER_TRT_PLUGIN("gelu plugin", CreateGeluPluginDeserialize);
nvinfer1::Dims GeluPlugin::getOutputDimensions(int index,
const nvinfer1::Dims* in_dims,
int nb_inputs) {
assert(nb_inputs == 1);
assert(index < this->getNbOutputs());
nvinfer1::Dims const& input_dims = in_dims[0];
nvinfer1::Dims output_dims = input_dims;
return output_dims;
}
template <typename T, unsigned TPB>
__global__ void geluKernel(const T a, int n, const T* input, T* output) {
const int idx = blockIdx.x * TPB + threadIdx.x;
if (idx < n) {
const T in = input[idx];
const T cdf = 0.5 * (1.0 + erf(in * 0.5 * a));
output[idx] = in * cdf;
}
}
int computeGelu(cudaStream_t stream, int n, const float* input, float* output) {
constexpr int blockSize = 256;
const int gridSize = (n + blockSize - 1) / blockSize;
geluKernel<float, blockSize><<<gridSize, blockSize, 0, stream>>>(A, n, input,
output);
cudaError_t error = cudaGetLastError();
if (error != cudaSuccess) LOG(ERROR) << cudaGetErrorString(error);
return 0;
}
int GeluPlugin::enqueue(int batchSize, const void* const* inputs,
void** outputs, void*, cudaStream_t stream) {
int status = -1;
const float* input = static_cast<const float*>(inputs[0]);
float* output = static_cast<float*>(outputs[0]);
status = computeGelu(stream, input_volume_ * batchSize, input, output);
return status;
}
} // namespace plugin
} // namespace tensorrt
} // namespace inference
} // namespace paddle