commit 25adf970b2

@@ -1 +1,3 @@
-nv_library(tensorrt_plugin SRCS trt_plugin.cc split_op_plugin.cu prelu_op_plugin.cu DEPS enforce device_context)
+nv_library(tensorrt_plugin
+  SRCS trt_plugin.cc split_op_plugin.cu elementwise_op_plugin.cu prelu_op_plugin.cu
+  DEPS enforce device_context)
@@ -0,0 +1,138 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <glog/logging.h>
#include "paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.h"

namespace paddle {
namespace inference {
namespace tensorrt {
namespace plugin {

namespace details {

template <typename T>
struct Add {
  __device__ T operator()(const T& a, const T& b) const { return a + b; }
};

template <typename T>
struct Mul {
  __device__ T operator()(const T& a, const T& b) const { return a * b; }
};

template <typename T, typename Operator>
__global__ void ColumnWiseKernel(Operator op, const T* x, const T* y, T* out,
                                 int batch_size, int num_rows, int num_cols) {
  for (int batch_id = 0; batch_id < batch_size; ++batch_id) {
    int row = blockIdx.x;
    for (; row < num_rows; row += gridDim.x) {
      T value_y = y[batch_id * num_rows + row];
      int col = threadIdx.x;
      int offset = (batch_id * num_rows + row) * num_cols;
      for (; col < num_cols; col += blockDim.x) {
        T value_x = x[offset + col];
        out[offset + col] = op(value_x, value_y);
      }
    }
  }
}

template <typename T, typename Operator>
static void ElementWise(Operator op, const T* x, const T* y, T* out,
                        int batch_size, int prev, int midd, int post,
                        cudaStream_t stream) {
  const int kThreadsPerBlock = 1024;
  const int kMaximumBlocks = 65535;
  if (prev == 1) {
    int num_threads = (post > kThreadsPerBlock) ? kThreadsPerBlock
                                                : (((post + 31) >> 5) << 5);
    int num_blocks = (midd < kMaximumBlocks) ? midd : kMaximumBlocks;
    ColumnWiseKernel<<<num_blocks, num_threads, 0, stream>>>(
        op, x, y, out, batch_size, midd, post);
  } else if (post == 1) {
    PADDLE_THROW("Not implemented.");
  } else {
    PADDLE_THROW("Not implemented.");
  }
}

}  // namespace details

nvinfer1::Dims ElementWisePlugin::getOutputDimensions(
    int index, const nvinfer1::Dims* input_dims, int num_inputs) {
  PADDLE_ENFORCE_EQ(index, 0);
  PADDLE_ENFORCE_EQ(num_inputs, 2);
  PADDLE_ENFORCE_NOT_NULL(input_dims);
  return input_dims[0];
}

int ElementWisePlugin::initialize() {
  PADDLE_ENFORCE_GT(dims_y_.nbDims, 0);

  axis_ = (axis_ == -1) ? dims_x_.nbDims - dims_y_.nbDims : axis_;
  int trimed_nb_dims = dims_y_.nbDims;
  for (; trimed_nb_dims > 0; --trimed_nb_dims) {
    if (dims_y_.d[trimed_nb_dims - 1] != 1) {
      break;
    }
  }
  dims_y_.nbDims = trimed_nb_dims;

  PADDLE_ENFORCE_GE(dims_x_.nbDims, dims_y_.nbDims + axis_);
  PADDLE_ENFORCE_LT(axis_, dims_x_.nbDims);

  prev_size_ = 1;
  midd_size_ = 1;
  post_size_ = 1;
  for (int i = 0; i < axis_; ++i) {
    prev_size_ *= dims_x_.d[i];
  }

  for (int i = 0; i < dims_y_.nbDims; ++i) {
    PADDLE_ENFORCE_EQ(dims_x_.d[i + axis_], dims_y_.d[i],
                      "Broadcast dimension mismatch.");
    midd_size_ *= dims_y_.d[i];
  }

  for (int i = axis_ + dims_y_.nbDims; i < dims_x_.nbDims; ++i) {
    post_size_ *= dims_x_.d[i];
  }
  return 0;
}

int ElementWisePlugin::enqueue(int batch_size, const void* const* inputs,
                               void** outputs, void* workspace,
                               cudaStream_t stream) {
  const float* x = reinterpret_cast<const float*>(inputs[0]);
  const float* y = reinterpret_cast<const float*>(inputs[1]);
  float* out = reinterpret_cast<float*>(outputs[0]);

  if (type_ == nvinfer1::ElementWiseOperation::kSUM) {
    details::ElementWise(details::Add<float>(), x, y, out, batch_size,
                         prev_size_, midd_size_, post_size_, stream);
  } else if (type_ == nvinfer1::ElementWiseOperation::kPROD) {
    details::ElementWise(details::Mul<float>(), x, y, out, batch_size,
                         prev_size_, midd_size_, post_size_, stream);
  } else {
    PADDLE_THROW("Not implemented.");
  }

  return cudaGetLastError() != cudaSuccess;
}

}  // namespace plugin
}  // namespace tensorrt
}  // namespace inference
}  // namespace paddle
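
For readers tracing the indexing, here is a minimal host-side reference (illustrative only, not part of this commit) of what ColumnWiseKernel computes in the prev_size_ == 1 case handled above, with kSUM as the operation: x has shape [batch, midd, post], y has shape [batch, midd], and each y value is broadcast along the trailing post dimension. The function name and the use of std::vector are assumptions made for the sketch; the arithmetic mirrors the kernel.

// Host-side reference sketch of the column-wise broadcast (kSUM, prev == 1).
#include <vector>

std::vector<float> ColumnWiseAddReference(const std::vector<float>& x,
                                          const std::vector<float>& y,
                                          int batch_size, int midd, int post) {
  std::vector<float> out(x.size());
  for (int b = 0; b < batch_size; ++b) {
    for (int row = 0; row < midd; ++row) {
      const float value_y = y[b * midd + row];    // one y value per (batch, row)
      const int offset = (b * midd + row) * post;
      for (int col = 0; col < post; ++col) {
        out[offset + col] = x[offset + col] + value_y;  // broadcast along post
      }
    }
  }
  return out;
}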
@@ -0,0 +1,87 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <vector>
#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h"

namespace paddle {
namespace inference {
namespace tensorrt {
namespace plugin {

class ElementWisePlugin : public PluginTensorRT {
 public:
  ElementWisePlugin(nvinfer1::ElementWiseOperation type,
                    nvinfer1::Dims const &dims_x, nvinfer1::Dims const &dims_y,
                    int axis)
      : type_(type),
        dims_x_(dims_x),
        dims_y_(dims_y),
        axis_(axis),
        prev_size_(1),
        midd_size_(1),
        post_size_(1) {}

  ElementWisePlugin(void const *serial_data, size_t serial_length) {
    deserializeBase(serial_data, serial_length);
    DeserializeValue(&serial_data, &serial_length, &axis_);
    DeserializeValue(&serial_data, &serial_length, &dims_x_);
    DeserializeValue(&serial_data, &serial_length, &dims_y_);
  }

  ElementWisePlugin *clone() const override {
    // Recreate the plugin from its configuration; initialize() recomputes the
    // prev/midd/post sizes on the cloned instance.
    return new ElementWisePlugin(type_, dims_x_, dims_y_, axis_);
  }

  const char *getPluginType() const override { return "elementwise"; }

  nvinfer1::Dims getOutputDimensions(int index,
                                     const nvinfer1::Dims *input_dims,
                                     int num_inputs) override;

  int initialize() override;

  // execute the layer
  int enqueue(int batch_size, const void *const *inputs, void **outputs,
              void *workspace, cudaStream_t stream);

 protected:
  size_t getSerializationSize() override {
    return SerializedSize(axis_) + SerializedSize(dims_x_) +
           SerializedSize(dims_y_) + getBaseSerializationSize();
  }

  void serialize(void *buffer) override {
    serializeBase(buffer);
    SerializeValue(&buffer, axis_);
    SerializeValue(&buffer, dims_x_);
    SerializeValue(&buffer, dims_y_);
  }

  nvinfer1::ElementWiseOperation type_;
  nvinfer1::Dims dims_x_;
  nvinfer1::Dims dims_y_;
  int axis_;
  int prev_size_;
  int midd_size_;
  int post_size_;
};

}  // namespace plugin
}  // namespace tensorrt
}  // namespace inference
}  // namespace paddle
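
The serialize()/getSerializationSize() pair and the deserializing constructor above rely on the SerializedSize, SerializeValue, and DeserializeValue helpers provided elsewhere in the plugin library (reached through the trt_plugin.h include). As a rough mental model only, and assuming those helpers treat trivially copyable values such as int and nvinfer1::Dims as raw bytes (an assumption about their implementation, not a description of it), they behave like the sketch below, advancing the buffer cursor symmetrically on the write and read sides. The "Sketch" suffix marks these names as hypothetical.

// Hypothetical sketch of the serialization helper contract for POD values.
#include <cstddef>
#include <cstring>

template <typename T>
size_t SerializedSizeSketch(const T&) {
  return sizeof(T);  // a trivially copyable payload occupies exactly sizeof(T)
}

template <typename T>
void SerializeValueSketch(void **buffer, const T &value) {
  std::memcpy(*buffer, &value, sizeof(T));             // write raw bytes
  *buffer = static_cast<char *>(*buffer) + sizeof(T);  // advance write cursor
}

template <typename T>
void DeserializeValueSketch(void const **buffer, size_t *remaining, T *value) {
  std::memcpy(value, *buffer, sizeof(T));                    // read raw bytes
  *buffer = static_cast<char const *>(*buffer) + sizeof(T);  // advance read cursor
  *remaining -= sizeof(T);                                   // shrink remaining length
}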