|
|
|
|
@ -237,8 +237,8 @@ void Pool2dDirectCUDAFunctor<PoolProcess, T>::operator()(
|
|
|
|
|
const T* input, const std::vector<int>& input_shape,
|
|
|
|
|
const std::vector<int>& output_shape, const std::vector<int>& ksize,
|
|
|
|
|
const std::vector<int>& strides, const std::vector<int>& paddings,
|
|
|
|
|
PoolProcess pool_compute, bool exclusive, bool adaptive, T* output,
|
|
|
|
|
cudaStream_t stream) {
|
|
|
|
|
bool exclusive, bool adaptive, T* output, gpuStream_t stream,
|
|
|
|
|
PoolProcess pool_compute) {
|
|
|
|
|
const int batch_size = input_shape[0];
|
|
|
|
|
const int input_channels = input_shape[1];
|
|
|
|
|
const int input_height = input_shape[2];
|
|
|
|
|
@ -277,8 +277,9 @@ class Pool2dFunctor<platform::CUDADeviceContext, PoolProcess, T> {
|
|
|
|
|
void operator()(const platform::CUDADeviceContext& context,
|
|
|
|
|
const framework::Tensor& input, const std::vector<int>& ksize,
|
|
|
|
|
const std::vector<int>& strides,
|
|
|
|
|
const std::vector<int>& paddings, PoolProcess pool_process,
|
|
|
|
|
bool exclusive, bool adaptive, framework::Tensor* output) {
|
|
|
|
|
const std::vector<int>& paddings, bool exclusive,
|
|
|
|
|
bool adaptive, framework::Tensor* output,
|
|
|
|
|
PoolProcess pool_process) {
|
|
|
|
|
const int batch_size = input.dims()[0];
|
|
|
|
|
const int input_channels = input.dims()[1];
|
|
|
|
|
const int input_height = input.dims()[2];
|
|
|
|
|
@ -311,8 +312,8 @@ class Pool2dFunctor<platform::CUDADeviceContext, PoolProcess, T> {
|
|
|
|
|
const framework::Tensor& input, const std::vector<int>& ksize,
|
|
|
|
|
const std::vector<int>& strides,
|
|
|
|
|
const std::vector<int>& paddings,
|
|
|
|
|
const std::string data_format, PoolProcess pool_process,
|
|
|
|
|
bool exclusive, bool adaptive, framework::Tensor* output) {
|
|
|
|
|
const std::string data_format, bool exclusive, bool adaptive,
|
|
|
|
|
framework::Tensor* output, PoolProcess pool_process) {
|
|
|
|
|
bool channel_last = (data_format == "NHWC");
|
|
|
|
|
const int batch_size = input.dims()[0];
|
|
|
|
|
|
|
|
|
|
@ -367,9 +368,9 @@ class Pool2dGradFunctor<platform::CUDADeviceContext, PoolProcess, T> {
|
|
|
|
|
const framework::Tensor& output_grad,
|
|
|
|
|
const std::vector<int>& ksize,
|
|
|
|
|
const std::vector<int>& strides,
|
|
|
|
|
const std::vector<int>& paddings, PoolProcess pool_process,
|
|
|
|
|
bool exclusive, bool adaptive,
|
|
|
|
|
framework::Tensor* input_grad) {
|
|
|
|
|
const std::vector<int>& paddings, bool exclusive,
|
|
|
|
|
bool adaptive, framework::Tensor* input_grad,
|
|
|
|
|
PoolProcess pool_process) {
|
|
|
|
|
const int batch_size = input.dims()[0];
|
|
|
|
|
const int input_channels = input.dims()[1];
|
|
|
|
|
const int input_height = input.dims()[2];
|
|
|
|
|
@ -399,13 +400,15 @@ class Pool2dGradFunctor<platform::CUDADeviceContext, PoolProcess, T> {
|
|
|
|
|
ksize_width, stride_height, stride_width, padding_height, padding_width,
|
|
|
|
|
pool_process, exclusive, adaptive, input_grad_data);
|
|
|
|
|
}
|
|
|
|
|
void operator()(
|
|
|
|
|
const platform::CUDADeviceContext& context,
|
|
|
|
|
const framework::Tensor& input, const framework::Tensor& output,
|
|
|
|
|
const framework::Tensor& output_grad, const std::vector<int>& ksize,
|
|
|
|
|
const std::vector<int>& strides, const std::vector<int>& paddings,
|
|
|
|
|
const std::string data_format, PoolProcess pool_process, bool exclusive,
|
|
|
|
|
bool adaptive, framework::Tensor* input_grad) {
|
|
|
|
|
void operator()(const platform::CUDADeviceContext& context,
|
|
|
|
|
const framework::Tensor& input,
|
|
|
|
|
const framework::Tensor& output,
|
|
|
|
|
const framework::Tensor& output_grad,
|
|
|
|
|
const std::vector<int>& ksize,
|
|
|
|
|
const std::vector<int>& strides,
|
|
|
|
|
const std::vector<int>& paddings,
|
|
|
|
|
const std::string data_format, bool exclusive, bool adaptive,
|
|
|
|
|
framework::Tensor* input_grad, PoolProcess pool_process) {
|
|
|
|
|
bool channel_last = (data_format == "NHWC");
|
|
|
|
|
|
|
|
|
|
const int batch_size = input.dims()[0];
|
|
|
|
|
@ -881,8 +884,9 @@ class Pool3dFunctor<platform::CUDADeviceContext, PoolProcess, T> {
|
|
|
|
|
void operator()(const platform::CUDADeviceContext& context,
|
|
|
|
|
const framework::Tensor& input, const std::vector<int>& ksize,
|
|
|
|
|
const std::vector<int>& strides,
|
|
|
|
|
const std::vector<int>& paddings, PoolProcess pool_process,
|
|
|
|
|
bool exclusive, bool adaptive, framework::Tensor* output) {
|
|
|
|
|
const std::vector<int>& paddings, bool exclusive,
|
|
|
|
|
bool adaptive, framework::Tensor* output,
|
|
|
|
|
PoolProcess pool_process) {
|
|
|
|
|
const int batch_size = input.dims()[0];
|
|
|
|
|
const int input_channels = input.dims()[1];
|
|
|
|
|
const int input_depth = input.dims()[2];
|
|
|
|
|
@ -922,8 +926,8 @@ class Pool3dFunctor<platform::CUDADeviceContext, PoolProcess, T> {
|
|
|
|
|
const framework::Tensor& input, const std::vector<int>& ksize,
|
|
|
|
|
const std::vector<int>& strides,
|
|
|
|
|
const std::vector<int>& paddings,
|
|
|
|
|
const std::string data_format, PoolProcess pool_process,
|
|
|
|
|
bool exclusive, bool adaptive, framework::Tensor* output) {
|
|
|
|
|
const std::string data_format, bool exclusive, bool adaptive,
|
|
|
|
|
framework::Tensor* output, PoolProcess pool_process) {
|
|
|
|
|
bool channel_last = (data_format == "NDHWC");
|
|
|
|
|
const int batch_size = input.dims()[0];
|
|
|
|
|
|
|
|
|
|
@ -988,9 +992,9 @@ class Pool3dGradFunctor<platform::CUDADeviceContext, PoolProcess, T> {
|
|
|
|
|
const framework::Tensor& output_grad,
|
|
|
|
|
const std::vector<int>& ksize,
|
|
|
|
|
const std::vector<int>& strides,
|
|
|
|
|
const std::vector<int>& paddings, PoolProcess pool_process,
|
|
|
|
|
bool exclusive, bool adaptive,
|
|
|
|
|
framework::Tensor* input_grad) {
|
|
|
|
|
const std::vector<int>& paddings, bool exclusive,
|
|
|
|
|
bool adaptive, framework::Tensor* input_grad,
|
|
|
|
|
PoolProcess pool_process) {
|
|
|
|
|
const int batch_size = input.dims()[0];
|
|
|
|
|
const int input_channels = input.dims()[1];
|
|
|
|
|
const int input_depth = input.dims()[2];
|
|
|
|
|
@ -1028,13 +1032,15 @@ class Pool3dGradFunctor<platform::CUDADeviceContext, PoolProcess, T> {
|
|
|
|
|
stride_height, stride_width, padding_depth, padding_height,
|
|
|
|
|
padding_width, pool_process, exclusive, adaptive, input_grad_data);
|
|
|
|
|
}
|
|
|
|
|
void operator()(
|
|
|
|
|
const platform::CUDADeviceContext& context,
|
|
|
|
|
const framework::Tensor& input, const framework::Tensor& output,
|
|
|
|
|
const framework::Tensor& output_grad, const std::vector<int>& ksize,
|
|
|
|
|
const std::vector<int>& strides, const std::vector<int>& paddings,
|
|
|
|
|
const std::string data_format, PoolProcess pool_process, bool exclusive,
|
|
|
|
|
bool adaptive, framework::Tensor* input_grad) {
|
|
|
|
|
void operator()(const platform::CUDADeviceContext& context,
|
|
|
|
|
const framework::Tensor& input,
|
|
|
|
|
const framework::Tensor& output,
|
|
|
|
|
const framework::Tensor& output_grad,
|
|
|
|
|
const std::vector<int>& ksize,
|
|
|
|
|
const std::vector<int>& strides,
|
|
|
|
|
const std::vector<int>& paddings,
|
|
|
|
|
const std::string data_format, bool exclusive, bool adaptive,
|
|
|
|
|
framework::Tensor* input_grad, PoolProcess pool_process) {
|
|
|
|
|
bool channel_last = (data_format == "NDHWC");
|
|
|
|
|
|
|
|
|
|
const int batch_size = input.dims()[0];
|
|
|
|
|
|