parent e9325ea8b1
commit 1bb0e2943b
@@ -0,0 +1,34 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/operators/pool_cudnn_op.h"

namespace ops = paddle::operators;

REGISTER_OP(pool2d_cudnn, ops::PoolOp, ops::Pool2dOpMaker, pool2d_cudnn_grad,
            ops::PoolOpGrad);

REGISTER_OP_CPU_KERNEL(pool2d_cudnn,
                       ops::PoolKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(pool2d_cudnn_grad,
                       ops::PoolGradKernel<paddle::platform::CPUPlace, float>);

// REGISTER_OP(pool3d_cudnn, ops::PoolOp, ops::Pool3dOpMaker, pool3d_cudnn_grad,
//             ops::PoolOpGrad);
//
// REGISTER_OP_CPU_KERNEL(pool3d_cudnn,
//                        ops::PoolKernel<paddle::platform::CPUPlace, float>);
// REGISTER_OP_CPU_KERNEL(pool3d_cudnn_grad,
//                        ops::PoolGradKernel<paddle::platform::CPUPlace,
//                                            float>);
@@ -0,0 +1,174 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/operators/pool_cudnn_op.h"
#include "paddle/platform/cudnn_helper.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;
using ScopedTensorDescriptor = platform::ScopedTensorDescriptor;
using ScopedPoolingDescriptor = platform::ScopedPoolingDescriptor;
using DataLayout = platform::DataLayout;
using PoolingMode = platform::PoolingMode;

// NOTE: copied from conv_cudnn
std::vector<int> Dims2Vector(const framework::DDim &dims) {
  std::vector<int> ret;
  for (int i = 0; i < dims.size(); i++) {
    ret.push_back(dims[i]);
  }
  return ret;
}

template <typename T>
class PoolCudnnOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
                   "It must use GPUPlace.");

    const Tensor *input = ctx.Input<Tensor>("X");
    Tensor *output = ctx.Output<Tensor>("Out");

    const T *input_data = input->data<T>();
    T *output_data = output->mutable_data<T>(ctx.GetPlace());

    std::string pooling_type = ctx.Attr<std::string>("poolingType");
    std::vector<int> ksize = ctx.Attr<std::vector<int>>("ksize");
    std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
    std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
    if (ctx.Attr<bool>("globalPooling")) {
      // Global pooling: the window covers the whole spatial extent.
      for (size_t i = 0; i < ksize.size(); ++i) {
        ksize[i] = static_cast<int>(input->dims()[i + 2]);
      }
    }

    // ------------------- cudnn descriptors ---------------------
    ScopedTensorDescriptor input_desc;
    ScopedTensorDescriptor output_desc;
    ScopedPoolingDescriptor pool_desc;
    DataLayout layout = DataLayout::kNCHW;

    cudnnTensorDescriptor_t cudnn_input_desc =
        input_desc.descriptor<T>(layout, Dims2Vector(input->dims()));
    cudnnTensorDescriptor_t cudnn_output_desc =
        output_desc.descriptor<T>(layout, Dims2Vector(output->dims()));

    PoolingMode pooling_mode;
    if (pooling_type == "max") {
      pooling_mode = PoolingMode::kMaximum;
    } else {
      pooling_mode = PoolingMode::kAverage;
    }

    cudnnPoolingDescriptor_t cudnn_pool_desc =
        pool_desc.descriptor(pooling_mode, ksize, paddings, strides);

    // ------------------- cudnn pool algorithm ---------------------
    auto handle = ctx.cuda_device_context().cudnn_handle();
    T alpha = 1.0f, beta = 0.0f;

    PADDLE_ENFORCE(platform::dynload::cudnnPoolingForward(
        handle, cudnn_pool_desc, &alpha, cudnn_input_desc, input_data, &beta,
        cudnn_output_desc, output_data));
  }
};

template <typename T>
class PoolCudnnGradOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
                   "It must use GPUPlace.");

    const Tensor *input = ctx.Input<Tensor>("X");
    const Tensor *output = ctx.Input<Tensor>("Out");
    const Tensor *output_grad =
        ctx.Input<Tensor>(framework::GradVarName("Out"));
    Tensor *input_grad = ctx.Output<Tensor>(framework::GradVarName("X"));

    std::string pooling_type = ctx.Attr<std::string>("poolingType");
    std::vector<int> ksize = ctx.Attr<std::vector<int>>("ksize");
    std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
    std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");

    if (ctx.Attr<bool>("globalPooling")) {
      for (size_t i = 0; i < ksize.size(); ++i)
        ksize[i] = static_cast<int>(input->dims()[i + 2]);
    }

    const T *input_data = input->data<T>();
    const T *output_data = output->data<T>();
    const T *output_grad_data = output_grad->data<T>();

    // ------------------- cudnn descriptors ---------------------
    ScopedTensorDescriptor input_desc;
    ScopedTensorDescriptor output_desc;
    ScopedTensorDescriptor input_grad_desc;
    ScopedTensorDescriptor output_grad_desc;
    ScopedPoolingDescriptor pool_desc;
    DataLayout layout = DataLayout::kNCHW;

    cudnnTensorDescriptor_t cudnn_input_desc =
        input_desc.descriptor<T>(layout, Dims2Vector(input->dims()));
    cudnnTensorDescriptor_t cudnn_output_desc =
        output_desc.descriptor<T>(layout, Dims2Vector(output->dims()));
    cudnnTensorDescriptor_t cudnn_output_grad_desc =
        output_grad_desc.descriptor<T>(layout,
                                       Dims2Vector(output_grad->dims()));

    PoolingMode pooling_mode;
    if (pooling_type == "max") {
      pooling_mode = PoolingMode::kMaximum;
    } else {
      pooling_mode = PoolingMode::kAverage;
    }

    cudnnPoolingDescriptor_t cudnn_pool_desc =
        pool_desc.descriptor(pooling_mode, ksize, paddings, strides);

    // ------------------- cudnn pool algorithm ---------------------
    auto handle = ctx.cuda_device_context().cudnn_handle();
    T alpha = 1.0f, beta = 0.0f;

    if (input_grad) {
      T *input_grad_data = input_grad->mutable_data<T>(ctx.GetPlace());
      // Zero-initialize the gradient tensor before calling cuDNN.
      auto temp = framework::EigenVector<T>::Flatten(*input_grad);
      temp.device(ctx.GetEigenDevice<paddle::platform::GPUPlace>()) =
          temp.constant(static_cast<T>(0));

      cudnnTensorDescriptor_t cudnn_input_grad_desc =
          input_grad_desc.descriptor<T>(layout,
                                        Dims2Vector(input_grad->dims()));

      PADDLE_ENFORCE(platform::dynload::cudnnPoolingBackward(
          handle, cudnn_pool_desc, &alpha, cudnn_output_desc, output_data,
          cudnn_output_grad_desc, output_grad_data, cudnn_input_desc,
          input_data, &beta, cudnn_input_grad_desc, input_grad_data));
    }
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

REGISTER_OP_GPU_KERNEL(pool2d_cudnn, ops::PoolCudnnOpKernel<float>);
REGISTER_OP_GPU_KERNEL(pool2d_cudnn_grad, ops::PoolCudnnGradOpKernel<float>);
//
// REGISTER_OP_GPU_KERNEL(pool3d_cudnn, ops::PoolCudnnOpKernel<float>);
// REGISTER_OP_GPU_KERNEL(pool3d_cudnn_grad, ops::PoolCudnnGradOpKernel<float>);
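
A cross-check on the attribute handling in both kernels above: when globalPooling is set, ksize is overwritten with the input's spatial extent (dims()[2:]), so each channel collapses to a single value. The expected spatial output size follows the same arithmetic as the naive Python reference in the tests below (a minimal sketch, assuming integer floor division; illustrative only, not part of the diff):

    # Spatial output size for one pooled dimension (matches the H_out/W_out
    # formula used by the naive reference implementations in the tests).
    def pool_out_size(in_size, ksize, stride, pad):
        return (in_size + 2 * pad - ksize) // stride + 1

    assert pool_out_size(5, 3, 1, 0) == 3  # 5x5 map, 3x3 window -> 3x3 output
    assert pool_out_size(5, 5, 1, 0) == 1  # global pooling: 5x5 window -> 1x1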
@@ -0,0 +1,22 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "paddle/framework/op_registry.h"
#include "paddle/operators/pool_op.h"

namespace paddle {
namespace operators {}  // namespace operators
}  // namespace paddle
@@ -0,0 +1,144 @@
import unittest
import numpy as np
from op_test import OpTest


def max_pool2D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0):
    N, C, H, W = x.shape
    if global_pool == 1:
        ksize = [H, W]
    H_out = (H - ksize[0] + 2 * paddings[0]) / strides[0] + 1
    W_out = (W - ksize[1] + 2 * paddings[1]) / strides[1] + 1
    out = np.zeros((N, C, H_out, W_out))
    for i in xrange(H_out):
        for j in xrange(W_out):
            r_start = np.max((i * strides[0] - paddings[0], 0))
            r_end = np.min((i * strides[0] + ksize[0] - paddings[0], H))
            c_start = np.max((j * strides[1] - paddings[1], 0))
            c_end = np.min((j * strides[1] + ksize[1] - paddings[1], W))
            x_masked = x[:, :, r_start:r_end, c_start:c_end]

            out[:, :, i, j] = np.max(x_masked, axis=(2, 3))
    return out


def avg_pool2D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0):
    N, C, H, W = x.shape
    if global_pool == 1:
        ksize = [H, W]
    H_out = (H - ksize[0] + 2 * paddings[0]) / strides[0] + 1
    W_out = (W - ksize[1] + 2 * paddings[1]) / strides[1] + 1
    out = np.zeros((N, C, H_out, W_out))
    for i in xrange(H_out):
        for j in xrange(W_out):
            r_start = np.max((i * strides[0] - paddings[0], 0))
            r_end = np.min((i * strides[0] + ksize[0] - paddings[0], H))
            c_start = np.max((j * strides[1] - paddings[1], 0))
            c_end = np.min((j * strides[1] + ksize[1] - paddings[1], W))
            x_masked = x[:, :, r_start:r_end, c_start:c_end]

            out[:, :, i, j] = np.sum(x_masked, axis=(2, 3)) / (
                (r_end - r_start) * (c_end - c_start))
    return out
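
# A quick sanity check of the naive references above (a minimal, illustrative
# sketch, not part of the original diff; it assumes Python 2, matching the
# xrange/integer-division style of this file). On a 4x4 ramp with a
# non-overlapping 2x2 window, max pooling keeps the largest entry of each
# block and average pooling its mean:
#
#   >>> x = np.arange(16, dtype="float32").reshape(1, 1, 4, 4)
#   >>> max_pool2D_forward_naive(x, ksize=[2, 2], strides=[2, 2])[0, 0]
#   array([[  5.,   7.],
#          [ 13.,  15.]])
#   >>> avg_pool2D_forward_naive(x, ksize=[2, 2], strides=[2, 2])[0, 0]
#   array([[  2.5,   4.5],
#          [ 10.5,  12.5]])
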
class TestPool2d_cudnn_Op(OpTest):
    def setUp(self):
        self.initTestCase()
        input = np.random.random(self.shape).astype("float32")
        output = self.pool2D_forward_naive(input, self.ksize, self.strides,
                                           self.paddings, self.global_pool)
        self.inputs = {'X': input}

        self.attrs = {
            'strides': self.strides,
            'paddings': self.paddings,
            'ksize': self.ksize,
            'poolingType': self.pool_type,
            'globalPooling': self.global_pool,
        }

        self.outputs = {'Out': output}

    def test_check_output(self):
        self.check_output()

    def test_check_grad(self):
        if self.pool_type != "max":
            self.check_grad(set(['X']), 'Out', max_relative_error=0.07)

    def initTestCase(self):
        self.global_pool = True
        self.op_type = "pool2d_cudnn"
        self.pool_type = "avg"
        self.pool2D_forward_naive = avg_pool2D_forward_naive
        self.shape = [2, 3, 5, 5]
        self.ksize = [3, 3]
        self.strides = [1, 1]
        self.paddings = [0, 0]


class TestCase1(TestPool2d_cudnn_Op):
    def initTestCase(self):
        self.global_pool = False
        self.op_type = "pool2d_cudnn"
        self.pool_type = "avg"
        self.pool2D_forward_naive = avg_pool2D_forward_naive
        self.shape = [2, 3, 7, 7]
        self.ksize = [3, 3]
        self.strides = [1, 1]
        self.paddings = [0, 0]


class TestCase2(TestPool2d_cudnn_Op):
    def initTestCase(self):
        self.global_pool = False
        self.op_type = "pool2d_cudnn"
        self.pool_type = "avg"
        self.pool2D_forward_naive = avg_pool2D_forward_naive
        self.shape = [2, 3, 7, 7]
        self.ksize = [3, 3]
        self.strides = [1, 1]
        self.paddings = [1, 1]


class TestCase3(TestPool2d_cudnn_Op):
    def initTestCase(self):
        self.global_pool = True
        self.op_type = "pool2d_cudnn"
        self.pool_type = "max"
        self.pool2D_forward_naive = max_pool2D_forward_naive
        self.shape = [2, 3, 5, 5]
        self.ksize = [3, 3]
        self.strides = [1, 1]
        self.paddings = [0, 0]


class TestCase4(TestPool2d_cudnn_Op):
    def initTestCase(self):
        self.global_pool = False
        self.op_type = "pool2d_cudnn"
        self.pool_type = "max"
        self.pool2D_forward_naive = max_pool2D_forward_naive
        self.shape = [2, 3, 7, 7]
        self.ksize = [3, 3]
        self.strides = [1, 1]
        self.paddings = [0, 0]


class TestCase5(TestPool2d_cudnn_Op):
    def initTestCase(self):
        self.global_pool = False
        self.op_type = "pool2d_cudnn"
        self.pool_type = "max"
        self.pool2D_forward_naive = max_pool2D_forward_naive
        self.shape = [2, 3, 7, 7]
        self.ksize = [3, 3]
        self.strides = [1, 1]
        self.paddings = [1, 1]


if __name__ == '__main__':
    unittest.main()