Merge pull request #5310 from qingqing01/fix_max_pool
Refine sequence max pooling and add a unit test for the gradient check.
commit 0977cbfb41
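For reviewers, a quick summary of what the new functors compute: sequence max pooling reduces each variable-length sequence in a packed (total_steps x dim) tensor to a single dim-vector of column-wise maxima, and records which time step produced each maximum so the backward pass can route gradients. Below is a minimal, framework-free C++ sketch of that forward semantics; the names `pool_max` and `SeqMaxResult` and the flat `std::vector` layout are illustrative assumptions, not part of this PR.

// Minimal sketch of sequence max pooling (forward). `starts` plays the
// role of the LoD level-0 offsets: sequence i occupies rows
// [starts[i], starts[i + 1]) of the (total_steps x dim) input.
// Example: starts = {0, 2, 5} packs two sequences (rows 0-1 and 2-4).
#include <cstddef>
#include <vector>

struct SeqMaxResult {
  std::vector<float> out;  // (num_seq x dim) pooled maxima
  std::vector<int> index;  // (num_seq x dim) arg-max row per column
};

SeqMaxResult pool_max(const std::vector<float>& in,
                      const std::vector<size_t>& starts, size_t dim) {
  size_t num_seq = starts.size() - 1;
  SeqMaxResult r{std::vector<float>(num_seq * dim),
                 std::vector<int>(num_seq * dim)};
  for (size_t i = 0; i < num_seq; ++i) {
    for (size_t k = 0; k < dim; ++k) {
      // Seed with the first step of the sequence, then scan the rest.
      r.out[i * dim + k] = in[starts[i] * dim + k];
      r.index[i * dim + k] = static_cast<int>(starts[i]);
      for (size_t j = starts[i] + 1; j < starts[i + 1]; ++j) {
        if (in[j * dim + k] > r.out[i * dim + k]) {
          r.out[i * dim + k] = in[j * dim + k];
          r.index[i * dim + k] = static_cast<int>(j);
        }
      }
    }
  }
  return r;
}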
paddle/operators/math/sequence_pooling.cc
@@ -0,0 +1,103 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/operators/math/sequence_pooling.h"
#include "paddle/operators/math/math_function.h"

namespace paddle {
namespace operators {
namespace math {

template <typename T>
class MaxSeqPoolFunctor<platform::CPUPlace, T> {
 public:
  void operator()(const platform::DeviceContext& context,
                  const framework::LoDTensor& input, framework::Tensor* output,
                  framework::Tensor* index) {
    auto in_dims = input.dims();
    auto out_dims = output->dims();
    auto idx_dims = index->dims();
    PADDLE_ENFORCE_GT(in_dims.size(), static_cast<int64_t>(1));
    PADDLE_ENFORCE_GT(out_dims.size(), static_cast<int64_t>(1));
    for (int64_t i = 1; i < in_dims.size(); ++i) {
      PADDLE_ENFORCE_EQ(in_dims[i], out_dims[i]);
    }
    PADDLE_ENFORCE_EQ(idx_dims, out_dims);

    auto starts = input.lod()[0];
    const T* in_data = input.data<T>();
    T* out_data = output->data<T>();
    int* max_index = index->data<int>();

    int64_t num_seq = out_dims[0];
    int64_t dim = output->numel() / num_seq;
    for (int64_t i = 0; i < num_seq; ++i) {
      // Seed the running maximum with the first step of sequence i
      // (assumes every sequence is non-empty).
      for (int64_t k = 0; k < dim; ++k) {
        out_data[i * dim + k] = in_data[starts[i] * dim + k];
        max_index[i * dim + k] = starts[i];
      }
      // Scan the remaining steps, tracking the arg-max per column.
      for (size_t j = starts[i] + 1; j < starts[i + 1]; ++j) {
        for (int64_t k = 0; k < dim; ++k) {
          if (in_data[j * dim + k] > out_data[i * dim + k]) {
            out_data[i * dim + k] = in_data[j * dim + k];
            max_index[i * dim + k] = j;
          }
        }
      }
    }
  }
};

template <typename T>
class MaxSeqPoolGradFunctor<platform::CPUPlace, T> {
 public:
  void operator()(const platform::DeviceContext& context,
                  const framework::Tensor& out_grad,
                  const framework::Tensor& index,
                  framework::LoDTensor* in_grad) {
    auto og_dims = out_grad.dims();
    auto ig_dims = in_grad->dims();
    auto idx_dims = index.dims();
    PADDLE_ENFORCE_GT(og_dims.size(), static_cast<int64_t>(1));
    PADDLE_ENFORCE_GT(ig_dims.size(), static_cast<int64_t>(1));
    for (int64_t i = 1; i < og_dims.size(); ++i) {
      PADDLE_ENFORCE_EQ(og_dims[i], ig_dims[i]);
    }
    PADDLE_ENFORCE_EQ(idx_dims, og_dims);

    const T* og_data = out_grad.data<T>();
    const int* max_index = index.data<int>();
    T* ig_data = in_grad->data<T>();

    SetConstant<platform::CPUPlace, T> set_zero;
    set_zero(context, in_grad, static_cast<T>(0.0));
    int64_t num_seq = og_dims[0];
    int64_t dim = out_grad.numel() / num_seq;
    for (int64_t i = 0; i < num_seq; ++i) {
      for (int64_t j = 0; j < dim; ++j) {
        // Route each output gradient back to the step that won the max.
        int step_id = max_index[i * dim + j];
        ig_data[step_id * dim + j] = og_data[i * dim + j];
      }
    }
  }
};

template class MaxSeqPoolFunctor<platform::CPUPlace, float>;
template class MaxSeqPoolFunctor<platform::CPUPlace, double>;
template class MaxSeqPoolGradFunctor<platform::CPUPlace, float>;
template class MaxSeqPoolGradFunctor<platform::CPUPlace, double>;

}  // namespace math
}  // namespace operators
}  // namespace paddle
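Since each pooled value comes from exactly one input row, the saved `index` tensor makes the backward pass a pure scatter: zero the input gradient, then copy each output gradient to its recorded arg-max row. A framework-free C++ sketch of that step, using the same illustrative flat-vector layout as the sketch above (the helper name `pool_max_grad` is invented):

// Sketch of the backward scatter performed by MaxSeqPoolGradFunctor:
// the gradient is zero everywhere except at the arg-max rows.
#include <cstddef>
#include <vector>

std::vector<float> pool_max_grad(const std::vector<float>& out_grad,
                                 const std::vector<int>& index,
                                 size_t total_steps, size_t dim) {
  // Mirrors the SetConstant(..., 0) call in the functor.
  std::vector<float> in_grad(total_steps * dim, 0.0f);
  size_t num_seq = out_grad.size() / dim;
  for (size_t i = 0; i < num_seq; ++i) {
    for (size_t j = 0; j < dim; ++j) {
      int step_id = index[i * dim + j];
      in_grad[step_id * dim + j] = out_grad[i * dim + j];
    }
  }
  return in_grad;
}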
paddle/operators/math/sequence_pooling.cu
@@ -0,0 +1,136 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/operators/math/math_function.h"
#include "paddle/operators/math/sequence_pooling.h"

namespace paddle {
namespace operators {
namespace math {

#ifndef FLT_MAX
#define FLT_MAX __FLT_MAX__
#endif

template <typename T>
__global__ void KeMaxSequencePool(const T* input, const size_t* starts,
                                  T* output, int* index, int64_t num_seq,
                                  int64_t dim) {
  int dim_idx = threadIdx.x;
  int seq_id = blockIdx.x;
  if (seq_id >= num_seq) return;
  size_t start = starts[seq_id];
  size_t end = starts[seq_id + 1];

  // Each thread strides over the feature dimension of one sequence.
  for (int64_t i = dim_idx; i < dim; i += blockDim.x) {
    T max_val = static_cast<T>(-FLT_MAX);
    int max_id = -1;
    for (size_t step_id = start; step_id < end; step_id++) {
      if (max_val < input[step_id * dim + i]) {
        max_val = input[step_id * dim + i];
        max_id = step_id;
      }
    }
    output[seq_id * dim + i] = max_val;
    index[seq_id * dim + i] = max_id;
  }
}

template <typename T>
class MaxSeqPoolFunctor<platform::GPUPlace, T> {
 public:
  void operator()(const platform::DeviceContext& context,
                  const framework::LoDTensor& input, framework::Tensor* output,
                  framework::Tensor* index) {
    auto in_dims = input.dims();
    auto out_dims = output->dims();
    auto idx_dims = index->dims();
    PADDLE_ENFORCE_GT(in_dims.size(), static_cast<int64_t>(1));
    PADDLE_ENFORCE_GT(out_dims.size(), static_cast<int64_t>(1));
    for (int64_t i = 1; i < in_dims.size(); ++i) {
      PADDLE_ENFORCE_EQ(in_dims[i], out_dims[i]);
    }
    PADDLE_ENFORCE_EQ(idx_dims, out_dims);

    auto starts = input.lod()[0];
    const T* in_data = input.data<T>();
    T* out_data = output->data<T>();
    int* max_index = index->data<int>();

    int64_t num_seq = out_dims[0];
    int64_t dim = output->numel() / num_seq;

    // One block per sequence; 256 threads stride over the feature dim.
    dim3 threads(256, 1);
    dim3 grid(num_seq, 1);
    auto stream =
        reinterpret_cast<const platform::CUDADeviceContext&>(context).stream();
    KeMaxSequencePool<T><<<grid, threads, 0, stream>>>(
        in_data, starts.data(), out_data, max_index, num_seq, dim);
  }
};

template <typename T>
__global__ void KeMaxSequencePoolGrad(const T* out_grad, const int* max_index,
                                      T* in_grad, int64_t num_seq,
                                      int64_t dim) {
  int idx = threadIdx.x + blockIdx.x * blockDim.x;
  int col_idx = idx % dim;
  if (idx < num_seq * dim) {
    // Scatter each output gradient back to its arg-max time step.
    int step_id = max_index[idx];
    in_grad[step_id * dim + col_idx] = out_grad[idx];
  }
}

template <typename T>
class MaxSeqPoolGradFunctor<platform::GPUPlace, T> {
 public:
  void operator()(const platform::DeviceContext& context,
                  const framework::Tensor& out_grad,
                  const framework::Tensor& index,
                  framework::LoDTensor* in_grad) {
    auto og_dims = out_grad.dims();
    auto idx_dims = index.dims();
    auto ig_dims = in_grad->dims();
    PADDLE_ENFORCE_GT(og_dims.size(), static_cast<int64_t>(1));
    PADDLE_ENFORCE_GT(ig_dims.size(), static_cast<int64_t>(1));
    for (int64_t i = 1; i < og_dims.size(); ++i) {
      PADDLE_ENFORCE_EQ(og_dims[i], ig_dims[i]);
    }
    PADDLE_ENFORCE_EQ(idx_dims, og_dims);

    const T* og_data = out_grad.data<T>();
    const int* max_index = index.data<int>();
    T* ig_data = in_grad->data<T>();

    SetConstant<platform::GPUPlace, T> set_zero;
    set_zero(context, in_grad, static_cast<T>(0.0));
    int64_t num_seq = og_dims[0];
    int64_t dim = out_grad.numel() / num_seq;

    // 1-D launch over all num_seq * dim output-gradient elements.
    unsigned int blocks = (num_seq * dim + 128 - 1) / 128;
    dim3 threads(128, 1);
    dim3 grid(blocks, 1);
    auto stream =
        reinterpret_cast<const platform::CUDADeviceContext&>(context).stream();
    KeMaxSequencePoolGrad<T><<<grid, threads, 0, stream>>>(
        og_data, max_index, ig_data, num_seq, dim);
  }
};

template class MaxSeqPoolFunctor<platform::GPUPlace, float>;
template class MaxSeqPoolFunctor<platform::GPUPlace, double>;
template class MaxSeqPoolGradFunctor<platform::GPUPlace, float>;
template class MaxSeqPoolGradFunctor<platform::GPUPlace, double>;

}  // namespace math
}  // namespace operators
}  // namespace paddle
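A note on the launch geometry above: the forward kernel assigns one block per sequence, with 256 threads striding across the feature dimension, so any `dim` is handled and no two threads write the same output cell; the backward kernel flattens all `num_seq * dim` gradient elements into a 1-D grid of 128-thread blocks. Because arg-max indices recorded for distinct sequences point into disjoint row ranges, the scatter also has no write conflicts and needs no atomics. A small host-side C++ sketch of the arithmetic (the `Launch` struct and helper names are invented for illustration):

// Sketch of how the two kernels pick grid/block sizes.
#include <cstdint>

struct Launch {
  unsigned grid_x;
  unsigned block_x;
};

Launch forward_launch(int64_t num_seq) {
  // One block per sequence; threads grid-stride over the feature dim.
  return {static_cast<unsigned>(num_seq), 256u};
}

Launch backward_launch(int64_t num_seq, int64_t dim) {
  // Ceil-divide so a final partial block covers the tail elements;
  // the kernel's `if (idx < num_seq * dim)` guard discards the excess.
  unsigned blocks = static_cast<unsigned>((num_seq * dim + 128 - 1) / 128);
  return {blocks, 128u};
}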
paddle/operators/math/sequence_pooling.h
@@ -0,0 +1,45 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/tensor.h"
#include "paddle/platform/device_context.h"

namespace paddle {
namespace operators {
namespace math {

#ifndef FLT_MAX
#define FLT_MAX __FLT_MAX__
#endif

template <typename Place, typename T>
class MaxSeqPoolFunctor {
 public:
  void operator()(const platform::DeviceContext& context,
                  const framework::LoDTensor& input, framework::Tensor* output,
                  framework::Tensor* index);
};

template <typename Place, typename T>
class MaxSeqPoolGradFunctor {
 public:
  void operator()(const platform::DeviceContext& context,
                  const framework::Tensor& out_grad,
                  const framework::Tensor& index,
                  framework::LoDTensor* in_grad);
};

}  // namespace math
}  // namespace operators
}  // namespace paddle
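The header declares the primary templates without definitions; each device source file then defines a partial specialization for its `Place` and explicitly instantiates the supported element types, which is what lets callers link against per-device code. A toy, self-contained C++ illustration of that pattern (all names here are invented, not from the PR):

// Declare the primary template, define a per-"place" partial
// specialization (normally in a separate .cc/.cu), and explicitly
// instantiate the element types you support.
#include <iostream>

struct CPUPlace {};

template <typename Place, typename T>
class ScaleFunctor;  // primary template: declared, never defined

template <typename T>
class ScaleFunctor<CPUPlace, T> {  // per-place specialization
 public:
  void operator()(T* data, int n, T factor) const {
    for (int i = 0; i < n; ++i) data[i] *= factor;
  }
};

// Explicit instantiation fixes which T's are compiled and linkable.
template class ScaleFunctor<CPUPlace, float>;

int main() {
  float v[3] = {1.f, 2.f, 3.f};
  ScaleFunctor<CPUPlace, float>()(v, 3, 2.f);
  std::cout << v[0] << " " << v[1] << " " << v[2] << "\n";  // 2 4 6
}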