/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <cmath>    // for std::sqrt used by the SQRT pooling paths
#include <cstring>  // for std::memcpy used by the copy-based poolings
#include <string>

#include "paddle/fluid/operators/jit/kernels.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/sequence_pooling.h"

namespace paddle {
namespace operators {
namespace math {

using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
template <typename T, int MajorType = Eigen::RowMajor,
          typename IndexType = Eigen::DenseIndex>
using EigenVector = framework::EigenVector<T, MajorType, IndexType>;
template <typename T, int MajorType = Eigen::RowMajor,
          typename IndexType = Eigen::DenseIndex>
using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
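
// MaxSeqPoolFunctor computes per-sequence max pooling over the last LoD
// level of a LoDTensor. The training variant below also records, for every
// output element, the input row that produced the maximum so the backward
// pass can scatter gradients; empty sequences are filled with pad_value and
// their indices are marked -1.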
template <typename T, bool is_test>
class MaxSeqPoolFunctor {
 public:
  void operator()(const platform::CPUDeviceContext& context,
                  const framework::LoDTensor& input, T pad_value,
                  framework::LoDTensor* output, framework::Tensor* index) {
    auto in_dims = input.dims();
    auto out_dims = output->dims();
    auto idx_dims = index->dims();
    PADDLE_ENFORCE_GT(in_dims.size(), 1,
                      "The rank of input shall be greater than 1.");
    PADDLE_ENFORCE_GT(out_dims.size(), 1,
                      "The rank of output shall be greater than 1.");
    for (int64_t i = 1; i < in_dims.size(); ++i) {
      PADDLE_ENFORCE_EQ(
          in_dims[i], out_dims[i],
          "The dimensions of input and output shall be the same.");
    }
    PADDLE_ENFORCE_EQ(
        idx_dims, out_dims,
        "The dimensions of index and output shall be the same.");

    auto lod_level = input.lod().size();
    auto starts = input.lod()[lod_level - 1];
    const T* in_data = input.data<T>();
    T* out_data = output->data<T>();
    int* max_index = index->data<int>();

    int64_t num_seq = out_dims[0];
    int64_t dim = output->numel() / num_seq;
    for (int64_t i = 0; i < num_seq; ++i) {
      if (starts[i] == starts[i + 1]) {
        // Empty sequence: emit pad_value and mark the index as invalid (-1).
        for (int64_t k = 0; k < dim; ++k) {
          out_data[i * dim + k] = pad_value;
          max_index[i * dim + k] = -1;
        }
        continue;
      }
      // Seed with the first item of the sequence, then scan the rest.
      for (int64_t k = 0; k < dim; ++k) {
        out_data[i * dim + k] = in_data[starts[i] * dim + k];
        max_index[i * dim + k] = starts[i];
      }
      for (size_t j = starts[i] + 1; j < starts[i + 1]; ++j) {
        for (int64_t k = 0; k < dim; ++k) {
          if (in_data[j * dim + k] > out_data[i * dim + k]) {
            out_data[i * dim + k] = in_data[j * dim + k];
            max_index[i * dim + k] = j;
          }
        }
      }
    }
  }
};

// Specialization of max sequence pooling for the test phase, where there is
// no need to fill the index buffer because no backward pass will follow.
template <typename T>
class MaxSeqPoolFunctor<T, true> {
 public:
  void operator()(const platform::CPUDeviceContext& context,
                  const framework::LoDTensor& input, T pad_value,
                  framework::LoDTensor* output, framework::Tensor* index) {
    auto in_dims = input.dims();
    auto out_dims = output->dims();
    PADDLE_ENFORCE_GT(in_dims.size(), 1,
                      "The rank of input shall be greater than 1.");
    PADDLE_ENFORCE_GT(out_dims.size(), 1,
                      "The rank of output shall be greater than 1.");
    for (int64_t i = 1; i < in_dims.size(); ++i) {
      PADDLE_ENFORCE_EQ(
          in_dims[i], out_dims[i],
          "The dimensions of input and output shall be the same.");
    }

    auto lod_level = input.lod().size();
    auto starts = input.lod()[lod_level - 1];
    const T* in_data = input.data<T>();
    T* out_data = output->data<T>();

    int64_t num_seq = out_dims[0];
    int64_t dim = output->numel() / num_seq;
    for (int64_t i = 0; i < num_seq; ++i) {
      if (starts[i] == starts[i + 1]) {
        // Empty sequence: emit pad_value.
        for (int64_t k = 0; k < dim; ++k) {
          out_data[i * dim + k] = pad_value;
        }
        continue;
      }
      // Seed with the first item, then take element-wise maxima in place.
      std::memcpy(&out_data[i * dim], &in_data[starts[i] * dim],
                  dim * sizeof(T));
      for (size_t j = starts[i] + 1; j < starts[i + 1]; ++j) {
        for (int64_t k = 0; k < dim; ++k) {
          if (in_data[j * dim + k] > out_data[i * dim + k]) {
            out_data[i * dim + k] = in_data[j * dim + k];
          }
        }
      }
    }
  }
};
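
// MaxSeqPoolGradFunctor scatters each output gradient back to the input row
// recorded in `index` during the forward pass; all other entries of the
// input gradient are zeroed. An index of -1 (empty sequence) is skipped.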
template <typename T>
class MaxSeqPoolGradFunctor {
 public:
  void operator()(const platform::CPUDeviceContext& context,
                  const framework::LoDTensor& out_grad,
                  const framework::Tensor& index,
                  framework::LoDTensor* in_grad) {
    auto og_dims = out_grad.dims();
    auto ig_dims = in_grad->dims();
    auto idx_dims = index.dims();
    PADDLE_ENFORCE_GT(og_dims.size(), 1,
                      "The rank of output@Grad shall be greater than 1.");
    PADDLE_ENFORCE_GT(ig_dims.size(), 1,
                      "The rank of input@Grad shall be greater than 1.");
    for (int64_t i = 1; i < og_dims.size(); ++i) {
      PADDLE_ENFORCE_EQ(
          og_dims[i], ig_dims[i],
          "The dimensions of input@Grad and output@Grad shall be the same.");
    }
    PADDLE_ENFORCE_EQ(
        idx_dims, og_dims,
        "The dimensions of index and output@Grad shall be the same.");

    const T* og_data = out_grad.data<T>();
    const int* max_index = index.data<int>();
    T* ig_data = in_grad->data<T>();

    // Zero the whole input gradient, then scatter each output gradient to
    // the step that produced the forward maximum.
    SetConstant<platform::CPUDeviceContext, T> set_zero;
    set_zero(context, in_grad, static_cast<T>(0.0));
    int64_t num_seq = og_dims[0];
    int64_t dim = out_grad.numel() / num_seq;
    for (int64_t i = 0; i < num_seq; ++i) {
      for (int64_t j = 0; j < dim; ++j) {
        int step_id = max_index[i * dim + j];
        if (step_id == -1) continue;  // padded output of an empty sequence
        ig_data[step_id * dim + j] = og_data[i * dim + j];
      }
    }
  }
};
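
// LastSeqPoolFunctor copies the last item of every sequence to the output;
// empty sequences receive pad_value.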
template <typename T>
class LastSeqPoolFunctor {
 public:
  void operator()(const platform::CPUDeviceContext& context,
                  const framework::LoDTensor& input, T pad_value,
                  framework::LoDTensor* output) {
    // Create pointers to the input and output data.
    auto* in_data = input.data<T>();
    auto* out_data = output->data<T>();

    // Calculate the size of each item in the sequence.
    int64_t item_size = input.numel() / input.dims()[0];
    auto lod_level = input.lod().size();
    auto lod = input.lod()[lod_level - 1];
    int seq_num = static_cast<int>(lod.size()) - 1;
    for (int i = 0; i < seq_num; ++i) {
      // Calculate the length of each sequence.
      int64_t seq_len = static_cast<int64_t>(lod[i + 1] - lod[i]);
      if (seq_len == 0) {
        // Empty sequence: fill the output with pad_value.
        for (int j = 0; j < item_size; ++j) {
          out_data[j] = pad_value;
        }
      } else {
        // Advance to the beginning of the next sequence.
        in_data += seq_len * item_size;
        // Copy the last item of the sequence to the output.
        std::memcpy(out_data, (in_data - item_size), item_size * sizeof(T));
      }
      out_data += item_size;
    }
  }
};
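
// FirstSeqPoolFunctor copies the first item of every sequence to the output;
// empty sequences receive pad_value.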
template <typename T>
class FirstSeqPoolFunctor {
 public:
  void operator()(const platform::CPUDeviceContext& context,
                  const framework::LoDTensor& input, T pad_value,
                  framework::LoDTensor* output) {
    // Create pointers to the input and output data.
    auto* in_data = input.data<T>();
    auto* out_data = output->data<T>();

    // Calculate the size of each item in the sequence.
    int64_t item_size = input.numel() / input.dims()[0];
    auto lod_level = input.lod().size();
    auto lod = input.lod()[lod_level - 1];
    int seq_num = static_cast<int>(lod.size()) - 1;
    for (int i = 0; i < seq_num; ++i) {
      // Calculate the length of each sequence.
      int64_t seq_len = static_cast<int64_t>(lod[i + 1] - lod[i]);
      if (seq_len == 0) {
        // Empty sequence: fill the output with pad_value.
        for (int j = 0; j < item_size; ++j) {
          out_data[j] = pad_value;
        }
      } else {
        // Copy the first item of the sequence to the output.
        std::memcpy(out_data, in_data, item_size * sizeof(T));
        // Advance to the next sequence.
        in_data += seq_len * item_size;
      }
      out_data += item_size;
    }
  }
};
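
// SumSeqPoolGradFunctor broadcasts each sequence's output gradient to every
// time step of that sequence, since every step contributes equally to a sum.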
template <typename T>
class SumSeqPoolGradFunctor {
 public:
  void operator()(const platform::CPUDeviceContext& context,
                  const framework::LoDTensor& out_grad,
                  framework::LoDTensor* in_grad) {
    auto lod_level = in_grad->lod().size();
    auto lod = in_grad->lod()[lod_level - 1];
    int64_t out_w = out_grad.numel() / out_grad.dims()[0];
    int64_t in_w = in_grad->numel() / in_grad->dims()[0];
    PADDLE_ENFORCE_EQ(
        in_w, out_w,
        "The feature size of input@Grad and output@Grad shall be the same.");
    const T* out_g_data = out_grad.data<T>();
    T* in_g_data = in_grad->mutable_data<T>(context.GetPlace());
    auto blas = math::GetBlas<platform::CPUDeviceContext, T>(context);
    for (int i = 0; i < static_cast<int>(lod.size()) - 1; ++i) {
      int64_t h = static_cast<int64_t>(lod[i + 1] - lod[i]);
      if (h == 0) continue;
      int64_t in_offset = lod[i] * in_w;
      const T* out_pos = out_g_data + i * out_w;
      T* in_pos = in_g_data + in_offset;
      // Copy the sequence's output gradient into each of its h time steps.
      for (int r = 0; r != h; ++r) {
        blas.VCOPY(in_w, out_pos, in_pos + r * in_w);
      }
    }
  }
};
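
// SequencePoolFunctor dispatches on `pooltype`: MAX/LAST/FIRST use the
// dedicated functors above, SUM uses a JIT-generated kernel, and
// AVERAGE/SQRT fall through to Eigen reductions.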
template <typename T>
class SequencePoolFunctor<platform::CPUDeviceContext, T> {
 public:
  /* max pool has index output */
  void operator()(const platform::CPUDeviceContext& context,
                  const std::string pooltype, T pad_value,
                  const framework::LoDTensor& input,
                  framework::LoDTensor* output, bool is_test,
                  framework::Tensor* index = nullptr) {
    if (pooltype == "MAX") {
      if (is_test) {
        math::MaxSeqPoolFunctor<T, true> max_pool;
        max_pool(context, input, pad_value, output, index);
      } else {
        math::MaxSeqPoolFunctor<T, false> max_pool;
        max_pool(context, input, pad_value, output, index);
      }
      return;
    }
    if (pooltype == "LAST") {
      math::LastSeqPoolFunctor<T> last_pool;
      last_pool(context, input, pad_value, output);
      return;
    }
    if (pooltype == "FIRST") {
      math::FirstSeqPoolFunctor<T> first_pool;
      first_pool(context, input, pad_value, output);
      return;
    }
    auto lod_level = input.lod().size();
    auto lod = input.lod()[lod_level - 1];
    if (pooltype == "SUM") {
      auto place = context.GetPlace();
      PADDLE_ENFORCE_EQ(
          platform::is_cpu_place(place), true,
          "Sequence_pool should run on CPU Device when pooltype is SUM");
      const T* src = input.data<T>();
      T* dst = output->mutable_data<T>(place);
      // SUM uses a cached JIT kernel; attr.w is the feature width and
      // attr.h the length of the current sequence.
      jit::seq_pool_attr_t attr(
          static_cast<int>(input.numel() / input.dims()[0]),
          jit::SeqPoolType::kSum);
      auto seqpool =
          jit::KernelFuncs<jit::SeqPoolTuple<T>, platform::CPUPlace>::Cache()
              .At(attr);
      for (int i = 0; i < static_cast<int>(lod.size()) - 1; ++i) {
        attr.h = static_cast<int>(lod[i + 1] - lod[i]);
        if (attr.h == 0) {
          // Empty sequence: emit pad_value.
          for (int j = 0; j < attr.w; ++j) {
            dst[j] = pad_value;
          }
        } else {
          seqpool(src, dst, &attr);
        }
        dst += attr.w;
        src += attr.h * attr.w;
      }
      return;
    }
    // AVERAGE and SQRT are handled with Eigen reductions.
    auto& place = *context.eigen_device();
    for (int i = 0; i < static_cast<int>(lod.size()) - 1; ++i) {
      Tensor out_t = output->Slice(i, i + 1);
      int64_t w = input.numel() / input.dims()[0];
      if (lod[i] == lod[i + 1]) {
        for (int j = 0; j < w; ++j) {
          out_t.data<T>()[j] = pad_value;
        }
        continue;
      }
      Tensor in_t =
          input.Slice(static_cast<int>(lod[i]), static_cast<int>(lod[i + 1]));
      int64_t h = static_cast<int64_t>(lod[i + 1] - lod[i]);
      auto in_e = EigenMatrix<T>::From(in_t, framework::make_ddim({h, w}));
      auto out_e = EigenVector<T>::Flatten(out_t);
      if (pooltype == "AVERAGE") {
        out_e.device(place) = in_e.mean(Eigen::array<int, 1>({{0}}));
      } else if (pooltype == "SQRT") {
        out_e.device(place) = in_e.sum(Eigen::array<int, 1>({{0}})) /
                              std::sqrt(static_cast<T>(h));
      } else {
        PADDLE_THROW("unsupported pooling pooltype");
      }
    }
  }
};
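
// SequencePoolGradFunctor routes gradients according to `pooltype`: MAX uses
// the recorded indices, SUM broadcasts via SumSeqPoolGradFunctor, LAST/FIRST
// write the output gradient into a single time step (after zeroing the input
// gradient), and AVERAGE/SQRT broadcast a scaled gradient with Eigen.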
template <typename T>
class SequencePoolGradFunctor<platform::CPUDeviceContext, T> {
 public:
  void operator()(const platform::CPUDeviceContext& context,
                  const std::string pooltype,
                  const framework::LoDTensor& out_grad,
                  framework::LoDTensor* in_grad,
                  /* max pool has index */
                  const framework::Tensor* index = nullptr) {
    if (pooltype == "MAX") {
      math::MaxSeqPoolGradFunctor<T> max_pool_grad;
      max_pool_grad(context, out_grad, *index, in_grad);
      return;
    }

    if (pooltype == "LAST" || pooltype == "FIRST") {
      // Set X@Grad to zero first when pooltype is LAST/FIRST.
      math::SetConstant<platform::CPUDeviceContext, T> functor;
      functor(context, in_grad, 0);
    }

    if (pooltype == "SUM") {
      math::SumSeqPoolGradFunctor<T> sum_pool_grad;
      sum_pool_grad(context, out_grad, in_grad);
      return;
    }

    auto lod_level = in_grad->lod().size();
    auto lod = in_grad->lod()[lod_level - 1];
    auto& place = *context.eigen_device();
    for (int i = 0; i < static_cast<int>(lod.size()) - 1; ++i) {
      if (lod[i] == lod[i + 1]) continue;
      auto in_g_t = in_grad->Slice(static_cast<int>(lod[i]),
                                   static_cast<int>(lod[i + 1]));
      auto out_g_t = out_grad.Slice(i, i + 1);
      int64_t h = static_cast<int64_t>(lod[i + 1] - lod[i]);
      int64_t w = in_grad->numel() / in_grad->dims()[0];
      auto in_g_e = EigenMatrix<T>::From(in_g_t, {h, w});
      auto out_g_e = EigenMatrix<T>::From(out_g_t, {1, w});
      auto out_g_e_v = EigenVector<T>::Flatten(out_g_t);
      Eigen::DSizes<int, 2> bcast(h, 1);

      if (pooltype == "AVERAGE") {
        in_g_e.device(place) = (out_g_e / static_cast<T>(h)).broadcast(bcast);
      } else if (pooltype == "SQRT") {
        in_g_e.device(place) =
            (out_g_e / std::sqrt(static_cast<T>(h))).broadcast(bcast);
      } else if (pooltype == "LAST") {
        in_g_e.chip(h - 1, 0).device(place) = out_g_e_v;
      } else if (pooltype == "FIRST") {
        in_g_e.chip(0, 0).device(place) = out_g_e_v;
      } else {
        PADDLE_THROW("unsupported pooling pooltype");
      }
    }
  }
};

template class SequencePoolFunctor<platform::CPUDeviceContext, float>;
template class SequencePoolFunctor<platform::CPUDeviceContext, double>;
template class SequencePoolGradFunctor<platform::CPUDeviceContext, float>;
template class SequencePoolGradFunctor<platform::CPUDeviceContext, double>;
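
// A minimal usage sketch (for illustration only; `ctx`, `x`, `out`, and
// `index` are assumed variables: a CPUDeviceContext, an input LoDTensor
// with a valid LoD, and pre-shaped output/index tensors respectively):
//
//   SequencePoolFunctor<platform::CPUDeviceContext, float> pool;
//   pool(ctx, "MAX", /*pad_value=*/0.0f, x, &out, /*is_test=*/false, &index);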

}  // namespace math
}  // namespace operators
}  // namespace paddle