From 7c671466320ac257c6a8637ac283512702d5eecd Mon Sep 17 00:00:00 2001 From: yangyaming Date: Wed, 9 May 2018 14:12:46 +0000 Subject: [PATCH 01/10] Add forward and backward. --- paddle/fluid/operators/sequence_pad_op.cc | 131 ++++++++++++++++++++++ paddle/fluid/operators/sequence_pad_op.cu | 23 ++++ paddle/fluid/operators/sequence_pad_op.h | 97 ++++++++++++++++ 3 files changed, 251 insertions(+) create mode 100644 paddle/fluid/operators/sequence_pad_op.cc create mode 100644 paddle/fluid/operators/sequence_pad_op.cu create mode 100644 paddle/fluid/operators/sequence_pad_op.h diff --git a/paddle/fluid/operators/sequence_pad_op.cc b/paddle/fluid/operators/sequence_pad_op.cc new file mode 100644 index 0000000000..183d38fcc9 --- /dev/null +++ b/paddle/fluid/operators/sequence_pad_op.cc @@ -0,0 +1,131 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/fluid/operators/sequence_pad_op.h" + +namespace paddle { +namespace operators { + +class SequencePadOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), + "Input(X) of SequencePadOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(Out) of SequencePadOp should not be null."); + + auto x_dims = ctx->GetInputDim("X"); + + PADDLE_ENFORCE_EQ(x_dims.size(), 2, + "Only support 2-D tensor, rank of Input(X) should be 2."); + + auto out_dims = x_dims; + + if (ctx->IsRuntime()) { + framework::Variable* x_var = + boost::get(ctx->GetInputVarPtrs("X")[0]); + + auto& x_lod = x_var->Get().lod(); + + PADDLE_ENFORCE_GE(x_lod.size(), 1, + "Input(X) should be sequences containing lod."); + + auto last_level_lod = x_lod[x_lod.size() - 1]; + size_t max_len = 0; + + for (size_t i = 1; i < last_level_lod.size(); ++i) { + auto seq_len = last_level_lod[i] - last_level_lod[i - 1]; + max_len = max_len < seq_len ? seq_len : max_len; + } + + out_dims[0] = max_len * (last_level_lod.size() - 1); + } else { + framework::VarDesc* x_desc = + boost::get(ctx->GetInputVarPtrs("X")[0]); + PADDLE_ENFORCE_GE(x_desc->GetLoDLevel(), 1, + "Input(X) should be sequences containing lod."); + out_dims[0] = -1; + } + + ctx->SetOutputDim("Out", out_dims); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), + ctx.device_context()); + } +}; + +class SequencePadOpMaker : public framework::OpProtoAndCheckerMaker { + public: + SequencePadOpMaker(OpProto* proto, OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", + "(LoDTensor, default LoDTensor) Input variable which " + "should contain lod information. 
Length of each sequence would " + "be computed from the most bottom level lod."); + AddOutput("Out", + "(Tensor) Output variable which would be a common tensor " + "without lod. Each sequence would be padded to the maximum " + "length."); + AddAttr("pad_value", + "(float, default 0.0) Value to be padded " + "to the end of each sequence."); + AddComment(R"DOC( + + )DOC"); + } +}; + +class SequencePadGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), + "Input(X) of SequencePadGradOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), + "Input(Out@GRAD) of SequencePadGradOp should not be null."); + + if (ctx->HasOutput(framework::GradVarName("X"))) { + ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); + ctx->ShareLoD("X", /*->*/ framework::GradVarName("X")); + } + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(sequence_pad, ops::SequencePadOp, ops::SequencePadOpMaker, + paddle::framework::DefaultGradOpDescMaker); +REGISTER_OPERATOR(sequence_pad_grad, ops::SequencePadGradOp); +REGISTER_OP_CPU_KERNEL( + sequence_pad, + ops::SequencePadOpKernel, + ops::SequencePadOpKernel, + ops::SequencePadOpKernel, + ops::SequencePadOpKernel); +REGISTER_OP_CPU_KERNEL( + sequence_pad_grad, + ops::SequencePadGradOpKernel, + ops::SequencePadGradOpKernel, + ops::SequencePadGradOpKernel, + ops::SequencePadGradOpKernel); diff --git a/paddle/fluid/operators/sequence_pad_op.cu b/paddle/fluid/operators/sequence_pad_op.cu new file mode 100644 index 0000000000..a2fa62957e --- /dev/null +++ b/paddle/fluid/operators/sequence_pad_op.cu @@ -0,0 +1,23 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/sequence_pad_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_CUDA_KERNEL( + sequence_pad, + ops::SequencePadOpKernel); +REGISTER_OP_CUDA_KERNEL( + sequence_pad_grad, + ops::SequencePadGradOpKernel); diff --git a/paddle/fluid/operators/sequence_pad_op.h b/paddle/fluid/operators/sequence_pad_op.h new file mode 100644 index 0000000000..b36465d8e7 --- /dev/null +++ b/paddle/fluid/operators/sequence_pad_op.h @@ -0,0 +1,97 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/memory/memcpy.h" +#include "paddle/fluid/operators/math/math_function.h" + +namespace paddle { +namespace operators { + +using LoDTensor = framework::LoDTensor; +using LoD = framework::LoD; + +// @TODO clean code +template +class SequencePadOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* x_ptr = ctx.Input("X"); + auto* out_ptr = ctx.Output("Out"); + + out_ptr->mutable_data(ctx.GetPlace()); + + T pad_value = static_cast(ctx.Attr("pad_value")); + + math::SetConstant set_func; + set_func(ctx.template device_context(), out_ptr, pad_value); + + auto& x_lod = x_ptr->lod(); + auto& x_last_level_lod = x_lod[x_lod.size() - 1]; + auto seq_num = x_last_level_lod.size() - 1; + auto max_len = out_ptr->dims()[0] / seq_num; + + PADDLE_ENFORCE_EQ(max_len * seq_num, out_ptr->dims()[0], + "First dimension of `Out` should be equal to " + "maximum length mulplied by sequence number."); + + for (size_t i = 1; i < x_last_level_lod.size(); ++i) { + auto x_start = x_last_level_lod[i - 1]; + auto x_end = x_last_level_lod[i]; + auto out_start = (i - 1) * max_len; + auto out_end = out_start + (x_end - x_start); + auto x_sub_tensor = x_ptr->Slice(x_start, x_end); + auto out_sub_tensor = out_ptr->Slice(out_start, out_end); + framework::TensorCopy(x_sub_tensor, ctx.GetPlace(), &out_sub_tensor); + } + } +}; + +template +class SequencePadGradOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* x_ptr = ctx.Input("X"); + auto* g_out_ptr = ctx.Input(framework::GradVarName("Out")); + auto* g_x_ptr = ctx.Output(framework::GradVarName("X")); + + math::SetConstant set_func; + set_func(ctx.template device_context(), g_x_ptr, + static_cast(0)); + + auto& x_lod = x_ptr->lod(); + auto& x_last_level_lod = x_lod[x_lod.size() - 1]; + auto seq_num = 
x_last_level_lod.size() - 1; + int64_t max_len = g_out_ptr->dims()[0] / seq_num; + + PADDLE_ENFORCE_EQ(max_len * seq_num, g_out_ptr->dims()[0], + "First dimension of `Out` should be equal to " + "maximum length mulplied by sequence number."); + + for (size_t i = 1; i < x_last_level_lod.size(); ++i) { + auto x_start = x_last_level_lod[i - 1]; + auto x_end = x_last_level_lod[i]; + auto out_start = (i - 1) * max_len; + auto out_end = out_start + (x_end - x_start); + + auto g_out_sub = g_out_ptr->Slice(out_start, out_end); + auto g_x_sub = g_x_ptr->Slice(x_start, x_end); + framework::TensorCopy(g_x_sub, ctx.GetPlace(), &g_out_sub); + } + } +}; + +} // namespace operators +} // namespace paddle From 0797246704aad1392f8d410e5ba179db8592d2e0 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Fri, 11 May 2018 09:55:59 +0000 Subject: [PATCH 02/10] Enhance sequence_padding functor (CPU and GPU). --- .../fluid/operators/math/sequence_padding.cc | 203 +++++++-------- .../fluid/operators/math/sequence_padding.cu | 231 +++++++----------- .../fluid/operators/math/sequence_padding.h | 66 +++-- paddle/fluid/operators/sequence_pad_op.cc | 40 +-- paddle/fluid/operators/sequence_pad_op.h | 101 +++++--- paddle/fluid/operators/warpctc_op.h | 5 +- 6 files changed, 330 insertions(+), 316 deletions(-) diff --git a/paddle/fluid/operators/math/sequence_padding.cc b/paddle/fluid/operators/math/sequence_padding.cc index d63c6c4ed5..2dd2cafa23 100644 --- a/paddle/fluid/operators/math/sequence_padding.cc +++ b/paddle/fluid/operators/math/sequence_padding.cc @@ -18,128 +18,111 @@ namespace paddle { namespace operators { namespace math { -template -class PaddingLoDTensorFunctor { - public: - void operator()(const platform::CPUDeviceContext& context, - const framework::LoDTensor& seq, framework::Tensor* padding, - bool norm_by_times) { - auto lod = seq.lod(); - PADDLE_ENFORCE_GT(lod.size(), 0UL, - "The LoD of LoDTensor seq should not be null."); - - const size_t level = 0; - framework::LoD 
abs_offset_lod = framework::ToAbsOffset(lod); - - auto seq_dims = seq.dims(); - PADDLE_ENFORCE_EQ(seq_dims[0], - static_cast(abs_offset_lod[level].back()), - "The first dimension of LoDTensor seq should be " - "equal to the sum of all sequences's length."); - - auto padding_dims = padding->dims(); - PADDLE_ENFORCE_EQ(padding_dims.size(), 3UL, - "The input padding should be a 3-D Tensor of shape " - "[max_sequence_length, num_sequences, sequence_width]."); - - const int64_t max_sequence_length = MaximumSequenceLength(lod, level); - PADDLE_ENFORCE_EQ(padding_dims[0], max_sequence_length, - "The first dimension of Tensor padding should be the " - "maximum length of all sequences in LoDTensor seq."); - - const int64_t num_sequences = abs_offset_lod[level].size() - 1; - PADDLE_ENFORCE_EQ(padding_dims[1], num_sequences, - "The second dimension of Tensor padding should be the " - "number of sequences in LoDTensor seq."); - - const int64_t sequence_width = seq.numel() / seq_dims[0]; - PADDLE_ENFORCE_EQ(padding_dims[2], sequence_width, - "The third dimension of Tensor padding should be the " - "width of sequence in LoDTensor seq."); - - const T* seq_data = seq.data(); - T* padding_data = padding->data(); - for (int64_t i = 0; i < max_sequence_length; ++i) { - for (int64_t j = 0; j < num_sequences; ++j) { - int64_t start_pos = abs_offset_lod[level][j]; - int64_t sequence_length = abs_offset_lod[level][j + 1] - start_pos; - if (i < sequence_length) { - // i > 0 => sequence_length > 0 - T scale = - norm_by_times ? 
(1.0f / static_cast(sequence_length)) : 1.0f; - for (int64_t k = 0; k < sequence_width; ++k) { - padding_data[(i * num_sequences + j) * sequence_width + k] = - seq_data[(start_pos + i) * sequence_width + k] * scale; - } +template +void CopyDataCPU(framework::LoDTensor* seq_tensor, + framework::Tensor* padding_tensor, + const framework::Vector& abs_offset, + const int64_t& max_seq_len, const int64_t& seq_width, + bool seq_to_padding, bool norm_by_len) { + T* seq_data = seq_tensor->data(); + T* padding_data = padding_tensor->data(); + + int64_t seq_num = abs_offset.size() - 1; + + for (int64_t i = 0; i < seq_num; ++i) { + int64_t seq_start = abs_offset[i]; + int64_t seq_len = abs_offset[i + 1] - seq_start; + + T scale = norm_by_len ? (1.0f / static_cast(seq_len)) : 1.0f; + + for (int64_t j = 0; j < seq_len; ++j) { + for (int64_t k = 0; k < seq_width; ++k) { + size_t padding_offset = 0; + if (padding_layout == BATCH_LENGTH_WIDTH) { + padding_offset = (i * max_seq_len * seq_width) + j * seq_width + k; + } else { + padding_offset = (j * seq_num * seq_width) + i * seq_width + k; + } + if (seq_to_padding) { + padding_data[padding_offset] = + seq_data[(seq_start + j) * seq_width + k] * scale; } else { - memset(padding_data + (i * num_sequences + j) * sequence_width, 0, - sequence_width * sizeof(T)); + seq_data[(seq_start + j) * seq_width + k] = + padding_data[padding_offset] * scale; } } } } +} + +template +class PaddingLoDTensorFunctor { + public: + void operator()(const platform::CPUDeviceContext& context, + const framework::LoDTensor& seq_tensor, + framework::Tensor* padding_tensor, + T padding_value = static_cast(0), + bool norm_by_times = false, size_t lod_level = 0) { + ValidateLoD(seq_tensor, lod_level); + + auto& lod = seq_tensor.lod(); + auto& abs_offset = framework::ToAbsOffset(lod)[lod_level]; + + auto seq_dims = seq_tensor.dims(); + auto padding_dims = padding_tensor->dims(); + int64_t max_seq_len = MaximumSequenceLength(lod, lod_level); + int64_t seq_num = 
abs_offset.size() - 1; + int64_t seq_width = seq_tensor.numel() / seq_dims[0]; + int64_t numel = max_seq_len * seq_num * seq_width; + + ValidateShape(seq_dims, abs_offset.back(), padding_dims, max_seq_len, + seq_num, seq_width, padding_layout); + + T* padding_data = padding_tensor->data(); + + memset(padding_data, padding_value, numel * sizeof(T)); + + CopyDataCPU( + const_cast(&seq_tensor), padding_tensor, + abs_offset, max_seq_len, seq_width, true /* seq_to_padding */, + norm_by_times); + } }; -template -class UnpaddingLoDTensorFunctor { +template +class UnpaddingLoDTensorFunctor { public: void operator()(const platform::CPUDeviceContext& context, - framework::LoDTensor* seq, const framework::Tensor& padding, - bool norm_by_times) { - auto lod = seq->lod(); - PADDLE_ENFORCE_GT(lod.size(), 0UL, - "The LoD of LoDTensor seq should not be null."); - - const size_t level = 0; - framework::LoD abs_offset_lod = framework::ToAbsOffset(lod); - - auto seq_dims = seq->dims(); - PADDLE_ENFORCE_EQ(seq_dims[0], - static_cast(abs_offset_lod[level].back()), - "The first dimension of LoDTensor seq should be " - "equal to the sum of all sequences's length."); - - auto padding_dims = padding.dims(); - PADDLE_ENFORCE_EQ(padding_dims.size(), 3UL, - "The input padding should be a 3-D Tensor of shape " - "[max_sequnece_length, num_sequences, sequence_width]."); - - const int64_t max_sequence_length = MaximumSequenceLength(lod, level); - PADDLE_ENFORCE_EQ(padding_dims[0], max_sequence_length, - "The first dimension of Tensor padding should be " - "the maximum length of all sequences in LoDTensor seq."); - - const int64_t num_sequences = abs_offset_lod[level].size() - 1; - PADDLE_ENFORCE_EQ(padding_dims[1], num_sequences, - "The second dimension of Tensor padding should be " - "the number of sequences in LoDTensor seq."); - - const int64_t sequence_width = seq->numel() / seq_dims[0]; - PADDLE_ENFORCE_EQ(padding_dims[2], sequence_width, - "The third dimension of Tensor padding should be 
the " - "width of sequence in LoDTensor seq."); - - const T* padding_data = padding.data(); - T* seq_data = seq->data(); - for (int64_t i = 0; i < num_sequences; ++i) { - int64_t start_pos = abs_offset_lod[level][i]; - int64_t sequence_length = abs_offset_lod[level][i + 1] - start_pos; - for (int64_t j = 0; j < sequence_length; ++j) { - // sequence_width > j > 0 - T scale = - norm_by_times ? (1.0f / static_cast(sequence_length)) : 1.0f; - for (int64_t k = 0; k < sequence_width; ++k) { - seq_data[(start_pos + j) * sequence_width + k] = - padding_data[(j * num_sequences + i) * sequence_width + k] * - scale; - } - } - } + framework::LoDTensor* seq_tensor, + const framework::Tensor& padding_tensor, + bool norm_by_times = false, size_t lod_level = 0) { + ValidateLoD(*seq_tensor, lod_level); + + auto& lod = seq_tensor->lod(); + auto& abs_offset = framework::ToAbsOffset(lod)[lod_level]; + + auto& seq_dims = seq_tensor->dims(); + auto& padding_dims = padding_tensor.dims(); + int64_t max_seq_len = MaximumSequenceLength(lod, lod_level); + int64_t seq_num = abs_offset.size() - 1; + int64_t seq_width = seq_tensor->numel() / seq_dims[0]; + + ValidateShape(seq_dims, abs_offset.back(), padding_dims, max_seq_len, + seq_num, seq_width, padding_layout); + + T* seq_data = seq_tensor->data(); + memset(seq_data, static_cast(0), seq_tensor->numel() * sizeof(T)); + + CopyDataCPU( + seq_tensor, const_cast(&padding_tensor), abs_offset, + max_seq_len, seq_width, false /* seq_to_padding */, norm_by_times); } }; -template class PaddingLoDTensorFunctor; -template class UnpaddingLoDTensorFunctor; +template class PaddingLoDTensorFunctor; +template class UnpaddingLoDTensorFunctor; } // namespace math } // namespace operators diff --git a/paddle/fluid/operators/math/sequence_padding.cu b/paddle/fluid/operators/math/sequence_padding.cu index 0956a0c17d..2377bef024 100644 --- a/paddle/fluid/operators/math/sequence_padding.cu +++ b/paddle/fluid/operators/math/sequence_padding.cu @@ -19,87 +19,76 @@ 
namespace paddle { namespace operators { namespace math { -template -__global__ void SequencePaddingKernel(T* padding, T* sequence, - const size_t* sequence_start_positions, - const size_t sequence_width, - const size_t max_sequence_length, - const size_t num_sequences) { +template +__global__ void SequencePaddingKernel( + T* padding_data, T* seq_data, const size_t* abs_offset, + const size_t& seq_num, const size_t& max_seq_len, const size_t& seq_width, + const PaddingLayout& padding_layout, bool norm_by_times = false, + const T& padding_value = 0) { size_t padding_idx = blockIdx.y; - size_t start_pos = sequence_start_positions[padding_idx]; - size_t sequence_length = - sequence_start_positions[padding_idx + 1] - start_pos; + size_t seq_start = abs_offset[padding_idx]; + size_t seq_len = abs_offset[padding_idx + 1] - seq_start; - size_t sequence_idx = blockIdx.x * blockDim.y + threadIdx.y; - size_t padding_base_idx = - (sequence_idx * num_sequences + padding_idx) * sequence_width; - size_t sequence_base_idx = (start_pos + sequence_idx) * sequence_width; + size_t seq_idx = blockIdx.x * blockDim.y + threadIdx.y; - if (sequence_idx < sequence_length) { - T scale = NormByTimes ? (1.0f / static_cast(sequence_length)) : 1.0f; + size_t seq_offset = (seq_start + seq_idx) * seq_width; + + size_t padding_offset = 0; + + if (padding_layout == LENGTH_BATCH_WIDTH) { + padding_offset = (seq_idx * seq_num + padding_idx) * seq_width; + } else { + padding_offset = (padding_idx * max_seq_len + seq_idx) * seq_width; + } + + if (seq_idx < seq_len) { + T scale = norm_by_times ? 
(1.0f / static_cast(seq_len)) : 1.0f; if (Padding) { /* sequence -> padding */ - for (size_t i = threadIdx.x; i < sequence_width; i += blockDim.x) { - padding[padding_base_idx + i] = scale * sequence[sequence_base_idx + i]; + for (size_t i = threadIdx.x; i < seq_width; i += blockDim.x) { + padding_data[padding_offset + i] = scale * seq_data[seq_offset + i]; } } else { /* padding -> sequence */ - for (size_t i = threadIdx.x; i < sequence_width; i += blockDim.x) { - sequence[sequence_base_idx + i] = scale * padding[padding_base_idx + i]; + for (size_t i = threadIdx.x; i < seq_width; i += blockDim.x) { + seq_data[seq_offset + i] = scale * padding_data[padding_offset + i]; } } - } else if (sequence_idx < max_sequence_length) { + } else if (seq_idx < max_seq_len) { if (Padding) { /* sequence -> padding */ - for (size_t i = threadIdx.x; i < sequence_width; i += blockDim.x) { - padding[padding_base_idx + i] = 0; + for (size_t i = threadIdx.x; i < seq_width; i += blockDim.x) { + padding_data[padding_offset + i] = padding_value; } } } } -template -class PaddingLoDTensorFunctor { +template +class PaddingLoDTensorFunctor { public: void operator()(const platform::CUDADeviceContext& context, - const framework::LoDTensor& seq, framework::Tensor* padding, - bool norm_by_times) { - auto lod = seq.lod(); - PADDLE_ENFORCE_GT(lod.size(), 0UL, - "The lod of LoDTensor seq should not be null."); - - const size_t level = 0; - framework::LoD abs_offset_lod = framework::ToAbsOffset(lod); - - auto seq_dims = seq.dims(); - PADDLE_ENFORCE_EQ(seq_dims[0], - static_cast(abs_offset_lod[level].back()), - "The first dimension of LoDTensor seq should be " - "equal to the sum of all sequences's length."); - - auto padding_dims = padding->dims(); - PADDLE_ENFORCE_EQ(padding_dims.size(), 3UL, - "The input padding should be a 3-D Tensor of shape " - "[max_sequence_length, num_sequences, sequence_width]."); - - int64_t max_sequence_length = MaximumSequenceLength(lod, level); - 
PADDLE_ENFORCE_EQ(padding_dims[0], max_sequence_length, - "The first dimension of Tensor padding should be the " - "maximum length of all sequences in LoDTensor seq."); - - const int64_t num_sequences = abs_offset_lod[level].size() - 1; - PADDLE_ENFORCE_EQ(padding_dims[1], num_sequences, - "The second dimension of Tensor padding should be the " - "number of sequences in LoDTensor seq."); - - const int64_t sequence_width = seq.numel() / seq_dims[0]; - PADDLE_ENFORCE_EQ(padding_dims[2], sequence_width, - "The third dimension of Tensor padding should be the " - "width of sequence in LoDTensor seq."); - - if (!norm_by_times && num_sequences == 1UL) { - TensorCopy(seq, context.GetPlace(), context, padding); - padding->Resize(padding_dims); + const framework::LoDTensor& seq_tensor, + framework::Tensor* padding_tensor, + T padding_value = static_cast(0), + bool norm_by_times = false, size_t lod_level = 0) { + ValidateLoD(seq_tensor, lod_level); + + auto& lod = seq_tensor.lod(); + auto& abs_offset = framework::ToAbsOffset(lod)[lod_level]; + + auto seq_dims = seq_tensor.dims(); + auto padding_dims = padding_tensor->dims(); + int64_t max_seq_len = MaximumSequenceLength(lod, lod_level); + const int64_t seq_num = abs_offset.size() - 1; + const int64_t seq_width = seq_tensor.numel() / seq_dims[0]; + + ValidateShape(seq_dims, abs_offset.back(), padding_dims, max_seq_len, + seq_num, seq_width, padding_layout); + + if (!norm_by_times && seq_num == 1UL) { + TensorCopy(seq_tensor, context.GetPlace(), context, padding_tensor); + padding_tensor->Resize(padding_dims); return; } @@ -109,72 +98,46 @@ class PaddingLoDTensorFunctor { * and at least 8 elements for each thread. 
*/ size_t block_dim_x = - std::min(((((sequence_width + 7) >> 3) + 31) >> 5) << 5, kBlockSize); + std::min(((((seq_width + 7) >> 3) + 31) >> 5) << 5, kBlockSize); size_t block_dim_y = kBlockSize / block_dim_x; dim3 threads(block_dim_x, block_dim_y); - size_t grid_dim_x = (max_sequence_length + block_dim_y - 1) / block_dim_y; - size_t grid_dim_y = num_sequences; + size_t grid_dim_x = (max_seq_len + block_dim_y - 1) / block_dim_y; + size_t grid_dim_y = seq_num; dim3 grid(grid_dim_x, grid_dim_y); - const T* seq_data = seq.data(); - T* padding_data = padding->data(); - if (norm_by_times) { - SequencePaddingKernel<<>>( - padding_data, const_cast(seq_data), - abs_offset_lod[level].CUDAData(context.GetPlace()), sequence_width, - max_sequence_length, num_sequences); - } else { - SequencePaddingKernel<<>>( - padding_data, const_cast(seq_data), - abs_offset_lod[level].CUDAData(context.GetPlace()), sequence_width, - max_sequence_length, num_sequences); - } + const T* seq_data = seq_tensor.data(); + T* padding_data = padding_tensor->data(); + + SequencePaddingKernel<<>>( + padding_data, const_cast(seq_data), + abs_offset.CUDAData(context.GetPlace()), seq_num, max_seq_len, + seq_width, padding_layout, norm_by_times, padding_value); } }; -template -class UnpaddingLoDTensorFunctor { +template +class UnpaddingLoDTensorFunctor { public: void operator()(const platform::CUDADeviceContext& context, - framework::LoDTensor* seq, const framework::Tensor& padding, - bool norm_by_times) { - auto lod = seq->lod(); - PADDLE_ENFORCE_GT(lod.size(), 0UL, - "The lod of LoDTensor seq should not be null."); - - const size_t level = 0; - framework::LoD abs_offset_lod = framework::ToAbsOffset(lod); - - auto seq_dims = seq->dims(); - PADDLE_ENFORCE_EQ(seq_dims[0], - static_cast(abs_offset_lod[level].back()), - "The first dimension of LoDTensor seq should be " - "equal to the sum of all sequences's length."); - - auto padding_dims = padding.dims(); - PADDLE_ENFORCE_EQ(padding_dims.size(), 3UL, - "The 
input padding should be a 3-D Tensor of shape " - "[max_sequnece_length, num_sequences, sequence_width]."); - - int64_t max_sequence_length = MaximumSequenceLength(lod, level); - PADDLE_ENFORCE_EQ(padding_dims[0], max_sequence_length, - "The first dimension of Tensor padding should be " - "the maximum length of all sequences in LoDTensor seq."); - - const int64_t num_sequences = abs_offset_lod[level].size() - 1; - PADDLE_ENFORCE_EQ(padding_dims[1], num_sequences, - "The second dimension of Tensor padding should be " - "the number of sequences in LoDTensor seq."); - - const int64_t sequence_width = seq->numel() / seq_dims[0]; - PADDLE_ENFORCE_EQ(padding_dims[2], sequence_width, - "The third dimension of Tensor padding should be the " - "width of sequence in LoDTensor seq."); - - if (!norm_by_times && num_sequences == 1UL) { - TensorCopy(padding, context.GetPlace(), context, seq); - seq->Resize(seq_dims); + framework::LoDTensor* seq_tensor, + const framework::Tensor& padding_tensor, + bool norm_by_times = false, size_t lod_level = 0) { + ValidateLoD(*seq_tensor, lod_level); + + auto& lod = seq_tensor->lod(); + auto& abs_offset = framework::ToAbsOffset(lod)[lod_level]; + + auto seq_dims = seq_tensor->dims(); + auto padding_dims = padding_tensor.dims(); + int64_t max_seq_len = MaximumSequenceLength(lod, lod_level); + int64_t seq_num = abs_offset.size() - 1; + int64_t seq_width = seq_tensor->numel() / seq_dims[0]; + + if (!norm_by_times && seq_num == 1UL) { + TensorCopy(padding_tensor, context.GetPlace(), context, seq_tensor); + seq_tensor->Resize(seq_dims); return; } @@ -184,32 +147,28 @@ class UnpaddingLoDTensorFunctor { * and at least 8 elements for each thread. 
*/ size_t block_dim_x = - std::min(((((sequence_width + 7) >> 3) + 31) >> 5) << 5, kBlockSize); + std::min(((((seq_width + 7) >> 3) + 31) >> 5) << 5, kBlockSize); size_t block_dim_y = kBlockSize / block_dim_x; dim3 threads(block_dim_x, block_dim_y); - size_t grid_dim_x = (max_sequence_length + block_dim_y - 1) / block_dim_y; - size_t grid_dim_y = num_sequences; + size_t grid_dim_x = (max_seq_len + block_dim_y - 1) / block_dim_y; + size_t grid_dim_y = seq_num; dim3 grid(grid_dim_x, grid_dim_y); - const T* padding_data = padding.data(); - T* seq_data = seq->data(); - if (norm_by_times) { - SequencePaddingKernel<<>>( - const_cast(padding_data), seq_data, - abs_offset_lod[level].CUDAData(context.GetPlace()), sequence_width, - max_sequence_length, num_sequences); - } else { - SequencePaddingKernel<<>>( - const_cast(padding_data), seq_data, - abs_offset_lod[level].CUDAData(context.GetPlace()), sequence_width, - max_sequence_length, num_sequences); - } + const T* padding_data = padding_tensor.data(); + T* seq_data = seq_tensor->data(); + + SequencePaddingKernel<<>>( + const_cast(padding_data), seq_data, + abs_offset.CUDAData(context.GetPlace()), seq_num, max_seq_len, + seq_width, padding_layout, norm_by_times); } }; -template class PaddingLoDTensorFunctor; -template class UnpaddingLoDTensorFunctor; +template class PaddingLoDTensorFunctor; +template class UnpaddingLoDTensorFunctor; } // namespace math } // namespace operators diff --git a/paddle/fluid/operators/math/sequence_padding.h b/paddle/fluid/operators/math/sequence_padding.h index b56e6db1eb..91d205641a 100644 --- a/paddle/fluid/operators/math/sequence_padding.h +++ b/paddle/fluid/operators/math/sequence_padding.h @@ -22,17 +22,50 @@ namespace paddle { namespace operators { namespace math { +enum PaddingLayout { BATCH_LENGTH_WIDTH, LENGTH_BATCH_WIDTH }; + inline static size_t MaximumSequenceLength(const framework::LoD& lod, const size_t level) { - const size_t num_sequences = lod[level].size() - 1; - size_t 
max_sequence_length = 0; - framework::LoD abs_offset_lod = framework::ToAbsOffset(lod); - for (size_t i = 0; i < num_sequences; ++i) { - max_sequence_length = - std::max(max_sequence_length, - abs_offset_lod[level][i + 1] - abs_offset_lod[level][i]); + const size_t seq_num = lod[level].size() - 1; + size_t max_seq_len = 0; + auto abs_offset = framework::ToAbsOffset(lod)[level]; + for (size_t i = 0; i < seq_num; ++i) { + max_seq_len = std::max(max_seq_len, abs_offset[i + 1] - abs_offset[i]); + } + return max_seq_len; +} + +inline static void ValidateLoD(const framework::LoDTensor& seq_tensor, + const size_t& lod_level) { + PADDLE_ENFORCE(lod_level < seq_tensor.lod().size(), + "Invalid `lod_level` which should be at least 0 and less " + "than maximum lod level of `seq_tensor`."); +} + +inline static void ValidateShape(const framework::DDim& seq_tensor_dims, + const size_t& abs_offset_back_value, + const framework::DDim& padding_tensor_dims, + const int64_t& max_seq_len, + const int64_t& seq_num, + const int64_t& seq_width, + const PaddingLayout& padding_layout) { + PADDLE_ENFORCE_EQ(static_cast(seq_tensor_dims[0]), + abs_offset_back_value, + "The 1st dimension of `seq_tensor` should be equal to " + "sum of lengths of all sequences."); + + PADDLE_ENFORCE_EQ(padding_tensor_dims.size(), 3UL, + "`padding_tensor` should be a 3-D tensor."); + + if (padding_layout == BATCH_LENGTH_WIDTH) { + PADDLE_ENFORCE_EQ(padding_tensor_dims, + framework::make_ddim({seq_num, max_seq_len, seq_width})); + } else if (padding_layout == LENGTH_BATCH_WIDTH) { + PADDLE_ENFORCE_EQ(padding_tensor_dims, + framework::make_ddim({max_seq_len, seq_num, seq_width})); + } else { + PADDLE_THROW("Unsupported padding layout."); } - return max_sequence_length; } /* @@ -61,18 +94,23 @@ inline static size_t MaximumSequenceLength(const framework::LoD& lod, * * \note transposition is also done in this functor. 
*/ -template +template class PaddingLoDTensorFunctor { public: - void operator()(const DeviceContext& context, const framework::LoDTensor& seq, - framework::Tensor* padding, bool norm_by_times); + void operator()(const DeviceContext& context, + const framework::LoDTensor& seq_tensor, + framework::Tensor* padding_tensor, + T padding_value = static_cast(0), + bool norm_by_times = false, size_t lod_level = 0); }; -template +template class UnpaddingLoDTensorFunctor { public: - void operator()(const DeviceContext& context, framework::LoDTensor* seq, - const framework::Tensor& padding, bool norm_by_times); + void operator()(const DeviceContext& context, + framework::LoDTensor* seq_tensor, + const framework::Tensor& padding_tensor, + bool norm_by_times = false, size_t lod_level = 0); }; } // namespace math diff --git a/paddle/fluid/operators/sequence_pad_op.cc b/paddle/fluid/operators/sequence_pad_op.cc index 183d38fcc9..f3a6fff0e1 100644 --- a/paddle/fluid/operators/sequence_pad_op.cc +++ b/paddle/fluid/operators/sequence_pad_op.cc @@ -32,7 +32,11 @@ class SequencePadOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(x_dims.size(), 2, "Only support 2-D tensor, rank of Input(X) should be 2."); - auto out_dims = x_dims; + int lod_level = ctx->Attrs().Get("lod_level"); + + int64_t max_len = -1; + int64_t seq_num = -1; + int x_lod_size = -1; if (ctx->IsRuntime()) { framework::Variable* x_var = @@ -40,27 +44,31 @@ class SequencePadOp : public framework::OperatorWithKernel { auto& x_lod = x_var->Get().lod(); - PADDLE_ENFORCE_GE(x_lod.size(), 1, - "Input(X) should be sequences containing lod."); + x_lod_size = x_lod.size(); + + auto x_abs_offset = framework::ToAbsOffset(x_lod)[lod_level]; + + PADDLE_ENFORCE_EQ(x_dims[0], static_cast(x_abs_offset.back()), + "The first dimension of `X` should be equal to sum " + "of all sequences' length."); - auto last_level_lod = x_lod[x_lod.size() - 1]; - size_t max_len = 0; + seq_num = x_abs_offset.size() - 1; - for (size_t i = 1; 
i < last_level_lod.size(); ++i) { - auto seq_len = last_level_lod[i] - last_level_lod[i - 1]; + for (size_t i = 1; i <= seq_num; ++i) { + int64_t seq_len = x_abs_offset[i] - x_abs_offset[i - 1]; max_len = max_len < seq_len ? seq_len : max_len; } - - out_dims[0] = max_len * (last_level_lod.size() - 1); } else { framework::VarDesc* x_desc = boost::get(ctx->GetInputVarPtrs("X")[0]); - PADDLE_ENFORCE_GE(x_desc->GetLoDLevel(), 1, - "Input(X) should be sequences containing lod."); - out_dims[0] = -1; + x_lod_size = x_desc->GetLoDLevel(); } - ctx->SetOutputDim("Out", out_dims); + PADDLE_ENFORCE(lod_level >= 0 && lod_level < x_lod_size, + "Invalid `lod_level` which should be at least 0 and less " + "than maximum lod level of `X`"); + + ctx->SetOutputDim("Out", {seq_num, max_len, x_dims[1]}); } protected: @@ -84,9 +92,11 @@ class SequencePadOpMaker : public framework::OpProtoAndCheckerMaker { "(Tensor) Output variable which would be a common tensor " "without lod. Each sequence would be padded to the maximum " "length."); + AddAttr("lod_level", + "(int, default 0) Specify which level lod to referred to."); AddAttr("pad_value", - "(float, default 0.0) Value to be padded " - "to the end of each sequence."); + "(float, default 0.0) Specify which value to be padded to " + "the end of each sequence."); AddComment(R"DOC( )DOC"); diff --git a/paddle/fluid/operators/sequence_pad_op.h b/paddle/fluid/operators/sequence_pad_op.h index b36465d8e7..6d136b65f1 100644 --- a/paddle/fluid/operators/sequence_pad_op.h +++ b/paddle/fluid/operators/sequence_pad_op.h @@ -16,6 +16,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/operators/math/sequence_padding.h" namespace paddle { namespace operators { @@ -23,39 +24,68 @@ namespace operators { using LoDTensor = framework::LoDTensor; using LoD = framework::LoD; -// @TODO clean code +template +struct CopyFunctor { + LoDTensor* lod_tensor_; + LoDTensor* pad_tensor_; + const LoD& ref_lod_; + const DeviceContext& ctx_; + bool is_lod_to_pad_; + + CopyFunctor(LoDTensor* lod_tensor, const LoD& ref_lod, LoDTensor* pad_tensor, + const DeviceContext& ctx, bool is_lod_to_pad) + : lod_tensor_(lod_tensor), + pad_tensor_(pad_tensor), + ref_lod_(ref_lod), + ctx_(ctx), + is_lod_to_pad_(is_lod_to_pad) {} + + void operator()() const { + /* + auto seq_num = ref_lod_.size() - 1; + auto max_len = pad_tensor_->dims()[0] / seq_num; + + PADDLE_ENFORCE_EQ(max_len * seq_num, pad_tensor_->dims()[0], + "First dimension of padded tensor should be equal to " + "maximum sequence length mulplied by sequence number."); + + for (size_t i = 1; i < ref_lod_.size(); ++i) { + auto seq_start = ref_lod_[i - 1]; + auto seq_end = ref_lod_[i]; + auto pad_start = (i - 1) * max_len; + auto pad_end = pad_start + (seq_end - seq_start); + auto sub_lod_tensor = lod_tensor_->Slice(seq_start, seq_end); + auto sub_pad_tensor = pad_tensor_->Slice(pad_start, pad_end); + if (is_lod_to_pad_) { + framework::TensorCopy(sub_lod_tensor, ctx.GetPlace(), &sub_pad_tensor); + } else { + framework::TensorCopy(sub_pad_tensor, ctx.GetPlace(), &sub_lod_tensor); + } + } + */ + } +}; + template class SequencePadOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x_ptr = ctx.Input("X"); + /* + auto* x = ctx.Input("X"); auto* out_ptr = ctx.Output("Out"); out_ptr->mutable_data(ctx.GetPlace()); + // Resize(); + T pad_value = static_cast(ctx.Attr("pad_value")); + 
math::PaddingLoDTensorFunctor()( + ctx.template device_context(), *x, *, false); + math::SetConstant set_func; set_func(ctx.template device_context(), out_ptr, pad_value); - - auto& x_lod = x_ptr->lod(); - auto& x_last_level_lod = x_lod[x_lod.size() - 1]; - auto seq_num = x_last_level_lod.size() - 1; - auto max_len = out_ptr->dims()[0] / seq_num; - - PADDLE_ENFORCE_EQ(max_len * seq_num, out_ptr->dims()[0], - "First dimension of `Out` should be equal to " - "maximum length mulplied by sequence number."); - - for (size_t i = 1; i < x_last_level_lod.size(); ++i) { - auto x_start = x_last_level_lod[i - 1]; - auto x_end = x_last_level_lod[i]; - auto out_start = (i - 1) * max_len; - auto out_end = out_start + (x_end - x_start); - auto x_sub_tensor = x_ptr->Slice(x_start, x_end); - auto out_sub_tensor = out_ptr->Slice(out_start, out_end); - framework::TensorCopy(x_sub_tensor, ctx.GetPlace(), &out_sub_tensor); - } + */ } }; @@ -63,33 +93,26 @@ template class SequencePadGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { + /* auto* x_ptr = ctx.Input("X"); auto* g_out_ptr = ctx.Input(framework::GradVarName("Out")); auto* g_x_ptr = ctx.Output(framework::GradVarName("X")); math::SetConstant set_func; - set_func(ctx.template device_context(), g_x_ptr, + set_func(ctx.template device_context(), + g_x_ptr, static_cast(0)); auto& x_lod = x_ptr->lod(); auto& x_last_level_lod = x_lod[x_lod.size() - 1]; - auto seq_num = x_last_level_lod.size() - 1; - int64_t max_len = g_out_ptr->dims()[0] / seq_num; - - PADDLE_ENFORCE_EQ(max_len * seq_num, g_out_ptr->dims()[0], - "First dimension of `Out` should be equal to " - "maximum length mulplied by sequence number."); - - for (size_t i = 1; i < x_last_level_lod.size(); ++i) { - auto x_start = x_last_level_lod[i - 1]; - auto x_end = x_last_level_lod[i]; - auto out_start = (i - 1) * max_len; - auto out_end = out_start + (x_end - x_start); - - auto g_out_sub = 
g_out_ptr->Slice(out_start, out_end); - auto g_x_sub = g_x_ptr->Slice(x_start, x_end); - framework::TensorCopy(g_x_sub, ctx.GetPlace(), &g_out_sub); - } + + CopyFunctor copy_func(g_out_ptr, + x_last_level_lod, + g_x_ptr, + ctx, + false); + copy_func(); + */ } }; diff --git a/paddle/fluid/operators/warpctc_op.h b/paddle/fluid/operators/warpctc_op.h index 705cc894c0..1b649be203 100644 --- a/paddle/fluid/operators/warpctc_op.h +++ b/paddle/fluid/operators/warpctc_op.h @@ -161,7 +161,7 @@ class WarpCTCKernel : public framework::OpKernel { static_cast(num_sequences), static_cast(sequence_width)}); warpctc_logits.mutable_data(warpctc_logits_dims, ctx.GetPlace()); - math::PaddingLoDTensorFunctor()( + math::PaddingLoDTensorFunctor()( ctx.template device_context(), *logits, &warpctc_logits, false); const T* warpctc_logits_data = warpctc_logits.data(); @@ -216,7 +216,8 @@ class WarpCTCGradKernel : public framework::OpKernel { logits_grad->mutable_data(ctx.GetPlace()); bool norm_by_times = ctx.Attr("norm_by_times"); - math::UnpaddingLoDTensorFunctor()( + math::UnpaddingLoDTensorFunctor()( ctx.template device_context(), logits_grad, *warpctc_grad, norm_by_times); From 10ec329b7d8613c60d7324395ecc42e10b3ce0c0 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Wed, 23 May 2018 14:28:14 +0000 Subject: [PATCH 03/10] Refine code. 
--- .../fluid/operators/math/sequence_padding.cc | 123 ++++++++-------- .../fluid/operators/math/sequence_padding.cu | 136 +++++++++--------- .../fluid/operators/math/sequence_padding.h | 69 +++++---- .../operators/math/sequence_padding_test.cc | 10 +- paddle/fluid/operators/sequence_pad_op.cc | 2 +- paddle/fluid/operators/warpctc_op.h | 12 +- 6 files changed, 183 insertions(+), 169 deletions(-) diff --git a/paddle/fluid/operators/math/sequence_padding.cc b/paddle/fluid/operators/math/sequence_padding.cc index 2dd2cafa23..5ceb26553c 100644 --- a/paddle/fluid/operators/math/sequence_padding.cc +++ b/paddle/fluid/operators/math/sequence_padding.cc @@ -18,111 +18,114 @@ namespace paddle { namespace operators { namespace math { -template +template void CopyDataCPU(framework::LoDTensor* seq_tensor, - framework::Tensor* padding_tensor, - const framework::Vector& abs_offset, + framework::Tensor* pad_tensor, + const framework::Vector& seq_offset, const int64_t& max_seq_len, const int64_t& seq_width, - bool seq_to_padding, bool norm_by_len) { + bool seq_to_pad, bool norm_by_len, + OutputLayout output_layout) { T* seq_data = seq_tensor->data(); - T* padding_data = padding_tensor->data(); + T* pad_data = pad_tensor->data(); - int64_t seq_num = abs_offset.size() - 1; + int64_t seq_num = seq_offset.size() - 1; for (int64_t i = 0; i < seq_num; ++i) { - int64_t seq_start = abs_offset[i]; - int64_t seq_len = abs_offset[i + 1] - seq_start; - + int64_t seq_start = seq_offset[i]; + int64_t seq_len = seq_offset[i + 1] - seq_start; T scale = norm_by_len ? 
(1.0f / static_cast(seq_len)) : 1.0f; - for (int64_t j = 0; j < seq_len; ++j) { for (int64_t k = 0; k < seq_width; ++k) { - size_t padding_offset = 0; - if (padding_layout == BATCH_LENGTH_WIDTH) { - padding_offset = (i * max_seq_len * seq_width) + j * seq_width + k; + size_t pad_data_idx = 0; + size_t seq_data_idx = (seq_start + j) * seq_width + k; + if (output_layout == kBatchLengthWidth) { + pad_data_idx = (i * max_seq_len + j) * seq_width + k; } else { - padding_offset = (j * seq_num * seq_width) + i * seq_width + k; + pad_data_idx = (j * seq_num + i) * seq_width + k; } - if (seq_to_padding) { - padding_data[padding_offset] = - seq_data[(seq_start + j) * seq_width + k] * scale; + if (seq_to_pad) { + pad_data[pad_data_idx] = seq_data[seq_data_idx] * scale; } else { - seq_data[(seq_start + j) * seq_width + k] = - padding_data[padding_offset] * scale; + seq_data[seq_data_idx] = pad_data[pad_data_idx] * scale; } } } } } -template -class PaddingLoDTensorFunctor { +template +class PaddingLoDTensorFunctor { public: void operator()(const platform::CPUDeviceContext& context, const framework::LoDTensor& seq_tensor, - framework::Tensor* padding_tensor, - T padding_value = static_cast(0), - bool norm_by_times = false, size_t lod_level = 0) { - ValidateLoD(seq_tensor, lod_level); + framework::Tensor* pad_tensor, + T pad_value = static_cast(0), bool norm_by_times = false, + size_t lod_level = 0, + OutputLayout output_layout = kBatchLengthWidth) { + CheckLoD(seq_tensor, lod_level); auto& lod = seq_tensor.lod(); - auto& abs_offset = framework::ToAbsOffset(lod)[lod_level]; + auto& seq_offset = framework::ToAbsOffset(lod)[lod_level]; - auto seq_dims = seq_tensor.dims(); - auto padding_dims = padding_tensor->dims(); - int64_t max_seq_len = MaximumSequenceLength(lod, lod_level); - int64_t seq_num = abs_offset.size() - 1; - int64_t seq_width = seq_tensor.numel() / seq_dims[0]; - int64_t numel = max_seq_len * seq_num * seq_width; + auto seq_tensor_dims = seq_tensor.dims(); + auto 
pad_tensor_dims = pad_tensor->dims(); + int64_t max_seq_len = MaximumSequenceLength(seq_offset); + int64_t seq_num = seq_offset.size() - 1; + int64_t seq_width = seq_tensor.numel() / seq_tensor_dims[0]; - ValidateShape(seq_dims, abs_offset.back(), padding_dims, max_seq_len, - seq_num, seq_width, padding_layout); + CheckDims(seq_tensor_dims, seq_offset.back(), pad_tensor_dims, max_seq_len, + seq_num, seq_width, output_layout); - T* padding_data = padding_tensor->data(); + T* pad_data = pad_tensor->data(); - memset(padding_data, padding_value, numel * sizeof(T)); + memset(pad_data, pad_value, max_seq_len * seq_num * seq_width * sizeof(T)); - CopyDataCPU( - const_cast(&seq_tensor), padding_tensor, - abs_offset, max_seq_len, seq_width, true /* seq_to_padding */, - norm_by_times); + CopyDataCPU(const_cast(&seq_tensor), pad_tensor, + seq_offset, max_seq_len, seq_width, true /* seq_to_pad */, + norm_by_times, output_layout); } }; -template -class UnpaddingLoDTensorFunctor { +template +class UnpaddingLoDTensorFunctor { public: void operator()(const platform::CPUDeviceContext& context, framework::LoDTensor* seq_tensor, - const framework::Tensor& padding_tensor, - bool norm_by_times = false, size_t lod_level = 0) { - ValidateLoD(*seq_tensor, lod_level); + const framework::Tensor& pad_tensor, + bool norm_by_times = false, size_t lod_level = 0, + OutputLayout output_layout = kBatchLengthWidth) { + CheckLoD(*seq_tensor, lod_level); auto& lod = seq_tensor->lod(); - auto& abs_offset = framework::ToAbsOffset(lod)[lod_level]; + auto& seq_offset = framework::ToAbsOffset(lod)[lod_level]; - auto& seq_dims = seq_tensor->dims(); - auto& padding_dims = padding_tensor.dims(); - int64_t max_seq_len = MaximumSequenceLength(lod, lod_level); - int64_t seq_num = abs_offset.size() - 1; - int64_t seq_width = seq_tensor->numel() / seq_dims[0]; + auto& seq_tensor_dims = seq_tensor->dims(); + auto& pad_tensor_dims = pad_tensor.dims(); + int64_t max_seq_len = MaximumSequenceLength(seq_offset); + 
int64_t seq_num = seq_offset.size() - 1; + int64_t seq_width = seq_tensor->numel() / seq_tensor_dims[0]; - ValidateShape(seq_dims, abs_offset.back(), padding_dims, max_seq_len, - seq_num, seq_width, padding_layout); + CheckDims(seq_tensor_dims, seq_offset.back(), pad_tensor_dims, max_seq_len, + seq_num, seq_width, output_layout); T* seq_data = seq_tensor->data(); memset(seq_data, static_cast(0), seq_tensor->numel() * sizeof(T)); - CopyDataCPU( - seq_tensor, const_cast(&padding_tensor), abs_offset, - max_seq_len, seq_width, false /* seq_to_padding */, norm_by_times); + CopyDataCPU(seq_tensor, const_cast(&pad_tensor), + seq_offset, max_seq_len, seq_width, false /* seq_to_pad */, + norm_by_times, output_layout); } }; -template class PaddingLoDTensorFunctor; -template class UnpaddingLoDTensorFunctor; +template class PaddingLoDTensorFunctor; +template class PaddingLoDTensorFunctor; +template class PaddingLoDTensorFunctor; +template class PaddingLoDTensorFunctor; + +template class UnpaddingLoDTensorFunctor; +template class UnpaddingLoDTensorFunctor; +template class UnpaddingLoDTensorFunctor; +template class UnpaddingLoDTensorFunctor; } // namespace math } // namespace operators diff --git a/paddle/fluid/operators/math/sequence_padding.cu b/paddle/fluid/operators/math/sequence_padding.cu index 2377bef024..20e3e3de2a 100644 --- a/paddle/fluid/operators/math/sequence_padding.cu +++ b/paddle/fluid/operators/math/sequence_padding.cu @@ -21,74 +21,74 @@ namespace math { template __global__ void SequencePaddingKernel( - T* padding_data, T* seq_data, const size_t* abs_offset, - const size_t& seq_num, const size_t& max_seq_len, const size_t& seq_width, - const PaddingLayout& padding_layout, bool norm_by_times = false, - const T& padding_value = 0) { - size_t padding_idx = blockIdx.y; - size_t seq_start = abs_offset[padding_idx]; - size_t seq_len = abs_offset[padding_idx + 1] - seq_start; + T* pad_data, T* seq_data, const size_t* seq_offset, const size_t& seq_num, + const size_t& 
max_seq_len, const size_t& seq_width, bool norm_by_times, + const T& pad_value, const OutputLayout& output_layout) { + size_t seq_idx = blockIdx.y; + size_t seq_start = seq_offset[seq_idx]; + size_t seq_len = seq_offset[seq_idx + 1] - seq_start; - size_t seq_idx = blockIdx.x * blockDim.y + threadIdx.y; + size_t seq_step_idx = blockIdx.x * blockDim.y + threadIdx.y; - size_t seq_offset = (seq_start + seq_idx) * seq_width; + size_t seq_data_offset = (seq_start + seq_step_idx) * seq_width; - size_t padding_offset = 0; + size_t pad_data_offset = 0; - if (padding_layout == LENGTH_BATCH_WIDTH) { - padding_offset = (seq_idx * seq_num + padding_idx) * seq_width; + if (output_layout == kLengthBatchWidth) { + pad_data_offset = (seq_step_idx * seq_num + seq_idx) * seq_width; } else { - padding_offset = (padding_idx * max_seq_len + seq_idx) * seq_width; + pad_data_offset = (seq_idx * max_seq_len + seq_step_idx) * seq_width; } - if (seq_idx < seq_len) { + if (seq_step_idx < seq_len) { T scale = norm_by_times ? 
(1.0f / static_cast(seq_len)) : 1.0f; if (Padding) { - /* sequence -> padding */ + /* seq -> pad */ for (size_t i = threadIdx.x; i < seq_width; i += blockDim.x) { - padding_data[padding_offset + i] = scale * seq_data[seq_offset + i]; + pad_data[pad_data_offset + i] = scale * seq_data[seq_data_offset + i]; } } else { - /* padding -> sequence */ + /* pad -> seq */ for (size_t i = threadIdx.x; i < seq_width; i += blockDim.x) { - seq_data[seq_offset + i] = scale * padding_data[padding_offset + i]; + seq_data[seq_data_offset + i] = scale * pad_data[pad_data_offset + i]; } } - } else if (seq_idx < max_seq_len) { + } else if (seq_step_idx < max_seq_len) { if (Padding) { - /* sequence -> padding */ + /* seq -> pad */ for (size_t i = threadIdx.x; i < seq_width; i += blockDim.x) { - padding_data[padding_offset + i] = padding_value; + pad_data[pad_data_offset + i] = pad_value; } } } } -template -class PaddingLoDTensorFunctor { +template +class PaddingLoDTensorFunctor { public: void operator()(const platform::CUDADeviceContext& context, const framework::LoDTensor& seq_tensor, - framework::Tensor* padding_tensor, - T padding_value = static_cast(0), - bool norm_by_times = false, size_t lod_level = 0) { - ValidateLoD(seq_tensor, lod_level); + framework::Tensor* pad_tensor, + T pad_value = static_cast(0), bool norm_by_times = false, + size_t lod_level = 0, + OutputLayout output_layout = kBatchLengthWidth) { + CheckLoD(seq_tensor, lod_level); auto& lod = seq_tensor.lod(); - auto& abs_offset = framework::ToAbsOffset(lod)[lod_level]; + auto& seq_offset = framework::ToAbsOffset(lod)[lod_level]; - auto seq_dims = seq_tensor.dims(); - auto padding_dims = padding_tensor->dims(); - int64_t max_seq_len = MaximumSequenceLength(lod, lod_level); - const int64_t seq_num = abs_offset.size() - 1; - const int64_t seq_width = seq_tensor.numel() / seq_dims[0]; + auto seq_tensor_dims = seq_tensor.dims(); + auto pad_tensor_dims = pad_tensor->dims(); + int64_t max_seq_len = 
MaximumSequenceLength(seq_offset); + int64_t seq_num = seq_offset.size() - 1; + int64_t seq_width = seq_tensor.numel() / seq_tensor_dims[0]; - ValidateShape(seq_dims, abs_offset.back(), padding_dims, max_seq_len, - seq_num, seq_width, padding_layout); + CheckDims(seq_tensor_dims, seq_offset.back(), pad_tensor_dims, max_seq_len, + seq_num, seq_width, output_layout); if (!norm_by_times && seq_num == 1UL) { - TensorCopy(seq_tensor, context.GetPlace(), context, padding_tensor); - padding_tensor->Resize(padding_dims); + TensorCopy(seq_tensor, context.GetPlace(), context, pad_tensor); + pad_tensor->Resize(pad_tensor_dims); return; } @@ -107,37 +107,40 @@ class PaddingLoDTensorFunctor { dim3 grid(grid_dim_x, grid_dim_y); const T* seq_data = seq_tensor.data(); - T* padding_data = padding_tensor->data(); + T* pad_data = pad_tensor->data(); SequencePaddingKernel<<>>( - padding_data, const_cast(seq_data), - abs_offset.CUDAData(context.GetPlace()), seq_num, max_seq_len, - seq_width, padding_layout, norm_by_times, padding_value); + pad_data, const_cast(seq_data), + seq_offset.CUDAData(context.GetPlace()), seq_num, max_seq_len, + seq_width, norm_by_times, pad_value, output_layout); } }; -template -class UnpaddingLoDTensorFunctor { +template +class UnpaddingLoDTensorFunctor { public: void operator()(const platform::CUDADeviceContext& context, framework::LoDTensor* seq_tensor, - const framework::Tensor& padding_tensor, - bool norm_by_times = false, size_t lod_level = 0) { - ValidateLoD(*seq_tensor, lod_level); + const framework::Tensor& pad_tensor, + bool norm_by_times = false, size_t lod_level = 0, + OutputLayout output_layout = kBatchLengthWidth) { + CheckLoD(*seq_tensor, lod_level); auto& lod = seq_tensor->lod(); - auto& abs_offset = framework::ToAbsOffset(lod)[lod_level]; + auto& seq_offset = framework::ToAbsOffset(lod)[lod_level]; - auto seq_dims = seq_tensor->dims(); - auto padding_dims = padding_tensor.dims(); - int64_t max_seq_len = MaximumSequenceLength(lod, lod_level); - 
int64_t seq_num = abs_offset.size() - 1; - int64_t seq_width = seq_tensor->numel() / seq_dims[0]; + auto seq_tensor_dims = seq_tensor->dims(); + auto pad_tensor_dims = pad_tensor.dims(); + int64_t max_seq_len = MaximumSequenceLength(seq_offset); + int64_t seq_num = seq_offset.size() - 1; + int64_t seq_width = seq_tensor->numel() / seq_tensor_dims[0]; + + CheckDims(seq_tensor_dims, seq_offset.back(), pad_tensor_dims, max_seq_len, + seq_num, seq_width, output_layout); if (!norm_by_times && seq_num == 1UL) { - TensorCopy(padding_tensor, context.GetPlace(), context, seq_tensor); - seq_tensor->Resize(seq_dims); + TensorCopy(pad_tensor, context.GetPlace(), context, seq_tensor); + seq_tensor->Resize(seq_tensor_dims); return; } @@ -155,20 +158,25 @@ class UnpaddingLoDTensorFunctor(); + const T* pad_data = pad_tensor.data(); T* seq_data = seq_tensor->data(); - SequencePaddingKernel<<>>( - const_cast(padding_data), seq_data, - abs_offset.CUDAData(context.GetPlace()), seq_num, max_seq_len, - seq_width, padding_layout, norm_by_times); + SequencePaddingKernel<<>>( + const_cast(pad_data), seq_data, + seq_offset.CUDAData(context.GetPlace()), seq_num, max_seq_len, + seq_width, norm_by_times, static_cast(0), output_layout); } }; -template class PaddingLoDTensorFunctor; -template class UnpaddingLoDTensorFunctor; +template class PaddingLoDTensorFunctor; +template class PaddingLoDTensorFunctor; +template class PaddingLoDTensorFunctor; +template class PaddingLoDTensorFunctor; + +template class UnpaddingLoDTensorFunctor; +template class UnpaddingLoDTensorFunctor; +template class UnpaddingLoDTensorFunctor; +template class UnpaddingLoDTensorFunctor; } // namespace math } // namespace operators diff --git a/paddle/fluid/operators/math/sequence_padding.h b/paddle/fluid/operators/math/sequence_padding.h index 91d205641a..44d6404335 100644 --- a/paddle/fluid/operators/math/sequence_padding.h +++ b/paddle/fluid/operators/math/sequence_padding.h @@ -22,49 +22,46 @@ namespace paddle { namespace 
operators { namespace math { -enum PaddingLayout { BATCH_LENGTH_WIDTH, LENGTH_BATCH_WIDTH }; +enum OutputLayout { kBatchLengthWidth = 0, kLengthBatchWidth }; -inline static size_t MaximumSequenceLength(const framework::LoD& lod, - const size_t level) { - const size_t seq_num = lod[level].size() - 1; +inline static size_t MaximumSequenceLength( + const framework::Vector& seq_offset) { + size_t seq_num = seq_offset.size() - 1; size_t max_seq_len = 0; - auto abs_offset = framework::ToAbsOffset(lod)[level]; for (size_t i = 0; i < seq_num; ++i) { - max_seq_len = std::max(max_seq_len, abs_offset[i + 1] - abs_offset[i]); + max_seq_len = std::max(max_seq_len, seq_offset[i + 1] - seq_offset[i]); } return max_seq_len; } -inline static void ValidateLoD(const framework::LoDTensor& seq_tensor, - const size_t& lod_level) { +inline static void CheckLoD(const framework::LoDTensor& seq_tensor, + const size_t& lod_level) { PADDLE_ENFORCE(lod_level < seq_tensor.lod().size(), - "Invalid `lod_level` which should be at least 0 and less " - "than maximum lod level of `seq_tensor`."); + "Invalid lod level which should be at least 0 and less " + "than maximum lod level of sequence tensor."); } -inline static void ValidateShape(const framework::DDim& seq_tensor_dims, - const size_t& abs_offset_back_value, - const framework::DDim& padding_tensor_dims, - const int64_t& max_seq_len, - const int64_t& seq_num, - const int64_t& seq_width, - const PaddingLayout& padding_layout) { - PADDLE_ENFORCE_EQ(static_cast(seq_tensor_dims[0]), - abs_offset_back_value, - "The 1st dimension of `seq_tensor` should be equal to " - "sum of lengths of all sequences."); +inline static void CheckDims(const framework::DDim& seq_tensor_dims, + const size_t& last_offset, + const framework::DDim& pad_tensor_dims, + const int64_t& max_seq_len, const int64_t& seq_num, + const int64_t& seq_width, + const OutputLayout& output_layout) { + PADDLE_ENFORCE_EQ(static_cast(seq_tensor_dims[0]), last_offset, + "Value of 1st 
dimension of the sequence tensor should be " + "equal to sum of lengths of all sequences."); - PADDLE_ENFORCE_EQ(padding_tensor_dims.size(), 3UL, - "`padding_tensor` should be a 3-D tensor."); + PADDLE_ENFORCE_EQ(pad_tensor_dims.size(), 3UL, + "Padded tensor should be a 3-D tensor."); - if (padding_layout == BATCH_LENGTH_WIDTH) { - PADDLE_ENFORCE_EQ(padding_tensor_dims, + if (output_layout == kBatchLengthWidth) { + PADDLE_ENFORCE_EQ(pad_tensor_dims, framework::make_ddim({seq_num, max_seq_len, seq_width})); - } else if (padding_layout == LENGTH_BATCH_WIDTH) { - PADDLE_ENFORCE_EQ(padding_tensor_dims, + } else if (output_layout == kLengthBatchWidth) { + PADDLE_ENFORCE_EQ(pad_tensor_dims, framework::make_ddim({max_seq_len, seq_num, seq_width})); } else { - PADDLE_THROW("Unsupported padding layout."); + PADDLE_THROW("Unsupported output layout."); } } @@ -94,23 +91,25 @@ inline static void ValidateShape(const framework::DDim& seq_tensor_dims, * * \note transposition is also done in this functor. 
*/ -template +template class PaddingLoDTensorFunctor { public: void operator()(const DeviceContext& context, const framework::LoDTensor& seq_tensor, - framework::Tensor* padding_tensor, - T padding_value = static_cast(0), - bool norm_by_times = false, size_t lod_level = 0); + framework::Tensor* pad_tensor, + T pad_value = static_cast(0), bool norm_by_times = false, + size_t lod_level = 0, + OutputLayout output_layout = kBatchLengthWidth); }; -template +template class UnpaddingLoDTensorFunctor { public: void operator()(const DeviceContext& context, framework::LoDTensor* seq_tensor, - const framework::Tensor& padding_tensor, - bool norm_by_times = false, size_t lod_level = 0); + const framework::Tensor& pad_tensor, + bool norm_by_times = false, size_t lod_level = 0, + OutputLayout output_layout = kBatchLengthWidth); }; } // namespace math diff --git a/paddle/fluid/operators/math/sequence_padding_test.cc b/paddle/fluid/operators/math/sequence_padding_test.cc index b0c201db0c..82459274c4 100644 --- a/paddle/fluid/operators/math/sequence_padding_test.cc +++ b/paddle/fluid/operators/math/sequence_padding_test.cc @@ -46,20 +46,24 @@ void TestSequencePadding(const paddle::framework::LoD& lod, } const size_t max_sequence_length = - paddle::operators::math::MaximumSequenceLength(lod, level); + paddle::operators::math::MaximumSequenceLength(lod[level]); const size_t num_sequences = lod[level].size() - 1; auto padding_dims = paddle::framework::make_ddim({static_cast(max_sequence_length), static_cast(num_sequences), static_cast(sequence_width)}); + padding.mutable_data(padding_dims, *place); + paddle::operators::math::PaddingLoDTensorFunctor()( - *context, seq, &padding, false); + *context, seq, &padding, 0, false, 0, + paddle::operators::math::kLengthBatchWidth); seq_back.set_lod(lod); seq_back.mutable_data(seq_dims, *place); paddle::operators::math::UnpaddingLoDTensorFunctor()( - *context, &seq_back, padding, false); + *context, &seq_back, padding, false, 0, + 
paddle::operators::math::kLengthBatchWidth); if (paddle::platform::is_cpu_place(*place)) { cpu_seq_back = seq_back; diff --git a/paddle/fluid/operators/sequence_pad_op.cc b/paddle/fluid/operators/sequence_pad_op.cc index f3a6fff0e1..dc79b252c7 100644 --- a/paddle/fluid/operators/sequence_pad_op.cc +++ b/paddle/fluid/operators/sequence_pad_op.cc @@ -54,7 +54,7 @@ class SequencePadOp : public framework::OperatorWithKernel { seq_num = x_abs_offset.size() - 1; - for (size_t i = 1; i <= seq_num; ++i) { + for (int64_t i = 1; i <= seq_num; ++i) { int64_t seq_len = x_abs_offset[i] - x_abs_offset[i - 1]; max_len = max_len < seq_len ? seq_len : max_len; } diff --git a/paddle/fluid/operators/warpctc_op.h b/paddle/fluid/operators/warpctc_op.h index 1b649be203..075eb010c5 100644 --- a/paddle/fluid/operators/warpctc_op.h +++ b/paddle/fluid/operators/warpctc_op.h @@ -155,15 +155,16 @@ class WarpCTCKernel : public framework::OpKernel { // warpctc needs sequences data stored in transposed padding format Tensor warpctc_logits; const size_t max_sequence_length = - math::MaximumSequenceLength(logits_lod, level); + math::MaximumSequenceLength(logits_lod[level]); auto warpctc_logits_dims = framework::make_ddim({static_cast(max_sequence_length), static_cast(num_sequences), static_cast(sequence_width)}); warpctc_logits.mutable_data(warpctc_logits_dims, ctx.GetPlace()); - math::PaddingLoDTensorFunctor()( + math::PaddingLoDTensorFunctor()( ctx.template device_context(), *logits, &warpctc_logits, - false); + static_cast(0), false /* norm_by_times */, 0, + math::kLengthBatchWidth); const T* warpctc_logits_data = warpctc_logits.data(); std::vector warpctc_label_lengths(num_sequences); @@ -216,10 +217,9 @@ class WarpCTCGradKernel : public framework::OpKernel { logits_grad->mutable_data(ctx.GetPlace()); bool norm_by_times = ctx.Attr("norm_by_times"); - math::UnpaddingLoDTensorFunctor()( + math::UnpaddingLoDTensorFunctor()( ctx.template device_context(), logits_grad, - *warpctc_grad, 
norm_by_times); + *warpctc_grad, norm_by_times, 0, math::kLengthBatchWidth); const T* loss_grad_data = loss_grad->data(); math::ScaleLoDTensorFunctor()( From 3c749fae43765a1543b450a9a21ac514a1d9a535 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Thu, 16 Aug 2018 20:53:47 +0800 Subject: [PATCH 04/10] update CPU sequence_padding functor --- .../fluid/operators/math/sequence_padding.cc | 149 +++++++++--------- .../fluid/operators/math/sequence_padding.h | 56 +++---- .../operators/math/sequence_padding_test.cc | 6 +- paddle/fluid/operators/warpctc_op.h | 10 +- 4 files changed, 108 insertions(+), 113 deletions(-) diff --git a/paddle/fluid/operators/math/sequence_padding.cc b/paddle/fluid/operators/math/sequence_padding.cc index 5ceb26553c..e8ccf006ad 100644 --- a/paddle/fluid/operators/math/sequence_padding.cc +++ b/paddle/fluid/operators/math/sequence_padding.cc @@ -18,37 +18,45 @@ namespace paddle { namespace operators { namespace math { +enum CopyType { kSeqToPad, kPadToSeq }; + template -void CopyDataCPU(framework::LoDTensor* seq_tensor, - framework::Tensor* pad_tensor, - const framework::Vector& seq_offset, - const int64_t& max_seq_len, const int64_t& seq_width, - bool seq_to_pad, bool norm_by_len, - OutputLayout output_layout) { - T* seq_data = seq_tensor->data(); - T* pad_data = pad_tensor->data(); - - int64_t seq_num = seq_offset.size() - 1; - - for (int64_t i = 0; i < seq_num; ++i) { - int64_t seq_start = seq_offset[i]; - int64_t seq_len = seq_offset[i + 1] - seq_start; - T scale = norm_by_len ? 
(1.0f / static_cast(seq_len)) : 1.0f; - for (int64_t j = 0; j < seq_len; ++j) { - for (int64_t k = 0; k < seq_width; ++k) { - size_t pad_data_idx = 0; - size_t seq_data_idx = (seq_start + j) * seq_width + k; - if (output_layout == kBatchLengthWidth) { - pad_data_idx = (i * max_seq_len + j) * seq_width + k; - } else { - pad_data_idx = (j * seq_num + i) * seq_width + k; - } - if (seq_to_pad) { - pad_data[pad_data_idx] = seq_data[seq_data_idx] * scale; - } else { - seq_data[seq_data_idx] = pad_data[pad_data_idx] * scale; +void CopyValidData(framework::Tensor* dst_tensor, + const framework::Tensor* src_tensor, + const framework::Vector& seq_offsets, + int pad_seq_len, int step_width, bool norm_by_len, + CopyType type, PadLayout layout) { + int seq_num = seq_offsets.size() - 1; + const T* src_data = src_tensor->data(); + T* dst_data = dst_tensor->data(); + + int seq_cpy_gap = step_width; + int pad_cpy_gap = + layout == kBatchLengthWidth ? step_width : seq_num * step_width; + for (int seq_idx = 0; seq_idx < seq_num; ++seq_idx) { + int valid_seq_len = seq_offsets[seq_idx + 1] - seq_offsets[seq_idx]; + PADDLE_ENFORCE_GE( + pad_seq_len, valid_seq_len, + "The padded sequence length can not be less than its original length."); + int seq_data_offset = seq_offsets[seq_idx] * step_width; + int pad_data_offset = layout == kBatchLengthWidth + ? seq_idx * pad_seq_len * step_width + : seq_idx * step_width; + float scale = 1.0f / static_cast(valid_seq_len); + + for (int step_idx = 0; step_idx < valid_seq_len; ++step_idx) { + const T* src = + src_data + (type == kSeqToPad ? seq_data_offset : pad_data_offset); + T* dst = + dst_data + (type == kSeqToPad ? 
pad_data_offset : seq_data_offset); + memcpy(dst, src, step_width * sizeof(T)); + if (norm_by_len) { + for (int i = 0; i < step_width; ++i) { + *(dst + i) *= scale; } } + seq_data_offset += seq_cpy_gap; + pad_data_offset += pad_cpy_gap; } } } @@ -58,31 +66,37 @@ class PaddingLoDTensorFunctor { public: void operator()(const platform::CPUDeviceContext& context, const framework::LoDTensor& seq_tensor, - framework::Tensor* pad_tensor, - T pad_value = static_cast(0), bool norm_by_times = false, - size_t lod_level = 0, - OutputLayout output_layout = kBatchLengthWidth) { - CheckLoD(seq_tensor, lod_level); - - auto& lod = seq_tensor.lod(); - auto& seq_offset = framework::ToAbsOffset(lod)[lod_level]; - + framework::LoDTensor* pad_tensor, + std::vector pad_value = {0}, int pad_seq_len = -1, + int lod_level = 0, bool norm_by_times = false, + const PadLayout layout = kBatchLengthWidth) { + auto seq_offsets = framework::ToAbsOffset(seq_tensor.lod())[lod_level]; auto seq_tensor_dims = seq_tensor.dims(); auto pad_tensor_dims = pad_tensor->dims(); - int64_t max_seq_len = MaximumSequenceLength(seq_offset); - int64_t seq_num = seq_offset.size() - 1; - int64_t seq_width = seq_tensor.numel() / seq_tensor_dims[0]; + if (pad_seq_len == -1) { + pad_seq_len = MaximumSequenceLength(seq_offsets); + } + int step_width = seq_tensor.numel() / seq_tensor_dims[0]; - CheckDims(seq_tensor_dims, seq_offset.back(), pad_tensor_dims, max_seq_len, - seq_num, seq_width, output_layout); + CheckDims(seq_tensor_dims, pad_tensor_dims, seq_offsets, pad_seq_len, + step_width, layout); + PADDLE_ENFORCE(pad_value.size() == 1 || + static_cast(pad_value.size()) == step_width, + "The size of 'pad_value' can only be 1 or be equal to the " + "'step_width'."); - T* pad_data = pad_tensor->data(); + if (pad_value.size() == 1) { + pad_value = std::vector(step_width, pad_value[0]); + } - memset(pad_data, pad_value, max_seq_len * seq_num * seq_width * sizeof(T)); + // fill padding value + T* pad_data = pad_tensor->data(); 
+ for (int i = 0; i < pad_tensor->numel() / step_width; ++i) { + memcpy(pad_data, pad_value.data(), step_width * sizeof(T)); + } - CopyDataCPU(const_cast(&seq_tensor), pad_tensor, - seq_offset, max_seq_len, seq_width, true /* seq_to_pad */, - norm_by_times, output_layout); + CopyValidData(pad_tensor, &seq_tensor, seq_offsets, pad_seq_len, + step_width, norm_by_times, kSeqToPad, layout); } }; @@ -90,30 +104,23 @@ template class UnpaddingLoDTensorFunctor { public: void operator()(const platform::CPUDeviceContext& context, - framework::LoDTensor* seq_tensor, - const framework::Tensor& pad_tensor, - bool norm_by_times = false, size_t lod_level = 0, - OutputLayout output_layout = kBatchLengthWidth) { - CheckLoD(*seq_tensor, lod_level); - - auto& lod = seq_tensor->lod(); - auto& seq_offset = framework::ToAbsOffset(lod)[lod_level]; - - auto& seq_tensor_dims = seq_tensor->dims(); - auto& pad_tensor_dims = pad_tensor.dims(); - int64_t max_seq_len = MaximumSequenceLength(seq_offset); - int64_t seq_num = seq_offset.size() - 1; - int64_t seq_width = seq_tensor->numel() / seq_tensor_dims[0]; - - CheckDims(seq_tensor_dims, seq_offset.back(), pad_tensor_dims, max_seq_len, - seq_num, seq_width, output_layout); - - T* seq_data = seq_tensor->data(); - memset(seq_data, static_cast(0), seq_tensor->numel() * sizeof(T)); - - CopyDataCPU(seq_tensor, const_cast(&pad_tensor), - seq_offset, max_seq_len, seq_width, false /* seq_to_pad */, - norm_by_times, output_layout); + const framework::LoDTensor& pad_tensor, + framework::LoDTensor* seq_tensor, int pad_seq_len = -1, + int lod_level = 0, bool norm_by_times = false, + const PadLayout& layout = kBatchLengthWidth) { + auto seq_offsets = framework::ToAbsOffset(seq_tensor->lod())[lod_level]; + auto seq_tensor_dims = seq_tensor->dims(); + auto pad_tensor_dims = pad_tensor.dims(); + if (pad_seq_len == -1) { + pad_seq_len = MaximumSequenceLength(seq_offsets); + } + int step_width = seq_tensor->numel() / seq_tensor_dims[0]; + + 
CheckDims(seq_tensor_dims, pad_tensor_dims, seq_offsets, pad_seq_len, + step_width, layout); + + CopyValidData(seq_tensor, &pad_tensor, seq_offsets, pad_seq_len, + step_width, norm_by_times, kPadToSeq, layout); } }; diff --git a/paddle/fluid/operators/math/sequence_padding.h b/paddle/fluid/operators/math/sequence_padding.h index 44d6404335..d5790e2ba2 100644 --- a/paddle/fluid/operators/math/sequence_padding.h +++ b/paddle/fluid/operators/math/sequence_padding.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include +#include #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/platform/device_context.h" @@ -22,7 +23,7 @@ namespace paddle { namespace operators { namespace math { -enum OutputLayout { kBatchLengthWidth = 0, kLengthBatchWidth }; +enum PadLayout { kBatchLengthWidth = 0, kLengthBatchWidth }; inline static size_t MaximumSequenceLength( const framework::Vector& seq_offset) { @@ -34,35 +35,22 @@ inline static size_t MaximumSequenceLength( return max_seq_len; } -inline static void CheckLoD(const framework::LoDTensor& seq_tensor, - const size_t& lod_level) { - PADDLE_ENFORCE(lod_level < seq_tensor.lod().size(), - "Invalid lod level which should be at least 0 and less " - "than maximum lod level of sequence tensor."); -} - inline static void CheckDims(const framework::DDim& seq_tensor_dims, - const size_t& last_offset, const framework::DDim& pad_tensor_dims, - const int64_t& max_seq_len, const int64_t& seq_num, - const int64_t& seq_width, - const OutputLayout& output_layout) { - PADDLE_ENFORCE_EQ(static_cast(seq_tensor_dims[0]), last_offset, + const framework::Vector& seq_offset, + int64_t padded_seq_len, int64_t step_width, + const PadLayout& layout) { + PADDLE_ENFORCE_EQ(static_cast(seq_tensor_dims[0]), seq_offset.back(), "Value of 1st dimension of the sequence tensor should be " "equal to sum of lengths of all sequences."); - PADDLE_ENFORCE_EQ(pad_tensor_dims.size(), 3UL, - "Padded tensor should be a 3-D tensor."); + 
PADDLE_ENFORCE(seq_tensor_dims.size() == 1 || seq_tensor_dims.size() == 2, + "seq_tensor's rank should be 1 or 2."); - if (output_layout == kBatchLengthWidth) { - PADDLE_ENFORCE_EQ(pad_tensor_dims, - framework::make_ddim({seq_num, max_seq_len, seq_width})); - } else if (output_layout == kLengthBatchWidth) { - PADDLE_ENFORCE_EQ(pad_tensor_dims, - framework::make_ddim({max_seq_len, seq_num, seq_width})); - } else { - PADDLE_THROW("Unsupported output layout."); - } + PADDLE_ENFORCE(seq_tensor_dims.size() + 1 == pad_tensor_dims.size() || + seq_tensor_dims.size() == pad_tensor_dims.size(), + "pad_tensor's rank should be 1 greater than seq_tensor's " + "rank, or be equal with it."); } /* @@ -94,22 +82,22 @@ inline static void CheckDims(const framework::DDim& seq_tensor_dims, template class PaddingLoDTensorFunctor { public: - void operator()(const DeviceContext& context, + void operator()(const platform::CPUDeviceContext& context, const framework::LoDTensor& seq_tensor, - framework::Tensor* pad_tensor, - T pad_value = static_cast(0), bool norm_by_times = false, - size_t lod_level = 0, - OutputLayout output_layout = kBatchLengthWidth); + framework::LoDTensor* pad_tensor, + std::vector pad_value = {0}, int pad_seq_len = -1, + int lod_level = 0, bool norm_by_times = false, + const PadLayout layout = kBatchLengthWidth); }; template class UnpaddingLoDTensorFunctor { public: - void operator()(const DeviceContext& context, - framework::LoDTensor* seq_tensor, - const framework::Tensor& pad_tensor, - bool norm_by_times = false, size_t lod_level = 0, - OutputLayout output_layout = kBatchLengthWidth); + void operator()(const platform::CPUDeviceContext& context, + const framework::LoDTensor& pad_tensor, + framework::LoDTensor* seq_tensor, int pad_seq_len = -1, + int lod_level = 0, bool norm_by_times = false, + const PadLayout& layout = kBatchLengthWidth); }; } // namespace math diff --git a/paddle/fluid/operators/math/sequence_padding_test.cc 
b/paddle/fluid/operators/math/sequence_padding_test.cc index 82459274c4..3171c7c33e 100644 --- a/paddle/fluid/operators/math/sequence_padding_test.cc +++ b/paddle/fluid/operators/math/sequence_padding_test.cc @@ -23,7 +23,7 @@ void TestSequencePadding(const paddle::framework::LoD& lod, paddle::framework::LoDTensor cpu_seq_back; paddle::framework::LoDTensor seq; paddle::framework::LoDTensor seq_back; - paddle::framework::Tensor padding; + paddle::framework::LoDTensor padding; const size_t level = lod.size() - 1; auto seq_dims = @@ -56,13 +56,13 @@ void TestSequencePadding(const paddle::framework::LoD& lod, padding.mutable_data(padding_dims, *place); paddle::operators::math::PaddingLoDTensorFunctor()( - *context, seq, &padding, 0, false, 0, + *context, seq, &padding, {0}, -1, 0, false, paddle::operators::math::kLengthBatchWidth); seq_back.set_lod(lod); seq_back.mutable_data(seq_dims, *place); paddle::operators::math::UnpaddingLoDTensorFunctor()( - *context, &seq_back, padding, false, 0, + *context, padding, &seq_back, -1, 0, false, paddle::operators::math::kLengthBatchWidth); if (paddle::platform::is_cpu_place(*place)) { diff --git a/paddle/fluid/operators/warpctc_op.h b/paddle/fluid/operators/warpctc_op.h index cb56f42a8d..6cbf985039 100644 --- a/paddle/fluid/operators/warpctc_op.h +++ b/paddle/fluid/operators/warpctc_op.h @@ -153,7 +153,7 @@ class WarpCTCKernel : public framework::OpKernel { framework::make_ddim({static_cast(num_sequences), 1}); // warpctc needs sequences data stored in transposed padding format - Tensor warpctc_logits; + LoDTensor warpctc_logits; const size_t max_sequence_length = math::MaximumSequenceLength(logits_lod[level]); auto warpctc_logits_dims = @@ -163,7 +163,7 @@ class WarpCTCKernel : public framework::OpKernel { warpctc_logits.mutable_data(warpctc_logits_dims, ctx.GetPlace()); math::PaddingLoDTensorFunctor()( ctx.template device_context(), *logits, &warpctc_logits, - static_cast(0), false /* norm_by_times */, 0, + {static_cast(0)}, -1, 
0, false /* norm_by_times */, math::kLengthBatchWidth); const T* warpctc_logits_data = warpctc_logits.data(); @@ -210,15 +210,15 @@ template class WarpCTCGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* warpctc_grad = ctx.Input("WarpCTCGrad"); + auto* warpctc_grad = ctx.Input("WarpCTCGrad"); auto* logits_grad = ctx.Output(framework::GradVarName("Logits")); const Tensor* loss_grad = ctx.Input(framework::GradVarName("Loss")); logits_grad->mutable_data(ctx.GetPlace()); bool norm_by_times = ctx.Attr("norm_by_times"); math::UnpaddingLoDTensorFunctor()( - ctx.template device_context(), logits_grad, - *warpctc_grad, norm_by_times, 0, math::kLengthBatchWidth); + ctx.template device_context(), *warpctc_grad, + logits_grad, -1, 0, norm_by_times, math::kLengthBatchWidth); const T* loss_grad_data = loss_grad->data(); math::ScaleLoDTensorFunctor()( From 8d8d48a34f9116f5a501d69cc4dbbf9ce13a1446 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Fri, 17 Aug 2018 17:58:12 +0800 Subject: [PATCH 05/10] Complete sequence_pad_op and its CPU kernel. 
Add unittests --- .../fluid/operators/math/sequence_padding.cc | 24 +++- .../fluid/operators/math/sequence_padding.h | 3 - paddle/fluid/operators/sequence_pad_op.cc | 105 +++++++------- paddle/fluid/operators/sequence_pad_op.cu | 10 +- paddle/fluid/operators/sequence_pad_op.h | 93 +++--------- .../tests/unittests/test_sequence_pad_op.py | 134 ++++++++++++++++++ 6 files changed, 234 insertions(+), 135 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/test_sequence_pad_op.py diff --git a/paddle/fluid/operators/math/sequence_padding.cc b/paddle/fluid/operators/math/sequence_padding.cc index e8ccf006ad..d3dab64f60 100644 --- a/paddle/fluid/operators/math/sequence_padding.cc +++ b/paddle/fluid/operators/math/sequence_padding.cc @@ -70,9 +70,10 @@ class PaddingLoDTensorFunctor { std::vector pad_value = {0}, int pad_seq_len = -1, int lod_level = 0, bool norm_by_times = false, const PadLayout layout = kBatchLengthWidth) { - auto seq_offsets = framework::ToAbsOffset(seq_tensor.lod())[lod_level]; - auto seq_tensor_dims = seq_tensor.dims(); - auto pad_tensor_dims = pad_tensor->dims(); + auto seq_lod = seq_tensor.lod(); + const auto seq_offsets = framework::ToAbsOffset(seq_lod)[lod_level]; + const auto& seq_tensor_dims = seq_tensor.dims(); + const auto& pad_tensor_dims = pad_tensor->dims(); if (pad_seq_len == -1) { pad_seq_len = MaximumSequenceLength(seq_offsets); } @@ -91,12 +92,21 @@ class PaddingLoDTensorFunctor { // fill padding value T* pad_data = pad_tensor->data(); - for (int i = 0; i < pad_tensor->numel() / step_width; ++i) { - memcpy(pad_data, pad_value.data(), step_width * sizeof(T)); + for (int i = 0; i < pad_tensor->numel(); i += step_width) { + memcpy(pad_data + i, pad_value.data(), step_width * sizeof(T)); } CopyValidData(pad_tensor, &seq_tensor, seq_offsets, pad_seq_len, step_width, norm_by_times, kSeqToPad, layout); + + // Set pad_tensor's lod info if possible + if (layout == kBatchLengthWidth) { + framework::LoD pad_lod(seq_lod.begin() + 
lod_level, seq_lod.end()); + for (size_t i = 0; i < pad_lod[0].size(); ++i) { + pad_lod[0][i] = i * pad_seq_len; + } + pad_tensor->set_lod(pad_lod); + } } }; @@ -109,8 +119,8 @@ class UnpaddingLoDTensorFunctor { int lod_level = 0, bool norm_by_times = false, const PadLayout& layout = kBatchLengthWidth) { auto seq_offsets = framework::ToAbsOffset(seq_tensor->lod())[lod_level]; - auto seq_tensor_dims = seq_tensor->dims(); - auto pad_tensor_dims = pad_tensor.dims(); + const auto& seq_tensor_dims = seq_tensor->dims(); + const auto& pad_tensor_dims = pad_tensor.dims(); if (pad_seq_len == -1) { pad_seq_len = MaximumSequenceLength(seq_offsets); } diff --git a/paddle/fluid/operators/math/sequence_padding.h b/paddle/fluid/operators/math/sequence_padding.h index d5790e2ba2..9b8c892c53 100644 --- a/paddle/fluid/operators/math/sequence_padding.h +++ b/paddle/fluid/operators/math/sequence_padding.h @@ -44,9 +44,6 @@ inline static void CheckDims(const framework::DDim& seq_tensor_dims, "Value of 1st dimension of the sequence tensor should be " "equal to sum of lengths of all sequences."); - PADDLE_ENFORCE(seq_tensor_dims.size() == 1 || seq_tensor_dims.size() == 2, - "seq_tensor's rank should be 1 or 2."); - PADDLE_ENFORCE(seq_tensor_dims.size() + 1 == pad_tensor_dims.size() || seq_tensor_dims.size() == pad_tensor_dims.size(), "pad_tensor's rank should be 1 greater than seq_tensor's " diff --git a/paddle/fluid/operators/sequence_pad_op.cc b/paddle/fluid/operators/sequence_pad_op.cc index dc79b252c7..f23710cf4d 100644 --- a/paddle/fluid/operators/sequence_pad_op.cc +++ b/paddle/fluid/operators/sequence_pad_op.cc @@ -21,82 +21,85 @@ class SequencePadOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) of SequencePadOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("PadValue"), + "Input(PadValue) 
of SequencePadOp should not be null."); PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) of SequencePadOp should not be null."); auto x_dims = ctx->GetInputDim("X"); + PADDLE_ENFORCE_GE(x_dims.size(), 2, + "The rank of Input(x) can't be less than 2."); + auto time_step_dims = framework::slice_ddim(x_dims, 1, x_dims.size()); + auto pad_value_dims = ctx->GetInputDim("PadValue"); + PADDLE_ENFORCE(pad_value_dims == framework::make_ddim({1}) || + pad_value_dims == time_step_dims, + "The Input(PadValue) must be a scalar or a tensor whose " + "shape equals to time steps in sequences"); - PADDLE_ENFORCE_EQ(x_dims.size(), 2, - "Only support 2-D tensor, rank of Input(X) should be 2."); - - int lod_level = ctx->Attrs().Get("lod_level"); - - int64_t max_len = -1; - int64_t seq_num = -1; - int x_lod_size = -1; + int batch_dim_size = -1; if (ctx->IsRuntime()) { + // run time framework::Variable* x_var = boost::get(ctx->GetInputVarPtrs("X")[0]); - - auto& x_lod = x_var->Get().lod(); - - x_lod_size = x_lod.size(); - - auto x_abs_offset = framework::ToAbsOffset(x_lod)[lod_level]; - - PADDLE_ENFORCE_EQ(x_dims[0], static_cast(x_abs_offset.back()), - "The first dimension of `X` should be equal to sum " - "of all sequences' length."); - - seq_num = x_abs_offset.size() - 1; - - for (int64_t i = 1; i <= seq_num; ++i) { - int64_t seq_len = x_abs_offset[i] - x_abs_offset[i - 1]; - max_len = max_len < seq_len ? 
seq_len : max_len; + const auto& x_lod = x_var->Get().lod(); + PADDLE_ENFORCE(!x_lod.empty(), "The Input(X) must hold lod info."); + const auto& x_lod_0 = x_lod[0]; + PADDLE_ENFORCE_GE(x_lod_0.size(), 2, + "The Input(X)'s lod info is corrupted."); + PADDLE_ENFORCE_EQ( + x_dims[0], static_cast(x_lod_0.back()), + "The Input(X)'s lod info mismatches the actual tensor shape."); + + int seq_num = x_lod_0.size() - 1; + int max_seq_len = math::MaximumSequenceLength(x_lod_0); + int padded_length = ctx->Attrs().Get("padded_length"); + if (padded_length == -1) { + padded_length = max_seq_len; } + PADDLE_ENFORCE_GE(padded_length, max_seq_len, + "The Attr(padded_length) must be -1 or an int greater " + "than the length of the longest original sequence."); + batch_dim_size = padded_length * seq_num; } else { + // compile time framework::VarDesc* x_desc = boost::get(ctx->GetInputVarPtrs("X")[0]); - x_lod_size = x_desc->GetLoDLevel(); + PADDLE_ENFORCE_GE(x_desc->GetLoDLevel(), 1); } - PADDLE_ENFORCE(lod_level >= 0 && lod_level < x_lod_size, - "Invalid `lod_level` which should be at least 0 and less " - "than maximum lod level of `X`"); - - ctx->SetOutputDim("Out", {seq_num, max_len, x_dims[1]}); - } - - protected: - framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return framework::OpKernelType( - framework::ToDataType(ctx.Input("X")->type()), - ctx.device_context()); + auto out_dims = x_dims; + out_dims[0] = batch_dim_size; + ctx->SetOutputDim("Out", out_dims); } }; class SequencePadOpMaker : public framework::OpProtoAndCheckerMaker { public: - SequencePadOpMaker(OpProto* proto, OpAttrChecker* op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { + void Make() override { AddInput("X", "(LoDTensor, default LoDTensor) Input variable which " - "should contain lod information. 
Length of each sequence would " - "be computed from the most bottom level lod."); - AddOutput("Out", - "(Tensor) Output variable which would be a common tensor " - "without lod. Each sequence would be padded to the maximum " - "length."); - AddAttr("lod_level", - "(int, default 0) Specify which level lod to referred to."); - AddAttr("pad_value", - "(float, default 0.0) Specify which value to be padded to " - "the end of each sequence."); + "should contain lod information."); + AddInput("PadValue", + "(LoDTensor), this Tensor holds values that will be fill into " + "padded steps. It can be a scalar or a tensor whose shape equals " + "to time steps in sequences. If it's a scalar, it will be " + "automatically broadcasted to the shape of time step."); + AddOutput( + "Out", + "(LoDTensor) The output vairable, which contains padded sequences."); + AddAttr( + "padded_length", + "The length of padded sequences. It can be setted to -1 or " + "any positive int. When it is -1, all sequences will be padded up to " + "the length of the longest one among them; when it a certain positive " + "value, it must be greater than the length of the longest original " + "sequence.") + .SetDefault(-1); AddComment(R"DOC( )DOC"); diff --git a/paddle/fluid/operators/sequence_pad_op.cu b/paddle/fluid/operators/sequence_pad_op.cu index a2fa62957e..ff8f81a2f0 100644 --- a/paddle/fluid/operators/sequence_pad_op.cu +++ b/paddle/fluid/operators/sequence_pad_op.cu @@ -17,7 +17,13 @@ limitations under the License. 
*/ namespace ops = paddle::operators; REGISTER_OP_CUDA_KERNEL( sequence_pad, - ops::SequencePadOpKernel); + ops::SequencePadOpKernel, + ops::SequencePadOpKernel, + ops::SequencePadOpKernel, + ops::SequencePadOpKernel); REGISTER_OP_CUDA_KERNEL( sequence_pad_grad, - ops::SequencePadGradOpKernel); + ops::SequencePadGradOpKernel, + ops::SequencePadGradOpKernel, + ops::SequencePadGradOpKernel, + ops::SequencePadGradOpKernel); diff --git a/paddle/fluid/operators/sequence_pad_op.h b/paddle/fluid/operators/sequence_pad_op.h index 6d136b65f1..44aff30879 100644 --- a/paddle/fluid/operators/sequence_pad_op.h +++ b/paddle/fluid/operators/sequence_pad_op.h @@ -13,6 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once + +#include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/operators/math/math_function.h" @@ -24,68 +26,24 @@ namespace operators { using LoDTensor = framework::LoDTensor; using LoD = framework::LoD; -template -struct CopyFunctor { - LoDTensor* lod_tensor_; - LoDTensor* pad_tensor_; - const LoD& ref_lod_; - const DeviceContext& ctx_; - bool is_lod_to_pad_; - - CopyFunctor(LoDTensor* lod_tensor, const LoD& ref_lod, LoDTensor* pad_tensor, - const DeviceContext& ctx, bool is_lod_to_pad) - : lod_tensor_(lod_tensor), - pad_tensor_(pad_tensor), - ref_lod_(ref_lod), - ctx_(ctx), - is_lod_to_pad_(is_lod_to_pad) {} - - void operator()() const { - /* - auto seq_num = ref_lod_.size() - 1; - auto max_len = pad_tensor_->dims()[0] / seq_num; - - PADDLE_ENFORCE_EQ(max_len * seq_num, pad_tensor_->dims()[0], - "First dimension of padded tensor should be equal to " - "maximum sequence length mulplied by sequence number."); - - for (size_t i = 1; i < ref_lod_.size(); ++i) { - auto seq_start = ref_lod_[i - 1]; - auto seq_end = ref_lod_[i]; - auto pad_start = (i - 1) * max_len; - auto pad_end = pad_start + (seq_end - seq_start); - auto sub_lod_tensor = 
lod_tensor_->Slice(seq_start, seq_end); - auto sub_pad_tensor = pad_tensor_->Slice(pad_start, pad_end); - if (is_lod_to_pad_) { - framework::TensorCopy(sub_lod_tensor, ctx.GetPlace(), &sub_pad_tensor); - } else { - framework::TensorCopy(sub_pad_tensor, ctx.GetPlace(), &sub_lod_tensor); - } - } - */ - } -}; - template class SequencePadOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - /* - auto* x = ctx.Input("X"); - auto* out_ptr = ctx.Output("Out"); - - out_ptr->mutable_data(ctx.GetPlace()); + const auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); + out->mutable_data(ctx.GetPlace()); - // Resize(); + const auto* pad_value = ctx.Input("PadValue"); + const T* pad_value_data = pad_value->data(); + std::vector pad_value_vec(pad_value_data, + pad_value_data + pad_value->numel()); - T pad_value = static_cast(ctx.Attr("pad_value")); + int padded_length = ctx.Attr("padded_length"); math::PaddingLoDTensorFunctor()( - ctx.template device_context(), *x, *, false); - - math::SetConstant set_func; - set_func(ctx.template device_context(), out_ptr, pad_value); - */ + ctx.template device_context(), *x, out, pad_value_vec, + padded_length, 0, false, math::kBatchLengthWidth); } }; @@ -93,26 +51,17 @@ template class SequencePadGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - /* - auto* x_ptr = ctx.Input("X"); - auto* g_out_ptr = ctx.Input(framework::GradVarName("Out")); - auto* g_x_ptr = ctx.Output(framework::GradVarName("X")); - - math::SetConstant set_func; - set_func(ctx.template device_context(), - g_x_ptr, - static_cast(0)); + auto* d_x = ctx.Output(framework::GradVarName("X")); + if (d_x) { + const auto* d_out = ctx.Input(framework::GradVarName("Out")); + d_x->mutable_data(ctx.GetPlace()); - auto& x_lod = x_ptr->lod(); - auto& x_last_level_lod = x_lod[x_lod.size() - 1]; + int padded_length = ctx.Attr("padded_length"); - 
CopyFunctor copy_func(g_out_ptr, - x_last_level_lod, - g_x_ptr, - ctx, - false); - copy_func(); - */ + math::UnpaddingLoDTensorFunctor()( + ctx.template device_context(), *d_out, d_x, + padded_length, 0, false, math::kBatchLengthWidth); + } } }; diff --git a/python/paddle/fluid/tests/unittests/test_sequence_pad_op.py b/python/paddle/fluid/tests/unittests/test_sequence_pad_op.py new file mode 100644 index 0000000000..7b9eedbf52 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_sequence_pad_op.py @@ -0,0 +1,134 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +import numpy as np +from op_test import OpTest + + +class TestSequencePadOp(OpTest): + def set_attr(self): + self.x_shape = [12, 4] + self.x_len_lod = [[2, 3, 4, 3]] + self.pad_value = [1.0] + self.padded_length = -1 + self.dtype = 'float32' + + def set_data(self): + x_data = np.random.uniform(0.1, 0.5, self.x_shape).astype(self.dtype) + pad_value_data = np.array(self.pad_value).astype(self.dtype) + self.inputs = { + 'X': (x_data, self.x_len_lod), + 'PadValue': pad_value_data + } + self.attrs = {'padded_length': self.padded_length} + + def compute(self): + # get padded length + padded_length = self.padded_length + x_len_lod_0 = self.x_len_lod[0] + if padded_length == -1: + max_seq_len = 0 + for l in x_len_lod_0: + max_seq_len = max(max_seq_len, l) + padded_length = max_seq_len + + # do padding + x_data = self.inputs['X'][0] + pad_value_data = self.inputs['PadValue'] + if pad_value_data.shape == (1, ): + pad_value_data = np.broadcast_to( + pad_value_data, shape=x_data.shape[1:]) + padded_sequences = [] + start_idx = 0 + for l in x_len_lod_0: + end_idx = start_idx + l + seq = x_data[start_idx:end_idx] + to_pad_len = padded_length - l + for _ in range(to_pad_len): + seq = np.append(seq, pad_value_data[np.newaxis, :], axis=0) + padded_sequences.append(seq) + start_idx = end_idx + + out_len_lod = self.x_len_lod[:] + out_len_lod_0 = [padded_length] * len(x_len_lod_0) + out_len_lod[0] = out_len_lod_0 + out_data = np.concatenate(padded_sequences, axis=0) + self.outputs = {'Out': (out_data, out_len_lod)} + + def setUp(self): + self.op_type = 'sequence_pad' + self.set_attr() + self.set_data() + self.compute() + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(["X"], "Out") + + +class TestSequencePadOp2(TestSequencePadOp): + def set_attr(self): + self.x_shape = [12, 4] + self.x_len_lod = [[2, 3, 4, 3]] + self.pad_value = [1.0, 2.0, 3.0, 4.0] + self.padded_length = -1 + self.dtype = 'float32' + + +class 
TestSequencePadOp3(TestSequencePadOp): + def set_attr(self): + self.x_shape = [12, 4] + self.x_len_lod = [[2, 3, 4, 3]] + self.pad_value = [1.0] + self.padded_length = 7 + self.dtype = 'float32' + + +class TestSequencePadOp4(TestSequencePadOp): + def set_attr(self): + self.x_shape = [12, 4] + self.x_len_lod = [[2, 3, 4, 3]] + self.pad_value = [1.0, 2.0, 3.0, 4.0] + self.padded_length = 7 + self.dtype = 'float32' + + +class TestSequencePadOp5(TestSequencePadOp): + def set_attr(self): + self.x_shape = [12, 2, 2] + self.x_len_lod = [[2, 3, 4, 3]] + self.pad_value = [1.0] + self.padded_length = -1 + self.dtype = 'float32' + + +class TestSequencePadOp6(TestSequencePadOp): + def set_attr(self): + self.x_shape = [12, 2, 2] + self.x_len_lod = [[2, 3, 4, 3]] + self.pad_value = [[1.0, 2.0], [3.0, 4.0]] + self.padded_length = -1 + self.dtype = 'float32' + + +class TestSequencePadOp7(TestSequencePadOp): + def set_attr(self): + self.x_shape = [12, 2, 2] + self.x_len_lod = [[2, 3, 4, 3]] + self.pad_value = [1.0] + self.padded_length = 7 + self.dtype = 'float32' From 34b209cffa81593092a308e2ffe0536b475e81e6 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Mon, 20 Aug 2018 16:33:04 +0800 Subject: [PATCH 06/10] Complete sequence_padding GPU kernel --- paddle/fluid/operators/CMakeLists.txt | 1 + .../fluid/operators/math/sequence_padding.cc | 26 +-- .../fluid/operators/math/sequence_padding.cu | 151 ++++++++---------- .../fluid/operators/math/sequence_padding.h | 6 +- .../operators/math/sequence_padding_test.cc | 13 +- paddle/fluid/operators/sequence_pad_op.h | 5 +- paddle/fluid/operators/warpctc_op.h | 15 +- 7 files changed, 113 insertions(+), 104 deletions(-) diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index ff0e989464..2179a5acdb 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -277,6 +277,7 @@ op_library(unsqueeze_op DEPS reshape_op) op_library(squeeze_op DEPS reshape_op) 
op_library(extract_rows_op DEPS memory) op_library(flatten_op DEPS reshape_op) +op_library(sequence_pad_op DEPS sequence_padding) if (WITH_GPU) op_library(conv_op DEPS vol2col depthwise_conv im2col) diff --git a/paddle/fluid/operators/math/sequence_padding.cc b/paddle/fluid/operators/math/sequence_padding.cc index d3dab64f60..02ede3edce 100644 --- a/paddle/fluid/operators/math/sequence_padding.cc +++ b/paddle/fluid/operators/math/sequence_padding.cc @@ -18,8 +18,6 @@ namespace paddle { namespace operators { namespace math { -enum CopyType { kSeqToPad, kPadToSeq }; - template void CopyValidData(framework::Tensor* dst_tensor, const framework::Tensor* src_tensor, @@ -67,7 +65,7 @@ class PaddingLoDTensorFunctor { void operator()(const platform::CPUDeviceContext& context, const framework::LoDTensor& seq_tensor, framework::LoDTensor* pad_tensor, - std::vector pad_value = {0}, int pad_seq_len = -1, + const framework::LoDTensor& pad_value, int pad_seq_len = -1, int lod_level = 0, bool norm_by_times = false, const PadLayout layout = kBatchLengthWidth) { auto seq_lod = seq_tensor.lod(); @@ -81,19 +79,21 @@ class PaddingLoDTensorFunctor { CheckDims(seq_tensor_dims, pad_tensor_dims, seq_offsets, pad_seq_len, step_width, layout); - PADDLE_ENFORCE(pad_value.size() == 1 || - static_cast(pad_value.size()) == step_width, - "The size of 'pad_value' can only be 1 or be equal to the " + PADDLE_ENFORCE(pad_value.numel() == 1 || pad_value.numel() == step_width, + "The numel of 'pad_value' can only be 1 or be equal to the " "'step_width'."); - if (pad_value.size() == 1) { - pad_value = std::vector(step_width, pad_value[0]); - } - // fill padding value T* pad_data = pad_tensor->data(); - for (int i = 0; i < pad_tensor->numel(); i += step_width) { - memcpy(pad_data + i, pad_value.data(), step_width * sizeof(T)); + const T* pad_value_data = pad_value.data(); + if (pad_value.numel() == 1) { + for (int i = 0; i < pad_tensor->numel(); ++i) { + pad_data[i] = *pad_value_data; + } + } else { + 
for (int i = 0; i < pad_tensor->numel(); i += step_width) { + memcpy(pad_data + i, pad_value_data, step_width * sizeof(T)); + } } CopyValidData(pad_tensor, &seq_tensor, seq_offsets, pad_seq_len, @@ -117,7 +117,7 @@ class UnpaddingLoDTensorFunctor { const framework::LoDTensor& pad_tensor, framework::LoDTensor* seq_tensor, int pad_seq_len = -1, int lod_level = 0, bool norm_by_times = false, - const PadLayout& layout = kBatchLengthWidth) { + const PadLayout layout = kBatchLengthWidth) { auto seq_offsets = framework::ToAbsOffset(seq_tensor->lod())[lod_level]; const auto& seq_tensor_dims = seq_tensor->dims(); const auto& pad_tensor_dims = pad_tensor.dims(); diff --git a/paddle/fluid/operators/math/sequence_padding.cu b/paddle/fluid/operators/math/sequence_padding.cu index 20e3e3de2a..3b1a44a457 100644 --- a/paddle/fluid/operators/math/sequence_padding.cu +++ b/paddle/fluid/operators/math/sequence_padding.cu @@ -19,46 +19,32 @@ namespace paddle { namespace operators { namespace math { -template +template __global__ void SequencePaddingKernel( - T* pad_data, T* seq_data, const size_t* seq_offset, const size_t& seq_num, - const size_t& max_seq_len, const size_t& seq_width, bool norm_by_times, - const T& pad_value, const OutputLayout& output_layout) { + T* dst, const T* src, const T* pad_value, bool is_constant_pad, + const size_t* seq_offsets, const size_t& seq_num, const size_t& pad_seq_len, + const size_t& step_width, bool norm_by_len, const PadLayout& layout) { size_t seq_idx = blockIdx.y; - size_t seq_start = seq_offset[seq_idx]; - size_t seq_len = seq_offset[seq_idx + 1] - seq_start; - - size_t seq_step_idx = blockIdx.x * blockDim.y + threadIdx.y; - - size_t seq_data_offset = (seq_start + seq_step_idx) * seq_width; - - size_t pad_data_offset = 0; - - if (output_layout == kLengthBatchWidth) { - pad_data_offset = (seq_step_idx * seq_num + seq_idx) * seq_width; - } else { - pad_data_offset = (seq_idx * max_seq_len + seq_step_idx) * seq_width; - } - - if (seq_step_idx < 
seq_len) { - T scale = norm_by_times ? (1.0f / static_cast(seq_len)) : 1.0f; - if (Padding) { - /* seq -> pad */ - for (size_t i = threadIdx.x; i < seq_width; i += blockDim.x) { - pad_data[pad_data_offset + i] = scale * seq_data[seq_data_offset + i]; - } - } else { - /* pad -> seq */ - for (size_t i = threadIdx.x; i < seq_width; i += blockDim.x) { - seq_data[seq_data_offset + i] = scale * pad_data[pad_data_offset + i]; - } + size_t seq_len = seq_offsets[seq_idx + 1] - seq_offsets[seq_idx]; + + size_t step_idx = blockIdx.x * blockDim.y + threadIdx.y; + size_t seq_data_offset = (seq_offsets[seq_idx] + step_idx) * step_width; + size_t pad_data_offset = layout == kBatchLengthWidth + ? (seq_idx * pad_seq_len + step_idx) * step_width + : (step_idx * seq_num + seq_idx) * step_width; + + T* dst_data = dst + (Type == kSeqToPad ? pad_data_offset : seq_data_offset); + const T* src_data = + src + (Type == kSeqToPad ? seq_data_offset : pad_data_offset); + + if (step_idx < seq_len) { + float scale = norm_by_len ? (1.0f / static_cast(seq_len)) : 1.0f; + for (size_t i = threadIdx.x; i < step_width; i += blockDim.x) { + dst_data[i] = scale * src_data[i]; } - } else if (seq_step_idx < max_seq_len) { - if (Padding) { - /* seq -> pad */ - for (size_t i = threadIdx.x; i < seq_width; i += blockDim.x) { - pad_data[pad_data_offset + i] = pad_value; - } + } else if (step_idx < pad_seq_len && Type == kSeqToPad) { + for (size_t i = threadIdx.x; i < seq_width; i += blockDim.x) { + dst_data[i] = is_constant_pad ? 
pad_value[0] : pad_value[i]; } } } @@ -69,24 +55,26 @@ class PaddingLoDTensorFunctor { void operator()(const platform::CUDADeviceContext& context, const framework::LoDTensor& seq_tensor, framework::Tensor* pad_tensor, - T pad_value = static_cast(0), bool norm_by_times = false, - size_t lod_level = 0, - OutputLayout output_layout = kBatchLengthWidth) { - CheckLoD(seq_tensor, lod_level); - - auto& lod = seq_tensor.lod(); - auto& seq_offset = framework::ToAbsOffset(lod)[lod_level]; - - auto seq_tensor_dims = seq_tensor.dims(); - auto pad_tensor_dims = pad_tensor->dims(); - int64_t max_seq_len = MaximumSequenceLength(seq_offset); - int64_t seq_num = seq_offset.size() - 1; - int64_t seq_width = seq_tensor.numel() / seq_tensor_dims[0]; + const framework::LoDTensor& pad_value, int pad_seq_len = -1, + int lod_level = 0, bool norm_by_times = false, + const PadLayout layout = kBatchLengthWidth) { + auto seq_lod = seq_tensor.lod(); + const auto seq_offsets = framework::ToAbsOffset(seq_lod)[lod_level]; + const auto& seq_tensor_dims = seq_tensor.dims(); + const auto& pad_tensor_dims = pad_tensor->dims(); + if (pad_seq_len == -1) { + pad_seq_len = MaximumSequenceLength(seq_offsets); + } + int step_width = seq_tensor.numel() / seq_tensor_dims[0]; + int seq_num = seq_offset.size() - 1; - CheckDims(seq_tensor_dims, seq_offset.back(), pad_tensor_dims, max_seq_len, - seq_num, seq_width, output_layout); + CheckDims(seq_tensor_dims, pad_tensor_dims, seq_offsets, pad_seq_len, + step_width, layout); + PADDLE_ENFORCE(pad_value.numel() == 1 || pad_value.numel() == step_width, + "The numel of 'pad_value' can only be 1 or be equal to the " + "'step_width'."); - if (!norm_by_times && seq_num == 1UL) { + if (!norm_by_times && seq_num == 1UL && pad_seq_len == -1) { TensorCopy(seq_tensor, context.GetPlace(), context, pad_tensor); pad_tensor->Resize(pad_tensor_dims); return; @@ -98,21 +86,22 @@ class PaddingLoDTensorFunctor { * and at least 8 elements for each thread. 
*/ size_t block_dim_x = - std::min(((((seq_width + 7) >> 3) + 31) >> 5) << 5, kBlockSize); + std::min(((((step_width + 7) >> 3) + 31) >> 5) << 5, kBlockSize); size_t block_dim_y = kBlockSize / block_dim_x; dim3 threads(block_dim_x, block_dim_y); - size_t grid_dim_x = (max_seq_len + block_dim_y - 1) / block_dim_y; + size_t grid_dim_x = (pad_seq_len + block_dim_y - 1) / block_dim_y; size_t grid_dim_y = seq_num; dim3 grid(grid_dim_x, grid_dim_y); const T* seq_data = seq_tensor.data(); T* pad_data = pad_tensor->data(); + const T* pad_value_data = pad_value.data(); - SequencePaddingKernel<<>>( - pad_data, const_cast(seq_data), - seq_offset.CUDAData(context.GetPlace()), seq_num, max_seq_len, - seq_width, norm_by_times, pad_value, output_layout); + SequencePaddingKernel<<>>( + pad_data, seq_data, pad_value_data, pad_value.numel() == 1, + seq_offset.CUDAData(context.GetPlace()), seq_num, pad_seq_len, + step_width, norm_by_times, layout); } }; @@ -120,25 +109,23 @@ template class UnpaddingLoDTensorFunctor { public: void operator()(const platform::CUDADeviceContext& context, - framework::LoDTensor* seq_tensor, - const framework::Tensor& pad_tensor, - bool norm_by_times = false, size_t lod_level = 0, - OutputLayout output_layout = kBatchLengthWidth) { - CheckLoD(*seq_tensor, lod_level); - - auto& lod = seq_tensor->lod(); - auto& seq_offset = framework::ToAbsOffset(lod)[lod_level]; - - auto seq_tensor_dims = seq_tensor->dims(); - auto pad_tensor_dims = pad_tensor.dims(); - int64_t max_seq_len = MaximumSequenceLength(seq_offset); - int64_t seq_num = seq_offset.size() - 1; - int64_t seq_width = seq_tensor->numel() / seq_tensor_dims[0]; + const framework::LoDTensor& pad_tensor, + framework::LoDTensor* seq_tensor, int pad_seq_len = -1, + int lod_level = 0, bool norm_by_times = false, + const PadLayout layout = kBatchLengthWidth) { + auto seq_offsets = framework::ToAbsOffset(seq_tensor->lod())[lod_level]; + const auto& seq_tensor_dims = seq_tensor->dims(); + const auto& 
pad_tensor_dims = pad_tensor.dims(); + if (pad_seq_len == -1) { + pad_seq_len = MaximumSequenceLength(seq_offsets); + } + int step_width = seq_tensor->numel() / seq_tensor_dims[0]; + int seq_num = seq_offset.size() - 1; - CheckDims(seq_tensor_dims, seq_offset.back(), pad_tensor_dims, max_seq_len, - seq_num, seq_width, output_layout); + CheckDims(seq_tensor_dims, pad_tensor_dims, seq_offsets, pad_seq_len, + step_width, layout); - if (!norm_by_times && seq_num == 1UL) { + if (!norm_by_times && seq_num == 1UL && pad_seq_len == -1) { TensorCopy(pad_tensor, context.GetPlace(), context, seq_tensor); seq_tensor->Resize(seq_tensor_dims); return; @@ -150,21 +137,21 @@ class UnpaddingLoDTensorFunctor { * and at least 8 elements for each thread. */ size_t block_dim_x = - std::min(((((seq_width + 7) >> 3) + 31) >> 5) << 5, kBlockSize); + std::min(((((step_width + 7) >> 3) + 31) >> 5) << 5, kBlockSize); size_t block_dim_y = kBlockSize / block_dim_x; dim3 threads(block_dim_x, block_dim_y); - size_t grid_dim_x = (max_seq_len + block_dim_y - 1) / block_dim_y; + size_t grid_dim_x = (pad_seq_len + block_dim_y - 1) / block_dim_y; size_t grid_dim_y = seq_num; dim3 grid(grid_dim_x, grid_dim_y); const T* pad_data = pad_tensor.data(); T* seq_data = seq_tensor->data(); - SequencePaddingKernel<<>>( - const_cast(pad_data), seq_data, - seq_offset.CUDAData(context.GetPlace()), seq_num, max_seq_len, - seq_width, norm_by_times, static_cast(0), output_layout); + SequencePaddingKernel<<>>( + seq_data, pad_data, nullptr, false, + seq_offset.CUDAData(context.GetPlace()), seq_num, pad_seq_len, + step_width, norm_by_times, layout); } }; diff --git a/paddle/fluid/operators/math/sequence_padding.h b/paddle/fluid/operators/math/sequence_padding.h index 9b8c892c53..3fb5859e3b 100644 --- a/paddle/fluid/operators/math/sequence_padding.h +++ b/paddle/fluid/operators/math/sequence_padding.h @@ -25,6 +25,8 @@ namespace math { enum PadLayout { kBatchLengthWidth = 0, kLengthBatchWidth }; +enum CopyType { 
kSeqToPad, kPadToSeq }; + inline static size_t MaximumSequenceLength( const framework::Vector& seq_offset) { size_t seq_num = seq_offset.size() - 1; @@ -82,7 +84,7 @@ class PaddingLoDTensorFunctor { void operator()(const platform::CPUDeviceContext& context, const framework::LoDTensor& seq_tensor, framework::LoDTensor* pad_tensor, - std::vector pad_value = {0}, int pad_seq_len = -1, + const framework::LoDTensor& pad_value, int pad_seq_len = -1, int lod_level = 0, bool norm_by_times = false, const PadLayout layout = kBatchLengthWidth); }; @@ -94,7 +96,7 @@ class UnpaddingLoDTensorFunctor { const framework::LoDTensor& pad_tensor, framework::LoDTensor* seq_tensor, int pad_seq_len = -1, int lod_level = 0, bool norm_by_times = false, - const PadLayout& layout = kBatchLengthWidth); + const PadLayout layout = kBatchLengthWidth); }; } // namespace math diff --git a/paddle/fluid/operators/math/sequence_padding_test.cc b/paddle/fluid/operators/math/sequence_padding_test.cc index 3171c7c33e..4f61b1029c 100644 --- a/paddle/fluid/operators/math/sequence_padding_test.cc +++ b/paddle/fluid/operators/math/sequence_padding_test.cc @@ -24,6 +24,8 @@ void TestSequencePadding(const paddle::framework::LoD& lod, paddle::framework::LoDTensor seq; paddle::framework::LoDTensor seq_back; paddle::framework::LoDTensor padding; + paddle::framework::LoDTensor cpu_pad_value; + paddle::framework::LoDTensor pad_value; const size_t level = lod.size() - 1; auto seq_dims = @@ -55,8 +57,17 @@ void TestSequencePadding(const paddle::framework::LoD& lod, padding.mutable_data(padding_dims, *place); + T* pad_value_data = + cpu_pad_value.mutable_data({1}, paddle::platform::CPUPlace()); + *pad_value_data = static_cast(0); + if (paddle::platform::is_cpu_place(*place)) { + pad_value = cpu_pad_value; + } else { + TensorCopySync(cpu_pad_value, *place, &pad_value); + } + paddle::operators::math::PaddingLoDTensorFunctor()( - *context, seq, &padding, {0}, -1, 0, false, + *context, seq, &padding, pad_value, -1, 0, 
false, paddle::operators::math::kLengthBatchWidth); seq_back.set_lod(lod); diff --git a/paddle/fluid/operators/sequence_pad_op.h b/paddle/fluid/operators/sequence_pad_op.h index 44aff30879..5fc9da69d7 100644 --- a/paddle/fluid/operators/sequence_pad_op.h +++ b/paddle/fluid/operators/sequence_pad_op.h @@ -35,14 +35,11 @@ class SequencePadOpKernel : public framework::OpKernel { out->mutable_data(ctx.GetPlace()); const auto* pad_value = ctx.Input("PadValue"); - const T* pad_value_data = pad_value->data(); - std::vector pad_value_vec(pad_value_data, - pad_value_data + pad_value->numel()); int padded_length = ctx.Attr("padded_length"); math::PaddingLoDTensorFunctor()( - ctx.template device_context(), *x, out, pad_value_vec, + ctx.template device_context(), *x, out, *pad_value, padded_length, 0, false, math::kBatchLengthWidth); } }; diff --git a/paddle/fluid/operators/warpctc_op.h b/paddle/fluid/operators/warpctc_op.h index 6cbf985039..444265f58d 100644 --- a/paddle/fluid/operators/warpctc_op.h +++ b/paddle/fluid/operators/warpctc_op.h @@ -161,10 +161,21 @@ class WarpCTCKernel : public framework::OpKernel { static_cast(num_sequences), static_cast(sequence_width)}); warpctc_logits.mutable_data(warpctc_logits_dims, ctx.GetPlace()); + + LoDTensor cpu_pad_value; + T* pad_value_data = + cpu_pad_value.mutable_data({1}, platform::CPUPlace()); + *pad_value_data = static_cast(0); + LoDTensor pad_value; + if (platform::is_cpu_place(ctx.GetPlace())) { + pad_value = cpu_pad_value; + } else { + TensorCopySync(cpu_pad_value, ctx.GetPlace(), &pad_value); + } + math::PaddingLoDTensorFunctor()( ctx.template device_context(), *logits, &warpctc_logits, - {static_cast(0)}, -1, 0, false /* norm_by_times */, - math::kLengthBatchWidth); + pad_value, -1, 0, false /* norm_by_times */, math::kLengthBatchWidth); const T* warpctc_logits_data = warpctc_logits.data(); std::vector warpctc_label_lengths(num_sequences); From ce182d9037b988dcbf1c7b86dafd60745afb2d4c Mon Sep 17 00:00:00 2001 From: 
fengjiayi Date: Wed, 22 Aug 2018 11:23:23 +0800 Subject: [PATCH 07/10] bug fix --- .../fluid/operators/math/sequence_padding.cu | 38 ++++++++++++------- .../fluid/operators/math/sequence_padding.h | 4 +- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/paddle/fluid/operators/math/sequence_padding.cu b/paddle/fluid/operators/math/sequence_padding.cu index 3b1a44a457..93d239351a 100644 --- a/paddle/fluid/operators/math/sequence_padding.cu +++ b/paddle/fluid/operators/math/sequence_padding.cu @@ -22,8 +22,8 @@ namespace math { template __global__ void SequencePaddingKernel( T* dst, const T* src, const T* pad_value, bool is_constant_pad, - const size_t* seq_offsets, const size_t& seq_num, const size_t& pad_seq_len, - const size_t& step_width, bool norm_by_len, const PadLayout& layout) { + const size_t* seq_offsets, const size_t seq_num, const size_t pad_seq_len, + const size_t step_width, bool norm_by_len, const PadLayout layout) { size_t seq_idx = blockIdx.y; size_t seq_len = seq_offsets[seq_idx + 1] - seq_offsets[seq_idx]; @@ -43,7 +43,7 @@ __global__ void SequencePaddingKernel( dst_data[i] = scale * src_data[i]; } } else if (step_idx < pad_seq_len && Type == kSeqToPad) { - for (size_t i = threadIdx.x; i < seq_width; i += blockDim.x) { + for (size_t i = threadIdx.x; i < step_width; i += blockDim.x) { dst_data[i] = is_constant_pad ? 
pad_value[0] : pad_value[i]; } } @@ -54,7 +54,7 @@ class PaddingLoDTensorFunctor { public: void operator()(const platform::CUDADeviceContext& context, const framework::LoDTensor& seq_tensor, - framework::Tensor* pad_tensor, + framework::LoDTensor* pad_tensor, const framework::LoDTensor& pad_value, int pad_seq_len = -1, int lod_level = 0, bool norm_by_times = false, const PadLayout layout = kBatchLengthWidth) { @@ -62,11 +62,12 @@ class PaddingLoDTensorFunctor { const auto seq_offsets = framework::ToAbsOffset(seq_lod)[lod_level]; const auto& seq_tensor_dims = seq_tensor.dims(); const auto& pad_tensor_dims = pad_tensor->dims(); + int max_seq_len = MaximumSequenceLength(seq_offsets); if (pad_seq_len == -1) { - pad_seq_len = MaximumSequenceLength(seq_offsets); + pad_seq_len = max_seq_len; } int step_width = seq_tensor.numel() / seq_tensor_dims[0]; - int seq_num = seq_offset.size() - 1; + int seq_num = seq_offsets.size() - 1; CheckDims(seq_tensor_dims, pad_tensor_dims, seq_offsets, pad_seq_len, step_width, layout); @@ -74,13 +75,13 @@ class PaddingLoDTensorFunctor { "The numel of 'pad_value' can only be 1 or be equal to the " "'step_width'."); - if (!norm_by_times && seq_num == 1UL && pad_seq_len == -1) { + if (!norm_by_times && seq_num == 1UL && pad_seq_len == max_seq_len) { TensorCopy(seq_tensor, context.GetPlace(), context, pad_tensor); pad_tensor->Resize(pad_tensor_dims); return; } - const int64_t kBlockSize = 512; + const int kBlockSize = 512; /* At least use 32 threads to copy sequence_width elements, * and at least 8 elements for each thread. 
@@ -100,8 +101,16 @@ class PaddingLoDTensorFunctor { SequencePaddingKernel<<>>( pad_data, seq_data, pad_value_data, pad_value.numel() == 1, - seq_offset.CUDAData(context.GetPlace()), seq_num, pad_seq_len, + seq_offsets.CUDAData(context.GetPlace()), seq_num, pad_seq_len, step_width, norm_by_times, layout); + + if (layout == kBatchLengthWidth) { + framework::LoD pad_lod(seq_lod.begin() + lod_level, seq_lod.end()); + for (size_t i = 0; i < pad_lod[0].size(); ++i) { + pad_lod[0][i] = i * pad_seq_len; + } + pad_tensor->set_lod(pad_lod); + } } }; @@ -116,22 +125,23 @@ class UnpaddingLoDTensorFunctor { auto seq_offsets = framework::ToAbsOffset(seq_tensor->lod())[lod_level]; const auto& seq_tensor_dims = seq_tensor->dims(); const auto& pad_tensor_dims = pad_tensor.dims(); + int max_seq_len = MaximumSequenceLength(seq_offsets); if (pad_seq_len == -1) { - pad_seq_len = MaximumSequenceLength(seq_offsets); + pad_seq_len = max_seq_len; } int step_width = seq_tensor->numel() / seq_tensor_dims[0]; - int seq_num = seq_offset.size() - 1; + int seq_num = seq_offsets.size() - 1; CheckDims(seq_tensor_dims, pad_tensor_dims, seq_offsets, pad_seq_len, step_width, layout); - if (!norm_by_times && seq_num == 1UL && pad_seq_len == -1) { + if (!norm_by_times && seq_num == 1UL && pad_seq_len == max_seq_len) { TensorCopy(pad_tensor, context.GetPlace(), context, seq_tensor); seq_tensor->Resize(seq_tensor_dims); return; } - const int64_t kBlockSize = 512; + const int kBlockSize = 512; /* At least use 32 threads to copy sequence_width elements, * and at least 8 elements for each thread. 
@@ -150,7 +160,7 @@ class UnpaddingLoDTensorFunctor { SequencePaddingKernel<<>>( seq_data, pad_data, nullptr, false, - seq_offset.CUDAData(context.GetPlace()), seq_num, pad_seq_len, + seq_offsets.CUDAData(context.GetPlace()), seq_num, pad_seq_len, step_width, norm_by_times, layout); } }; diff --git a/paddle/fluid/operators/math/sequence_padding.h b/paddle/fluid/operators/math/sequence_padding.h index 3fb5859e3b..e752aa5897 100644 --- a/paddle/fluid/operators/math/sequence_padding.h +++ b/paddle/fluid/operators/math/sequence_padding.h @@ -81,7 +81,7 @@ inline static void CheckDims(const framework::DDim& seq_tensor_dims, template class PaddingLoDTensorFunctor { public: - void operator()(const platform::CPUDeviceContext& context, + void operator()(const DeviceContext& context, const framework::LoDTensor& seq_tensor, framework::LoDTensor* pad_tensor, const framework::LoDTensor& pad_value, int pad_seq_len = -1, @@ -92,7 +92,7 @@ class PaddingLoDTensorFunctor { template class UnpaddingLoDTensorFunctor { public: - void operator()(const platform::CPUDeviceContext& context, + void operator()(const DeviceContext& context, const framework::LoDTensor& pad_tensor, framework::LoDTensor* seq_tensor, int pad_seq_len = -1, int lod_level = 0, bool norm_by_times = false, From f4a4a4cbd934a8af2d4d889cdb0db74fc6a9cfd2 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Wed, 22 Aug 2018 16:40:38 +0800 Subject: [PATCH 08/10] add op comment and python layer --- .../fluid/operators/math/sequence_padding.cu | 3 ++ paddle/fluid/operators/sequence_pad_op.cc | 46 +++++++++++++++++++ python/paddle/fluid/layers/nn.py | 45 ++++++++++++++++++ 3 files changed, 94 insertions(+) diff --git a/paddle/fluid/operators/math/sequence_padding.cu b/paddle/fluid/operators/math/sequence_padding.cu index 93d239351a..f94e8dbc3a 100644 --- a/paddle/fluid/operators/math/sequence_padding.cu +++ b/paddle/fluid/operators/math/sequence_padding.cu @@ -66,6 +66,9 @@ class PaddingLoDTensorFunctor { if (pad_seq_len == -1) { 
pad_seq_len = max_seq_len; } + PADDLE_ENFORCE_GE(pad_seq_len, max_seq_len, + "The pad_seq_len must be equal to or greater than the " + "original max sequence length."); int step_width = seq_tensor.numel() / seq_tensor_dims[0]; int seq_num = seq_offsets.size() - 1; diff --git a/paddle/fluid/operators/sequence_pad_op.cc b/paddle/fluid/operators/sequence_pad_op.cc index f23710cf4d..a08804cfba 100644 --- a/paddle/fluid/operators/sequence_pad_op.cc +++ b/paddle/fluid/operators/sequence_pad_op.cc @@ -101,6 +101,52 @@ class SequencePadOpMaker : public framework::OpProtoAndCheckerMaker { "sequence.") .SetDefault(-1); AddComment(R"DOC( + Sequence Pad Operator + + This operator pads sequences in a same batch to a consistent length. + The length is specified by attribute 'padded_length'. New elements, + whose values are specified by input 'PadValue', will be appended to + the end of each sequence, to make their final lengths consistent. + + Following are cases to better explain how this works: + + Case 1: + + Given a 1-level LoDTensor input(X): + X.lod = [[0, 2, 5]] + X.data = [a, b, c, d, e] + and Input(PadValue): + PadValue.data = [0] + and attribite 'padded_length' = 4, + then we get 1-level LoDTensor: + Out.lod = [[0, 4, 8]] + Out.data = [a, b, 0, 0, c, d, e, 0] + + Case 2: + + Given a 1-level LoDTensor input(X): + X.lod = [[0, 2, 5]] + X.data = [[a1, a2], [b1, b2], [c1, c2], [d1, d2], [e1, e2]] + and Input(PadValue): + PadValue.data = [0] + and attribite 'padded_length' = -1, which mean using the length + of longest input sequence(3 in this case), + then we get 1-level LoDTensor: + Out.lod = [[0, 3, 6]] + Out.data = [[a1, a2], [b1, b2], [0, 0], [c1, c2], [d1, d2], [e1, e2]] + + Case 3: + + Given a 1-level LoDTensor input(X): + X.lod = [[0, 2, 5]] + X.data = [[a1, a2], [b1, b2], [c1, c2], [d1, d2], [e1, e2]] + and Input(PadValue): + PadValue.data = [p1, p2] + and attribite 'padded_length' = -1, which mean using the length + of longest input sequence(3 in this case), + 
then we get 1-level LoDTensor: + Out.lod = [[0, 3, 6]] + Out.data = [[a1, a2], [b1, b2], [p1, p2], [c1, c2], [d1, d2], [e1, e2]] )DOC"); } diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 0960b54123..d782ea7470 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -2662,6 +2662,51 @@ def sequence_expand(x, y, ref_level=-1, name=None): return tmp +@templatedoc() +def sequence_pad(x, pad_value, maxlen=None): + """ + ${comment} + + Args: + x(Variable): Input variable which should contain lod information. + pad_value(Variable): The Variable that holds values that will be filled + into padded steps. It can be a scalar or a tensor whose shape + equals that of the time steps in sequences. If it's a scalar, it will be + automatically broadcast to the shape of time step. + maxlen(int, default None): The length of padded sequences. It can be + None or any positive int. When it is None, all sequences will be + padded up to the length of the longest one among them; when it is a + certain positive value, it must be greater than or equal to the length of the + longest original sequence. + + Returns: + Variable: The padded sequence batch. All sequences have the same length. + + Examples: + ..
code-block:: python + + import numpy + + x = fluid.layers.data(name='y', shape=[10, 5], + dtype='float32', lod_level=1) + pad_value = fluid.layers.assign(input=numpy.array([0])) + out = fluid.layers.sequence_pad(x=x, pad_value=pad_value) + """ + + helper = LayerHelper('sequence_pad', input=x, **locals()) + dtype = helper.input_dtype() + out = helper.create_tmp_variable(dtype) + if maxlen is None: + maxlen = -1 + helper.append_op( + type='sequence_pad', + inputs={'X': x, + 'PadValue': pad_value}, + outputs={'Out': out}, + attrs={'padded_length': maxlen}) + return out + + def beam_search(pre_ids, pre_scores, ids, From 0fb5e351c1ddc36c9361de8b279deba99444bd58 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Thu, 23 Aug 2018 09:33:00 +0000 Subject: [PATCH 09/10] update API.spec --- paddle/fluid/API.spec | 1 + python/paddle/fluid/layers/nn.py | 1 + 2 files changed, 2 insertions(+) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 46e56981ea..0df617f76d 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -113,6 +113,7 @@ paddle.fluid.layers.beam_search_decode ArgSpec(args=['ids', 'scores', 'beam_size paddle.fluid.layers.conv2d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None)) paddle.fluid.layers.conv3d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None)) paddle.fluid.layers.sequence_expand ArgSpec(args=['x', 'y', 'ref_level', 'name'], varargs=None, keywords=None, defaults=(-1, None)) +paddle.fluid.layers.sequence_pad ArgSpec(args=['x', 'pad_value', 'maxlen'], varargs=None, keywords=None, defaults=(None,)) 
paddle.fluid.layers.lstm_unit ArgSpec(args=['x_t', 'hidden_t_prev', 'cell_t_prev', 'forget_bias', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(0.0, None, None, None)) paddle.fluid.layers.reduce_sum ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)) paddle.fluid.layers.reduce_mean ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index d782ea7470..2f115afa6f 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -66,6 +66,7 @@ __all__ = [ 'conv2d_transpose', 'conv3d_transpose', 'sequence_expand', + 'sequence_pad', 'lstm_unit', 'reduce_sum', 'reduce_mean', From 9cb455fa7d89f78199662f796149f3c108070bbe Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Mon, 27 Aug 2018 17:11:33 +0800 Subject: [PATCH 10/10] update function --- .../fluid/operators/math/sequence_padding.cc | 9 ------ .../fluid/operators/math/sequence_padding.cu | 8 ----- paddle/fluid/operators/sequence_pad_op.cc | 32 +++++++++++-------- .../tests/unittests/test_sequence_pad_op.py | 7 ++-- 4 files changed, 20 insertions(+), 36 deletions(-) diff --git a/paddle/fluid/operators/math/sequence_padding.cc b/paddle/fluid/operators/math/sequence_padding.cc index 02ede3edce..25f06a25a0 100644 --- a/paddle/fluid/operators/math/sequence_padding.cc +++ b/paddle/fluid/operators/math/sequence_padding.cc @@ -98,15 +98,6 @@ class PaddingLoDTensorFunctor { CopyValidData(pad_tensor, &seq_tensor, seq_offsets, pad_seq_len, step_width, norm_by_times, kSeqToPad, layout); - - // Set pad_tensor's lod info if possible - if (layout == kBatchLengthWidth) { - framework::LoD pad_lod(seq_lod.begin() + lod_level, seq_lod.end()); - for (size_t i = 0; i < pad_lod[0].size(); ++i) { - pad_lod[0][i] = i * pad_seq_len; - } - pad_tensor->set_lod(pad_lod); - } } }; diff --git 
a/paddle/fluid/operators/math/sequence_padding.cu b/paddle/fluid/operators/math/sequence_padding.cu index f94e8dbc3a..035e10dcbe 100644 --- a/paddle/fluid/operators/math/sequence_padding.cu +++ b/paddle/fluid/operators/math/sequence_padding.cu @@ -106,14 +106,6 @@ class PaddingLoDTensorFunctor { pad_data, seq_data, pad_value_data, pad_value.numel() == 1, seq_offsets.CUDAData(context.GetPlace()), seq_num, pad_seq_len, step_width, norm_by_times, layout); - - if (layout == kBatchLengthWidth) { - framework::LoD pad_lod(seq_lod.begin() + lod_level, seq_lod.end()); - for (size_t i = 0; i < pad_lod[0].size(); ++i) { - pad_lod[0][i] = i * pad_seq_len; - } - pad_tensor->set_lod(pad_lod); - } } }; diff --git a/paddle/fluid/operators/sequence_pad_op.cc b/paddle/fluid/operators/sequence_pad_op.cc index a08804cfba..44d73aa407 100644 --- a/paddle/fluid/operators/sequence_pad_op.cc +++ b/paddle/fluid/operators/sequence_pad_op.cc @@ -40,7 +40,8 @@ class SequencePadOp : public framework::OperatorWithKernel { "The Input(PadValue) must be a scalar or a tensor whose " "shape equals to time steps in sequences"); - int batch_dim_size = -1; + int out_dim_0 = -1; + int out_dim_1 = -1; if (ctx->IsRuntime()) { // run time @@ -64,7 +65,8 @@ class SequencePadOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE(padded_length, max_seq_len, "The Attr(padded_length) must be -1 or an int greater " "than the length of the longest original sequence."); - batch_dim_size = padded_length * seq_num; + out_dim_0 = seq_num; + out_dim_1 = padded_length; } else { // compile time framework::VarDesc* x_desc = @@ -72,9 +74,11 @@ class SequencePadOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE(x_desc->GetLoDLevel(), 1); } - auto out_dims = x_dims; - out_dims[0] = batch_dim_size; - ctx->SetOutputDim("Out", out_dims); + std::vector out_dims_vec{out_dim_0, out_dim_1}; + auto time_step_dims_vec = framework::vectorize2int(time_step_dims); + out_dims_vec.insert(out_dims_vec.end(), 
time_step_dims_vec.begin(), + time_step_dims_vec.end()); + ctx->SetOutputDim("Out", framework::make_ddim(out_dims_vec)); } }; @@ -118,9 +122,9 @@ class SequencePadOpMaker : public framework::OpProtoAndCheckerMaker { and Input(PadValue): PadValue.data = [0] and attribite 'padded_length' = 4, - then we get 1-level LoDTensor: - Out.lod = [[0, 4, 8]] - Out.data = [a, b, 0, 0, c, d, e, 0] + then we get LoDTensor: + Out.data = [[a, b, 0, 0], + [c, d, e, 0]] Case 2: @@ -131,9 +135,9 @@ class SequencePadOpMaker : public framework::OpProtoAndCheckerMaker { PadValue.data = [0] and attribite 'padded_length' = -1, which mean using the length of longest input sequence(3 in this case), - then we get 1-level LoDTensor: - Out.lod = [[0, 3, 6]] - Out.data = [[a1, a2], [b1, b2], [0, 0], [c1, c2], [d1, d2], [e1, e2]] + then we get LoDTensor: + Out.data = [[[a1, a2], [b1, b2], [0, 0]], + [[c1, c2], [d1, d2], [e1, e2]]] Case 3: @@ -144,9 +148,9 @@ class SequencePadOpMaker : public framework::OpProtoAndCheckerMaker { PadValue.data = [p1, p2] and attribite 'padded_length' = -1, which mean using the length of longest input sequence(3 in this case), - then we get 1-level LoDTensor: - Out.lod = [[0, 3, 6]] - Out.data = [[a1, a2], [b1, b2], [p1, p2], [c1, c2], [d1, d2], [e1, e2]] + then we get LoDTensor: + Out.data = [[[a1, a2], [b1, b2], [p1, p2]], + [[c1, c2], [d1, d2], [e1, e2]]] )DOC"); } diff --git a/python/paddle/fluid/tests/unittests/test_sequence_pad_op.py b/python/paddle/fluid/tests/unittests/test_sequence_pad_op.py index 7b9eedbf52..471515c817 100644 --- a/python/paddle/fluid/tests/unittests/test_sequence_pad_op.py +++ b/python/paddle/fluid/tests/unittests/test_sequence_pad_op.py @@ -61,11 +61,8 @@ class TestSequencePadOp(OpTest): padded_sequences.append(seq) start_idx = end_idx - out_len_lod = self.x_len_lod[:] - out_len_lod_0 = [padded_length] * len(x_len_lod_0) - out_len_lod[0] = out_len_lod_0 - out_data = np.concatenate(padded_sequences, axis=0) - self.outputs = {'Out': 
(out_data, out_len_lod)} + out_data = np.array(padded_sequences) + self.outputs = {'Out': out_data} def setUp(self): self.op_type = 'sequence_pad'