Paddle/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc

/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/sequence_ops/sequence_softmax_op.h"
#include <string>

namespace paddle {
namespace operators {

// Forward operator definition for sequence_softmax. It checks that X and Out
// are present, makes Out share X's dimensions and LoD, and decides which
// kernel flavour (plain or cuDNN) should be dispatched.
class SequenceSoftmaxOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  // Shape inference: Out mirrors X in both dimensions and LoD.
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of SequenceSoftmaxOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
                   "Output(Out) of SequenceSoftmaxOp should not be null.");

    ctx->ShareDim("X", /*->*/ "Out");
    ctx->ShareLoD("X", /*->*/ "Out");
  }

 protected:
  // Builds the kernel key from X's data type, the execution place, the
  // "data_format" attribute and the selected library.
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    const bool cudnn_requested = ctx.Attr<bool>("use_cudnn");

    // The cuDNN handle is only probed in CUDA builds running on a GPU place;
    // in every other configuration cuDNN counts as unavailable.
    bool cudnn_available = false;
#ifdef PADDLE_WITH_CUDA
    if (platform::is_gpu_place(ctx.GetPlace())) {
      auto& cuda_ctx =
          ctx.template device_context<platform::CUDADeviceContext>();
      cudnn_available = (cuda_ctx.cudnn_handle() != nullptr);
    }
#endif

    // cuDNN is used only when it is both requested and actually available.
    const framework::LibraryType chosen_library =
        (cudnn_requested && cudnn_available)
            ? framework::LibraryType::kCUDNN
            : framework::LibraryType::kPlain;

    std::string layout_name = ctx.Attr<std::string>("data_format");
    return framework::OpKernelType(
        ctx.Input<Tensor>("X")->type(), ctx.GetPlace(),
        framework::StringToDataLayout(layout_name), chosen_library);
  }
};

// Proto maker for sequence_softmax: declares the operator's input, output,
// attributes and the user-facing documentation string.
class SequenceSoftmaxOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X",
             "(LoDTensor) 1-D or 2-D input LoDTensor with the 2-nd dimension "
             "of length 1.");
    AddOutput("Out",
              "(LoDTensor) 1-D or 2-D output LoDTensor with the 2-nd dimension "
              "of length 1.");
    AddAttr<bool>(
        "use_cudnn",
        "(bool, default false) Only used in cudnn kernel, need install cudnn")
        .SetDefault(false);
    // The description states the real default ("AnyLayout") so that the
    // generated documentation agrees with SetDefault() below; the previous
    // text claimed both NCHW and NHWC as the default.
    AddAttr<std::string>(
        "data_format",
        "(string, default \"AnyLayout\") "
        "An optional string from: \"NHWC\", \"NCHW\", \"AnyLayout\". "
        "Specify the data format of the output data, "
        "the input will be transformed automatically. ")
        .SetDefault("AnyLayout");
    AddComment(R"DOC(
Sequence Softmax Operator.

SequenceSoftmaxOp computes the softmax activation among all time-steps for each
sequence. The dimension of each time-step should be 1. Thus, the shape of
input Tensor can be either [N, 1] or [N], where N is the sum of the length
of all sequences.

The algorithm works as follows:

    for i-th sequence in a mini-batch:

$$
Out(X[lod[i]:lod[i+1], :]) = \
\frac{\exp(X[lod[i]:lod[i+1], :])} \
{\sum(\exp(X[lod[i]:lod[i+1], :]))}
$$

For example, for a mini-batch of 3 sequences with variable-length,
each containing 2, 3, 2 time-steps, the lod of which is [0, 2, 5, 7],
then softmax will be computed among X[0:2, :], X[2:5, :], X[5:7, :]
and N turns out to be 7.

)DOC");
  }
};

// Backward operator definition for sequence_softmax. It validates the
// presence of Out, Out@GRAD, X and X@GRAD, requires Out and Out@GRAD to have
// the same shape, and gives X@GRAD the dimensions of X.
class SequenceSoftmaxGradOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("Out"),
                   "Input(Out) of SequenceSoftmaxGradOp should not be null.");
    PADDLE_ENFORCE(
        ctx->HasInput(framework::GradVarName("Out")),
        "Input(Out@GRAD) of SequenceSoftmaxGradOp should not be null.");
    // These two messages name the grad op (they previously said
    // "SequenceSoftmaxOp"), so a failure points at the operator that
    // actually raised it.
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of SequenceSoftmaxGradOp should not be null.");
    PADDLE_ENFORCE(
        ctx->HasOutput(framework::GradVarName("X")),
        "Output(X@GRAD) of SequenceSoftmaxGradOp should not be null.");

    PADDLE_ENFORCE_EQ(
        ctx->GetInputDim("Out"),
        ctx->GetInputDim(framework::GradVarName("Out")),
        "Input(Out) and Input(Out@GRAD) of SequenceSoftmaxGradOp should be of "
        "the same shape.");

    // X is consulted here only for its dimensions.
    ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
  }

 protected:
  // Builds the kernel key from Out's data type, the execution place, the
  // "data_format" attribute and the selected library.
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    // choose cudnn kernel if the runtime supported.
    bool use_cudnn = ctx.Attr<bool>("use_cudnn");
    bool runtime_cudnn_support = false;
#ifdef PADDLE_WITH_CUDA
    if (platform::is_gpu_place(ctx.GetPlace())) {
      auto& dev_ctx =
          ctx.template device_context<platform::CUDADeviceContext>();
      runtime_cudnn_support = dev_ctx.cudnn_handle() != nullptr;
    }
#endif
    framework::LibraryType library_ = framework::LibraryType::kPlain;
    if (use_cudnn && runtime_cudnn_support) {
      library_ = framework::LibraryType::kCUDNN;
    }
    std::string data_format = ctx.Attr<std::string>("data_format");
    // The data type is taken from Out rather than X.
    return framework::OpKernelType(
        ctx.Input<Tensor>("Out")->type(), ctx.GetPlace(),
        framework::StringToDataLayout(data_format), library_);
  }
};

// Declares SequenceSoftmaxGradOpNoNeedBufferVarsInferer for the input "X";
// the inferer is attached to sequence_softmax_grad by the REGISTER_OPERATOR
// call in this file.
// NOTE(review): presumably this tells the framework that the grad op needs
// only X's metadata (its InferShape reads just X's dims and the kernel type
// is taken from Out), not X's data buffer -- confirm against the macro
// definition and the grad kernel.
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(
    SequenceSoftmaxGradOpNoNeedBufferVarsInferer, "X");

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
// Forward op: operator class, proto maker, and DefaultGradOpDescMaker<true>
// as the grad-op description maker.
REGISTER_OPERATOR(sequence_softmax, ops::SequenceSoftmaxOp,
                  ops::SequenceSoftmaxOpMaker,
                  paddle::framework::DefaultGradOpDescMaker<true>);
// Backward op, registered together with the no-need-buffer inferer that is
// declared for input "X".
REGISTER_OPERATOR(sequence_softmax_grad, ops::SequenceSoftmaxGradOp,
                  ops::SequenceSoftmaxGradOpNoNeedBufferVarsInferer);
// CPU kernels of the forward op, instantiated for float and double.
REGISTER_OP_CPU_KERNEL(
    sequence_softmax,
    ops::SequenceSoftmaxKernel<paddle::platform::CPUDeviceContext, float>,
    ops::SequenceSoftmaxKernel<paddle::platform::CPUDeviceContext, double>);
// CPU kernels of the backward op, instantiated for float and double.
REGISTER_OP_CPU_KERNEL(
    sequence_softmax_grad,
    ops::SequenceSoftmaxGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::SequenceSoftmaxGradKernel<paddle::platform::CPUDeviceContext, double>);
Fix the grammar in copyright. (#8403) 7 years ago			`/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.`
Initialize the sequence softmax operator. 8 years ago
			`Licensed under the Apache License, Version 2.0 (the "License");`
			`you may not use this file except in compliance with the License.`
			`You may obtain a copy of the License at`

			`http://www.apache.org/licenses/LICENSE-2.0`

			`Unless required by applicable law or agreed to in writing, software`
			`distributed under the License is distributed on an "AS IS" BASIS,`
			`WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`See the License for the specific language governing permissions and`
			`limitations under the License. */`

Refine operator cmake (#14413) * wip simplify operator framework * wip * wip * done test=develop * clean test=develop * fix test=develop * fix deps test=develop * fix cpu build test=develop * fix tensorrt build test=develop * fix tests test=develop * fix test=develop * fix cpu build test=develop 6 years ago			`#include "paddle/fluid/operators/sequence_ops/sequence_softmax_op.h"`
Fix CPPLint errors in operators (#9826) * Fix CPPLint errors in operators * Fix cast in softmax * Fix softmax_mkldnn * Fix send_recv_op_test * Send_recv * Fix softmax mkldnn 7 years ago			`#include <string>`
Initialize the sequence softmax operator. 8 years ago
			`namespace paddle {`
			`namespace operators {`

			`class SequenceSoftmaxOp : public framework::OperatorWithKernel {`
			`public:`
			`using framework::OperatorWithKernel::OperatorWithKernel;`

rename InferShapeContextBase to InferShapeContext 7 years ago			`void InferShape(framework::InferShapeContext* ctx) const override {`
Finish the SequenceSoftmaxGradKernel, using SoftmaxGradFunctor. 7 years ago			`PADDLE_ENFORCE(ctx->HasInput("X"),`
			`"Input(X) of SequenceSoftmaxOp should not be null.");`
			`PADDLE_ENFORCE(ctx->HasOutput("Out"),`
			`"Output(Out) of SequenceSoftmaxOp should not be null.");`
Set the right shape of selected_rows (#13723) * set the right shape of selected_rows test=develop * enhance check * fix activation_op * remove cast * use ShareDimInfo replace SetDim and ShareLod * use ShareDimAndLod test=develop * follow comment test=develop * check whether the input has lod test=develop * Split ShareDimAndLod test=develop * checkout clip.py test=develop 6 years ago
			`ctx->ShareDim("X", /->/ "Out");`
Finish the SequenceSoftmaxGradKernel, using SoftmaxGradFunctor. 7 years ago			`ctx->ShareLoD("X", /->/ "Out");`
Initialize the sequence softmax operator. 8 years ago			`}`
[Speed]implement cudnn sequence softmax cudnn (#8978) * "add softmax cudnn functor support" * "add testing" * "refine cmakelist" * "sequence softmax forward speed up" * "add softmax grad" * "fix sequence softmax test" * "add double precision' * "fix softmax test" * "add softmax cudnn support" * "fix softmax cudnn test" * "add softmax to nn.py" * "fix compile bug" * "refine cmakelist" * "fix ci" * "fix based on comment" * "fix based on comments" * "fix ci" 7 years ago
			`protected:`
			`framework::OpKernelType GetExpectedKernelType(`
			`const framework::ExecutionContext& ctx) const override {`
			`// choose cudnn kernel if the runtime supported.`
			`bool use_cudnn = ctx.Attr<bool>("use_cudnn");`
			`bool runtime_cudnn_support = false;`
			`#ifdef PADDLE_WITH_CUDA`
			`if (platform::is_gpu_place(ctx.GetPlace())) {`
			`auto& dev_ctx =`
			`ctx.template device_context<platform::CUDADeviceContext>();`
			`runtime_cudnn_support = dev_ctx.cudnn_handle() != nullptr ? true : false;`
			`}`
			`#endif`
			`framework::LibraryType library_ = framework::LibraryType::kPlain;`
			`if (use_cudnn && runtime_cudnn_support) {`
			`library_ = framework::LibraryType::kCUDNN;`
			`}`
			`std::string data_format = ctx.Attr<std::string>("data_format");`
			`return framework::OpKernelType(`
Change tensor uses proto::VarType::type test=develop 6 years ago			`ctx.Input<Tensor>("X")->type(), ctx.GetPlace(),`
[Speed]implement cudnn sequence softmax cudnn (#8978) * "add softmax cudnn functor support" * "add testing" * "refine cmakelist" * "sequence softmax forward speed up" * "add softmax grad" * "fix sequence softmax test" * "add double precision' * "fix softmax test" * "add softmax cudnn support" * "fix softmax cudnn test" * "add softmax to nn.py" * "fix compile bug" * "refine cmakelist" * "fix ci" * "fix based on comment" * "fix based on comments" * "fix ci" 7 years ago			`framework::StringToDataLayout(data_format), library_);`
			`}`
Initialize the sequence softmax operator. 8 years ago			`};`

			`class SequenceSoftmaxOpMaker : public framework::OpProtoAndCheckerMaker {`
			`public:`
Clean OpProtoAndCheckerMaker Do not use ctor * Reduce line of codes. * We can use virtual function for Maker now. * The implementation does not care what maker holds, it is easier to refactor later. 7 years ago			`void Make() override {`
Finish the SequenceSoftmaxGradKernel, using SoftmaxGradFunctor. 7 years ago			`AddInput("X",`
			`"(LoDTensor) 1-D or 2-D input LoDTensor with the 2-nd dimension "`
			`"of length 1.");`
			`AddOutput("Out",`
			`"(LoDTensor) 1-D or 2-D output LoDTensor with the 2-nd dimension "`
			`"of length 1.");`
[Speed]implement cudnn sequence softmax cudnn (#8978) * "add softmax cudnn functor support" * "add testing" * "refine cmakelist" * "sequence softmax forward speed up" * "add softmax grad" * "fix sequence softmax test" * "add double precision' * "fix softmax test" * "add softmax cudnn support" * "fix softmax cudnn test" * "add softmax to nn.py" * "fix compile bug" * "refine cmakelist" * "fix ci" * "fix based on comment" * "fix based on comments" * "fix ci" 7 years ago			`AddAttr<bool>(`
			`"use_cudnn",`
			`"(bool, default false) Only used in cudnn kernel, need install cudnn")`
			`.SetDefault(false);`
			`AddAttr<std::string>(`
			`"data_format",`
			`"(string, default NCHW) Only used in "`
			`"An optional string from: \"NHWC\", \"NCHW\". "`
			`"Defaults to \"NHWC\". Specify the data format of the output data, "`
			`"the input will be transformed automatically. ")`
			`.SetDefault("AnyLayout");`
Initialize the sequence softmax operator. 8 years ago			`AddComment(R"DOC(`
Fixing documentation for operators (#5373) * Adding documentation for seq_expand * Adding documentation for seq_concat_op * Adding documentation for sequence_conv * Adding sequence_pool * Fixing review comment * Adding sequence_softmax * Updating doc for sigmoid_cross_entropy_with_logits 7 years ago			`Sequence Softmax Operator.`

			`SequenceSoftmaxOp computes the softmax activation among all time-steps for each`
Merge branch 'develop' into core_add_sequence_softmax_op 7 years ago			`sequence. The dimension of each time-step should be 1. Thus, the shape of`
Fixing documentation for operators (#5373) * Adding documentation for seq_expand * Adding documentation for seq_concat_op * Adding documentation for sequence_conv * Adding sequence_pool * Fixing review comment * Adding sequence_softmax * Updating doc for sigmoid_cross_entropy_with_logits 7 years ago			`input Tensor can be either [N, 1] or [N], where N is the sum of the length`
			`of all sequences.`
Finish the SequenceSoftmaxGradKernel, using SoftmaxGradFunctor. 7 years ago
Fixing documentation for operators (#5373) * Adding documentation for seq_expand * Adding documentation for seq_concat_op * Adding documentation for sequence_conv * Adding sequence_pool * Fixing review comment * Adding sequence_softmax * Updating doc for sigmoid_cross_entropy_with_logits 7 years ago			`The algorithm works as follows:`
Fix equation of sequence_softmax_op. (#6810) 7 years ago
Merge branch 'develop' into core_add_sequence_softmax_op 7 years ago			`for i-th sequence in a mini-batch:`
Fix equation of sequence_softmax_op. (#6810) 7 years ago
			`$$`
			`Out(X[lod[i]:lod[i+1]], :) = \`
			`\frac{\exp(X[lod[i]:lod[i+1], :])} \`
			`{\sum(\exp(X[lod[i]:lod[i+1], :]))}`
			`$$`
Finish the SequenceSoftmaxGradKernel, using SoftmaxGradFunctor. 7 years ago
			`For example, for a mini-batch of 3 sequences with variable-length,`
			`each containing 2, 3, 2 time-steps, the lod of which is [0, 2, 5, 7],`
Merge branch 'develop' into core_add_sequence_softmax_op 7 years ago			`then softmax will be computed among X[0:2, :], X[2:5, :], X[5:7, :]`
Finish the SequenceSoftmaxGradKernel, using SoftmaxGradFunctor. 7 years ago			`and N turns out to be 7.`
Fixing documentation for operators (#5373) * Adding documentation for seq_expand * Adding documentation for seq_concat_op * Adding documentation for sequence_conv * Adding sequence_pool * Fixing review comment * Adding sequence_softmax * Updating doc for sigmoid_cross_entropy_with_logits 7 years ago
Initialize the sequence softmax operator. 8 years ago			`)DOC");`
			`}`
			`};`

			`class SequenceSoftmaxGradOp : public framework::OperatorWithKernel {`
			`public:`
			`using framework::OperatorWithKernel::OperatorWithKernel;`

rename InferShapeContextBase to InferShapeContext 7 years ago			`void InferShape(framework::InferShapeContext* ctx) const override {`
Finish the SequenceSoftmaxGradKernel, using SoftmaxGradFunctor. 7 years ago			`PADDLE_ENFORCE(ctx->HasInput("Out"),`
			`"Input(Out) of SequenceSoftmaxGradOp should not be null.");`
			`PADDLE_ENFORCE(`
			`ctx->HasInput(framework::GradVarName("Out")),`
			`"Input(Out@GRAD) of SequenceSoftmaxGradOp should not be null.");`
			`PADDLE_ENFORCE(ctx->HasInput("X"),`
			`"Input(X) of SequenceSoftmaxOp should not be null.");`
			`PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),`
			`"Output(X@GRAD) of SequenceSoftmaxOp should not be null.");`

			`PADDLE_ENFORCE_EQ(`
			`ctx->GetInputDim("Out"),`
			`ctx->GetInputDim(framework::GradVarName("Out")),`
			`"Input(Out) and Input(Out@GRAD) of SequenceSoftmaxGradOp should be of "`
			`"the same shape.");`

			`ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));`
			`}`
[Speed]implement cudnn sequence softmax cudnn (#8978) * "add softmax cudnn functor support" * "add testing" * "refine cmakelist" * "sequence softmax forward speed up" * "add softmax grad" * "fix sequence softmax test" * "add double precision' * "fix softmax test" * "add softmax cudnn support" * "fix softmax cudnn test" * "add softmax to nn.py" * "fix compile bug" * "refine cmakelist" * "fix ci" * "fix based on comment" * "fix based on comments" * "fix ci" 7 years ago
			`protected:`
			`framework::OpKernelType GetExpectedKernelType(`
			`const framework::ExecutionContext& ctx) const override {`
			`// choose cudnn kernel if the runtime supported.`
			`bool use_cudnn = ctx.Attr<bool>("use_cudnn");`
			`bool runtime_cudnn_support = false;`
			`#ifdef PADDLE_WITH_CUDA`
			`if (platform::is_gpu_place(ctx.GetPlace())) {`
			`auto& dev_ctx =`
			`ctx.template device_context<platform::CUDADeviceContext>();`
			`runtime_cudnn_support = dev_ctx.cudnn_handle() != nullptr ? true : false;`
			`}`
			`#endif`
			`framework::LibraryType library_ = framework::LibraryType::kPlain;`
			`if (use_cudnn && runtime_cudnn_support) {`
			`library_ = framework::LibraryType::kCUDNN;`
			`}`
			`std::string data_format = ctx.Attr<std::string>("data_format");`
			`return framework::OpKernelType(`
refine sequence_softmax grad maker, test=develop (#20127) 5 years ago			`ctx.Input<Tensor>("Out")->type(), ctx.GetPlace(),`
[Speed]implement cudnn sequence softmax cudnn (#8978) * "add softmax cudnn functor support" * "add testing" * "refine cmakelist" * "sequence softmax forward speed up" * "add softmax grad" * "fix sequence softmax test" * "add double precision' * "fix softmax test" * "add softmax cudnn support" * "fix softmax cudnn test" * "add softmax to nn.py" * "fix compile bug" * "refine cmakelist" * "fix ci" * "fix based on comment" * "fix based on comments" * "fix ci" 7 years ago			`framework::StringToDataLayout(data_format), library_);`
			`}`
Initialize the sequence softmax operator. 8 years ago			`};`

refine sequence_softmax grad maker, test=develop (#20127) 5 years ago			`DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(`
			`SequenceSoftmaxGradOpNoNeedBufferVarsInferer, "X");`

Initialize the sequence softmax operator. 8 years ago			`} // namespace operators`
			`} // namespace paddle`

			`namespace ops = paddle::operators;`
script to fix all 7 years ago			`REGISTER_OPERATOR(sequence_softmax, ops::SequenceSoftmaxOp,`
			`ops::SequenceSoftmaxOpMaker,`
add semicolon to op registry (#10034) * script to add semicolon * fix typo 7 years ago			`paddle::framework::DefaultGradOpDescMaker<true>);`
refine sequence_softmax grad maker, test=develop (#20127) 5 years ago			`REGISTER_OPERATOR(sequence_softmax_grad, ops::SequenceSoftmaxGradOp,`
			`ops::SequenceSoftmaxGradOpNoNeedBufferVarsInferer);`
Initialize the sequence softmax operator. 8 years ago			`REGISTER_OP_CPU_KERNEL(`
			`sequence_softmax,`
[Speed]implement cudnn sequence softmax cudnn (#8978) * "add softmax cudnn functor support" * "add testing" * "refine cmakelist" * "sequence softmax forward speed up" * "add softmax grad" * "fix sequence softmax test" * "add double precision' * "fix softmax test" * "add softmax cudnn support" * "fix softmax cudnn test" * "add softmax to nn.py" * "fix compile bug" * "refine cmakelist" * "fix ci" * "fix based on comment" * "fix based on comments" * "fix ci" 7 years ago			`ops::SequenceSoftmaxKernel<paddle::platform::CPUDeviceContext, float>,`
			`ops::SequenceSoftmaxKernel<paddle::platform::CPUDeviceContext, double>);`
Initialize the sequence softmax operator. 8 years ago			`REGISTER_OP_CPU_KERNEL(`
			`sequence_softmax_grad,`
[Speed]implement cudnn sequence softmax cudnn (#8978) * "add softmax cudnn functor support" * "add testing" * "refine cmakelist" * "sequence softmax forward speed up" * "add softmax grad" * "fix sequence softmax test" * "add double precision' * "fix softmax test" * "add softmax cudnn support" * "fix softmax cudnn test" * "add softmax to nn.py" * "fix compile bug" * "refine cmakelist" * "fix ci" * "fix based on comment" * "fix based on comments" * "fix ci" 7 years ago			`ops::SequenceSoftmaxGradKernel<paddle::platform::CPUDeviceContext, float>,`
			`ops::SequenceSoftmaxGradKernel<paddle::platform::CPUDeviceContext, double>);`