Paddle/paddle/fluid/operators/dropout_op.cc

/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/dropout_op.h"
#include <memory>
#include <string>

namespace paddle {
namespace operators {

using framework::Tensor;

// Forward dropout operator. This class only describes shape inference; the
// actual computation lives in the kernels registered at the bottom of the file.
class DropoutOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  // Derives output shapes from input "X":
  //   - "Out" always gets the dims of "X";
  //   - "Mask" gets the dims of "X" only when the "is_test" attribute is false;
  //   - the LoD of "X" is shared with "Out".
  // Fails the enforce check if input "X" is absent.
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null.");

    const auto input_dims = ctx->GetInputDim("X");
    ctx->SetOutputDim("Out", input_dims);

    // The mask shape is only set outside of test mode.
    const bool is_training = !ctx->Attrs().Get<bool>("is_test");
    if (is_training) {
      ctx->SetOutputDim("Mask", input_dims);
    }

    ctx->ShareLoD("X", /*->*/ "Out");
  }
};

// Declares the proto of the dropout operator: its input, outputs, attributes
// (with defaults and validity checkers) and the operator-level documentation.
class DropoutOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X", "The input of dropout op.");
    AddOutput("Out", "The output of dropout op.");
    // "Mask" is flagged as an intermediate output.
    AddOutput("Mask", "The random sampled dropout mask.").AsIntermediate();

    // Probability attribute; the checker rejects values outside [0, 1].
    AddAttr<float>("dropout_prob", "Probability of setting units to zero.")
        .SetDefault(.5f)
        .AddCustomChecker([](const float& drop_p) {
          PADDLE_ENFORCE(drop_p >= 0.0f && drop_p <= 1.0f,
                         "'dropout_prob' must be between 0.0 and 1.0.");
        });
    AddAttr<bool>("is_test",
                  "(bool, default false) Set to true for inference only, false "
                  "for training. Some layers may run faster when this is true.")
        .SetDefault(false);
    AddAttr<bool>("fix_seed",
                  "A flag indicating whether to use a fixed seed to generate "
                  "random mask. NOTE: DO NOT set this flag to true in "
                  "training. Setting this flag to true is only useful in "
                  "unittest or for debug that always the same output units "
                  "will be dropped.")
        .SetDefault(false);
    AddAttr<int>("seed", "Dropout random seed.").SetDefault(0);
    // NOTE: adjacent string literals are concatenated verbatim, so every
    // fragment below must end with its own separator (space / punctuation);
    // otherwise the emitted description runs words together.
    AddAttr<std::string>(
        "dropout_implementation",
        "[\"downgrade_in_infer\"|\"upscale_in_train\"] "
        "There are two ways to implement dropout "
        "(the mask below is a tensor with the same shape as the input; "
        "each mask value is 0 or 1, and the ratio of 0s is dropout_prob). "
        "1. downgrade_in_infer (default): downgrade the outcome at inference "
        "time. "
        "train: out = input * mask; "
        "inference: out = input * (1.0 - dropout_prob). "
        "2. upscale_in_train: upscale the outcome at training time and do "
        "nothing at inference. "
        "train: out = input * mask / (1.0 - dropout_prob); "
        "inference: out = input "
        "(the dropout op can be removed from the inference program, making "
        "it more efficient).")
        .SetDefault("downgrade_in_infer")
        .AddCustomChecker([](const std::string& type) {
          PADDLE_ENFORCE(
              type == "downgrade_in_infer" || type == "upscale_in_train",
              "dropout_implementation can only be downgrade_in_infer or "
              "upscale_in_train");
        });

    AddComment(R"DOC(
Dropout Operator.

Dropout refers to randomly dropping out units in a neural network. It is a
regularization technique for reducing overfitting by preventing neuron
co-adaptation during training. The dropout operator randomly sets (according to
the given dropout probability) the outputs of some units to zero, while others
are set equal to their corresponding inputs.

)DOC");
  }
};

// Backward (gradient) operator for dropout. Provides shape inference for the
// gradient of "X" and selects the kernel type from the gradient of "Out".
class DropoutOpGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  // Requires that "is_test" is false and that both "Mask" and the gradient of
  // "Out" are present. The gradient of "X" then receives the dims and the LoD
  // of the gradient of "Out".
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE_EQ(ctx->Attrs().Get<bool>("is_test"), false,
                      "GradOp is only callable when is_test is false");

    PADDLE_ENFORCE(ctx->HasInput("Mask"), "Mask must not be null.");
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
                   "Input(Out@GRAD) must not be null.");

    // Resolve the gradient variable names once for the calls below.
    const auto out_grad_name = framework::GradVarName("Out");
    const auto x_grad_name = framework::GradVarName("X");

    ctx->SetOutputDim(x_grad_name, ctx->GetInputDim(out_grad_name));
    ctx->ShareLoD(out_grad_name, /*->*/ x_grad_name);
  }

 protected:
  // The kernel type is built from the data type of the gradient of "Out" and
  // the place reported by the execution context.
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    const auto* out_grad =
        ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
    return framework::OpKernelType(out_grad->type(), ctx.GetPlace());
  }
};

// Builds the description of the "dropout_grad" op that corresponds to a
// forward "dropout" op.
class DropoutGradOpDescMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  // Returns a newly allocated OpDesc of type "dropout_grad" wired as follows:
  //   inputs : gradient of "Out", and the forward output "Mask";
  //   output : gradient of "X";
  //   attrs  : copied wholesale from Attrs().
  std::unique_ptr<framework::OpDesc> Apply() const override {
    std::unique_ptr<framework::OpDesc> grad_desc(new framework::OpDesc());

    grad_desc->SetType("dropout_grad");

    grad_desc->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
    grad_desc->SetInput("Mask", Output("Mask"));

    grad_desc->SetOutput(framework::GradVarName("X"), InputGrad("X"));

    grad_desc->SetAttrMap(Attrs());

    return grad_desc;
  }
};

}  // namespace operators
}  // namespace paddle

// Short alias used by the registration macros below.
namespace ops = paddle::operators;
// Register the forward op "dropout" together with its proto maker and the
// maker that produces its gradient op description.
REGISTER_OPERATOR(dropout, ops::DropoutOp, ops::DropoutOpMaker,
                  ops::DropoutGradOpDescMaker);
// Register the backward op "dropout_grad".
REGISTER_OPERATOR(dropout_grad, ops::DropoutOpGrad);
// CPU kernels for the forward op, instantiated for float and double.
REGISTER_OP_CPU_KERNEL(
    dropout, ops::CPUDropoutKernel<paddle::platform::CPUDeviceContext, float>,
    ops::CPUDropoutKernel<paddle::platform::CPUDeviceContext, double>);
// CPU kernels for the backward op, instantiated for float and double.
REGISTER_OP_CPU_KERNEL(
    dropout_grad,
    ops::DropoutGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::DropoutGradKernel<paddle::platform::CPUDeviceContext, double>);
Fix the grammar in copyright. (#8403) 7 years ago			`/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.`
Add dropout operator. 8 years ago
unify the indentation of license 7 years ago			`Licensed under the Apache License, Version 2.0 (the "License");`
			`you may not use this file except in compliance with the License.`
			`You may obtain a copy of the License at`
Add dropout operator. 8 years ago
unify the indentation of license 7 years ago			`http://www.apache.org/licenses/LICENSE-2.0`
Add dropout operator. 8 years ago
unify the indentation of license 7 years ago			`Unless required by applicable law or agreed to in writing, software`
			`distributed under the License is distributed on an "AS IS" BASIS,`
			`WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`See the License for the specific language governing permissions and`
			`limitations under the License. */`
Add dropout operator. 8 years ago
Correct #include path 7 years ago			`#include "paddle/fluid/operators/dropout_op.h"`
fix op grad maker test=develop 6 years ago			`#include <memory>`
modify dropout att; test=develop 6 years ago			`#include <string>`
Add dropout operator. 8 years ago
			`namespace paddle {`
			`namespace operators {`

			`using framework::Tensor;`

			`class DropoutOp : public framework::OperatorWithKernel {`
			`public:`
			`using framework::OperatorWithKernel::OperatorWithKernel;`

rename InferShapeContextBase to InferShapeContext 7 years ago			`void InferShape(framework::InferShapeContext* ctx) const override {`
Refactoring InferShape (#3946) * init Infershape * add static InferShape interface * refactor add-op infershape * add AttrReader * add all maker's infershape * add all InferShape * add python infer api * add VarDesc interface * add python VarDesc and OpDesc interface * update python code * use infershape function to do shape inference * clean code * do not use pointer * refine code of op_proto_maker * add get_dims to VarDesc * refine the code * remove the dependency from operator to op registry * remove OpProtoAndCheckerMaker from operator * restore complete_add_op * add shape_infer_impl.h * code optimization * remove const return value * add fake BlockDesc class * optimize code * remove infer function in op_info * move InferShapeContextImpl to operator.h * optimize the interface of InferShapeContextBase * add temperary interface of new infershape * change add_op, clip_op, conv2d_op and activation_op * change all operators InferShape * fix SetDim * update cos_sim_op * update crop_op * update lookup_table_op * allocate tensor when call GetDim in InferShapeContext * update modified_huber_loss_op * update rowwise_add_op * update mean_op * update sequence_avg_pool_op * typo * remove old InferShape interface * can compile * fix or unit test * clean code * clean code * remove const before InferShapeContext * change InferenceContextBase to pointer * rename RunTime to Runtime, code clean 7 years ago			`PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null.");`

			`auto x_dims = ctx->GetInputDim("X");`
			`ctx->SetOutputDim("Out", x_dims);`
support testing when training and handle dropout and batch_norm operator in testing mode (#5734) * is_training to is_test in dropout op * handle dropout and batch_norm operator when prune pdesc in testing mode * handle dropout and batch_norm operator when prune pdesc in testing mode * add get_inference_program method * fix dropout op * fix ci * test data after each batch training * refine code * refine test_book3 * fix ci * follow comments 7 years ago			`if (ctx->Attrs().Get<bool>("is_test") == false) {`
Refactoring InferShape (#3946) * init Infershape * add static InferShape interface * refactor add-op infershape * add AttrReader * add all maker's infershape * add all InferShape * add python infer api * add VarDesc interface * add python VarDesc and OpDesc interface * update python code * use infershape function to do shape inference * clean code * do not use pointer * refine code of op_proto_maker * add get_dims to VarDesc * refine the code * remove the dependency from operator to op registry * remove OpProtoAndCheckerMaker from operator * restore complete_add_op * add shape_infer_impl.h * code optimization * remove const return value * add fake BlockDesc class * optimize code * remove infer function in op_info * move InferShapeContextImpl to operator.h * optimize the interface of InferShapeContextBase * add temperary interface of new infershape * change add_op, clip_op, conv2d_op and activation_op * change all operators InferShape * fix SetDim * update cos_sim_op * update crop_op * update lookup_table_op * allocate tensor when call GetDim in InferShapeContext * update modified_huber_loss_op * update rowwise_add_op * update mean_op * update sequence_avg_pool_op * typo * remove old InferShape interface * can compile * fix or unit test * clean code * clean code * remove const before InferShapeContext * change InferenceContextBase to pointer * rename RunTime to Runtime, code clean 7 years ago			`ctx->SetOutputDim("Mask", x_dims);`
Remove unnecessary mask operations in test phase for dropout operator. 8 years ago			`}`
Refactoring InferShape (#3946) * init Infershape * add static InferShape interface * refactor add-op infershape * add AttrReader * add all maker's infershape * add all InferShape * add python infer api * add VarDesc interface * add python VarDesc and OpDesc interface * update python code * use infershape function to do shape inference * clean code * do not use pointer * refine code of op_proto_maker * add get_dims to VarDesc * refine the code * remove the dependency from operator to op registry * remove OpProtoAndCheckerMaker from operator * restore complete_add_op * add shape_infer_impl.h * code optimization * remove const return value * add fake BlockDesc class * optimize code * remove infer function in op_info * move InferShapeContextImpl to operator.h * optimize the interface of InferShapeContextBase * add temperary interface of new infershape * change add_op, clip_op, conv2d_op and activation_op * change all operators InferShape * fix SetDim * update cos_sim_op * update crop_op * update lookup_table_op * allocate tensor when call GetDim in InferShapeContext * update modified_huber_loss_op * update rowwise_add_op * update mean_op * update sequence_avg_pool_op * typo * remove old InferShape interface * can compile * fix or unit test * clean code * clean code * remove const before InferShapeContext * change InferenceContextBase to pointer * rename RunTime to Runtime, code clean 7 years ago			`ctx->ShareLoD("X", /->/ "Out");`
Add dropout operator. 8 years ago			`}`
			`};`

			`class DropoutOpMaker : public framework::OpProtoAndCheckerMaker {`
			`public:`
Clean OpProtoAndCheckerMaker Do not use ctor * Reduce line of codes. * We can use virtual function for Maker now. * The implementation does not care what maker holds, it is easier to refactor later. 7 years ago			`void Make() override {`
Add dropout operator. 8 years ago			`AddInput("X", "The input of dropout op.");`
			`AddOutput("Out", "The output of dropout op.");`
Update by following reviewers' comments. Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into dropout. 8 years ago			`AddOutput("Mask", "The random sampled dropout mask.").AsIntermediate();`
Add dropout operator. 8 years ago
polish doc c to d 7 years ago			`AddAttr<float>("dropout_prob", "Probability of setting units to zero.")`
refine drop_out_op 7 years ago			`.SetDefault(.5f)`
			`.AddCustomChecker([](const float& drop_p) {`
refine 7 years ago			`PADDLE_ENFORCE(drop_p >= 0.0f && drop_p <= 1.0f,`
			`"'dropout_prob' must be between 0.0 and 1.0.");`
refine drop_out_op 7 years ago			`});`
add mkldnn prop_kind phase for inference-only case to pooling and activations (#14278) * add is_test to pooling and activations add prop_kind support for layers activation. conv and pooling add a pass that sets is_test to true add transpiler version of is_test pass test=develop * patch test and pass test=develop * add pass to analyzer.h test=develop * add is_test attr description & pass only on mkldnn in: activation_op.cc batch_norm_op.cc conv_op.cc dropout_op.cc lrn_op.cc pool_op.cc sequence_pool_op.cc softmax_op.cc * fix is_test handling for activation pool and conv * change description of is_test for all layers again * remove GetAttr(use_mkldnn) from pass * rename correct_mkldnn_test_phase to is_test and remove dependency on MKLDNN test=develop * review fix magic number * two if(..)s into one * Check is_test once and pass mkldnn forward prop kind * dereference shared_ptr with * (without get()) test=develop * add is_test_pass back test=develop 6 years ago			`AddAttr<bool>("is_test",`
			`"(bool, default false) Set to true for inference only, false "`
			`"for training. Some layers may run faster when this is true.")`
			`.SetDefault(false);`
fix the bug that dropout always use a fixed seed. 7 years ago			`AddAttr<bool>("fix_seed",`
			`"A flag indicating whether to use a fixed seed to generate "`
			`"random mask. NOTE: DO NOT set this flag to true in "`
			`"training. Setting this flag to true is only useful in "`
			`"unittest or for debug that always the same output units "`
			`"will be dropped.")`
			`.SetDefault(false);`
polish doc c to d 7 years ago			`AddAttr<int>("seed", "Dropout random seed.").SetDefault(0);`
modify dropout att; test=develop 6 years ago			`AddAttr<std::string>(`
			`"dropout_implementation",`
			`"[\"downgrade_in_infer\"\|\"upscale_in_train\"]"`
			`"There are two kinds of ways to implement dropout"`
			`"(the mask below is a tensor have the same shape with input"`
			`"the value of mask is 0 or 1, the ratio of 0 is dropout_prob)"`
			`"1. downgrade_in_infer(default), downgrade the outcome at inference "`
			`"time"`
			`" train: out = input * mask"`
test=develop, fix doc 6 years ago			`" inference: out = input * (1.0 - dropout_prob)"`
modify dropout att; test=develop 6 years ago			`"2. upscale_in_train, upscale the outcome at training time, do nothing "`
			`"in inference"`
			`" train: out = input * mask / ( 1.0 - dropout_prob )"`
			`" inference: out = input"`
			`" dropout op can be removed from the program. the program will be "`
			`"efficient")`
			`.SetDefault("downgrade_in_infer")`
			`.AddCustomChecker([](const std::string& type) {`
			`PADDLE_ENFORCE(`
			`type == "downgrade_in_infer" \|\| type == "upscale_in_train",`
			`"dropout_implementation can only be downgrade_in_infer or "`
			`"upscale_in_train");`
			`});`
polish doc c to d 7 years ago
Update by following reviewers' comments. Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into dropout. 8 years ago			`AddComment(R"DOC(`
			`Dropout Operator.`

polish doc c to d 7 years ago			`Dropout refers to randomly dropping out units in a nerual network. It is a`
Update by following reviewers' comments. Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into dropout. 8 years ago			`regularization technique for reducing overfitting by preventing neuron`
			`co-adaption during training. The dropout operator randomly set (according to`
Correct typos for dropout operator. 8 years ago			`the given dropout probability) the outputs of some units to zero, while others`
polish doc c to d 7 years ago			`are set equal to their corresponding inputs.`

Update by following reviewers' comments. Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into dropout. 8 years ago			`)DOC");`
Add dropout operator. 8 years ago			`}`
			`};`

			`class DropoutOpGrad : public framework::OperatorWithKernel {`
			`public:`
			`using framework::OperatorWithKernel::OperatorWithKernel;`

rename InferShapeContextBase to InferShapeContext 7 years ago			`void InferShape(framework::InferShapeContext* ctx) const override {`
support testing when training and handle dropout and batch_norm operator in testing mode (#5734) * is_training to is_test in dropout op * handle dropout and batch_norm operator when prune pdesc in testing mode * handle dropout and batch_norm operator when prune pdesc in testing mode * add get_inference_program method * fix dropout op * fix ci * test data after each batch training * refine code * refine test_book3 * fix ci * follow comments 7 years ago			`PADDLE_ENFORCE_EQ(ctx->Attrs().Get<bool>("is_test"), false,`
			`"GradOp is only callable when is_test is false");`
Refactoring InferShape (#3946) * init Infershape * add static InferShape interface * refactor add-op infershape * add AttrReader * add all maker's infershape * add all InferShape * add python infer api * add VarDesc interface * add python VarDesc and OpDesc interface * update python code * use infershape function to do shape inference * clean code * do not use pointer * refine code of op_proto_maker * add get_dims to VarDesc * refine the code * remove the dependency from operator to op registry * remove OpProtoAndCheckerMaker from operator * restore complete_add_op * add shape_infer_impl.h * code optimization * remove const return value * add fake BlockDesc class * optimize code * remove infer function in op_info * move InferShapeContextImpl to operator.h * optimize the interface of InferShapeContextBase * add temperary interface of new infershape * change add_op, clip_op, conv2d_op and activation_op * change all operators InferShape * fix SetDim * update cos_sim_op * update crop_op * update lookup_table_op * allocate tensor when call GetDim in InferShapeContext * update modified_huber_loss_op * update rowwise_add_op * update mean_op * update sequence_avg_pool_op * typo * remove old InferShape interface * can compile * fix or unit test * clean code * clean code * remove const before InferShapeContext * change InferenceContextBase to pointer * rename RunTime to Runtime, code clean 7 years ago
			`PADDLE_ENFORCE(ctx->HasInput("Mask"), "Mask must not be null.");`
			`PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),`
			`"Input(Out@GRAD) must not be null.");`

			`auto out_dims = ctx->GetInputDim(framework::GradVarName("Out"));`
fix op grad maker test=develop 6 years ago
			`ctx->SetOutputDim(framework::GradVarName("X"), out_dims);`
			`ctx->ShareLoD(framework::GradVarName("Out"),`
			`/->/ framework::GradVarName("X"));`
			`}`
Refine dropout gpu memory (#17095) * refine_dropout_mem,test=develop * # This is a combination of 14 commits. # The first commit's message is: remove ut test_dist_word2vec in mac ci, will fix it in private, test=develop (#17066) # This is the 2nd commit message: Fleet unify distributed training (#16791) * implement distributed transpiler with fleet # This is the 3rd commit message: ParallelDyGraph with GPU collective mode (#16827) implement dygraph.parallel.DataParallel to hook reduce op. # This is the 4th commit message: Init mixed precision training interface (#16856) * Init mixed precision training interface * Add fp16 test script test=develop * All initializers support float16 test=develop * Code cleanup & add more code annotations test=develop * Update API spec test=develop * Add usage example in doc test=develop # This is the 5th commit message: fix reference_count_pass,test=develop (#17060) test=develop # This is the 6th commit message: Speedup roi_perspective_transform op by caching the information of linear interpolation in forward (#17090) * Cache the information of linear interpolation in forward and use it in backward. test=develop * Fix cuda kernel. test=develop # This is the 7th commit message: remove unnecessary prepare_data (#17080) test=develop # This is the 8th commit message: fix interpolate cu. 
test=develop (#17101) # This is the 9th commit message: test=develop, double backward leaky_relu (#17067) backward of backward: leaky_relu # This is the 10th commit message: fix fuse optimizer ops (#17102) test=develop # This is the 11th commit message: truncated_gaussian_random supported in distributed training, test=develop (#17091) # This is the 12th commit message: Detailed coordinate description for yolov3 loss (#17007) * Detailed coordinate description for yolov3 loss test=develop * modified api.spec test=develop * modified loss name * fix api.spec test=develop * polish description test=develop * modified api.spec test=develop # This is the 13th commit message: fix test_weight_decay (#17109) test=develop # This is the 14th commit message: Path flag (#17105) * fix python/paddle/fluid/__init__.py detecting problems 6 years ago
			`protected:`
			`framework::OpKernelType GetExpectedKernelType(`
			`const framework::ExecutionContext& ctx) const override {`
			`return framework::OpKernelType(`
			`ctx.Input<framework::Tensor>(framework::GradVarName("Out"))->type(),`
			`ctx.GetPlace());`
			`}`
fix op grad maker test=develop 6 years ago			`};`

			`class DropoutGradOpDescMaker : public framework::SingleGradOpDescMaker {`
			`public:`
			`using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;`

			`protected:`
			`std::unique_ptr<framework::OpDesc> Apply() const override {`
			`std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());`
			`op->SetType("dropout_grad");`
			`op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));`
			`op->SetInput("Mask", Output("Mask"));`
			`op->SetOutput(framework::GradVarName("X"), InputGrad("X"));`
			`op->SetAttrMap(Attrs());`
			`return op;`
Add dropout operator. 8 years ago			`}`
			`};`

			`} // namespace operators`
			`} // namespace paddle`

			`namespace ops = paddle::operators;`
script to fix all 7 years ago			`REGISTER_OPERATOR(dropout, ops::DropoutOp, ops::DropoutOpMaker,`
fix op grad maker test=develop 6 years ago			`ops::DropoutGradOpDescMaker);`
add semicolon to op registry (#10034) * script to add semicolon * fix typo 7 years ago			`REGISTER_OPERATOR(dropout_grad, ops::DropoutOpGrad);`
Fixed SEGFAULT of dropout operator in GPU. 8 years ago			`REGISTER_OP_CPU_KERNEL(`
add dropout attr; test=develop 6 years ago			`dropout, ops::CPUDropoutKernel<paddle::platform::CPUDeviceContext, float>,`
			`ops::CPUDropoutKernel<paddle::platform::CPUDeviceContext, double>);`
Add dropout operator. 8 years ago			`REGISTER_OP_CPU_KERNEL(`
Refine device context (#6433) There are mainly following fixes: - take `DeviceContext` as the template parameter of math functors and OpKernel instead of `Place` - remove `eigen_device` interface in base class `DeviceContext` - remove `GetEigenDevice` interface in `ExecutionContext` and base class `DeviceContext` - remove unused `platform::EigenDeviceConverter` - rename `REGISTER_OP_GPU_KERNEL` to `REGISTER_OP_CUDA_KERNEL` - rename `USE_GPU_ONLY_OP` to `USE_CUDA_ONLY_OP` 7 years ago			`dropout_grad,`
add dropout attr; test=develop 6 years ago			`ops::DropoutGradKernel<paddle::platform::CPUDeviceContext, float>,`
			`ops::DropoutGradKernel<paddle::platform::CPUDeviceContext, double>);`