diff --git a/paddle/operators/name_convention.md b/paddle/operators/name_convention.md index 62e7a6c844..b5cb176e00 100644 --- a/paddle/operators/name_convention.md +++ b/paddle/operators/name_convention.md @@ -44,17 +44,21 @@ public: AddOutput("Out", "(Tensor) Accumulated output tensor"); AddAttr("gamma", "(float, default 1.0) Accumulation multiplier").SetDefault(1.0f); AddComment(R"DOC( -Accumulate operator accumulates the input tensor to the output tensor. If the +Accumulate Operator. + +This operator accumulates the input tensor to the output tensor. If the output tensor already has the right size, we add to it; otherwise, we first initialize the output tensor to all zeros, and then do accumulation. Any further calls to the operator, given that no one else fiddles with the output in the interim, will do simple accumulations. -Accumulation is done as shown: + +Accumulation is done as follows: Out = 1*X + gamma*Out where X is the input tensor, Out is the output tensor and gamma is the multiplier argument. 
+ )DOC"); } }; diff --git a/paddle/operators/rank_loss_op.cc b/paddle/operators/rank_loss_op.cc index 17ef2b1d01..061e82412e 100644 --- a/paddle/operators/rank_loss_op.cc +++ b/paddle/operators/rank_loss_op.cc @@ -26,9 +26,9 @@ class RankLossOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext *ctx) const override { // input check - PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) shouldn't be null"); - PADDLE_ENFORCE(ctx->HasInput("Left"), "Input(Left) shouldn't be null"); - PADDLE_ENFORCE(ctx->HasInput("Right"), "Input(Right) shouldn't be null"); + PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) shouldn't be null."); + PADDLE_ENFORCE(ctx->HasInput("Left"), "Input(Left) shouldn't be null."); + PADDLE_ENFORCE(ctx->HasInput("Right"), "Input(Right) shouldn't be null."); auto label_dims = ctx->GetInputDim("Label"); auto left_dims = ctx->GetInputDim("Left"); @@ -50,32 +50,32 @@ class RankLossOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("Label", "The label indicating A ranked higher than B or not, row vector."); AddInput("Left", "The output of RankNet for doc A, vector."); - AddInput("Right", "The output of RankNet for doc B, vetor"); + AddInput("Right", "The output of RankNet for doc B, vector."); AddOutput("Out", "The output loss of RankLoss operator, vector."); - AddComment(R"DOC(RankLoss operator + AddComment(R"DOC( +RankLoss Operator. -Rank loss operator for RankNet[1]. RankNet is a pairwise ranking model with +RankLoss operator for RankNet +(http://icml.cc/2015/wp-content/uploads/2015/06/icml_ranking.pdf). +RankNet is a pairwise ranking model with one training sample consisting of a pair of doc A and B, and the label P indicating that A is ranked higher than B or not: P = {0, 1} or {0, 0.5, 1}, where 0.5 means no information about the rank of the input pair. 
-The RankLoss operator contains three inputs: Left (o_i), Right (o_j) and Label -(P_{i,j}), which represent the output of RankNet for two docs and the label -respectively, and yields the rank loss C_{i,j} by following the expression +The RankLoss operator takes three inputs: Left (o_i), Right (o_j) and Label +(P_{i,j}), which represent the output of RankNet for the two docs and the label, +respectively, and yields the rank loss C_{i,j} using the following equation: -\f[ +\f$$ C_{i,j} = -\tilde{P_{ij}} * o_{i,j} + log(1 + e^{o_{i,j}}) \\ o_{i,j} = o_i - o_j \\ \tilde{P_{i,j}} = \left \{0, 0.5, 1 \right \} \ or \ \left \{0, 1 \right \} -\f] +\f$$ The operator can take inputs of one sample or in batch. -[1]. Chris Burges, Tal Shaked, Erin Renshaw, et al. Learning to - Rank using Gradient Descent. - http://icml.cc/2015/wp-content/uploads/2015/06/icml_ranking.pdf )DOC"); } }; diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc index 9eb2d79b4f..b0e87b7059 100644 --- a/paddle/operators/recurrent_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -509,14 +509,14 @@ class RecurrentOpProtoMaker : public framework::OpProtoAndCheckerMaker { AddInput(kInitialStates, "rnn initial states").AsDuplicable(); AddInput(kParameters, "Parameters are used by step block as its input. However, the " - "inputs is not a sequence tensor. Every time step, each operator " - "in step block just use the parameter directly") + "input is not a sequence tensor. Every time step, each operator " + "in step block just uses the parameter directly.") .AsDuplicable(); AddOutput(kOutputs, - "The output sequence of RNN. The sequence length must be same") + "The output sequence of RNN. The sequence length must be the same.") .AsDuplicable(); AddOutput(kStepScopes, - "StepScopes contains all local variables in each time step."); + "StepScopes contain all local variables in each time step."); AddAttr>(kExStates, string::Sprintf( R"DOC(The ex-state variable names. 
@@ -556,10 +556,12 @@ if reverse is True o o o o )DOC").SetDefault(false); AddAttr(kIsTrain, "").SetDefault(true); - AddComment(R"DOC(Static Length Recurrent Operator + AddComment(R"DOC( +Static Length Recurrent Operator. + +The static length recurrent operator can only operate on fixed size sequence +data, i.e. in each mini-batch, the sequence lengths of all inputs are the same. -The static length recurrent operator can only operate on fix sized sequence -data, i.e. in each mini-batch, the sequence length of all inputs are same. )DOC"); } }; diff --git a/paddle/operators/reduce_op.cc b/paddle/operators/reduce_op.cc index 0599daa768..2589a54cfc 100644 --- a/paddle/operators/reduce_op.cc +++ b/paddle/operators/reduce_op.cc @@ -80,24 +80,27 @@ class ReduceOpMaker : public framework::OpProtoAndCheckerMaker { public: ReduceOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput( - "X", - "(Tensor) The input tensor. Tensors with rank at most 6 are supported"); + AddInput("X", + "(Tensor) The input tensor. Tensors with rank at most 6 are " + "supported."); AddOutput("Out", "(Tensor) The result tensor."); AddAttr( "dim", - "(int, default 1) The dimension to reduce. " + "(int, default 0) The dimension to reduce. " "Must be in the range [-rank(input), rank(input)). " "If `dim < 0`, the dim to reduce is `rank + dim`. " - "Noting that reducing on the first dim will make the LoD info lost.") + "Note that reducing on the first dim will make the LoD info lost.") .SetDefault(0); AddAttr("keep_dim", "(bool, default false) " "If true, retain the reduced dimension with length 1.") .SetDefault(false); comment_ = R"DOC( -{ReduceOP} operator computes the {reduce} of input tensor along the given dimension. -The result tensor has 1 fewer dimension than the input unless `keep_dim` is true. +{ReduceOp} Operator. + +This operator computes the {reduce} of input tensor along the given dimension. 
+The result tensor has 1 fewer dimension than the input unless keep_dim is true. + )DOC"; AddComment(comment_); } diff --git a/paddle/operators/reshape_op.cc b/paddle/operators/reshape_op.cc index 9213cc7a85..ba774ec216 100644 --- a/paddle/operators/reshape_op.cc +++ b/paddle/operators/reshape_op.cc @@ -71,8 +71,11 @@ class ReshapeOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The input tensor of reshape operator."); AddOutput("Out", "The output tensor of reshape operator."); - AddAttr>("shape", "Target shape of reshape operator."); - AddComment(R"DOC(Reshape operator + AddAttr>("shape", + "(vector) " + "Target shape of reshape operator."); + AddComment(R"DOC( +Reshape Operator. Reshape Input(X) into the shape specified by Attr(shape). @@ -81,7 +84,7 @@ Given a 2-D tensor X with 2 rows and 2 columns [[1, 2], [3, 4]] -with target shape = [1, 4], the reshape operator will transform +and target shape = [1, 4], the reshape operator will transform the tensor X into a 1-D tensor: [1, 2, 3, 4] diff --git a/paddle/operators/rmsprop_op.cc b/paddle/operators/rmsprop_op.cc index fd5567a365..a9c45f639c 100644 --- a/paddle/operators/rmsprop_op.cc +++ b/paddle/operators/rmsprop_op.cc @@ -68,22 +68,22 @@ class RmspropOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("Param", "(Tensor, default Tensor) " - "Input parameter value that has to be updated"); + "Input parameter value that has to be updated."); AddInput("MeanSquare", "(Tensor, default Tensor)" - " The mean square value that gets updated"); + " The mean square value that gets updated."); AddInput("LearningRate", "(Tensor, default Tensor) " - "The learning rate should be a tensor of size 1"); + "The learning rate should be a tensor of size 1."); AddInput("Grad", "(Tensor, default Tensor) " - "Input gradient of the parameter"); + "Input gradient of the parameter."); AddInput("Moment", - "(Tensor, 
default Tensor) The moment that gets updated"); + "(Tensor, default Tensor) The moment that gets updated."); - AddOutput("ParamOut", "(Tensor) Output updated parameter value"); - AddOutput("MomentOut", "(Tensor) Output updated moment"); - AddOutput("MeanSquareOut", "(Tensor) Output Mean squared updated value"); + AddOutput("ParamOut", "(Tensor) Output updated parameter value."); + AddOutput("MomentOut", "(Tensor) Output updated moment."); + AddOutput("MeanSquareOut", "(Tensor) Output Mean squared updated value."); AddAttr("epsilon", "(float, default 1e-10) Constant " @@ -93,18 +93,19 @@ class RmspropOpMaker : public framework::OpProtoAndCheckerMaker { "(float, default 0.9) " "Discounting factor for coming gradient.") .SetDefault(0.9f); - AddAttr("momentum", "(float, default 0.0) Constant value") + AddAttr("momentum", "(float, default 0.0) Constant value.") .SetDefault(0.0f); AddComment(R"DOC( +Rmsprop Optimizer. -RMSprop - -MeanSquareOut = decay * MeanSquare + (1 - decay) * Grad * Grad +$$ +MeanSquareOut = decay * MeanSquare + (1 - decay) * Grad * Grad \\ MomentOut = momentum * Moment + - LearningRate * Grad / sqrt(MeanSquareOut + epsilon) + \frac{LearningRate * Grad}{\sqrt{MeanSquareOut + epsilon}} \\ ParamOut = Param - MomentOut +$$ -The original slides that proposed RMSprop: Slide 29 of +The original slides that proposed Rmsprop: Slide 29 of http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf) )DOC");