Merge branch 'develop' of https://github.com/PaddlePaddle/paddle into add-GRUOp-dev

guosheng 7 years ago
commit c4f7f3a562

@@ -45,8 +45,9 @@ add_custom_command(TARGET framework_py_proto POST_BUILD
cc_library(backward SRCS backward.cc DEPS net_op)
cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context fill_constant_op)
cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward glog)
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward glog lod_rank_table)
cc_library(prune SRCS prune.cc DEPS framework_proto)
cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context)

@@ -21,6 +21,7 @@ limitations under the License. */
#include <vector>
#include "paddle/framework/feed_fetch_type.h"
#include "paddle/framework/lod_rank_table.h"
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/scope.h"
@@ -70,10 +71,12 @@ static void CreateTensor(Variable* var, VarDesc::VarType var_type) {
var->GetMutable<FeedFetchList>();
} else if (var_type == VarDesc::STEP_SCOPES) {
var->GetMutable<std::vector<framework::Scope>>();
} else if (var_type == VarDesc::LOD_RANK_TABLE) {
var->GetMutable<LoDRankTable>();
} else {
PADDLE_THROW(
"Variable type %d is not in "
"[LoDTensor, SelectedRows, FEED_MINIBATCH, FETCH_LIST]",
"[LoDTensor, SelectedRows, FEED_MINIBATCH, FETCH_LIST, LOD_RANK_TABLE]",
var_type);
}
}

@@ -116,6 +116,7 @@ message VarDesc {
FEED_MINIBATCH = 3;
FETCH_LIST = 4;
STEP_SCOPES = 5;
LOD_RANK_TABLE = 6;
}
required string name = 1;
required VarType type = 2;

@@ -0,0 +1,43 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/lod_rank_table.h"
namespace paddle {
namespace framework {
void LoDRankTable::Reset(const LoD& lod, size_t level) {
this->coarse_lod_.clear();
this->items_.clear();
PADDLE_ENFORCE(level < lod.size(),
"Cannot rank lod since the level %d is less than lod size %d",
level, lod.size());
coarse_lod_.reserve(level);
for (size_t i = 0; i < level; ++i) {
coarse_lod_.push_back(lod[i]);
}
auto& vec = lod[level];
for (size_t i = 0; i < vec.size() - 1; ++i) {
TableItem item;
item.index = i;
item.length = vec[i + 1] - vec[i];
items_.emplace_back(item);
}
std::sort(items_.begin(), items_.end(),
[](const TableItem& a, const TableItem& b) {
return a.length > b.length;
});
}
} // namespace framework
} // namespace paddle

@@ -0,0 +1,55 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/lod_tensor.h"
namespace paddle {
namespace framework {
// LoD Rank Table stores the `level` of `lod` which is ordered by sequence
// length in descending order. It is useful when implementing dynamic RNN and
// is shared by the dynamic RNN memory, dynamic RNN slice input, and dynamic
// RNN slice output operators.
//
// Each table item contains two elements: the length of a sequence and the
// index of that sequence in the given level.
//
// LoDRankTable also stores the coarse_lod, which is the lod information whose
// levels are less than the input level, in order to restore the output LoD
// information.
class LoDRankTable {
public:
struct TableItem {
size_t index;
size_t length;
};
LoDRankTable() {}
void Reset(const LoD& lod, size_t level);
const std::vector<TableItem>& items() const { return this->items_; }
const LoD& coarse_lod() const { return this->coarse_lod_; }
size_t level() const { return coarse_lod_.size(); }
private:
LoD coarse_lod_;
std::vector<TableItem> items_;
};
} // namespace framework
} // namespace paddle
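As a worked example of the rank table (editor's illustration, not part of this
diff; the brace-initialization of LoD below is assumed for brevity):

  LoD lod = {{0, 2, 3}, {0, 2, 5, 9}};  // two LoD levels
  LoDRankTable table;
  table.Reset(lod, /*level=*/1);
  // lod[1] describes three sequences of lengths 2, 3, and 4. After sorting
  // by length in descending order, items() holds:
  //   {index=2, length=4}, {index=1, length=3}, {index=0, length=2}
  // and coarse_lod() keeps the levels below the input level: {{0, 2, 3}}.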

@@ -408,7 +408,6 @@ class OperatorWithKernel : public OperatorBase {
// indicate kernel DataType by input data. By default all input data must be
// of the same type.
virtual DataType IndicateDataType(const ExecutionContext& ctx) const {
VLOG(3) << "Default IndicateDataType " << this->Type();
auto& scope = ctx.scope();
int data_type = -1;
for (auto& input : this->inputs_) {
@@ -425,7 +424,6 @@ class OperatorWithKernel : public OperatorBase {
}
if (t != nullptr) {
int tmp = static_cast<int>(ToDataType(t->type()));
VLOG(3) << "Input " << ipt_name << " with data_type " << tmp;
PADDLE_ENFORCE(tmp == data_type || data_type == -1,
"DataType of Paddle Op %s must be the same.",
Type());

@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#include <vector>
#include "glog/logging.h"
#include "paddle/framework/framework.pb.h"
namespace paddle {

@@ -141,6 +141,7 @@ set(DEPS_OPS
pool_with_index_op
nccl_op
sequence_conv_op
lod_rank_table_op
lstm_op
gru_op)
@@ -150,6 +151,7 @@ op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax)
op_library(sum_op DEPS net_op selected_rows_functor)
op_library(pool_op DEPS pooling)
op_library(pool_with_index_op DEPS pooling)
op_library(lod_rank_table_op SRCS lod_rank_table_op.cc DEPS lod_rank_table)
if(WITH_GPU)
op_library(nccl_op DEPS nccl_common)
endif()

@@ -33,7 +33,7 @@ class AccuracyOp : public framework::OperatorWithKernel {
auto inference_dim = ctx->GetInputDim("Out");
auto label_dim = ctx->GetInputDim("Label");
// Assume indices has same shape with infernece, because
// Assume indices has same shape as inference, because
// it's the output of topk.
PADDLE_ENFORCE_EQ(label_dim.size(), 2, "label's rank must be 2.");
@@ -60,20 +60,24 @@ class AccuracyOpMaker : public framework::OpProtoAndCheckerMaker {
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
// TODO(typhoonzero): support both inference value and indices.
AddInput("Out", "topk (inferences) the network output");
AddInput("Indices", "topk (indices) the network output");
AddInput("Out", "The network output of topk (inferences)");
AddInput("Indices", "The the network output of topk (indices)");
AddInput("Label", "Label of the training data");
// TODO(typhoonzero): AddInput("Weight", ...
AddOutput("Accuracy", "The accuracy of current batch");
AddComment(R"DOC(
Accuracy. It will print accuracy rate for classification.
The accuracy is:
.. math::
accuracy = \\frac{NumOfCorrectPredicts}{NumOfAllSamples})
Accuracy Operator.
It computes the accuracy rate for classification.
The accuracy is calculated as follows:
$$accuracy = \frac{NumOfCorrectPredicts}{NumOfAllSamples}$$
Both the input Out and Label can carry the LoD (Level of Details)
information, or not. But the output only shares the LoD information
with the input Out(Inference).
Both the input `Out` and `Label` can carry the LoD (Level of Details)
information, or not. But the output only shares the LoD with input `Inference`.
)DOC");
}
};
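To make the accuracy formula concrete (editor's illustration): if 8 of the 10
samples in a batch are predicted correctly, then

$$accuracy = \frac{8}{10} = 0.8$$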

@@ -51,6 +51,10 @@ class BatchNormOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE(ctx->HasOutput("SavedMean"), "");
PADDLE_ENFORCE(ctx->HasOutput("SavedVariance"), "");
const float epsilon = ctx->Attrs().Get<float>("epsilon");
PADDLE_ENFORCE_GE(epsilon, 0.0, "epsilon should be greater than or equal to 0");
PADDLE_ENFORCE_LE(epsilon, 0.001, "epsilon should not be larger than 0.001");
// make sure Mean/MeanOut and Variance/VarianceOut share memory in Python
PADDLE_ENFORCE_EQ(ctx->Inputs("Mean")[0], ctx->Outputs("MeanOut")[0],
"Mean and MeanOut should share the same memory");
@@ -297,7 +301,6 @@ class BatchNormGradOp : public framework::OperatorWithKernel {
framework::DataType IndicateDataType(
const framework::ExecutionContext &ctx) const override {
VLOG(3) << "IndicateDataType " << this->Type();
const auto *var = ctx.InputVar(framework::GradVarName("Y"));
if (var == nullptr) {
PADDLE_THROW("can't find Y@GRAD");

@@ -56,20 +56,24 @@ class ConcatOpMaker : public framework::OpProtoAndCheckerMaker {
public:
ConcatOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "the input tensors of concat operator.").AsDuplicable();
AddOutput("Out", "the output tensor of concat operator.");
AddComment(R"DOC(
Join the input tensors along with the axis.
Examples:
Input[0] = [[1,2],[3,4]]
Input[1] = [[5,6]]
axis = 0
Output = [[1,2],
[3,4],
[5,6]]
)DOC");
AddAttr<int>("axis", "The axis which the inputs will be joined with.")
AddInput("X", "Input tensors of concat operator.").AsDuplicable();
AddOutput("Out", "Output tensor of concat operator.");
AddAttr<int>("axis",
"The axis along which the input tensors will be concatenated.")
.SetDefault(0);
AddComment(R"DOC(
Concat Operator.
Concatenate the input tensors along dimension axis.
Examples:
Input[0] = [[1,2],[3,4]]
Input[1] = [[5,6]]
axis = 0
Output = [[1,2],
[3,4],
[5,6]]
)DOC");
}
};

@@ -216,11 +216,12 @@ class CondOpProtoAndCheckerMaker : public framework::OpProtoAndCheckerMaker {
AddOutput("IndexTensors", "Index Tensors contains indices for true/false");
AddComment(R"DOC(
Sample dependent Cond Operator:
Given Cond[i] as a 1/0 vector to indicate true/false
The equation is:
Out[i] = subnet_t[i], if Cond[i] == true
Out[i] = subnet_t[i], if Cond[i] == false
Sample Dependent Conditional Operator.
Given Cond[i] as a 1/0 vector to indicate true/false:
Out[i] = subnet_true[i], if Cond[i] == true
Out[i] = subnet_false[i], if Cond[i] == false
)DOC");
}
};

@@ -56,17 +56,18 @@ Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto,
AddInput(
"Input",
"The input tensor of convolution operator. "
"The format of input tensor is NCHW. Where N is batch size, C is the "
"number of channels, H and W is the height and width of image.");
"The format of input tensor is NCHW, where N is batch size, C is the "
"number of channels, H is the height of the image, "
"and W is the width of the image.");
AddInput("Filter",
"The filter tensor of convolution operator."
"The filter tensor of convolution operator. "
"The format of the filter tensor is MCHW, where M is the number of "
"output image channels, C is the number of input image channels, "
"H and W is height and width of filter. "
"If the groups attribute is greater than 1, C equal the number of "
"H is the height of the filter, and W is the width of the filter. "
"If the groups attribute is greater than 1, C equals the number of "
"input image channels divided by the groups.");
AddOutput("Output",
"The output tensor of convolution operator."
"The output tensor of convolution operator. "
"The format of output tensor is also NCHW.");
AddAttr<std::vector<int>>("strides", "strides of convolution operator.")
.SetDefault({1, 1});
@@ -74,16 +75,19 @@ Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto,
.SetDefault({0, 0});
AddAttr<int>(
"groups",
"group size of convolution operator. "
"Refer to grouped convolution in Alex Krizhevsky's paper: "
"when group=2, the first half of the filters are only connected to the "
"first half of the input channels, and the second half only connected "
"to the second half.")
"Group size of convolution operator. "
"According to grouped convolution in Alex Krizhevsky's Deep CNN paper: "
"when group=2, the first half of the filters is only connected to the "
"first half of the input channels, while the second half of the filters "
"is only connected to the second half of the input channels.")
.SetDefault(1);
AddComment(R"DOC(
The convolution operation calculates the output based on the input, filter
and strides, paddings, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape.
Convolution Operator.
The convolution operation calculates the output based on the input, filter,
strides, paddings, and groups parameters. The size of each dimension of the
parameters is checked in the infer-shape method.
)DOC");
}
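The shape check mentioned above follows the usual convolution arithmetic. As a
sketch (editor's illustration, not part of this diff; floor division assumed),
each spatial dimension of the output is

$$H_{out} = \frac{H_{in} + 2 \times padding_h - H_{filter}}{stride_h} + 1$$

so, for example, a 32x32 input with a 3x3 filter, stride 1, and padding 1
produces a 32x32 output.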

@@ -54,15 +54,16 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(
AddInput(
"Input",
"(Tensor) The input tensor of convolution transpose operator. "
"The format of input tensor is NCHW. Where N is batch size, C is the "
"number of input channels, H and W is the height and width of image.");
"The format of input tensor is NCHW, where N is batch size, C is the "
"number of input channels, H is the height of the image, and "
"W is the width of the image.");
AddInput("Filter",
"(Tensor) The filter tensor of convolution transpose operator."
"The format of the filter tensor is CMHW, where C is the number of "
"output image channels, M is the number of input image channels, "
"H and W is height and width of filter. "
"H is the height of the filter, and W is the width of the filter. "
"We enforce groups number == 1 and padding == 0 in "
"convolution transpose Scenario.");
"the convolution transpose scenario.");
AddOutput("Output",
"(Tensor) The output tensor of convolution transpose operator."
"The format of output tensor is also NCHW.");
@@ -73,9 +74,12 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(
"paddings of convolution transpose operator.")
.SetDefault({0, 0});
AddComment(R"DOC(
The convolution transpose operation calculates the output based on the input, filter
and strides, paddings, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape.
Convolution Transpose Operator.
The convolution transpose operation calculates the output based on the input,
filter, strides, paddings, and groups parameters. The size of each dimension
of the parameters is checked in the infer-shape method.
)DOC");
}
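As with the forward convolution above, the output shape can be sketched
(editor's illustration; stated with the padding == 0 constraint enforced here):

$$H_{out} = (H_{in} - 1) \times stride_h + H_{filter}$$

e.g. a 16x16 input with a 3x3 filter and stride 2 yields a 33x33 output.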

@@ -29,7 +29,7 @@ class CudnnConvOpMaker : public Conv2DOpMaker {
"workspace is a section of GPU memory which will be "
"allocated/freed each time the operator runs, larger "
"workspace size can increase performance but also requires "
"better hardward. This size should be carefully setted.")
"better hardware. This size should be chosen carefully.")
.SetDefault(4096);
}
};

@@ -96,14 +96,13 @@ as used in the Neural Turing Machine: https://arxiv.org/abs/1410.5401
The equation is:
\f[
Out[i] = \sum_{j=-(N-1)/2}^{(N-1)/2} X_{i+j} * Y_{j}
\f]
$$Out[i] = \sum_{j=-(N-1)/2}^{(N-1)/2} X_{i+j} * Y_{j}$$
where X's index is computed modulo M, and b's index is computed modulo N.
where X's index is computed modulo M, and Y's index is computed modulo N.
Both inputs X and Y can carry LoD (Level of Details) information.
However, the output only shares the LoD information with input X.
Both of the input `X` and `Y` can carry LoD (Level of Details) information.
However, the output only shares the LoD information with input `X`.
)DOC");
}
};

@@ -79,15 +79,16 @@ class CosSimOpMaker : public framework::OpProtoAndCheckerMaker {
AddComment(R"DOC(
Cosine Similarity Operator.
The equation is: Out = X^T * Y / (sqrt(X^T * X) * sqrt(Y^T * Y)).
$Out = X^T * Y / (\sqrt{X^T * X} * \sqrt{Y^T * Y})$
The input `X` and `Y` must have the same shape, except that the 1st dimension
of input `Y` could be just 1 (different from input `X`), which will be
broadcasted to match the shape of input `X` before computing their cosine
The input X and Y must have the same shape, except that the 1st dimension
of input Y could be just 1 (different from input X), which will be
broadcasted to match the shape of input X before computing their cosine
similarity.
Both the input `X` and `Y` can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD with input `X`.
Both the input X and Y can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD information with input X.
)DOC");
}
};
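As a quick numeric check of the cosine similarity formula (editor's
illustration): for X = [1, 0] and Y = [1, 1],

$$Out = \frac{1}{1 \times \sqrt{2}} \approx 0.707$$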

@@ -0,0 +1,136 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/crf_decoding_op.h"
namespace paddle {
namespace operators {
class CRFDecodingOpMaker : public framework::OpProtoAndCheckerMaker {
public:
CRFDecodingOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Emission",
"(LoDTensor, default: LoDTensor<float>). A LoDTensor with shape "
"[N x D] where N is the size of the mini-batch and D is the total "
"tag number. This input is the unscaled emission weight matrix of "
"the linear_chain_crf operator.");
AddInput(
"Transition",
"(Tensor, default: Tensor<float>). A Tensor with shape [(D + 2) x D]. "
"This input is the transition weights learned by the linear_chain_crf "
"operator, denoted as w. The 1st row of w are transition weights for "
"the start mask. The 2nd row of w are transition weights for the end "
"mask. Transition weights between other tags begin from the 3rd row of "
"w. See more details in comments of the linear_chain_crf operator.");
AddInput(
"Label",
"(LoDTensor, LoDTensor<int>). The ground truth with shape "
"[N x 1]. This input is optional. See more details in the operator's "
"comments.")
.AsDispensable();
AddOutput("ViterbiPath",
"(LoDTensor, LoDTensor<int>). The decoding results. What to "
"return changes depending on whether the Input(Label) (the groud "
"truth) is given. See more details in the operator's comment.");
AddComment(R"DOC(
The crf_decoding operator reads the emission feature weights and the transition
feature weights learned by the linear_chain_crf operator. It implements the
Viterbi algorithm, which is a dynamic programming algorithm for finding the most
likely sequence of hidden states, called the Viterbi path, that results in a
sequence of observed tags.
The output of this operator changes according to whether Input(Label) is given:
1. Input(Label) is given:
This happens in training. This operator is used together with the chunk_eval
operator.
When Input(Label) is given, the crf_decoding operator returns a row vector
with shape [N x 1] whose values are fixed to be 0, indicating an incorrect
prediction, or 1, indicating a tag is correctly predicted. Such an output is
the input to the chunk_eval operator.
2. Input(Label) is not given:
This is the standard decoding process.
The crf_decoding operator returns a row vector with shape [N x 1] whose values
range from 0 to the maximum tag number - 1. Each element indicates the index of
a predicted tag.
)DOC");
}
};
class CRFDecodingOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Emission"),
"Input(Emission) should be not null.");
PADDLE_ENFORCE(ctx->HasInput("Transition"),
"Input(Transition) should be not null.");
PADDLE_ENFORCE(ctx->HasOutput("ViterbiPath"),
"Output(ViterbiPath) should be not null.");
auto emission_dims = ctx->GetInputDim("Emission");
PADDLE_ENFORCE_EQ(emission_dims.size(), 2UL,
"The Input(Emission) should be a 2-D tensor.");
PADDLE_ENFORCE(emission_dims[0], "An empty mini-batch is not allowed.");
auto transition_dims = ctx->GetInputDim("Transition");
PADDLE_ENFORCE_EQ(transition_dims.size(), 2UL,
"The Input(Transition) should be a 2-D tensor.");
PADDLE_ENFORCE_EQ(
transition_dims[0] - 2, transition_dims[1],
"An invalid dimension for the Input(Transition), which should "
"be a 2-D tensor with shape [(D + 2) x D].");
PADDLE_ENFORCE_EQ(
emission_dims[1], transition_dims[1],
"The 2nd dimension of the Input(Emission) and the Input(Transition) "
"should be equal to the tag number.");
if (ctx->HasInput("Label")) {
auto label_dims = ctx->GetInputDim("Label");
PADDLE_ENFORCE(label_dims.size() == 2UL && label_dims[1] == 1UL,
"The Input(Label) should be a 2-D tensor with the 2nd "
"dimensions fixed to 1.");
PADDLE_ENFORCE_EQ(
emission_dims[0], label_dims[0],
"The height of Input(Emission) and the height of Input(Label) "
"should be the same.");
}
ctx->ShareLoD("Emission", /*->*/ "ViterbiPath");
ctx->SetOutputDim("ViterbiPath", {emission_dims[0], 1});
}
protected:
framework::DataType IndicateDataType(
const framework::ExecutionContext& ctx) const override {
return framework::ToDataType(ctx.Input<LoDTensor>("Emission")->type());
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(crf_decoding, ops::CRFDecodingOp,
ops::CRFDecodingOpMaker);
REGISTER_OP_CPU_KERNEL(
crf_decoding, ops::CRFDecodingOpKernel<paddle::platform::CPUPlace, float>,
ops::CRFDecodingOpKernel<paddle::platform::CPUPlace, double>);

@@ -0,0 +1,127 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/math/math_function.h"
namespace paddle {
namespace operators {
using framework::LoDTensor;
using framework::LoD;
using framework::Tensor;
template <typename Place, typename T>
class CRFDecodingOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()),
"The crf_decoding operator can only run on CPU.");
auto* emission_weights = ctx.Input<LoDTensor>("Emission");
auto* transition_weights = ctx.Input<Tensor>("Transition");
auto* label = ctx.Input<LoDTensor>("Label");
auto* decoded_path = ctx.Output<Tensor>("ViterbiPath");
PADDLE_ENFORCE_EQ(emission_weights->NumLevels(), 1UL,
"The Input(Emission) should be a sequence.");
auto lod = emission_weights->lod();
PADDLE_ENFORCE(lod.size(), "Input(Emission) must be a sequence.");
const size_t level = 0;
const size_t seq_num = lod[level].size() - 1;
int* path = decoded_path->mutable_data<int>(platform::CPUPlace());
math::SetConstant<platform::CPUPlace, int>()(ctx.device_context(),
decoded_path, 0);
for (size_t i = 0; i < seq_num; ++i) {
int start_pos = static_cast<int>(lod[level][i]);
int end_pos = static_cast<int>(lod[level][i + 1]);
Tensor decoded_path_one_seq = decoded_path->Slice(start_pos, end_pos);
Decode(emission_weights->Slice(start_pos, end_pos), *transition_weights,
&decoded_path_one_seq);
}
if (label) {
PADDLE_ENFORCE_EQ(label->NumLevels(), 1UL,
"The Input(Label) should be a sequence.");
const int* label_value = label->data<int>();
size_t batch_size = emission_weights->dims()[0];
for (size_t i = 0; i < batch_size; ++i) {
path[i] = label_value[i] == path[i] ? 1 : 0;
}
}
}
private:
void Decode(const Tensor& emission_weights, const Tensor& transition_weights,
Tensor* decoded_path) const {
auto emission_dims = emission_weights.dims();
const size_t seq_len = emission_dims[0];
const size_t tag_num = emission_dims[1];
const size_t state_trans_base_idx = 2;
const T* x = emission_weights.data<T>();
const T* w = transition_weights.data<T>();
int* path = decoded_path->data<int>();
// alpha is a memo table. An element alpha(k, v) records the score of the
// best sequence of tags from position 1 to position k with v being the end
// tag.
Tensor alpha;
T* alpha_value = alpha.mutable_data<T>(emission_dims, platform::CPUPlace());
Tensor track;
int* track_value =
track.mutable_data<int>(emission_dims, platform::CPUPlace());
for (size_t i = 0; i < tag_num; ++i) alpha_value[i] = w[i] + x[i];
for (size_t k = 1; k < seq_len; ++k) {
for (size_t i = 0; i < tag_num; ++i) {
T max_score = -std::numeric_limits<T>::max();
int max_j = 0;
for (size_t j = 0; j < tag_num; ++j) {
T score = alpha_value[(k - 1) * tag_num + j] +
w[(j + state_trans_base_idx) * tag_num + i];
if (score > max_score) {
max_score = score;
max_j = j;
}
}
alpha_value[k * tag_num + i] = max_score + x[k * tag_num + i];
track_value[k * tag_num + i] = max_j;
}
}
T max_score = -std::numeric_limits<T>::max();
int max_i = 0;
for (size_t i = 0; i < tag_num; ++i) {
T score = alpha_value[(seq_len - 1) * tag_num + i] + w[tag_num + i];
if (score > max_score) {
max_score = score;
max_i = i;
}
}
path[seq_len - 1] = max_i;
for (int k = seq_len - 1; k >= 1; --k) {
path[k - 1] = max_i = track_value[k * tag_num + max_i];
}
}
};
} // namespace operators
} // namespace paddle
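For reference, the Decode routine above implements the standard Viterbi
recurrence (editor's summary of the code; w's 1st and 2nd rows hold the start
and end transition weights, as described in the operator comment):

$$\alpha(0, i) = w_{start}(i) + x(0, i)$$
$$\alpha(k, i) = x(k, i) + \max_j \left( \alpha(k-1, j) + w(j, i) \right)$$

The last tag is chosen as $\arg\max_i (\alpha(L-1, i) + w_{end}(i))$, and the
track table records each inner argmax so the full path can be recovered by
walking backwards from it.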

@@ -56,34 +56,35 @@ class CropOpMaker : public framework::OpProtoAndCheckerMaker {
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
"The input of pad op. "
"The input should be a k-D tensor(k > 0 and k < 7)");
"The input should be a k-D tensor(k > 0 and k < 7).");
AddInput("Y",
"The input used as reference for cropping"
" with the same dimension as X. ")
"The input used as reference for cropping, "
"which is of the same dimensions as X.")
.AsDispensable();
AddOutput("Out",
"The output of crop op "
"with the same dimension as X.");
"The output of crop op, "
"which is of the same dimensions as X.");
AddAttr<std::vector<int>>("offsets",
"A list<int> describing offsets to be cropped."
"The size of offsets list should be as same as "
"dimension size of input X.");
"A list<int> describing offsets to be cropped. "
"The size of offsets list should be the same as "
"the dimension size of input X.");
AddAttr<std::vector<int>>("shape",
"A list<int> describing the shape of output."
"The size of shape list should be as same as "
"dimension size of input X.")
"A list<int> describing the shape of output. "
"The size of shape list should be the same as "
"the dimension size of input X.")
.SetDefault(std::vector<int>());
AddComment(R"DOC(
Crop Operator.
Crop input into output, as specified by offsets and shape.
There are two ways to set shape:
1. referenc input: crop input X as shape as reference input.
1. reference input: crop input X into the same shape as reference input.
The dimension of reference input should
be as same as input X.
2. shape list: crop input X by shape described by a list<int>.
The size of shape list should be as same as
dimension size of input X.
be the same as the dimension of input X.
2. shape list: crop input X into the shape described by a list<int>.
The size of shape list should be the same as
the dimension size of input X.
The input should be a k-D tensor(k > 0 and k < 7). As an example:
@@ -91,20 +92,20 @@ Given:
X = [[0, 1, 2, 0, 0]
[0, 3, 4, 0, 0]
[0, 0, 0, 0, 0]]
[0, 0, 0, 0, 0]],
and
offsets = [0, 1]
offsets = [0, 1],
and
shape = [2, 2]
shape = [2, 2],
then we get
we get:
Out = [[1, 2],
[3, 4]]
[3, 4]].
)DOC");
}

@@ -49,7 +49,7 @@ class CrossEntropyOp : public framework::OperatorWithKernel {
}
protected:
// Explicitly set that data type of the output of the cross_entropy operator
// Explicitly set that the data type of computation kernel of cross_entropy
// is determined by its input "X".
framework::DataType IndicateDataType(
const framework::ExecutionContext& ctx) const override {
@@ -96,7 +96,8 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel {
}
protected:
// CrossEntropy's data type just determined by "X"
// Explicitly set that the data type of computation kernel of cross_entropy
// is determined by its input "X".
framework::DataType IndicateDataType(
const framework::ExecutionContext& ctx) const override {
return framework::ToDataType(ctx.Input<Tensor>("X")->type());
@@ -117,9 +118,9 @@ class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker {
"Label",
"(Tensor, default Tensor<int>), the ground truth which is "
"a 2-D tensor. "
"When soft_label is set to false, `Label` is a Tensor<int> with shape "
"When soft_label is set to false, Label is a Tensor<int> with shape "
"[N x 1]. "
"When soft_label is set to true, `Label` is a Tensor<float/double> "
"When soft_label is set to true, Label is a Tensor<float/double> "
"with shape [N x K].");
AddOutput("Y",
"(Tensor, default Tensor<float>), a 2-D tensor "
@@ -137,13 +138,13 @@ computation.
1) One-hot cross-entropy:
soft_label = false, Label[i, 0] indicates the class index for sample i:
Y[i] = -log(X[i, Label[i]])
$Y[i] = -\log(X[i, Label[i]])$
2) Soft-label cross-entropy:
soft_label = true, Label[i, j] indicates the soft label of class j
for sample i:
Y[i] = \sum_j{-Label[i, j] * log(X[i, j])}
$Y[i] = \sum_j{-Label[i, j] * log(X[i, j])}$
Please make sure that in this case the summation of each row of Label
equals one.
@@ -153,8 +154,9 @@ computation.
non-zero element (equals 1), soft-label cross-entropy degenerates to a
one-hot cross-entropy with one-hot label representation.
Both the input `X` and `Label` can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD with input `X`.
Both the input X and Label can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD information with input X.
)DOC");
}
};

@@ -75,11 +75,18 @@ class DecayedAdagradOpMaker : public framework::OpProtoAndCheckerMaker {
"Constant for numerical stability")
.SetDefault(1.0e-6f);
AddComment(R"DOC(
Decayed Adagrad Optimizer.
Decayed Adagrad
The update is done as follows:
moment_out = decay * moment + (1 - decay) * grad * grad
param_out = param - learning_rate * grad / (sqrt(moment_out) + epsilon)
$$
moment\_out = decay * moment + (1 - decay) * grad * grad \\
param\_out = param - \frac{learning\_rate * grad}{\sqrt{moment\_out} + epsilon}
$$
The original paper (http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
does not have an epsilon attribute. It is added here for numerical
stability to avoid division by zero.
)DOC");
}

@@ -43,22 +43,24 @@ class DropoutOpMaker : public framework::OpProtoAndCheckerMaker {
DropoutOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddAttr<float>("dropout_prob", "Probability of setting units to zero.")
.SetDefault(.5f);
AddAttr<bool>("is_training", "Whether in training phase.").SetDefault(true);
AddAttr<int>("seed", "Dropout random seed.").SetDefault(0);
AddInput("X", "The input of dropout op.");
AddOutput("Out", "The output of dropout op.");
AddOutput("Mask", "The random sampled dropout mask.").AsIntermediate();
AddAttr<float>("dropout_prob", "Probability of setting units to zero.")
.SetDefault(.5f);
AddAttr<bool>("is_training", "True if in training phase.").SetDefault(true);
AddAttr<int>("seed", "Dropout random seed.").SetDefault(0);
AddComment(R"DOC(
Dropout Operator.
'Dropout' refers to randomly dropping out units in a nerual network. It is a
Dropout refers to randomly dropping out units in a neural network. It is a
regularization technique for reducing overfitting by preventing neuron
co-adaptation during training. The dropout operator randomly sets (according to
the given dropout probability) the outputs of some units to zero, while others
being set to their inputs.
are set equal to their corresponding inputs.
)DOC");
}
};
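In formula form (editor's sketch of the standard formulation; the exact
scaling used by this kernel is not shown in this diff): during training a mask
is sampled per unit,

$$mask_i \sim Bernoulli(1 - dropout\_prob), \quad Out_i = X_i \times mask_i$$

and the sampled mask is exposed through the Mask output.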

@@ -386,12 +386,13 @@ class DynamicRecurrentOpProtoAndCheckerMaker
RNNAlgorithm::kArgNames[RNNAlgorithm::ComputeMode::kForward];
// inputs and outputs stored in proto
AddInput(name.inlinks,
"the inputs that need to be segmented for each step.")
"The inputs that need to be segmented for each step.")
.AsDuplicable();
AddInput(name.initial_states, "variables to initialize states.")
AddInput(name.initial_states, "Variables to initialize the states.")
.AsDuplicable();
AddOutput(name.outlinks, "the outputs that need to concated for all steps.")
AddOutput(name.outlinks,
"The outputs that need to be concatenated for all steps.")
.AsDuplicable();
AddOutput(name.step_scopes, "step scopes");
@@ -399,7 +400,12 @@ class DynamicRecurrentOpProtoAndCheckerMaker
AddAttr<std::vector<std::string>>(name.ex_states, "names of ex_states");
AddAttr<std::vector<std::string>>(name.states, "names of states");
AddComment("This is a RNN operator for varience-length sequences.");
AddComment(R"DOC(
Dynamic Recurrent Operator.
This is an RNN operator for variable-length sequences.
)DOC");
}
};

@@ -22,7 +22,7 @@ class ElementwiseAddOpMaker : public ElementwiseOpMaker {
ElementwiseAddOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: ElementwiseOpMaker(proto, op_checker) {
SetComment("add", "Out = X + Y");
SetComment("Add", "$Out = X + Y$");
AddComment(comment_);
}
};

Some files were not shown because too many files have changed in this diff.
