add forward computation of crf operator.

fix-typo
caoying03 7 years ago
parent cbcf11d930
commit cc220eec36

@@ -114,16 +114,19 @@ class Tensor {
                 const platform::DeviceContext& ctx);
 
   /**
-   * @brief   Return the slice of the tensor.
+   * @brief   Return a sub-tensor of the given tensor.
    *
-   * @param[in] begin_idx   The begin index of the slice.
-   * @param[in] end_idx     The end index of the slice.
+   * @param[in] begin_idx   The index of the start row (inclusive) to slice.
+   *                        The index number begins from 0.
+   * @param[in] end_idx     The index of the end row (exclusive) to slice.
+   *                        The index number begins from 0.
    */
   template <typename T>
   inline Tensor Slice(const int& begin_idx, const int& end_idx) const;
 
   platform::Place place() const {
-    PADDLE_ENFORCE_NOT_NULL(holder_, "Tensor get place() must contains holder");
+    PADDLE_ENFORCE_NOT_NULL(
+        holder_, "A holder must exist when calling the method place().");
     return holder_->place();
   }

@@ -168,10 +168,11 @@ inline void Tensor::CopyFromVector(const std::vector<T>& src,
 template <typename T>
 inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const {
   check_memory_size<T>();
-  PADDLE_ENFORCE_GE(begin_idx, 0, "Slice begin index is less than zero.");
-  PADDLE_ENFORCE_LE(end_idx, dims_[0], "Slice end index is out of bound.");
+  PADDLE_ENFORCE_GE(begin_idx, 0,
+                    "The start row index must be greater than or equal to 0.");
+  PADDLE_ENFORCE_LE(end_idx, dims_[0], "The end row index is out of bound.");
   PADDLE_ENFORCE_LT(begin_idx, end_idx,
-                    "Begin index must be less than end index.");
+                    "The start row index must be less than the end row index.");
   if (dims_[0] == 1) {
     return *this;

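Note: per the revised comments above, Slice() cuts only along the outermost (row) dimension, with a 0-based inclusive start row and an exclusive end row. A minimal NumPy sketch of the same convention (an illustration only, not the PaddlePaddle API):

import numpy as np

t = np.arange(12).reshape(4, 3)  # a 4x3 "tensor"
sub = t[1:3]                     # begin_idx=1 (inclusive), end_idx=3 (exclusive)
assert sub.shape == (2, 3)       # rows 1 and 2; row 3 is excluded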
@@ -49,7 +49,7 @@ class CrossEntropyOp : public framework::OperatorWithKernel {
     ctx->ShareLoD("X", /*->*/ "Y");
   }
 
-  // Explicitly set data type of output of the cross_entropy operator
+  // Explicitly set that the data type of the output of the cross_entropy operator
   // is determined by its input "X".
   framework::DataType IndicateDataType(
       const framework::ExecutionContext& ctx) const override {

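Note: IndicateDataType here only reports which input determines the kernel's data type. Conceptually (a hypothetical Python illustration, not Paddle's C++ API):

import numpy as np

def indicate_data_type(inputs):
    # the output of cross_entropy inherits the data type of input "X"
    return inputs["X"].dtype

x = np.zeros((4, 3), dtype=np.float32)
assert indicate_data_type({"X": x}) == np.float32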
(File diff suppressed because it is too large.)

@@ -19,27 +19,31 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using Tensor = framework::Tensor;
+using framework::Tensor;
 
 template <typename T, int MajorType = Eigen::RowMajor,
           typename IndexType = Eigen::DenseIndex>
 using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
 
-template <typename T>
+template <typename Place, typename T>
 class LinearChainCrfOpKernel : public framework::OpKernel<T> {
  public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()),
-                   "This kernel only runs on CPU.");
-  }
+  void Compute(const framework::ExecutionContext& ctx) const override;
+
+ protected:
+  T ForwardOneSequence(const platform::DeviceContext& ctx,
+                       const Tensor& emission, Tensor& emission_row_max,
+                       Tensor& emission_exps, const Tensor& trans_weights,
+                       Tensor& trans_weight_exps, const Tensor& label,
+                       Tensor& a) const;
+
+ private:
+  T NormalizeL1(T* x, size_t len) const;
 };
 
-template <typename T>
+template <typename Place, typename T>
 class LinearChainCrfGradOpKernel : public framework::OpKernel<T> {
  public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()),
-                   "This kernel only runs on CPU.");
-  }
+  void Compute(const framework::ExecutionContext& ctx) const override;
 };
 
 }  // namespace operators

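Note: the header above only declares ForwardOneSequence and NormalizeL1; their definitions live in the suppressed .cc diff, so the NumPy sketch below is a hedged reconstruction of the standard forward (alpha) recursion such a method computes, with the per-step L1 normalization that the NormalizeL1 declaration suggests. All names here are illustrative, not the kernel's actual code.

import numpy as np

def normalize_l1(x):
    # scale so the entries sum to 1; keeps the recursion numerically stable
    s = x.sum()
    return x / s, s

def forward_one_sequence(emission_exps, trans_exps, start_exps, stop_exps):
    # emission_exps: [seq_len, n_tags] exp of emission scores
    # trans_exps:    [n_tags, n_tags]  exp of transition weights
    seq_len, _ = emission_exps.shape
    alpha, s = normalize_l1(start_exps * emission_exps[0])
    log_z = np.log(s)                 # accumulated log partition function
    for k in range(1, seq_len):
        alpha, s = normalize_l1((alpha @ trans_exps) * emission_exps[k])
        log_z += np.log(s)
    # absorb the stop (end-of-sequence) weights into the partition function
    return log_z + np.log((alpha * stop_exps).sum())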
@@ -60,19 +60,23 @@ Because this operators performs a softmax on logits internally, it expects
 unscaled logits. Please do not call this op with the output of softmax operator,
 which will produce incorrect results.
 
-This operators expects mutually exclusive hard labels, each sample in a batch
-is in exactly one class with probabilities 1. Each sample in the batch with one
-and only one label.
+When the attribute softLabel is set false, this operator expects mutually
+exclusive hard labels: each sample in a batch is in exactly one class with
+probability 1. Each sample in the batch has one and only one label.
 
 Equation:
 
 1) hard label (one-hot label)
 
-Loss_j = -\text{Logit}_{Label_j} + \log\left(\sum_{i=0}^{K}\exp(\text{Logit}_i)\right), j = 1, ..., K
+Loss_j = \f$ -\text{Logit}_{Label_j} +
+         \log\left(\sum_{i=0}^{K}\exp(\text{Logit}_i)\right),
+         j = 1, ..., K \f$
 
 2) soft label (a distribution over all classes)
 
-Loss_j = -\sum_{i=0}^{K}\text{Label}_i\left(\text{Logit}_i-\log\left(\sum_{i=0}^{K}\exp(\text{Logit}_i)\right)\right), j = 1,...,K
+Loss_j = \f$ -\sum_{i=0}^{K}\text{Label}_i\left(\text{Logit}_i -
+         \log\left(\sum_{i=0}^{K}\exp(\text{Logit}_i)\right)\right),
+         j = 1,...,K \f$
 
 )DOC");
 }

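Note: a small self-contained check of the two equations above, written directly from the formulas rather than from the operator's kernel (illustrative code, not the Paddle implementation):

import numpy as np

def hard_label_loss(logits, label):
    # Loss = -Logit_{label} + log(sum_i exp(Logit_i))
    return -logits[label] + np.log(np.exp(logits).sum())

def soft_label_loss(logits, soft_label):
    # Loss = -sum_i Label_i * (Logit_i - log(sum_j exp(Logit_j)))
    log_z = np.log(np.exp(logits).sum())
    return -(soft_label * (logits - log_z)).sum()

logits = np.array([2.0, 0.5, -1.0])
one_hot = np.array([1.0, 0.0, 0.0])
# with a one-hot soft label the two forms agree
assert np.isclose(hard_label_loss(logits, 0), soft_label_loss(logits, one_hot))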
@@ -61,13 +61,13 @@ class LinearChainCrfForward(object):
             s += alpha[-1, i] * self.b_exps[i]
         log_likelihood -= np.log(s)
 
-        # calculate the noninator part.
+        # calculate the numerator part.
         log_likelihood += (
             self.a[label[0]] + self.x[0, label[0]] + self.b[label[-1]])
         for k in range(1, seq_len):
             log_likelihood += (
                 self.x[k, label[k]] + self.w[label[k - 1], label[k]])
-        return log_likelihood
+        return -log_likelihood
 
     def crf_forward_compute(self):
         for i in range(self.seq_num):
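Note: the sign flip above makes the reference return -log p(y|x) = log Z - score(x, y). For small problems this can be cross-checked by brute force; the helper below is hypothetical (it mirrors the test's a/b/w/x field names but is not part of the test file):

import itertools
import numpy as np

def neg_log_likelihood_bruteforce(a, b, w, x, label):
    seq_len, n_tags = x.shape

    def path_score(y):
        # start weight + first emission + stop weight, then the chain terms
        s = a[y[0]] + x[0, y[0]] + b[y[-1]]
        s += sum(x[k, y[k]] + w[y[k - 1], y[k]] for k in range(1, seq_len))
        return s

    # log Z by exhaustive enumeration over all n_tags**seq_len label paths
    paths = itertools.product(range(n_tags), repeat=seq_len)
    log_z = np.log(sum(np.exp(path_score(y)) for y in paths))
    return log_z - path_score(label)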
@@ -102,7 +102,7 @@ class TestLinearChainCrfOp(OpTest):
         self.inputs = {
             "Emission": (emission, lod),
             "Transition": transition,
-            "label": (labels, lod)
+            "Label": (labels, lod)
         }
 
         crf = LinearChainCrfForward(lod[0], emission, transition, labels)
