@@ -18,6 +18,10 @@ limitations under the License. */

namespace paddle {

/**
 * Context Projection Forward with CPU Matrix Device.
 *
 */
template <>
void ContextProjectionForward<DEVICE_TYPE_CPU>(CpuMatrix& out_mat,
                                               const CpuMatrix& input_mat,
@@ -70,8 +74,26 @@ void ContextProjectionForward<DEVICE_TYPE_CPU>(CpuMatrix& out_mat,
}

/**
 * Paddle Function for Context Projection Forward.
 * Calculate the output layer value with context projection.
 *
 * What is Context Projection?
 * For example, assume the input (x) has 4 words and the dimension of each
 * word representation is 2. If we pad with zeros instead of a learned
 * padding weight, and the context_length is 3, the output (y) is:
 *
 * @code
 * x = [a1, a2;
 *      b1, b2;
 *      c1, c2;
 *      d1, d2]
 * y = [0,  0,  a1, a2, b1, b2;
 *      a1, a2, b1, b2, c1, c2;
 *      b1, b2, c1, c2, d1, d2;
 *      c1, c2, d1, d2, 0,  0]
 * @endcode
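 *
 * In general, row i of the output is the concatenation of input rows
 * i + context_start through i + context_start + context_length - 1
 * (the example above uses context_start = -1); rows that fall outside
 * the sequence are filled with the padding (zeros here).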
 *
 * \param outputs[0] output value.
 * \param inputs[0]  input value.
 * \param inputs[1]  input weight.
 * \param inputs[2]  input sequence.
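 *
 * A minimal usage sketch (hypothetical: the registered type name and
 * config keys below follow the FuncConfig pattern used in this file,
 * but are assumptions, not part of this patch):
 * @code
 * auto* func = FunctionBase::funcRegistrar_.createByType(
 *     "ContextProjectionForward-CPU");
 * func->init(FuncConfig()
 *                .set("context_length", (size_t)3)
 *                .set("context_start", -1)
 *                .set("begin_pad", (size_t)0));
 * @endcode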
@@ -123,6 +145,10 @@ private:
  size_t begin_pad_;
};

/**
 * Context Projection Backward with CPU Matrix Device.
 *
 */
template <>
void ContextProjectionBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad_mat,
@@ -178,6 +204,9 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad_mat,
}

/**
 * Context Projection Backward Function.
 * Update the weight gradient and input layer gradient with backprop.
 *
 * \param inputs[0] input sequence.
 * \param inputs[1] output grad.
 * \param inouts[0] input grad.
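 *
 * The backward pass mirrors the forward copy: each context window that
 * read input row i in the forward pass accumulates its slice of the
 * output grad back into input-grad row i, while windows that read
 * padding rows accumulate into the weight grad instead.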
@@ -194,7 +223,6 @@ public:
    total_pad_ = config.get<size_t>("total_pad");
  }

  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
    CHECK_EQ((size_t)3, inputs.size());
    CHECK_EQ((size_t)1, outputs.size());
@@ -213,42 +241,6 @@ public:
    CHECK_EQ(outputs[0].shape()[1], inputs[0].shape()[1] * context_length_);

    CHECK_EQ(outputs[0].getArgType(), ADD_TO);

    auto out_grad_mat = outputs[0].matrix<Device>();
    auto in_grad_mat =
@@ -279,6 +271,9 @@ private:

#if 0
/**
 * Context Projection Backward Data Function.
 * Update gradient of the input layer with backprop.
 *
 * \param inouts[0] input grad.
 * \param inputs[0] input sequence.
 * \param inputs[1] output grad.
@@ -326,6 +321,9 @@ private:
};

/**
 * Context Projection Backward Weight Function.
 * Update weight gradient with backprop.
 *
 * \param inouts[0] weight grad.
 * \param inputs[0] input sequence.
 * \param inputs[1] output grad.