add some source code comments

ISSUE=4592951 git-svn-id: https://svn.baidu.com/idl/trunk/paddle@1447 1ad973e4-5ce8-4261-8a94-b56d1f490c56
9 years ago · 66be6fed34
parent ff496cd4bf
commit 66be6fed34
15 changed files with 377 additions and 142 deletions
--- a/doc/source/gserver/activations/index.rst
+++ b/doc/source/gserver/activations/index.rst
@ -1,5 +1,5 @@
 Activations
 =============

-..  doxygenfile:: paddle/gserver/activations/ActivationFunction.h
-..  doxygenfile:: paddle/gserver/activations/ActivationFunction.cpp
+..  doxygenclass:: paddle::ActivationFunction
+    :members:
--- a/paddle/gserver/activations/ActivationFunction.cpp
+++ b/paddle/gserver/activations/ActivationFunction.cpp
@ -28,8 +28,17 @@ limitations under the License. */
 namespace paddle {

 static ClassRegistrar<ActivationFunction> gActivationRegistrar;
+/**
+ * @def ACTIVATION_CLASS_NAME
+ * @brief Macro for getting derived activation class name
+ * @note ACTIVATION_CLASS_NAME(softmax) softmax_;
+ * means softmaxActivation softmax_;
+ */
 #define ACTIVATION_CLASS_NAME(ACTIVATION_NAME) ACTIVATION_NAME##Activation
-
+/**
+ * @def BEGIN_DEFINE_ACTIVATION
+ * @brief Macro for defining a derived activation class
+ */
 #define BEGIN_DEFINE_ACTIVATION(ACTIVATION_NAME)                             \
  class ACTIVATION_CLASS_NAME(ACTIVATION_NAME) : public ActivationFunction { \
  private:                                                                   \
@ -37,7 +46,10 @@ static ClassRegistrar<ActivationFunction> gActivationRegistrar;
                                                                             \
  public:                                                                    \
    const std::string& getName() const { return name; }
-
+/**
+ * @def END_DEFINE_ACTIVATION
+ * @brief Macro for registering a derived activation class
+ */
 #define END_DEFINE_ACTIVATION(ACTIVATION_NAME)                     \
  };                                                               \
  const std::string ACTIVATION_CLASS_NAME(ACTIVATION_NAME)::name = \
@ -66,9 +78,10 @@ static InitFunction __reg_activation__identity([] {
 });

 /**
- * SigmoidActivation
- *
+ * @brief Sigmoid Activation
+ * \f[
 * f(z) = \frac{1}{1+exp(-z)}
+ * \f]
 */
 BEGIN_DEFINE_ACTIVATION(sigmoid)
 void forward(Argument& act) { act.value->sigmoid(*act.value); }
@ -76,8 +89,10 @@ void backward(Argument& act) { act.grad->sigmoidDerivative(*act.value); }
 END_DEFINE_ACTIVATION(sigmoid)

 /**
- * Do Softmax activation for all sample.
+ * @brief Softmax Activation
+ * \f[
 * P(y=j|x) = \frac{e^{x^Tw_j}}{\sum^K_{k=1}e^{x^Tw_k}}
+ * \f]
 */
 BEGIN_DEFINE_ACTIVATION(softmax)
 private:
@ -115,8 +130,12 @@ void backward(Argument& act) {
 }
 END_DEFINE_ACTIVATION(softmax)

-/// Softmax on all frames of one sequence.
-/// Width of frame must be one.
+
+/**
+ * @brief Sequence_softmax Activation
+ * @note Softmax on all frames of one sequence.
+ * Width of frame must be one.
+ */
 BEGIN_DEFINE_ACTIVATION(sequence_softmax)
 private:
 ACTIVATION_CLASS_NAME(softmax) softmax_;
@ -156,8 +175,7 @@ void backward(Argument& act) {
 END_DEFINE_ACTIVATION(sequence_softmax)

 /**
- * Relu Activation.
- *
+ * @brief Relu Activation.
 * forward. y = max(0, z)
 *
 * derivative of relu is:
@ -173,7 +191,7 @@ void backward(Argument& act) { act.grad->reluDerivative(*act.value); }
 END_DEFINE_ACTIVATION(relu)

 /**
- * BRelu Activation.
+ * @brief BRelu Activation.
 *
 * forward. y = min(24, max(0, z))
 *
@ -192,9 +210,10 @@ void backward(Argument& act) { act.grad->breluDerivative(*act.value); }
 END_DEFINE_ACTIVATION(brelu)

 /**
- * tanh activation.
- *
+ * @brief Tanh Activation.
+ * \f[
 * f(z) = tanh(z)=\frac{e^z-e^{-z}}{e^z+e^{-z}}
+ * \f]
 */
 BEGIN_DEFINE_ACTIVATION(tanh)
 void forward(Argument& act) { act.value->tanh(*act.value); }
@ -203,9 +222,10 @@ void backward(Argument& act) { act.grad->tanhDerivative(*act.value); }
 END_DEFINE_ACTIVATION(tanh)

 /**
- * Scaled Tanh Activation
- *
+ * @brief Scaled Tanh Activation
+ * \f[
 * f(z) = 1.7159 * tanh(2/3*z)
+ * \f]
 */
 BEGIN_DEFINE_ACTIVATION(stanh)
 private:
@ -221,9 +241,10 @@ void backward(Argument& act) {
 END_DEFINE_ACTIVATION(stanh)

 /**
- * Soft relu activation.
- *
+ * @brief Soft Relu Activation.
+ * \f[
 * f(z) = ln(1+e^z)
+ * \f]
 */
 BEGIN_DEFINE_ACTIVATION(softrelu)
 void forward(Argument& act) { act.value->softrelu(*act.value); }
@ -232,8 +253,7 @@ void backward(Argument& act) { act.grad->softreluDerivative(*act.value); }
 END_DEFINE_ACTIVATION(softrelu)

 /**
- * Abs Activation.
- *
+ * @brief Abs Activation.
 * Forward: f(z) = abs(z)
 *
 * Derivative:
@ -258,9 +278,10 @@ void backward(Argument& act) { act.grad->absDerivative(*act.in); }
 END_DEFINE_ACTIVATION(abs)

 /**
- * Square Activation.
- *
+ * @brief Square Activation.
+ * \f[
 * f(z) = z^2.
+ * \f]
 */
 BEGIN_DEFINE_ACTIVATION(square)
 void forward(Argument& act) {
@ -274,7 +295,12 @@ void forward(Argument& act) {

 void backward(Argument& act) { act.grad->squareDerivative(*act.in); }
 END_DEFINE_ACTIVATION(square)
-
+/**
+ * @brief Exponential Activation.
+ * \f[
+ * f(z) = e^z
+ * \f]
+ */
 BEGIN_DEFINE_ACTIVATION(exponential)
 void forward(Argument& act) { act.value->exp(*act.value); }

--- a/paddle/gserver/activations/ActivationFunction.h
+++ b/paddle/gserver/activations/ActivationFunction.h
@ -17,7 +17,18 @@ limitations under the License. */
 #include <string>

 namespace paddle {
+
 struct Argument;
+/**
+ * @brief An activation function transforms a set of input signals into
+ * an output signal. The purpose of the activation function
+ * is to introduce non-linearity into the network.
+ *
+ * @note Common activation functions are provided, including linear,
+ * sigmoid, softmax, sequence_softmax, relu, brelu, tanh, stanh,
+ * softrelu, abs, square, exponential.
+ *
+ */
 class ActivationFunction {
 public:
  static ActivationFunction* create(const std::string& type);
@ -26,16 +37,25 @@ public:

  virtual ~ActivationFunction() {}

-  // act.value <- f(act.value),
-  // where f is the activation function.
-  // Suppose that before calling forward(), act.value is x and
-  // after forward() is called, act.value is y, then y = f(x),
-  // Usually, act is Layer::output_
+  /**
+   * @brief Forward propagation
+   *
+   * act.value <- f(act.value),
+   * where f is the activation function.
+   * Suppose that before calling forward(), act.value is x and
+   * after forward() is called, act.value is y, then y = f(x).
+   *
+   * Usually, act is Layer::output_
+   */
  virtual void forward(Argument& act) = 0;

-  // x and y are defined in the above comment for forward().
-  // Before calling backward(), act.grad = dE / dy, where E is the error/cost.
-  // After backward() returns, act.grad = dE / dx = (dE/dy) * (dy/dx)
+  /**
+   * @brief Backward propagation
+   *
+   * x and y are defined in the above comment for forward().
+   * - Before calling backward(), act.grad = dE / dy, where E is the error/cost
+   * - After backward() returns, act.grad = dE / dx = (dE/dy) * (dy/dx)
+   */
  virtual void backward(Argument& act) = 0;

  virtual const std::string& getName() const = 0;
--- a/paddle/gserver/dataproviders/DataProvider.h
+++ b/paddle/gserver/dataproviders/DataProvider.h
@ -41,7 +41,8 @@ limitations under the License. */
 namespace paddle {

 /**
- * @brief Macro for registering a data provider.
+ * @def REGISTER_DATA_PROVIDER
+ * @brief Macro for registering a data provider
 */
 #define REGISTER_DATA_PROVIDER(__type_name, __class_name)               \
  static InitFunction __reg_type_##__type_name([]() {                   \
@ -52,37 +53,68 @@ class DataBatch;
 class BufferBatch;
 typedef std::shared_ptr<DataBatch> DataBatchPtr;
 typedef std::shared_ptr<BufferBatch> BufferBatchPtr;
-
+/**
+ * @brief Data for batch training a neural network
+ */
 class DataBatch {
 public:
  DataBatch() : size_(0) { data_.clear(); }
-
+  /**
+   * @brief Get batch size
+   * @return batch size
+   */
  int64_t getSize() const { return size_; }
-
+  /**
+   * @brief Get num of sequences of sequence data
+   * @return num of sequences
+   */
  int64_t getNumSequences() const {
    if (data_.empty()) return size_;
    return data_[0].sequenceStartPositions
               ? data_[0].sequenceStartPositions->getSize() - 1
               : size_;
  }
-
+  /**
+   * @brief Set batch size
+   * @param[in] size size
+   */
  void setSize(int64_t size) { size_ = size; }
-
+  /**
+   * @brief Get size of argument vector
+   * @return size of argument vector
+   * @note For usual supervised learning, input data and label is needed,
+   * then there will be two argument.
+   */
  int64_t getNumStreams() const { return data_.size(); }

+  /**
+   * @brief Get the argument at index i
+   * @param[in] i index in argument vector
+   * @return the argument at index i
+   */
  const Argument& getStream(int i) const { return data_[i]; }
-
+  /**
+   * @brief Get all argument
+   * @return an argument vector
+   */
  std::vector<Argument>& getStreams() { return data_; }
-
+  /**
+   * @brief Get all argument const
+   * @return an argument vector
+   */
  std::vector<Argument> getStreams() const { return data_; }
-
+  /**
+   * @brief Clear DataBatch
+   */
  void clear() {
    data_.clear();
    size_ = 0;
  }

  /**
-   * The order in which each data stream is appended must match the order
+   * @brief Append data to DataBatch
+   * @param[in] data  matrix data
+   * @note The order in which each data stream is appended must match the order
   * specified in stream_names of DataConfig. The stream_names can be obtained
   * using DataProvider::getStreamNames().
   */
@ -93,7 +125,10 @@ public:
  }

  /**
-   * The order in which each data stream is appended must match the order
+   * @brief Append sequence data to DataBatch
+   * @param[in] data                      matrix data
+   * @param[in] sequenceStartPositions    sequence data
+   * @note The order in which each data stream is appended must match the order
   * specified in stream_names of DataConfig. The stream_names can be obtained
   * using DataProvider::getStreamNames().
   */
@ -104,24 +139,32 @@ public:
    argu.sequenceStartPositions = sequenceStartPositions;
    data_.push_back(argu);
  }
-
+  /**
+   * @brief Append label data
+   * @param[in]  label    label data
+   * @param[in]  value    matrix data, default null
+   */
  void appendLabel(IVectorPtr label, MatrixPtr value = nullptr) {
    Argument argu;
    argu.ids = label;
    argu.value = value;
    data_.push_back(argu);
  }
-
+  /**
+   * @brief Append user defined data
+   * @param[in]  ptr     user defined data
+   */
  void appendUserDefinedPtr(UserDefinedVectorPtr ptr) {
    Argument argu;
    argu.udp = ptr;
    data_.push_back(argu);
  }

-  /**
-   * @param argus: DataBatch.getStreams()
-   * @param size: DataBatch.getSize()
-   * @param dataId: sub dataprovider id (in MultiDataProvider)
+  /**
+   * @brief Append argument
+   * @param[in]  argus   DataBatch.getStreams()
+   * @param[in]  size    DataBatch.getSize()
+   * @param[in]  dataId  sub dataprovider id (in MultiDataProvider)
   */
  void appendArguments(const std::vector<Argument>& argus, int size,
                       int dataId) {
@ -133,7 +176,14 @@ public:
  }

 protected:
+  /**
+   * @brief batch size
+   */
  int64_t size_;
+  /**
+   * @brief A batch of data consists of an Argument vector;
+   * each argument corresponds to one type of input data.
+   */
  std::vector<Argument> data_;
 };

@ -228,8 +278,8 @@ protected:
 };

 /**
- * DataProvider supplies data for training
- * It can supplies multiple streams of data.
+ * @brief Base class for DataProvider, which supplies data for training
+ * @note It can supply multiple streams of data.
 * For typical supervised training, there are two streams:
 * one is for input, one is for label.
 */
@ -253,16 +303,23 @@ public:
  const DataConfig& getConfig() const { return config_; }

  void setSkipShuffle() { skipShuffle_ = true; }
+
+  /**
+   * @brief Get next batch of training samples
+   * @param[in]    size    size of training samples to get
+   * @param[out]   batch   a batch of training samples
+   * @return actual size of obtained training samples
+   */
  int64_t getNextBatch(int64_t size, DataBatch* batch);

  /**
-   * Shuffle the data set
+   * @brief Shuffle the data set
   */
  virtual void shuffle() = 0;

  /**
-   * reset() must be called before any calls to getNextBatch()
-   * reset all the value of index
+   * @brief reset all the value of index
+   * @note reset() must be called before any calls to getNextBatch()
   * IMPORTANT: subclass reset() should always call the base class reset()
   * at the end of the function
   */
@ -274,10 +331,17 @@ public:
  }

  /**
-   * return the number of training samples in the data set.
-   * return -1 to indicate unlimited number of samples.
+   * @brief Get the size of training samples
+   * @return the number of training samples in the data set.
+   * @note return -1 to indicate unlimited number of samples.
   */
  virtual int64_t getSize() = 0;
+  /**
+   * @brief Get next batch training samples internally
+   * @param[in]    size      size of training samples to get
+   * @param[out]   batch     a batch of training samples
+   * @return actual size of obtained training samples
+   */

  virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch) = 0;

@ -288,7 +352,12 @@ protected:
  bool useGpu_;
  std::unique_ptr<DoubleBuffer> doubleBuffer_;
  ThreadLocal<std::vector<MatrixPtr>> constantSlots_;
-
+  /**
+   * @brief Get next batch training samples from buffer
+   * @param[in]    size      size of training samples to get
+   * @param[out]   batch     a batch of training samples
+   * @return actual size of obtained training samples
+   */
  int64_t getNextBatchFromBuffer(int64_t size, DataBatch* batch);

  void initAsyncLoader();
--- a/paddle/gserver/dataproviders/ProtoDataProvider.h
+++ b/paddle/gserver/dataproviders/ProtoDataProvider.h
@ -26,7 +26,9 @@ limitations under the License. */
 namespace paddle {

 /**
- * @brief  Data file with each sample specified by proto message
+ * @brief Provide data from a protobuf data file with each sample
+ * specified by proto message
+ *
 * DataSample defined in DataFormat.proto.
 *
 * The file format is
@ -68,19 +70,38 @@ public:
  virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch);

 protected:
+  /**
+   * @brief load protobuf data from a list of files
+   * @param[in]  fileName  file name of a file which contains
+   * a list of file names
+   */
  void loadData(const std::string& fileName);
-  void loadDataFile(const std::string& fileName);

+  /**
+   * @brief load protobuf data from file
+   * @param[in]  fileName   data file name
+   */
+  void loadDataFile(const std::string& fileName);
+  /** @brief check data header of each data sample
+   *  @param[in] header     data header read from protobuf data
+   */
  void checkDataHeader(const DataHeader& header);
+  /**
+   * @brief fill protobuf data into slot_,
+   * slot_ is a vector of ProtoSlot in memory.
+   * @param[in]  sample     data sample read from protobuf data
+   */
  void fillSlots(const DataSample& sample);

  /**
-   * return true if each sample is one sequence, i.e., independent
+   * @brief return true if each sample is one sequence, i.e., independent
   * of other samples.
   */
  inline bool iidData() const { return sequenceStartPositions_.empty(); }

-  /// check that sample is consistent with header_
+  /**
+   * @brief check that sample is consistent with header_
+   */
  void checkSample(const DataSample& sample);

  template <class Op>
@ -129,20 +150,21 @@ protected:

  int64_t currentSequenceIndex_;

-  /// The size should be the number of sequences.
+  // The size should be the number of sequences.
  std::vector<size_t> shuffledSequenceIds_;

  ThreadLocalD<DataBatch> cpuBatch_;
  ThreadLocalD<DataBatch> gpuBatch_;

  RWLock lock_;
-  // stats for number of none-zeros entries
-  std::vector<StatPtr> nnzStats_;
+  std::vector<StatPtr> nnzStats_;  // stats for number of none-zeros entries
 };

 /**
- * Special use for Proto data: instances should contain sparse-non-value slots
- * and label. ProtoSequenceDataProvider treats each SPARSE SLOT as a SEQUENCE
+ * @brief Special use for Proto data: instances should contain sparse-non-value slots
+ * and label.
+ *
+ * @note ProtoSequenceDataProvider treats each SPARSE SLOT as a SEQUENCE
 */
 class ProtoSequenceDataProvider : public ProtoDataProvider {
 public:
--- a/paddle/gserver/evaluators/Evaluator.cpp
+++ b/paddle/gserver/evaluators/Evaluator.cpp
@ -33,7 +33,11 @@ void Evaluator::eval(const NeuralNetwork& nn) {
  totalScore_ += score;
  updateSamplesNum(arguments);
 }
-
+/**
+ * @brief classification error Evaluator
+ *
+ * The config file api is classification_error_evaluator.
+ */
 class ClassificationErrorEvaluator : public Evaluator {
 public:
  virtual void updateSamplesNum(const std::vector<Argument>& arguments) {
@ -99,8 +103,11 @@ public:
  }
 };

-// sequence level classification error stats:
-//   if any frame in one sequence has error, the sequence is error
+/**
+ * @brief sequence classification error Evaluator
+ * @note sequence level classification error stats,
+ * if any frame in one sequence has error, the sequence is error
+ */
 class SequenceClassificationErrorEvaluator
    : public ClassificationErrorEvaluator {
 public:
@ -135,7 +142,12 @@ public:
 };
 REGISTER_EVALUATOR(seq_classification_error,
                   SequenceClassificationErrorEvaluator);
-
+/**
+ * @brief sum Evaluator
+ * Calculate the sum of output or label
+ *
+ * The config file api is sum_evaluator.
+ */
 class SumEvaluator : public Evaluator {
 public:
  SumEvaluator() : cpuLabel_(nullptr), cpuWeight_(nullptr) {}
@ -218,13 +230,18 @@ private:
  IVectorPtr cpuLabel_;
  MatrixPtr cpuWeight_;
 };
-
+/**
+ * @brief column sum Evaluator
+ * @note column sum for the colIdx-th column
+ * - colIdx = 0: the 0-th column.
+ * - colIdx > 0: the colIdx-th column.
+ * - colIdx < 0: the last colIdx-th column.
+ *
+ * The config file api is column_sum_evaluator.
+ *
+ */
 class ColumnSumEvaluator : public Evaluator {
 public:
-  // column sum for the colIdx-th column
-  // colIdx = 0: the 0-th column
-  //         > 0: the colIdx-th column
-  //         < 0: the last colIdx-th column
  explicit ColumnSumEvaluator(int32_t colIdx)
      : colIdx_(colIdx), colNum_(0), sum_(nullptr) {}

@ -845,7 +862,11 @@ Evaluator* Evaluator::create(const EvaluatorConfig& config) {
  evaluator->init(config);
  return evaluator;
 }
-
+/**
+ * @brief print value of each layer.
+ *
+ * The config file api is value_printer_evaluator.
+ */
 class ValuePrinter : public Evaluator {
 public:
  ValuePrinter() {}
@ -882,7 +903,11 @@ public:
  virtual real evalImp(std::vector<Argument>& arguments) { return 0; }
 };
 REGISTER_EVALUATOR(value_printer, ValuePrinter);
-
+/**
+ * @brief print gradient of each layer.
+ *
+ * The config file api is gradient_printer_evaluator.
+ */
 class GradientPrinter : public Evaluator {
 public:
  GradientPrinter() {}
@ -908,7 +933,11 @@ public:
  virtual real evalImp(std::vector<Argument>& arguments) { return 0; }
 };
 REGISTER_EVALUATOR(gradient_printer, GradientPrinter);
-
+/**
+ * @brief print row max id vector of each layer
+ *
+ * The config file api is maxid_printer_evaluator.
+ */
 class MaxIdPrinter : public Evaluator {
 private:
  IVectorPtr maxIds_;
@ -946,7 +975,11 @@ public:
  virtual real evalImp(std::vector<Argument>& arguments) { return 0; }
 };
 REGISTER_EVALUATOR(max_id_printer, MaxIdPrinter);
-
+/**
+ * @brief print sequence max frames of each layer
+ *
+ * The config file api is maxframe_printer_evaluator.
+ */
 class MaxFramePrinter : public Evaluator {
 private:
  IVectorPtr maxIds_;
@ -998,30 +1031,29 @@ public:
 REGISTER_EVALUATOR(max_frame_printer, MaxFramePrinter);

 /**
- * Sequence text printer will print text according to index matrix and a
- * dictionary. There can be multiple input to this layer:
+ * @brief print text according to index matrix and a dictionary.
 *
- *   1) If there is only one input, the input must be a matrix containing
+ * There can be multiple input to this layer:
+ * - If there is only one input, the input must be a matrix containing
 *      the sequence of indices;
- *
- *   2) If there are more than one input, the first input should be ids,
+ * - If there are more than one input, the first input should be ids,
 *      and are interpreted as sample ids.
 *
 * The output format will be:
 *
- *   1) sequence without sub-sequence, and there is probability.
+ * - sequence without sub-sequence, and there is probability.
 *
 *     @code
 *      id \t prob space_seperated_tokens_from_dictionary_according_to_seq
 *     @endcode
 *
- *   2) sequence without sub-sequence, and there is not probability.
+ * - sequence without sub-sequence, and there is not probability.
 *
 *     @code
 *      id \t space_seperated_tokens_from_dictionary_according_to_seq
 *     @endcode
 *
- *   3) sequence with sub-sequence, and there is not probability.
+ * - sequence with sub-sequence, and there is not probability.
 *
 *     @code
 *      id \t space_seperated_tokens_from_dictionary_according_to_sub_seq
@ -1032,6 +1064,8 @@ REGISTER_EVALUATOR(max_frame_printer, MaxFramePrinter);
 * Typically SequenceTextPrinter layer takes output of maxid or RecurrentGroup
 * with maxid (when generating) as an input.
 *
+ * The config file api is seqtext_printer_evaluator.
+ *
 */
 class SequenceTextPrinter : public Evaluator {
 private:
@ -1172,7 +1206,11 @@ public:
  }
 };
 REGISTER_EVALUATOR(seq_text_printer, SequenceTextPrinter);
-
+/**
+ * @brief print classification error.
+ *
+ * The config file api is classification_error_printer_evaluator.
+ */
 class ClassificationErrorPrinter : public ClassificationErrorEvaluator {
 public:
  virtual void updateSamplesNum(const std::vector<Argument>& arguments) {}
--- a/paddle/gserver/evaluators/Evaluator.h
+++ b/paddle/gserver/evaluators/Evaluator.h
@ -24,12 +24,21 @@ limitations under the License. */
 namespace paddle {

 class NeuralNetwork;
+/**
+ * @def REGISTER_EVALUATOR
+ * @brief Macro for registering evaluator class
+ */

 #define REGISTER_EVALUATOR(__type_name, __class_name)                \
  static InitFunction __reg_type_##__type_name([]() {                \
    Evaluator::registrar_.registerClass<__class_name>(#__type_name); \
  })
-
+/**
+ * @brief Base class for Evaluator
+ * Evaluating the performance of a model is very important.
+ * It indicates how well the scores (predictions) produced by a
+ * trained model match a dataset.
+ */
 class Evaluator {
 public:
  static Evaluator* create(const EvaluatorConfig& config);
@ -41,7 +50,7 @@ public:
  virtual void init(const EvaluatorConfig& config) { config_ = config; }

  /**
-   * start to evaluate some data
+   * @brief start to evaluate some data
   */
  virtual void start() {
    numSamples_ = 0;
@ -49,20 +58,21 @@ public:
  }

  /**
-   * Process a batch of data.
+   * @brief Process a batch of data.
   */
  virtual void eval(const NeuralNetwork& nn);

  /**
-   * Process a batch of data.
-   * return the score for the batch if it make sense to sum the score across
-   * batches. Otherwise evaluator should return 0 and override finish() and
+   * @brief Process a batch of data.
+   * @return the score for the batch if it make sense to sum the score across
+   * batches.
+   * @note Otherwise evaluator should return 0 and override finish() and
   * printStats() to do the right calculation.
   */
  virtual real evalImp(std::vector<Argument>& arguments) = 0;

  /**
-   * Update the number of processed samples
+   * @brief Update the number of processed samples
   */
  virtual void updateSamplesNum(const std::vector<Argument>& arguments) {
    numSamples_ += arguments[0].getBatchSize();
@ -81,11 +91,14 @@ public:
  }

  /**
-   * finish the evaluation.
+   * @brief finish the evaluation.
   */
  virtual void finish() {}

-  /// finish() should be called before printStats
+  /**
+   * @brief print the statistics of evaluate result
+   * @note finish() should be called before printStats
+   */
  virtual void printStats(std::ostream& os) {
    os << config_.name() << "="
       << (numSamples_ ? totalScore_ / numSamples_ : 0);
@ -124,17 +137,23 @@ public:
  virtual void finish() {}
  virtual void printStats(std::ostream&) {}
 };
-
-class AucEvaluator : public Evaluator {
-public:
-  /**
+/**
 * @brief evaluate AUC using colIdx-th column as prediction.
+ * The AUC(Area Under the Curve) is a common evaluation metric
+ * for binary classification problems. It computes the area under
+ * the receiver operating characteristic(ROC) curve.
+ *
+ * @note colIdx-th column
 *
 * - colIdx = 0: the 0-th column.
 * - colIdx > 0: the colIdx-th column.
 * - colIdx < 0: the last colIdx-th column.
 *
+ * The config file api is auc_evaluator.
+ *
 */
+class AucEvaluator : public Evaluator {
+public:
  AucEvaluator(int32_t colIdx)
      : colIdx_(colIdx),
        realColumnIdx_(0),
@ -174,13 +193,11 @@ private:
 };

 /**
- * @brief RankAucEvaluator calculates the AUC of each list
- * (i.e., titles under the same query), and averages them.
- *
- * Each list should be organized as a sequence.
- * The inputs of this evaluator is [output, click, pv].
- * If pv is not provided, it will be set to 1.
- * The types of click and pv are dense value.
+ * @brief RankAucEvaluator calculates the AUC of each list (i.e., titles
+ * under the same query), and averages them. Each list should be organized
+ * as a sequence. The inputs of this evaluator is [output, click, pv]. If pv
+ * is not provided, it will be set to 1. The types of click and pv are
+ * dense value.
 */
 class RankAucEvaluator : public Evaluator {
 public:
@ -204,7 +221,16 @@ private:
  double calcRankAuc(real* outputData, real* clickData, real* pvData,
                     size_t size);
 };
-
+/**
+ * @brief precision, recall and f1 score Evaluator
+ * \f[
+ * precision = \frac{tp}{tp+fp} \\
+ * recall=\frac{tp}{tp+fn} \\
+ * f1=2*\frac{precision*recall}{precision+recall}
+ * \f]
+ *
+ * The config file api is precision_recall_evaluator.
+ */
 class PrecisionRecallEvaluator : public Evaluator {
 public:
  // Evaluate precision, recall and F1 score
@ -274,8 +300,10 @@ private:
  }
 };

-/**
- * Positive-negative pair rate Evaluator
+/**
+ * @brief positive-negative pair rate Evaluator
+ *
+ * The config file api is pnpair_evaluator.
 */
 class PnpairEvaluator : public Evaluator {
 public:
--- a/paddle/gserver/layers/CosSimLayer.h
+++ b/paddle/gserver/layers/CosSimLayer.h
@ -20,7 +20,19 @@ limitations under the License. */
 #include "paddle/utils/ThreadLocal.h"

 namespace paddle {
-
+/**
+ * @brief A layer for calculating cosine similarity between two vector
+ * \f[
+ * f(x,y)=scale\frac{x_1y_1+x_2y_2+...+x_ny_n}{\sqrt{x_1^2+x_2^2+...
+ * +x_n^2}\sqrt{y_1^2+y_2^2+...+y_n^2}}
+ * \f]
+ *
+ * - Input1: A vector (batchSize * dataDim)
+ * - Input2: A vector (batchSize * dataDim) or (1 * dataDim)
+ * - Output: A vector (dataDim * 1)
+ *
+ * The config file api is cos_sim.
+ */
 class CosSimLayer : public Layer {
 public:
  explicit CosSimLayer(const LayerConfig& config)
--- a/paddle/gserver/layers/CosSimVecMatLayer.cpp
+++ b/paddle/gserver/layers/CosSimVecMatLayer.cpp
@ -21,13 +21,16 @@ limitations under the License. */
 namespace paddle {

 /**
- * A layer for computing cosine similarity between a vector an each row of a
- * matrix,
+ * @brief A layer for computing cosine similarity between a vector
+ * and each row of a matrix
 * out[i] = cos_scale * cos(in1, in2(i,:));
- * which is used in NEURAL TURING MACHINE
- * Input: a vector (batchSize x dataDim) and a matrix in vec form (batchSize x
- * (weightDim*dataDim))
- * Output: a vector (batchSize x weightDim)
+ * @note used in NEURAL TURING MACHINE
+ *
+ * Input1: a vector (batchSize * dataDim)
+ *
+ * Input2: a matrix in vector form (batchSize * (weightDim*dataDim))
+ *
+ * Output: a vector (batchSize * weightDim)
 */

 class CosSimVecMatLayer : public Layer {
--- a/paddle/gserver/layers/DataNormLayer.h
+++ b/paddle/gserver/layers/DataNormLayer.h
@ -22,18 +22,18 @@ limitations under the License. */
 namespace paddle {

 /**
- * A layer for data normalization
- * Input: One and only one input layer is accepted. The input layer must
+ * @brief A layer for data normalization
+ * - Input: One and only one input layer is accepted. The input layer must
 *        be DataLayer with dense data type.
- * Output: The normalization of the input data
+ * - Output: The normalization of the input data
 *
 * Reference:
 *    LA Shalabi, Z Shaaban, B Kasasbeh. Data mining: A preprocessing engine
 *
 * Three data normalization methods are considered
- *    z-score: y = (x-mean)/std
- *    min-max: y = (x-min)/(max-min)
- *    decimal-scaling: y = x/10^j, where j is the smallest integer such that
+ * - z-score: y = (x-mean)/std
+ * - min-max: y = (x-min)/(max-min)
+ * - decimal-scaling: y = x/10^j, where j is the smallest integer such that
 *max(|y|)<1
 */

--- a/paddle/gserver/layers/NormLayer.h
+++ b/paddle/gserver/layers/NormLayer.h
@ -23,8 +23,9 @@ limitations under the License. */
 namespace paddle {

 /**
- * @brief basic parent layer of normalization
- * Normalize the input in local region
+ * @brief Basic parent layer of normalization
+ *
+ * @note Normalize the input in local region
 */
 class NormLayer : public Layer {
 public:
@ -35,7 +36,9 @@ public:
    return true;
  }

-  // create norm layer by norm_type
+  /**
+   * @brief create norm layer by norm_type
+   */
  static Layer* create(const LayerConfig& config);
 };

--- a/paddle/gserver/layers/OuterProdLayer.cpp
+++ b/paddle/gserver/layers/OuterProdLayer.cpp
@ -21,10 +21,11 @@ limitations under the License. */
 namespace paddle {

 /**
- * A layer for computing the outer product of two vectors,
- * which is used in NEURAL TURING MACHINE
- * Input: two vectors: batchSize x dim1, batchSize x dim2
- * Output: a matrix: (batchSize x (dim1*dim2))
+ * @brief A layer for computing the outer product of two vectors
+ * @note used in NEURAL TURING MACHINE
+ * Input1: vector (batchSize * dim1)
+ * Input2: vector (batchSize * dim2)
+ * Output: a matrix: (batchSize * (dim1*dim2))
 */

 class OuterProdLayer : public Layer {
--- a/paddle/gserver/layers/PoolLayer.h
+++ b/paddle/gserver/layers/PoolLayer.h
@ -22,7 +22,7 @@ limitations under the License. */
 namespace paddle {

 /**
- * @brief basic parent layer of pooling
+ * @brief Basic parent layer of pooling
 * Pools the input within regions
 */
 class PoolLayer : public Layer {
@ -41,7 +41,9 @@ protected:
 public:
  explicit PoolLayer(const LayerConfig& config) : Layer(config) {}

-  // create pooling layer by pool_type
+  /**
+   * @brief create pooling layer by pool_type
+   */
  static Layer* create(const LayerConfig& config);

  virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
--- a/paddle/gserver/layers/PoolProjectionLayer.h
+++ b/paddle/gserver/layers/PoolProjectionLayer.h
@ -20,7 +20,9 @@ limitations under the License. */
 #include <vector>

 namespace paddle {
-
+/**
+ * @brief Basic parent layer of different kinds of pooling
+ */
 class PoolProjectionLayer : public PoolLayer {
 protected:
  size_t imgSizeH_, imgSizeW_;
@ -30,7 +32,9 @@ public:
  size_t getSize();
  explicit PoolProjectionLayer(const LayerConfig& config) : PoolLayer(config) {}
 };
-
+/**
+ * @brief A layer for max pooling
+ */
 class MaxPoolProjectionLayer : public PoolProjectionLayer {
 public:
  explicit MaxPoolProjectionLayer(const LayerConfig& config)
@ -41,7 +45,9 @@ public:
  virtual void forward(PassType passType);
  virtual void backward(const UpdateCallback& callback = nullptr);
 };
-
+/**
+ * @brief A layer for average pooling
+ */
 class AvgPoolProjectionLayer : public PoolProjectionLayer {
 public:
  explicit AvgPoolProjectionLayer(const LayerConfig& config)
--- a/paddle/gserver/layers/ResizeLayer.cpp
+++ b/paddle/gserver/layers/ResizeLayer.cpp
@ -18,7 +18,12 @@ limitations under the License. */
 #include "paddle/math/BaseMatrix.h"

 namespace paddle {
-/* resize a minibatch matrix h*w to h'*w' */
+/**
+ * @brief A layer for resizing a minibatch matrix h*w to h'*w'
+ * @note
+ * origin matrix: (height * width)
+ * resize matrix: (height * width / size) * size
+ */
 class ResizeLayer : public Layer {
 public:
  explicit ResizeLayer(const LayerConfig& config) : Layer(config) {}