update code

avx_docs
dangqingqing 8 years ago
commit 8d47499e1d

@ -25,9 +25,9 @@ addons:
packages:
- gcc-4.8
- g++-4.8
- gfortran-4.8
- git
- build-essential
- libatlas-base-dev
- python
- python-pip
- python2.7-dev

@ -16,7 +16,7 @@
set(CBLAS_FOUND OFF)
## Find MKL First.
set(MKL_ROOT $ENV{MKL_ROOT} CACHE PATH "Folder contains MKL")
set(MKL_ROOT $ENV{MKLROOT} CACHE PATH "Folder contains MKL")
find_path(MKL_INCLUDE_DIR mkl.h PATHS
${MKL_ROOT}/include)

@ -15,7 +15,6 @@
INCLUDE(cblas)
IF(NOT ${CBLAS_FOUND})
MESSAGE(FATAL_ERROR "Please install OpenBlas, MKL or ATLAS.")
INCLUDE(ExternalProject)
SET(CBLAS_SOURCES_DIR ${THIRD_PARTY_PATH}/openblas)
@ -28,20 +27,40 @@ IF(NOT ${CBLAS_FOUND})
SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/libopenblas.a" CACHE FILEPATH "openblas library" FORCE)
ENDIF(WIN32)
IF(CMAKE_COMPILER_IS_GNUCC)
ENABLE_LANGUAGE(Fortran)
LIST(APPEND CBLAS_LIBRARIES gfortran pthread)
ENDIF(CMAKE_COMPILER_IS_GNUCC)
IF(NOT CMAKE_Fortran_COMPILER)
MESSAGE(FATAL_ERROR "To build lapack in libopenblas, "
"you need to set gfortran compiler: cmake .. -DCMAKE_Fortran_COMPILER=...")
ENDIF(NOT CMAKE_Fortran_COMPILER)
ExternalProject_Add(
openblas
${EXTERNAL_PROJECT_LOG_ARGS}
URL "https://github.com/xianyi/OpenBLAS/archive/v0.2.19.tar.gz"
GIT_REPOSITORY https://github.com/xianyi/OpenBLAS.git
GIT_TAG v0.2.19
PREFIX ${CBLAS_SOURCES_DIR}
INSTALL_DIR ${CBLAS_INSTALL_DIR}
BUILD_IN_SOURCE 1
CONFIGURE_COMMAND ""
BUILD_COMMAND make CC=${CMAKE_C_COMPILER} FC=${CMAKE_Fortran_COMPILER}
INSTALL_COMMAND make install PREFIX=<INSTALL_DIR>
BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} FC=${CMAKE_Fortran_COMPILER} CC=${CMAKE_C_COMPILER} HOSTCC=${CMAKE_C_COMPILER} NO_SHARED=1 libs netlib
INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} install NO_SHARED=1 PREFIX=<INSTALL_DIR>
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
)
ExternalProject_Add_Step(
openblas lapacke_install
COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h" "${CBLAS_INSTALL_DIR}/include/lapacke_mangling.h"
COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke.h" "${CBLAS_INSTALL_DIR}/include/lapacke.h"
COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_config.h" "${CBLAS_INSTALL_DIR}/include/lapacke_config.h"
COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_utils.h" "${CBLAS_INSTALL_DIR}/include/lapacke_utils.h"
DEPENDEES install
)
LIST(APPEND external_project_dependencies openblas)
ENDIF()
ENDIF(NOT ${CBLAS_FOUND})
INCLUDE_DIRECTORIES(${CBLAS_INC_DIR})

@ -54,6 +54,7 @@ ExternalProject_Add(
CONFIGURE_COMMAND
${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/protobuf/cmake
-Dprotobuf_BUILD_TESTS=OFF
-DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}

@ -31,6 +31,7 @@ IF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
"please use pip to upgrade protobuf.")
ENDIF(${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0")
ELSE(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
MESSAGE(FATAL_ERROR "Please install python 2.7 before building PaddlePaddle.")
##################################### PYTHON ########################################
SET(PYTHON_SOURCES_DIR ${THIRD_PARTY_PATH}/python)
SET(PYTHON_INSTALL_DIR ${THIRD_PARTY_PATH}/install/python)

@ -96,6 +96,7 @@ set(COMMON_FLAGS
-Wno-unused-parameter
-Wno-unused-function
-Wno-error=literal-suffix
-Wno-error=sign-compare
-Wno-error=unused-local-typedefs)
set(GPU_COMMON_FLAGS
@ -105,6 +106,7 @@ set(GPU_COMMON_FLAGS
-Wdelete-non-virtual-dtor
-Wno-unused-parameter
-Wno-unused-function
-Wno-error=sign-compare
-Wno-error=literal-suffix
-Wno-error=unused-local-typedefs
-Wno-error=unused-function # Warnings in Numpy Header.

@ -21,6 +21,7 @@ ELSE(WIN32)
SET(MACOS_VERSION ${VERSION})
SET(HOST_SYSTEM "macosx")
ELSE(APPLE)
IF(EXISTS "/etc/issue")
FILE(READ "/etc/issue" LINUX_ISSUE)
IF(LINUX_ISSUE MATCHES "CentOS")
@ -31,6 +32,14 @@ ELSE(WIN32)
SET(HOST_SYSTEM "ubuntu")
ENDIF()
ENDIF(EXISTS "/etc/issue")
IF(EXISTS "/etc/redhat-release")
FILE(READ "/etc/redhat-release" LINUX_ISSUE)
IF(LINUX_ISSUE MATCHES "CentOS")
SET(HOST_SYSTEM "centos")
ENDIF()
ENDIF(EXISTS "/etc/redhat-release")
ENDIF(APPLE)
ENDIF(WIN32)
@ -47,7 +56,7 @@ SET(EXTERNAL_PROJECT_LOG_ARGS
LOG_DOWNLOAD 0 # Wrap download in script to log output
LOG_UPDATE 1 # Wrap update in script to log output
LOG_CONFIGURE 1 # Wrap configure in script to log output
LOG_BUILD 1 # Wrap build in script to log output
LOG_BUILD 0 # Wrap build in script to log output
LOG_TEST 1 # Wrap test in script to log output
LOG_INSTALL 1 # Wrap install in script to log output
LOG_INSTALL 0 # Wrap install in script to log output
)

@ -4,6 +4,8 @@ Installing from Sources
* [1. Download and Setup](#download)
* [2. Requirements](#requirements)
* [3. Build on Ubuntu](#ubuntu)
* [4. Build on CentOS](#centos)
## <span id="download">Download and Setup</span>
You can download PaddlePaddle from the [github source](https://github.com/PaddlePaddle/Paddle).
@ -64,7 +66,8 @@ As a simple example, consider the following:
1. **BLAS Dependencies (optional)**
Paddle will find BLAS from the system's default path, but you can specify MKL, OpenBLAS or ATLAS via `MKL_ROOT`, `OPENBLAS_ROOT` or `ATLAS_ROOT`.
CMake will search for BLAS libraries on the system. If none are found, OpenBLAS will be downloaded, built and installed automatically.
To utilize a preinstalled BLAS, you can simply specify MKL, OpenBLAS or ATLAS via `MKL_ROOT`, `OPENBLAS_ROOT` or `ATLAS_ROOT`.
```bash
# specify MKL
@ -99,7 +102,7 @@ As a simple example, consider the following:
```bash
# necessary
sudo apt-get update
sudo apt-get install -y g++ make cmake build-essential libatlas-base-dev python python-pip libpython-dev git
sudo apt-get install -y g++ make cmake build-essential python python-pip libpython-dev git
sudo pip install wheel numpy
sudo pip install 'protobuf>=3.0.0'
```
@ -150,3 +153,64 @@ export PATH=<path to install>/bin:$PATH
# install PaddlePaddle Python modules.
sudo pip install <path to install>/opt/paddle/share/wheels/*.whl
```
## <span id="centos">Build on Centos 7</span>
### Install Dependencies
- **CPU Dependencies**
```bash
# necessary
sudo yum update
sudo yum install -y epel-release
sudo yum install -y make cmake3 python-devel python-pip gcc-gfortran swig git
sudo pip install wheel numpy
sudo pip install 'protobuf>=3.0.0'
```
- **GPU Dependencies (optional)**
To build the GPU version, you will need the following installed:
1. A CUDA-capable GPU
2. A supported version of Linux with a gcc compiler and toolchain
3. NVIDIA CUDA Toolkit (available at http://developer.nvidia.com/cuda-downloads)
4. NVIDIA cuDNN Library (available at https://developer.nvidia.com/cudnn)
The CUDA development environment relies on tight integration with the host development environment,
including the host compiler and C runtime libraries, and is therefore only supported on
distribution versions that have been qualified for this CUDA Toolkit release.
After downloading the cuDNN library, issue the following commands:
```bash
sudo tar -xzf cudnn-7.5-linux-x64-v5.1.tgz -C /usr/local
sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib64/libcudnn*
```
Then you need to set the LD\_LIBRARY\_PATH and PATH environment variables in ~/.bashrc.
```bash
export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
export PATH=/usr/local/cuda/bin:$PATH
```
### Build and Install
As usual, the best option is to create a build folder under the paddle project directory.
```bash
mkdir build && cd build
```
Finally, you can build and install PaddlePaddle:
```bash
# you can add build options here, such as:
cmake3 .. -DCMAKE_INSTALL_PREFIX=<path to install>
# please use sudo make install, if you want to install PaddlePaddle into the system
make -j `nproc` && make install
# set PaddlePaddle installation path in ~/.bashrc
export PATH=<path to install>/bin:$PATH
# install PaddlePaddle Python modules.
sudo pip install <path to install>/opt/paddle/share/wheels/*.whl
```

@ -32,7 +32,7 @@ An example of using pooling_layer is shown below; for details, see :ref:`api_trainer_config_helpers
- `pooling_type` currently supports two types: MaxPooling() and AvgPooling().
- When `agg_level=AggregateLevel.TIMESTEP` (the default):
- When `agg_level=AggregateLevel.EACH_TIMESTEP` (the default):
- Effect: a double-level sequence is reduced to a 0-level sequence, or a single-level sequence is reduced to a 0-level sequence
- Input: a double-level sequence, or a single-level sequence
@ -54,7 +54,7 @@ An example of using last_seq is shown below ( :ref:`api_trainer_config_helpers_layers_first_
last = last_seq(input=layer,
agg_level=AggregateLevel.EACH_SEQUENCE)
- When `agg_level=AggregateLevel.TIMESTEP` (the default):
- When `agg_level=AggregateLevel.EACH_TIMESTEP` (the default):
- Effect: a double-level sequence is reduced to a 0-level sequence, or a single-level sequence is reduced to a 0-level sequence
- Input: a double-level sequence or a single-level sequence

File diff suppressed because it is too large.

Binary file not shown (new image added, 70 KiB).

@ -20,23 +20,27 @@ limitations under the License. */
namespace paddle {
const SequenceArg& BufferArg::sequence() const {
// CHECK_EQ(bufferType_, TENSOR_SEQUENCE_DATA);
CHECK_EQ(bufferType_, TENSOR_SEQUENCE_DATA);
return dynamic_cast<const SequenceArg&>(*this);
}
const SparseMatrixArg& BufferArg::sparse() const {
// CHECK_EQ(bufferType_, TENSOR_SPARSE);
CHECK_EQ(bufferType_, TENSOR_SPARSE);
return dynamic_cast<const SparseMatrixArg&>(*this);
}
SparseMatrixArg::SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType)
: BufferArg(sparse, argType),
row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {}
col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {
bufferType_ = TENSOR_SPARSE;
}
SparseMatrixArg::SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType)
: BufferArg(sparse, argType),
row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {}
col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {
bufferType_ = TENSOR_SPARSE;
}
} // namespace paddle

@ -23,10 +23,11 @@ limitations under the License. */
namespace paddle {
enum BufferType {
TENSOR_NORMAL = 0,
TENSOR_SEQUENCE_ID = 1,
TENSOR_SEQUENCE_DATA = 2,
TENSOR_SPARSE = 3
TENSOR_UNKNOWN = 0,
TENSOR_NORMAL = 1,
TENSOR_SEQUENCE_ID = 2,
TENSOR_SEQUENCE_DATA = 3,
TENSOR_SPARSE = 4
};
enum SparseDataType {
@ -39,7 +40,6 @@ enum SparseDataFormat { SPARSE_CSR_FORMAT = 0, SPARSE_CSC_FORMAT = 1 };
class BufferArg;
class SequenceArg;
class SparseMatrixArg;
typedef std::shared_ptr<BufferArg> BufferArgPtr;
/**
* \brief BufferArg used as the argument type of Function.
@ -50,6 +50,11 @@ typedef std::shared_ptr<BufferArg> BufferArgPtr;
* 3. SequenceArg for a Buffer of sequence data.
* 4. SparseMatrixArg for a Buffer of sparse matrix.
*
* Buffer shape
* For most buffers, the first dimension `shape()[0]` represents
* the size of the mini-batch.
*
* Buffer argType
* There is an ArgType property for the BufferArg used as Function Output.
* Whether the result of the Function calculation is assigned to the
* output Buffer or added to the output Buffer is determined by the
@ -71,6 +76,14 @@ public:
ArgType getArgType() const { return argType_; }
public:
BufferArg(ValueType valueType,
const TensorShape& shape,
ArgType argType = UNSPECIFIED)
: buf_(nullptr),
valueType_(valueType),
shape_(shape),
argType_(argType) {}
BufferArg(void* buf,
ValueType valueType,
const TensorShape& shape,
@ -86,6 +99,7 @@ public:
valueType_(DataType<real>::value),
shape_(2),
argType_(argType) {
bufferType_ = TENSOR_NORMAL;
shape_.setDim(0, matrix.getHeight());
shape_.setDim(1, matrix.getWidth());
}
@ -98,6 +112,7 @@ public:
valueType_(DataType<real>::value),
shape_(shape),
argType_(argType) {
bufferType_ = TENSOR_NORMAL;
CHECK_EQ(matrix.getElementCnt(), shape.getElements());
}
@ -107,6 +122,7 @@ public:
valueType_(DataType<real>::value),
shape_(1),
argType_(argType) {
bufferType_ = TENSOR_NORMAL;
shape_.setDim(0, vector.getSize());
}
@ -116,6 +132,7 @@ public:
valueType_(VALUE_TYPE_INT32),
shape_(1),
argType_(argType) {
bufferType_ = TENSOR_NORMAL;
shape_.setDim(0, vector.getSize());
}
@ -150,6 +167,8 @@ public:
ValueType valueType() const { return valueType_; }
BufferType bufferType() const { return bufferType_; }
const TensorShape& shape() const { return shape_; }
bool isSparse() const { return (TENSOR_SPARSE == bufferType_); }
bool isSequenceArg() const { return TENSOR_SEQUENCE_DATA == bufferType_; }
const SequenceArg& sequence() const;
const SparseMatrixArg& sparse() const;
@ -158,8 +177,8 @@ protected:
void* buf_;
ValueType valueType_;
TensorShape shape_;
BufferType bufferType_;
ArgType argType_ = UNSPECIFIED;
BufferType bufferType_{TENSOR_UNKNOWN};
ArgType argType_{UNSPECIFIED};
// leading dimensions. The size is dims_.size()
// Dims lds_;
};
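As a quick illustration of the new shape-only constructor added above (the updated CrossMapNormalOpTest later in this diff relies on it), here is a minimal hedged sketch; the header path, helper name, and sizes are assumptions, not part of the commit.

```cpp
#include "paddle/function/BufferArg.h"  // assumed header location

using namespace paddle;

// Hypothetical helper: describe an argument by value type and shape only,
// with no backing memory attached.
void describeShapeOnlyArg() {
  TensorShape shape{128, 32};              // hypothetical batch_size x dim
  BufferArg arg(VALUE_TYPE_FLOAT, shape);  // buf_ stays nullptr
  CHECK_EQ(arg.shape().ndims(), (size_t)2);
  CHECK_EQ(arg.shape()[0], (size_t)128);   // shape()[0] is the mini-batch size
  // Note: this constructor leaves bufferType_ at its TENSOR_UNKNOWN default.
}
```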
@ -170,15 +189,24 @@ protected:
// if a < b then value_.buf_[a] < value_.buf_[b]
class SequenceIdArg : public BufferArg {
public:
SequenceIdArg(const TensorShape& shape, ArgType argType = UNSPECIFIED)
: BufferArg(VALUE_TYPE_INT32, shape, argType) {
CHECK_EQ(shape_.ndims(), (size_t)1);
CHECK_GT(shape_[0], 1);
numSeqs_ = shape_[0] - 1;
}
SequenceIdArg(void* buf,
const TensorShape& shape,
ArgType argType = UNSPECIFIED)
: BufferArg(buf, VALUE_TYPE_INT32, shape, argType) {
bufferType_ = TENSOR_SEQUENCE_ID;
CHECK_EQ(shape_.ndims(), (size_t)1);
numSeqs_ = shape_[0] - 1;
}
SequenceIdArg(const IVector& vector) : BufferArg(vector) {
bufferType_ = TENSOR_SEQUENCE_ID;
numSeqs_ = shape_[0] - 1;
}
@ -190,26 +218,41 @@ private:
size_t numSeqs_;
};
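For readers unfamiliar with the start-position layout these constructors assume, a hedged sketch (not part of the commit; header path and values are hypothetical): the buffer holds numSeqs + 1 offsets, so `shape_[0] - 1` gives the number of sequences.

```cpp
#include "paddle/function/BufferArg.h"  // assumed header location

using namespace paddle;

// Hypothetical example: two sequences of lengths 3 and 5 in one mini-batch.
void sequenceIdExample() {
  int starts[] = {0, 3, 8};                  // numSeqs + 1 = 3 int32 offsets
  SequenceIdArg ids(starts, TensorShape{3});
  CHECK_EQ(ids.numSeqs(), (size_t)2);        // shape_[0] - 1
}
```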
// sequence data
// sequences data
// For mini-batch calculation,
// one batch can contain more than one sequence of data.
// SequenceArg can be used to represent sequences of unequal lengths
// within a batch.
class SequenceArg : public BufferArg {
public:
SequenceArg(ValueType valueType,
const TensorShape& shape,
ArgType argType = UNSPECIFIED)
: BufferArg(valueType, shape, argType), startPositions_(TensorShape()) {}
SequenceArg(void* buf,
ValueType valueType,
const TensorShape& shape,
const SequenceIdArg& startPositions,
ArgType argType = UNSPECIFIED)
: BufferArg(buf, valueType, shape, argType),
startPositions_(startPositions) {}
startPositions_(startPositions) {
bufferType_ = TENSOR_SEQUENCE_DATA;
}
SequenceArg(const Matrix& matrix,
const IVector& vector,
ArgType argType = UNSPECIFIED)
: BufferArg(matrix, argType), startPositions_(vector) {}
: BufferArg(matrix, argType), startPositions_(vector) {
bufferType_ = TENSOR_SEQUENCE_DATA;
}
~SequenceArg() {}
void* getIdBuf() const { return startPositions_.data(); }
size_t numSeqs() const { return startPositions_.numSeqs(); }
SequenceIdArg& getSequenceId() { return startPositions_; }
const SequenceIdArg& getSequenceId() const { return startPositions_; }
private:
SequenceIdArg startPositions_;
@ -235,6 +278,7 @@ public:
nnz_(nnz),
format_(format),
type_(type) {
bufferType_ = TENSOR_SPARSE;
CHECK((valueType == VALUE_TYPE_FLOAT) || (valueType == VALUE_TYPE_DOUBLE));
CHECK_EQ(shape_.ndims(), (size_t)2);
CHECK_EQ(row_.shape().ndims(), (size_t)1);
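A short hedged sketch (not from the commit) of what the constructor changes above buy us: a SparseMatrixArg now records TENSOR_SPARSE, so the new isSparse() predicate holds; the matrix sizes simply mirror the tests below, and the header paths are assumptions.

```cpp
#include "paddle/function/BufferArg.h"   // assumed header locations
#include "paddle/math/SparseMatrix.h"

using namespace paddle;

void sparseArgExample() {
  CpuSparseMatrix sparse(200, 300, 50);      // height, width, nnz
  SparseMatrixArg arg(sparse, UNSPECIFIED);  // constructor sets bufferType_ = TENSOR_SPARSE
  CHECK(arg.isSparse());
  CHECK_EQ(arg.shape().ndims(), (size_t)2);
}
```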

@ -14,9 +14,7 @@ limitations under the License. */
#include "BufferArg.h"
#include <gtest/gtest.h>
#include "Function.h"
#include "paddle/math/MemoryHandle.h"
#include "paddle/math/SparseMatrix.h"
namespace paddle {
@ -37,55 +35,4 @@ TEST(BufferTest, SequenceIdArg) {
EXPECT_EQ(buffer.numSeqs(), 9);
}
TEST(BufferTest, asArgument) {
MatrixPtr matrix = Matrix::create(100, 200);
VectorPtr vector = Vector::create(100, false);
CpuSparseMatrix sparse(200, 300, 50);
// prepare arguments
BufferArgs argments;
argments.addArg(*matrix);
argments.addArg(*vector);
argments.addArg(sparse);
// function
auto function = [=](const BufferArgs& inputs) {
EXPECT_EQ(inputs.size(), 3);
// check inputs[0]
EXPECT_EQ(inputs[0].shape().ndims(), 2);
EXPECT_EQ(inputs[0].shape()[0], 100);
EXPECT_EQ(inputs[0].shape()[1], 200);
EXPECT_EQ(inputs[0].data(), matrix->getData());
EXPECT_EQ(inputs[0].matrix<DEVICE_TYPE_CPU>().getHeight(),
matrix->getHeight());
EXPECT_EQ(inputs[0].matrix<DEVICE_TYPE_CPU>().getWidth(),
matrix->getWidth());
EXPECT_EQ(inputs[0].matrix<DEVICE_TYPE_CPU>().getData(), matrix->getData());
// check inputs[1]
EXPECT_EQ(inputs[1].shape().ndims(), 1);
EXPECT_EQ(inputs[1].shape()[0], 100);
EXPECT_EQ(inputs[1].data(), vector->getData());
CpuVector inVector = inputs[1].vector<real, DEVICE_TYPE_CPU>();
EXPECT_EQ(inVector.getSize(), vector->getSize());
EXPECT_EQ(inVector.getData(), vector->getData());
// check inputs[2]
EXPECT_EQ(inputs[2].shape().ndims(), 2);
EXPECT_EQ(inputs[2].shape()[0], 200);
EXPECT_EQ(inputs[2].shape()[1], 300);
EXPECT_EQ(inputs[2].data(), sparse.getData());
// CHECK_EQ(inputs[2].sparse().nnz(), 50);
// CHECK_EQ(inputs[2].sparse().dataFormat(), SPARSE_CSR_FORMAT);
// CHECK_EQ(inputs[2].sparse().dataType(), SPARSE_FLOAT_VALUE);
EXPECT_EQ(inputs[2].sparse().getRowBuf(), sparse.getRows());
EXPECT_EQ(inputs[2].sparse().getColBuf(), sparse.getCols());
};
// call function
function(argments);
}
} // namespace paddle

@ -19,13 +19,13 @@ if(WITH_TESTING)
# TODO:
# file(GLOB test_files . *OpTest.cpp)
# add_executable(${test_bin} EXCLUDE_FROM_ALL ${test_files})
# add_simple_unittest(CrossMapNormalOpTest)
add_simple_unittest(CrossMapNormalOpTest)
add_simple_unittest(TensorShapeTest)
add_simple_unittest(TensorTypeTest)
add_simple_unittest(BufferArgTest)
add_simple_unittest(FunctionTest)
add_simple_unittest(ContextProjectionOpTest)
add_simple_unittest(PadOpTest)
# add_simple_unittest(ContextProjectionOpTest)
endif()
endif()

File diff suppressed because it is too large.

@ -21,14 +21,14 @@ namespace paddle {
/**
* \brief Context Projection Forward.
*
* \param[out] outputs output data.
* \param[in] input input data.
* \param[in] weight input weight.
* \param[in] sequence input data.
* \param[in] context_length consecutive rows for concatenation.
* \param[in] context_start context start position.
\param[in] begin_pad beginning pad position.
* \param[in] is_padding whether padding 0 or not.
* \param[in/out] outputs output data.
* \param[in] input input data.
* \param[in] weight input weight.
* \param[in] sequence input data.
* \param[in] context_length consecutive rows for concatenation.
* \param[in] context_start context start position.
\param[in] begin_pad beginning pad position.
* \param[in] is_padding whether padding 0 or not.
*
*/
template <DeviceType DType>
@ -56,7 +56,7 @@ void ContextProjectionForward(
*/
template <DeviceType DType>
void ContextProjectionBackward(
typename Tensor<real, DType>::Matrix& out_grad,
const typename Tensor<real, DType>::Matrix& out_grad,
typename Tensor<real, DType>::Matrix& in_grad,
typename Tensor<real, DType>::Matrix& w_grad,
const typename Tensor<int, DType>::Vector& seq_vec,
@ -68,7 +68,7 @@ void ContextProjectionBackward(
template <DeviceType DType>
void ContextProjectionBackwardData(
typename Tensor<real, DType>::Matrix& out_grad,
const typename Tensor<real, DType>::Matrix& out_grad,
typename Tensor<real, DType>::Matrix& in_grad,
const typename Tensor<int, DType>::Vector& sequence,
size_t context_length,
@ -76,7 +76,7 @@ void ContextProjectionBackwardData(
template <DeviceType DType>
void ContextProjectionBackwardWeight(
typename Tensor<real, DType>::Matrix& out_grad,
const typename Tensor<real, DType>::Matrix& out_grad,
typename Tensor<real, DType>::Matrix& w_grad,
const typename Tensor<int, DType>::Vector& seq_vec,
size_t context_length,

@ -138,10 +138,10 @@ void ContextProjectionForward<DEVICE_TYPE_GPU>(GpuMatrix& output,
begin_pad);
}
__global__ void KeContextProjectionBackwardData(real* out_grad,
__global__ void KeContextProjectionBackwardData(const real* out_grad,
const int* sequence,
real* in_grad,
int input_dim,
size_t input_dim,
int context_length,
int context_start) {
int idx = threadIdx.x;
@ -152,7 +152,8 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
real value = 0;
int instances = seq_end - seq_start + context_length - 1;
out_grad += seq_start * input_dim * context_length;
auto out = const_cast<real*>(out_grad);
out += seq_start * input_dim * context_length;
in_grad += seq_start * input_dim;
for (int k = 0; k <= input_dim / block_size; k++) {
if (idx < input_dim) {
@ -169,7 +170,7 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
int outx = (i - context_length) < 0 ? i : (context_length - 1);
int outy = (i - context_length) < 0 ? 0 : (i - (context_length - 1));
real* output_r =
out_grad + outy * input_dim * context_length + outx * input_dim;
out + outy * input_dim * context_length + outx * input_dim;
for (int j = outy; j < seq_end - seq_start; j++) {
value += output_r[idx];
if (j - outy == outx) break;
@ -194,7 +195,7 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
* @param[in] context_start context start.
*
*/
void hl_context_projection_backward_data(real* out_grad,
void hl_context_projection_backward_data(const real* out_grad,
const int* sequence,
real* input_grad,
size_t num_sequences,
@ -216,7 +217,7 @@ void hl_context_projection_backward_data(real* out_grad,
}
template <>
void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(GpuMatrix& out_grad,
void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(const GpuMatrix& out_grad,
GpuMatrix& in_grad,
const GpuIVector& sequence,
size_t context_length,
@ -231,7 +232,7 @@ void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(GpuMatrix& out_grad,
}
template<int THREADS_X, int THREADS_Y>
__global__ void KeContextProjectionBackwardWeight(real* out_grad,
__global__ void KeContextProjectionBackwardWeight(const real* out_grad,
const int* sequence,
real* w_grad,
int num_sequences,
@ -254,7 +255,8 @@ __global__ void KeContextProjectionBackwardWeight(real* out_grad,
for (int seqId = idy; seqId < num_sequences; seqId += THREADS_Y) {
int seq_start = sequence[seqId];
int seq_end = sequence[seqId+1];
output_r = out_grad + seq_start * w_dim * context_length;
output_r = const_cast<real*>(out_grad)
+ seq_start * w_dim * context_length;
if (context_start < 0) {
if (padId + context_start < 0) {
@ -318,7 +320,7 @@ __global__ void KeContextProjectionBackwardWeight(real* out_grad,
* beginning.
*
*/
void hl_context_projection_backward_weight(real* out_grad,
void hl_context_projection_backward_weight(const real* out_grad,
const int* sequence,
real* w_grad,
size_t num_sequences,
@ -346,7 +348,7 @@ void hl_context_projection_backward_weight(real* out_grad,
template <>
void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(
GpuMatrix& out_grad,
const GpuMatrix& out_grad,
GpuMatrix& w_grad,
const GpuIVector& seq_vec,
size_t context_length,
@ -365,7 +367,7 @@ void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(
}
template <>
void ContextProjectionBackward<DEVICE_TYPE_GPU>(GpuMatrix& out_grad,
void ContextProjectionBackward<DEVICE_TYPE_GPU>(const GpuMatrix& out_grad,
GpuMatrix& in_grad,
GpuMatrix& w_grad,
const GpuIVector& sequence,

@ -56,22 +56,25 @@ void testMatrixProjectionForward(int context_start,
cpu_out.randomizeUniform();
gpu_out.copyFrom(cpu_out);
compare.getCpuFunction()->calc(
{Tensor(cpu_in.getData(), Dims{batch_size, input_dim}),
Tensor(cpu_weight ? cpu_weight->getData() : nullptr,
Dims{pad, input_dim}),
Tensor(reinterpret_cast<real*>(cpu_seq->getData()),
Dims{cpu_seq->getSize()})},
{Tensor(cpu_out.getData(), Dims{batch_size, input_dim * context_length})},
{});
compare.getGpuFunction()->calc(
{Tensor(gpu_in.getData(), Dims{batch_size, input_dim}),
Tensor(gpu_weight ? gpu_weight->getData() : nullptr,
Dims{pad, input_dim}),
Tensor(reinterpret_cast<real*>(gpu_seq->getData()),
Dims{gpu_seq->getSize()})},
{Tensor(gpu_out.getData(), Dims{batch_size, input_dim * context_length})},
{});
BufferArgs cpu_inputs;
BufferArgs cpu_outputs;
cpu_inputs.addArg(cpu_in, *cpu_seq);
if (cpu_weight) {
cpu_inputs.addArg(*cpu_weight, *cpu_seq);
}
cpu_outputs.addArg(cpu_out, *cpu_seq, ADD_TO);
compare.getCpuFunction()->calc(cpu_inputs, cpu_outputs);
BufferArgs gpu_inputs;
BufferArgs gpu_outputs;
gpu_inputs.addArg(gpu_in, *gpu_seq);
if (gpu_weight) {
gpu_inputs.addArg(*gpu_weight, *gpu_seq);
}
gpu_outputs.addArg(gpu_out, *gpu_seq, ADD_TO);
compare.getGpuFunction()->calc(gpu_inputs, gpu_outputs);
autotest::TensorCheckEqual(cpu_out, gpu_out);
}
@ -117,25 +120,23 @@ void testMatrixProjectionBackward(int context_start,
gpu_w_grad->copyFrom(*cpu_w_grad);
}
compare.getCpuFunction()->calc(
{Tensor(cpu_in_grad.getData(), Dims{batch_size, input_dim}),
Tensor(cpu_w_grad ? cpu_w_grad->getData() : nullptr,
Dims{pad, input_dim}),
Tensor(reinterpret_cast<real*>(cpu_seq->getData()),
Dims{cpu_seq->getSize()})},
{Tensor(cpu_out_grad.getData(),
Dims{batch_size, input_dim * context_length})},
{});
compare.getGpuFunction()->calc(
{Tensor(gpu_in_grad.getData(), Dims{batch_size, input_dim}),
Tensor(gpu_w_grad ? gpu_w_grad->getData() : nullptr,
Dims{pad, input_dim}),
Tensor(reinterpret_cast<real*>(gpu_seq->getData()),
Dims{gpu_seq->getSize()})},
{Tensor(gpu_out_grad.getData(),
Dims{batch_size, input_dim * context_length})},
{});
BufferArgs cpu_inputs;
BufferArgs cpu_outputs;
cpu_inputs.addArg(cpu_out_grad, *cpu_seq);
cpu_outputs.addArg(cpu_in_grad, *cpu_seq, ADD_TO);
cpu_outputs.addArg(
cpu_w_grad ? *cpu_w_grad : CpuMatrix(nullptr, 0, input_dim), ADD_TO);
compare.getCpuFunction()->calc(cpu_inputs, cpu_outputs);
BufferArgs gpu_inputs;
BufferArgs gpu_outputs;
gpu_inputs.addArg(gpu_out_grad, *gpu_seq);
gpu_outputs.addArg(gpu_in_grad, *gpu_seq, ADD_TO);
gpu_outputs.addArg(
gpu_w_grad ? *gpu_w_grad : GpuMatrix(nullptr, 0, input_dim), ADD_TO);
compare.getGpuFunction()->calc(gpu_inputs, gpu_outputs);
autotest::TensorCheckErr(cpu_in_grad, gpu_in_grad);
if (is_padding) {

@ -188,8 +188,13 @@ public:
CHECK(inputs[0].shape() == inputs[3].shape());
CHECK(inputs[0].shape() == outputs[0].shape());
// TODO(hedaoyuan): need support ASSIGN_TO mode.
CHECK_EQ(outputs[0].getArgType(), ADD_TO);
if (outputs[0].getArgType() != ADD_TO) {
// Currently, the algorithm implementation accumulates into the output (ADD_TO mode);
// to support the ASSIGN_TO calculation, the output needs to be cleared first.
typename Tensor<real, Device>::Vector tmp(
outputs[0].shape().getElements(), outputs[0].data<real>());
tmp.zero();
}
size_t samples = inputs[0].shape()[0];
size_t channels = inputs[0].shape()[1];

@ -27,15 +27,19 @@ TEST(CrossMapNormal, real) {
<< " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW
<< " size=" << size;
FunctionCompare compare("CrossMapNormal",
FuncConfig()
.set("size", size)
.set("scale", (real)1.5)
.set("pow", (real)0.5));
Dims dims{numSamples, channels, imgSizeH, imgSizeW};
compare.cmpWithArg({Tensor(nullptr, dims)},
{Tensor(nullptr, dims), Tensor(nullptr, dims)},
{});
// init Test object
FunctionCompare test("CrossMapNormal",
FuncConfig()
.set("size", size)
.set("scale", (real)1.5)
.set("pow", (real)0.5));
// prepare input arguments
TensorShape shape{numSamples, channels, imgSizeH, imgSizeW};
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape));
// run Function
test.run();
}
}
}
@ -53,18 +57,19 @@ TEST(CrossMapNormalGrad, real) {
<< " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW
<< " size=" << size;
FunctionCompare compare("CrossMapNormalGrad",
FuncConfig()
.set("size", size)
.set("scale", (real)1.5)
.set("pow", (real)0.5));
Dims dims{numSamples, channels, imgSizeH, imgSizeW};
compare.cmpWithArg({Tensor(nullptr, dims),
Tensor(nullptr, dims),
Tensor(nullptr, dims),
Tensor(nullptr, dims)},
{Tensor(nullptr, dims)},
{});
FunctionCompare test("CrossMapNormalGrad",
FuncConfig()
.set("size", size)
.set("scale", (real)1.5)
.set("pow", (real)0.5));
TensorShape shape{numSamples, channels, imgSizeH, imgSizeW};
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape));
// run Function
test.run();
}
}
}

@ -79,15 +79,25 @@ FuncConfig& FuncConfig::set<bool>(const std::string& key, bool v) {
void BufferArgs::addArg(const Matrix& arg,
const TensorShape& shape,
ArgType argType) {
args_.push_back(std::make_shared<BufferArg>(arg, shape, argType));
_args_.push_back(new BufferArg(arg, shape, argType));
addArg(*_args_.back());
}
void BufferArgs::addArg(const CpuSparseMatrix& arg, ArgType argType) {
args_.push_back(std::make_shared<SparseMatrixArg>(arg, argType));
_args_.push_back(new SparseMatrixArg(arg, argType));
addArg(*_args_.back());
}
void BufferArgs::addArg(const GpuSparseMatrix& arg, ArgType argType) {
args_.push_back(std::make_shared<SparseMatrixArg>(arg, argType));
_args_.push_back(new SparseMatrixArg(arg, argType));
addArg(*_args_.back());
}
void BufferArgs::addArg(const Matrix& matrix,
const IVector& vector,
ArgType argType) {
_args_.push_back(new SequenceArg(matrix, vector, argType));
addArg(*_args_.back());
}
ClassRegistrar<FunctionBase> FunctionBase::funcRegistrar_;

@ -50,19 +50,44 @@ protected:
* Argument type for Function::calc().
* A BufferArgs contains a set of BufferArg,
* because Function can have multiple inputs and outputs.
*
* addArg() with a Matrix object is used to adapt a Layer Argument.
* It creates a BufferArg object inside addArg(),
* which is freed in the destructor of BufferArgs.
*
* addArg() with a BufferArg object just saves the BufferArg object's address,
* and the caller needs to guarantee that the BufferArg object remains valid
* for the lifetime of the BufferArgs.
*/
class BufferArgs {
public:
BufferArgs() {}
~BufferArgs() {
for (auto arg : _args_) {
delete arg;
}
}
size_t size() const { return args_.size(); }
// Add an argument into BufferArgs.
// The Tensor can be a Matrix, Vector, or IVector.
// For inputs, argType does not need to be specified.
// For outputs, argType must be specified as ASSIGN_TO or ADD_TO.
template <typename Tensor>
void addArg(const Tensor& arg, ArgType argType = UNSPECIFIED) {
args_.push_back(std::make_shared<BufferArg>(arg, argType));
void addArg(const Matrix& arg, ArgType argType = UNSPECIFIED) {
_args_.push_back(new BufferArg(arg, argType));
addArg(*_args_.back());
}
void addArg(const Vector& arg, ArgType argType = UNSPECIFIED) {
_args_.push_back(new BufferArg(arg, argType));
addArg(*_args_.back());
}
void addArg(const IVector& arg, ArgType argType = UNSPECIFIED) {
_args_.push_back(new BufferArg(arg, argType));
addArg(*_args_.back());
}
// Add arg into BufferArgs and reshape the arg.
@ -77,20 +102,37 @@ public:
void addArg(const CpuSparseMatrix& arg, ArgType argType = UNSPECIFIED);
void addArg(const GpuSparseMatrix& arg, ArgType argType = UNSPECIFIED);
void addArg(const Matrix& matrix,
const IVector& vector,
ArgType argType = UNSPECIFIED);
// get argument
const BufferArg& operator[](size_t num) const {
CHECK_LT(num, args_.size());
return *args_[num];
}
void addArg(BufferArg& arg) { args_.push_back(&arg); }
void addArg(SequenceIdArg& arg) { args_.push_back(&arg); }
void addArg(SequenceArg& arg) { args_.push_back(&arg); }
void addArg(SparseMatrixArg& arg) { args_.push_back(&arg); }
private:
std::vector<BufferArgPtr> args_;
std::vector<BufferArg*> args_;
// The BufferArg object is constructed and freed by BufferArgs.
std::vector<BufferArg*> _args_;
};
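To make the calling convention concrete, a hedged sketch (not part of the commit) of how a caller might drive a Function with BufferArgs, mirroring the updated ContextProjectionOpTest further down; `func`, the matrices, and the header path are placeholders.

```cpp
#include "paddle/function/Function.h"  // assumed header location

using namespace paddle;

// Hypothetical caller: inputs need no ArgType; outputs declare how the
// result is written (ASSIGN_TO overwrites, ADD_TO accumulates).
void callFunction(FunctionBase* func, const Matrix& in, Matrix& out) {
  BufferArgs inputs;
  BufferArgs outputs;
  inputs.addArg(in);
  outputs.addArg(out, ADD_TO);
  func->calc(inputs, outputs);
}
```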
/**
* \brief Base class for Function.
* The basic Function implementation requires overriding the init and calc interfaces.
*
* The caller needs to ensure the validity of the arguments
* during Function execution.
*
* Function inputs are read-only; Function outputs have two modes: ASSIGN_TO
* and ADD_TO.
* If output.getArgType() == ASSIGN_TO, this is assign mode, and the calculation

@ -14,6 +14,7 @@ limitations under the License. */
#include "Function.h"
#include <gtest/gtest.h>
#include "paddle/math/SparseMatrix.h"
namespace paddle {
@ -56,4 +57,110 @@ TEST(Function, BufferArgs) {
Function<DEVICE_TYPE_GPU>(gpuArgments);
}
/**
* Some test cases are used to check the consistency between the BufferArg-typed
* argument received by a Function and the original argument.
*
* Use Case:
* TEST() {
* Matrix matrix(...);
* CheckBufferArg lambda = [=](const BufferArg& arg) {
* // check matrix and arg are equivalent
* EXPECT_EQ(matrix, arg);
* }
*
* BufferArgs argments{matrix...};
* std::vector<CheckBufferArg> checkFunc{lambda...};
* testBufferArgs(argments, checkFunc);
* }
*/
typedef std::function<void(const BufferArg&)> CheckBufferArg;
void testBufferArgs(const BufferArgs& inputs,
const std::vector<CheckBufferArg>& check) {
EXPECT_EQ(inputs.size(), check.size());
for (size_t i = 0; i < inputs.size(); i++) {
check[i](inputs[i]);
}
}
void testBufferArgs(const BufferArgs& inputs, const CheckBufferArg& check) {
EXPECT_EQ(inputs.size(), 1);
check(inputs[0]);
}
TEST(Arguments, Matrix) {
MatrixPtr matrix = Matrix::create(100, 200);
CheckBufferArg check = [=](const BufferArg& arg) {
EXPECT_EQ(arg.shape().ndims(), 2);
EXPECT_EQ(arg.shape()[0], 100);
EXPECT_EQ(arg.shape()[1], 200);
EXPECT_EQ(arg.data(), matrix->getData());
EXPECT_EQ(arg.matrix<DEVICE_TYPE_CPU>().getHeight(), matrix->getHeight());
EXPECT_EQ(arg.matrix<DEVICE_TYPE_CPU>().getWidth(), matrix->getWidth());
EXPECT_EQ(arg.matrix<DEVICE_TYPE_CPU>().getData(), matrix->getData());
};
BufferArgs argments;
argments.addArg(*matrix);
std::vector<CheckBufferArg> checkFunc;
checkFunc.push_back(check);
testBufferArgs(argments, checkFunc);
}
TEST(Arguments, Vector) {
VectorPtr vector = Vector::create(100, false);
CheckBufferArg check = [=](const BufferArg& arg) {
EXPECT_EQ(arg.shape().ndims(), 1);
EXPECT_EQ(arg.shape()[0], 100);
EXPECT_EQ(arg.data(), vector->getData());
CpuVector inVector = arg.vector<real, DEVICE_TYPE_CPU>();
EXPECT_EQ(inVector.getSize(), vector->getSize());
EXPECT_EQ(inVector.getData(), vector->getData());
};
BufferArgs argments;
argments.addArg(*vector);
std::vector<CheckBufferArg> checkFunc;
checkFunc.push_back(check);
testBufferArgs(argments, checkFunc);
}
TEST(Arguments, CpuSparseMatrix) {
CpuSparseMatrix sparse(200, 300, 50);
CheckBufferArg check = [=](const BufferArg& arg) {
EXPECT_EQ(arg.shape().ndims(), 2);
EXPECT_EQ(arg.shape()[0], 200);
EXPECT_EQ(arg.shape()[1], 300);
EXPECT_EQ(arg.data(), sparse.getData());
// CHECK_EQ(arg.sparse().nnz(), 50);
// CHECK_EQ(arg.sparse().dataFormat(), SPARSE_CSR_FORMAT);
// CHECK_EQ(arg.sparse().dataType(), SPARSE_FLOAT_VALUE);
EXPECT_EQ(arg.sparse().getRowBuf(), sparse.getRows());
EXPECT_EQ(arg.sparse().getColBuf(), sparse.getCols());
};
BufferArgs argments;
argments.addArg(sparse);
std::vector<CheckBufferArg> checkFunc;
checkFunc.push_back(check);
testBufferArgs(argments, checkFunc);
}
TEST(Arguments, BufferArg) {
BufferArg arg(nullptr, VALUE_TYPE_FLOAT, {1, 2, 3});
CheckBufferArg check = [=](const BufferArg& arg) {
EXPECT_EQ(arg.shape().ndims(), 3);
EXPECT_EQ(arg.shape()[0], 1);
EXPECT_EQ(arg.shape()[1], 2);
EXPECT_EQ(arg.shape()[2], 3);
};
BufferArgs argments;
argments.addArg(arg);
testBufferArgs(argments, check);
}
} // namespace paddle
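One path the new tests above do not exercise is the addArg(Matrix, IVector) overload added in Function.cpp; a hedged sketch of such a test (hypothetical sizes, not part of the commit) could look like this:

```cpp
#include <gtest/gtest.h>
#include "paddle/function/Function.h"  // assumed header location

using namespace paddle;

// Hypothetical extra test: addArg(Matrix, IVector) wraps the pair in a SequenceArg.
TEST(Arguments, SequenceArg) {
  MatrixPtr data = Matrix::create(8, 16);
  IVectorPtr starts = IVector::create(3, /*useGpu=*/false);
  BufferArgs argments;
  argments.addArg(*data, *starts);
  ASSERT_EQ(argments.size(), (size_t)1);
  EXPECT_TRUE(argments[0].isSequenceArg());
  EXPECT_EQ(argments[0].sequence().numSeqs(), (size_t)2);  // 3 offsets -> 2 sequences
}
```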

File diff suppressed because it is too large.

Some files were not shown because too many files have changed in this diff.
