Commit 8d47499e1d ("update code", branch avx_docs) by dangqingqing, 8 years ago

@@ -25,9 +25,9 @@ addons:
     packages:
       - gcc-4.8
       - g++-4.8
+      - gfortran-4.8
       - git
       - build-essential
-      - libatlas-base-dev
       - python
       - python-pip
       - python2.7-dev

@@ -16,7 +16,7 @@
 set(CBLAS_FOUND OFF)

 ## Find MKL First.
-set(MKL_ROOT $ENV{MKL_ROOT} CACHE PATH "Folder contains MKL")
+set(MKL_ROOT $ENV{MKLROOT} CACHE PATH "Folder contains MKL")

 find_path(MKL_INCLUDE_DIR mkl.h PATHS
           ${MKL_ROOT}/include)

@@ -15,7 +15,6 @@
 INCLUDE(cblas)

 IF(NOT ${CBLAS_FOUND})
-    MESSAGE(FATAL_ERROR "Please install OpenBlas, MKL or ATLAS.")
     INCLUDE(ExternalProject)

     SET(CBLAS_SOURCES_DIR ${THIRD_PARTY_PATH}/openblas)
@@ -28,20 +27,40 @@ IF(NOT ${CBLAS_FOUND})
         SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/libopenblas.a" CACHE FILEPATH "openblas library" FORCE)
     ENDIF(WIN32)

+    IF(CMAKE_COMPILER_IS_GNUCC)
+        ENABLE_LANGUAGE(Fortran)
+        LIST(APPEND CBLAS_LIBRARIES gfortran pthread)
+    ENDIF(CMAKE_COMPILER_IS_GNUCC)
+
+    IF(NOT CMAKE_Fortran_COMPILER)
+        MESSAGE(FATAL_ERROR "To build lapack in libopenblas, "
+                "you need to set gfortran compiler: cmake .. -DCMAKE_Fortran_COMPILER=...")
+    ENDIF(NOT CMAKE_Fortran_COMPILER)
+
     ExternalProject_Add(
         openblas
         ${EXTERNAL_PROJECT_LOG_ARGS}
-        URL                 "https://github.com/xianyi/OpenBLAS/archive/v0.2.19.tar.gz"
+        GIT_REPOSITORY      https://github.com/xianyi/OpenBLAS.git
+        GIT_TAG             v0.2.19
         PREFIX              ${CBLAS_SOURCES_DIR}
         INSTALL_DIR         ${CBLAS_INSTALL_DIR}
         BUILD_IN_SOURCE     1
-        CONFIGURE_COMMAND   ""
-        BUILD_COMMAND       make CC=${CMAKE_C_COMPILER} FC=${CMAKE_Fortran_COMPILER}
-        INSTALL_COMMAND     make install PREFIX=<INSTALL_DIR>
+        BUILD_COMMAND       ${CMAKE_MAKE_PROGRAM} FC=${CMAKE_Fortran_COMPILER} CC=${CMAKE_C_COMPILER} HOSTCC=${CMAKE_C_COMPILER} NO_SHARED=1 libs netlib
+        INSTALL_COMMAND     ${CMAKE_MAKE_PROGRAM} install NO_SHARED=1 PREFIX=<INSTALL_DIR>
         UPDATE_COMMAND      ""
+        CONFIGURE_COMMAND   ""
     )
+    ExternalProject_Add_Step(
+        openblas lapacke_install
+        COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h" "${CBLAS_INSTALL_DIR}/include/lapacke_mangling.h"
+        COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke.h" "${CBLAS_INSTALL_DIR}/include/lapacke.h"
+        COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_config.h" "${CBLAS_INSTALL_DIR}/include/lapacke_config.h"
+        COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_utils.h" "${CBLAS_INSTALL_DIR}/include/lapacke_utils.h"
+        DEPENDEES install
+    )
     LIST(APPEND external_project_dependencies openblas)
-ENDIF()
+ENDIF(NOT ${CBLAS_FOUND})

 INCLUDE_DIRECTORIES(${CBLAS_INC_DIR})

@@ -54,6 +54,7 @@ ExternalProject_Add(
     CONFIGURE_COMMAND
         ${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/protobuf/cmake
             -Dprotobuf_BUILD_TESTS=OFF
+            -DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}
             -DCMAKE_POSITION_INDEPENDENT_CODE=ON
             -DCMAKE_BUILD_TYPE=Release
             -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}

@@ -31,6 +31,7 @@ IF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
             "please use pip to upgrade protobuf.")
     ENDIF(${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0")
 ELSE(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
+    MESSAGE(FATAL_ERROR "Please install python 2.7 before building PaddlePaddle.")
     ##################################### PYTHON ########################################
     SET(PYTHON_SOURCES_DIR ${THIRD_PARTY_PATH}/python)
     SET(PYTHON_INSTALL_DIR ${THIRD_PARTY_PATH}/install/python)

@@ -96,6 +96,7 @@ set(COMMON_FLAGS
     -Wno-unused-parameter
     -Wno-unused-function
     -Wno-error=literal-suffix
+    -Wno-error=sign-compare
     -Wno-error=unused-local-typedefs)

 set(GPU_COMMON_FLAGS
@@ -105,6 +106,7 @@ set(GPU_COMMON_FLAGS
     -Wdelete-non-virtual-dtor
     -Wno-unused-parameter
     -Wno-unused-function
+    -Wno-error=sign-compare
     -Wno-error=literal-suffix
     -Wno-error=unused-local-typedefs
     -Wno-error=unused-function  # Warnings in Numpy Header.

@@ -21,6 +21,7 @@ ELSE(WIN32)
         SET(MACOS_VERSION ${VERSION})
         SET(HOST_SYSTEM "macosx")
     ELSE(APPLE)
+
         IF(EXISTS "/etc/issue")
             FILE(READ "/etc/issue" LINUX_ISSUE)
             IF(LINUX_ISSUE MATCHES "CentOS")
@@ -31,6 +32,14 @@ ELSE(WIN32)
                 SET(HOST_SYSTEM "ubuntu")
             ENDIF()
         ENDIF(EXISTS "/etc/issue")
+
+        IF(EXISTS "/etc/redhat-release")
+            FILE(READ "/etc/redhat-release" LINUX_ISSUE)
+            IF(LINUX_ISSUE MATCHES "CentOS")
+                SET(HOST_SYSTEM "centos")
+            ENDIF()
+        ENDIF(EXISTS "/etc/redhat-release")
+
     ENDIF(APPLE)
 ENDIF(WIN32)
@@ -47,7 +56,7 @@ SET(EXTERNAL_PROJECT_LOG_ARGS
     LOG_DOWNLOAD    0     # Wrap download in script to log output
     LOG_UPDATE      1     # Wrap update in script to log output
     LOG_CONFIGURE   1     # Wrap configure in script to log output
-    LOG_BUILD       1     # Wrap build in script to log output
+    LOG_BUILD       0     # Wrap build in script to log output
     LOG_TEST        1     # Wrap test in script to log output
-    LOG_INSTALL     1     # Wrap install in script to log output
+    LOG_INSTALL     0     # Wrap install in script to log output
 )

@@ -4,6 +4,8 @@ Installing from Sources
 * [1. Download and Setup](#download)
 * [2. Requirements](#requirements)
 * [3. Build on Ubuntu](#ubuntu)
+* [4. Build on CentOS](#centos)

 ## <span id="download">Download and Setup</span>
 You can download PaddlePaddle from the [github source](https://github.com/PaddlePaddle/Paddle).
@@ -64,7 +66,8 @@ As a simple example, consider the following:
 1. **BLAS Dependencies(optional)**

-    Paddle will find BLAS from system's default path. But you can specify MKL, OpenBLAS or ATLAS via `MKL_ROOT`, `OPENBLAS_ROOT` or `ATLAS_ROOT`.
+    CMake will search for BLAS libraries on the system. If they are not found, OpenBLAS will be downloaded, built and installed automatically.
+    To use a preinstalled BLAS, you can simply specify MKL, OpenBLAS or ATLAS via `MKL_ROOT`, `OPENBLAS_ROOT` or `ATLAS_ROOT`.

    ```bash
    # specify MKL
@@ -99,7 +102,7 @@ As a simple example, consider the following:
    ```bash
    # necessary
    sudo apt-get update
-   sudo apt-get install -y g++ make cmake build-essential libatlas-base-dev python python-pip libpython-dev git
+   sudo apt-get install -y g++ make cmake build-essential python python-pip libpython-dev git
    sudo pip install wheel numpy
    sudo pip install 'protobuf>=3.0.0'
    ```
@@ -150,3 +153,64 @@ export PATH=<path to install>/bin:$PATH
 # install PaddlePaddle Python modules.
 sudo pip install <path to install>/opt/paddle/share/wheels/*.whl
 ```
+
+## <span id="centos">Build on CentOS 7</span>
+
+### Install Dependencies
+
+- **CPU Dependencies**
+
+    ```bash
+    # necessary
+    sudo yum update
+    sudo yum install -y epel-release
+    sudo yum install -y make cmake3 python-devel python-pip gcc-gfortran swig git
+    sudo pip install wheel numpy
+    sudo pip install 'protobuf>=3.0.0'
+    ```
+
+- **GPU Dependencies (optional)**
+
+    To build the GPU version, you will need the following installed:
+
+    1. a CUDA-capable GPU
+    2. a supported version of Linux with a gcc compiler and toolchain
+    3. the NVIDIA CUDA Toolkit (available at http://developer.nvidia.com/cuda-downloads)
+    4. the NVIDIA cuDNN Library (available at https://developer.nvidia.com/cudnn)
+
+    The CUDA development environment relies on tight integration with the host development environment,
+    including the host compiler and C runtime libraries, and is therefore only supported on
+    distribution versions that have been qualified for this CUDA Toolkit release.
+
+    After downloading the cuDNN library, issue the following commands:
+
+    ```bash
+    sudo tar -xzf cudnn-7.5-linux-x64-v5.1.tgz -C /usr/local
+    sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib64/libcudnn*
+    ```
+
+    Then you need to set the LD\_LIBRARY\_PATH and PATH environment variables in ~/.bashrc.
+
+    ```bash
+    export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
+    export PATH=/usr/local/cuda/bin:$PATH
+    ```
+
+### Build and Install
+
+As usual, the best option is to create a build folder under the Paddle project directory.
+
+```bash
+mkdir build && cd build
+```
+
+Finally, you can build and install PaddlePaddle:
+
+```bash
+# you can add build options here, such as:
+cmake3 .. -DCMAKE_INSTALL_PREFIX=<path to install>
+# please use sudo make install if you want to install PaddlePaddle into the system
+make -j `nproc` && make install
+# set the PaddlePaddle installation path in ~/.bashrc
+export PATH=<path to install>/bin:$PATH
+# install PaddlePaddle Python modules.
+sudo pip install <path to install>/opt/paddle/share/wheels/*.whl
+```

@@ -32,7 +32,7 @@ An example of using pooling_layer is shown below; see :ref:`api_trainer_config_helpers for details:

 - `pooling_type` currently supports two types, MaxPooling() and AvgPooling().
-- When `agg_level=AggregateLevel.TIMESTEP` (the default):
+- When `agg_level=AggregateLevel.EACH_TIMESTEP` (the default):
   - Effect: a two-level sequence is reduced to a zero-level sequence, or a single-level sequence is reduced to a zero-level sequence
   - Input: a two-level sequence, or a single-level sequence
@@ -54,7 +54,7 @@ An example of using last_seq is shown below (:ref:`api_trainer_config_helpers_layers_first_

     last = last_seq(input=layer,
                     agg_level=AggregateLevel.EACH_SEQUENCE)

-- When `agg_level=AggregateLevel.TIMESTEP` (the default):
+- When `agg_level=AggregateLevel.EACH_TIMESTEP` (the default):
   - Effect: a two-level sequence is reduced to a zero-level sequence, or a single-level sequence is reduced to a zero-level sequence
   - Input: a two-level sequence or a single-level sequence

[File diff suppressed because it is too large]

[Binary image file changed; preview not shown (new size: 70 KiB)]

@@ -20,23 +20,27 @@ limitations under the License. */
 namespace paddle {

 const SequenceArg& BufferArg::sequence() const {
-  // CHECK_EQ(bufferType_, TENSOR_SEQUENCE_DATA);
+  CHECK_EQ(bufferType_, TENSOR_SEQUENCE_DATA);
   return dynamic_cast<const SequenceArg&>(*this);
 }

 const SparseMatrixArg& BufferArg::sparse() const {
-  // CHECK_EQ(bufferType_, TENSOR_SPARSE);
+  CHECK_EQ(bufferType_, TENSOR_SPARSE);
   return dynamic_cast<const SparseMatrixArg&>(*this);
 }

 SparseMatrixArg::SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType)
     : BufferArg(sparse, argType),
       row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
-      col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {}
+      col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {
+  bufferType_ = TENSOR_SPARSE;
+}

 SparseMatrixArg::SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType)
     : BufferArg(sparse, argType),
       row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
-      col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {}
+      col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {
+  bufferType_ = TENSOR_SPARSE;
+}

 }  // namespace paddle

@@ -23,10 +23,11 @@ limitations under the License. */
 namespace paddle {

 enum BufferType {
-  TENSOR_NORMAL = 0,
-  TENSOR_SEQUENCE_ID = 1,
-  TENSOR_SEQUENCE_DATA = 2,
-  TENSOR_SPARSE = 3
+  TENSOR_UNKNOWN = 0,
+  TENSOR_NORMAL = 1,
+  TENSOR_SEQUENCE_ID = 2,
+  TENSOR_SEQUENCE_DATA = 3,
+  TENSOR_SPARSE = 4
 };

 enum SparseDataType {
@@ -39,7 +40,6 @@ enum SparseDataFormat { SPARSE_CSR_FORMAT = 0, SPARSE_CSC_FORMAT = 1 };
 class BufferArg;
 class SequenceArg;
 class SparseMatrixArg;
-typedef std::shared_ptr<BufferArg> BufferArgPtr;

 /**
  * \brief BufferArg used as the argument type of Function.
@@ -50,6 +50,11 @@ typedef std::shared_ptr<BufferArg> BufferArgPtr;
  * 3. SequenceArg for a Buffer of sequence data.
  * 4. SparseMatrixArg for a Buffer of sparse matrix.
  *
+ * Buffer shape
+ * For most buffers, the first dimension `shape()[0]` represents
+ * the size of the mini-batch.
+ *
+ * Buffer argType
  * There is an ArgType property for the BufferArg used as Function Output.
  * Whether the result of the Function calculation is assigned to the
  * output Buffer or added to the output Buffer is determined by the
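Editor's note: the ASSIGN_TO / ADD_TO distinction documented above is the core contract for Function outputs. A minimal standalone sketch of the two modes (hypothetical names, not part of this diff): the only difference is whether the kernel overwrites the output buffer or accumulates into it.

```cpp
#include <cstddef>
#include <vector>

enum ArgType { UNSPECIFIED = 0, ASSIGN_TO = 1, ADD_TO = 2 };

// Hypothetical element-wise square "function" showing the two output modes:
// ASSIGN_TO overwrites the output buffer, ADD_TO accumulates into it.
void squareCalc(const std::vector<float>& in,
                std::vector<float>& out,
                ArgType argType) {
  for (std::size_t i = 0; i < in.size(); ++i) {
    if (argType == ASSIGN_TO) {
      out[i] = in[i] * in[i];   // assign mode: overwrite
    } else {                    // ADD_TO
      out[i] += in[i] * in[i];  // add mode: accumulate
    }
  }
}
```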
@@ -71,6 +76,14 @@ public:
   ArgType getArgType() const { return argType_; }

 public:
+  BufferArg(ValueType valueType,
+            const TensorShape& shape,
+            ArgType argType = UNSPECIFIED)
+      : buf_(nullptr),
+        valueType_(valueType),
+        shape_(shape),
+        argType_(argType) {}
+
   BufferArg(void* buf,
             ValueType valueType,
             const TensorShape& shape,
@@ -86,6 +99,7 @@ public:
         valueType_(DataType<real>::value),
         shape_(2),
         argType_(argType) {
+    bufferType_ = TENSOR_NORMAL;
     shape_.setDim(0, matrix.getHeight());
     shape_.setDim(1, matrix.getWidth());
   }
@@ -98,6 +112,7 @@ public:
         valueType_(DataType<real>::value),
         shape_(shape),
         argType_(argType) {
+    bufferType_ = TENSOR_NORMAL;
     CHECK_EQ(matrix.getElementCnt(), shape.getElements());
   }
@@ -107,6 +122,7 @@ public:
         valueType_(DataType<real>::value),
         shape_(1),
         argType_(argType) {
+    bufferType_ = TENSOR_NORMAL;
     shape_.setDim(0, vector.getSize());
   }
@@ -116,6 +132,7 @@ public:
         valueType_(VALUE_TYPE_INT32),
         shape_(1),
         argType_(argType) {
+    bufferType_ = TENSOR_NORMAL;
     shape_.setDim(0, vector.getSize());
   }
@@ -150,6 +167,8 @@ public:
   ValueType valueType() const { return valueType_; }
   BufferType bufferType() const { return bufferType_; }
   const TensorShape& shape() const { return shape_; }
+  bool isSparse() const { return (TENSOR_SPARSE == bufferType_); }
+  bool isSequenceArg() const { return TENSOR_SEQUENCE_DATA == bufferType_; }

   const SequenceArg& sequence() const;
   const SparseMatrixArg& sparse() const;
@@ -158,8 +177,8 @@ protected:
   void* buf_;
   ValueType valueType_;
   TensorShape shape_;
-  BufferType bufferType_;
-  ArgType argType_ = UNSPECIFIED;
+  BufferType bufferType_{TENSOR_UNKNOWN};
+  ArgType argType_{UNSPECIFIED};
   // leading dimensions. The size is dims_.size()
   // Dims lds_;
 };
@@ -170,15 +189,24 @@ protected:
 // if a < b then value_.buf_[a] < value_.buf_[b]
 class SequenceIdArg : public BufferArg {
 public:
+  SequenceIdArg(const TensorShape& shape, ArgType argType = UNSPECIFIED)
+      : BufferArg(VALUE_TYPE_INT32, shape, argType) {
+    CHECK_EQ(shape_.ndims(), (size_t)1);
+    CHECK_GT(shape_[0], 1);
+    numSeqs_ = shape_[0] - 1;
+  }
+
   SequenceIdArg(void* buf,
                 const TensorShape& shape,
                 ArgType argType = UNSPECIFIED)
       : BufferArg(buf, VALUE_TYPE_INT32, shape, argType) {
+    bufferType_ = TENSOR_SEQUENCE_ID;
     CHECK_EQ(shape_.ndims(), (size_t)1);
     numSeqs_ = shape_[0] - 1;
   }

   SequenceIdArg(const IVector& vector) : BufferArg(vector) {
+    bufferType_ = TENSOR_SEQUENCE_ID;
     numSeqs_ = shape_[0] - 1;
   }

@@ -190,26 +218,41 @@ private:
   size_t numSeqs_;
 };

-// sequence data
+// sequences data
+// For mini-batch calculation, one batch can contain
+// more than one sequence of data.
+// SequenceArg can be used to represent sequences of
+// multiple unequal lengths.
 class SequenceArg : public BufferArg {
 public:
+  SequenceArg(ValueType valueType,
+              const TensorShape& shape,
+              ArgType argType = UNSPECIFIED)
+      : BufferArg(valueType, shape, argType), startPositions_(TensorShape()) {}
+
   SequenceArg(void* buf,
               ValueType valueType,
               const TensorShape& shape,
               const SequenceIdArg& startPositions,
               ArgType argType = UNSPECIFIED)
       : BufferArg(buf, valueType, shape, argType),
-        startPositions_(startPositions) {}
+        startPositions_(startPositions) {
+    bufferType_ = TENSOR_SEQUENCE_DATA;
+  }

   SequenceArg(const Matrix& matrix,
               const IVector& vector,
               ArgType argType = UNSPECIFIED)
-      : BufferArg(matrix, argType), startPositions_(vector) {}
+      : BufferArg(matrix, argType), startPositions_(vector) {
+    bufferType_ = TENSOR_SEQUENCE_DATA;
+  }

   ~SequenceArg() {}

   void* getIdBuf() const { return startPositions_.data(); }
   size_t numSeqs() const { return startPositions_.numSeqs(); }
+  SequenceIdArg& getSequenceId() { return startPositions_; }
+  const SequenceIdArg& getSequenceId() const { return startPositions_; }

 private:
   SequenceIdArg startPositions_;
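Editor's note: the start-positions layout that SequenceIdArg validates (`numSeqs_ = shape_[0] - 1`) packs variable-length sequences into a single mini-batch. A standalone sketch of the encoding, with illustrative values only (not part of this diff):

```cpp
#include <cstdio>

int main() {
  // Three sequences of lengths 4, 2 and 5 packed into an 11-row batch:
  // sequence i occupies rows [startPositions[i], startPositions[i + 1]).
  const int startPositions[] = {0, 4, 6, 11};
  const int numSeqs = sizeof(startPositions) / sizeof(startPositions[0]) - 1;
  for (int i = 0; i < numSeqs; ++i) {
    std::printf("sequence %d: rows [%d, %d)\n",
                i, startPositions[i], startPositions[i + 1]);
  }
  return 0;  // prints three half-open row ranges
}
```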
@@ -235,6 +278,7 @@ public:
         nnz_(nnz),
         format_(format),
         type_(type) {
+    bufferType_ = TENSOR_SPARSE;
     CHECK((valueType == VALUE_TYPE_FLOAT) || (valueType == VALUE_TYPE_DOUBLE));
     CHECK_EQ(shape_.ndims(), (size_t)2);
     CHECK_EQ(row_.shape().ndims(), (size_t)1);

@@ -14,9 +14,7 @@ limitations under the License. */

 #include "BufferArg.h"
 #include <gtest/gtest.h>
-#include "Function.h"
 #include "paddle/math/MemoryHandle.h"
-#include "paddle/math/SparseMatrix.h"

 namespace paddle {

@@ -37,55 +35,4 @@ TEST(BufferTest, SequenceIdArg) {
   EXPECT_EQ(buffer.numSeqs(), 9);
 }

-TEST(BufferTest, asArgument) {
-  MatrixPtr matrix = Matrix::create(100, 200);
-  VectorPtr vector = Vector::create(100, false);
-  CpuSparseMatrix sparse(200, 300, 50);
-
-  // prepare arguments
-  BufferArgs argments;
-  argments.addArg(*matrix);
-  argments.addArg(*vector);
-  argments.addArg(sparse);
-
-  // function
-  auto function = [=](const BufferArgs& inputs) {
-    EXPECT_EQ(inputs.size(), 3);
-
-    // check inputs[0]
-    EXPECT_EQ(inputs[0].shape().ndims(), 2);
-    EXPECT_EQ(inputs[0].shape()[0], 100);
-    EXPECT_EQ(inputs[0].shape()[1], 200);
-    EXPECT_EQ(inputs[0].data(), matrix->getData());
-
-    EXPECT_EQ(inputs[0].matrix<DEVICE_TYPE_CPU>().getHeight(),
-              matrix->getHeight());
-    EXPECT_EQ(inputs[0].matrix<DEVICE_TYPE_CPU>().getWidth(),
-              matrix->getWidth());
-    EXPECT_EQ(inputs[0].matrix<DEVICE_TYPE_CPU>().getData(), matrix->getData());
-
-    // check inputs[1]
-    EXPECT_EQ(inputs[1].shape().ndims(), 1);
-    EXPECT_EQ(inputs[1].shape()[0], 100);
-    EXPECT_EQ(inputs[1].data(), vector->getData());
-    CpuVector inVector = inputs[1].vector<real, DEVICE_TYPE_CPU>();
-    EXPECT_EQ(inVector.getSize(), vector->getSize());
-    EXPECT_EQ(inVector.getData(), vector->getData());
-
-    // check inputs[2]
-    EXPECT_EQ(inputs[2].shape().ndims(), 2);
-    EXPECT_EQ(inputs[2].shape()[0], 200);
-    EXPECT_EQ(inputs[2].shape()[1], 300);
-    EXPECT_EQ(inputs[2].data(), sparse.getData());
-    // CHECK_EQ(inputs[2].sparse().nnz(), 50);
-    // CHECK_EQ(inputs[2].sparse().dataFormat(), SPARSE_CSR_FORMAT);
-    // CHECK_EQ(inputs[2].sparse().dataType(), SPARSE_FLOAT_VALUE);
-    EXPECT_EQ(inputs[2].sparse().getRowBuf(), sparse.getRows());
-    EXPECT_EQ(inputs[2].sparse().getColBuf(), sparse.getCols());
-  };
-
-  // call function
-  function(argments);
-}
-
 }  // namespace paddle

@@ -19,13 +19,13 @@ if(WITH_TESTING)
     # TODO:
     # file(GLOB test_files . *OpTest.cpp)
     # add_executable(${test_bin} EXCLUDE_FROM_ALL ${test_files})
-    # add_simple_unittest(CrossMapNormalOpTest)
+    add_simple_unittest(CrossMapNormalOpTest)
     add_simple_unittest(TensorShapeTest)
     add_simple_unittest(TensorTypeTest)
     add_simple_unittest(BufferArgTest)
     add_simple_unittest(FunctionTest)
-    add_simple_unittest(ContextProjectionOpTest)
     add_simple_unittest(PadOpTest)
+    # add_simple_unittest(ContextProjectionOpTest)
 endif()
 endif()

[File diff suppressed because it is too large]

@@ -21,7 +21,7 @@ namespace paddle {
 /**
  * \brief Context Projection Forward.
  *
- * \param[out]    outputs   output data.
+ * \param[in/out] outputs   output data.
  * \param[in]     input     input data.
  * \param[in]     weight    input weight.
  * \param[in]     sequence  input data.
@@ -56,7 +56,7 @@
  */
 template <DeviceType DType>
 void ContextProjectionBackward(
-    typename Tensor<real, DType>::Matrix& out_grad,
+    const typename Tensor<real, DType>::Matrix& out_grad,
     typename Tensor<real, DType>::Matrix& in_grad,
     typename Tensor<real, DType>::Matrix& w_grad,
     const typename Tensor<int, DType>::Vector& seq_vec,
@@ -68,7 +68,7 @@ void ContextProjectionBackward(
 template <DeviceType DType>
 void ContextProjectionBackwardData(
-    typename Tensor<real, DType>::Matrix& out_grad,
+    const typename Tensor<real, DType>::Matrix& out_grad,
     typename Tensor<real, DType>::Matrix& in_grad,
     const typename Tensor<int, DType>::Vector& sequence,
     size_t context_length,
@@ -76,7 +76,7 @@ void ContextProjectionBackwardData(
 template <DeviceType DType>
 void ContextProjectionBackwardWeight(
-    typename Tensor<real, DType>::Matrix& out_grad,
+    const typename Tensor<real, DType>::Matrix& out_grad,
     typename Tensor<real, DType>::Matrix& w_grad,
     const typename Tensor<int, DType>::Vector& seq_vec,
     size_t context_length,

@@ -138,10 +138,10 @@ void ContextProjectionForward<DEVICE_TYPE_GPU>(GpuMatrix& output,
                                  begin_pad);
 }

-__global__ void KeContextProjectionBackwardData(real* out_grad,
+__global__ void KeContextProjectionBackwardData(const real* out_grad,
                                                 const int* sequence,
                                                 real* in_grad,
-                                                int input_dim,
+                                                size_t input_dim,
                                                 int context_length,
                                                 int context_start) {
   int idx = threadIdx.x;
@@ -152,7 +152,8 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
     real value = 0;

     int instances = seq_end - seq_start + context_length - 1;
-    out_grad += seq_start * input_dim * context_length;
+    auto out = const_cast<real*>(out_grad);
+    out += seq_start * input_dim * context_length;
     in_grad += seq_start * input_dim;
     for (int k = 0; k <= input_dim / block_size; k++) {
       if (idx < input_dim) {
@@ -169,7 +170,7 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
           int outx = (i - context_length) < 0 ? i : (context_length - 1);
           int outy = (i - context_length) < 0 ? 0 : (i - (context_length - 1));
           real* output_r =
-              out_grad + outy * input_dim * context_length + outx * input_dim;
+              out + outy * input_dim * context_length + outx * input_dim;
           for (int j = outy; j < seq_end - seq_start; j++) {
             value += output_r[idx];
             if (j - outy == outx) break;
@@ -194,7 +195,7 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
 * @param[in]  context_start   context start.
 *
 */
-void hl_context_projection_backward_data(real* out_grad,
+void hl_context_projection_backward_data(const real* out_grad,
                                          const int* sequence,
                                          real* input_grad,
                                          size_t num_sequences,
@@ -216,7 +217,7 @@ void hl_context_projection_backward_data(real* out_grad,
 }

 template <>
-void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(GpuMatrix& out_grad,
+void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(const GpuMatrix& out_grad,
                                                     GpuMatrix& in_grad,
                                                     const GpuIVector& sequence,
                                                     size_t context_length,
@@ -231,7 +232,7 @@ void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(GpuMatrix& out_grad,
 }

 template<int THREADS_X, int THREADS_Y>
-__global__ void KeContextProjectionBackwardWeight(real* out_grad,
+__global__ void KeContextProjectionBackwardWeight(const real* out_grad,
                                                   const int* sequence,
                                                   real* w_grad,
                                                   int num_sequences,
@@ -254,7 +255,8 @@ __global__ void KeContextProjectionBackwardWeight(real* out_grad,
     for (int seqId = idy; seqId < num_sequences; seqId += THREADS_Y) {
       int seq_start = sequence[seqId];
       int seq_end = sequence[seqId+1];
-      output_r = out_grad + seq_start * w_dim * context_length;
+      output_r = const_cast<real*>(out_grad)
+          + seq_start * w_dim * context_length;

       if (context_start < 0) {
         if (padId + context_start < 0) {
@@ -318,7 +320,7 @@ __global__ void KeContextProjectionBackwardWeight(real* out_grad,
 *                             beginning.
 *
 */
-void hl_context_projection_backward_weight(real* out_grad,
+void hl_context_projection_backward_weight(const real* out_grad,
                                            const int* sequence,
                                            real* w_grad,
                                            size_t num_sequences,
@@ -346,7 +348,7 @@ void hl_context_projection_backward_weight(real* out_grad,

 template <>
 void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(
-    GpuMatrix& out_grad,
+    const GpuMatrix& out_grad,
     GpuMatrix& w_grad,
     const GpuIVector& seq_vec,
     size_t context_length,
@@ -365,7 +367,7 @@ void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(
 }

 template <>
-void ContextProjectionBackward<DEVICE_TYPE_GPU>(GpuMatrix& out_grad,
+void ContextProjectionBackward<DEVICE_TYPE_GPU>(const GpuMatrix& out_grad,
                                                 GpuMatrix& in_grad,
                                                 GpuMatrix& w_grad,
                                                 const GpuIVector& sequence,

@@ -56,22 +56,25 @@ void testMatrixProjectionForward(int context_start,
   cpu_out.randomizeUniform();
   gpu_out.copyFrom(cpu_out);

-  compare.getCpuFunction()->calc(
-      {Tensor(cpu_in.getData(), Dims{batch_size, input_dim}),
-       Tensor(cpu_weight ? cpu_weight->getData() : nullptr,
-              Dims{pad, input_dim}),
-       Tensor(reinterpret_cast<real*>(cpu_seq->getData()),
-              Dims{cpu_seq->getSize()})},
-      {Tensor(cpu_out.getData(), Dims{batch_size, input_dim * context_length})},
-      {});
-  compare.getGpuFunction()->calc(
-      {Tensor(gpu_in.getData(), Dims{batch_size, input_dim}),
-       Tensor(gpu_weight ? gpu_weight->getData() : nullptr,
-              Dims{pad, input_dim}),
-       Tensor(reinterpret_cast<real*>(gpu_seq->getData()),
-              Dims{gpu_seq->getSize()})},
-      {Tensor(gpu_out.getData(), Dims{batch_size, input_dim * context_length})},
-      {});
+  BufferArgs cpu_inputs;
+  BufferArgs cpu_outputs;
+  cpu_inputs.addArg(cpu_in, *cpu_seq);
+  if (cpu_weight) {
+    cpu_inputs.addArg(*cpu_weight, *cpu_seq);
+  }
+  cpu_outputs.addArg(cpu_out, *cpu_seq, ADD_TO);
+
+  compare.getCpuFunction()->calc(cpu_inputs, cpu_outputs);
+
+  BufferArgs gpu_inputs;
+  BufferArgs gpu_outputs;
+  gpu_inputs.addArg(gpu_in, *gpu_seq);
+  if (gpu_weight) {
+    gpu_inputs.addArg(*gpu_weight, *gpu_seq);
+  }
+  gpu_outputs.addArg(gpu_out, *gpu_seq, ADD_TO);
+
+  compare.getGpuFunction()->calc(gpu_inputs, gpu_outputs);

   autotest::TensorCheckEqual(cpu_out, gpu_out);
 }
@@ -117,25 +120,23 @@ void testMatrixProjectionBackward(int context_start,
     gpu_w_grad->copyFrom(*cpu_w_grad);
   }

-  compare.getCpuFunction()->calc(
-      {Tensor(cpu_in_grad.getData(), Dims{batch_size, input_dim}),
-       Tensor(cpu_w_grad ? cpu_w_grad->getData() : nullptr,
-              Dims{pad, input_dim}),
-       Tensor(reinterpret_cast<real*>(cpu_seq->getData()),
-              Dims{cpu_seq->getSize()})},
-      {Tensor(cpu_out_grad.getData(),
-              Dims{batch_size, input_dim * context_length})},
-      {});
-
-  compare.getGpuFunction()->calc(
-      {Tensor(gpu_in_grad.getData(), Dims{batch_size, input_dim}),
-       Tensor(gpu_w_grad ? gpu_w_grad->getData() : nullptr,
-              Dims{pad, input_dim}),
-       Tensor(reinterpret_cast<real*>(gpu_seq->getData()),
-              Dims{gpu_seq->getSize()})},
-      {Tensor(gpu_out_grad.getData(),
-              Dims{batch_size, input_dim * context_length})},
-      {});
+  BufferArgs cpu_inputs;
+  BufferArgs cpu_outputs;
+  cpu_inputs.addArg(cpu_out_grad, *cpu_seq);
+  cpu_outputs.addArg(cpu_in_grad, *cpu_seq, ADD_TO);
+  cpu_outputs.addArg(
+      cpu_w_grad ? *cpu_w_grad : CpuMatrix(nullptr, 0, input_dim), ADD_TO);
+
+  compare.getCpuFunction()->calc(cpu_inputs, cpu_outputs);
+
+  BufferArgs gpu_inputs;
+  BufferArgs gpu_outputs;
+  gpu_inputs.addArg(gpu_out_grad, *gpu_seq);
+  gpu_outputs.addArg(gpu_in_grad, *gpu_seq, ADD_TO);
+  gpu_outputs.addArg(
+      gpu_w_grad ? *gpu_w_grad : GpuMatrix(nullptr, 0, input_dim), ADD_TO);
+
+  compare.getGpuFunction()->calc(gpu_inputs, gpu_outputs);

   autotest::TensorCheckErr(cpu_in_grad, gpu_in_grad);
   if (is_padding) {

@@ -188,8 +188,13 @@ public:
     CHECK(inputs[0].shape() == inputs[3].shape());
     CHECK(inputs[0].shape() == outputs[0].shape());

-    // TODO(hedaoyuan): need support ASSIGN_TO mode.
-    CHECK_EQ(outputs[0].getArgType(), ADD_TO);
+    if (outputs[0].getArgType() != ADD_TO) {
+      // Currently, some algorithm implementations are ASSIGN_TO mode. If
+      // the ADD_TO calculation needs to be supported, the output needs to
+      // be cleared first.
+      typename Tensor<real, Device>::Vector tmp(
+          outputs[0].shape().getElements(), outputs[0].data<real>());
+      tmp.zero();
+    }

     size_t samples = inputs[0].shape()[0];
     size_t channels = inputs[0].shape()[1];

@@ -27,15 +27,19 @@ TEST(CrossMapNormal, real) {
                 << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW
                 << " size=" << size;

-          FunctionCompare compare("CrossMapNormal",
-                                  FuncConfig()
-                                      .set("size", size)
-                                      .set("scale", (real)1.5)
-                                      .set("pow", (real)0.5));
-          Dims dims{numSamples, channels, imgSizeH, imgSizeW};
-          compare.cmpWithArg({Tensor(nullptr, dims)},
-                             {Tensor(nullptr, dims), Tensor(nullptr, dims)},
-                             {});
+          // init Test object
+          FunctionCompare test("CrossMapNormal",
+                               FuncConfig()
+                                   .set("size", size)
+                                   .set("scale", (real)1.5)
+                                   .set("pow", (real)0.5));
+          // prepare input arguments
+          TensorShape shape{numSamples, channels, imgSizeH, imgSizeW};
+          test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
+          test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape));
+          test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape));
+          // run Function
+          test.run();
         }
       }
     }
@@ -53,18 +57,19 @@ TEST(CrossMapNormalGrad, real) {
                 << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW
                 << " size=" << size;

-          FunctionCompare compare("CrossMapNormalGrad",
-                                  FuncConfig()
-                                      .set("size", size)
-                                      .set("scale", (real)1.5)
-                                      .set("pow", (real)0.5));
-          Dims dims{numSamples, channels, imgSizeH, imgSizeW};
-          compare.cmpWithArg({Tensor(nullptr, dims),
-                              Tensor(nullptr, dims),
-                              Tensor(nullptr, dims),
-                              Tensor(nullptr, dims)},
-                             {Tensor(nullptr, dims)},
-                             {});
+          FunctionCompare test("CrossMapNormalGrad",
+                               FuncConfig()
+                                   .set("size", size)
+                                   .set("scale", (real)1.5)
+                                   .set("pow", (real)0.5));
+          TensorShape shape{numSamples, channels, imgSizeH, imgSizeW};
+          test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
+          test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
+          test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
+          test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
+          test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape));
+          // run Function
+          test.run();
         }
       }
     }

@@ -79,15 +79,25 @@ FuncConfig& FuncConfig::set<bool>(const std::string& key, bool v) {
 void BufferArgs::addArg(const Matrix& arg,
                         const TensorShape& shape,
                         ArgType argType) {
-  args_.push_back(std::make_shared<BufferArg>(arg, shape, argType));
+  _args_.push_back(new BufferArg(arg, shape, argType));
+  addArg(*_args_.back());
 }

 void BufferArgs::addArg(const CpuSparseMatrix& arg, ArgType argType) {
-  args_.push_back(std::make_shared<SparseMatrixArg>(arg, argType));
+  _args_.push_back(new SparseMatrixArg(arg, argType));
+  addArg(*_args_.back());
 }

 void BufferArgs::addArg(const GpuSparseMatrix& arg, ArgType argType) {
-  args_.push_back(std::make_shared<SparseMatrixArg>(arg, argType));
+  _args_.push_back(new SparseMatrixArg(arg, argType));
+  addArg(*_args_.back());
+}
+
+void BufferArgs::addArg(const Matrix& matrix,
+                        const IVector& vector,
+                        ArgType argType) {
+  _args_.push_back(new SequenceArg(matrix, vector, argType));
+  addArg(*_args_.back());
 }

 ClassRegistrar<FunctionBase> FunctionBase::funcRegistrar_;

@@ -50,19 +50,44 @@ protected:
 * Argument type for Function::calc().
 * A BufferArgs contains a set of BufferArg,
 * because Function can have multiple inputs and outputs.
+ *
+ * addArg() with a Matrix object is used to adapt a Layer Argument.
+ * It will create a BufferArg object in addArg(),
+ * which is freed in the destructor of BufferArgs.
+ *
+ * addArg() with a BufferArg object just saves the BufferArg object's address,
+ * and the caller needs to guarantee the validity of the BufferArg object
+ * for the lifetime of the BufferArgs.
 */
class BufferArgs {
public:
  BufferArgs() {}
+
+  ~BufferArgs() {
+    for (auto arg : _args_) {
+      delete arg;
+    }
+  }
+
  size_t size() const { return args_.size(); }

  // add argument into BufferArgs
  // Tensor can be Matrix, Vector, IVector.
  // For inputs, do not need argType.
  // For outputs, the argType needs to be specified as ASSIGN_TO or ADD_TO.
-  template <typename Tensor>
-  void addArg(const Tensor& arg, ArgType argType = UNSPECIFIED) {
-    args_.push_back(std::make_shared<BufferArg>(arg, argType));
+  void addArg(const Matrix& arg, ArgType argType = UNSPECIFIED) {
+    _args_.push_back(new BufferArg(arg, argType));
+    addArg(*_args_.back());
+  }
+
+  void addArg(const Vector& arg, ArgType argType = UNSPECIFIED) {
+    _args_.push_back(new BufferArg(arg, argType));
+    addArg(*_args_.back());
+  }
+
+  void addArg(const IVector& arg, ArgType argType = UNSPECIFIED) {
+    _args_.push_back(new BufferArg(arg, argType));
+    addArg(*_args_.back());
  }

  // Add arg into BufferArgs and reshape the arg.
@@ -77,20 +102,37 @@ public:
  void addArg(const CpuSparseMatrix& arg, ArgType argType = UNSPECIFIED);
  void addArg(const GpuSparseMatrix& arg, ArgType argType = UNSPECIFIED);

+  void addArg(const Matrix& matrix,
+              const IVector& vector,
+              ArgType argType = UNSPECIFIED);
+
  // get argument
  const BufferArg& operator[](size_t num) const {
    CHECK_LT(num, args_.size());
    return *args_[num];
  }

+  void addArg(BufferArg& arg) { args_.push_back(&arg); }
+
+  void addArg(SequenceIdArg& arg) { args_.push_back(&arg); }
+
+  void addArg(SequenceArg& arg) { args_.push_back(&arg); }
+
+  void addArg(SparseMatrixArg& arg) { args_.push_back(&arg); }
+
private:
-  std::vector<BufferArgPtr> args_;
+  std::vector<BufferArg*> args_;
+  // The BufferArg objects below are constructed and freed by BufferArgs.
+  std::vector<BufferArg*> _args_;
};

/**
 * \brief Base class for Function.
 * The basic Function implementation requires override init and calc interfaces.
 *
+ * The caller needs to ensure the validity of the arguments
+ * during Function execution.
+ *
 * Function inputs are readonly, Function outputs have two modes: ASSIGN_TO
 * and ADD_TO.
 * If output.getArgType() == ASSIGN_TO, this is assign mode, and the calculation
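Editor's note: the ownership rules in the comment above (owning `_args_` vs. non-owning `args_`) can be summarized with a small self-contained sketch. The types below are simplified stand-ins, not the real Paddle classes:

```cpp
#include <cstddef>
#include <vector>

struct Arg { int id; };  // stand-in for BufferArg

class Args {  // stand-in for BufferArgs
public:
  ~Args() {
    // Free only the arguments this container created itself.
    for (Arg* arg : owned_) delete arg;
  }
  // Adapt-style addArg(): the container news the Arg and owns it.
  void addOwned(int id) {
    owned_.push_back(new Arg{id});
    all_.push_back(owned_.back());
  }
  // Reference-style addArg(): only the address is stored; the caller must
  // keep `arg` alive for the lifetime of this container.
  void addRef(Arg& arg) { all_.push_back(&arg); }

  std::size_t size() const { return all_.size(); }

private:
  std::vector<Arg*> all_;    // every argument, in insertion order
  std::vector<Arg*> owned_;  // only the ones constructed here
};

int main() {
  Arg external{7};  // caller-owned; must outlive `args`
  Args args;
  args.addOwned(1);
  args.addRef(external);
  return args.size() == 2 ? 0 : 1;
}
```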

@@ -14,6 +14,7 @@ limitations under the License. */

 #include "Function.h"
 #include <gtest/gtest.h>
+#include "paddle/math/SparseMatrix.h"

 namespace paddle {

@@ -56,4 +57,110 @@ TEST(Function, BufferArgs) {
   Function<DEVICE_TYPE_GPU>(gpuArgments);
 }

+/**
+ * Some test cases are used to check the consistency between the BufferArg
+ * type argument received by the Function and the original type argument.
+ *
+ * Use Case:
+ *  TEST() {
+ *    Matrix matrix(...);
+ *    CheckBufferArg lambda = [=](const BufferArg& arg) {
+ *      // check matrix and arg are equivalent
+ *      EXPECT_EQ(matrix, arg);
+ *    }
+ *
+ *    BufferArgs argments{matrix...};
+ *    std::vector<CheckBufferArg> checkFunc{lambda...};
+ *    testBufferArgs(argments, checkFunc);
+ *  }
+ */
+typedef std::function<void(const BufferArg&)> CheckBufferArg;
+
+void testBufferArgs(const BufferArgs& inputs,
+                    const std::vector<CheckBufferArg>& check) {
+  EXPECT_EQ(inputs.size(), check.size());
+  for (size_t i = 0; i < inputs.size(); i++) {
+    check[i](inputs[i]);
+  }
+}
+
+void testBufferArgs(const BufferArgs& inputs, const CheckBufferArg& check) {
+  EXPECT_EQ(inputs.size(), 1);
+  check(inputs[0]);
+}
+
+TEST(Arguments, Matrix) {
+  MatrixPtr matrix = Matrix::create(100, 200);
+  CheckBufferArg check = [=](const BufferArg& arg) {
+    EXPECT_EQ(arg.shape().ndims(), 2);
+    EXPECT_EQ(arg.shape()[0], 100);
+    EXPECT_EQ(arg.shape()[1], 200);
+    EXPECT_EQ(arg.data(), matrix->getData());
+
+    EXPECT_EQ(arg.matrix<DEVICE_TYPE_CPU>().getHeight(), matrix->getHeight());
+    EXPECT_EQ(arg.matrix<DEVICE_TYPE_CPU>().getWidth(), matrix->getWidth());
+    EXPECT_EQ(arg.matrix<DEVICE_TYPE_CPU>().getData(), matrix->getData());
+  };
+
+  BufferArgs argments;
+  argments.addArg(*matrix);
+  std::vector<CheckBufferArg> checkFunc;
+  checkFunc.push_back(check);
+  testBufferArgs(argments, checkFunc);
+}
+
+TEST(Arguments, Vector) {
+  VectorPtr vector = Vector::create(100, false);
+  CheckBufferArg check = [=](const BufferArg& arg) {
+    EXPECT_EQ(arg.shape().ndims(), 1);
+    EXPECT_EQ(arg.shape()[0], 100);
+    EXPECT_EQ(arg.data(), vector->getData());
+
+    CpuVector inVector = arg.vector<real, DEVICE_TYPE_CPU>();
+    EXPECT_EQ(inVector.getSize(), vector->getSize());
+    EXPECT_EQ(inVector.getData(), vector->getData());
+  };
+
+  BufferArgs argments;
+  argments.addArg(*vector);
+  std::vector<CheckBufferArg> checkFunc;
+  checkFunc.push_back(check);
+  testBufferArgs(argments, checkFunc);
+}
+
+TEST(Arguments, CpuSparseMatrix) {
+  CpuSparseMatrix sparse(200, 300, 50);
+  CheckBufferArg check = [=](const BufferArg& arg) {
+    EXPECT_EQ(arg.shape().ndims(), 2);
+    EXPECT_EQ(arg.shape()[0], 200);
+    EXPECT_EQ(arg.shape()[1], 300);
+    EXPECT_EQ(arg.data(), sparse.getData());
+    // CHECK_EQ(arg.sparse().nnz(), 50);
+    // CHECK_EQ(arg.sparse().dataFormat(), SPARSE_CSR_FORMAT);
+    // CHECK_EQ(arg.sparse().dataType(), SPARSE_FLOAT_VALUE);
+    EXPECT_EQ(arg.sparse().getRowBuf(), sparse.getRows());
+    EXPECT_EQ(arg.sparse().getColBuf(), sparse.getCols());
+  };
+
+  BufferArgs argments;
+  argments.addArg(sparse);
+  std::vector<CheckBufferArg> checkFunc;
+  checkFunc.push_back(check);
+  testBufferArgs(argments, checkFunc);
+}
+
+TEST(Arguments, BufferArg) {
+  BufferArg arg(nullptr, VALUE_TYPE_FLOAT, {1, 2, 3});
+  CheckBufferArg check = [=](const BufferArg& arg) {
+    EXPECT_EQ(arg.shape().ndims(), 3);
+    EXPECT_EQ(arg.shape()[0], 1);
+    EXPECT_EQ(arg.shape()[1], 2);
+    EXPECT_EQ(arg.shape()[2], 3);
+  };
+
+  BufferArgs argments;
+  argments.addArg(arg);
+  testBufferArgs(argments, check);
+}
+
 }  // namespace paddle

[File diff suppressed because it is too large]

[Some files were not shown because too many files have changed in this diff]
