Commit 8d47499e1d ("update code", branch avx_docs) by dangqingqing, 8 years ago

@@ -25,9 +25,9 @@ addons:
     packages:
       - gcc-4.8
       - g++-4.8
+      - gfortran-4.8
       - git
       - build-essential
-      - libatlas-base-dev
       - python
       - python-pip
       - python2.7-dev

@@ -16,7 +16,7 @@
 set(CBLAS_FOUND OFF)

 ## Find MKL First.
-set(MKL_ROOT $ENV{MKL_ROOT} CACHE PATH "Folder contains MKL")
+set(MKL_ROOT $ENV{MKLROOT} CACHE PATH "Folder contains MKL")

 find_path(MKL_INCLUDE_DIR mkl.h PATHS
           ${MKL_ROOT}/include)

@@ -15,7 +15,6 @@
 INCLUDE(cblas)

 IF(NOT ${CBLAS_FOUND})
-    MESSAGE(FATAL_ERROR "Please install OpenBlas, MKL or ATLAS.")
     INCLUDE(ExternalProject)

     SET(CBLAS_SOURCES_DIR ${THIRD_PARTY_PATH}/openblas)
@@ -28,20 +27,40 @@ IF(NOT ${CBLAS_FOUND})
         SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/libopenblas.a" CACHE FILEPATH "openblas library" FORCE)
     ENDIF(WIN32)

+    IF(CMAKE_COMPILER_IS_GNUCC)
+        ENABLE_LANGUAGE(Fortran)
+        LIST(APPEND CBLAS_LIBRARIES gfortran pthread)
+    ENDIF(CMAKE_COMPILER_IS_GNUCC)
+
+    IF(NOT CMAKE_Fortran_COMPILER)
+        MESSAGE(FATAL_ERROR "To build lapack in libopenblas, "
+                "you need to set gfortran compiler: cmake .. -DCMAKE_Fortran_COMPILER=...")
+    ENDIF(NOT CMAKE_Fortran_COMPILER)
+
     ExternalProject_Add(
         openblas
         ${EXTERNAL_PROJECT_LOG_ARGS}
-        URL                 "https://github.com/xianyi/OpenBLAS/archive/v0.2.19.tar.gz"
+        GIT_REPOSITORY      https://github.com/xianyi/OpenBLAS.git
+        GIT_TAG             v0.2.19
         PREFIX              ${CBLAS_SOURCES_DIR}
         INSTALL_DIR         ${CBLAS_INSTALL_DIR}
         BUILD_IN_SOURCE     1
-        CONFIGURE_COMMAND   ""
-        BUILD_COMMAND       make CC=${CMAKE_C_COMPILER} FC=${CMAKE_Fortran_COMPILER}
-        INSTALL_COMMAND     make install PREFIX=<INSTALL_DIR>
+        BUILD_COMMAND       ${CMAKE_MAKE_PROGRAM} FC=${CMAKE_Fortran_COMPILER} CC=${CMAKE_C_COMPILER} HOSTCC=${CMAKE_C_COMPILER} NO_SHARED=1 libs netlib
+        INSTALL_COMMAND     ${CMAKE_MAKE_PROGRAM} install NO_SHARED=1 PREFIX=<INSTALL_DIR>
         UPDATE_COMMAND      ""
+        CONFIGURE_COMMAND   ""
     )
+    ExternalProject_Add_Step(
+        openblas lapacke_install
+        COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h" "${CBLAS_INSTALL_DIR}/include/lapacke_mangling.h"
+        COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke.h" "${CBLAS_INSTALL_DIR}/include/lapacke.h"
+        COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_config.h" "${CBLAS_INSTALL_DIR}/include/lapacke_config.h"
+        COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_utils.h" "${CBLAS_INSTALL_DIR}/include/lapacke_utils.h"
+        DEPENDEES install
+    )
     LIST(APPEND external_project_dependencies openblas)
-ENDIF()
+ENDIF(NOT ${CBLAS_FOUND})

 INCLUDE_DIRECTORIES(${CBLAS_INC_DIR})

@@ -54,6 +54,7 @@ ExternalProject_Add(
     CONFIGURE_COMMAND
         ${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/protobuf/cmake
             -Dprotobuf_BUILD_TESTS=OFF
+            -DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}
             -DCMAKE_POSITION_INDEPENDENT_CODE=ON
             -DCMAKE_BUILD_TYPE=Release
             -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}

@@ -31,6 +31,7 @@ IF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
             "please use pip to upgrade protobuf.")
     ENDIF(${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0")
 ELSE(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
+    MESSAGE(FATAL_ERROR "Please install python 2.7 before building PaddlePaddle.")
     ##################################### PYTHON ########################################
     SET(PYTHON_SOURCES_DIR ${THIRD_PARTY_PATH}/python)
     SET(PYTHON_INSTALL_DIR ${THIRD_PARTY_PATH}/install/python)

@@ -96,6 +96,7 @@ set(COMMON_FLAGS
     -Wno-unused-parameter
     -Wno-unused-function
     -Wno-error=literal-suffix
+    -Wno-error=sign-compare
     -Wno-error=unused-local-typedefs)

 set(GPU_COMMON_FLAGS
@@ -105,6 +106,7 @@ set(GPU_COMMON_FLAGS
     -Wdelete-non-virtual-dtor
     -Wno-unused-parameter
     -Wno-unused-function
+    -Wno-error=sign-compare
     -Wno-error=literal-suffix
     -Wno-error=unused-local-typedefs
     -Wno-error=unused-function  # Warnings in Numpy Header.

@@ -21,6 +21,7 @@ ELSE(WIN32)
         SET(MACOS_VERSION ${VERSION})
         SET(HOST_SYSTEM "macosx")
     ELSE(APPLE)
+
         IF(EXISTS "/etc/issue")
             FILE(READ "/etc/issue" LINUX_ISSUE)
             IF(LINUX_ISSUE MATCHES "CentOS")
@@ -31,6 +32,14 @@ ELSE(WIN32)
                 SET(HOST_SYSTEM "ubuntu")
             ENDIF()
         ENDIF(EXISTS "/etc/issue")
+
+        IF(EXISTS "/etc/redhat-release")
+            FILE(READ "/etc/redhat-release" LINUX_ISSUE)
+            IF(LINUX_ISSUE MATCHES "CentOS")
+                SET(HOST_SYSTEM "centos")
+            ENDIF()
+        ENDIF(EXISTS "/etc/redhat-release")
+
     ENDIF(APPLE)
 ENDIF(WIN32)
@@ -47,7 +56,7 @@ SET(EXTERNAL_PROJECT_LOG_ARGS
     LOG_DOWNLOAD    0     # Wrap download in script to log output
     LOG_UPDATE      1     # Wrap update in script to log output
     LOG_CONFIGURE   1     # Wrap configure in script to log output
-    LOG_BUILD       1     # Wrap build in script to log output
+    LOG_BUILD       0     # Wrap build in script to log output
     LOG_TEST        1     # Wrap test in script to log output
-    LOG_INSTALL     1     # Wrap install in script to log output
+    LOG_INSTALL     0     # Wrap install in script to log output
 )

@@ -4,6 +4,8 @@ Installing from Sources
 * [1. Download and Setup](#download)
 * [2. Requirements](#requirements)
 * [3. Build on Ubuntu](#ubuntu)
+* [4. Build on CentOS](#centos)

 ## <span id="download">Download and Setup</span>
 You can download PaddlePaddle from the [github source](https://github.com/PaddlePaddle/Paddle).
@@ -64,7 +66,8 @@ As a simple example, consider the following:
 1. **BLAS Dependencies(optional)**

-    Paddle will find BLAS from system's default path. But you can specify MKL, OpenBLAS or ATLAS via `MKL_ROOT`, `OPENBLAS_ROOT` or `ATLAS_ROOT`.
+    CMake will search for BLAS libraries on the system. If they are not found, OpenBLAS will be downloaded, built and installed automatically.
+    To use a preinstalled BLAS, you can simply specify MKL, OpenBLAS or ATLAS via `MKL_ROOT`, `OPENBLAS_ROOT` or `ATLAS_ROOT`.

    ```bash
    # specify MKL
@@ -99,7 +102,7 @@ As a simple example, consider the following:
    ```bash
    # necessary
    sudo apt-get update
-   sudo apt-get install -y g++ make cmake build-essential libatlas-base-dev python python-pip libpython-dev git
+   sudo apt-get install -y g++ make cmake build-essential python python-pip libpython-dev git
    sudo pip install wheel numpy
    sudo pip install 'protobuf>=3.0.0'
    ```
@@ -150,3 +153,64 @@ export PATH=<path to install>/bin:$PATH
 # install PaddlePaddle Python modules.
 sudo pip install <path to install>/opt/paddle/share/wheels/*.whl
 ```
+
+## <span id="centos">Build on CentOS 7</span>
+
+### Install Dependencies
+
+- **CPU Dependencies**
+
+    ```bash
+    # necessary
+    sudo yum update
+    sudo yum install -y epel-release
+    sudo yum install -y make cmake3 python-devel python-pip gcc-gfortran swig git
+    sudo pip install wheel numpy
+    sudo pip install 'protobuf>=3.0.0'
+    ```
+
+- **GPU Dependencies (optional)**
+
+    To build the GPU version, you will need the following installed:
+
+    1. a CUDA-capable GPU
+    2. a supported version of Linux with a gcc compiler and toolchain
+    3. the NVIDIA CUDA Toolkit (available at http://developer.nvidia.com/cuda-downloads)
+    4. the NVIDIA cuDNN Library (available at https://developer.nvidia.com/cudnn)
+
+    The CUDA development environment relies on tight integration with the host development environment,
+    including the host compiler and C runtime libraries, and is therefore only supported on
+    distribution versions that have been qualified for this CUDA Toolkit release.
+
+    After downloading the cuDNN library, issue the following commands:
+
+    ```bash
+    sudo tar -xzf cudnn-7.5-linux-x64-v5.1.tgz -C /usr/local
+    sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib64/libcudnn*
+    ```
+
+    Then you need to set the LD\_LIBRARY\_PATH and PATH environment variables in ~/.bashrc.
+
+    ```bash
+    export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
+    export PATH=/usr/local/cuda/bin:$PATH
+    ```
+
+### Build and Install
+
+As usual, the best option is to create a build folder under the Paddle project directory.
+
+```bash
+mkdir build && cd build
+```
+
+Finally, you can build and install PaddlePaddle:
+
+```bash
+# you can add build options here, such as:
+cmake3 .. -DCMAKE_INSTALL_PREFIX=<path to install>
+# please use sudo make install if you want to install PaddlePaddle into the system
+make -j `nproc` && make install
+# set the PaddlePaddle installation path in ~/.bashrc
+export PATH=<path to install>/bin:$PATH
+# install PaddlePaddle Python modules.
+sudo pip install <path to install>/opt/paddle/share/wheels/*.whl
+```

@@ -32,7 +32,7 @@ An example of using pooling_layer is shown below; see :ref:`api_trainer_config_helpers for details:

 - `pooling_type` currently supports two types, MaxPooling() and AvgPooling().
-- When `agg_level=AggregateLevel.TIMESTEP` (the default):
+- When `agg_level=AggregateLevel.EACH_TIMESTEP` (the default):
   - Effect: a two-level sequence is reduced to a zero-level sequence, or a single-level sequence is reduced to a zero-level sequence
   - Input: a two-level sequence, or a single-level sequence
@@ -54,7 +54,7 @@ An example of using last_seq is shown below (:ref:`api_trainer_config_helpers_layers_first_

     last = last_seq(input=layer,
                     agg_level=AggregateLevel.EACH_SEQUENCE)

-- When `agg_level=AggregateLevel.TIMESTEP` (the default):
+- When `agg_level=AggregateLevel.EACH_TIMESTEP` (the default):
   - Effect: a two-level sequence is reduced to a zero-level sequence, or a single-level sequence is reduced to a zero-level sequence
   - Input: a two-level sequence or a single-level sequence

[File diff suppressed because it is too large]

[Binary image file changed; preview not shown (new size: 70 KiB)]

@@ -20,23 +20,27 @@ limitations under the License. */
 namespace paddle {

 const SequenceArg& BufferArg::sequence() const {
-  // CHECK_EQ(bufferType_, TENSOR_SEQUENCE_DATA);
+  CHECK_EQ(bufferType_, TENSOR_SEQUENCE_DATA);
   return dynamic_cast<const SequenceArg&>(*this);
 }

 const SparseMatrixArg& BufferArg::sparse() const {
-  // CHECK_EQ(bufferType_, TENSOR_SPARSE);
+  CHECK_EQ(bufferType_, TENSOR_SPARSE);
   return dynamic_cast<const SparseMatrixArg&>(*this);
 }

 SparseMatrixArg::SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType)
     : BufferArg(sparse, argType),
       row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
-      col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {}
+      col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {
+  bufferType_ = TENSOR_SPARSE;
+}

 SparseMatrixArg::SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType)
     : BufferArg(sparse, argType),
       row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
-      col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {}
+      col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {
+  bufferType_ = TENSOR_SPARSE;
+}

 }  // namespace paddle

@@ -23,10 +23,11 @@ limitations under the License. */
 namespace paddle {

 enum BufferType {
-  TENSOR_NORMAL = 0,
-  TENSOR_SEQUENCE_ID = 1,
-  TENSOR_SEQUENCE_DATA = 2,
-  TENSOR_SPARSE = 3
+  TENSOR_UNKNOWN = 0,
+  TENSOR_NORMAL = 1,
+  TENSOR_SEQUENCE_ID = 2,
+  TENSOR_SEQUENCE_DATA = 3,
+  TENSOR_SPARSE = 4
 };

 enum SparseDataType {
@@ -39,7 +40,6 @@ enum SparseDataFormat { SPARSE_CSR_FORMAT = 0, SPARSE_CSC_FORMAT = 1 };
 class BufferArg;
 class SequenceArg;
 class SparseMatrixArg;
-typedef std::shared_ptr<BufferArg> BufferArgPtr;

 /**
  * \brief BufferArg used as the argument type of Function.
@@ -50,6 +50,11 @@ typedef std::shared_ptr<BufferArg> BufferArgPtr;
  * 3. SequenceArg for a Buffer of sequence data.
  * 4. SparseMatrixArg for a Buffer of sparse matrix.
  *
+ * Buffer shape
+ * For most buffers, the first dimension `shape()[0]` represents
+ * the size of the mini-batch.
+ *
+ * Buffer argType
  * There is an ArgType property for the BufferArg used as Function Output.
  * Whether the result of the Function calculation is assigned to the
  * output Buffer or added to the output Buffer is determined by the
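Editor's note: the ASSIGN_TO / ADD_TO distinction documented above is the core contract for Function outputs. A minimal standalone sketch of the two modes (hypothetical names, not part of this diff): the only difference is whether the kernel overwrites the output buffer or accumulates into it.

```cpp
#include <cstddef>
#include <vector>

enum ArgType { UNSPECIFIED = 0, ASSIGN_TO = 1, ADD_TO = 2 };

// Hypothetical element-wise square "function" showing the two output modes:
// ASSIGN_TO overwrites the output buffer, ADD_TO accumulates into it.
void squareCalc(const std::vector<float>& in,
                std::vector<float>& out,
                ArgType argType) {
  for (std::size_t i = 0; i < in.size(); ++i) {
    if (argType == ASSIGN_TO) {
      out[i] = in[i] * in[i];   // assign mode: overwrite
    } else {                    // ADD_TO
      out[i] += in[i] * in[i];  // add mode: accumulate
    }
  }
}
```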
@@ -71,6 +76,14 @@ public:
   ArgType getArgType() const { return argType_; }

 public:
+  BufferArg(ValueType valueType,
+            const TensorShape& shape,
+            ArgType argType = UNSPECIFIED)
+      : buf_(nullptr),
+        valueType_(valueType),
+        shape_(shape),
+        argType_(argType) {}
+
   BufferArg(void* buf,
             ValueType valueType,
             const TensorShape& shape,
@@ -86,6 +99,7 @@ public:
         valueType_(DataType<real>::value),
         shape_(2),
         argType_(argType) {
+    bufferType_ = TENSOR_NORMAL;
     shape_.setDim(0, matrix.getHeight());
     shape_.setDim(1, matrix.getWidth());
   }
@@ -98,6 +112,7 @@ public:
         valueType_(DataType<real>::value),
         shape_(shape),
         argType_(argType) {
+    bufferType_ = TENSOR_NORMAL;
     CHECK_EQ(matrix.getElementCnt(), shape.getElements());
   }
@@ -107,6 +122,7 @@ public:
         valueType_(DataType<real>::value),
         shape_(1),
         argType_(argType) {
+    bufferType_ = TENSOR_NORMAL;
     shape_.setDim(0, vector.getSize());
   }
@@ -116,6 +132,7 @@ public:
         valueType_(VALUE_TYPE_INT32),
         shape_(1),
         argType_(argType) {
+    bufferType_ = TENSOR_NORMAL;
     shape_.setDim(0, vector.getSize());
   }
@@ -150,6 +167,8 @@ public:
   ValueType valueType() const { return valueType_; }
   BufferType bufferType() const { return bufferType_; }
   const TensorShape& shape() const { return shape_; }
+  bool isSparse() const { return (TENSOR_SPARSE == bufferType_); }
+  bool isSequenceArg() const { return TENSOR_SEQUENCE_DATA == bufferType_; }

   const SequenceArg& sequence() const;
   const SparseMatrixArg& sparse() const;
@@ -158,8 +177,8 @@ protected:
   void* buf_;
   ValueType valueType_;
   TensorShape shape_;
-  BufferType bufferType_;
-  ArgType argType_ = UNSPECIFIED;
+  BufferType bufferType_{TENSOR_UNKNOWN};
+  ArgType argType_{UNSPECIFIED};
   // leading dimensions. The size is dims_.size()
   // Dims lds_;
 };
@@ -170,15 +189,24 @@ protected:
 // if a < b then value_.buf_[a] < value_.buf_[b]
 class SequenceIdArg : public BufferArg {
 public:
+  SequenceIdArg(const TensorShape& shape, ArgType argType = UNSPECIFIED)
+      : BufferArg(VALUE_TYPE_INT32, shape, argType) {
+    CHECK_EQ(shape_.ndims(), (size_t)1);
+    CHECK_GT(shape_[0], 1);
+    numSeqs_ = shape_[0] - 1;
+  }
+
   SequenceIdArg(void* buf,
                 const TensorShape& shape,
                 ArgType argType = UNSPECIFIED)
       : BufferArg(buf, VALUE_TYPE_INT32, shape, argType) {
+    bufferType_ = TENSOR_SEQUENCE_ID;
     CHECK_EQ(shape_.ndims(), (size_t)1);
     numSeqs_ = shape_[0] - 1;
   }

   SequenceIdArg(const IVector& vector) : BufferArg(vector) {
+    bufferType_ = TENSOR_SEQUENCE_ID;
     numSeqs_ = shape_[0] - 1;
   }

@@ -190,26 +218,41 @@ private:
   size_t numSeqs_;
 };

-// sequence data
+// sequences data
+// For mini-batch calculation, one batch can contain
+// more than one sequence of data.
+// SequenceArg can be used to represent sequences of
+// multiple unequal lengths.
 class SequenceArg : public BufferArg {
 public:
+  SequenceArg(ValueType valueType,
+              const TensorShape& shape,
+              ArgType argType = UNSPECIFIED)
+      : BufferArg(valueType, shape, argType), startPositions_(TensorShape()) {}
+
   SequenceArg(void* buf,
               ValueType valueType,
               const TensorShape& shape,
               const SequenceIdArg& startPositions,
               ArgType argType = UNSPECIFIED)
       : BufferArg(buf, valueType, shape, argType),
-        startPositions_(startPositions) {}
+        startPositions_(startPositions) {
+    bufferType_ = TENSOR_SEQUENCE_DATA;
+  }

   SequenceArg(const Matrix& matrix,
               const IVector& vector,
               ArgType argType = UNSPECIFIED)
-      : BufferArg(matrix, argType), startPositions_(vector) {}
+      : BufferArg(matrix, argType), startPositions_(vector) {
+    bufferType_ = TENSOR_SEQUENCE_DATA;
+  }

   ~SequenceArg() {}

   void* getIdBuf() const { return startPositions_.data(); }
   size_t numSeqs() const { return startPositions_.numSeqs(); }
+  SequenceIdArg& getSequenceId() { return startPositions_; }
+  const SequenceIdArg& getSequenceId() const { return startPositions_; }

 private:
   SequenceIdArg startPositions_;
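Editor's note: the start-positions layout that SequenceIdArg validates (`numSeqs_ = shape_[0] - 1`) packs variable-length sequences into a single mini-batch. A standalone sketch of the encoding, with illustrative values only (not part of this diff):

```cpp
#include <cstdio>

int main() {
  // Three sequences of lengths 4, 2 and 5 packed into an 11-row batch:
  // sequence i occupies rows [startPositions[i], startPositions[i + 1]).
  const int startPositions[] = {0, 4, 6, 11};
  const int numSeqs = sizeof(startPositions) / sizeof(startPositions[0]) - 1;
  for (int i = 0; i < numSeqs; ++i) {
    std::printf("sequence %d: rows [%d, %d)\n",
                i, startPositions[i], startPositions[i + 1]);
  }
  return 0;  // prints three half-open row ranges
}
```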
@@ -235,6 +278,7 @@ public:
         nnz_(nnz),
         format_(format),
         type_(type) {
+    bufferType_ = TENSOR_SPARSE;
     CHECK((valueType == VALUE_TYPE_FLOAT) || (valueType == VALUE_TYPE_DOUBLE));
     CHECK_EQ(shape_.ndims(), (size_t)2);
     CHECK_EQ(row_.shape().ndims(), (size_t)1);

@@ -14,9 +14,7 @@ limitations under the License. */

 #include "BufferArg.h"
 #include <gtest/gtest.h>
-#include "Function.h"
 #include "paddle/math/MemoryHandle.h"
-#include "paddle/math/SparseMatrix.h"

 namespace paddle {

@@ -37,55 +35,4 @@ TEST(BufferTest, SequenceIdArg) {
   EXPECT_EQ(buffer.numSeqs(), 9);
 }

-TEST(BufferTest, asArgument) {
-  MatrixPtr matrix = Matrix::create(100, 200);
-  VectorPtr vector = Vector::create(100, false);
-  CpuSparseMatrix sparse(200, 300, 50);
-
-  // prepare arguments
-  BufferArgs argments;
-  argments.addArg(*matrix);
-  argments.addArg(*vector);
-  argments.addArg(sparse);
-
-  // function
-  auto function = [=](const BufferArgs& inputs) {
-    EXPECT_EQ(inputs.size(), 3);
-
-    // check inputs[0]
-    EXPECT_EQ(inputs[0].shape().ndims(), 2);
-    EXPECT_EQ(inputs[0].shape()[0], 100);
-    EXPECT_EQ(inputs[0].shape()[1], 200);
-    EXPECT_EQ(inputs[0].data(), matrix->getData());
-
-    EXPECT_EQ(inputs[0].matrix<DEVICE_TYPE_CPU>().getHeight(),
-              matrix->getHeight());
-    EXPECT_EQ(inputs[0].matrix<DEVICE_TYPE_CPU>().getWidth(),
-              matrix->getWidth());
-    EXPECT_EQ(inputs[0].matrix<DEVICE_TYPE_CPU>().getData(), matrix->getData());
-
-    // check inputs[1]
-    EXPECT_EQ(inputs[1].shape().ndims(), 1);
-    EXPECT_EQ(inputs[1].shape()[0], 100);
-    EXPECT_EQ(inputs[1].data(), vector->getData());
-    CpuVector inVector = inputs[1].vector<real, DEVICE_TYPE_CPU>();
-    EXPECT_EQ(inVector.getSize(), vector->getSize());
-    EXPECT_EQ(inVector.getData(), vector->getData());
-
-    // check inputs[2]
-    EXPECT_EQ(inputs[2].shape().ndims(), 2);
-    EXPECT_EQ(inputs[2].shape()[0], 200);
-    EXPECT_EQ(inputs[2].shape()[1], 300);
-    EXPECT_EQ(inputs[2].data(), sparse.getData());
-    // CHECK_EQ(inputs[2].sparse().nnz(), 50);
-    // CHECK_EQ(inputs[2].sparse().dataFormat(), SPARSE_CSR_FORMAT);
-    // CHECK_EQ(inputs[2].sparse().dataType(), SPARSE_FLOAT_VALUE);
-    EXPECT_EQ(inputs[2].sparse().getRowBuf(), sparse.getRows());
-    EXPECT_EQ(inputs[2].sparse().getColBuf(), sparse.getCols());
-  };
-
-  // call function
-  function(argments);
-}
-
 }  // namespace paddle

@@ -19,13 +19,13 @@ if(WITH_TESTING)
     # TODO:
     # file(GLOB test_files . *OpTest.cpp)
     # add_executable(${test_bin} EXCLUDE_FROM_ALL ${test_files})
-    # add_simple_unittest(CrossMapNormalOpTest)
+    add_simple_unittest(CrossMapNormalOpTest)
     add_simple_unittest(TensorShapeTest)
     add_simple_unittest(TensorTypeTest)
     add_simple_unittest(BufferArgTest)
     add_simple_unittest(FunctionTest)
-    add_simple_unittest(ContextProjectionOpTest)
     add_simple_unittest(PadOpTest)
+    # add_simple_unittest(ContextProjectionOpTest)
 endif()
 endif()

[File diff suppressed because it is too large]

@@ -21,7 +21,7 @@ namespace paddle {
 /**
  * \brief Context Projection Forward.
  *
- * \param[out]    outputs   output data.
+ * \param[in/out] outputs   output data.
  * \param[in]     input     input data.
  * \param[in]     weight    input weight.
  * \param[in]     sequence  input data.
@@ -56,7 +56,7 @@
  */
 template <DeviceType DType>
 void ContextProjectionBackward(
-    typename Tensor<real, DType>::Matrix& out_grad,
+    const typename Tensor<real, DType>::Matrix& out_grad,
     typename Tensor<real, DType>::Matrix& in_grad,
     typename Tensor<real, DType>::Matrix& w_grad,
     const typename Tensor<int, DType>::Vector& seq_vec,
@@ -68,7 +68,7 @@ void ContextProjectionBackward(
 template <DeviceType DType>
 void ContextProjectionBackwardData(
-    typename Tensor<real, DType>::Matrix& out_grad,
+    const typename Tensor<real, DType>::Matrix& out_grad,
     typename Tensor<real, DType>::Matrix& in_grad,
     const typename Tensor<int, DType>::Vector& sequence,
     size_t context_length,
@@ -76,7 +76,7 @@ void ContextProjectionBackwardData(
 template <DeviceType DType>
 void ContextProjectionBackwardWeight(
-    typename Tensor<real, DType>::Matrix& out_grad,
+    const typename Tensor<real, DType>::Matrix& out_grad,
     typename Tensor<real, DType>::Matrix& w_grad,
     const typename Tensor<int, DType>::Vector& seq_vec,
     size_t context_length,

@@ -138,10 +138,10 @@ void ContextProjectionForward<DEVICE_TYPE_GPU>(GpuMatrix& output,
                                  begin_pad);
 }

-__global__ void KeContextProjectionBackwardData(real* out_grad,
+__global__ void KeContextProjectionBackwardData(const real* out_grad,
                                                 const int* sequence,
                                                 real* in_grad,
-                                                int input_dim,
+                                                size_t input_dim,
                                                 int context_length,
                                                 int context_start) {
   int idx = threadIdx.x;
@@ -152,7 +152,8 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
     real value = 0;

     int instances = seq_end - seq_start + context_length - 1;
-    out_grad += seq_start * input_dim * context_length;
+    auto out = const_cast<real*>(out_grad);
+    out += seq_start * input_dim * context_length;
     in_grad += seq_start * input_dim;
     for (int k = 0; k <= input_dim / block_size; k++) {
       if (idx < input_dim) {
@@ -169,7 +170,7 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
           int outx = (i - context_length) < 0 ? i : (context_length - 1);
           int outy = (i - context_length) < 0 ? 0 : (i - (context_length - 1));
           real* output_r =
-              out_grad + outy * input_dim * context_length + outx * input_dim;
+              out + outy * input_dim * context_length + outx * input_dim;
           for (int j = outy; j < seq_end - seq_start; j++) {
             value += output_r[idx];
             if (j - outy == outx) break;
@@ -194,7 +195,7 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
 * @param[in]  context_start   context start.
 *
 */
-void hl_context_projection_backward_data(real* out_grad,
+void hl_context_projection_backward_data(const real* out_grad,
                                          const int* sequence,
                                          real* input_grad,
                                          size_t num_sequences,
@@ -216,7 +217,7 @@ void hl_context_projection_backward_data(real* out_grad,
 }

 template <>
-void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(GpuMatrix& out_grad,
+void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(const GpuMatrix& out_grad,
                                                     GpuMatrix& in_grad,
                                                     const GpuIVector& sequence,
                                                     size_t context_length,
@@ -231,7 +232,7 @@ void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(GpuMatrix& out_grad,
 }

 template<int THREADS_X, int THREADS_Y>
-__global__ void KeContextProjectionBackwardWeight(real* out_grad,
+__global__ void KeContextProjectionBackwardWeight(const real* out_grad,
                                                   const int* sequence,
                                                   real* w_grad,
                                                   int num_sequences,
@@ -254,7 +255,8 @@ __global__ void KeContextProjectionBackwardWeight(real* out_grad,
     for (int seqId = idy; seqId < num_sequences; seqId += THREADS_Y) {
       int seq_start = sequence[seqId];
       int seq_end = sequence[seqId+1];
-      output_r = out_grad + seq_start * w_dim * context_length;
+      output_r = const_cast<real*>(out_grad)
+          + seq_start * w_dim * context_length;

       if (context_start < 0) {
         if (padId + context_start < 0) {
@@ -318,7 +320,7 @@ __global__ void KeContextProjectionBackwardWeight(real* out_grad,
 *                             beginning.
 *
 */
-void hl_context_projection_backward_weight(real* out_grad,
+void hl_context_projection_backward_weight(const real* out_grad,
                                            const int* sequence,
                                            real* w_grad,
                                            size_t num_sequences,
@@ -346,7 +348,7 @@ void hl_context_projection_backward_weight(real* out_grad,

 template <>
 void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(
-    GpuMatrix& out_grad,
+    const GpuMatrix& out_grad,
     GpuMatrix& w_grad,
     const GpuIVector& seq_vec,
     size_t context_length,
@@ -365,7 +367,7 @@ void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(
 }

 template <>
-void ContextProjectionBackward<DEVICE_TYPE_GPU>(GpuMatrix& out_grad,
+void ContextProjectionBackward<DEVICE_TYPE_GPU>(const GpuMatrix& out_grad,
                                                 GpuMatrix& in_grad,
                                                 GpuMatrix& w_grad,
                                                 const GpuIVector& sequence,

@@ -56,22 +56,25 @@ void testMatrixProjectionForward(int context_start,
   cpu_out.randomizeUniform();
   gpu_out.copyFrom(cpu_out);

-  compare.getCpuFunction()->calc(
-      {Tensor(cpu_in.getData(), Dims{batch_size, input_dim}),
-       Tensor(cpu_weight ? cpu_weight->getData() : nullptr,
-              Dims{pad, input_dim}),
-       Tensor(reinterpret_cast<real*>(cpu_seq->getData()),
-              Dims{cpu_seq->getSize()})},
-      {Tensor(cpu_out.getData(), Dims{batch_size, input_dim * context_length})},
-      {});
-  compare.getGpuFunction()->calc(
-      {Tensor(gpu_in.getData(), Dims{batch_size, input_dim}),
-       Tensor(gpu_weight ? gpu_weight->getData() : nullptr,
-              Dims{pad, input_dim}),
-       Tensor(reinterpret_cast<real*>(gpu_seq->getData()),
-              Dims{gpu_seq->getSize()})},
-      {Tensor(gpu_out.getData(), Dims{batch_size, input_dim * context_length})},
-      {});
+  BufferArgs cpu_inputs;
+  BufferArgs cpu_outputs;
+  cpu_inputs.addArg(cpu_in, *cpu_seq);
+  if (cpu_weight) {
+    cpu_inputs.addArg(*cpu_weight, *cpu_seq);
+  }
+  cpu_outputs.addArg(cpu_out, *cpu_seq, ADD_TO);
+
+  compare.getCpuFunction()->calc(cpu_inputs, cpu_outputs);
+
+  BufferArgs gpu_inputs;
+  BufferArgs gpu_outputs;
+  gpu_inputs.addArg(gpu_in, *gpu_seq);
+  if (gpu_weight) {
+    gpu_inputs.addArg(*gpu_weight, *gpu_seq);
+  }
+  gpu_outputs.addArg(gpu_out, *gpu_seq, ADD_TO);
+
+  compare.getGpuFunction()->calc(gpu_inputs, gpu_outputs);

   autotest::TensorCheckEqual(cpu_out, gpu_out);
 }
@@ -117,25 +120,23 @@ void testMatrixProjectionBackward(int context_start,
     gpu_w_grad->copyFrom(*cpu_w_grad);
   }

-  compare.getCpuFunction()->calc(
-      {Tensor(cpu_in_grad.getData(), Dims{batch_size, input_dim}),
-       Tensor(cpu_w_grad ? cpu_w_grad->getData() : nullptr,
-              Dims{pad, input_dim}),
-       Tensor(reinterpret_cast<real*>(cpu_seq->getData()),
-              Dims{cpu_seq->getSize()})},
-      {Tensor(cpu_out_grad.getData(),
-              Dims{batch_size, input_dim * context_length})},
-      {});
-
-  compare.getGpuFunction()->calc(
-      {Tensor(gpu_in_grad.getData(), Dims{batch_size, input_dim}),
-       Tensor(gpu_w_grad ? gpu_w_grad->getData() : nullptr,
-              Dims{pad, input_dim}),
-       Tensor(reinterpret_cast<real*>(gpu_seq->getData()),
-              Dims{gpu_seq->getSize()})},
-      {Tensor(gpu_out_grad.getData(),
-              Dims{batch_size, input_dim * context_length})},
-      {});
+  BufferArgs cpu_inputs;
+  BufferArgs cpu_outputs;
+  cpu_inputs.addArg(cpu_out_grad, *cpu_seq);
+  cpu_outputs.addArg(cpu_in_grad, *cpu_seq, ADD_TO);
+  cpu_outputs.addArg(
+      cpu_w_grad ? *cpu_w_grad : CpuMatrix(nullptr, 0, input_dim), ADD_TO);
+
+  compare.getCpuFunction()->calc(cpu_inputs, cpu_outputs);
+
+  BufferArgs gpu_inputs;
+  BufferArgs gpu_outputs;
+  gpu_inputs.addArg(gpu_out_grad, *gpu_seq);
+  gpu_outputs.addArg(gpu_in_grad, *gpu_seq, ADD_TO);
+  gpu_outputs.addArg(
+      gpu_w_grad ? *gpu_w_grad : GpuMatrix(nullptr, 0, input_dim), ADD_TO);
+
+  compare.getGpuFunction()->calc(gpu_inputs, gpu_outputs);

   autotest::TensorCheckErr(cpu_in_grad, gpu_in_grad);
   if (is_padding) {

@@ -188,8 +188,13 @@ public:
     CHECK(inputs[0].shape() == inputs[3].shape());
     CHECK(inputs[0].shape() == outputs[0].shape());

-    // TODO(hedaoyuan): need support ASSIGN_TO mode.
-    CHECK_EQ(outputs[0].getArgType(), ADD_TO);
+    if (outputs[0].getArgType() != ADD_TO) {
+      // Currently, some algorithm implementations are ASSIGN_TO mode. If
+      // the ADD_TO calculation needs to be supported, the output needs to
+      // be cleared first.
+      typename Tensor<real, Device>::Vector tmp(
+          outputs[0].shape().getElements(), outputs[0].data<real>());
+      tmp.zero();
+    }

     size_t samples = inputs[0].shape()[0];
     size_t channels = inputs[0].shape()[1];

@@ -27,15 +27,19 @@ TEST(CrossMapNormal, real) {
                 << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW
                 << " size=" << size;

-          FunctionCompare compare("CrossMapNormal",
-                                  FuncConfig()
-                                      .set("size", size)
-                                      .set("scale", (real)1.5)
-                                      .set("pow", (real)0.5));
-          Dims dims{numSamples, channels, imgSizeH, imgSizeW};
-          compare.cmpWithArg({Tensor(nullptr, dims)},
-                             {Tensor(nullptr, dims), Tensor(nullptr, dims)},
-                             {});
+          // init Test object
+          FunctionCompare test("CrossMapNormal",
+                               FuncConfig()
+                                   .set("size", size)
+                                   .set("scale", (real)1.5)
+                                   .set("pow", (real)0.5));
+          // prepare input arguments
+          TensorShape shape{numSamples, channels, imgSizeH, imgSizeW};
+          test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
+          test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape));
+          test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape));
+          // run Function
+          test.run();
         }
       }
     }
@@ -53,18 +57,19 @@ TEST(CrossMapNormalGrad, real) {
                 << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW
                 << " size=" << size;

-          FunctionCompare compare("CrossMapNormalGrad",
-                                  FuncConfig()
-                                      .set("size", size)
-                                      .set("scale", (real)1.5)
-                                      .set("pow", (real)0.5));
-          Dims dims{numSamples, channels, imgSizeH, imgSizeW};
-          compare.cmpWithArg({Tensor(nullptr, dims),
-                              Tensor(nullptr, dims),
-                              Tensor(nullptr, dims),
-                              Tensor(nullptr, dims)},
-                             {Tensor(nullptr, dims)},
-                             {});
+          FunctionCompare test("CrossMapNormalGrad",
+                               FuncConfig()
+                                   .set("size", size)
+                                   .set("scale", (real)1.5)
+                                   .set("pow", (real)0.5));
+          TensorShape shape{numSamples, channels, imgSizeH, imgSizeW};
+          test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
+          test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
+          test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
+          test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
+          test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape));
+          // run Function
+          test.run();
         }
       }
     }

@@ -79,15 +79,25 @@ FuncConfig& FuncConfig::set<bool>(const std::string& key, bool v) {
 void BufferArgs::addArg(const Matrix& arg,
                         const TensorShape& shape,
                         ArgType argType) {
-  args_.push_back(std::make_shared<BufferArg>(arg, shape, argType));
+  _args_.push_back(new BufferArg(arg, shape, argType));
+  addArg(*_args_.back());
 }

 void BufferArgs::addArg(const CpuSparseMatrix& arg, ArgType argType) {
-  args_.push_back(std::make_shared<SparseMatrixArg>(arg, argType));
+  _args_.push_back(new SparseMatrixArg(arg, argType));
+  addArg(*_args_.back());
 }

 void BufferArgs::addArg(const GpuSparseMatrix& arg, ArgType argType) {
-  args_.push_back(std::make_shared<SparseMatrixArg>(arg, argType));
+  _args_.push_back(new SparseMatrixArg(arg, argType));
+  addArg(*_args_.back());
+}
+
+void BufferArgs::addArg(const Matrix& matrix,
+                        const IVector& vector,
+                        ArgType argType) {
+  _args_.push_back(new SequenceArg(matrix, vector, argType));
+  addArg(*_args_.back());
 }

 ClassRegistrar<FunctionBase> FunctionBase::funcRegistrar_;

@@ -50,19 +50,44 @@ protected:
 * Argument type for Function::calc().
 * A BufferArgs contains a set of BufferArg,
 * because Function can have multiple inputs and outputs.
+ *
+ * addArg() with a Matrix object is used to adapt a Layer Argument.
+ * It will create a BufferArg object in addArg(),
+ * which is freed in the destructor of BufferArgs.
+ *
+ * addArg() with a BufferArg object just saves the BufferArg object's address,
+ * and the caller needs to guarantee the validity of the BufferArg object
+ * for the lifetime of the BufferArgs.
 */
class BufferArgs {
public:
  BufferArgs() {}
+
+  ~BufferArgs() {
+    for (auto arg : _args_) {
+      delete arg;
+    }
+  }
+
  size_t size() const { return args_.size(); }

  // add argument into BufferArgs
  // Tensor can be Matrix, Vector, IVector.
  // For inputs, do not need argType.
  // For outputs, the argType needs to be specified as ASSIGN_TO or ADD_TO.
-  template <typename Tensor>
-  void addArg(const Tensor& arg, ArgType argType = UNSPECIFIED) {
-    args_.push_back(std::make_shared<BufferArg>(arg, argType));
+  void addArg(const Matrix& arg, ArgType argType = UNSPECIFIED) {
+    _args_.push_back(new BufferArg(arg, argType));
+    addArg(*_args_.back());
+  }
+
+  void addArg(const Vector& arg, ArgType argType = UNSPECIFIED) {
+    _args_.push_back(new BufferArg(arg, argType));
+    addArg(*_args_.back());
+  }
+
+  void addArg(const IVector& arg, ArgType argType = UNSPECIFIED) {
+    _args_.push_back(new BufferArg(arg, argType));
+    addArg(*_args_.back());
  }

  // Add arg into BufferArgs and reshape the arg.
@@ -77,20 +102,37 @@ public:
  void addArg(const CpuSparseMatrix& arg, ArgType argType = UNSPECIFIED);
  void addArg(const GpuSparseMatrix& arg, ArgType argType = UNSPECIFIED);

+  void addArg(const Matrix& matrix,
+              const IVector& vector,
+              ArgType argType = UNSPECIFIED);
+
  // get argument
  const BufferArg& operator[](size_t num) const {
    CHECK_LT(num, args_.size());
    return *args_[num];
  }

+  void addArg(BufferArg& arg) { args_.push_back(&arg); }
+
+  void addArg(SequenceIdArg& arg) { args_.push_back(&arg); }
+
+  void addArg(SequenceArg& arg) { args_.push_back(&arg); }
+
+  void addArg(SparseMatrixArg& arg) { args_.push_back(&arg); }
+
private:
-  std::vector<BufferArgPtr> args_;
+  std::vector<BufferArg*> args_;
+  // The BufferArg objects below are constructed and freed by BufferArgs.
+  std::vector<BufferArg*> _args_;
};

/**
 * \brief Base class for Function.
 * The basic Function implementation requires override init and calc interfaces.
 *
+ * The caller needs to ensure the validity of the arguments
+ * during Function execution.
+ *
 * Function inputs are readonly, Function outputs have two modes: ASSIGN_TO
 * and ADD_TO.
 * If output.getArgType() == ASSIGN_TO, this is assign mode, and the calculation
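Editor's note: the ownership rules in the comment above (owning `_args_` vs. non-owning `args_`) can be summarized with a small self-contained sketch. The types below are simplified stand-ins, not the real Paddle classes:

```cpp
#include <cstddef>
#include <vector>

struct Arg { int id; };  // stand-in for BufferArg

class Args {  // stand-in for BufferArgs
public:
  ~Args() {
    // Free only the arguments this container created itself.
    for (Arg* arg : owned_) delete arg;
  }
  // Adapt-style addArg(): the container news the Arg and owns it.
  void addOwned(int id) {
    owned_.push_back(new Arg{id});
    all_.push_back(owned_.back());
  }
  // Reference-style addArg(): only the address is stored; the caller must
  // keep `arg` alive for the lifetime of this container.
  void addRef(Arg& arg) { all_.push_back(&arg); }

  std::size_t size() const { return all_.size(); }

private:
  std::vector<Arg*> all_;    // every argument, in insertion order
  std::vector<Arg*> owned_;  // only the ones constructed here
};

int main() {
  Arg external{7};  // caller-owned; must outlive `args`
  Args args;
  args.addOwned(1);
  args.addRef(external);
  return args.size() == 2 ? 0 : 1;
}
```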

@@ -14,6 +14,7 @@ limitations under the License. */

 #include "Function.h"
 #include <gtest/gtest.h>
+#include "paddle/math/SparseMatrix.h"

 namespace paddle {

@@ -56,4 +57,110 @@ TEST(Function, BufferArgs) {
   Function<DEVICE_TYPE_GPU>(gpuArgments);
 }

+/**
+ * Some test cases are used to check the consistency between the BufferArg
+ * type argument received by the Function and the original type argument.
+ *
+ * Use Case:
+ *  TEST() {
+ *    Matrix matrix(...);
+ *    CheckBufferArg lambda = [=](const BufferArg& arg) {
+ *      // check matrix and arg are equivalent
+ *      EXPECT_EQ(matrix, arg);
+ *    }
+ *
+ *    BufferArgs argments{matrix...};
+ *    std::vector<CheckBufferArg> checkFunc{lambda...};
+ *    testBufferArgs(argments, checkFunc);
+ *  }
+ */
+typedef std::function<void(const BufferArg&)> CheckBufferArg;
+
+void testBufferArgs(const BufferArgs& inputs,
+                    const std::vector<CheckBufferArg>& check) {
+  EXPECT_EQ(inputs.size(), check.size());
+  for (size_t i = 0; i < inputs.size(); i++) {
+    check[i](inputs[i]);
+  }
+}
+
+void testBufferArgs(const BufferArgs& inputs, const CheckBufferArg& check) {
+  EXPECT_EQ(inputs.size(), 1);
+  check(inputs[0]);
+}
+
+TEST(Arguments, Matrix) {
+  MatrixPtr matrix = Matrix::create(100, 200);
+  CheckBufferArg check = [=](const BufferArg& arg) {
+    EXPECT_EQ(arg.shape().ndims(), 2);
+    EXPECT_EQ(arg.shape()[0], 100);
+    EXPECT_EQ(arg.shape()[1], 200);
+    EXPECT_EQ(arg.data(), matrix->getData());
+
+    EXPECT_EQ(arg.matrix<DEVICE_TYPE_CPU>().getHeight(), matrix->getHeight());
+    EXPECT_EQ(arg.matrix<DEVICE_TYPE_CPU>().getWidth(), matrix->getWidth());
+    EXPECT_EQ(arg.matrix<DEVICE_TYPE_CPU>().getData(), matrix->getData());
+  };
+
+  BufferArgs argments;
+  argments.addArg(*matrix);
+  std::vector<CheckBufferArg> checkFunc;
+  checkFunc.push_back(check);
+  testBufferArgs(argments, checkFunc);
+}
+
+TEST(Arguments, Vector) {
+  VectorPtr vector = Vector::create(100, false);
+  CheckBufferArg check = [=](const BufferArg& arg) {
+    EXPECT_EQ(arg.shape().ndims(), 1);
+    EXPECT_EQ(arg.shape()[0], 100);
+    EXPECT_EQ(arg.data(), vector->getData());
+
+    CpuVector inVector = arg.vector<real, DEVICE_TYPE_CPU>();
+    EXPECT_EQ(inVector.getSize(), vector->getSize());
+    EXPECT_EQ(inVector.getData(), vector->getData());
+  };
+
+  BufferArgs argments;
+  argments.addArg(*vector);
+  std::vector<CheckBufferArg> checkFunc;
+  checkFunc.push_back(check);
+  testBufferArgs(argments, checkFunc);
+}
+
+TEST(Arguments, CpuSparseMatrix) {
+  CpuSparseMatrix sparse(200, 300, 50);
+  CheckBufferArg check = [=](const BufferArg& arg) {
+    EXPECT_EQ(arg.shape().ndims(), 2);
+    EXPECT_EQ(arg.shape()[0], 200);
+    EXPECT_EQ(arg.shape()[1], 300);
+    EXPECT_EQ(arg.data(), sparse.getData());
+    // CHECK_EQ(arg.sparse().nnz(), 50);
+    // CHECK_EQ(arg.sparse().dataFormat(), SPARSE_CSR_FORMAT);
+    // CHECK_EQ(arg.sparse().dataType(), SPARSE_FLOAT_VALUE);
+    EXPECT_EQ(arg.sparse().getRowBuf(), sparse.getRows());
+    EXPECT_EQ(arg.sparse().getColBuf(), sparse.getCols());
+  };
+
+  BufferArgs argments;
+  argments.addArg(sparse);
+  std::vector<CheckBufferArg> checkFunc;
+  checkFunc.push_back(check);
+  testBufferArgs(argments, checkFunc);
+}
+
+TEST(Arguments, BufferArg) {
+  BufferArg arg(nullptr, VALUE_TYPE_FLOAT, {1, 2, 3});
+  CheckBufferArg check = [=](const BufferArg& arg) {
+    EXPECT_EQ(arg.shape().ndims(), 3);
+    EXPECT_EQ(arg.shape()[0], 1);
+    EXPECT_EQ(arg.shape()[1], 2);
+    EXPECT_EQ(arg.shape()[2], 3);
+  };
+
+  BufferArgs argments;
+  argments.addArg(arg);
+  testBufferArgs(argments, check);
+}
+
 }  // namespace paddle

[File diff suppressed because it is too large]

[Some files were not shown because too many files have changed in this diff]
