Merge branch 'develop' of https://github.com/paddlepaddle/paddle into dockerfile-dev

avx_docs
王益 9 years ago
commit 748cb5aff3

@ -25,9 +25,9 @@ addons:
packages:
- gcc-4.8
- g++-4.8
- gfortran-4.8
- git
- build-essential
- libatlas-base-dev
- python
- python-pip
- python2.7-dev

@ -16,7 +16,7 @@
set(CBLAS_FOUND OFF)
## Find MKL First.
set(MKL_ROOT $ENV{MKL_ROOT} CACHE PATH "Folder contains MKL")
set(MKL_ROOT $ENV{MKLROOT} CACHE PATH "Folder contains MKL")
find_path(MKL_INCLUDE_DIR mkl.h PATHS
${MKL_ROOT}/include)

@ -1,11 +1,11 @@
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -29,12 +29,14 @@ INCLUDE_DIRECTORIES(${GLOG_INCLUDE_DIR})
ExternalProject_Add(
glog
${EXTERNAL_PROJECT_LOG_ARGS}
DEPENDS gflags
GIT_REPOSITORY "https://github.com/google/glog.git"
PREFIX ${GLOG_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${GLOG_INSTALL_DIR}
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON
CMAKE_ARGS -DWITH_GFLAGS=OFF
CMAKE_ARGS -DWITH_GFLAGS=ON
CMAKE_ARGS -Dgflags_DIR=${GFLAGS_INSTALL_DIR}/lib/cmake/gflags
CMAKE_ARGS -DBUILD_TESTING=OFF
)

@ -15,7 +15,6 @@
INCLUDE(cblas)
IF(NOT ${CBLAS_FOUND})
MESSAGE(FATAL_ERROR "Please install OpenBlas, MKL or ATLAS.")
INCLUDE(ExternalProject)
SET(CBLAS_SOURCES_DIR ${THIRD_PARTY_PATH}/openblas)
@ -28,20 +27,40 @@ IF(NOT ${CBLAS_FOUND})
SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/libopenblas.a" CACHE FILEPATH "openblas library" FORCE)
ENDIF(WIN32)
IF(CMAKE_COMPILER_IS_GNUCC)
ENABLE_LANGUAGE(Fortran)
LIST(APPEND CBLAS_LIBRARIES gfortran pthread)
ENDIF(CMAKE_COMPILER_IS_GNUCC)
IF(NOT CMAKE_Fortran_COMPILER)
MESSAGE(FATAL_ERROR "To build lapack in libopenblas, "
"you need to set gfortran compiler: cmake .. -DCMAKE_Fortran_COMPILER=...")
ENDIF(NOT CMAKE_Fortran_COMPILER)
ExternalProject_Add(
openblas
${EXTERNAL_PROJECT_LOG_ARGS}
URL "https://github.com/xianyi/OpenBLAS/archive/v0.2.19.tar.gz"
GIT_REPOSITORY https://github.com/xianyi/OpenBLAS.git
GIT_TAG v0.2.19
PREFIX ${CBLAS_SOURCES_DIR}
INSTALL_DIR ${CBLAS_INSTALL_DIR}
BUILD_IN_SOURCE 1
CONFIGURE_COMMAND ""
BUILD_COMMAND make CC=${CMAKE_C_COMPILER} FC=${CMAKE_Fortran_COMPILER}
INSTALL_COMMAND make install PREFIX=<INSTALL_DIR>
BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} FC=${CMAKE_Fortran_COMPILER} CC=${CMAKE_C_COMPILER} HOSTCC=${CMAKE_C_COMPILER} NO_SHARED=1 libs netlib
INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} install NO_SHARED=1 PREFIX=<INSTALL_DIR>
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
)
ExternalProject_Add_Step(
openblas lapacke_install
COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h" "${CBLAS_INSTALL_DIR}/include/lapacke_mangling.h"
COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke.h" "${CBLAS_INSTALL_DIR}/include/lapacke.h"
COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_config.h" "${CBLAS_INSTALL_DIR}/include/lapacke_config.h"
COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_utils.h" "${CBLAS_INSTALL_DIR}/include/lapacke_utils.h"
DEPENDEES install
)
LIST(APPEND external_project_dependencies openblas)
ENDIF()
ENDIF(NOT ${CBLAS_FOUND})
INCLUDE_DIRECTORIES(${CBLAS_INC_DIR})

@ -29,17 +29,12 @@ IF(WIN32)
"${PROTOBUF_INSTALL_DIR}/lib/libprotoc.lib" CACHE FILEPATH "protoc library." FORCE)
SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc.exe" CACHE FILEPATH "protobuf executable." FORCE)
ELSE(WIN32)
IF(${HOST_SYSTEM} STREQUAL "centos")
SET(LIB "lib64")
ELSE()
SET(LIB "lib")
ENDIF()
SET(PROTOBUF_LITE_LIBRARY
"${PROTOBUF_INSTALL_DIR}/${LIB}/libprotobuf-lite.a" CACHE FILEPATH "protobuf lite library." FORCE)
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.a" CACHE FILEPATH "protobuf lite library." FORCE)
SET(PROTOBUF_LIBRARY
"${PROTOBUF_INSTALL_DIR}/${LIB}/libprotobuf.a" CACHE FILEPATH "protobuf library." FORCE)
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.a" CACHE FILEPATH "protobuf library." FORCE)
SET(PROTOBUF_PROTOC_LIBRARY
"${PROTOBUF_INSTALL_DIR}/${LIB}/libprotoc.a" CACHE FILEPATH "protoc library." FORCE)
"${PROTOBUF_INSTALL_DIR}/lib/libprotoc.a" CACHE FILEPATH "protoc library." FORCE)
SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc" CACHE FILEPATH "protobuf executable." FORCE)
ENDIF(WIN32)
@ -54,9 +49,11 @@ ExternalProject_Add(
CONFIGURE_COMMAND
${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/protobuf/cmake
-Dprotobuf_BUILD_TESTS=OFF
-DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}
-DCMAKE_INSTALL_LIBDIR=lib
)
LIST(APPEND external_project_dependencies protobuf)

@ -26,11 +26,12 @@ IF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
find_python_module(wheel REQUIRED)
find_python_module(google.protobuf REQUIRED)
FIND_PACKAGE(NumPy REQUIRED)
IF(${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0")
IF(${PY_GOOGLE.PROTOBUF_VERSION} AND ${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0")
MESSAGE(FATAL_ERROR "Found Python Protobuf ${PY_GOOGLE.PROTOBUF_VERSION} < 3.0.0, "
"please use pip to upgrade protobuf.")
ENDIF(${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0")
"please use pip to upgrade protobuf. pip install -U protobuf")
ENDIF()
ELSE(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
MESSAGE(FATAL_ERROR "Please install python 2.7 before building PaddlePaddle.")
##################################### PYTHON ########################################
SET(PYTHON_SOURCES_DIR ${THIRD_PARTY_PATH}/python)
SET(PYTHON_INSTALL_DIR ${THIRD_PARTY_PATH}/install/python)

@ -38,14 +38,6 @@ IF(NOT SWIG_FOUND)
SET(SWIG_DIR ${SWIG_SOURCES_DIR} CACHE FILEPATH "SWIG Directory" FORCE)
SET(SWIG_EXECUTABLE ${SWIG_SOURCES_DIR}/swig.exe CACHE FILEPATH "SWIG Executable" FORCE)
ELSE(WIN32)
# From PCRE configure
ExternalProject_Add(pcre
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY https://github.com/svn2github/pcre.git
PREFIX ${SWIG_SOURCES_DIR}/pcre
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${SWIG_INSTALL_DIR}/pcre
)
# swig uses bison find it by cmake and pass it down
FIND_PACKAGE(BISON)
@ -54,16 +46,11 @@ IF(NOT SWIG_FOUND)
GIT_REPOSITORY https://github.com/swig/swig.git
GIT_TAG rel-3.0.10
PREFIX ${SWIG_SOURCES_DIR}
CONFIGURE_COMMAND cd ${SWIG_SOURCES_DIR}/src/swig && ./autogen.sh
CONFIGURE_COMMAND cd ${SWIG_SOURCES_DIR}/src/swig &&
env "PCRE_LIBS=${SWIG_INSTALL_DIR}/pcre/lib/libpcre.a ${SWIG_INSTALL_DIR}/pcre/lib/libpcrecpp.a ${SWIG_INSTALL_DIR}/pcre/lib/libpcreposix.a"
./configure
--prefix=${SWIG_INSTALL_DIR}
--with-pcre-prefix=${SWIG_INSTALL_DIR}/pcre
BUILD_COMMAND cd ${SWIG_SOURCES_DIR}/src/swig && make
INSTALL_COMMAND cd ${SWIG_SOURCES_DIR}/src/swig && make install
UPDATE_COMMAND ""
DEPENDS pcre
CONFIGURE_COMMAND cd <SOURCE_DIR> && ./autogen.sh && ./configure
--prefix=${SWIG_INSTALL_DIR} --without-pcre
BUILD_COMMAND cd <SOURCE_DIR> && make
INSTALL_COMMAND cd <SOURCE_DIR> && make install
UPDATE_COMMAND ""
)
SET(SWIG_DIR ${SWIG_INSTALL_DIR}/share/swig/${SWIG_TARGET_VERSION})

@ -54,6 +54,7 @@ ExternalProject_Add(
CMAKE_ARGS -DWITH_GPU=${WITH_GPU}
CMAKE_ARGS -DWITH_OMP=${USE_OMP}
CMAKE_ARGS -DWITH_TORCH=OFF
CMAKE_ARGS -DCMAKE_DISABLE_FIND_PACKAGE_Torch=TRUE
CMAKE_ARGS -DBUILD_SHARED=ON
)

@ -96,6 +96,7 @@ set(COMMON_FLAGS
-Wno-unused-parameter
-Wno-unused-function
-Wno-error=literal-suffix
-Wno-error=sign-compare
-Wno-error=unused-local-typedefs)
set(GPU_COMMON_FLAGS
@ -105,6 +106,7 @@ set(GPU_COMMON_FLAGS
-Wdelete-non-virtual-dtor
-Wno-unused-parameter
-Wno-unused-function
-Wno-error=sign-compare
-Wno-error=literal-suffix
-Wno-error=unused-local-typedefs
-Wno-error=unused-function # Warnings in Numpy Header.

@ -12,6 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Detects the OS and sets appropriate variables.
# CMAKE_SYSTEM_NAME only give us a coarse-grained name,
# but the name like centos is necessary in some scenes
# to distinguish system for customization.
#
# for instance, protobuf libs path is <install_dir>/lib64
# on CentOS, but <install_dir>/lib on other systems.
IF(WIN32)
SET(HOST_SYSTEM "win32")
ELSE(WIN32)
@ -21,6 +29,7 @@ ELSE(WIN32)
SET(MACOS_VERSION ${VERSION})
SET(HOST_SYSTEM "macosx")
ELSE(APPLE)
IF(EXISTS "/etc/issue")
FILE(READ "/etc/issue" LINUX_ISSUE)
IF(LINUX_ISSUE MATCHES "CentOS")
@ -29,8 +38,24 @@ ELSE(WIN32)
SET(HOST_SYSTEM "debian")
ELSEIF(LINUX_ISSUE MATCHES "Ubuntu")
SET(HOST_SYSTEM "ubuntu")
ELSEIF(LINUX_ISSUE MATCHES "Red Hat")
SET(HOST_SYSTEM "redhat")
ELSEIF(LINUX_ISSUE MATCHES "Fedora")
SET(HOST_SYSTEM "fedora")
ENDIF()
ENDIF(EXISTS "/etc/issue")
IF(EXISTS "/etc/redhat-release")
FILE(READ "/etc/redhat-release" LINUX_ISSUE)
IF(LINUX_ISSUE MATCHES "CentOS")
SET(HOST_SYSTEM "centos")
ENDIF()
ENDIF(EXISTS "/etc/redhat-release")
IF(NOT HOST_SYSTEM)
SET(HOST_SYSTEM ${CMAKE_SYSTEM_NAME})
ENDIF()
ENDIF(APPLE)
ENDIF(WIN32)
@ -47,7 +72,7 @@ SET(EXTERNAL_PROJECT_LOG_ARGS
LOG_DOWNLOAD 0 # Wrap download in script to log output
LOG_UPDATE 1 # Wrap update in script to log output
LOG_CONFIGURE 1 # Wrap configure in script to log output
LOG_BUILD 1 # Wrap build in script to log output
LOG_BUILD 0 # Wrap build in script to log output
LOG_TEST 1 # Wrap test in script to log output
LOG_INSTALL 1 # Wrap install in script to log output
LOG_INSTALL 0 # Wrap install in script to log output
)

@ -25,6 +25,6 @@ paddle train \
--config_args=is_predict=1 \
--predict_output_dir=.
python gen_result.py > result.txt
python gen_result.py > result.csv
rm -rf rank-00000

@ -382,6 +382,15 @@ sampling_id_layer
:members: sampling_id_layer
:noindex:
Slicing and Joining Layers
==========================
pad_layer
-----------
.. automodule:: paddle.trainer_config_helpers.layers
:members: pad_layer
:noindex:
.. _api_trainer_config_helpers_layers_cost_layers:
Cost Layers

@ -4,6 +4,8 @@ Installing from Sources
* [1. Download and Setup](#download)
* [2. Requirements](#requirements)
* [3. Build on Ubuntu](#ubuntu)
* [4. Build on Centos](#centos)
## <span id="download">Download and Setup</span>
You can download PaddlePaddle from the [github source](https://github.com/PaddlePaddle/Paddle).
@ -16,9 +18,10 @@ cd paddle
To compile the source code, your computer must be equipped with the following dependencies.
- **Compiler**: GCC >= 4.8 or Clang >= 3.3 (AppleClang >= 5.1)
- **CMake**: version >= 3.0 (at least CMake 3.4 on Mac OS X)
- **Compiler**: GCC >= 4.8 or Clang >= 3.3 (AppleClang >= 5.1) and gfortran compiler
- **CMake**: CMake >= 3.0 (at least CMake 3.4 on Mac OS X)
- **BLAS**: MKL, OpenBlas or ATLAS
- **Python**: only support Python 2.7
**Note:** For CUDA 7.0 and CUDA 7.5, GCC 5.0 and up are not supported!
For CUDA 8.0, GCC versions later than 5.3 are not supported!
@ -64,7 +67,8 @@ As a simple example, consider the following:
1. **BLAS Dependencies(optional)**
Paddle will find BLAS from system's default path. But you can specify MKL, OpenBLAS or ATLAS via `MKL_ROOT`, `OPENBLAS_ROOT` or `ATLAS_ROOT`.
CMake will search BLAS libraries from system. If not found, OpenBLAS will be downloaded, built and installed automatically.
To utilize preinstalled BLAS you can simply specify MKL, OpenBLAS or ATLAS via `MKL_ROOT`, `OPENBLAS_ROOT` or `ATLAS_ROOT`.
```bash
# specify MKL
@ -94,12 +98,78 @@ As a simple example, consider the following:
### Install Dependencies
- **CPU Dependencies**
- **Paddle Dependencies**
```bash
# necessary
sudo apt-get update
sudo apt-get install -y g++ make cmake build-essential libatlas-base-dev python python-pip libpython-dev git
sudo apt-get install -y git curl gcc g++ gfortran make build-essential automake
sudo apt-get install -y python python-pip python-numpy libpython-dev bison
sudo pip install 'protobuf==3.1.0.post1'
# install cmake 3.4
curl -sSL https://cmake.org/files/v3.4/cmake-3.4.1.tar.gz | tar -xz && \
cd cmake-3.4.1 && ./bootstrap && make -j4 && sudo make install && \
cd .. && rm -rf cmake-3.4.1
```
- **GPU Dependencies (optional)**
To build GPU version, you will need the following installed:
1. a CUDA-capable GPU
2. A supported version of Linux with a gcc compiler and toolchain
3. NVIDIA CUDA Toolkit (available at http://developer.nvidia.com/cuda-downloads)
4. NVIDIA cuDNN Library (availabel at https://developer.nvidia.com/cudnn)
The CUDA development environment relies on tight integration with the host development environment,
including the host compiler and C runtime libraries, and is therefore only supported on
distribution versions that have been qualified for this CUDA Toolkit release.
After downloading cuDNN library, issue the following commands:
```bash
sudo tar -xzf cudnn-7.5-linux-x64-v5.1.tgz -C /usr/local
sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib64/libcudnn*
```
Then you need to set LD\_LIBRARY\_PATH, PATH environment variables in ~/.bashrc.
```bash
export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
export PATH=/usr/local/cuda/bin:$PATH
```
### Build and Install
As usual, the best option is to create build folder under paddle project directory.
```bash
mkdir build && cd build
```
Finally, you can build and install PaddlePaddle:
```bash
# you can add build option here, such as:
cmake .. -DCMAKE_INSTALL_PREFIX=<path to install>
# please use sudo make install, if you want to install PaddlePaddle into the system
make -j `nproc` && make install
# set PaddlePaddle installation path in ~/.bashrc
export PATH=<path to install>/bin:$PATH
# install PaddlePaddle Python modules.
sudo pip install <path to install>/opt/paddle/share/wheels/*.whl
```
## <span id="centos">Build on Centos 7</span>
### Install Dependencies
- **CPU Dependencies**
```bash
# necessary
sudo yum update
sudo yum install -y epel-release
sudo yum install -y make cmake3 python-devel python-pip gcc-gfortran swig git
sudo pip install wheel numpy
sudo pip install 'protobuf>=3.0.0'
```
@ -142,7 +212,7 @@ Finally, you can build and install PaddlePaddle:
```bash
# you can add build option here, such as:
cmake .. -DCMAKE_INSTALL_PREFIX=<path to install>
cmake3 .. -DCMAKE_INSTALL_PREFIX=<path to install>
# please use sudo make install, if you want to install PaddlePaddle into the system
make -j `nproc` && make install
# set PaddlePaddle installation path in ~/.bashrc

@ -32,7 +32,7 @@ pooling_layer 的使用示例如下,详细见 :ref:`api_trainer_config_helpers
- `pooling_type` 目前支持两种分别是MaxPooling()和AvgPooling()。
- `agg_level=AggregateLevel.TIMESTEP` 时(默认值):
- `agg_level=AggregateLevel.EACH_TIMESTEP` 时(默认值):
- 作用双层序列经过运算变成一个0层序列或单层序列经过运算变成一个0层序列
- 输入:一个双层序列,或一个单层序列
@ -54,7 +54,7 @@ last_seq 的使用示例如下( :ref:`api_trainer_config_helpers_layers_first_
last = last_seq(input=layer,
agg_level=AggregateLevel.EACH_SEQUENCE)
- `agg_level=AggregateLevel.TIMESTEP` 时(默认值):
- `agg_level=AggregateLevel.EACH_TIMESTEP` 时(默认值):
- 作用一个双层序列经过运算变成一个0层序列或一个单层序列经过运算变成一个0层序列
- 输入:一个双层序列或一个单层序列

File diff suppressed because it is too large Load Diff

Binary file not shown.

After

Width:  |  Height:  |  Size: 70 KiB

@ -20,23 +20,27 @@ limitations under the License. */
namespace paddle {
const SequenceArg& BufferArg::sequence() const {
// CHECK_EQ(bufferType_, TENSOR_SEQUENCE_DATA);
CHECK_EQ(bufferType_, TENSOR_SEQUENCE_DATA);
return dynamic_cast<const SequenceArg&>(*this);
}
const SparseMatrixArg& BufferArg::sparse() const {
// CHECK_EQ(bufferType_, TENSOR_SPARSE);
CHECK_EQ(bufferType_, TENSOR_SPARSE);
return dynamic_cast<const SparseMatrixArg&>(*this);
}
SparseMatrixArg::SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType)
: BufferArg(sparse, argType),
row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {}
col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {
bufferType_ = TENSOR_SPARSE;
}
SparseMatrixArg::SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType)
: BufferArg(sparse, argType),
row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {}
col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {
bufferType_ = TENSOR_SPARSE;
}
} // namespace paddle

@ -23,10 +23,11 @@ limitations under the License. */
namespace paddle {
enum BufferType {
TENSOR_NORMAL = 0,
TENSOR_SEQUENCE_ID = 1,
TENSOR_SEQUENCE_DATA = 2,
TENSOR_SPARSE = 3
TENSOR_UNKNOWN = 0,
TENSOR_NORMAL = 1,
TENSOR_SEQUENCE_ID = 2,
TENSOR_SEQUENCE_DATA = 3,
TENSOR_SPARSE = 4
};
enum SparseDataType {
@ -39,7 +40,6 @@ enum SparseDataFormat { SPARSE_CSR_FORMAT = 0, SPARSE_CSC_FORMAT = 1 };
class BufferArg;
class SequenceArg;
class SparseMatrixArg;
typedef std::shared_ptr<BufferArg> BufferArgPtr;
/**
* \brief BufferArg used as the argument type of Function.
@ -50,6 +50,11 @@ typedef std::shared_ptr<BufferArg> BufferArgPtr;
* 3. SequenceArg for a Buffer of sequence data.
* 4. SparseMatrixArg for a Buffer of sparse matrix.
*
* Buffer shape
* For most buffers, the first dimension `shape()[0]` represents
* the size of the mini-batch.
*
* Buffer argType
* There is an ArgType property for the BufferArg used as Function Output.
* Whether the result of the Function calculation is assigned to the
* output Buffer or added to the output Buffer is determined by the
@ -71,6 +76,14 @@ public:
ArgType getArgType() const { return argType_; }
public:
BufferArg(ValueType valueType,
const TensorShape& shape,
ArgType argType = UNSPECIFIED)
: buf_(nullptr),
valueType_(valueType),
shape_(shape),
argType_(argType) {}
BufferArg(void* buf,
ValueType valueType,
const TensorShape& shape,
@ -86,6 +99,7 @@ public:
valueType_(DataType<real>::value),
shape_(2),
argType_(argType) {
bufferType_ = TENSOR_NORMAL;
shape_.setDim(0, matrix.getHeight());
shape_.setDim(1, matrix.getWidth());
}
@ -98,6 +112,7 @@ public:
valueType_(DataType<real>::value),
shape_(shape),
argType_(argType) {
bufferType_ = TENSOR_NORMAL;
CHECK_EQ(matrix.getElementCnt(), shape.getElements());
}
@ -107,6 +122,7 @@ public:
valueType_(DataType<real>::value),
shape_(1),
argType_(argType) {
bufferType_ = TENSOR_NORMAL;
shape_.setDim(0, vector.getSize());
}
@ -116,6 +132,7 @@ public:
valueType_(VALUE_TYPE_INT32),
shape_(1),
argType_(argType) {
bufferType_ = TENSOR_NORMAL;
shape_.setDim(0, vector.getSize());
}
@ -150,6 +167,8 @@ public:
ValueType valueType() const { return valueType_; }
BufferType bufferType() const { return bufferType_; }
const TensorShape& shape() const { return shape_; }
bool isSparse() const { return (TENSOR_SPARSE == bufferType_); }
bool isSequenceArg() const { return TENSOR_SEQUENCE_DATA == bufferType_; }
const SequenceArg& sequence() const;
const SparseMatrixArg& sparse() const;
@ -158,8 +177,8 @@ protected:
void* buf_;
ValueType valueType_;
TensorShape shape_;
BufferType bufferType_;
ArgType argType_ = UNSPECIFIED;
BufferType bufferType_{TENSOR_UNKNOWN};
ArgType argType_{UNSPECIFIED};
// leading dimensions. The size is dims_.size()
// Dims lds_;
};
@ -170,15 +189,24 @@ protected:
// if a < b then value_.buf_[a] < value_.buf_[b]
class SequenceIdArg : public BufferArg {
public:
SequenceIdArg(const TensorShape& shape, ArgType argType = UNSPECIFIED)
: BufferArg(VALUE_TYPE_INT32, shape, argType) {
CHECK_EQ(shape_.ndims(), (size_t)1);
CHECK_GT(shape_[0], 1);
numSeqs_ = shape_[0] - 1;
}
SequenceIdArg(void* buf,
const TensorShape& shape,
ArgType argType = UNSPECIFIED)
: BufferArg(buf, VALUE_TYPE_INT32, shape, argType) {
bufferType_ = TENSOR_SEQUENCE_ID;
CHECK_EQ(shape_.ndims(), (size_t)1);
numSeqs_ = shape_[0] - 1;
}
SequenceIdArg(const IVector& vector) : BufferArg(vector) {
bufferType_ = TENSOR_SEQUENCE_ID;
numSeqs_ = shape_[0] - 1;
}
@ -190,26 +218,41 @@ private:
size_t numSeqs_;
};
// sequence data
// sequences data
// For mini-batch calculate,
// one batch can contain more than one sequence of data.
// SequenceArg can be used to represent sequences that contain multiple
// unequal lengths.
class SequenceArg : public BufferArg {
public:
SequenceArg(ValueType valueType,
const TensorShape& shape,
ArgType argType = UNSPECIFIED)
: BufferArg(valueType, shape, argType), startPositions_(TensorShape()) {}
SequenceArg(void* buf,
ValueType valueType,
const TensorShape& shape,
const SequenceIdArg& startPositions,
ArgType argType = UNSPECIFIED)
: BufferArg(buf, valueType, shape, argType),
startPositions_(startPositions) {}
startPositions_(startPositions) {
bufferType_ = TENSOR_SEQUENCE_DATA;
}
SequenceArg(const Matrix& matrix,
const IVector& vector,
ArgType argType = UNSPECIFIED)
: BufferArg(matrix, argType), startPositions_(vector) {}
: BufferArg(matrix, argType), startPositions_(vector) {
bufferType_ = TENSOR_SEQUENCE_DATA;
}
~SequenceArg() {}
void* getIdBuf() const { return startPositions_.data(); }
size_t numSeqs() const { return startPositions_.numSeqs(); }
SequenceIdArg& getSequenceId() { return startPositions_; }
const SequenceIdArg& getSequenceId() const { return startPositions_; }
private:
SequenceIdArg startPositions_;
@ -235,6 +278,7 @@ public:
nnz_(nnz),
format_(format),
type_(type) {
bufferType_ = TENSOR_SPARSE;
CHECK((valueType == VALUE_TYPE_FLOAT) || (valueType == VALUE_TYPE_DOUBLE));
CHECK_EQ(shape_.ndims(), (size_t)2);
CHECK_EQ(row_.shape().ndims(), (size_t)1);

@ -14,9 +14,7 @@ limitations under the License. */
#include "BufferArg.h"
#include <gtest/gtest.h>
#include "Function.h"
#include "paddle/math/MemoryHandle.h"
#include "paddle/math/SparseMatrix.h"
namespace paddle {
@ -37,55 +35,4 @@ TEST(BufferTest, SequenceIdArg) {
EXPECT_EQ(buffer.numSeqs(), 9);
}
TEST(BufferTest, asArgument) {
MatrixPtr matrix = Matrix::create(100, 200);
VectorPtr vector = Vector::create(100, false);
CpuSparseMatrix sparse(200, 300, 50);
// prepare arguments
BufferArgs argments;
argments.addArg(*matrix);
argments.addArg(*vector);
argments.addArg(sparse);
// function
auto function = [=](const BufferArgs& inputs) {
EXPECT_EQ(inputs.size(), 3);
// check inputs[0]
EXPECT_EQ(inputs[0].shape().ndims(), 2);
EXPECT_EQ(inputs[0].shape()[0], 100);
EXPECT_EQ(inputs[0].shape()[1], 200);
EXPECT_EQ(inputs[0].data(), matrix->getData());
EXPECT_EQ(inputs[0].matrix<DEVICE_TYPE_CPU>().getHeight(),
matrix->getHeight());
EXPECT_EQ(inputs[0].matrix<DEVICE_TYPE_CPU>().getWidth(),
matrix->getWidth());
EXPECT_EQ(inputs[0].matrix<DEVICE_TYPE_CPU>().getData(), matrix->getData());
// check inputs[1]
EXPECT_EQ(inputs[1].shape().ndims(), 1);
EXPECT_EQ(inputs[1].shape()[0], 100);
EXPECT_EQ(inputs[1].data(), vector->getData());
CpuVector inVector = inputs[1].vector<real, DEVICE_TYPE_CPU>();
EXPECT_EQ(inVector.getSize(), vector->getSize());
EXPECT_EQ(inVector.getData(), vector->getData());
// check inputs[2]
EXPECT_EQ(inputs[2].shape().ndims(), 2);
EXPECT_EQ(inputs[2].shape()[0], 200);
EXPECT_EQ(inputs[2].shape()[1], 300);
EXPECT_EQ(inputs[2].data(), sparse.getData());
// CHECK_EQ(inputs[2].sparse().nnz(), 50);
// CHECK_EQ(inputs[2].sparse().dataFormat(), SPARSE_CSR_FORMAT);
// CHECK_EQ(inputs[2].sparse().dataType(), SPARSE_FLOAT_VALUE);
EXPECT_EQ(inputs[2].sparse().getRowBuf(), sparse.getRows());
EXPECT_EQ(inputs[2].sparse().getColBuf(), sparse.getCols());
};
// call function
function(argments);
}
} // namespace paddle

@ -19,12 +19,13 @@ if(WITH_TESTING)
# TODO:
# file(GLOB test_files . *OpTest.cpp)
# add_executable(${test_bin} EXCLUDE_FROM_ALL ${test_files})
# add_simple_unittest(CrossMapNormalOpTest)
add_simple_unittest(CrossMapNormalOpTest)
add_simple_unittest(TensorShapeTest)
add_simple_unittest(TensorTypeTest)
add_simple_unittest(BufferArgTest)
add_simple_unittest(FunctionTest)
# add_simple_unittest(ContextProjectionOpTest)
add_simple_unittest(ContextProjectionOpTest)
add_simple_unittest(PadOpTest)
endif()
endif()

File diff suppressed because it is too large Load Diff

@ -21,14 +21,14 @@ namespace paddle {
/**
* \brief Context Projection Forward.
*
* \param[out] outputs output data.
* \param[in] input input data.
* \param[in] weight input weight.
* \param[in] sequence input data.
* \param[in] context_length consecutive rows for concatenation.
* \param[in] context_start context start position.
* \param[in] begin_pad begining pad position.
* \param[in] is_padding whether padding 0 or not.
* \param[in/out] outputs output data.
* \param[in] input input data.
* \param[in] weight input weight.
* \param[in] sequence input data.
* \param[in] context_length consecutive rows for concatenation.
* \param[in] context_start context start position.
* \param[in] begin_pad begining pad position.
* \param[in] is_padding whether padding 0 or not.
*
*/
template <DeviceType DType>
@ -56,7 +56,7 @@ void ContextProjectionForward(
*/
template <DeviceType DType>
void ContextProjectionBackward(
typename Tensor<real, DType>::Matrix& out_grad,
const typename Tensor<real, DType>::Matrix& out_grad,
typename Tensor<real, DType>::Matrix& in_grad,
typename Tensor<real, DType>::Matrix& w_grad,
const typename Tensor<int, DType>::Vector& seq_vec,
@ -68,7 +68,7 @@ void ContextProjectionBackward(
template <DeviceType DType>
void ContextProjectionBackwardData(
typename Tensor<real, DType>::Matrix& out_grad,
const typename Tensor<real, DType>::Matrix& out_grad,
typename Tensor<real, DType>::Matrix& in_grad,
const typename Tensor<int, DType>::Vector& sequence,
size_t context_length,
@ -76,7 +76,7 @@ void ContextProjectionBackwardData(
template <DeviceType DType>
void ContextProjectionBackwardWeight(
typename Tensor<real, DType>::Matrix& out_grad,
const typename Tensor<real, DType>::Matrix& out_grad,
typename Tensor<real, DType>::Matrix& w_grad,
const typename Tensor<int, DType>::Vector& seq_vec,
size_t context_length,

@ -138,10 +138,10 @@ void ContextProjectionForward<DEVICE_TYPE_GPU>(GpuMatrix& output,
begin_pad);
}
__global__ void KeContextProjectionBackwardData(real* out_grad,
__global__ void KeContextProjectionBackwardData(const real* out_grad,
const int* sequence,
real* in_grad,
int input_dim,
size_t input_dim,
int context_length,
int context_start) {
int idx = threadIdx.x;
@ -152,7 +152,8 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
real value = 0;
int instances = seq_end - seq_start + context_length - 1;
out_grad += seq_start * input_dim * context_length;
auto out = const_cast<real*>(out_grad);
out += seq_start * input_dim * context_length;
in_grad += seq_start * input_dim;
for (int k = 0; k <= input_dim / block_size; k++) {
if (idx < input_dim) {
@ -169,7 +170,7 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
int outx = (i - context_length) < 0 ? i : (context_length - 1);
int outy = (i - context_length) < 0 ? 0 : (i - (context_length - 1));
real* output_r =
out_grad + outy * input_dim * context_length + outx * input_dim;
out + outy * input_dim * context_length + outx * input_dim;
for (int j = outy; j < seq_end - seq_start; j++) {
value += output_r[idx];
if (j - outy == outx) break;
@ -194,7 +195,7 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
* @param[in] context_start context start.
*
*/
void hl_context_projection_backward_data(real* out_grad,
void hl_context_projection_backward_data(const real* out_grad,
const int* sequence,
real* input_grad,
size_t num_sequences,
@ -216,7 +217,7 @@ void hl_context_projection_backward_data(real* out_grad,
}
template <>
void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(GpuMatrix& out_grad,
void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(const GpuMatrix& out_grad,
GpuMatrix& in_grad,
const GpuIVector& sequence,
size_t context_length,
@ -231,7 +232,7 @@ void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(GpuMatrix& out_grad,
}
template<int THREADS_X, int THREADS_Y>
__global__ void KeContextProjectionBackwardWeight(real* out_grad,
__global__ void KeContextProjectionBackwardWeight(const real* out_grad,
const int* sequence,
real* w_grad,
int num_sequences,
@ -254,7 +255,8 @@ __global__ void KeContextProjectionBackwardWeight(real* out_grad,
for (int seqId = idy; seqId < num_sequences; seqId += THREADS_Y) {
int seq_start = sequence[seqId];
int seq_end = sequence[seqId+1];
output_r = out_grad + seq_start * w_dim * context_length;
output_r = const_cast<real*>(out_grad)
+ seq_start * w_dim * context_length;
if (context_start < 0) {
if (padId + context_start < 0) {
@ -318,7 +320,7 @@ __global__ void KeContextProjectionBackwardWeight(real* out_grad,
* beginning.
*
*/
void hl_context_projection_backward_weight(real* out_grad,
void hl_context_projection_backward_weight(const real* out_grad,
const int* sequence,
real* w_grad,
size_t num_sequences,
@ -346,7 +348,7 @@ void hl_context_projection_backward_weight(real* out_grad,
template <>
void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(
GpuMatrix& out_grad,
const GpuMatrix& out_grad,
GpuMatrix& w_grad,
const GpuIVector& seq_vec,
size_t context_length,
@ -365,7 +367,7 @@ void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(
}
template <>
void ContextProjectionBackward<DEVICE_TYPE_GPU>(GpuMatrix& out_grad,
void ContextProjectionBackward<DEVICE_TYPE_GPU>(const GpuMatrix& out_grad,
GpuMatrix& in_grad,
GpuMatrix& w_grad,
const GpuIVector& sequence,

@ -56,22 +56,25 @@ void testMatrixProjectionForward(int context_start,
cpu_out.randomizeUniform();
gpu_out.copyFrom(cpu_out);
compare.getCpuFunction()->calc(
{Tensor(cpu_in.getData(), Dims{batch_size, input_dim}),
Tensor(cpu_weight ? cpu_weight->getData() : nullptr,
Dims{pad, input_dim}),
Tensor(reinterpret_cast<real*>(cpu_seq->getData()),
Dims{cpu_seq->getSize()})},
{Tensor(cpu_out.getData(), Dims{batch_size, input_dim * context_length})},
{});
compare.getGpuFunction()->calc(
{Tensor(gpu_in.getData(), Dims{batch_size, input_dim}),
Tensor(gpu_weight ? gpu_weight->getData() : nullptr,
Dims{pad, input_dim}),
Tensor(reinterpret_cast<real*>(gpu_seq->getData()),
Dims{gpu_seq->getSize()})},
{Tensor(gpu_out.getData(), Dims{batch_size, input_dim * context_length})},
{});
BufferArgs cpu_inputs;
BufferArgs cpu_outputs;
cpu_inputs.addArg(cpu_in, *cpu_seq);
if (cpu_weight) {
cpu_inputs.addArg(*cpu_weight, *cpu_seq);
}
cpu_outputs.addArg(cpu_out, *cpu_seq, ADD_TO);
compare.getCpuFunction()->calc(cpu_inputs, cpu_outputs);
BufferArgs gpu_inputs;
BufferArgs gpu_outputs;
gpu_inputs.addArg(gpu_in, *gpu_seq);
if (gpu_weight) {
gpu_inputs.addArg(*gpu_weight, *gpu_seq);
}
gpu_outputs.addArg(gpu_out, *gpu_seq, ADD_TO);
compare.getGpuFunction()->calc(gpu_inputs, gpu_outputs);
autotest::TensorCheckEqual(cpu_out, gpu_out);
}
@ -117,25 +120,23 @@ void testMatrixProjectionBackward(int context_start,
gpu_w_grad->copyFrom(*cpu_w_grad);
}
compare.getCpuFunction()->calc(
{Tensor(cpu_in_grad.getData(), Dims{batch_size, input_dim}),
Tensor(cpu_w_grad ? cpu_w_grad->getData() : nullptr,
Dims{pad, input_dim}),
Tensor(reinterpret_cast<real*>(cpu_seq->getData()),
Dims{cpu_seq->getSize()})},
{Tensor(cpu_out_grad.getData(),
Dims{batch_size, input_dim * context_length})},
{});
compare.getGpuFunction()->calc(
{Tensor(gpu_in_grad.getData(), Dims{batch_size, input_dim}),
Tensor(gpu_w_grad ? gpu_w_grad->getData() : nullptr,
Dims{pad, input_dim}),
Tensor(reinterpret_cast<real*>(gpu_seq->getData()),
Dims{gpu_seq->getSize()})},
{Tensor(gpu_out_grad.getData(),
Dims{batch_size, input_dim * context_length})},
{});
BufferArgs cpu_inputs;
BufferArgs cpu_outputs;
cpu_inputs.addArg(cpu_out_grad, *cpu_seq);
cpu_outputs.addArg(cpu_in_grad, *cpu_seq, ADD_TO);
cpu_outputs.addArg(
cpu_w_grad ? *cpu_w_grad : CpuMatrix(nullptr, 0, input_dim), ADD_TO);
compare.getCpuFunction()->calc(cpu_inputs, cpu_outputs);
BufferArgs gpu_inputs;
BufferArgs gpu_outputs;
gpu_inputs.addArg(gpu_out_grad, *gpu_seq);
gpu_outputs.addArg(gpu_in_grad, *gpu_seq, ADD_TO);
gpu_outputs.addArg(
gpu_w_grad ? *gpu_w_grad : GpuMatrix(nullptr, 0, input_dim), ADD_TO);
compare.getGpuFunction()->calc(gpu_inputs, gpu_outputs);
autotest::TensorCheckErr(cpu_in_grad, gpu_in_grad);
if (is_padding) {

@ -112,11 +112,51 @@ void CrossMapNormalGrad<DEVICE_TYPE_CPU>(real* inputsGrad,
}
/**
* \brief {o_0, o_1} = calc(i_0)
* \brief Normalization with across maps.
*
* \param inputs[0] input value.
* \param outputs[0] output value.
* \param outputs[1] denoms.
* This Function comes from the paper
* "ImageNet Classification with Deep Convolutional Neural Networks".
*
* The original formula is:
*
* Input(i, x, y)
* Output(i, x, y) = ----------------------------------------------
* -- upper
* (k + alpha * > (Input(j, x, y))^2) ^ (beta)
* -- j = lower
*
* upper is `min(C, c + N/2)`
* lower if `max(0, c - N/2)`
*
* Function implementation:
*
* inputs and outpus is NCHW format, while input.shape.ndims() is equal 4.
* And the meaning of each dimension(0-3) is respectively batch size,
* feature maps, rows and columns.
*
* Input and Output in the above formula is for each map(i) of one image, and
* Input(i, x, y), Output(i, x, y) represents an element in an image.
*
* C is the number of feature maps of one image, and N is a hyper-parameters
* is configured when Function is initialized. The sum in the denominator
* is the sum of the same position in the neighboring maps.
*
* In the implementation of Function, k is equal to 1,
* so Function has no argument for k.
*
* Function Arguments:
*
* \param size_ represent N
* \param scale_ represent alpha
* \param pow_ represent beta
* \param inputs[0] represent Input
* \param outputs[0] represent Output
* \param outputs[1] represent The denominator in the formula(except beta)
*
* Note:
* Save output[1] is to simplify the backward calculation.
* TODO, if only consider the forward calculation, we can optimize to
* remove the output[1].
*/
template <DeviceType Device>
class CrossMapNormalFunc : public FunctionBase {
@ -161,13 +201,36 @@ private:
};
/**
* \brief {o_0} = calc(i_0, i_1, i_2, i_3)
* \brief Backward calculation for normalization with across maps.
*
* Function implementation:
*
* The implementation of this Function is derived from the
* CrossMapNormalFunc implementation.
*
* InputGrad = OutputGrad * denoms ^ (-beta)
* -- upper
* + > (OutputGrad * OutputValue * (-2 * alpha * beta) / denoms) * InputValue
* -- lower
*
* \param inputs[0] input value.
* \param inputs[1] output value.
* \param inputs[2] output grad.
* \param inputs[3] denoms.
* \param outputs[0] input grad.
* The data of inputs/outputs format is the same as the forward interface
* and is NCHW.
*
* The upper and lower is the same as forward. The logic of the sum
* is also the same as forward.
*
* Function Arguments:
*
* \param size_ represent N
* \param scale_ represent alpha
* \param pow_ represent beta
* \param inputs[0] represent InputValue, inputs[0] of CrossMapNormalFunc
* \param inputs[1] represent OutputValue, outputs[0] of CrossMapNormalFunc
* \param inputs[2] represent OutputGrad
* \param inputs[3] represent denoms, outputs[1] of CrossMapNormalFunc
* This is the intermediate result that is
* preserved in the forward calculation.
* \param outputs[0] represent InputGrad
*/
template <DeviceType Device>
class CrossMapNormalGradFunc : public FunctionBase {
@ -188,8 +251,13 @@ public:
CHECK(inputs[0].shape() == inputs[3].shape());
CHECK(inputs[0].shape() == outputs[0].shape());
// TODO(hedaoyuan): need support ASSIGN_TO mode.
CHECK_EQ(outputs[0].getArgType(), ADD_TO);
if (outputs[0].getArgType() != ADD_TO) {
// Currently, some algorithm implementations are ASSIGN_TO mode,
// if need to support the ADD_TO calculation, need to clear the output.
typename Tensor<real, Device>::Vector tmp(
outputs[0].shape().getElements(), outputs[0].data<real>());
tmp.zero();
}
size_t samples = inputs[0].shape()[0];
size_t channels = inputs[0].shape()[1];

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save