Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into thinnerdocker

0.10.0rc
yi.wu 9 years ago
commit c1d5aaa15c

@ -1,9 +1,9 @@
# Use ccache if found ccache program
find_program(CCACHE_FOUND ccache)
find_program(CCACHE_PATH ccache)
if(CCACHE_FOUND)
if(CCACHE_PATH)
message(STATUS "Ccache is founded, use ccache to speed up compile.")
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache)
endif(CCACHE_FOUND)
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_PATH})
set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_PATH})
endif(CCACHE_PATH)

@ -16,6 +16,14 @@ INCLUDE(ExternalProject)
FIND_PACKAGE(Protobuf 3.1)
IF(PROTOBUF_FOUND)
EXEC_PROGRAM(${PROTOBUF_PROTOC_EXECUTABLE} ARGS --version OUTPUT_VARIABLE PROTOBUF_VERSION)
STRING(REGEX MATCH "[0-9]+.[0-9]+" PROTOBUF_VERSION "${PROTOBUF_VERSION}")
IF (${PROTOBUF_VERSION} VERSION_LESS "3.1.0")
SET(PROTOBUF_FOUND OFF)
ENDIF()
ENDIF(PROTOBUF_FOUND)
IF(NOT PROTOBUF_FOUND)
SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/protobuf)
SET(PROTOBUF_INSTALL_DIR ${THIRD_PARTY_PATH}/install/protobuf)

@ -71,21 +71,10 @@ function(link_paddle_exe TARGET_NAME)
generate_rdma_links()
endif()
if(WITH_METRIC)
if(WITH_GPU)
set(METRIC_LIBS paddle_metric_learning paddle_dserver_lib metric metric_cpu)
else()
set(METRIC_LIBS paddle_metric_learning paddle_dserver_lib metric_cpu)
endif()
else()
set(METRIC_LIBS "")
endif()
target_circle_link_libraries(${TARGET_NAME}
ARCHIVE_START
paddle_gserver
paddle_function
${METRIC_LIBS}
ARCHIVE_END
paddle_pserver
paddle_trainer_lib
@ -95,7 +84,6 @@ function(link_paddle_exe TARGET_NAME)
paddle_parameter
paddle_proto
paddle_cuda
${METRIC_LIBS}
${EXTERNAL_LIBS}
${CMAKE_THREAD_LIBS_INIT}
${CMAKE_DL_LIBS}

@ -286,3 +286,16 @@ PaddlePaddle的参数使用名字 :code:`name` 作为参数的ID相同名字
.. code-block:: bash
paddle train --use_gpu=true --trainer_count=2 --gpu_id=2
12. 训练过程中出现 :code:`Floating point exception`, 训练因此退出怎么办?
------------------------------------------------------------------------
Paddle二进制在运行时捕获了浮点数异常只要出现浮点数异常(即训练过程中出现NaN或者Inf),立刻退出。浮点异常通常的原因是浮点数溢出、除零等问题。
主要原因包括两个方面:
* 训练过程中参数或者训练过程中的梯度尺度过大,导致参数累加,乘除等时候,导致了浮点数溢出。
* 模型一直不收敛,发散到了一个数值特别大的地方。
* 训练数据有问题,导致参数收敛到了一些奇异的情况。或者输入数据尺度过大,有些特征的取值达到数百万,这时进行矩阵乘法运算就可能导致浮点数溢出。
主要的解决办法是减小学习律或者对数据进行归一化处理。

@ -4,6 +4,86 @@ PaddlePaddle的Docker容器使用方式
PaddlePaddle目前唯一官方支持的运行的方式是Docker容器。因为Docker能在所有主要操作系统包括LinuxMac OS X和Windows上运行。 请注意,您需要更改 `Dockers设置 <https://github.com/PaddlePaddle/Paddle/issues/627>`_ 才能充分利用Mac OS X和Windows上的硬件资源。
纯CPU和GPU的docker镜像使用说明
------------------------------
对于每一个PaddlePaddle版本我们都会发布两个Docker镜像纯CPU的和GPU的。
我们通过设置 `dockerhub.com <https://hub.docker.com/r/paddledev/paddle/>`_ 自动生成最新的docker镜像
`paddledev/paddle:0.10.0rc1-cpu``paddledev/paddle:0.10.0rc1-gpu`
以交互容器方式运行纯CPU的镜像
.. code-block:: bash
docker run -it --rm paddledev/paddle:0.10.0rc1-cpu /bin/bash
或者,可以以后台进程方式运行容器:
.. code-block:: bash
docker run -d -p 2202:22 -p 8888:8888 paddledev/paddle:0.10.0rc1-cpu
然后用密码 :code:`root` SSH进入容器
.. code-block:: bash
ssh -p 2202 root@localhost
SSH方式的一个优点是我们可以从多个终端进入容器。比如一个终端运行vi另一个终端运行Python。另一个好处是我们可以把PaddlePaddle容器运行在远程服务器上并在笔记本上通过SSH与其连接。
以上方法在GPU镜像里也能用只是请不要忘记按装CUDA驱动以及告诉Docker
.. code-block:: bash
export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')"
export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:0.10.0rc1-gpu
运行PaddlePaddle书籍
---------------------
Jupyter Notebook是一个开源的web程序大家可以通过它制作和分享带有代码、公式、图表、文字的交互式文档。用户可以通过网页浏览文档。
PaddlePaddle书籍是为用户和开发者制作的一个交互式的Jupyter Nodebook。
如果您想要更深入了解deep learningPaddlePaddle书籍一定是您最好的选择。
当您进入容器内之后,只用运行以下命令:
.. code-block:: bash
jupyter notebook
然后在浏览器中输入以下网址:
.. code-block:: text
http://localhost:8888/
就这么简单,享受您的旅程!
非AVX镜像
---------
纯CPU镜像以及GPU镜像都会用到AVX指令集但是2008年之前生产的旧电脑不支持AVX。以下指令能检查Linux电脑是否支持AVX
.. code-block:: bash
if cat /proc/cpuinfo | grep -i avx; then echo Yes; else echo No; fi
如果输出是No我们就需要手动编译一个非AVX版本的镜像
.. code-block:: bash
cd ~
git clone https://github.com/PaddlePaddle/Paddle.git
cd Paddle
docker build --build-arg WITH_AVX=OFF -t paddle:cpu-noavx -f paddle/scripts/docker/Dockerfile .
docker build --build-arg WITH_AVX=OFF -t paddle:gpu-noavx -f paddle/scripts/docker/Dockerfile.gpu .
通过Docker容器开发PaddlePaddle
------------------------------
@ -57,67 +137,6 @@ PaddlePaddle目前唯一官方支持的运行的方式是Docker容器。因为Do
ctest
纯CPU和GPU的docker镜像
----------------------
对于每一个PaddlePaddle版本我们都会发布两个Docker镜像纯CPU的和GPU的。我们通过设置 `dockerhub.com <https://hub.docker.com/r/paddledev/paddle/>`_ 自动运行以下两个命令:
.. code-block:: bash
docker build -t paddle:cpu -f paddle/scripts/docker/Dockerfile .
docker build -t paddle:gpu -f paddle/scripts/docker/Dockerfile.gpu .
以交互容器方式运行纯CPU的镜像
.. code-block:: bash
docker run -it --rm paddledev/paddle:cpu-latest /bin/bash
或者,可以以后台进程方式运行容器:
.. code-block:: bash
docker run -d -p 2202:22 paddledev/paddle:cpu-latest
然后用密码 :code:`root` SSH进入容器
.. code-block:: bash
ssh -p 2202 root@localhost
SSH方式的一个优点是我们可以从多个终端进入容器。比如一个终端运行vi另一个终端运行Python。另一个好处是我们可以把PaddlePaddle容器运行在远程服务器上并在笔记本上通过SSH与其连接。
以上方法在GPU镜像里也能用只是请不要忘记按装CUDA驱动以及告诉Docker
.. code-block:: bash
export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')"
export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:gpu-latest
非AVX镜像
---------
纯CPU镜像以及GPU镜像都会用到AVX指令集但是2008年之前生产的旧电脑不支持AVX。以下指令能检查Linux电脑是否支持AVX
.. code-block:: bash
if cat /proc/cpuinfo | grep -i avx; then echo Yes; else echo No; fi
如果输出是No我们就需要手动编译一个非AVX版本的镜像
.. code-block:: bash
cd ~
git clone https://github.com/PaddlePaddle/Paddle.git
cd Paddle
docker build --build-arg WITH_AVX=OFF -t paddle:cpu-noavx -f paddle/scripts/docker/Dockerfile .
docker build --build-arg WITH_AVX=OFF -t paddle:gpu-noavx -f paddle/scripts/docker/Dockerfile.gpu .
文档
----
@ -128,7 +147,7 @@ Paddle的Docker镜像带有一个通过 `woboq code browser
.. code-block:: bash
docker run -d --name paddle-cpu-doc paddle:cpu
docker run -d --name paddle-cpu-doc paddle:0.10.0rc1-cpu
docker run -d --volumes-from paddle-cpu-doc -p 8088:80 nginx
接着我们就能够打开浏览器在 http://localhost:8088/paddle/ 浏览代码。

@ -9,6 +9,100 @@ Please be aware that you will need to change `Dockers settings
of your hardware resource on Mac OS X and Windows.
Usage of CPU-only and GPU Images
----------------------------------
For each version of PaddlePaddle, we release 2 Docker images, a
CPU-only one and a CUDA GPU one. We do so by configuring
`dockerhub.com <https://hub.docker.com/r/paddledev/paddle/>`_
automatically generate the latest docker images `paddledev/paddle:0.10.0rc1-cpu`
and `paddledev/paddle:0.10.0rc1-gpu`.
To run the CPU-only image as an interactive container:
.. code-block:: bash
docker run -it --rm paddledev/paddle:0.10.0rc1-cpu /bin/bash
or, we can run it as a daemon container
.. code-block:: bash
docker run -d -p 2202:22 -p 8888:8888 paddledev/paddle:0.10.0rc1-cpu
and SSH to this container using password :code:`root`:
.. code-block:: bash
ssh -p 2202 root@localhost
An advantage of using SSH is that we can connect to PaddlePaddle from
more than one terminals. For example, one terminal running vi and
another one running Python interpreter. Another advantage is that we
can run the PaddlePaddle container on a remote server and SSH to it
from a laptop.
Above methods work with the GPU image too -- just please don't forget
to install CUDA driver and let Docker knows about it:
.. code-block:: bash
export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')"
export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:0.10.0rc1-gpu
PaddlePaddle Book
------------------
The Jupyter Notebook is an open-source web application that allows
you to create and share documents that contain live code, equations,
visualizations and explanatory text in a single browser.
PaddlePaddle Book is an interactive Jupyter Notebook for users and developers.
We already exposed port 8888 for this book. If you want to
dig deeper into deep learning, PaddlePaddle Book definitely is your best choice.
Once you are inside the container, simply issue the command:
.. code-block:: bash
jupyter notebook
Then, you would back and paste the address into the local browser:
.. code-block:: text
http://localhost:8888/
That's all. Enjoy your journey!
Non-AVX Images
--------------
Please be aware that the CPU-only and the GPU images both use the AVX
instruction set, but old computers produced before 2008 do not support
AVX. The following command checks if your Linux computer supports
AVX:
.. code-block:: bash
if cat /proc/cpuinfo | grep -i avx; then echo Yes; else echo No; fi
If it doesn't, we will need to build non-AVX images manually from
source code:
.. code-block:: bash
cd ~
git clone https://github.com/PaddlePaddle/Paddle.git
cd Paddle
docker build --build-arg WITH_AVX=OFF -t paddle:cpu-noavx -f paddle/scripts/docker/Dockerfile .
docker build --build-arg WITH_AVX=OFF -t paddle:gpu-noavx -f paddle/scripts/docker/Dockerfile.gpu .
Development Using Docker
------------------------
@ -82,103 +176,6 @@ Windows -- in a consistent way.
cd /paddle/build
ctest
4. Run PaddlePaddle Book under Docker Container
The Jupyter Notebook is an open-source web application that allows
you to create and share documents that contain live code, equations,
visualizations and explanatory text in a single browser.
PaddlePaddle Book is an interactive Jupyter Notebook for users and developers.
We already exposed port 8888 for this book. If you want to
dig deeper into deep learning, PaddlePaddle Book definitely is your best choice.
Once you are inside the container, simply issue the command:
.. code-block:: bash
jupyter notebook
Then, you would back and paste the address into the local browser:
.. code-block:: text
http://localhost:8888/
That's all. Enjoy your journey!
CPU-only and GPU Images
-----------------------
For each version of PaddlePaddle, we release 2 Docker images, a
CPU-only one and a CUDA GPU one. We do so by configuring
`dockerhub.com <https://hub.docker.com/r/paddledev/paddle/>`_
automatically runs the following commands:
.. code-block:: bash
docker build -t paddle:cpu -f paddle/scripts/docker/Dockerfile .
docker build -t paddle:gpu -f paddle/scripts/docker/Dockerfile.gpu .
To run the CPU-only image as an interactive container:
.. code-block:: bash
docker run -it --rm paddledev/paddle:cpu-latest /bin/bash
or, we can run it as a daemon container
.. code-block:: bash
docker run -d -p 2202:22 paddledev/paddle:cpu-latest
and SSH to this container using password :code:`root`:
.. code-block:: bash
ssh -p 2202 root@localhost
An advantage of using SSH is that we can connect to PaddlePaddle from
more than one terminals. For example, one terminal running vi and
another one running Python interpreter. Another advantage is that we
can run the PaddlePaddle container on a remote server and SSH to it
from a laptop.
Above methods work with the GPU image too -- just please don't forget
to install CUDA driver and let Docker knows about it:
.. code-block:: bash
export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')"
export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:gpu-latest
Non-AVX Images
--------------
Please be aware that the CPU-only and the GPU images both use the AVX
instruction set, but old computers produced before 2008 do not support
AVX. The following command checks if your Linux computer supports
AVX:
.. code-block:: bash
if cat /proc/cpuinfo | grep -i avx; then echo Yes; else echo No; fi
If it doesn't, we will need to build non-AVX images manually from
source code:
.. code-block:: bash
cd ~
git clone https://github.com/PaddlePaddle/Paddle.git
cd Paddle
docker build --build-arg WITH_AVX=OFF -t paddle:cpu-noavx -f paddle/scripts/docker/Dockerfile .
docker build --build-arg WITH_AVX=OFF -t paddle:gpu-noavx -f paddle/scripts/docker/Dockerfile.gpu .
Documentation
-------------
@ -194,7 +191,7 @@ container:
.. code-block:: bash
docker run -d --name paddle-cpu-doc paddle:cpu
docker run -d --name paddle-cpu-doc paddle:0.10.0rc1-cpu
docker run -d --volumes-from paddle-cpu-doc -p 8088:80 nginx

@ -228,16 +228,6 @@
<td class="left"></td><td class="left"></td><td class="left"></td><td class="left"></td>
</tr>
<tr>
<td class="left" rowspan = "2">度量学习(metric learning)</td><td class="left">external</td>
<td class="left"></td><td class="left"></td><td class="left"></td><td class="left"></td>
</tr>
<tr>
<td class="left">data_server_port</td>
<td class="left"></td><td class="left"></td><td class="left"></td><td class="left"></td>
</tr>
<tr>
<td class="left" rowspan = "16">参数服务器(PServer)</td><td class="left">start_pserver</td>
<td class="left"></td><td class="left"></td><td class="left"></td><td class="left"></td>

@ -228,16 +228,6 @@ It looks like there are a lot of arguments. However, most of them are for develo
<td class="left"></td><td class="left"></td><td class="left"></td><td class="left"></td>
</tr>
<tr>
<td class="left" rowspan = "2">metric learning</td><td class="left">external</td>
<td class="left"></td><td class="left"></td><td class="left"></td><td class="left"></td>
</tr>
<tr>
<td class="left">data_server_port</td>
<td class="left"></td><td class="left"></td><td class="left"></td><td class="left"></td>
</tr>
<tr>
<td class="left" rowspan = "16">PServer</td><td class="left">start_pserver</td>
<td class="left"></td><td class="left"></td><td class="left"></td><td class="left"></td>

@ -180,15 +180,6 @@
 - 用户可以自定义beam search的方法编译成动态库供PaddlePaddle加载。 该参数用于指定动态库路径.
- 类型: string (默认: "", null).
## 度量学习(Metric Learning)
* `--external`
- 指示是否使用外部机器进行度量学习.
- 类型: bool (默认: 0).
* `--data_server_port`
- 数据服务器(data server)的监听端口,主要用在度量学习中.
- 类型: int32 (默认: 21134).
## 数据支持(DataProvider)
* `--memory_threshold_on_load_data`

@ -184,15 +184,6 @@
- Specify shared dynamic library. It can be defined out of paddle by user.
- type: string (default: "", null).
## Metric Learning
* `--external`
- Whether to use external machine for metric learning.
- type: bool (default: 0).
* `--data_server_port`
- Listening port for dserver (data server), dserver is mainly used in metric learning.
- type: int32 (default: 21134).
## DataProvider
* `--memory_threshold_on_load_data`

@ -24,9 +24,6 @@ limitations under the License. */
DEFINE_bool(allow_only_one_model_on_one_gpu,
true,
"If true, do not allow multiple models on one GPU device");
#ifdef PADDLE_METRIC_LEARNING
DECLARE_bool(external);
#endif
namespace paddle {
@ -45,11 +42,7 @@ MultiGradientMachine::MultiGradientMachine(const ModelConfig& config,
trainerBarrier_(FLAGS_trainer_count),
allBarrier_(FLAGS_trainer_count + 1),
inArgsCopied_(false) {
#ifdef PADDLE_METRIC_LEARNING
isPassGrad_ = FLAGS_external;
#else
isPassGrad_ = false;
#endif
numThreads_ = FLAGS_trainer_count;
if (useGpu) {
//! TODO(yuyang18): When useGpu=false && paddle is not compiled with gpu,

@ -24,7 +24,7 @@ bool CRFDecodingLayer::init(const LayerMap& layerMap,
return false;
}
crf_.reset(new LinearChainCRF(
numClasses_, parameter_->getBuf(PARAMETER_VALUE)->getData(), nullptr));
numClasses_, parameter_->getBuf(PARAMETER_VALUE)->getData()));
return true;
}

@ -42,6 +42,7 @@ bool CRFLayer::init(const LayerMap& layerMap,
CHECK_EQ(parameters_[0]->getSize(), numClasses_ * (numClasses_ + 2));
parameter_ = parameters_[0];
weight_.reset(new Weight(numClasses_ + 2, numClasses_, parameter_));
// We don't need sequenceStartPositions because each sample of output_ is
// for the cost of one sequence.
@ -69,11 +70,7 @@ void CRFLayer::forward(PassType passType) {
for (size_t i = 0; i < numSequences; ++i) {
if (i >= crfs_.size()) {
crfs_.emplace_back(numClasses_,
parameter_->getBuf(PARAMETER_VALUE)->getData(),
parameter_->getBuf(PARAMETER_GRADIENT)
? parameter_->getBuf(PARAMETER_GRADIENT)->getData()
: nullptr);
crfs_.emplace_back(numClasses_, weight_->getW()->getData());
}
output_.value->getData()[i] =
crfs_[i].forward(output.value->getData() + numClasses_ * starts[i],
@ -93,22 +90,25 @@ void CRFLayer::backward(const UpdateCallback& callback) {
const int* starts = label.sequenceStartPositions->getData(false);
int numSequences = label.sequenceStartPositions->getSize() - 1;
bool needWGrad = weight_->getWGrad() ? true : false;
for (int i = 0; i < numSequences; ++i) {
crfs_[i].backward(output.value->getData() + numClasses_ * starts[i],
output.grad->getData() + numClasses_ * starts[i],
label.ids->getData() + starts[i],
starts[i + 1] - starts[i]);
if (weightLayer_) {
real weight = getInputValue(*weightLayer_)->getElement(i, 0);
MatrixPtr grad = output.grad->subRowMatrix(starts[i], starts[i + 1]);
grad->mulScalar(weight);
starts[i + 1] - starts[i],
needWGrad);
real instanceWeight = weightLayer_
? getInputValue(*weightLayer_)->getElement(i, 0)
: real(1.0f);
instanceWeight *= coeff_;
MatrixPtr grad = output.grad->subRowMatrix(starts[i], starts[i + 1]);
grad->add(*crfs_[i].getXGrad(), real(1.0f), instanceWeight);
if (needWGrad) {
weight_->getWGrad()->add(
*crfs_[i].getWGrad(), real(1.0f), instanceWeight);
}
}
if (coeff_ != real(1.0f)) {
output.grad->mulScalar(coeff_);
}
parameter_->incUpdate(callback);
}

@ -38,8 +38,9 @@ protected:
size_t numClasses_;
ParameterPtr parameter_;
std::vector<LinearChainCRF> crfs_;
LayerPtr weightLayer_; // weight for each sequence
real coeff_; // weight for the layer
LayerPtr weightLayer_; // weight for each sequence
std::unique_ptr<Weight> weight_; // parameters
real coeff_; // weight for the layer
};
} // namespace paddle

@ -381,8 +381,7 @@ void Layer::backwardActivation() {
void Layer::forwardDropOut() {
auto& outV = getOutputValue();
if (passType_ == PASS_TRAIN || passType_ == PASS_METRIC_TRAIN ||
passType_ == PASS_METRIC_TRAIN_WITH_NOERROR) {
if (passType_ == PASS_TRAIN) {
// new dropOutMask_ if dropOutMask_ is null ptr
Matrix::resizeOrCreate(dropOutMask_,
outV->getHeight(),

@ -17,18 +17,12 @@ limitations under the License. */
namespace paddle {
LinearChainCRF::LinearChainCRF(int numClasses, real* para, real* grad)
LinearChainCRF::LinearChainCRF(int numClasses, real* para)
: numClasses_(numClasses) {
a_ = Matrix::create(para, 1, numClasses_);
b_ = Matrix::create(para + numClasses_, 1, numClasses_);
w_ = Matrix::create(para + 2 * numClasses_, numClasses_, numClasses_);
if (grad) {
da_ = Matrix::create(grad, 1, numClasses_);
db_ = Matrix::create(grad + numClasses_, 1, numClasses_);
dw_ = Matrix::create(grad + 2 * numClasses_, numClasses_, numClasses_);
}
ones_ = Matrix::create(1, numClasses_);
ones_->one();
@ -107,19 +101,24 @@ real LinearChainCRF::forward(real* x, int* s, int length) {
return -ll;
}
void LinearChainCRF::backward(real* x, real* dx, int* s, int length) {
void LinearChainCRF::backward(real* x, int* s, int length, bool needWGrad) {
MatrixPtr matX = Matrix::create(x, length, numClasses_);
MatrixPtr matDX = Matrix::create(dx, length, numClasses_);
MatrixPtr matGrad = Matrix::create(length, numClasses_);
Matrix::resizeOrCreate(matGrad_, length, numClasses_);
Matrix::resizeOrCreate(beta_, length, numClasses_);
real* b = b_->getData();
real* dw = dw_ ? dw_->getData() : nullptr;
if (needWGrad) {
Matrix::resizeOrCreate(matWGrad_, numClasses_ + 2, numClasses_);
matWGrad_->zeroMem();
da_ = matWGrad_->subRowMatrix(0, 1);
db_ = matWGrad_->subRowMatrix(1, 2);
dw_ = matWGrad_->subRowMatrix(2, numClasses_ + 2);
}
real* alpha = alpha_->getData();
real* beta = beta_->getData();
real* expW = expW_->getData();
real* expX = expX_->getData();
real* grad = matGrad->getData();
real* grad = matGrad_->getData();
for (int i = 0; i < numClasses_; ++i) {
beta[(length - 1) * numClasses_ + i] = exp(b[i]);
@ -140,39 +139,38 @@ void LinearChainCRF::backward(real* x, real* dx, int* s, int length) {
normalizeL1(beta + k * numClasses_, numClasses_);
}
matGrad->dotMul(*alpha_, *beta_);
matGrad->rowNormalizeL1(*matGrad);
matGrad_->dotMul(*alpha_, *beta_);
matGrad_->rowNormalizeL1(*matGrad_);
for (int k = 0; k < length; ++k) {
grad[k * numClasses_ + s[k]] -= (real)1;
}
matDX->add(*matGrad);
if (da_) {
da_->add(*matGrad->subMatrix(/* startRow= */ 0, /* numRows= */ 1));
}
if (db_) {
db_->add(*matGrad->subMatrix(/* startRow= */ length - 1, 1));
}
beta_->dotMul(*beta_, *expX_);
beta_->rowNormalizeL1(*beta_);
if (needWGrad) {
da_->add(*matGrad_->subMatrix(/* startRow= */ 0, /* numRows= */ 1));
db_->add(*matGrad_->subMatrix(/* startRow= */ length - 1, 1));
for (int k = 1; dw && k < length; ++k) {
real sum = 0;
for (int i = 0; i < numClasses_; ++i) {
for (int j = 0; j < numClasses_; ++j) {
sum += expW[i * numClasses_ + j] * alpha[(k - 1) * numClasses_ + i] *
beta[k * numClasses_ + j];
beta_->dotMul(*beta_, *expX_);
beta_->rowNormalizeL1(*beta_);
real* dw = dw_->getData();
for (int k = 1; k < length; ++k) {
real sum = 0;
for (int i = 0; i < numClasses_; ++i) {
for (int j = 0; j < numClasses_; ++j) {
sum += expW[i * numClasses_ + j] * alpha[(k - 1) * numClasses_ + i] *
beta[k * numClasses_ + j];
}
}
}
sum = 1 / sum;
for (int i = 0; i < numClasses_; ++i) {
for (int j = 0; j < numClasses_; ++j) {
dw[i * numClasses_ + j] += sum * expW[i * numClasses_ + j] *
alpha[(k - 1) * numClasses_ + i] *
beta[k * numClasses_ + j];
sum = 1 / sum;
for (int i = 0; i < numClasses_; ++i) {
for (int j = 0; j < numClasses_; ++j) {
dw[i * numClasses_ + j] += sum * expW[i * numClasses_ + j] *
alpha[(k - 1) * numClasses_ + i] *
beta[k * numClasses_ + j];
}
}
dw[s[k - 1] * numClasses_ + s[k]] -= (real)1;
}
dw[s[k - 1] * numClasses_ + s[k]] -= (real)1;
}
}

@ -21,7 +21,7 @@ namespace paddle {
class LinearChainCRF {
public:
/**
* The size of para and grad must be \f$(numClasses + 2) * numClasses\f$.
* The size of para must be \f$(numClasses + 2) * numClasses\f$.
* The first numClasses values of para are for starting weights (\f$a\f$).
* The next numClasses values of para are for ending weights (\f$b\f$),
* The remaning values are for transition weights (\f$w\f$).
@ -34,7 +34,7 @@ public:
* all possible
* sequences is \f$1\f$, and \f$x\f$ is the input feature to the CRF.
*/
LinearChainCRF(int numClasses, real* para, real* grad);
LinearChainCRF(int numClasses, real* para);
/**
* Calculate the negative log likelihood of s given x.
@ -45,29 +45,45 @@ public:
/**
* Calculate the gradient with respect to x, a, b, and w.
* The gradient of x will be stored in dx.
* backward() can only be called after a corresponding call to forward() with
* the same x, s and length.
* @note The gradient is added to dx and grad (provided at constructor).
* The gradient with respect to a, b, and w will not be calculated if
* needWGrad is false.
* @note Please call getWGrad() and getXGrad() to get the gradient with
* respect to (a, b, w) and x respectively.
*/
void backward(real* x, real* dx, int* s, int length);
void backward(real* x, int* s, int length, bool needWGrad);
/**
* Find the most probable sequence given x. The result will be stored in s.
*/
void decode(real* x, int* s, int length);
/*
* Return the gradient with respect to (a, b, w). It can only be called after
* a corresponding call to backward().
*/
MatrixPtr getWGrad() { return matWGrad_; }
/*
* Return the gradient with respect to x. It can only be called after a
* corresponding call to backward().
*/
MatrixPtr getXGrad() { return matGrad_; }
protected:
int numClasses_;
MatrixPtr a_;
MatrixPtr b_;
MatrixPtr w_;
MatrixPtr matWGrad_;
MatrixPtr da_;
MatrixPtr db_;
MatrixPtr dw_;
MatrixPtr ones_;
MatrixPtr expX_;
MatrixPtr matGrad_;
MatrixPtr alpha_;
MatrixPtr beta_;
MatrixPtr maxX_;

@ -18,6 +18,14 @@ add_unittest_without_exec(test_LayerGrad
add_test(NAME test_LayerGrad
COMMAND test_LayerGrad)
################ test_CRFLayerGrad ####################
add_unittest_without_exec(test_CRFLayerGrad
test_CRFLayerGrad.cpp
LayerGradUtil.cpp)
add_test(NAME test_CRFLayerGrad
COMMAND test_CRFLayerGrad)
add_unittest_without_exec(test_ActivationGrad
test_ActivationGrad.cpp
LayerGradUtil.cpp)

@ -0,0 +1,174 @@
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "ModelConfig.pb.h"
#include "paddle/gserver/layers/DataLayer.h"
#include "paddle/gserver/layers/LinearChainCRF.h"
#include "paddle/trainer/Trainer.h"
#include "LayerGradUtil.h"
#include "paddle/testing/TestUtil.h"
using namespace paddle; // NOLINT
DECLARE_int32(gpu_id);
DECLARE_bool(thread_local_rand_use_global_seed);
static inline bool getNextSequence(std::vector<int>& seq, int numClasses) {
for (auto& v : seq) {
if (++v < numClasses) {
return true;
}
v = 0;
}
return false;
}
// log(exp(x) + exp(y))
static inline real logSum(real x, real y) {
real maxValue = std::max(x, y);
if (std::isinf(maxValue)) {
return -std::numeric_limits<real>::infinity();
} else {
return maxValue + log(exp(x - maxValue) + exp(y - maxValue));
}
}
static inline std::vector<int> genRandLabels(int numClasses, int length) {
std::vector<int> labels(length);
for (int i = 0; i < length; ++i) {
labels[i] = rand() % numClasses; // NOLINT
}
return labels;
}
TEST(CRFLayer, cost) {
const int numClasses = 4;
CpuVector para(numClasses * (numClasses + 2));
real* a = para.getData();
real* b = para.getData() + numClasses;
real* w = para.getData() + 2 * numClasses;
LinearChainCRF crf(4, para.getData());
for (int length : {1, 2, 3, 10}) {
for (int tries = 0; tries < 10; ++tries) {
CpuMatrix x(length, numClasses);
x.randomizeUniform();
para.randnorm(0, 2);
std::vector<int> goldenLabels = genRandLabels(numClasses, length);
real cost = crf.forward(x.getData(), goldenLabels.data(), length);
real logZ = -std::numeric_limits<real>::infinity();
real logNominator = -std::numeric_limits<real>::infinity();
std::vector<int> testResult(length, 0);
do {
real score = a[testResult.front()];
score += x.getElement(0, testResult.front());
for (int k = 1; k < length; ++k) {
score += x.getElement(k, testResult[k]) +
w[numClasses * testResult[k - 1] + testResult[k]];
}
score += b[testResult.back()];
logZ = logSum(logZ, score);
if (goldenLabels == testResult) {
logNominator = score;
}
} while (getNextSequence(testResult, numClasses));
real trueCost = -logNominator + logZ;
real diff = fabs(trueCost - cost);
diff /= fabs(cost) < fabs(trueCost) ? fabs(cost) : fabs(trueCost);
VLOG(1) << "cost=" << cost << " trueCost=" << trueCost << " diff=" << diff
<< std::endl;
if (typeid(real) == typeid(double)) { // NOLINT
EXPECT_LE(diff, 1e-10);
} else {
EXPECT_LE(diff, 5e-3);
}
}
}
}
inline real epsilon() { return typeid(real) == typeid(double) ? 1e-10 : 0.06; }
TestConfig initTestConfig(size_t numClasses, bool withWeight) {
TestConfig config;
config.layerConfig.set_type("crf");
config.layerConfig.set_size(numClasses);
config.biasSize = 0;
config.inputDefs.push_back({INPUT_SEQUENCE_DATA,
"layer_0",
numClasses,
numClasses * (numClasses + 2)});
config.layerConfig.add_inputs();
config.inputDefs.push_back(
{INPUT_SEQUENCE_LABEL, "layer_label", numClasses, 0});
config.layerConfig.add_inputs();
if (withWeight) {
config.inputDefs.push_back({INPUT_DENSE_DIM_DATA, "layer_weight", 1, 0});
config.layerConfig.add_inputs();
}
return config;
}
TEST(Layer, CRFLayer) {
size_t numClasses = 10;
for (int tries = 0; tries < 5; ++tries) {
TestConfig config = initTestConfig(numClasses, /* withWeight= */ false);
for (int length : {1, 3, 100}) {
// Not support GPU now
testLayerGrad(config,
"crf",
length,
/* trans= */ false,
/* useGpu= */ false,
/* useWeight= */ false,
epsilon());
}
}
}
TEST(Layer, CRFLayerUseWeight) {
size_t numClasses = 10;
for (int tries = 0; tries < 5; ++tries) {
TestConfig config = initTestConfig(numClasses, /* withWeight= */ true);
for (int length : {1, 3, 100}) {
// Not support GPU now
testLayerGrad(config,
"crf",
length,
/* trans= */ false,
/* useGpu= */ false,
/* useWeight= */ false,
epsilon());
}
}
}
int main(int argc, char** argv) {
initMain(argc, argv);
hl_start();
hl_init(FLAGS_gpu_id);
FLAGS_thread_local_rand_use_global_seed = true;
srand(1);
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

@ -276,27 +276,6 @@ TEST(Layer, AddtoLayer) {
}
}
TEST(Layer, CRFLayer) {
TestConfig config;
config.layerConfig.set_type("crf");
config.layerConfig.set_size(10);
config.biasSize = 0;
config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_0", 10, 120});
config.inputDefs.push_back({INPUT_SEQUENCE_LABEL, "layer_1", 10, 0});
config.layerConfig.add_inputs();
config.layerConfig.add_inputs();
// Not support GPU now
testLayerGrad(config,
"crf",
100,
/* trans */ false,
/* useGpu */ false,
false /*useWeight*/,
0.03 /*epsilon*/);
}
TEST(Layer, CTCLayer) {
TestConfig config;
config.layerConfig.set_type("ctc");

@ -36,7 +36,7 @@ TEST(LinearChainCRF, decoding) {
real* a = para.getData();
real* b = para.getData() + numClasses;
real* w = para.getData() + 2 * numClasses;
LinearChainCRF crf(4, para.getData(), nullptr);
LinearChainCRF crf(4, para.getData());
for (int length : {1, 2, 3, 10}) {
for (int tries = 0; tries < 10; ++tries) {
CpuMatrix x(length, numClasses);

@ -30,9 +30,6 @@ namespace paddle {
* the first solution arms with sendThreads_/recvThreads_ and sendJobQueue_/
* recvJobQueue_. the second solution use some shared thread pool to manage
* connections.
* In addition to pserver, metric learning also uses network to exchange
* features within multi-machines, so this class just abstracts some basic
* threads and queue buffer creation for them
*/
class BaseClient {
protected:

@ -367,11 +367,8 @@ void ParameterServer2::addGradient(const SendParameterRequest& request,
std::vector<Buffer>* outputBuffers) {
VLOG(1) << "pserver: addGradient";
/// forwardbackward delta from all trainers
/// indicate the fluctuation caused by forwardbackward.
#ifndef PADDLE_METRIC_LEARNING
// @TODO(yanfei):
// add support tuning forwardbackward balance for metric learning
// forwardbackward delta from all trainers
// indicate the fluctuation caused by forwardbackward.
if (!numPassFinishClients_) {
REGISTER_BARRIER_DELTA_SERVER_SET(
*statSet_,
@ -381,7 +378,6 @@ void ParameterServer2::addGradient(const SendParameterRequest& request,
request.forwardbackward_time(),
isSparseServer_ ? "_sparseUpdater" : "_denseUpdater");
}
#endif
{
/// approximately pure network overhead

@ -18,6 +18,7 @@ ENV WITH_GPU=OFF
ENV WITH_AVX=${WITH_AVX:-ON}
ENV WITH_DOC=${WITH_DOC:-OFF}
ENV WITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF}
ENV DOCKER_BUILD=TRUE
ENV HOME /root

@ -18,6 +18,7 @@ ENV WITH_GPU=ON
ENV WITH_AVX=${WITH_AVX:-ON}
ENV WITH_DOC=${WITH_DOC:-OFF}
ENV WITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF}
ENV DOCKER_BUILD=TRUE
ENV HOME /root

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save