Merge remote-tracking branch 'upstream/master'

avx_docs
liaogang 9 years ago
commit 92ca98d5f5

@ -2,6 +2,9 @@ language: cpp
cache: ccache
sudo: required
dist: trusty
env:
- JOB=DOCS
- JOB=BUILD_AND_TEST
addons:
apt:
packages:
@ -16,6 +19,7 @@ addons:
- python2.7-dev
- m4
- libprotobuf-dev
- doxygen
- protobuf-compiler
- python-protobuf
- python-numpy
@ -24,12 +28,10 @@ addons:
- libgflags-dev
- libgtest-dev
before_install:
- pip install wheel protobuf
- pip install wheel protobuf sphinx breathe recommonmark
- sudo paddle/scripts/travis/before_install.sh
script:
- paddle/scripts/travis/build.sh
- paddle/scripts/travis/unittest.sh
- paddle/scripts/travis/make_install.sh
- paddle/scripts/travis/main.sh
notifications:
email:
on_success: change

@ -25,7 +25,7 @@ repo or just head straight to the command line:
```shell
# Clone your fork to your local machine
git clone git@github.com:USERNAME/paddle.git
git clone git@github.com:USERNAME/Paddle.git
```
Then you can start to develop.
@ -52,7 +52,7 @@ To do this, you'll need to add a remote at first:
# see the current configured remote repository
git remote -v
# add upstream repository
git remote add upstream https://github.com/paddle/paddle.git
git remote add upstream https://github.com/baidu/Paddle.git
# verify the new upstream
git remote -v
```

@ -9,6 +9,7 @@ Install PaddlePaddle
:glob:
install_*
internal/install_from_jumbo.md
Build from Source
-----------------

@ -5,3 +5,4 @@ Cluster Train
:glob:
opensource/cluster_train.md
internal/index.md

@ -245,10 +245,10 @@ addto_layer
:members: addto_layer
:noindex:
convex_comb_layer
linear_comb_layer
-----------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: convex_comb_layer
:members: linear_comb_layer
:noindex:
interpolation_layer
@ -280,7 +280,13 @@ tensor_layer
.. automodule:: paddle.trainer_config_helpers.layers
:members: tensor_layer
:noindex:
cos_sim
-------
.. automodule:: paddle.trainer_config_helpers.layers
:members: cos_sim
:noindex:
trans_layer
------------
.. automodule:: paddle.trainer_config_helpers.layers
@ -341,12 +347,6 @@ rank_cost
:members: rank_cost
:noindex:
cos_sim
-------
.. automodule:: paddle.trainer_config_helpers.layers
:members: cos_sim
:noindex:
crf_layer
-----------------
.. automodule:: paddle.trainer_config_helpers.layers

@ -9,7 +9,11 @@ Note: The installation packages are still in pre-release state and your experienc
.. toctree::
:maxdepth: 1
:glob:
源码下载(对内) <../build/internal/download_paddle_source_zh_cn.rst>
使用Jumbo安装(对内) <../build/internal/install_from_jumbo.rst>
从源码编译安装(对内) <../build/internal/build_from_source_zh_cn.rst>
install/docker_install.rst
install/ubuntu_install.rst
cmake/index.rst

@ -0,0 +1,11 @@
集群训练
========
* `集群训练 <../../doc/cluster/index.html>`_
.. toctree::
:maxdepth: 2
:glob:
集群训练(对内) <internal/index.md>

@ -8,7 +8,7 @@ PaddlePaddle文档
* `用户接口 <ui/index.html>`_
* `使用示例 <demo/index.html>`_
* `模型配置 <../doc/ui/api/trainer_config_helpers/index.html>`_
* `集群训练 <../doc/cluster/index.html>`_
* `集群训练 <cluster/index.html>`_
开发指南
--------

@ -150,7 +150,7 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_R3(DYNAMIC_LOAD_CUDNN_WRAP)
// APIs available after R4:
#if CUDNN_VERSION >= 4000
#if CUDNN_VERSION >= 4007
#define CUDNN_DNN_ROUTINE_EACH_AFTER_R4(__macro) \
__macro(cudnnBatchNormalizationForwardTraining) \
__macro(cudnnBatchNormalizationForwardInference) \
@ -999,7 +999,7 @@ void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc,
double epsilon,
real *savedMean,
real *savedVar) {
#if CUDNN_VERSION >= 4000
#if CUDNN_VERSION >= 4007
if ((NULL != runningMean && NULL == runningInvVar) ||
(NULL == runningMean && NULL != runningInvVar)) {
LOG(FATAL) << "runningMean and runningInvVar can be NULL "
@ -1024,7 +1024,7 @@ void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc,
CHECK_SYNC("hl_batch_norm_forward_training failed");
#else
LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4000. "
LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4007. "
<< "But cudnn lib version is " << g_cudnn_lib_version;
#endif
}
@ -1039,7 +1039,7 @@ void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
real *estimatedMean,
real *estimatedInvVar,
double epsilon) {
#if CUDNN_VERSION >= 4000
#if CUDNN_VERSION >= 4007
cudnnTensorDescriptor_t xDesc = GET_TENSOR_DESCRIPTOR(inputDesc);
cudnnTensorDescriptor_t yDesc = GET_TENSOR_DESCRIPTOR(outputDesc);
cudnnTensorDescriptor_t bnDesc = GET_TENSOR_DESCRIPTOR(bnParamDesc);
@ -1053,7 +1053,7 @@ void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
CHECK_SYNC("hl_batch_norm_forward_inference failed");
#else
LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4000. "
LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4007. "
<< "But cudnn lib version is " << g_cudnn_lib_version;
#endif
}
@ -1071,7 +1071,7 @@ void hl_batch_norm_backward(hl_tensor_descriptor inputDesc,
double epsilon,
real *savedMean,
real *savedInvVar) {
#if CUDNN_VERSION >= 4000
#if CUDNN_VERSION >= 4007
if ((NULL != savedMean && NULL == savedInvVar) ||
(NULL == savedMean && NULL != savedInvVar)) {
LOG(FATAL) << "savedMean and savedVar can be NULL "
@ -1087,16 +1087,14 @@ void hl_batch_norm_backward(hl_tensor_descriptor inputDesc,
cudnnBatchNormMode_t mode = CUDNN_BATCHNORM_SPATIAL;
CHECK_CUDNN(dynload::cudnnBatchNormalizationBackward(
t_resource.cudnn_handle, mode, &alpha, &beta,
#if CUDNN_VERSION >= 5000
&alpha, &beta,
#endif
xDesc, input, dyDesc, outGrad, dxDesc, inGrad,
bnDesc, scale, scaleGrad, biasGrad, epsilon,
savedMean, savedInvVar));
CHECK_SYNC("hl_batch_norm_backward failed");
#else
LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4000. "
LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4007. "
<< "But cudnn lib version is " << g_cudnn_lib_version;
#endif
}

@ -19,6 +19,7 @@ limitations under the License. */
#include "hl_matrix_apply.cuh"
#include "hl_sequence.h"
#include "paddle/utils/Logging.h"
#include "hl_device_functions.cuh"
DEFINE_MATRIX_UNARY_OP(Zero, a = 0);
DEFINE_MATRIX_TERNARY_PARAMETER_OP(_add, TWO_PARAMETER, c = p1*a + p2*b);

@ -194,8 +194,8 @@ public:
virtual real evalImp(std::vector<Argument>& arguments) {
CHECK_EQ(arguments.size(), (size_t)2);
Argument output, label;
output.resizeAndCopyFrom(arguments[0], false);
label.resizeAndCopyFrom(arguments[1], false);
output.resizeAndCopyFrom(arguments[0], false, HPPL_STREAM_DEFAULT);
label.resizeAndCopyFrom(arguments[1], false, HPPL_STREAM_DEFAULT);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
CHECK(label.sequenceStartPositions);
CHECK(label.ids);
@ -207,7 +207,7 @@ public:
real err = 0;
err = editDistance(
output.value->getData() + output.value->getWidth() * outputStarts[i],
output.value->getHeight(), output.value->getWidth(),
outputStarts[i+1] - outputStarts[i], output.value->getWidth(),
label.ids->getData() + labelStarts[i],
labelStarts[i + 1] - labelStarts[i]);
@ -224,6 +224,9 @@ public:
for (const std::string& name : config_.input_layers()) {
arguments.push_back(nn.getLayer(name)->getOutput());
}
}
virtual void updateSamplesNum(const std::vector<Argument>& arguments) {
numSequences_ += arguments[1].getNumSequences();
}

@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "GradientMachine.h"
@ -101,7 +100,7 @@ public:
* Return true if this prefix or candidate is expected to be dropped.
*/
typedef std::function<bool(int seqId, const std::vector<int>&,
const std::vector<real>&)> DropCallback;
const std::vector<real>&)> DropCallback;
/**
* @brief NormOrDropNodeCallback
@ -117,7 +116,7 @@ public:
* The fourth parameter is the probability of the whole path.
*/
typedef std::function<void(int seqId, const std::vector<int>&,
std::vector<real>&, real*)> NormOrDropNodeCallback;
std::vector<real>&, real*)> NormOrDropNodeCallback;
/**
* @brief Register beam search control callbacks. Used for prediction.
@ -192,7 +191,7 @@ public:
int machineId; // index of sample in frame
int topIndex; // index of MaxIdLayer output in one sample
int seqId; // index of sequence in batch generation
int seqId; // index of sequence in batch generation
std::vector<int> machineIdVec;
/**
@ -206,7 +205,10 @@ public:
/**
* @brief Path default ctor, first logProb is 0.
*/
Path() { logProb = 0; seqId = 0; }
Path() {
logProb = 0;
seqId = 0;
}
explicit Path(size_t seqId) : seqId(seqId) { logProb = 0; }
/**
@ -319,21 +321,33 @@ protected:
};
std::vector<MemoryFrameLine> memoryFrameLines_;
// All inFrameLines and outFrameLines have the same element as follows.
// Each inFrameLines(inlinks) has its own info(elements) below,
// and all outFrameLines(outlinks) share the info with one inFrameLine,
// which is assigned by targetInfoInlinkId_.
struct Info {
IVectorPtr allIds; // scattered id of realLayer
std::vector<int> idIndex; // index of allIds
ICpuGpuVectorPtr
sequenceStartPositions; // scattered sequenceStartPositions
sequenceStartPositions; // scattered sequenceStartPositions
std::vector<int> seqStartPosIndex; // index of sequenceStartPositions
};
Info info_;
std::vector<Info> info_;
// numSeqs_[i] is the number of sequences which are longer than i (for
// sequence data) or have more than i subsequences (for subsequence data)
std::vector<int> numSeqs_;
// if no subSeq, tuple of (seqLength, seqStart, seqIndex, seqIndex)
// else, tuple of (subSeqLength, subSeqStart, seqIndex, subSeqIndex)
std::vector<std::tuple<int, int, int, int>> seqLengthAndStart_;
std::vector<std::vector<Argument::SeqInfo>> seqInfos_;
void createInFrameInfo(const Argument& input, PassType passType);
// the id of inlink which share info with outlinks
int targetInfoInlinkId_;
/* create scattered id information for all realLayer of inFrameLines one time.
* If hasSubseq, will also create scattered sequenceStartPositions information
* for all realLayer of inFrameLines one time.
*/
void createInFrameInfo(int inlinks_id, const Argument& input,
PassType passType);
void createMemoryFrameInfo(MemoryFrameLine* memoryFrameLine,
PassType passType);
@ -363,6 +377,9 @@ protected:
NeuralNetwork* rootNetwork_;
bool reversed_;
// if hasSubseq: max number of sentences(subseq)in batchsize samples
// else: max number of tokens in batchsize samples(sentences)
int maxSequenceLength_;
bool useGpu_;
bool stopBeamSearch_;
@ -415,7 +432,7 @@ private:
* @param machineIdVec : select a row of output matrix in each frame
* that the generation process expanded.
*/
void createDataOutlink(std::vector<int> & machineIdVec);
void createDataOutlink(std::vector<int>& machineIdVec);
/*
* @brief used in beam search, connect previous frame to form recurrent link

@ -49,8 +49,10 @@ void CTCLayer::forward(PassType passType) {
Layer::forward(passType);
if (useGpu_) {
for (size_t i = 0; i < inputLayers_.size(); i++) {
tmpCpuInput_[i].resizeAndCopyFrom(getInput(i), false, HPPL_STREAM_1);
tmpCpuInput_[i].resizeAndCopyFrom(
getInput(i), false, HPPL_STREAM_DEFAULT);
}
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
forwardImp(tmpCpuInput_[0], tmpCpuInput_[1]);
} else {
forwardImp(getInput(0), getInput(1));
@ -92,9 +94,9 @@ void CTCLayer::backward(const UpdateCallback &callback) {
if (useGpu_) {
backwardImp(callback, tmpCpuInput_[0], tmpCpuInput_[1]);
const_cast<Argument&>(getInput(0)).
resizeAndCopyFrom(tmpCpuInput_[0], true, HPPL_STREAM_1);
resizeAndCopyFrom(tmpCpuInput_[0], true, HPPL_STREAM_DEFAULT);
const_cast<Argument&>(getInput(1)).
resizeAndCopyFrom(tmpCpuInput_[1], true, HPPL_STREAM_1);
resizeAndCopyFrom(tmpCpuInput_[1], true, HPPL_STREAM_DEFAULT);
} else {
backwardImp(callback, getInput(0), getInput(1));
}

@ -248,7 +248,7 @@ void ConvOperator::forward() {
CHECK_EQ(ins_[1]->value->getHeight(), batchSize);
checkFilterSize(ins_[1]->value);
Matrix::resizeOrCreate(out_->value, batchSize,
outputH_ * outputW_ * numFilters_);
outputH_ * outputW_ * numFilters_, false, useGpu_);
{
AsyncGpuBlock block;
for (size_t batchId = 0; batchId < batchSize; ++batchId) {

@ -21,18 +21,20 @@ limitations under the License. */
namespace paddle {
/**
* @brief A layer for convex weighted average of vectors,
* @brief A layer for weighted sum of vectors,
* which is used in NEURAL MACHINE TRANSLATION BY JOINTLY LEARNING TO ALIGN AND
* TRANSLATE
* - Input: the first input contains the convex weights (batchSize x weightDim),
* and the shape of second input is (batchSize x (weightdim*dataDim)).
* - Output: the shape of output is (batchSize x dataDim).
* - Input: the size of the first input is weightDim,
* and the size of the second input is weightDim * dataDim.
* - Output: the size of the output is dataDim
* \f[
* out[i][j] = \sum_{j}(in0(i, j) * in1(i,j + i * dataDim)),
* i = 0,1,...,(batchSize-1); j = 0, 1,...,(dataDim-1)
* out(j) = \sum_{i}(in0(i) * in1(j + i * dataDim)),
* i = 0,1,...,(weightDim-1); j = 0, 1,...,(dataDim-1)
* \f]
* Note that the above computation is for one sample. Multiple samples are
* processed in one batch.
*
* The config file api is convex_comb_layer.
* The config file api is linear_comb_layer.
*/
class ConvexCombinationLayer : public Layer {
protected:

@ -48,7 +48,7 @@ void CosSimLayer::forward(PassType passType) {
REGISTER_TIMER_INFO("CosFwAtvTimer", getName().c_str());
MatrixPtr prevOut1 = getInputValue(0);
MatrixPtr prevOut2 = getInputValue(1);
outV->cosSim(*prevOut1, *prevOut2, kCosSimScale_);
outV->cosSim(*prevOut1, *prevOut2, config_.cos_scale());
}
}
@ -59,7 +59,7 @@ void CosSimLayer::backward(const UpdateCallback& callback) {
outG->cosSimDerivative(*this->getOutputValue(), *getInputValue(0),
*getInputValue(1), *getInputGrad(0),
*getInputGrad(1), kCosSimScale_);
*getInputGrad(1), config_.cos_scale());
}
}

@ -36,7 +36,7 @@ namespace paddle {
class CosSimLayer : public Layer {
public:
explicit CosSimLayer(const LayerConfig& config)
: Layer(config), kCosSimScale_(5.0f) {}
: Layer(config) {}
~CosSimLayer() {}
@ -44,8 +44,6 @@ public:
void forward(PassType passType);
void backward(const UpdateCallback& callback = nullptr);
const real kCosSimScale_;
};
} // namespace paddle

@ -509,8 +509,10 @@ void HuberTwoClass::forwardImp(Matrix &output, Argument &label,
Matrix &cost) {
if (useGpu_) {
for (size_t i = 0; i < inputLayers_.size(); i++) {
tmpCpuInput_[i].resizeAndCopyFrom(getInput(i), false, HPPL_STREAM_1);
tmpCpuInput_[i].resizeAndCopyFrom(
getInput(i), false, HPPL_STREAM_DEFAULT);
}
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
}
forwardImpIn(output, label, cost);
}

@ -115,29 +115,11 @@ void CudnnBatchNormLayer::backward(const UpdateCallback& callback) {
create(tmpBiasGrad_, 1, channels_, &betaGrad);
}
// because of the different api of cudnn v4 and v5.
if (hl_get_cudnn_lib_version() < 5000) {
if (weight_->getWGrad()) {
create(tmpWGrad_, 1, channels_, &gammaGrad);
}
if (biases_ && biases_->getWGrad()) {
create(tmpBiasGrad_, 1, channels_, &betaGrad);
}
}
hl_batch_norm_backward(ioDesc_, input, ioDesc_, outGrad,
ioDesc_, inGrad, bnParamDesc_,
gamma, gammaGrad, betaGrad,
EPS, savedMean, savedInvVar);
// because of the different api of cudnn v4 and v5.
if (hl_get_cudnn_lib_version() < 5000) {
if (weight_->getWGrad() && biases_->getWGrad()) {
weight_->getWGrad()->add(*tmpWGrad_);
biases_->getWGrad()->add(*tmpBiasGrad_);
}
}
{
REGISTER_TIMER_INFO("WeightUpdate", getName().c_str());
biases_->getParameterPtr()->incUpdate(callback);

@ -0,0 +1,58 @@
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "Layer.h"
namespace paddle {
// Debugging layer: its forward pass (defined out of line) writes the contents
// of each input argument to the INFO log. The backward pass does nothing.
class PrintLayer : public Layer {
public:
// Passes the layer configuration straight through to the Layer base class.
explicit PrintLayer(const LayerConfig& config)
: Layer(config) {}
// Logs the populated fields of every input; see the out-of-line definition.
void forward(PassType passType);
// Empty body: this layer performs no work on the backward pass.
void backward(const UpdateCallback& callback) {}
};
/**
 * @brief Run the base-class forward step, then dump every input to the log.
 *
 * For each input layer, every field of its output argument that is set
 * (value matrix, ids, sequence start positions, sub-sequence start
 * positions) is printed into a string buffer and written to the INFO log,
 * prefixed with the input layer's name.
 *
 * @param passType forwarded unchanged to Layer::forward.
 */
void PrintLayer::forward(PassType passType) {
  Layer::forward(passType);

  const size_t numInputs = inputLayers_.size();
  for (size_t idx = 0; idx < numInputs; ++idx) {
    const auto& input = getInput(idx);
    const std::string& layerName = inputLayers_[idx]->getName();

    // Value matrix, if present.
    if (input.value) {
      std::ostringstream buf;
      input.value->print(buf);
      LOG(INFO) << "layer=" << layerName << " value matrix:\n" << buf.str();
    }

    // Integer ids, printed in full (count = vector size).
    if (input.ids) {
      std::ostringstream buf;
      input.ids->print(buf, input.ids->getSize());
      LOG(INFO) << "layer=" << layerName << " ids vector:\n" << buf.str();
    }

    // Sequence start positions; getVector(false) is used to obtain the
    // vector that gets printed.
    if (auto seqStarts = input.sequenceStartPositions) {
      std::ostringstream buf;
      seqStarts->getVector(false)->print(buf, seqStarts->getSize());
      LOG(INFO) << "layer=" << layerName << " sequence pos vector:\n"
                << buf.str();
    }

    // Sub-sequence start positions, handled the same way.
    if (auto subSeqStarts = input.subSequenceStartPositions) {
      std::ostringstream buf;
      subSeqStarts->getVector(false)->print(buf, subSeqStarts->getSize());
      LOG(INFO) << "layer=" << layerName << " sub-sequence pos vector:\n"
                << buf.str();
    }
  }
}
// Associates the type name `print` with PrintLayer. NOTE(review): presumably
// this lets a config create the layer by that name; confirm against the
// REGISTER_LAYER macro definition.
REGISTER_LAYER(print, PrintLayer);
} // namespace paddle

@ -52,8 +52,10 @@ public:
Layer::forward(passType);
if (useGpu_) {
for (size_t i = 0; i < inputLayers_.size(); i++) {
tmpCpuInput_[i].resizeAndCopyFrom(getInput(i), false, HPPL_STREAM_1);
tmpCpuInput_[i].resizeAndCopyFrom(
getInput(i), false, HPPL_STREAM_DEFAULT);
}
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
forwardImp(tmpCpuInput_[0]);
} else {
forwardImp(getInput(0));

@ -92,7 +92,6 @@ void testState(LayerPtr testLayer, vector<DataLayerPtr>& dataLayers,
testLayer->forward(PASS_TEST);
Argument out;
out.resizeAndCopyFrom(testLayer->getOutput(), /* useGpu= */ false);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
if (batchOut.value) {
size_t dim = batchOut.value->getWidth();
ASSERT_TRUE((bool)out.value);
@ -220,7 +219,6 @@ void testBatchState(LayerPtr testLayer, vector<DataLayerPtr>& dataLayers,
testLayer->forward(PASS_TEST);
Argument out;
out.resizeAndCopyFrom(testLayer->getOutput(), /* useGpu= */ false);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
if (batchOut.value) {
size_t dim = batchOut.value->getWidth();
ASSERT_TRUE((bool)out.value);

@ -0,0 +1,35 @@
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.PyDataProvider2 import *
# Two hand-written samples used by the providers in this file. Each entry is
# [nested_ids, label]: nested_ids is a list of integer lists (two inner lists
# in the first sample, three in the second) and label is an integer.
data = [
[[[1, 3, 2], [4, 5, 2]], 0],
[[[0, 2], [2, 5], [0, 1, 2]], 1],
]
@provider(
    input_types=[integer_value_sub_sequence(10), integer_value(2)])
def process_subseq(settings, file_name):
    """Yield every entry of ``data`` unchanged, one sample at a time.

    Each yielded sample keeps its nested layout: a list of integer lists
    followed by an integer label. ``settings`` and ``file_name`` are accepted
    but not used.
    """
    for sample in data:
        yield sample
@provider(
    input_types=[integer_value_sequence(10), integer_value(2)])
def process_seq(settings, file_name):
    """Yield each entry of ``data`` with its nested id lists flattened.

    For every sample the inner lists are concatenated in order into one flat
    list of integers, which is yielded together with the sample's label as a
    ``(flat_ids, label)`` tuple. ``settings`` and ``file_name`` are accepted
    but not used.
    """
    for nested_ids, label in data:
        flat_ids = [token for chunk in nested_ids for token in chunk]
        yield flat_ids, label

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save