commit 455d2bd7b1

@@ -1,25 +0,0 @@
Debian Package installation guide
=================================

PaddlePaddle provides :code:`deb` packages. Installation of the :code:`deb` package has been tested on Ubuntu 14.04, but it should also work on other Debian-based Linux distributions.

There are four versions of the Debian package: :code:`cpu`, :code:`gpu`, :code:`cpu-noavx`, and :code:`gpu-noavx`. The :code:`noavx` versions support CPUs that do not have :code:`AVX` instructions. The :code:`deb` packages can be downloaded from: https://github.com/baidu/Paddle/releases/

After downloading the PaddlePaddle deb package, you can install it with :code:`gdebi`:

.. code-block:: bash

    gdebi paddle-*.deb

If :code:`gdebi` is not installed, you can install it with :code:`sudo apt-get install gdebi`.

Alternatively, you can use the following commands to install PaddlePaddle:

.. code-block:: bash

    dpkg -i paddle-*.deb
    apt-get install -f

It is normal for :code:`dpkg -i` to report errors here; :code:`apt-get install -f` will install the missing dependencies and then finish installing PaddlePaddle. If you use the GPU version of the deb package, you also need to install the CUDA toolkit and cuDNN first, and set the related environment variables (such as LD_LIBRARY_PATH).
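For example, a minimal sketch of that GPU setup (the exact paths are assumptions; adjust them to wherever CUDA and cuDNN are actually installed on your system):

.. code-block:: bash

    # hypothetical install locations; adjust to your CUDA/cuDNN paths
    export PATH=/usr/local/cuda/bin:$PATH
    export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH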
File diff suppressed because it is too large
@@ -0,0 +1,47 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#if defined(__ARM_NEON__) || defined(__ARM_NEON)

#include <arm_neon.h>

namespace paddle {
namespace neon {

// Load four floats from a pointer known to be 16-byte aligned; the
// alignment hint lets the compiler emit an aligned load instruction.
inline float32x4_t vld1q_f32_aligned(const float* p) {
  return vld1q_f32(
      (const float*)__builtin_assume_aligned(p, sizeof(float32x4_t)));
}

#ifndef __aarch64__
// vaddvq_f32 and vmlaq_laneq_f32 are AArch64-only intrinsics; provide
// equivalent implementations for 32-bit ARM.

// Horizontal add: sum the four lanes of a vector into a scalar.
inline float32_t vaddvq_f32(float32x4_t a) {
  float32x2_t v = vadd_f32(vget_high_f32(a), vget_low_f32(a));
  return vget_lane_f32(vpadd_f32(v, v), 0);
}

// Multiply-accumulate with a scalar taken from lane `lane` of v.
inline float32x4_t vmlaq_laneq_f32(float32x4_t a,
                                   float32x4_t b,
                                   float32x4_t v,
                                   const int lane) {
  return vmlaq_n_f32(a, b, vgetq_lane_f32(v, lane));
}
#endif

}  // namespace neon
}  // namespace paddle

#endif
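As a usage illustration, the following minimal sketch (a hypothetical `neon_sum` helper, not part of the header) shows why the ARMv7 fallback for `vaddvq_f32` is useful: the same horizontal-reduction code then compiles on both 32-bit ARM and AArch64.

#if defined(__ARM_NEON__) || defined(__ARM_NEON)
#include <arm_neon.h>
#include <cstddef>
// Assumes the header above has been included so the ARMv7 fallback is visible.
using namespace paddle::neon;

// Sum a float buffer whose length is a multiple of 4; vaddvq_f32 resolves to
// the native AArch64 intrinsic or to the fallback defined in the header.
inline float neon_sum(const float* data, size_t len) {
  float32x4_t acc = vdupq_n_f32(0.f);
  for (size_t i = 0; i < len; i += 4) {
    acc = vaddq_f32(acc, vld1q_f32(data + i));
  }
  return vaddvq_f32(acc);
}
#endif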
File diff suppressed because it is too large
@@ -0,0 +1,135 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "CrossEntropyOverBeam.h"
#include "Layer.h"

namespace paddle {

/* This struct stores the beams in all search steps for a single sequence. */
struct BeamExpansion {
  std::vector<MatrixPtr> scores;
  std::vector<IVectorPtr> seqInfo;

  std::vector<MatrixPtr> candidateIds;
  std::vector<int> gold;

  std::vector<MatrixPtr> scoreGrad;

  size_t expansionCount;

  explicit BeamExpansion(int n) {
    expansionCount = n;
    scores.resize(expansionCount);
    seqInfo.resize(expansionCount);
    candidateIds.resize(expansionCount);
    scoreGrad.resize(expansionCount);

    gold.resize(expansionCount);
  }
};
typedef std::shared_ptr<BeamExpansion> BeamExpansionPtr;

class CostForOneSequence {
public:
  CostForOneSequence()
      : beamSize_(0), validExpansionCount_(0), goldAsExtraPath_(false) {}
  void setData(const BeamExpansionPtr bPtr, size_t beamSize) {
    beams_ = bPtr;
    beamSize_ = beamSize;

    expandedPathScores_.clear();
    expandedPathScores_.resize(beams_->expansionCount);

    goldRowIds_.clear();
    goldRowIds_.resize(beams_->expansionCount, 0);
    goldColIds_.clear();
    goldColIds_.resize(beams_->expansionCount, -1);
  }
  size_t getValidExpansionCount() { return validExpansionCount_; }

  real forward();
  void backward();

private:
  void calValidExpandStep();
  void constructTotalExpansion();
  size_t initLastExpansion();
  real globallyNormalizedScore();

  // Re-base a row position against the first start position of the beam.
  int getSeqStartPos(size_t beamId, size_t rowId) {
    CHECK_GT(beams_->seqInfo[beamId]->getSize() - 1, rowId);
    int* starts = beams_->seqInfo[beamId]->getData();
    return starts[rowId] - starts[0];
  }

  size_t beamSize_;
  size_t validExpansionCount_;
  bool goldAsExtraPath_;
  std::vector<int> goldRowIds_;
  std::vector<int> goldColIds_;

  BeamExpansionPtr beams_;
  std::vector<std::vector<int>> pathRowIdsInEachBeam_;
  std::vector<int> parentIdsInBeam_;
  size_t goldIdsInFinalExpansion_;

  std::vector<MatrixPtr> expandedPathScores_;

  MatrixPtr softmaxOut_;
};

class CrossEntropyOverBeam : public Layer {
public:
  explicit CrossEntropyOverBeam(const LayerConfig& config) : Layer(config) {}
  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;
  void forward(PassType passType) override;
  void backward(const UpdateCallback& callback) override;

private:
  void checkInputs();
  void copyInputsToCpu();
  void resizeOutput();
  void copyGradToGpu(size_t copyCount);
  void splitBatchBeams();

  size_t beamExpanCount_;
  size_t batchSize_;
  size_t beamSize_;

  /*
   * Constructing beams is not GPU-friendly, so currently this layer runs
   * only on CPU; if any of its inputs resides in GPU memory, it is first
   * copied to CPU memory.
   */
  std::vector<MatrixPtr> candidateScores_;
  std::vector<MatrixPtr> candidateScoreGrad_;
  std::vector<MatrixPtr> candidateInBeam_;
  std::vector<MatrixPtr> gradToInputs_;
  std::vector<IVectorPtr> goldSequence_;
  std::vector<std::vector<int>> beamSplitPos_;

  /*
   * Split the entire batch of beams into per-sequence beams and store the
   * result in this member.
   */
  std::vector<BeamExpansion> beamPerSeq_;
  /* beamCosts_ is used to propagate errors in one sequence. */
  std::vector<CostForOneSequence> beamCosts_;
};

}  // namespace paddle
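The offset arithmetic in `CostForOneSequence::getSeqStartPos` can be illustrated with a minimal, self-contained sketch in plain STL (the `seqStartPos` free function and the sample data are assumptions, not part of the layer):

#include <cassert>
#include <cstddef>
#include <vector>

// seqInfo holds cumulative sequence start positions; a position inside one
// beam is re-based against the first entry, mirroring getSeqStartPos.
int seqStartPos(const std::vector<int>& starts, size_t rowId) {
  assert(rowId < starts.size() - 1);  // mirrors the CHECK_GT guard
  return starts[rowId] - starts[0];
}

int main() {
  std::vector<int> starts = {5, 8, 12, 20};  // hypothetical seqInfo contents
  assert(seqStartPos(starts, 0) == 0);  // first sequence starts at offset 0
  assert(seqStartPos(starts, 2) == 7);  // 12 - 5
  return 0;
}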
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -0,0 +1,144 @@
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "MKLDNNMatrix.h"

using namespace mkldnn;  // NOLINT

namespace paddle {

MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, memory::primitive_desc pd) {
  memory::desc md = pd.desc();
  size_t ndims = md.data.ndims;
  int* dims = md.data.dims;
  CHECK(ndims > 0) << "Input dims should not be empty";
  size_t cnts = 1;
  for (size_t i = 0; i < ndims; ++i) {
    cnts *= dims[i];
  }

  if (m == nullptr) {
    size_t height = dims[0];
    size_t width = cnts / dims[0];
    m = Matrix::create(height, width, false, false);
  }

  CHECK(m) << "Matrix should not be empty";
  CpuMatrixPtr cpuMatrix = std::dynamic_pointer_cast<CpuMatrix>(m);
  CHECK(cpuMatrix) << "Only creating from a CPU matrix is supported yet";

  CHECK_EQ(cnts, m->getElementCnt()) << "Count size does not match";
  return std::make_shared<MKLDNNMatrix>(
      m->getData(), m->getHeight(), m->getWidth(), pd);
}

MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m,
                                     memory::dims dims,
                                     memory::format fmt,
                                     engine& eg,
                                     mkldnn::memory::data_type dtype) {
  return create(m, memory::primitive_desc(memory::desc(dims, dtype, fmt), eg));
}

void MKLDNNMatrix::reorderDataFrom(const MKLDNNMatrixPtr& m,
                                   memory::format srcFmt,
                                   memory::dims targetDim) {
  memory::format dstFmt = getFormat();
  if (srcFmt == dstFmt) {
    return;
  }
  CHECK_EQ(getElementCnt(), m->getElementCnt()) << "size should be equal";
  reorderOnce(getData(), m->getData(), srcFmt, dstFmt, targetDim);
}

void MKLDNNMatrix::reorderDataTo(const MKLDNNMatrixPtr& m,
                                 memory::format dstFmt,
                                 memory::dims targetDim) {
  memory::format srcFmt = getFormat();
  if (srcFmt == dstFmt) {
    return;
  }
  CHECK_EQ(getElementCnt(), m->getElementCnt()) << "size should be equal";
  reorderOnce(getData(), m->getData(), srcFmt, dstFmt, targetDim);
}

void MKLDNNMatrix::reorderOnce(void* srcData,
                               void* dstData,
                               memory::format srcFmt,
                               memory::format dstFmt,
                               memory::dims dm) {
  CHECK(srcData);
  CHECK(dstData);
  MatrixPtr tmpSrc;
  if (dstData == srcData) {
    // In-place reorder: copy the source into a temporary buffer first, so the
    // reorder primitive reads and writes through separate memories.
    size_t sz = 1;
    for (size_t i = 0; i < dm.size(); ++i) {
      sz *= dm[i];
    }
    tmpSrc = Matrix::create(sz, 1, false, false);
    tmpSrc->copyFrom((real*)srcData, sz);
    srcData = tmpSrc->getData();
  }

  auto dtype = this->getDtype();
  auto srcMD = memory::desc(dm, dtype, srcFmt);
  auto dstMD = memory::desc(dm, dtype, dstFmt);

  auto eg = this->getEngine();
  auto src = memory(memory::primitive_desc(srcMD, eg), srcData);
  auto dst = memory(memory::primitive_desc(dstMD, eg), dstData);

  auto r = reorder(src, dst);
  stream(stream::kind::eager).submit({r}).wait();
}

void MKLDNNMatrix::downSpatial() {
  int fmt = getFormat();
  if (!(fmt == memory::format::nchw || fmt == memory::format::oihw)) {
    // Only nchw and oihw are supported yet; more formats such as nhwc and
    // ihwo can be added later.
    return;
  }

  // TODO(TJ): change H (height) and W (width) if nhwc or more formats are
  // supported.
  const int H = 2, W = 3;
  memory::dims srcDims = getDims();
  if (srcDims[H] != 1 || srcDims[W] != 1) {
    // Cannot drop the spatial dimensions unless both are 1.
    return;
  }

  memory::dims dstDims = memory::dims{srcDims[0], srcDims[1]};
  memory::format dstFmt;
  switch (fmt) {
    case memory::format::nchw:
      dstFmt = memory::format::nc;
      break;
    case memory::format::oihw:
      dstFmt = memory::format::oi;
      break;
    default:
      LOG(FATAL) << "unsupported format";
  }
  memory::desc md = memory::desc(dstDims, getDtype(), dstFmt);
  memory::primitive_desc pd = memory::primitive_desc(md, getEngine());
  mkldnn_primitive_t result;
  mkldnn::error::wrap_c_api(
      mkldnn_primitive_create(&result, pd.get(), nullptr, nullptr),
      "could not create a memory primitive");
  reset(result);
  set_data_handle(getData());
}

}  // namespace paddle
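As an illustration of the reorder pattern used in `reorderOnce` above, here is a minimal, self-contained sketch against the MKL-DNN v0.x C++ API used by this file (the `main` function and the small nchw-to-nhwc example are assumptions, not part of the file):

#include <mkldnn.hpp>
#include <vector>
using namespace mkldnn;  // NOLINT

int main() {
  engine eg(engine::cpu, 0);
  memory::dims dm = {1, 2, 2, 2};  // n, c, h, w
  std::vector<float> src(8), dst(8);
  for (int i = 0; i < 8; ++i) src[i] = float(i);

  // Describe the same buffer shape in two formats and let a reorder
  // primitive shuffle the data, exactly as reorderOnce does.
  auto srcMD = memory::desc(dm, memory::data_type::f32, memory::format::nchw);
  auto dstMD = memory::desc(dm, memory::data_type::f32, memory::format::nhwc);
  auto srcMem = memory(memory::primitive_desc(srcMD, eg), src.data());
  auto dstMem = memory(memory::primitive_desc(dstMD, eg), dst.data());

  auto r = reorder(srcMem, dstMem);
  stream(stream::kind::eager).submit({r}).wait();  // dst now holds nhwc data
  return 0;
}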
Some files were not shown because too many files have changed in this diff