Merge branch 'develop' into crf

fix-typo
caoying03 8 years ago
commit 6a630f2798

@ -127,6 +127,7 @@ include(external/warpctc) # download, build, install warpctc
include(external/any) # download libn::any
include(external/eigen) # download eigen3
include(external/pybind11) # download pybind11
include(external/nccl)
include(cudnn) # set cudnn libraries, must before configure
include(configure) # add paddle env configuration
@ -159,7 +160,7 @@ set(EXTERNAL_LIBS
if(WITH_GPU)
list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY})
if(NOT WITH_DSO)
list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY})
list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY} ${NCCL_LIBRARY})
endif(NOT WITH_DSO)
endif(WITH_GPU)

@ -62,11 +62,11 @@ else()
FIND_PACKAGE(CUDA REQUIRED)
if(${CUDA_VERSION_MAJOR} VERSION_LESS 7)
message(FATAL_ERROR "Paddle need CUDA >= 7.0 to compile")
message(FATAL_ERROR "Paddle needs CUDA >= 7.0 to compile")
endif()
if(NOT CUDNN_FOUND)
message(FATAL_ERROR "Paddle need cudnn to compile")
message(FATAL_ERROR "Paddle needs cudnn to compile")
endif()
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SIMD_FLAG}")

@ -0,0 +1,50 @@
INCLUDE(ExternalProject)
SET(NCCL_SOURCE_DIR ${THIRD_PARTY_PATH}/nccl)
INCLUDE_DIRECTORIES(${NCCL_SOURCE_DIR}/src/extern_nccl/src)
if(WITH_DSO)
# If we use DSO, we do not build nccl, just download the dependencies
set(NCCL_BUILD_COMMAND "")
set(NCCL_INSTALL_COMMAND "")
set(NCCL_INSTALL_DIR "")
else()
# otherwise, we build nccl and link it.
set(NCCL_BUILD_COMMAND "make -j 8")
set(NCCL_INSTALL_COMMAND "make install")
SET(NCCL_INSTALL_DIR ${THIRD_PARTY_PATH}/install/nccl)
endif()
ExternalProject_Add(
extern_nccl
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/NVIDIA/nccl.git"
GIT_TAG "v1.3.4-1"
PREFIX "${NCCL_SOURCE_DIR}"
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
BUILD_COMMAND "${NCCL_BUILD_COMMAND}"
INSTALL_COMMAND "${NCCL_INSTALL_COMMAND}"
INSTALL_DIR "${NCCL_INSTALL_DIR}"
TEST_COMMAND ""
)
if (WITH_DSO)
if (${CMAKE_VERSION} VERSION_LESS "3.3.0")
set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/lib_any_dummy.c)
file(WRITE ${dummyfile} "const char * dummy_any = \"${dummyfile}\";")
add_library(nccl STATIC ${dummyfile})
else()
add_library(nccl INTERFACE)
endif()
else()
ADD_LIBRARY(nccl STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET nccl PROPERTY IMPORTED_LOCATION
${NCCL_INSTALL_DIR}/lib/libnccl.a)
endif()
add_dependencies(nccl extern_nccl)
LIST(APPEND external_project_dependencies nccl)

@ -174,7 +174,7 @@ decoder_inputs = paddle.layer.fc(
1. 两者都是对梯度的截断,但截断时机不同,前者在 :code:`optimzier` 更新网络参数时应用;后者在激活函数反向计算时被调用;
2. 截断对象不同:前者截断可学习参数的梯度,后者截断回传给前层的梯度;
除此之外,还可以通过减小学习或者对数据进行归一化处理来解决这类问题。
除此之外,还可以通过减小学习或者对数据进行归一化处理来解决这类问题。
5. 如何调用 infer 接口输出多个layer的预测结果
-----------------------------------------------

@ -87,11 +87,8 @@ class OpInfoMap {
}
}
template <typename Callback>
void IterAllInfo(Callback callback) {
for (auto& it : map_) {
callback(it.first, it.second);
}
const std::unordered_map<std::string, const OpInfo>& map() const {
return map_;
}
private:

@ -18,6 +18,10 @@ limitations under the License. */
namespace paddle {
namespace framework {
VarDesc::VarType VarDescBind::GetType() const { return desc_.type(); }
void VarDescBind::SetType(VarDesc::VarType type) { desc_.set_type(type); }
void VarDescBind::SetShape(const std::vector<int64_t> &dims) {
VectorToRepeated(dims, mutable_tensor_desc()->mutable_dims());
}

@ -75,9 +75,9 @@ class VarDescBind {
int32_t GetLodLevel() const;
VarDesc::VarType GetType() const { return desc_.type(); }
VarDesc::VarType GetType() const;
void SetType(VarDesc::VarType type) { desc_.set_type(type); }
void SetType(VarDesc::VarType type);
bool Persistable() const { return desc_.persistable(); }

@ -126,7 +126,7 @@ void MKLDNNEltwiseActivation::resetFwd(Argument& act) {
copyInVal_ = nullptr;
if (act.grad && algo == algorithm::eltwise_tanh) {
// tanh need save src input for backward
inVal_ = MKLDNNMatrix::create(nullptr, val_->getPrimitiveDesc());
inVal_ = MKLDNNMatrix::create(val_->getPrimitiveDesc());
copyInVal_ = std::make_shared<mkldnn::reorder>(*val_, *inVal_);
CHECK(copyInVal_) << "should not be emptry";
pipelineFwd_.push_back(*copyInVal_);
@ -145,7 +145,7 @@ void MKLDNNEltwiseActivation::resetBwd(Argument& act) {
algorithm algo = getAlgo(this->getName());
float alpha = getBwdAlpha();
float beta = getBeta();
grad_ = MKLDNNMatrix::create(act.grad, val_->getPrimitiveDesc());
grad_ = MKLDNNMatrix::create(val_->getPrimitiveDesc(), act.grad);
auto eng = CPUEngine::Instance().getEngine();
auto bwdDesc = eltwise_bwd::desc(
algo, grad_->getMemoryDesc(), val_->getMemoryDesc(), alpha, beta);
@ -230,7 +230,7 @@ void MKLDNNActivation::resetFwd(Argument& act) {
int ic = cnt_ / bs / ih / iw;
CHECK_EQ(cnt_, (size_t)bs * ic * ih * iw);
val_ = MKLDNNMatrix::create(
act.value, {bs, ic, ih, iw}, mkldnn::memory::format::nchw, *engine_);
{bs, ic, ih, iw}, mkldnn::memory::format::nchw, *engine_, act.value);
CHECK(val_);
val_->downSpatial();
}

@ -21,8 +21,8 @@ namespace paddle {
typedef enum {
MKLDNN_BASE = 1, // basical info of MKLDNN
MKLDNN_TESTS = 1, // gtest info of MKLDNN
MKLDNN_SIZES = 2, // size info of MKLDNN
MKLDNN_FMTS = 3, // format info of MKLDNN
MKLDNN_FMTS = 2, // format info of MKLDNN
MKLDNN_SIZES = 3, // size info of MKLDNN
MKLDNN_ALL = 4, // show all info of MKLDNN
} MKLDNN_LOG_LEVEL;

File diff suppressed because it is too large Load Diff

@ -48,17 +48,6 @@ protected:
// save forward primitive_desc, which can be used backward
std::shared_ptr<conv_fwd::primitive_desc> fwdPD_;
// MKLDNNMatrixPtr which should be created from CPU Device
MKLDNNMatrixPtr cpuInVal_;
MKLDNNMatrixPtr cpuInGrad_;
MKLDNNMatrixPtr cpuOutVal_;
MKLDNNMatrixPtr cpuOutGrad_;
// convert handle between CPU device and MKLDNN device
std::shared_ptr<mkldnn::reorder> cvtInVal_;
std::shared_ptr<mkldnn::reorder> cvtInGrad_;
std::shared_ptr<mkldnn::reorder> cvtOutVal_;
std::shared_ptr<mkldnn::reorder> cvtOutGrad_;
// whether the weight has been init
bool hasInitedWgt_;
@ -94,8 +83,6 @@ public:
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) override;
void updateInputData() override;
void updateWeights(const UpdateCallback& callback) override;
void convertWeightsFromPaddle() override;
@ -109,26 +96,6 @@ public:
<< ", sw: " << sw_ << ", dh: " << dh_ << ", dw: " << dw_;
}
void printValueFormatFlow() override {
if (cpuInVal_) {
VLOG(MKLDNN_FMTS) << cpuInVal_->getFormat() << " >>>";
}
MKLDNNLayer::printValueFormatFlow();
if (cpuOutVal_) {
VLOG(MKLDNN_FMTS) << " >>> " << cpuOutVal_->getFormat();
}
}
void printGradFormatFlow() override {
if (cpuInGrad_) {
VLOG(MKLDNN_FMTS) << cpuInGrad_->getFormat() << " <<<";
}
MKLDNNLayer::printGradFormatFlow();
if (cpuOutGrad_) {
VLOG(MKLDNN_FMTS) << " <<< " << cpuOutGrad_->getFormat();
}
}
protected:
/**
* load the dims settings of this conv
@ -162,23 +129,6 @@ protected:
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out);
/**
* reset MKLDNNMatrix of input value
*/
void resetInValue(std::shared_ptr<conv_fwd::primitive_desc>& pd,
MKLDNNMatrixPtr& in);
/**
* reset MKLDNNMatrix of weight and bias value
*/
void resetWgtBiasValue(std::shared_ptr<conv_fwd::primitive_desc>& pd,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias);
/**
* reset MKLDNNMatrix of output value
*/
void resetOutValue(std::shared_ptr<conv_fwd::primitive_desc>& pd,
MKLDNNMatrixPtr& out);
/**
* reset the backward weight primitive descriptor.
*/
@ -207,22 +157,6 @@ protected:
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out);
/**
* reset MKLDNNMatrix of output grad
*/
void resetOutGrad(std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD,
MKLDNNMatrixPtr& out);
/**
* reset MKLDNNMatrix of weight and bias grad
*/
void resetWgtBiasGrad(std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias);
/**
* reset MKLDNNMatrix of input grad
*/
void resetInGrad(std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
MKLDNNMatrixPtr& in);
/**
* reset MKLDNNMatrix of weight value for backward data
* since the primitive_desc would be different with wgtVal_

@ -62,7 +62,7 @@ void MKLDNNFcLayer::convertWeightsFromPaddle() {
CHECK(wgtVal_) << "should have been initialized";
bool hasNoSpatial_ = ih_ == 1 && iw_ == 1;
auto targetDim = wgtVal_->getDims();
auto srcFmt = hasNoSpatial_ ? memory::format::io : memory::format::ihwo;
auto srcFmt = hasNoSpatial_ ? format::io : format::ihwo;
wgtVal_->reorderDataFrom(wgtVal_, srcFmt, targetDim);
hasInitedWgt_ = true;
}
@ -71,7 +71,7 @@ void MKLDNNFcLayer::convertWeightsToPaddle() {
CHECK(wgtVal_) << "should have been initialized";
bool hasNoSpatial_ = ih_ == 1 && iw_ == 1;
auto targetDim = wgtVal_->getDims();
auto dstFmt = hasNoSpatial_ ? memory::format::io : memory::format::ihwo;
auto dstFmt = hasNoSpatial_ ? format::io : format::ihwo;
wgtVal_->reorderDataTo(wgtVal_, dstFmt, targetDim);
}
@ -100,8 +100,6 @@ void MKLDNNFcLayer::resetFwd(std::vector<primitive>& pipeline,
resetFwdPD(fwdPD_, in, wgt, bias, out);
resetFwdPipeline(pipeline, fwdPD_, in, wgt, bias, out);
printValueFormatFlow();
}
void MKLDNNFcLayer::resetBwd(std::vector<primitive>& pipeline,
@ -119,12 +117,6 @@ void MKLDNNFcLayer::resetBwd(std::vector<primitive>& pipeline,
resetBwdDataPD(bwdDataPD, in, out);
resetBwdPipeline(pipeline, bwdWgtPD, bwdDataPD, in, wgt, bias, out);
printGradFormatFlow();
}
void MKLDNNFcLayer::updateInputData() {
inVal_->setData(getInputValue(0, CPU_DEVICE)->getData());
}
void MKLDNNFcLayer::updateWeights(const UpdateCallback& callback) {
@ -139,51 +131,30 @@ void MKLDNNFcLayer::resetFwdBuffers(MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) {
resetInValue(in);
resetWgtBiasValue(wgt, bias);
resetOutValue(out);
}
void MKLDNNFcLayer::resetInValue(MKLDNNMatrixPtr& in) {
if (inputIsOnlyMKLDNN()) {
const MatrixPtr& dnnIn = getInputValue(0);
in = std::dynamic_pointer_cast<MKLDNNMatrix>(dnnIn);
CHECK(in) << "Input should be MKLDNNMatrix";
} else {
CHECK_EQ(getPrev(0)->getDeviceId(), CPU_DEVICE) << "Only support CPU yet";
const MatrixPtr& cpuIn = getInputValue(0, CPU_DEVICE);
in = MKLDNNMatrix::create(
cpuIn, {bs_, ic_, ih_, iw_}, format::nchw, engine_);
}
CHECK(in);
in->downSpatial();
}
void MKLDNNFcLayer::resetWgtBiasValue(MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias) {
auto outPD =
MKLDNNMatrix::createPrimitiveDesc({bs_, oc_}, format::nc, engine_);
resetOutValue(out, outPD);
format wgtFmt = format::oihw;
if (inVal_->getFormat() == format::nChw8c) {
if (in->getFormat() == format::nChw8c) {
wgtFmt = format::oIhw8i;
} else if (inVal_->getFormat() == format::nChw16c) {
} else if (in->getFormat() == format::nChw16c) {
wgtFmt = format::oIhw16i;
}
wgt = MKLDNNMatrix::create(
weight_->getW(), {oc_, ic_, ih_, iw_}, wgtFmt, engine_);
auto wgtPD =
MKLDNNMatrix::createPrimitiveDesc({oc_, ic_, ih_, iw_}, wgtFmt, engine_);
resetWithMatrix(wgt, weight_->getW(), wgtPD);
wgt->downSpatial();
VLOG(MKLDNN_FMTS) << "Weight value format: " << wgt->getFormat();
bias = (biases_ && biases_->getW())
? MKLDNNMatrix::create(biases_->getW(), {oc_}, format::x, engine_)
: nullptr;
}
void MKLDNNFcLayer::resetOutValue(MKLDNNMatrixPtr& out) {
out = MKLDNNMatrix::create(output_.value, {bs_, oc_}, format::nc, engine_);
if (!outputIsOnlyMKLDNN()) {
// fc cpu output value do not need create convert, just share data
getOutput(CPU_DEVICE).value->setData(out->getData());
if (biases_ && biases_->getW()) {
auto biasPD = MKLDNNMatrix::createPrimitiveDesc({oc_}, format::x, engine_);
resetWithMatrix(bias, biases_->getW(), biasPD);
} else {
bias = nullptr;
}
output_.value = std::dynamic_pointer_cast<Matrix>(out);
}
void MKLDNNFcLayer::resetFwdPD(std::shared_ptr<fc_fwd::primitive_desc>& pd,
@ -219,7 +190,6 @@ void MKLDNNFcLayer::resetFwdPipeline(
} else {
fwd_.reset(new fc_fwd(*pd, *in, *wgt, *out));
}
pipeline.push_back(*fwd_);
}
@ -227,44 +197,18 @@ void MKLDNNFcLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) {
resetOutGrad(out);
resetWgtBiasGrad(wgt, bias);
resetInGrad(in);
}
void MKLDNNFcLayer::resetOutGrad(MKLDNNMatrixPtr& out) {
CHECK(outVal_);
if (outputIsOnlyMKLDNN()) {
MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc());
} else {
const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
output_.grad->setData(cpuOut->getData());
out = MKLDNNMatrix::create(cpuOut, outVal_->getPrimitiveDesc());
}
}
CHECK(inVal_ && outVal_);
resetOutGrad(out, outVal_->getPrimitiveDesc());
resetInGrad(in, inVal_->getPrimitiveDesc());
void MKLDNNFcLayer::resetWgtBiasGrad(MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias) {
CHECK(wgtVal_);
wgt = MKLDNNMatrix::create(weight_->getWGrad(), wgtVal_->getPrimitiveDesc());
resetWithMatrix(wgt, weight_->getWGrad(), wgtVal_->getPrimitiveDesc());
bias = nullptr;
if (biasVal_ == nullptr) {
return;
}
bias =
MKLDNNMatrix::create(biases_->getWGrad(), biasVal_->getPrimitiveDesc());
}
void MKLDNNFcLayer::resetInGrad(MKLDNNMatrixPtr& in) {
in = nullptr;
if (inputLayers_[0]->getOutput().grad == nullptr) {
return;
if (biasVal_) {
resetWithMatrix(bias, biases_->getWGrad(), biasVal_->getPrimitiveDesc());
} else {
bias = nullptr;
}
CHECK(inVal_);
MKLDNNLayer::resetInGrad(in, inVal_->getPrimitiveDesc());
}
void MKLDNNFcLayer::resetBwdWgtPD(

@ -66,8 +66,6 @@ public:
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) override;
void updateInputData() override;
void updateWeights(const UpdateCallback& callback) override;
void convertWeightsFromPaddle() override;
@ -84,9 +82,6 @@ protected:
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out);
void resetInValue(MKLDNNMatrixPtr& in);
void resetWgtBiasValue(MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias);
void resetOutValue(MKLDNNMatrixPtr& out);
void resetFwdPD(std::shared_ptr<fc_fwd::primitive_desc>& pd,
MKLDNNMatrixPtr in,
MKLDNNMatrixPtr wgt,
@ -109,9 +104,6 @@ protected:
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out);
void resetOutGrad(MKLDNNMatrixPtr& out);
void resetWgtBiasGrad(MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias);
void resetInGrad(MKLDNNMatrixPtr& in);
void resetBwdWgtPD(std::shared_ptr<fc_bwdWgt::primitive_desc>& pd,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -85,8 +85,6 @@ void MKLDNNPoolLayer::resetFwd(std::vector<primitive>& pipeline,
resetFwdPD(fwdPD_, in, out);
resetFwdPipeline(pipeline, fwdPD_, in, out);
printValueFormatFlow();
}
void MKLDNNPoolLayer::resetBwd(std::vector<primitive>& pipeline,
@ -101,65 +99,22 @@ void MKLDNNPoolLayer::resetBwd(std::vector<primitive>& pipeline,
resetBwdPD(pd, in, out);
resetBwdPipeline(pipeline, pd, in, out);
printGradFormatFlow();
}
void MKLDNNPoolLayer::updateInputData() {
inVal_->setData(getInputValue(0, CPU_DEVICE)->getData());
}
void MKLDNNPoolLayer::resetFwdBuffers(MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& out) {
resetInValue(in);
resetOutValue(out);
}
void MKLDNNPoolLayer::resetInValue(MKLDNNMatrixPtr& in) {
if (inputIsOnlyMKLDNN()) {
const MatrixPtr& dnnIn = getInputValue(0);
in = std::dynamic_pointer_cast<MKLDNNMatrix>(dnnIn);
CHECK(in) << "Input should be MKLDNNMatrix";
} else {
CHECK_EQ(getPrev(0)->getDeviceId(), CPU_DEVICE) << "Only support CPU yet";
const MatrixPtr& cpuIn = getInputValue(0, CPU_DEVICE);
in = MKLDNNMatrix::create(
cpuIn, {bs_, ic_, ih_, iw_}, format::nchw, engine_);
}
}
void MKLDNNPoolLayer::resetOutValue(MKLDNNMatrixPtr& out) {
CHECK(inVal_) << "Should reset input value first";
memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_};
out = MKLDNNMatrix::create(
output_.value, outDims, inVal_->getFormat(), engine_);
// create reorder if output value has cpu device and pd do not match
cpuOutVal_ = nullptr;
cvtOutVal_ = nullptr;
if (!outputIsOnlyMKLDNN()) {
const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value;
cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_);
if (cpuOutVal_->getPrimitiveDesc() != out->getPrimitiveDesc()) {
out = MKLDNNMatrix::create(nullptr, out->getPrimitiveDesc());
cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_);
CHECK(cvtOutVal_) << "should not be emptry";
} else {
cpuOut->setData(output_.value->getData());
cpuOutVal_ = out;
}
output_.value = std::dynamic_pointer_cast<Matrix>(cpuOutVal_);
return;
}
output_.value = std::dynamic_pointer_cast<Matrix>(outVal_);
CHECK(in);
auto outPD =
MKLDNNMatrix::createPrimitiveDesc(outDims, in->getFormat(), engine_);
resetOutValue(out, outPD);
}
void MKLDNNPoolLayer::resetFwdPD(std::shared_ptr<pool_fwd::primitive_desc>& pd,
MKLDNNMatrixPtr in,
MKLDNNMatrixPtr out) {
memory::dims inDims = memory::dims{bs_, ic_, ih_, iw_};
memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_};
memory::dims kernels = memory::dims{fh_, fw_};
memory::dims strides = memory::dims{sh_, sw_};
memory::dims padL = memory::dims{ph_, pw_};
@ -194,58 +149,26 @@ void MKLDNNPoolLayer::resetFwdPipeline(
? std::make_shared<pool_fwd>(pool_fwd(*pd, *in, *out, *workspace_))
: std::make_shared<pool_fwd>(pool_fwd(*pd, *in, *out));
pipeline.push_back(*fwd_);
if (cvtOutVal_) {
pipeline.push_back(*cvtOutVal_);
}
}
void MKLDNNPoolLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& out) {
resetOutGrad(out);
resetInGrad(in);
}
void MKLDNNPoolLayer::resetOutGrad(MKLDNNMatrixPtr& out) {
cpuOutGrad_ = nullptr;
cvtOutGrad_ = nullptr;
CHECK(outVal_);
if (outputIsOnlyMKLDNN()) {
MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc());
} else {
const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
// always share the same grad data of CPU output
// then the activation can get the right grad from output_.grad
output_.grad->setData(cpuOut->getData());
cpuOutGrad_ = MKLDNNMatrix::create(
cpuOut, memory::dims{bs_, oc_, oh_, ow_}, format::nchw, engine_);
if (cpuOutGrad_->getPrimitiveDesc() != outVal_->getPrimitiveDesc()) {
out = MKLDNNMatrix::create(nullptr, outVal_->getPrimitiveDesc());
cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out);
CHECK(cvtOutGrad_) << "should not be emptry";
} else {
out = cpuOutGrad_;
}
}
}
void MKLDNNPoolLayer::resetInGrad(MKLDNNMatrixPtr& in) {
in = nullptr;
if (inputLayers_[0]->getOutput().grad == nullptr) {
return;
}
CHECK(inVal_);
MKLDNNLayer::resetInGrad(in, inVal_->getPrimitiveDesc());
CHECK(inVal_ && outVal_);
resetOutGrad(out, outVal_->getPrimitiveDesc());
resetInGrad(in, inVal_->getPrimitiveDesc());
}
void MKLDNNPoolLayer::resetBwdPD(std::shared_ptr<pool_bwd::primitive_desc>& pd,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& out) {
pd = nullptr;
if (in == nullptr) {
return;
}
memory::dims kernels = memory::dims{fh_, fw_};
memory::dims strides = memory::dims{sh_, sw_};
memory::dims padL = memory::dims{ph_, pw_};
memory::dims padR = getPaddingR();
CHECK(in);
CHECK(out);
auto bwdDesc = pool_bwd::desc(poolAlgo_,
in->getMemoryDesc(),
@ -263,8 +186,8 @@ void MKLDNNPoolLayer::resetBwdPipeline(
std::shared_ptr<pool_bwd::primitive_desc>& pd,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& out) {
if (cvtOutGrad_) {
pipeline.push_back(*cvtOutGrad_);
if (pd == nullptr) {
return;
}
bwdData_ =

@ -38,13 +38,6 @@ protected:
// pooling_avg or pooling_max
mkldnn::algorithm poolAlgo_;
// MKLDNNMatrixPtr which should be created from CPU Device
MKLDNNMatrixPtr cpuOutVal_;
MKLDNNMatrixPtr cpuOutGrad_;
// convert handle between CPU device and MKLDNN device
std::shared_ptr<mkldnn::reorder> cvtOutVal_;
std::shared_ptr<mkldnn::reorder> cvtOutGrad_;
// save forward primitive_desc, which can be used backward
std::shared_ptr<pool_fwd::primitive_desc> fwdPD_;
// according to https://github.com/01org/mkl-dnn/blob/master/tests/gtests/
@ -74,8 +67,6 @@ public:
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) override;
void updateInputData() override;
void printSizeInfo() override {
MKLDNNLayer::printSizeInfo();
VLOG(MKLDNN_SIZES) << getName() << ": fh: " << fh_ << ", fw: " << fw_
@ -90,8 +81,6 @@ protected:
* reset pipeline.
*/
void resetFwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& out);
void resetInValue(MKLDNNMatrixPtr& in);
void resetOutValue(MKLDNNMatrixPtr& out);
void resetFwdPD(std::shared_ptr<pool_fwd::primitive_desc>& pd,
MKLDNNMatrixPtr in,
MKLDNNMatrixPtr out);
@ -106,8 +95,6 @@ protected:
* reset pipeline.
*/
void resetBwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& out);
void resetOutGrad(MKLDNNMatrixPtr& out);
void resetInGrad(MKLDNNMatrixPtr& in);
void resetBwdPD(std::shared_ptr<pool_bwd::primitive_desc>& pd,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& out);

@ -97,7 +97,7 @@ void MKLDNNTester::randomWgtDatas() {
parameters_[REF][i]->randomize();
dnnValue->copyFrom(*refValue);
VLOG(lvl_) << "Random weight data " << parameters_[DNN][i]->getName();
VLOG(MKLDNN_TESTS) << "Random weight " << parameters_[DNN][i]->getName();
printVector(dnnValue);
}
}
@ -109,7 +109,7 @@ void MKLDNNTester::randomBotDatas() {
dataLayers_[REF][i]->getOutputValue()->randomizeUniform();
dataLayers_[DNN][i]->getOutputValue()->copyFrom(
*(dataLayers_[REF][i]->getOutputValue()));
VLOG(lvl_) << "Input " << i << " data:";
VLOG(MKLDNN_TESTS) << "Random Foward, InputValue " << i;
printMatrix(dataLayers_[REF][i]->getOutputValue());
}
}
@ -118,12 +118,12 @@ void MKLDNNTester::randomTopDiffs() {
refLayer_->getOutputGrad()->randomizeUniform();
dnnLayer_->getOutput(CPU_DEVICE)
.grad->copyFrom(*(refLayer_->getOutputGrad()));
VLOG(lvl_) << "Random Backward Input, TopDiff: ";
VLOG(MKLDNN_TESTS) << "Random Backward, OutputGrad";
printMatrix(refLayer_->getOutputGrad());
}
void MKLDNNTester::checkForward() {
VLOG(MKLDNN_ALL) << "Check Forward";
VLOG(MKLDNN_TESTS) << "Check Forward";
printTopDatas();
double delta =
compareMatrix(dnnLayer_->getOutputValue(), refLayer_->getOutputValue());
@ -131,15 +131,15 @@ void MKLDNNTester::checkForward() {
}
void MKLDNNTester::checkBackwardData() {
VLOG(MKLDNN_ALL) << "Check Backward Data";
VLOG(MKLDNN_TESTS) << "Check Backward Data";
// TODO(TJ): uncomment me when batch norm ready
// const bool isBN = dnnLayer_->getType() == "mkldnn_batch_norm";
for (size_t i = 0; i < dataLayers_[DNN].size(); ++i) {
const MatrixPtr& dnnDiff = dataLayers_[DNN][i]->getOutputGrad();
const MatrixPtr& refDiff = dataLayers_[REF][i]->getOutputGrad();
VLOG(lvl_) << "Mkldnn Backward Output BotDiff " << i;
VLOG(MKLDNN_ALL) << "MKLDNN Backward Result: InputGrad " << i;
printMatrix(dnnDiff);
VLOG(lvl_) << "Reference Backward Output BotDiff " << i;
VLOG(MKLDNN_ALL) << "Reference Backward Result: InputGrad " << i;
printMatrix(refDiff);
double delta = compareMatrix(dnnDiff, refDiff);
@ -153,7 +153,7 @@ void MKLDNNTester::checkBackwardData() {
}
void MKLDNNTester::checkBackwardWgts() {
VLOG(MKLDNN_ALL) << "Check Backward Weight";
VLOG(MKLDNN_TESTS) << "Check Backward Weight";
CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size());
vector<VectorPtr> dnnWgts; // used to temply save mkldnn weights
saveWgt(parameters_[DNN], dnnWgts);
@ -165,9 +165,11 @@ void MKLDNNTester::checkBackwardWgts() {
for (size_t i = 0; i < parameters_[DNN].size(); ++i) {
const VectorPtr& dnn = parameters_[DNN][i]->getBuf(PARAMETER_VALUE);
const VectorPtr& ref = parameters_[REF][i]->getBuf(PARAMETER_VALUE);
VLOG(lvl_) << "Mkldnn Output weight " << parameters_[DNN][i]->getName();
VLOG(MKLDNN_ALL) << "MKLDNN Result: weight value"
<< parameters_[DNN][i]->getName();
printVector(dnn);
VLOG(lvl_) << "Reference Output weight " << parameters_[REF][i]->getName();
VLOG(MKLDNN_ALL) << "Reference Result: weight value "
<< parameters_[REF][i]->getName();
printVector(ref);
double delta = compareVector(dnn, ref);
@ -240,7 +242,8 @@ void MKLDNNTester::printTopDatas() {
}
for (int n = 0; n < NUM; ++n) {
VLOG(lvl_) << testLayers_[n]->getType() << " forward output TopData: ";
VLOG(MKLDNN_ALL) << testLayers_[n]->getType()
<< " Forward Result: OutputValue";
printMatrix(testLayers_[n]->getOutputValue());
}
}
@ -252,7 +255,7 @@ void MKLDNNTester::printMatrix(const MatrixPtr& m) {
std::ostringstream ostr;
m->print(ostr);
VLOG(lvl_) << std::endl << ostr.str();
VLOG(MKLDNN_ALL) << std::endl << ostr.str();
}
void MKLDNNTester::printVector(const VectorPtr& v) {
@ -262,7 +265,7 @@ void MKLDNNTester::printVector(const VectorPtr& v) {
std::ostringstream ostr;
v->print(ostr, v->getSize());
VLOG(lvl_) << std::endl << ostr.str();
VLOG(MKLDNN_ALL) << std::endl << ostr.str();
}
double MKLDNNTester::getDelta(const real* d1,
@ -314,7 +317,7 @@ void MKLDNNTester::runOnce() {
UpdateCallback updateCallback = [](Parameter* para) {
auto& grad = para->getBuf(PARAMETER_GRADIENT);
auto& value = para->getBuf(PARAMETER_VALUE);
real lr = 1e-3;
real lr = 1e-2;
value->add(*grad, lr);
grad->zeroMem();
};
@ -340,10 +343,9 @@ void MKLDNNTester::run(const TestConfig& dnn,
size_t batchSize,
size_t inputImgH,
size_t inputImgW,
bool printDetails,
size_t iter,
float epsilon,
bool log,
int level) {
float epsilon) {
CHECK(dnn.layerConfig.type().compare(0, 7, "mkldnn_") == 0 ||
dnn.layerConfig.active_type().compare(0, 7, "mkldnn_") == 0)
<< "should be MKLDNN layer or MKLDNN activation";
@ -359,10 +361,9 @@ void MKLDNNTester::run(const TestConfig& dnn,
ih_ = inputImgH;
iw_ = inputImgW;
log_ = printDetails;
iter_ = iter;
eps_ = epsilon;
log_ = log;
lvl_ = level;
// Firstly test mkldnn init from PARAM_FORMAT_ORIGINAL weight
reset(dnn, ref, batchSize);
@ -531,9 +532,11 @@ void MKLDNNTester::getOutResult(const std::string& configPath,
void MKLDNNTester::compareResult(DataOut& ref, DataOut& dnn, float eps) {
CHECK_EQ(ref.outValues.size(), dnn.outValues.size());
CHECK_EQ(ref.paraValues.size(), dnn.paraValues.size());
VLOG(MKLDNN_TESTS) << "compare value size: " << ref.outValues.size();
for (size_t i = 0; i < ref.outValues.size(); i++) {
EXPECT_LE(fabs(compareMatrix(ref.outValues[i], dnn.outValues[i])), eps);
}
VLOG(MKLDNN_TESTS) << "compare param size: " << ref.outValues.size();
for (size_t i = 0; i < ref.paraValues.size(); i++) {
EXPECT_LE(fabs(compareVector(ref.paraValues[i], dnn.paraValues[i])), eps);
}
@ -544,9 +547,10 @@ void MKLDNNTester::runBranchesTest(const std::string& configPath,
float eps) {
DataIn in;
initArgument(in, configPath, iter);
DataOut outCpu, outDnn;
VLOG(MKLDNN_TESTS) << "runing cpu network";
getOutResult(configPath, in, outCpu, false, iter);
VLOG(MKLDNN_TESTS) << "runing mkldnn network";
getOutResult(configPath, in, outDnn, true, iter);
compareResult(outCpu, outDnn, eps);

@ -58,8 +58,6 @@ protected:
size_t iter_;
/// whether to print out the details
bool log_;
/// vlog level to print the matrix details datas
int lvl_;
/// epsilon
float eps_;
/// input image size, default 1
@ -70,7 +68,6 @@ public:
iter_ = iter;
eps_ = epsilon;
log_ = false;
lvl_ = MKLDNN_ALL;
}
~MKLDNNTester() {}
@ -81,10 +78,9 @@ public:
size_t batchSize,
size_t inputImgH = 1,
size_t inputImgW = 1,
bool printDetails = false,
size_t iter = 3,
float epsilon = 1e-4,
bool log = false,
int level = MKLDNN_ALL);
float epsilon = 1e-4);
static void runBranchesTest(const std::string& configPath,
size_t iter = 3,
float eps = 1e-4);

@ -51,7 +51,10 @@ def test_sparse_non_value_no_seq(setting, filename):
yield [(i + 1) * (j + 1) for j in xrange(10)]
@provider(input_types=[sparse_vector(30000, seq_type=SequenceType.NO_SEQUENCE)])
@provider(input_types=[
sparse_float_vector(
30000, seq_type=SequenceType.NO_SEQUENCE)
])
def test_sparse_value_no_seq(setting, filename):
for i in xrange(200):
yield [((i + 1) * (j + 1), float(j) / float(i + 1)) for j in xrange(10)]

@ -18,7 +18,7 @@ using namespace mkldnn; // NOLINT
namespace paddle {
MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, memory::primitive_desc pd) {
MKLDNNMatrixPtr MKLDNNMatrix::create(memory::primitive_desc pd, MatrixPtr m) {
memory::desc md = pd.desc();
size_t ndims = md.data.ndims;
int* dims = md.data.dims;
@ -41,12 +41,12 @@ MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, memory::primitive_desc pd) {
return std::make_shared<MKLDNNMatrix>(cpuMatrix, pd);
}
MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m,
memory::dims dims,
MKLDNNMatrixPtr MKLDNNMatrix::create(memory::dims dims,
memory::format fmt,
engine& eg,
MatrixPtr m,
mkldnn::memory::data_type dtype) {
return create(m, memory::primitive_desc(memory::desc(dims, dtype, fmt), eg));
return create(createPrimitiveDesc(dims, fmt, eg, dtype), m);
}
std::shared_ptr<reorder> MKLDNNMatrix::createReorder(const MKLDNNMatrixPtr& src,

@ -40,24 +40,37 @@ public:
/**
* Create MKLDNNMatrix from a MatrixPtr and memory primitive_desc
*/
static MKLDNNMatrixPtr create(MatrixPtr m, mkldnn::memory::primitive_desc pd);
static MKLDNNMatrixPtr create(mkldnn::memory::primitive_desc pd,
MatrixPtr m = nullptr);
/**
* Create MKLDNNMatrix from a MatrixPtr and memory details info
*/
static MKLDNNMatrixPtr create(
MatrixPtr m,
mkldnn::memory::dims dims,
mkldnn::memory::format fmt,
mkldnn::engine& eg,
MatrixPtr m = nullptr,
mkldnn::memory::data_type dtype = mkldnn::memory::data_type::f32);
/**
* Create primitive descriptor.
* default with f32 dtype
*/
static mkldnn::memory::primitive_desc createPrimitiveDesc(
const mkldnn::memory::dims dims,
const mkldnn::memory::format& fmt,
const mkldnn::engine& eg,
const mkldnn::memory::data_type& dtype = mkldnn::memory::data_type::f32) {
return mkldnn::memory::primitive_desc(memory::desc(dims, dtype, fmt), eg);
}
/**
* Create Memory descriptor.
* default with any format and f32 dtype
*/
static mkldnn::memory::desc createMemoryDesc(
const mkldnn::memory::dims& dims,
const mkldnn::memory::dims dims,
const mkldnn::memory::format& fmt = mkldnn::memory::format::any,
const mkldnn::memory::data_type& dtype = mkldnn::memory::data_type::f32) {
return mkldnn::memory::desc(dims, dtype, fmt);

@ -115,7 +115,8 @@ set(DEPS_OPS
softmax_with_cross_entropy_op
sum_op
pool_op
pool_with_index_op)
pool_with_index_op
lstm_op)
op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc
@ -126,6 +127,7 @@ op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax)
op_library(sum_op DEPS net_op)
op_library(pool_op DEPS pooling)
op_library(pool_with_index_op DEPS pooling)
op_library(lstm_op DEPS sequence2batch lstm_compute)
list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS})
foreach(src ${GENERAL_OPS})

@ -114,7 +114,7 @@ class GemmConv2DKernel : public framework::OpKernel<T> {
// im2col
Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
im2col(context.device_context(), in_slice, col, strides[0], strides[1],
paddings[0], paddings[1]);
paddings[0], paddings[0], paddings[1], paddings[1]);
// gemm
Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
@ -213,7 +213,8 @@ class GemmConvGrad2DKernel : public framework::OpKernel<T> {
Tensor in_grad_slice =
in_grad_batch.Slice(g * in_step, (g + 1) * in_step);
col2im(context.device_context(), in_grad_slice, col, strides[0],
strides[1], paddings[0], paddings[1]);
strides[1], paddings[0], paddings[0], paddings[1],
paddings[1]);
}
}
}
@ -235,7 +236,8 @@ class GemmConvGrad2DKernel : public framework::OpKernel<T> {
out_grad_batch.Slice(g * out_step, (g + 1) * out_step);
Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
im2col(context.device_context(), in_slice, col, strides[0],
strides[1], paddings[0], paddings[1]);
strides[1], paddings[0], paddings[0], paddings[1],
paddings[1]);
// gemm
Tensor filter_grad_slice =

@ -0,0 +1,107 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/conv2dtranspose_op.h"
namespace paddle {
namespace operators {
void Conv2DTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
PADDLE_ENFORCE(ctx->HasInput("Input"),
"Input(Input) of Conv2DTransposeOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Filter"),
"Input(Filter) of Conv2DTransposeOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Output"),
"Output(Output) of Conv2DTransposeOp should not be null.");
auto in_dims = ctx->GetInputDim("Input");
auto filter_dims = ctx->GetInputDim("Filter");
std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
for (size_t i = 0; i < paddings.size(); ++i) {
PADDLE_ENFORCE_EQ(paddings[i], 0,
"No Padding allowed in conv transpose op.");
}
PADDLE_ENFORCE_EQ(in_dims.size(), 4,
"Conv2DTransposeOp input should be 4-D tensor.");
PADDLE_ENFORCE_EQ(filter_dims.size(), 4,
"Conv2DTransposeOp filter should be 4-D tensor.");
PADDLE_ENFORCE_EQ(in_dims[1], filter_dims[0],
"input and kernel input dimension should be equal.");
auto output_height = (in_dims[2] - 1) * strides[0] + filter_dims[2];
auto output_width = (in_dims[3] - 1) * strides[1] + filter_dims[3];
ctx->SetOutputDim("Output",
{in_dims[0], filter_dims[1], output_height, output_width});
}
Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(
framework::OpProto* proto, framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput(
"Input",
"(Tensor) The input tensor of convolution transpose operator. "
"The format of input tensor is NCHW. Where N is batch size, C is the "
"number of input channels, H and W is the height and width of image.");
AddInput("Filter",
"(Tensor) The filter tensor of convolution transpose operator."
"The format of the filter tensor is CMHW, where C is the number of "
"output image channels, M is the number of input image channels, "
"H and W is height and width of filter. "
"We enforce groups number == 1 and padding == 0 in "
"convolution transpose Scenario.");
AddOutput("Output",
"(Tensor) The output tensor of convolution transpose operator."
"The format of output tensor is also NCHW.");
AddAttr<std::vector<int>>("strides",
"strides of convolution transpose operator.")
.SetDefault({1, 1});
AddAttr<std::vector<int>>("paddings",
"paddings of convolution transpose operator.")
.SetDefault({0, 0});
AddComment(R"DOC(
The convolution transpose operation calculates the output based on the input, filter
and strides, paddings, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape.
)DOC");
}
void Conv2DTransposeOpGrad::InferShape(
framework::InferShapeContext* ctx) const {
auto in_dims = ctx->GetInputDim("Input");
auto filter_dims = ctx->GetInputDim("Filter");
if (ctx->HasOutput(framework::GradVarName("Input"))) {
ctx->SetOutputDim(framework::GradVarName("Input"), in_dims);
}
if (ctx->HasOutput(framework::GradVarName("Filter"))) {
ctx->SetOutputDim(framework::GradVarName("Filter"), filter_dims);
}
}
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(conv2dtranspose, ops::Conv2DTransposeOp,
ops::Conv2DTransposeOpMaker, conv2dtranspose_grad,
ops::Conv2DTransposeOpGrad);
REGISTER_OP_CPU_KERNEL(
conv2dtranspose,
ops::GemmConv2DTransposeKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
conv2dtranspose_grad,
ops::GemmConv2DTransposeGradKernel<paddle::platform::CPUPlace, float>);

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save